From 6d072bf729ad585b2c9ce1f95b062726bba2c896 Mon Sep 17 00:00:00 2001
From: dos-reis <gdr@axiomatics.org>
Date: Sat, 22 Jun 2013 02:59:16 +0000
Subject: Use Byte for data from input source.

---
 src/utils/sexpr.cc | 57 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 26 deletions(-)

(limited to 'src/utils/sexpr.cc')

diff --git a/src/utils/sexpr.cc b/src/utils/sexpr.cc
index 9e57765f..77ed3060 100644
--- a/src/utils/sexpr.cc
+++ b/src/utils/sexpr.cc
@@ -122,16 +122,16 @@ namespace OpenAxiom {
 
       // Move `cur' past all consecutive blank characters, and
       // return the new position.
-      static const char* 
-      skip_blank(const char*& cur, const char* end) {
+      static const Byte* 
+      skip_blank(const Byte*& cur, const Byte* end) {
          while (cur < end and is_blank(*cur))
             ++cur;
          return cur;
       }
 
       // Move `cur' to end-of-line marker.
-      static const char*
-      skip_to_eol(const char*& cur, const char* end) {
+      static const Byte*
+      skip_to_eol(const Byte*& cur, const Byte* end) {
          // FIXME: properly handle CR+LF.
          while (cur < end and *cur != '\n')
             ++cur;
@@ -139,8 +139,8 @@ namespace OpenAxiom {
       }
 
       // Move `cur' until a word boundary is reached.
-      static const char*
-      skip_to_word_boundary(const char*& cur, const char* end) {
+      static const Byte*
+      skip_to_word_boundary(const Byte*& cur, const Byte* end) {
          bool saw_escape = false;
          for (; cur < end; ++cur) {
             if (saw_escape)
@@ -156,7 +156,7 @@ namespace OpenAxiom {
-      // Move `cur' one-past a non-esacaped character `c'.
+      // Move `cur' one-past a non-escaped character `c'.
       // Return true if the character was seen.
       static bool
-      skip_to_nonescaped_char(const char*& cur, const char* end, char c) {
+      skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) {
          bool saw_escape = false;
          for (; cur < end; ++cur)
             if (saw_escape)
@@ -173,7 +173,7 @@ namespace OpenAxiom {
       // Move `cur' past the closing quote of string literal.
       // Return true if the closing fence was effectively seen.
       static inline bool
-      skip_to_quote(const char*& cur, const char* end) {
+      skip_to_quote(const Byte*& cur, const Byte* end) {
          return skip_to_nonescaped_char(cur, end, '"');
       }
 
@@ -204,7 +204,7 @@ namespace OpenAxiom {
-      // an integer followrd by the equal sign or the sharp sign.
+      // an integer followed by the equal sign or the sharp sign.
       // `cur' is moved along the way.
       static bool
-      only_digits_before_equal_or_shap(const char*& cur, const char* end) {
+      only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) {
          while (cur < end and isdigit(*cur))
             ++cur;
          return cur < end and (*cur == '#' or *cur == '=');
@@ -215,8 +215,8 @@ namespace OpenAxiom {
       // entirely of digits.
       static void
       maybe_reclassify(Token& t) {
-         const char* cur = t.lexeme->begin();
-         const char* end = t.lexeme->end();
+         const Byte* cur = t.lexeme->begin();
+         const Byte* end = t.lexeme->end();
          while (cur < end and isdigit(*cur))
             ++cur;
          if (cur == end)
@@ -226,7 +226,7 @@ namespace OpenAxiom {
       // Returns true if the first characters in the range
       // [cur, last) start an identifier.
       static bool
-      start_symbol(const char* cur, const char* last) {
+      start_symbol(const Byte* cur, const Byte* last) {
          if (cur >= last)
             return false;
          return identifier_part(*cur)
@@ -236,7 +236,7 @@ namespace OpenAxiom {
       // We are processing a symbol token.  Accumulate all
       // legitimate characters till the end of the token.
       static void
-      skip_to_end_of_symbol(const char*& cur, const char* end) {
+      skip_to_end_of_symbol(const Byte*& cur, const Byte* end) {
          const char c = *cur;
          if (*cur == '|') 
             skip_to_nonescaped_char(++cur, end, c);
@@ -247,22 +247,22 @@ namespace OpenAxiom {
       }
 
       static Token
-      match_maybe_symbol(Lexer* lexer, const char*& cur, const char* end) {
+      match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) {
          Token t = { Token::identifier, 0 };
-         const char* start = cur;
+         const Byte* start = cur;
          skip_to_end_of_symbol(cur, end);
          t.lexeme = lexer->intern(start, cur - start);
          maybe_reclassify(t);
          return t;
       }
 
-      const char*
-      Lexer::tokenize(const char* cur, const char* end) {
+      const Byte*
+      Lexer::tokenize(const Byte* cur, const Byte* end) {
          while (skip_blank(cur, end) < end) {
             Token t = { Token::unknown, 0 };
             switch (*cur) {
             case ';': {
-               const char* start = cur;
+               const Byte* start = cur;
                t.type = Token::semicolon;
                skip_to_eol(cur, end);
                t.lexeme = intern(start, cur - start);
@@ -276,7 +276,7 @@ namespace OpenAxiom {
                break;
 
             case ',': {
-               const char* start = cur;
+               const Byte* start = cur;
                if (++cur < end and *cur == '@') {
                   t.type = Token::comma_at;
                   ++cur;
@@ -292,7 +292,7 @@ namespace OpenAxiom {
                break;
 
             case '#': {
-               const char* start = cur;
+               const Byte* start = cur;
                if (cur + 1 < end and special_after_sharp(cur[1])) {
                   t.type = Token::Type(OPENAXIOM_SEXPR_TOKEN2(cur[0], cur[1]));
                   t.lexeme = intern(cur, 2);
@@ -322,7 +322,7 @@ namespace OpenAxiom {
             }
 
             case '"': {
-               const char* start = cur;
+               const Byte* start = cur;
                skip_to_quote(++cur, end);
                t.type = Token::string;
                t.lexeme = intern(start, cur - start);
@@ -333,7 +333,7 @@ namespace OpenAxiom {
                if (start_symbol(cur, end))
                   t = match_maybe_symbol(this, cur, end);
                else {
-                  const char* start = cur;
+                  const Byte* start = cur;
                   skip_to_word_boundary(++cur, end);
                   t.lexeme = intern(start, cur - start);
                }
@@ -651,7 +651,7 @@ namespace OpenAxiom {
       // The sequence of characters in [cur, last) consists
       // entirely of digits.  Return the corresponding natural value.
       static size_t
-      natural_value(const char* cur, const char* last) {
+      natural_value(const Byte* cur, const Byte* last) {
          size_t n = 0;
          for (; cur < last; ++cur)
             // FIXME: check for overflow.
@@ -678,7 +678,7 @@ namespace OpenAxiom {
       equal_character_name(BasicString lhs, const char* rhs) {
          if (lhs->size() != strlen(rhs))
             return false;
-         for (const char* cur = lhs->begin(); cur != lhs->end(); ++cur)
+         for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur)
             if (tolower(*cur) != *rhs++)
                return false;
          return true;
@@ -839,6 +839,12 @@ namespace OpenAxiom {
       Parser::Parser(Allocator& a, std::vector<const Syntax*>& v)
             : alloc(a), syns(v) { }
 
+      // Return the characters in [s->begin(), s->end()) as a std::string.
+      static std::string
+      to_string(BasicString s) {
+         return { s->begin(), s->end() };
+      }
+
       const Syntax*
       Parser::parse_syntax(const Token*& cur, const Token* last) {
          if (not skip_ignorable_tokens(cur, last))
@@ -898,7 +903,7 @@ namespace OpenAxiom {
 
          default:
             parse_error(std::string("parse error before ")
-                        + cur->lexeme->begin());
+                        + to_string(cur->lexeme));
             return 0;           // never executed
          }
       }
@@ -915,7 +920,7 @@ namespace OpenAxiom {
          std::vector<Token> tokens;
          Memory::FileMapping input(s);
          Lexer lexer(raw_strs, tokens);
-         const char* rest = lexer.tokenize(input.begin(), input.end());
+         const Byte* rest = lexer.tokenize(input.begin(), input.end());
          if (rest != input.end())
             syntax_error("syntax error");
          Parser parser(allocator, *this);
-- 
cgit v1.2.3