diff options
author | dos-reis <gdr@axiomatics.org> | 2011-03-18 03:06:05 +0000 |
---|---|---|
committer | dos-reis <gdr@axiomatics.org> | 2011-03-18 03:06:05 +0000 |
commit | 378ef73f2cd7a54f405035b3b569b395554377d7 (patch) | |
tree | 88188654062a8f1410f57db32498bff61efc1b29 /src/utils | |
parent | a101ddeb749d481e984a843de98e560e88af4c96 (diff) | |
download | open-axiom-378ef73f2cd7a54f405035b3b569b395554377d7.tar.gz |
* utils/sexpr.H: Support more specialized s-expressions.
* utils/sexpr.cc: Likewise.
Diffstat (limited to 'src/utils')
-rw-r--r-- | src/utils/sexpr.H | 181 | ||||
-rw-r--r-- | src/utils/sexpr.cc | 425 |
2 files changed, 479 insertions, 127 deletions
diff --git a/src/utils/sexpr.H b/src/utils/sexpr.H index 29fe07a9..7b62a328 100644 --- a/src/utils/sexpr.H +++ b/src/utils/sexpr.H @@ -53,12 +53,20 @@ namespace OpenAxiom { namespace Sexpr { + struct BasicError { + explicit BasicError(const std::string& s) : msg(s) { } + const std::string& message() const { return msg; } + protected: + std::string msg; + }; + // ----------- // -- Token -- // ----------- struct Token { enum Type { unknown, // unidentified token + semicolon = OPENAXIOM_SEXPR_TOKEN1(';'), // comment dot = OPENAXIOM_SEXPR_TOKEN1('.'), // "." comma = OPENAXIOM_SEXPR_TOKEN1(','), // "," open_paren = OPENAXIOM_SEXPR_TOKEN1('('), // "(" @@ -69,6 +77,10 @@ namespace OpenAxiom { sharp_open_paren = OPENAXIOM_SEXPR_TOKEN2('#','('), // "#(" sharp_apostrophe = OPENAXIOM_SEXPR_TOKEN2('#','\''), // "#'" sharp_colon = OPENAXIOM_SEXPR_TOKEN2('#',':'), // "#:" + sharp_plus = OPENAXIOM_SEXPR_TOKEN2('#','+'), // "#+" + sharp_minus = OPENAXIOM_SEXPR_TOKEN2('#','-'), // "#-" + sharp_dot = OPENAXIOM_SEXPR_TOKEN2('#','.'), // "#." + comma_at = OPENAXIOM_SEXPR_TOKEN2(',','@'), // ",@" digraph_end = OPENAXIOM_SEXPR_TOKEN2(256,256), integer, // integer literal character, // character literal @@ -100,6 +112,9 @@ namespace OpenAxiom { : strings(pool), tokens(toks) { } const char* tokenize(const char*, const char*); + BasicString intern(const char* s, size_t n) { + return strings.intern(s, n); + } private: StringPool& strings; // where to allocate lexemes from @@ -109,6 +124,7 @@ namespace OpenAxiom { // ------------ // -- Syntax -- // ------------ + // Base class of syntax object classes. struct Syntax { struct Visitor; // base class of syntax visitors virtual void accept(Visitor&) const = 0; @@ -132,6 +148,7 @@ namespace OpenAxiom { // ------------- // -- Integer -- // ------------- + // Integer literal syntax objects struct Integer : Atom { explicit Integer(const Token&); void accept(Visitor&) const; @@ -140,6 +157,7 @@ namespace OpenAxiom { // --------------- // -- Character -- // --------------- + // Character literal syntax objects. struct Character : Atom { explicit Character(const Token&); void accept(Visitor&) const; @@ -148,6 +166,7 @@ namespace OpenAxiom { // ------------ // -- String -- // ------------ + // Striing literal syntax objjects. struct String : Atom { explicit String(const Token&); void accept(Visitor&) const; @@ -172,6 +191,7 @@ namespace OpenAxiom { // --------------- // -- Reference -- // --------------- + // Back reference object to a syntax object. struct Reference : Atom { Reference(const Token&, size_t); size_t tag() const { return pos; } @@ -183,6 +203,7 @@ namespace OpenAxiom { // ------------ // -- Anchor -- // ------------ + // Base anchor syntax object. struct Anchor : Syntax { Anchor(size_t, const Syntax*); size_t ref() const { return tag; } @@ -193,43 +214,94 @@ namespace OpenAxiom { const Syntax* const val; }; - // ----------- - // -- Quote -- - // ----------- - struct Quote : Syntax { - explicit Quote(const Syntax*); + // -- Abstract over common implementation of unary special operators. + template<typename T> + struct unary_form : Syntax { const Syntax* body() const { return form; } void accept(Visitor&) const; + protected: + unary_form(const Syntax* f) : form(f) { } private: const Syntax* const form; }; + + // ----------- + // -- Quote -- + // ----------- + // Quotation syntax object. + struct Quote : unary_form<Quote> { + explicit Quote(const Syntax*); + }; + + // --------------- + // -- Antiquote -- + // --------------- + // Quasi-quotation syntax object. + struct Antiquote : unary_form<Antiquote> { + explicit Antiquote(const Syntax*); + }; + + // ------------ + // -- Expand -- + // ------------ + // Expansion request inside a quasi-quotation. + struct Expand : unary_form<Expand> { + explicit Expand(const Syntax*); + }; + + // ---------- + // -- Eval -- + // ---------- + // Read-time evaluation request syntax object. + struct Eval : unary_form<Eval> { + explicit Eval(const Syntax*); + }; + + // ------------ + // -- Splice -- + // ------------ + // Splice request syntax object inside a quasi-quotation. + struct Splice : unary_form<Splice> { + explicit Splice(const Syntax*); + }; // -------------- // -- Function -- // -------------- - struct Function : Syntax { + // Function literal syntax object. + struct Function : unary_form<Function> { explicit Function(const Syntax*); - const Syntax* code() const { return form; } - void accept(Visitor&) const; - private: - const Syntax* const form; }; - // ---------- - // -- Pair -- - // ---------- - struct Pair : Syntax { - Pair(const Syntax*, const Syntax*); - const Syntax* first() const { return elts.first; } - const Syntax* second() const { return elts.second; } - void accept(Visitor&) const; - private: - const std::pair<const Syntax*, const Syntax*> elts; + // ------------- + // -- DotTail -- + // ------------- + // Objects of this type represents the tail of syntactic + // objects denoting dotted pair syntax `(a . b)'. + struct DotTail : unary_form<DotTail> { + explicit DotTail(const Syntax*); + }; + + // ------------- + // -- Include -- + // ------------- + // Conditional inclusion syntax object + struct Include : unary_form<Include> { + explicit Include(const Syntax*); + }; + + // ------------- + // -- Exclude -- + // ------------- + // Conditional exclusion syntax object + struct Exclude : unary_form<Exclude> { + explicit Exclude(const Syntax*); }; // ---------- // -- List -- // ---------- + // List syntax objects. struct List : Syntax, private std::vector<const Syntax*> { typedef std::vector<const Syntax*> base; using base::const_iterator; @@ -247,6 +319,7 @@ namespace OpenAxiom { // ------------ // -- Vector -- // ------------ + // Vector syntax objects. struct Vector : Syntax, private std::vector<const Syntax*> { typedef std::vector<const Syntax*> base; using base::const_iterator; @@ -274,12 +347,24 @@ namespace OpenAxiom { virtual void visit(const Reference&); virtual void visit(const Anchor&) = 0; virtual void visit(const Quote&) = 0; + virtual void visit(const Antiquote&) = 0; + virtual void visit(const Expand&) = 0; + virtual void visit(const Eval&) = 0; + virtual void visit(const Splice&) = 0; virtual void visit(const Function&) = 0; - virtual void visit(const Pair&) = 0; + virtual void visit(const Include&) = 0; + virtual void visit(const Exclude&) = 0; + virtual void visit(const DotTail&) = 0; virtual void visit(const List&) = 0; virtual void visit(const Vector&) = 0; }; + template<typename T> + void + unary_form<T>::accept(Visitor& v) const { + v.visit(static_cast<const T&>(*this)); + } + // --------------- // -- Allocator -- // --------------- @@ -295,26 +380,15 @@ namespace OpenAxiom { return std::less<BasicString>()(lhs.lexeme(), rhs.lexeme()); } - bool operator()(const Quote& lhs, const Quote& rhs) const { + template<typename T> + bool + operator()(const unary_form<T>& lhs, const unary_form<T>& rhs) const { return std::less<const void*>()(lhs.body(), rhs.body()); } bool operator()(const Anchor& lhs, const Anchor& rhs) const { return std::less<size_t>()(lhs.ref(), rhs.ref()); } - - bool operator()(const Function& lhs, const Function& rhs) const { - return std::less<const void*>()(lhs.code(), rhs.code()); - } - - bool operator()(const Pair& lhs, const Pair& rhs) const { - std::less<const void*> cmp; - if (cmp(lhs.first(), rhs.first())) - return true; - if (cmp(rhs.first(), lhs.first())) - return false; - return cmp(lhs.second(), rhs.second()); - } }; template<typename T> @@ -345,8 +419,14 @@ namespace OpenAxiom { const Reference* make_reference(const Token&, size_t); const Anchor* make_anchor(size_t, const Syntax*); const Quote* make_quote(const Syntax*); + const Antiquote* make_antiquote(const Syntax*); + const Expand* make_expand(const Syntax*); + const Eval* make_eval(const Syntax*); + const Splice* make_splice(const Syntax*); const Function* make_function(const Syntax*); - const Pair* make_pair(const Syntax*, const Syntax*); + const Include* make_include(const Syntax*); + const Exclude* make_exclude(const Syntax*); + const DotTail* make_dot_tail(const Syntax*); const List* make_list(const std::vector<const Syntax*>&); const Vector* make_vector(const std::vector<const Syntax*>&); @@ -358,8 +438,14 @@ namespace OpenAxiom { UniqueAllocator<Anchor> ancs; UniqueAllocator<Reference> refs; UniqueAllocator<Quote> quotes; + UniqueAllocator<Antiquote> antis; + UniqueAllocator<Expand> exps; UniqueAllocator<Function> funs; - UniqueAllocator<Pair> pairs; + UniqueAllocator<Include> incs; + UniqueAllocator<Exclude> excs; + UniqueAllocator<Eval> evls; + UniqueAllocator<Splice> spls; + UniqueAllocator<DotTail> tails; Memory::Factory<List> lists; Memory::Factory<Vector> vectors; List empty_list; @@ -388,10 +474,29 @@ namespace OpenAxiom { const Symbol* parse_uninterned(const Token*&, const Token*); const Function* parse_function(const Token*&, const Token*); const Quote* parse_quote(const Token*&, const Token*); + const Antiquote* parse_antiquote(const Token*&, const Token*); + const Include* parse_include(const Token*&, const Token*); + const Exclude* parse_exclude(const Token*&, const Token*); + const Expand* parse_expand(const Token*&, const Token*); + const Eval* parse_eval(const Token*&, const Token*); + const Splice* parse_splice(const Token*&, const Token*); const Vector* parse_vector(const Token*&, const Token*); - const Syntax* parse_list_or_pair(const Token*&, const Token*); + const List* parse_list(const Token*&, const Token*); const Syntax* parse_syntax(const Token*&, const Token*); }; + + // ------------ + // -- Module -- + // ------------ + // Entire s-expression input file. + struct Module : std::vector<const Syntax*> { + explicit Module(const std::string&); + const std::string& name() const { return nm; } + private: + const std::string nm; + StringPool raw_strs; + Allocator allocator; + }; } } diff --git a/src/utils/sexpr.cc b/src/utils/sexpr.cc index 6ed2a964..69dc7b72 100644 --- a/src/utils/sexpr.cc +++ b/src/utils/sexpr.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2010, Gabriel Dos Reis. +// Copyright (C) 2010-2011, Gabriel Dos Reis. // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -38,9 +38,28 @@ namespace OpenAxiom { namespace Sexpr { + template<typename T, int N> + static inline int + length(const T(&)[N]) { + return N; + } + + template<typename Sequence> + static inline typename Sequence::const_pointer + begin_ptr(const Sequence& s) { + return &*s.begin(); + } + + template<typename Sequence> + static inline typename Sequence::const_pointer + end_ptr(const Sequence& s) { + return s.empty() ? 0 : &*s.begin() + s.size(); + } + std::ostream& operator<<(std::ostream& os, const Token& t) { switch (t.type) { + case Token::semicolon: os << "SEMICOLON"; break; case Token::dot: os << "DOT"; break; case Token::comma: os << "COMMA"; break; case Token::open_paren: os << "OPEN_PAREN"; break; @@ -51,6 +70,10 @@ namespace OpenAxiom { case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break; case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break; case Token::sharp_colon: os << "SHARP_COLON"; break; + case Token::sharp_plus: os << "SHARP_PLUS"; break; + case Token::sharp_minus: os << "SHARP_MINUS"; break; + case Token::sharp_dot: os << "SHARP_DOT"; break; + case Token::comma_at: os << "COMMA_AT"; break; case Token::integer: os << "INTEGER"; break; case Token::character: os << "CHARACTER"; break; case Token::string: os << "STRING"; break; @@ -73,6 +96,14 @@ namespace OpenAxiom { return os << ')'; } + // ----------- + // -- Lexer -- + // ----------- + static void + syntax_error(const std::string& s) { + throw BasicError(s); + } + // Return true if character `c' introduces a blank. static bool is_blank(char c) { @@ -85,7 +116,7 @@ namespace OpenAxiom { is_delimiter(char c) { return is_blank(c) or c == '(' or c == ')' or c == '\'' - or c == '`' or c == '\\' or c == '#'; + or c == '`' or c == '#'; } // Move `cur' past all consecutive blank characters, and @@ -97,11 +128,27 @@ namespace OpenAxiom { return cur; } + // Move `cur' to end-of-line marker. + static const char* + skip_to_eol(const char*& cur, const char* end) { + // FIXME: properly handle CR+LF. + while (cur < end and *cur != '\n') + ++cur; + return cur; + } + // Move `cur' until a word boundary is reached. static const char* skip_to_word_boundary(const char*& cur, const char* end) { - while (cur < end and not is_delimiter(*cur)) - ++cur; + bool saw_escape = false; + for (; cur < end; ++cur) { + if (saw_escape) + saw_escape = false; + else if (*cur == '\\') + saw_escape = true; + else if (is_delimiter(*cur)) + break; + } return cur; } @@ -109,21 +156,19 @@ namespace OpenAxiom { // Return true if the character was seen. static bool skip_to_nonescaped_char(const char*& cur, const char* end, char c) { + bool saw_escape = false; for (; cur < end; ++cur) - if (cur[0] == c and cur[-1] != '\\') { + if (saw_escape) + saw_escape = false; + else if (*cur == '\\') + saw_escape = true; + else if (*cur == c) { ++cur; return true; } return false; } - // Move `cur' past the closing fence of an absolute identifier. - // Return true if the closing fence was effectively seen. - static inline bool - skip_to_fence(const char*& cur, const char* end) { - return skip_to_nonescaped_char(cur, end, '|'); - } - // Move `cur' past the closing quote of string literal. // Return true if the closing fence was effectively seen. static inline bool @@ -137,8 +182,9 @@ namespace OpenAxiom { identifier_part(char c) { switch (c) { case '+': case '-': case '*': case '/': case '%': case '^': - case '~': case '@': case '$': case '&': case ':': case '=': + case '~': case '@': case '$': case '&': case '=': case '<': case '>': case '?': case '!': case '_': + case '[': case ']': case '{': case '}': return true; default: return isalnum(c); @@ -149,7 +195,8 @@ namespace OpenAxiom { // the sharp character. static bool special_after_sharp(char c) { - return c == '(' or c == '\'' or c == ':'; + return c == '(' or c == '\'' or c == ':' + or c == '+' or c == '-' or c == '.'; } // Return true if the sequence `[cur, end)' has a prefix that is @@ -175,73 +222,119 @@ namespace OpenAxiom { t.type = Token::integer; } + // Returns true if the first characters in the range + // [cur, last) start an identifier. + static bool + start_symbol(const char* cur, const char* last) { + if (cur >= last) + return false; + return identifier_part(*cur) + or *cur == '|' or *cur == ':'; + } + + // We are processing a symbol token. Accumulate all + // legitimate characters till the end of the token. + static void + skip_to_end_of_symbol(const char*& cur, const char* end) { + const char c = *cur; + if (*cur == '|') + skip_to_nonescaped_char(++cur, end, c); + else + skip_to_word_boundary(cur, end); + if (cur < end and *cur == ':') + skip_to_end_of_symbol(cur, end); + } + + static Token + match_maybe_symbol(Lexer* lexer, const char*& cur, const char* end) { + Token t = { Token::identifier, 0 }; + const char* start = cur; + skip_to_end_of_symbol(cur, end); + t.lexeme = lexer->intern(start, cur - start); + maybe_reclassify(t); + return t; + } + const char* Lexer::tokenize(const char* cur, const char* end) { while (skip_blank(cur, end) < end) { Token t = { Token::unknown, 0 }; switch (*cur) { - case '.': case ',': case '(': case ')': - case '\'': case '\\': + case ';': { + const char* start = cur; + t.type = Token::semicolon; + skip_to_eol(cur, end); + t.lexeme = intern(start, cur - start); + break; + } + + case '.': case '(': case ')': case '\'': case '`': t.type = Token::Type(OPENAXIOM_SEXPR_TOKEN1(*cur)); - t.lexeme = strings.intern(cur, 1); + t.lexeme = intern(cur, 1); ++cur; break; + case ',': { + const char* start = cur; + if (++cur < end and *cur == '@') { + t.type = Token::comma_at; + ++cur; + } + else + t.type = Token::comma; + t.lexeme = intern(start, cur - start); + break; + } + + case '\\': + t = match_maybe_symbol(this, cur, end); + break; + case '#': { const char* start = cur; if (cur + 1 < end and special_after_sharp(cur[1])) { t.type = Token::Type(OPENAXIOM_SEXPR_TOKEN2(cur[0], cur[1])); - t.lexeme = strings.intern(cur, 2); + t.lexeme = intern(cur, 2); cur += 2; } + else if (cur + 1 < end and cur[1] == '\\') { + start = cur += 2; + if (not isalnum(*cur)) + ++cur; + else + skip_to_word_boundary(cur, end); + t.type = Token::character; + t.lexeme = intern(start, cur - start); + } else if (only_digits_before_equal_or_shap(++cur, end)) { t.type = *cur == '#' ? Token::sharp_integer_sharp : Token::sharp_integer_equal; - t.lexeme = strings.intern(start, cur - start + 1); + t.lexeme = intern(start, cur - start + 1); ++cur; } - else if (cur + 1 < end and cur[1] == '\\') { - start = cur += 2; - skip_to_word_boundary(cur, end); - t.type = Token::character; - t.lexeme = strings.intern(start, cur - start); - } else { skip_to_word_boundary(cur, end); - t.lexeme = strings.intern(start, cur - start); + t.lexeme = intern(start, cur - start); } break; } - case '|': { - const char* start = cur; - skip_to_fence(++cur, end); - t.type = Token::identifier; - t.lexeme = strings.intern(start, cur - start); - break; - } - case '"': { const char* start = cur; skip_to_quote(++cur, end); t.type = Token::string; - t.lexeme = strings.intern(start, cur - start); + t.lexeme = intern(start, cur - start); break; } default: - if (identifier_part(*cur)) { - const char* start = cur; - skip_to_word_boundary(++cur, end); - t.type = Token::identifier; - t.lexeme = strings.intern(start, cur - start); - maybe_reclassify(t); - } + if (start_symbol(cur, end)) + t = match_maybe_symbol(this, cur, end); else { const char* start = cur; skip_to_word_boundary(++cur, end); - t.lexeme = strings.intern(start, cur - start); + t.lexeme = intern(start, cur - start); } break; } @@ -323,32 +416,45 @@ namespace OpenAxiom { // ----------- // -- Quote -- // ----------- - Quote::Quote(const Syntax* s) : form(s) { } + Quote::Quote(const Syntax* s) : unary_form<Quote>(s) { } - void - Quote::accept(Visitor& v) const { - v.visit(*this); - } + // --------------- + // -- Antiquote -- + // --------------- + Antiquote::Antiquote(const Syntax* s) : unary_form<Antiquote>(s) { } + + // ------------ + // -- Expand -- + // ------------ + Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { } + + // ---------- + // -- Eval -- + // ---------- + Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { } + + // ------------ + // -- Splice -- + // ------------ + Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { } // -------------- // -- Function -- // -------------- - Function::Function(const Syntax* s) : form(s) { } + Function::Function(const Syntax* s) : unary_form<Function>(s) { } - void - Function::accept(Visitor& v) const { - v.visit(*this); - } + // ------------- + // -- Include -- + Include::Include(const Syntax* s) : unary_form<Include>(s) { } - // ---------- - // -- Pair -- - // ---------- - Pair::Pair(const Syntax* f, const Syntax* s) : elts(f, s) { } + // ------------- + // -- Exclude -- + Exclude::Exclude(const Syntax* s) : unary_form<Exclude>(s) { } - void - Pair::accept(Visitor& v) const { - v.visit(*this); - } + // ------------- + // -- DotTail -- + // ------------- + DotTail::DotTail(const Syntax* f) : unary_form<DotTail>(f) { } // ---------- // -- List -- @@ -459,14 +565,44 @@ namespace OpenAxiom { return quotes.allocate(s); } + const Antiquote* + Allocator::make_antiquote(const Syntax* s) { + return antis.allocate(s); + } + + const Expand* + Allocator::make_expand(const Syntax* s) { + return exps.allocate(s); + } + + const Eval* + Allocator::make_eval(const Syntax* s) { + return evls.allocate(s); + } + + const Splice* + Allocator::make_splice(const Syntax* s) { + return spls.allocate(s); + } + const Function* Allocator::make_function(const Syntax* s) { return funs.allocate(s); } - const Pair* - Allocator::make_pair(const Syntax* f, const Syntax* s) { - return pairs.allocate(f, s); + const Include* + Allocator::make_include(const Syntax* s) { + return incs.allocate(s); + } + + const Exclude* + Allocator::make_exclude(const Syntax* s) { + return excs.allocate(s); + } + + const DotTail* + Allocator::make_dot_tail(const Syntax* f) { + return tails.allocate(f); } const List* @@ -490,7 +626,7 @@ namespace OpenAxiom { // Signal a parse error static void parse_error(const std::string& s) { - throw SystemError(s); + throw BasicError(s); } // Signal that an expected syntax object was missing @@ -531,10 +667,34 @@ namespace OpenAxiom { return alloc.make_symbol(*cur++, kind); } + // List of lower case character names + static const char* charname[] = { + "newline", "space", "page", "tab", + "backspace", "return", "linefeed" + }; + + static bool + equal_character_name(BasicString lhs, const char* rhs) { + if (lhs->size() != strlen(rhs)) + return false; + for (const char* cur = lhs->begin(); cur != lhs->end(); ++cur) + if (tolower(*cur) != *rhs++) + return false; + return true; + } + + static bool + valid_character_name(BasicString s) { + for (int i = 0; i < length(charname); ++i) + if (equal_character_name(s, charname[i])) + return true; + return false; + } + const Character* Parser::parse_character(const Token*& cur, const Token* last) { - // NOTE: For the time being, accept only simple characters. - if (cur->lexeme->size() != 1) + if (cur->lexeme->size() != 1 + and not valid_character_name(cur->lexeme)) parse_error("invalid literal character syntax"); return alloc.make_character(*cur++); } @@ -582,11 +742,71 @@ namespace OpenAxiom { return alloc.make_quote(parse_syntax(cur, last)); } + // Parse an antiquotation + const Antiquote* + Parser::parse_antiquote(const Token*& cur, const Token* last) { + if (cur == last) + unexpected_end_of_input("backquote sign"); + return alloc.make_antiquote(parse_syntax(cur, last)); + } + + // Parse an expansion request form + const Expand* + Parser::parse_expand(const Token*& cur, const Token* last) { + const Syntax* s = parse_syntax(cur, last); + if (s == 0) + unexpected_end_of_input("comma sign"); + return alloc.make_expand(s); + } + + // Parse conditional inclusions + const Include* + Parser::parse_include(const Token*& cur, const Token* last) { + const Syntax* s = parse_syntax(cur, last); + if (s == 0) + unexpected_end_of_input("sharp-plus sign"); + return alloc.make_include(s); + } + + const Exclude* + Parser::parse_exclude(const Token*& cur, const Token* last) { + const Syntax* s = parse_syntax(cur, last); + if (s == 0) + unexpected_end_of_input("sharp-minus sign"); + return alloc.make_exclude(s); + } + + const Eval* + Parser::parse_eval(const Token*& cur, const Token* last) { + const Syntax* s = parse_syntax(cur, last); + if (s == 0) + unexpected_end_of_input("sharp-dot sign"); + return alloc.make_eval(s); + } + + const Splice* + Parser::parse_splice(const Token*& cur, const Token* last) { + const Syntax* s = parse_syntax(cur, last); + if (s == 0) + unexpected_end_of_input("comma-at sign"); + return alloc.make_splice(s); + } + + // Skip tokens that are semantically blanks, e.g. comments. + // Return true if not at end of tokens. + static bool + skip_ignorable_tokens(const Token*& cur, const Token* last) { + while (cur < last and cur->type == Token::semicolon) + ++cur; + return cur != last; + } + // Parse a vector of syntax objects: #(s .. s) const Vector* Parser::parse_vector(const Token*& cur, const Token* last) { std::vector<const Syntax*> elts; - while (cur < last and cur->type != Token::close_paren) + while (skip_ignorable_tokens(cur, last) + and cur->type != Token::close_paren) elts.push_back(parse_syntax(cur, last)); if (cur == last) missing_closer_for("vector"); @@ -595,31 +815,23 @@ namespace OpenAxiom { } // Constructs a pair or a list syntax object. - // This function is hairy for three reasons: (a) it is not known - // whether we list or a pair until after we have seen the - // enclosed tokens; (b) a dot is allowed at most once; (c) Lisp-style - // improper lists are not allowed. - const Syntax* - Parser::parse_list_or_pair(const Token*& cur, const Token* last) { + const List* + Parser::parse_list(const Token*& cur, const Token* last) { std::vector<const Syntax*> elts; - bool saw_dot = false; - while (cur < last and cur->type != Token::close_paren) { + while (skip_ignorable_tokens(cur, last) + and cur->type != Token::close_paren) { if (cur->type == Token::dot) { - if (elts.size() != 1) - parse_error("unexpected dot sign"); - saw_dot = true; - ++cur; - continue; + skip_ignorable_tokens(++cur, last); + if (const Syntax* s = parse_syntax(cur, last)) { + elts.push_back(alloc.make_dot_tail(s)); + break; + } } elts.push_back(parse_syntax(cur, last)); - if (saw_dot && elts.size() == 2) - break; } if (cur == last or cur->type != Token::close_paren) - missing_closer_for(saw_dot ? "pair" : "list"); + missing_closer_for("list"); ++cur; - if (saw_dot) - return alloc.make_pair(elts.front(), elts.back()); return alloc.make_list(elts); } @@ -628,6 +840,9 @@ namespace OpenAxiom { const Syntax* Parser::parse_syntax(const Token*& cur, const Token* last) { + if (not skip_ignorable_tokens(cur, last)) + return 0; + switch (cur->type) { case Token::integer: return alloc.make_integer(*cur++); @@ -660,7 +875,25 @@ namespace OpenAxiom { return parse_quote(++cur, last); case Token::open_paren: - return parse_list_or_pair(++cur, last); + return parse_list(++cur, last); + + case Token::sharp_plus: + return parse_include(++cur, last); + + case Token::sharp_minus: + return parse_exclude(++cur, last); + + case Token::sharp_dot: + return parse_eval(++cur, last); + + case Token::backquote: + return parse_antiquote(++cur, last); + + case Token::comma: + return parse_expand(++cur, last); + + case Token::comma_at: + return parse_splice(++cur, last); default: parse_error(std::string("parse error before ") @@ -671,9 +904,23 @@ namespace OpenAxiom { const Token* Parser::parse(const Token* cur, const Token* last) { - while (cur < last) - syns.push_back(parse_syntax(cur, last)); + while (cur < last) + if (const Syntax* s = parse_syntax(cur, last)) + syns.push_back(s); return cur; } + + Module::Module(const std::string& s) : nm(s) { + std::vector<Token> tokens; + Memory::FileMapping input(s); + Lexer lexer(raw_strs, tokens); + const char* rest = lexer.tokenize(input.begin(), input.end()); + if (rest != input.end()) + syntax_error("syntax error"); + Parser parser(allocator, *this); + const Token* tok = parser.parse(begin_ptr(tokens), end_ptr(tokens)); + if (tok != end_ptr(tokens)) + parse_error("parse error"); + } } } |