From 7f57a915cee3c91cddd166fe9964655696666c4b Mon Sep 17 00:00:00 2001 From: dos-reis Date: Wed, 26 Jun 2013 11:43:56 +0000 Subject: Rewrite s-expression reader. --- src/ChangeLog | 13 + src/Makefile.am | 3 +- src/Makefile.in | 3 +- src/gui/gui.pro.in | 3 +- src/gui/main-window.cc | 24 ++ src/gui/main-window.h | 5 +- src/include/diagnostics.H | 8 + src/include/sexpr.H | 295 +++++-------- src/include/structure.H | 2 - src/syntax/sexpr.cc | 1014 +++++++++++++++++---------------------------- src/utils/hammer.cc | 2 +- 11 files changed, 545 insertions(+), 827 deletions(-) (limited to 'src') diff --git a/src/ChangeLog b/src/ChangeLog index e72747a2..47561b81 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,16 @@ +2013-06-26 Gabriel Dos Reis + + * include/sexpr.H (Lexer): Remove. + (Parser): Likewise. + (Reader): New. + * syntax/sexpr.cc: Propagate changes. + * gui/main-window.h (MainWindow::read_databases): Declare. + (MainWindow::display_error): Likewise. + * gui/main-window.cc: Implement. + * gui/gui.pro.in (LIBS): Include syntax library. + * Makefile.am (oa_src_include_headers): Add sexpr.H. Fix build + failure from previous commit. + 2013-06-24 Gabriel Dos Reis * include/sexpr.H: Move from utils. 
diff --git a/src/Makefile.am b/src/Makefile.am index 9f1f04c5..d4084173 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -57,7 +57,8 @@ oa_src_include_headers = \ Input.H \ diagnostics.H \ dialect.H \ - token.H + token.H \ + sexpr.H if OA_BUILD_SMAN OA_SMAN_TARGETS = all-sman all-clef diff --git a/src/Makefile.in b/src/Makefile.in index 9e37cafb..b8e9d18b 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -402,7 +402,8 @@ oa_src_include_headers = \ Input.H \ diagnostics.H \ dialect.H \ - token.H + token.H \ + sexpr.H @OA_BUILD_SMAN_TRUE@OA_SMAN_TARGETS = all-sman all-clef @OA_BUILD_GRAPHICS_TRUE@OA_GRAPHICS_GRAPH_TARGET = all-graph diff --git a/src/gui/gui.pro.in b/src/gui/gui.pro.in index 6969b454..d118c08c 100644 --- a/src/gui/gui.pro.in +++ b/src/gui/gui.pro.in @@ -8,6 +8,7 @@ oa_targetdir = @top_builddir@/@target@ OA_INC = $${oa_targetdir}/include OA_LIB = -L@top_builddir@/@target@/lib -lOpenAxiom OA_IOLIB = -L@top_builddir@/src/io -lio +OA_SYNTAX_LIB = -L@top_builddir@/src/syntax -lsyntax ## We build in release mode. 
CONFIG += release @@ -38,7 +39,7 @@ DEPENDPATH += @srcdir@ SOURCES += server.cc conversation.cc main-window.cc debate.cc main.cc ## Additional support libraries -LIBS += $$OA_LIB $$OA_IOLIB +LIBS += $$OA_SYNTAX_LIB $$OA_LIB $$OA_IOLIB ## C++ compiler #QMAKE_CXX = @CXX@ diff --git a/src/gui/main-window.cc b/src/gui/main-window.cc index 6d3eb205..1ef6ec56 100644 --- a/src/gui/main-window.cc +++ b/src/gui/main-window.cc @@ -36,10 +36,33 @@ #include #include +#include +#include +#include #include "debate.h" #include "main-window.h" namespace OpenAxiom { + void + MainWindow::display_error(const std::string& s) { + QMessageBox::critical(this, tr("System error"), QString(s.c_str())); + } + + void + MainWindow::read_databases() { + try { + const auto& fs = server()->system_root(); + Memory::FileMapping db { fs.dbdir() + "/interp.daase" }; + Sexpr::Reader rd { db.begin(), db.end() }; + while (rd.read()) + ; + } + catch(const Diagnostics::BasicError& e) { + display_error(e.message()); + } + } + + static void connect_server_io(MainWindow* win, Debate* debate) { QObject::connect(win->server(), SIGNAL(readyReadStandardError()), win, SLOT(display_error())); @@ -70,6 +93,7 @@ namespace OpenAxiom { // wait to be pinged before displaying a prompt. This is // an unfortunate result of a rather awkward hack. server()->input(""); + read_databases(); } MainWindow::~MainWindow() { diff --git a/src/gui/main-window.h b/src/gui/main-window.h index 5e08067e..499e5f66 100644 --- a/src/gui/main-window.h +++ b/src/gui/main-window.h @@ -1,4 +1,4 @@ -// Copyright (C) 2011, Gabriel Dos Reis. +// Copyright (C) 2011-2013, Gabriel Dos Reis. // All rights reserved. // Written by Gabriel Dos Reis. 
// @@ -48,6 +48,7 @@ namespace OpenAxiom { ~MainWindow(); Server* server() { return &srv; } + void display_error(const std::string&); private slots: void done(int, QProcess::ExitStatus); @@ -56,6 +57,8 @@ namespace OpenAxiom { private: Server srv; QTabWidget tabs; + + void read_databases(); }; } diff --git a/src/include/diagnostics.H b/src/include/diagnostics.H index 8f877e2f..9cb0fce1 100644 --- a/src/include/diagnostics.H +++ b/src/include/diagnostics.H @@ -33,6 +33,7 @@ #ifndef OPENAXIOM_DIAGNOSTICS_included #define OPENAXIOM_DIAGNOSTICS_included +#include #include namespace OpenAxiom { @@ -49,6 +50,13 @@ namespace OpenAxiom { std::ostream* out; std::ostream* err; }; + + struct BasicError { + explicit BasicError(const std::string& s) : msg(s) { } + const std::string& message() const { return msg; } + protected: + std::string msg; + }; } } diff --git a/src/include/sexpr.H b/src/include/sexpr.H index a9371139..73e21f31 100644 --- a/src/include/sexpr.H +++ b/src/include/sexpr.H @@ -45,22 +45,14 @@ #include #include #include -#include #include namespace OpenAxiom { namespace Sexpr { - struct BasicError { - explicit BasicError(const std::string& s) : msg(s) { } - const std::string& message() const { return msg; } - protected: - std::string msg; - }; - - // ----------- - // -- Token -- - // ----------- - struct Token { + // ------------ + // -- Lexeme -- + // ------------ + struct Lexeme { enum Type { unknown, // unidentified token semicolon = token::value(";"), // comment @@ -87,35 +79,8 @@ namespace OpenAxiom { sharp_integer_sharp // back reference, #n# }; - Type type; // class of this token - BasicString lexeme; // characters making up this token - }; - - // Print a token object on an output stream. - // Note: this function is for debugging purpose; in particular - // it does not `prettyprint' tokens. 
- std::ostream& operator<<(std::ostream&, const Token&); - - // ----------- - // -- Lexer -- - // ----------- - // An object of this type transforms a sequence of characters - // into a sequence of tokens as defined above. - // A lexer does not manage memory itself. Rather, it delegates - // storage allocation for lexemes and tokens to specialized - // agents used to construct it. - struct Lexer { - Lexer(StringPool& pool, std::vector& toks) - : strings(pool), tokens(toks) { } - - const Byte* tokenize(const Byte*, const Byte*); - BasicString intern(const Byte* s, size_t n) { - return strings.intern(s, n); - } - - private: - StringPool& strings; // where to allocate lexemes from - std::vector& tokens; // where to deposite tokens. + std::pair boundary; + Ordinal line; }; // ------------ @@ -127,59 +92,59 @@ namespace OpenAxiom { virtual void accept(Visitor&) const = 0; }; - // ---------- - // -- Atom -- - // ---------- + // ---------------- + // -- AtomSyntax -- + // ---------------- // An atom is a syntax object consisting of exatly one token. // This should not be confused with the notion of atom // in Lisp languages. - struct Atom : Syntax { - const Token& token() const { return tok; } - BasicString lexeme() const { return tok.lexeme; } + struct AtomSyntax : Syntax { + const Lexeme& lexeme() const { return lex; } void accept(Visitor&) const; protected: - const Token tok; - Atom(const Token&); + Lexeme lex; + explicit AtomSyntax(const Lexeme&); }; - // ------------- - // -- Integer -- - // ------------- + // ------------------- + // -- IntegerSyntax -- + // ------------------- // Integer literal syntax objects - struct Integer : Atom { - explicit Integer(const Token&); + struct IntegerSyntax : AtomSyntax { + explicit IntegerSyntax(const Lexeme&); void accept(Visitor&) const; }; - // --------------- - // -- Character -- - // --------------- + // --------------------- + // -- CharacterSyntax -- + // --------------------- // Character literal syntax objects. 
- struct Character : Atom { - explicit Character(const Token&); + struct CharacterSyntax : AtomSyntax { + explicit CharacterSyntax(const Lexeme&); void accept(Visitor&) const; }; - // ------------ - // -- String -- - // ------------ + // ------------------ + // -- StringSyntax -- + // ------------------ // Striing literal syntax objjects. - struct String : Atom { - explicit String(const Token&); + struct StringSyntax : AtomSyntax { + explicit StringSyntax(const Lexeme&); void accept(Visitor&) const; }; - // ------------ - // -- Symbol -- - // ------------ - struct Symbol : Atom { + // ------------------ + // -- SymbolSyntax -- + // ------------------ + struct SymbolSyntax : AtomSyntax { enum Kind { uninterned, // uninterned symbol ordinary, // an interned symbol + absolute, // case-sensitive symbol keyword // a keyword symbol }; - Symbol(const Token&, Kind); - Kind kin() const { return sort; } + SymbolSyntax(const Lexeme&, Kind); + Kind kind() const { return sort; } void accept(Visitor&) const; private: const Kind sort; @@ -189,20 +154,20 @@ namespace OpenAxiom { // -- Reference -- // --------------- // Back reference object to a syntax object. - struct Reference : Atom { - Reference(const Token&, size_t); + struct Reference : AtomSyntax { + Reference(const Lexeme&, Ordinal); size_t tag() const { return pos; } void accept(Visitor&) const; private: - const size_t pos; + Ordinal pos; }; - // ------------ - // -- Anchor -- - // ------------ + // ------------------ + // -- AnchorSyntax -- + // ------------------ // Base anchor syntax object. 
- struct Anchor : Syntax { - Anchor(size_t, const Syntax*); + struct AnchorSyntax : Syntax { + AnchorSyntax(size_t, const Syntax*); size_t ref() const { return tag; } const Syntax* value() const { return val; } void accept(Visitor&) const; @@ -222,20 +187,20 @@ namespace OpenAxiom { const Syntax* const form; }; - // ----------- - // -- Quote -- - // ----------- + // ----------------- + // -- QuoteSyntax -- + // ----------------- // Quotation syntax object. - struct Quote : unary_form { - explicit Quote(const Syntax*); + struct QuoteSyntax : unary_form { + explicit QuoteSyntax(const Syntax*); }; - // --------------- - // -- Antiquote -- - // --------------- + // --------------------- + // -- AntiquoteSyntax -- + // --------------------- // Quasi-quotation syntax object. - struct Antiquote : unary_form { - explicit Antiquote(const Syntax*); + struct AntiquoteSyntax : unary_form { + explicit AntiquoteSyntax(const Syntax*); }; // ------------ @@ -270,15 +235,6 @@ namespace OpenAxiom { explicit Function(const Syntax*); }; - // ------------- - // -- DotTail -- - // ------------- - // Objects of this type represents the tail of syntactic - // objects denoting dotted pair syntax `(a . b)'. - struct DotTail : unary_form { - explicit DotTail(const Syntax*); - }; - // ------------- // -- Include -- // ------------- @@ -296,10 +252,10 @@ namespace OpenAxiom { }; // ---------- - // -- List -- + // -- ListSyntax -- // ---------- // List syntax objects. 
- struct List : Syntax, private std::vector { + struct ListSyntax : Syntax, private std::vector { typedef std::vector base; using base::const_iterator; using base::begin; @@ -307,17 +263,20 @@ namespace OpenAxiom { using base::size; using base::empty; - List(); - explicit List(const base&); - ~List(); + ListSyntax(); + ListSyntax(const base&, bool); + ~ListSyntax(); void accept(Visitor&) const; + bool dotted() const { return dot; } + private: + bool dot; }; // ------------ - // -- Vector -- + // -- VectorSyntax -- // ------------ - // Vector syntax objects. - struct Vector : Syntax, private std::vector { + // VectorSyntax syntax objects. + struct VectorSyntax : Syntax, private std::vector { typedef std::vector base; using base::const_iterator; using base::begin; @@ -326,9 +285,9 @@ namespace OpenAxiom { using base::operator[]; using base::empty; - Vector(); - explicit Vector(const base&); - ~Vector(); + VectorSyntax(); + explicit VectorSyntax(const base&); + ~VectorSyntax(); void accept(Visitor&) const; }; @@ -336,24 +295,23 @@ namespace OpenAxiom { // -- Syntax::Visitor -- // --------------------- struct Syntax::Visitor { - virtual void visit(const Atom&) = 0; - virtual void visit(const Integer&); - virtual void visit(const Character&); - virtual void visit(const String&); - virtual void visit(const Symbol&); + virtual void visit(const AtomSyntax&) = 0; + virtual void visit(const IntegerSyntax&); + virtual void visit(const CharacterSyntax&); + virtual void visit(const StringSyntax&); + virtual void visit(const SymbolSyntax&); virtual void visit(const Reference&); - virtual void visit(const Anchor&) = 0; - virtual void visit(const Quote&) = 0; - virtual void visit(const Antiquote&) = 0; + virtual void visit(const AnchorSyntax&) = 0; + virtual void visit(const QuoteSyntax&) = 0; + virtual void visit(const AntiquoteSyntax&) = 0; virtual void visit(const Expand&) = 0; virtual void visit(const Eval&) = 0; virtual void visit(const Splice&) = 0; virtual void visit(const 
Function&) = 0; virtual void visit(const Include&) = 0; virtual void visit(const Exclude&) = 0; - virtual void visit(const DotTail&) = 0; - virtual void visit(const List&) = 0; - virtual void visit(const Vector&) = 0; + virtual void visit(const ListSyntax&) = 0; + virtual void visit(const VectorSyntax&) = 0; }; template @@ -370,90 +328,59 @@ namespace OpenAxiom { Allocator(); ~Allocator(); - const Integer* make_integer(const Token&); - const Character* make_character(const Token&); - const String* make_string(const Token&); - const Symbol* make_symbol(const Token&, Symbol::Kind); - const Reference* make_reference(const Token&, size_t); - const Anchor* make_anchor(size_t, const Syntax*); - const Quote* make_quote(const Syntax*); - const Antiquote* make_antiquote(const Syntax*); + const IntegerSyntax* make_integer(const Lexeme&); + const CharacterSyntax* make_character(const Lexeme&); + const StringSyntax* make_string(const Lexeme&); + const SymbolSyntax* make_symbol(SymbolSyntax::Kind, const Lexeme&); + const Reference* make_reference(size_t, const Lexeme&); + const AnchorSyntax* make_anchor(size_t, const Syntax*); + const QuoteSyntax* make_quote(const Syntax*); + const AntiquoteSyntax* make_antiquote(const Syntax*); const Expand* make_expand(const Syntax*); const Eval* make_eval(const Syntax*); const Splice* make_splice(const Syntax*); const Function* make_function(const Syntax*); const Include* make_include(const Syntax*); const Exclude* make_exclude(const Syntax*); - const DotTail* make_dot_tail(const Syntax*); - const List* make_list(const std::vector&); - const Vector* make_vector(const std::vector&); + const ListSyntax* make_list(const std::vector&, bool = false); + const VectorSyntax* make_vector(const std::vector&); private: - Memory::Factory ints; - Memory::Factory chars; - Memory::Factory strs; - Memory::Factory syms; - Memory::Factory ancs; + Memory::Factory ints; + Memory::Factory chars; + Memory::Factory strs; + Memory::Factory syms; + Memory::Factory 
ancs; Memory::Factory refs; - Memory::Factory quotes; - Memory::Factory antis; + Memory::Factory quotes; + Memory::Factory antis; Memory::Factory exps; Memory::Factory funs; Memory::Factory incs; Memory::Factory excs; Memory::Factory evls; Memory::Factory spls; - Memory::Factory tails; - Memory::Factory lists; - Memory::Factory vectors; - List empty_list; - Vector empty_vector; + Memory::Factory lists; + Memory::Factory vectors; + ListSyntax empty_list; + VectorSyntax empty_vector; }; - // ------------ - // -- Parser -- - // ------------ - // An object of this type transforms a sequence of tokens - // into a sequence of syntax objects. - // A parser object does not manage memory itself. Rather, it delegates - // storage allocation for syntax objects to specialized - // agents used to construct it. - struct Parser { - Parser(Allocator&, std::vector&); - const Token* parse(const Token*, const Token*); - private: - Allocator& alloc; - std::vector& syns; - - const Symbol* parse_symbol(const Token*&, const Token*); - const Character* parse_character(const Token*&, const Token*); - const Anchor* parse_anchor(const Token*&, const Token*); - const Reference* parse_reference(const Token*&, const Token*); - const Symbol* parse_uninterned(const Token*&, const Token*); - const Function* parse_function(const Token*&, const Token*); - const Quote* parse_quote(const Token*&, const Token*); - const Antiquote* parse_antiquote(const Token*&, const Token*); - const Include* parse_include(const Token*&, const Token*); - const Exclude* parse_exclude(const Token*&, const Token*); - const Expand* parse_expand(const Token*&, const Token*); - const Eval* parse_eval(const Token*&, const Token*); - const Splice* parse_splice(const Token*&, const Token*); - const Vector* parse_vector(const Token*&, const Token*); - const List* parse_list(const Token*&, const Token*); - const Syntax* parse_syntax(const Token*&, const Token*); - }; + // -- Reader -- + struct Reader { + struct State { + const 
Byte* start; + const Byte* end; + const Byte* cur; + const Byte* line; + Ordinal lineno; + Allocator alloc; + }; - // ------------ - // -- Module -- - // ------------ - // Entire s-expression input file. - struct Module : std::vector { - explicit Module(const std::string&); - const std::string& name() const { return nm; } + Reader(const Byte*, const Byte*); + const Syntax* read(); private: - const std::string nm; - StringPool raw_strs; - Allocator allocator; + State st; }; } } diff --git a/src/include/structure.H b/src/include/structure.H index d9434423..33c084f2 100644 --- a/src/include/structure.H +++ b/src/include/structure.H @@ -33,8 +33,6 @@ #ifndef OPENAXIOM_STRUCTURE_included #define OPENAXIOM_STRUCTURE_included -#include - namespace OpenAxiom { // -- helper classes for structural abstractions -- namespace structure { diff --git a/src/syntax/sexpr.cc b/src/syntax/sexpr.cc index 14113164..0a3b8071 100644 --- a/src/syntax/sexpr.cc +++ b/src/syntax/sexpr.cc @@ -38,73 +38,24 @@ #include #include #include +#include namespace OpenAxiom { namespace Sexpr { - template - static inline int - length(const T(&)[N]) { - return N; - } - - template - static inline typename Sequence::const_pointer - begin_ptr(const Sequence& s) { - return &*s.begin(); - } - - template - static inline typename Sequence::const_pointer - end_ptr(const Sequence& s) { - return s.empty() ? 
0 : &*s.begin() + s.size(); - } - - std::ostream& - operator<<(std::ostream& os, const Token& t) { - switch (t.type) { - case Token::semicolon: os << "SEMICOLON"; break; - case Token::dot: os << "DOT"; break; - case Token::comma: os << "COMMA"; break; - case Token::open_paren: os << "OPEN_PAREN"; break; - case Token::close_paren: os << "CLOSE_PAREN"; break; - case Token::apostrophe: os << "APOSTROPHE"; break; - case Token::backquote: os << "BACKQUOTE"; break; - case Token::backslash: os << "BACKSLASH"; break; - case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break; - case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break; - case Token::sharp_colon: os << "SHARP_COLON"; break; - case Token::sharp_plus: os << "SHARP_PLUS"; break; - case Token::sharp_minus: os << "SHARP_MINUS"; break; - case Token::sharp_dot: os << "SHARP_DOT"; break; - case Token::comma_at: os << "COMMA_AT"; break; - case Token::integer: os << "INTEGER"; break; - case Token::character: os << "CHARACTER"; break; - case Token::string: os << "STRING"; break; - case Token::identifier: os << "IDENTIFIER"; break; - case Token::sharp_integer_sharp: - os << "SHARP_INTEGER_SHARP"; break; - case Token::sharp_integer_equal: - os << "SHARP_INTEGER_EQUAL"; break; - default: os << "UNKNOWN"; break; - } - os << '('; - if (t.lexeme != 0) { - os << '"'; - std::copy(t.lexeme->begin(), t.lexeme->end(), - std::ostream_iterator(os)); - os << '"'; - } - else - os << ""; - return os << ')'; + static void + invalid_character(Reader::State& s) { + auto line = std::to_string(s.lineno); + auto column = std::to_string(s.cur - s.line); + auto msg = "invalid character on line " + line + + " and column " + column; + if (isprint(*s.cur)) + throw Diagnostics::BasicError(msg + ": " + std::string(1, *s.cur)); + throw Diagnostics::BasicError(msg + " with code " + std::to_string(*s.cur)); } - - // ----------- - // -- Lexer -- - // ----------- + static void syntax_error(const std::string& s) { - throw BasicError(s); + 
throw Diagnostics::BasicError(s); } // Return true if character `c' introduces a blank. @@ -122,67 +73,67 @@ namespace OpenAxiom { or c == '`' or c == '#'; } - // Move `cur' past all consecutive blank characters, and - // return the new position. - static const Byte* - skip_blank(const Byte*& cur, const Byte* end) { - while (cur < end and is_blank(*cur)) - ++cur; - return cur; + // Move the cursor past all consecutive blank characters, and + // return true if there are more input characters to consider. + static bool + skip_blank(Reader::State& s) { + for (bool done = false; s.cur < s.end and not done; ) + switch (*s.cur) { + case '\n': + ++s.lineno; + s.line = ++s.cur; + break; + case ' ': case '\t': case '\v': case '\r': case '\f': + ++s.cur; + break; + default: done = true; break; + } + return s.cur < s.end; } // Move `cur' to end-of-line marker. - static const Byte* - skip_to_eol(const Byte*& cur, const Byte* end) { + static void + skip_to_eol(Reader::State& s) { // FIXME: properly handle CR+LF. - while (cur < end and *cur != '\n') - ++cur; - return cur; - } - - // Move `cur' until a word boundary is reached. - static const Byte* - skip_to_word_boundary(const Byte*& cur, const Byte* end) { - bool saw_escape = false; - for (; cur < end; ++cur) { - if (saw_escape) - saw_escape = false; - else if (*cur == '\\') - saw_escape = true; - else if (is_delimiter(*cur)) - break; - } - return cur; + while (s.cur < s.end and *s.cur != '\n') + ++s.cur; } // Move `cur' one-past a non-esacaped character `c'. // Return true if the character was seen. 
static bool - skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) { - bool saw_escape = false; - for (; cur < end; ++cur) + skip_to_nonescaped_char(Reader::State& s, char c) { + for (bool saw_escape = false; s.cur < s.end; ++s.cur) if (saw_escape) saw_escape = false; - else if (*cur == '\\') + else if (*s.cur == '\\') saw_escape = true; - else if (*cur == c) { - ++cur; + else if (*s.cur == c) { + ++s.cur; return true; } return false; } - // Move `cur' past the closing quote of string literal. - // Return true if the closing fence was effectively seen. + // Move the cursor past the closing quote of string literal. + // Return true if the closing quote was effectively seen. static inline bool - skip_to_quote(const Byte*& cur, const Byte* end) { - return skip_to_nonescaped_char(cur, end, '"'); + skip_to_quote(Reader::State& s) { + return skip_to_nonescaped_char(s, '"'); + } + + template + static bool + advance_while(Reader::State& s, Pred p) { + while (s.cur < s.end and p(*s.cur)) + ++s.cur; + return s.cur < s.end; } // Return true if the character `c' be part of a non-absolute // identifier. static bool - identifier_part(char c) { + identifier_part(Byte c) { switch (c) { case '+': case '-': case '*': case '/': case '%': case '^': case '~': case '@': case '$': case '&': case '=': @@ -194,296 +145,117 @@ namespace OpenAxiom { } } - // Return true if the character `c' has a special meaning after - // the sharp character. - static bool - special_after_sharp(char c) { - return c == '(' or c == '\'' or c == ':' - or c == '+' or c == '-' or c == '.'; - } - - // Return true if the sequence `[cur, end)' has a prefix that is - // an integer followrd by the equal sign or the sharp sign. - // `cur' is moved along the way. 
- static bool - only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) { - while (cur < end and isdigit(*cur)) - ++cur; - return cur < end and (*cur == '#' or *cur == '='); - } - - // The token `t' was thought to designate an identifier. - // Reclassify it as an integer if, in fact, its lexeme consists - // entirely of digits. - static void - maybe_reclassify(Token& t) { - const Byte* cur = t.lexeme->begin(); - const Byte* end = t.lexeme->end(); - while (cur < end and isdigit(*cur)) - ++cur; - if (cur == end) - t.type = Token::integer; - } - - // Returns true if the first characters in the range - // [cur, last) start an identifier. - static bool - start_symbol(const Byte* cur, const Byte* last) { - if (cur >= last) - return false; - return identifier_part(*cur) - or *cur == '|' or *cur == ':'; - } - - // We are processing a symbol token. Accumulate all - // legitimate characters till the end of the token. - static void - skip_to_end_of_symbol(const Byte*& cur, const Byte* end) { - const char c = *cur; - if (*cur == '|') - skip_to_nonescaped_char(++cur, end, c); - else - skip_to_word_boundary(cur, end); - if (cur < end and *cur == ':') - skip_to_end_of_symbol(cur, end); - } - - static Token - match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) { - Token t = { Token::identifier, 0 }; - const Byte* start = cur; - skip_to_end_of_symbol(cur, end); - t.lexeme = lexer->intern(start, cur - start); - maybe_reclassify(t); - return t; - } - - const Byte* - Lexer::tokenize(const Byte* cur, const Byte* end) { - while (skip_blank(cur, end) < end) { - Token t = { Token::unknown, 0 }; - switch (*cur) { - case ';': { - const Byte* start = cur; - t.type = Token::semicolon; - skip_to_eol(cur, end); - t.lexeme = intern(start, cur - start); - break; - } - - case '.': case '(': case ')': case '\'': case '`': - t.type = Token::Type(token::value(*cur)); - t.lexeme = intern(cur, 1); - ++cur; - break; - - case ',': { - const Byte* start = cur; - if (++cur < end 
and *cur == '@') { - t.type = Token::comma_at; - ++cur; - } - else - t.type = Token::comma; - t.lexeme = intern(start, cur - start); - break; - } - - case '\\': - t = match_maybe_symbol(this, cur, end); - break; - - case '#': { - const Byte* start = cur; - if (cur + 1 < end and special_after_sharp(cur[1])) { - t.type = Token::Type(token::value(cur[0], cur[1])); - t.lexeme = intern(cur, 2); - cur += 2; - } - else if (cur + 1 < end and cur[1] == '\\') { - start = cur += 2; - if (not isalnum(*cur)) - ++cur; - else - skip_to_word_boundary(cur, end); - t.type = Token::character; - t.lexeme = intern(start, cur - start); - } - else if (only_digits_before_equal_or_shap(++cur, end)) { - t.type = *cur == '#' - ? Token::sharp_integer_sharp - : Token::sharp_integer_equal; - t.lexeme = intern(start, cur - start + 1); - ++cur; - } - else { - skip_to_word_boundary(cur, end); - t.lexeme = intern(start, cur - start); - } - break; - } - - case '"': { - const Byte* start = cur; - skip_to_quote(++cur, end); - t.type = Token::string; - t.lexeme = intern(start, cur - start); - break; - } - - default: - if (start_symbol(cur, end)) - t = match_maybe_symbol(this, cur, end); - else { - const Byte* start = cur; - skip_to_word_boundary(++cur, end); - t.lexeme = intern(start, cur - start); - } - break; - } - tokens.push_back(t); - } - return cur; - } - - // ---------- - // -- Atom -- - // ---------- - Atom::Atom(const Token& t) : tok(t) { } + // -- AtomSyntax -- + AtomSyntax::AtomSyntax(const Lexeme& t) : lex(t) { } void - Atom::accept(Visitor& v) const { + AtomSyntax::accept(Visitor& v) const { v.visit(*this); } - // ------------- - // -- Integer -- - // ------------- - Integer::Integer(const Token& t) : Atom(t) { } + // -- IntegerSyntax -- + IntegerSyntax::IntegerSyntax(const Lexeme& t) : AtomSyntax(t) { } void - Integer::accept(Visitor& v) const { + IntegerSyntax::accept(Visitor& v) const { v.visit(*this); } - // --------------- - // -- Character -- - // --------------- - 
Character::Character(const Token& t) : Atom(t) { } + // -- CharacterSyntax -- + CharacterSyntax::CharacterSyntax(const Lexeme& t) : AtomSyntax(t) { } void - Character::accept(Visitor& v) const { + CharacterSyntax::accept(Visitor& v) const { v.visit(*this); } - // ------------ - // -- String -- - // ------------ - String::String(const Token& t) : Atom(t) { } + // -- StringSyntax -- + StringSyntax::StringSyntax(const Lexeme& t) : AtomSyntax(t) { } void - String::accept(Visitor& v) const { + StringSyntax::accept(Visitor& v) const { v.visit(*this); } - // ------------ - // -- Symbol -- - // ------------ - Symbol::Symbol(const Token& t, Kind k) : Atom(t), sort(k) { } + // -- SymbolSyntax -- + SymbolSyntax::SymbolSyntax(const Lexeme& t, Kind k) + : AtomSyntax(t), sort(k) + { } void - Symbol::accept(Visitor& v) const { + SymbolSyntax::accept(Visitor& v) const { v.visit(*this); } - // ------------ - // -- Anchor -- - // ------------ - Anchor::Anchor(size_t t, const Syntax* s) : tag(t), val(s) { } + // -- AnchorSyntax -- + AnchorSyntax::AnchorSyntax(size_t t, const Syntax* s) : tag(t), val(s) { } void - Anchor::accept(Visitor& v) const { + AnchorSyntax::accept(Visitor& v) const { v.visit(*this); } - // --------------- // -- Reference -- - // --------------- - Reference::Reference(const Token& t, size_t v) : Atom(t), pos(v) { } + Reference::Reference(const Lexeme& t, Ordinal n) + : AtomSyntax(t), pos(n) + { } void Reference::accept(Visitor& v) const { v.visit(*this); } - // ----------- - // -- Quote -- - // ----------- - Quote::Quote(const Syntax* s) : unary_form(s) { } + // -- QuoteSyntax -- + QuoteSyntax::QuoteSyntax(const Syntax* s) + : unary_form(s) + { } - // --------------- - // -- Antiquote -- - // --------------- - Antiquote::Antiquote(const Syntax* s) : unary_form(s) { } + // -- AntiquoteSyntax -- + AntiquoteSyntax::AntiquoteSyntax(const Syntax* s) + : unary_form(s) + { } - // ------------ // -- Expand -- - // ------------ Expand::Expand(const Syntax* s) : 
unary_form(s) { } - // ---------- // -- Eval -- - // ---------- Eval::Eval(const Syntax* s) : unary_form(s) { } - // ------------ // -- Splice -- - // ------------ Splice::Splice(const Syntax* s) : unary_form(s) { } - // -------------- // -- Function -- - // -------------- Function::Function(const Syntax* s) : unary_form(s) { } - // ------------- // -- Include -- Include::Include(const Syntax* s) : unary_form(s) { } - // ------------- // -- Exclude -- Exclude::Exclude(const Syntax* s) : unary_form(s) { } - // ------------- - // -- DotTail -- - // ------------- - DotTail::DotTail(const Syntax* f) : unary_form(f) { } + // -- ListSyntax -- + ListSyntax::ListSyntax() : dot(false) { } - // ---------- - // -- List -- - // ---------- - List::List() { } + ListSyntax::ListSyntax(const base& elts, bool d) + : base(elts), dot(d) + { } - List::List(const base& elts) : base(elts) { } - - List::~List() { } + ListSyntax::~ListSyntax() { } void - List::accept(Visitor& v) const { + ListSyntax::accept(Visitor& v) const { v.visit(*this); } - // ------------ - // -- Vector -- - // ------------ - Vector::Vector() { } + // -- VectorSyntax -- + VectorSyntax::VectorSyntax() { } - Vector::Vector(const base& elts) : base(elts) { } + VectorSyntax::VectorSyntax(const base& elts) : base(elts) { } - Vector::~Vector() { } + VectorSyntax::~VectorSyntax() { } void - Vector::accept(Visitor& v) const { + VectorSyntax::accept(Visitor& v) const { v.visit(*this); } @@ -499,28 +271,28 @@ namespace OpenAxiom { } void - Syntax::Visitor::visit(const Integer& i) { - visit(as(i)); + Syntax::Visitor::visit(const IntegerSyntax& i) { + visit(as(i)); } void - Syntax::Visitor::visit(const Character& c) { - visit(as(c)); + Syntax::Visitor::visit(const CharacterSyntax& c) { + visit(as(c)); } void - Syntax::Visitor::visit(const String& s) { - visit(as(s)); + Syntax::Visitor::visit(const StringSyntax& s) { + visit(as(s)); } void - Syntax::Visitor::visit(const Symbol& s) { - visit(as(s)); + 
Syntax::Visitor::visit(const SymbolSyntax& s) { + visit(as(s)); } void Syntax::Visitor::visit(const Reference& r) { - visit(as(r)); + visit(as(r)); } // --------------- @@ -533,42 +305,42 @@ namespace OpenAxiom { // used templates floating around. Allocator::~Allocator() { } - const Character* - Allocator::make_character(const Token& t) { + const CharacterSyntax* + Allocator::make_character(const Lexeme& t) { return chars.make(t); } - const Integer* - Allocator::make_integer(const Token& t) { + const IntegerSyntax* + Allocator::make_integer(const Lexeme& t) { return ints.make(t); } - const String* - Allocator::make_string(const Token& t) { + const StringSyntax* + Allocator::make_string(const Lexeme& t) { return strs.make(t); } - const Symbol* - Allocator::make_symbol(const Token& t, Symbol::Kind k) { + const SymbolSyntax* + Allocator::make_symbol(SymbolSyntax::Kind k, const Lexeme& t) { return syms.make(t, k); } - const Anchor* - Allocator::make_anchor(size_t t, const Syntax* s) { - return ancs.make(t, s); - } - const Reference* - Allocator::make_reference(const Token& t, size_t i) { + Allocator::make_reference(size_t i, const Lexeme& t) { return refs.make(t, i); } - const Quote* + const AnchorSyntax* + Allocator::make_anchor(size_t t, const Syntax* s) { + return ancs.make(t, s); + } + + const QuoteSyntax* Allocator::make_quote(const Syntax* s) { return quotes.make(s); } - const Antiquote* + const AntiquoteSyntax* Allocator::make_antiquote(const Syntax* s) { return antis.make(s); } @@ -603,53 +375,20 @@ namespace OpenAxiom { return excs.make(s); } - const DotTail* - Allocator::make_dot_tail(const Syntax* f) { - return tails.make(f); - } - - const List* - Allocator::make_list(const std::vector& elts) { + const ListSyntax* + Allocator::make_list(const std::vector& elts, bool dot) { if (elts.empty()) return &empty_list; - return lists.make(elts); + return lists.make(elts, dot); } - const Vector* + const VectorSyntax* Allocator::make_vector(const std::vector& elts) { 
if (elts.empty()) return &empty_vector; return vectors.make(elts); } - // ------------ - // -- Parser -- - // ------------ - - // Signal a parse error - static void - parse_error(const std::string& s) { - throw BasicError(s); - } - - // Signal that an expected syntax object was missing - static void - expected_syntax(const std::string& s) { - parse_error("expected " + s); - } - - // Signal an abrupt end of input - static void - unexpected_end_of_input(const std::string& s) { - parse_error("unexpected end of input after " + s); - } - - // Signal a missing closing parenthesis - static void - missing_closer_for(const std::string& s) { - parse_error("missing closing parenthesis for " + s); - } - // The sequence of characters in [cur, last) consists // entirely of digits. Return the corresponding natural value. static size_t @@ -661,274 +400,277 @@ namespace OpenAxiom { return n; } - // Parse a plain identifier or a Lisp-style keyword identifier. - const Symbol* - Parser::parse_symbol(const Token*& cur, const Token* last) { - Symbol::Kind kind = *cur->lexeme->begin() == ':' - ? 
Symbol::keyword - : Symbol::ordinary; - return alloc.make_symbol(*cur++, kind); - } - - // List of lower case character names - static const char* charname[] = { - "newline", "space", "page", "tab", - "backspace", "return", "linefeed" - }; - - static bool - equal_character_name(BasicString lhs, const char* rhs) { - if (lhs->size() != strlen(rhs)) - return false; - for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur) - if (tolower(*cur) != *rhs++) - return false; - return true; - } - - static bool - valid_character_name(BasicString s) { - for (int i = 0; i < length(charname); ++i) - if (equal_character_name(s, charname[i])) - return true; - return false; - } - - const Character* - Parser::parse_character(const Token*& cur, const Token* last) { - if (cur->lexeme->size() != 1 - and not valid_character_name(cur->lexeme)) - parse_error("invalid literal character syntax"); - return alloc.make_character(*cur++); - } - - // Parse an anchor definition of the form #n= - const Anchor* - Parser::parse_anchor(const Token*& cur, const Token* last) { - const size_t n = natural_value(cur->lexeme->begin() + 1, - cur->lexeme->end() - 1); - if (++cur == last) - unexpected_end_of_input("sharp-integer-equal sign"); - return alloc.make_anchor(n, parse_syntax(cur, last)); - } - - // Parse a reference to an anchor, #n# - const Reference* - Parser::parse_reference(const Token*& cur, const Token* last) { - const size_t n = natural_value(cur->lexeme->begin() + 1, - cur->lexeme->end() - 1); - return alloc.make_reference(*cur++, n); - } - - // Parse an uninterned symbol #: - const Symbol* - Parser::parse_uninterned(const Token*& cur, const Token* last) { - if (cur == last or cur->type != Token::identifier) - expected_syntax("symbol after sharp-colon sign"); - // FIXME: check that the identifier is not a keyword. 
- return alloc.make_symbol(*cur++, Symbol::uninterned); - } - - // Parse a function syntax: #' - const Function* - Parser::parse_function(const Token*& cur, const Token* last) { - if (cur == last) - unexpected_end_of_input("sharp-quote sign"); - return alloc.make_function(parse_syntax(cur, last)); - } - - // Parse a quotation - const Quote* - Parser::parse_quote(const Token*& cur, const Token* last) { - if (cur == last) - unexpected_end_of_input("quote sign"); - return alloc.make_quote(parse_syntax(cur, last)); - } - - // Parse an antiquotation - const Antiquote* - Parser::parse_antiquote(const Token*& cur, const Token* last) { - if (cur == last) - unexpected_end_of_input("backquote sign"); - return alloc.make_antiquote(parse_syntax(cur, last)); - } - - // Parse an expansion request form - const Expand* - Parser::parse_expand(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("comma sign"); - return alloc.make_expand(s); - } - - // Parse conditional inclusions - const Include* - Parser::parse_include(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("sharp-plus sign"); - return alloc.make_include(s); - } - - const Exclude* - Parser::parse_exclude(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("sharp-minus sign"); - return alloc.make_exclude(s); - } - - const Eval* - Parser::parse_eval(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("sharp-dot sign"); - return alloc.make_eval(s); - } - - const Splice* - Parser::parse_splice(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("comma-at sign"); - return alloc.make_splice(s); - } - - // Skip tokens that are semantically blanks, e.g. comments. 
- // Return true if not at end of tokens. - static bool - skip_ignorable_tokens(const Token*& cur, const Token* last) { - while (cur < last and cur->type == Token::semicolon) - ++cur; - return cur != last; - } - - // Parse a vector of syntax objects: #(s .. s) - const Vector* - Parser::parse_vector(const Token*& cur, const Token* last) { - std::vector elts; - while (skip_ignorable_tokens(cur, last) - and cur->type != Token::close_paren) - elts.push_back(parse_syntax(cur, last)); - if (cur == last) - missing_closer_for("vector"); - ++cur; - return alloc.make_vector(elts); - } - - // Constructs a pair or a list syntax object. - const List* - Parser::parse_list(const Token*& cur, const Token* last) { - std::vector elts; - while (skip_ignorable_tokens(cur, last) - and cur->type != Token::close_paren) { - if (cur->type == Token::dot) { - skip_ignorable_tokens(++cur, last); - if (const Syntax* s = parse_syntax(cur, last)) { - elts.push_back(alloc.make_dot_tail(s)); - break; - } + // -- Reader -- + Reader::Reader(const Byte* f, const Byte* l) + : st{ f, l, f, f, 1, } + { } + + static const Syntax* read_sexpr(Reader::State&); + + // Parse a string literal + static const Syntax* + read_string(Reader::State& s) { + auto start = s.cur++; + if (not skip_to_quote(s)) + syntax_error("missing closing quote sign for string literal"); + Lexeme t = { { start, s.cur }, s.lineno }; + return s.alloc.make_string(t); + } + + // Parse an absolute identifier. + static const Syntax* + read_absolute_symbol(Reader::State& s) { + auto start = ++s.cur; + if (not skip_to_nonescaped_char(s, '|')) + syntax_error("missing closing bar sign for an absolute symbol"); + Lexeme t = { { start, s.cur - 1 }, s.lineno }; + return s.alloc.make_symbol(SymbolSyntax::absolute, t); + } + + // Read an atom starting with digits. 
+ static const Syntax* + read_maybe_natural(Reader::State& s) { + auto start = s.cur; + advance_while (s, isdigit); + if (s.cur >= s.end or is_delimiter(*s.cur)) { + Lexeme t = { { start, s.cur }, s.lineno }; + return s.alloc.make_integer(t); + } + advance_while(s, identifier_part); + Lexeme t = { { start, s.cur }, s.lineno }; + return s.alloc.make_symbol(SymbolSyntax::ordinary, t); + } + + // Read an identifier. + static const Syntax* + read_identifier(Reader::State& s) { + auto start = s.cur; + advance_while(s, identifier_part); + Lexeme t = { { start, s.cur }, s.lineno }; + return s.alloc.make_symbol(SymbolSyntax::ordinary, t); + } + + // Read an atom starting with a '+' or '-' sign; this + // should be identifier, or a signed integer. + static const Syntax* + read_maybe_signed_number(Reader::State& s) { + auto start = s.cur++; + if (s.cur < s.end and isdigit(*s.cur)) { + advance_while(s, isdigit); + if (s.cur >= s.end or is_delimiter(*s.cur)) { + Lexeme t = { { start, s.cur }, s.lineno }; + return s.alloc.make_integer(t); } - elts.push_back(parse_syntax(cur, last)); } - if (cur == last or cur->type != Token::close_paren) - missing_closer_for("list"); - ++cur; - return alloc.make_list(elts); + advance_while(s, identifier_part); + Lexeme t = { { start, s.cur }, s.lineno }; + return s.alloc.make_symbol(SymbolSyntax::ordinary, t); } - Parser::Parser(Allocator& a, std::vector& v) - : alloc(a), syns(v) { } - - static std::string - to_string(BasicString s) { - return { s->begin(), s->end() }; + static const Syntax* + read_keyword(Reader::State& s) { + auto start = s.cur++; + advance_while(s, identifier_part); + Lexeme t = { { start, s.cur }, s.lineno }; + return s.alloc.make_symbol(SymbolSyntax::keyword, t); } - const Syntax* - Parser::parse_syntax(const Token*& cur, const Token* last) { - if (not skip_ignorable_tokens(cur, last)) - return 0; - - switch (cur->type) { - case Token::integer: - return alloc.make_integer(*cur++); - - case Token::character: - return 
parse_character(cur, last); - - case Token::string: - return alloc.make_string(*cur++); - - case Token::identifier: - return parse_symbol(cur, last); - - case Token::sharp_integer_equal: - return parse_anchor(cur, last); - - case Token::sharp_integer_sharp: - return parse_reference(cur, last); - - case Token::sharp_colon: - return parse_uninterned(++cur, last); + // Read an atom. + static const Syntax* + read_atom(Reader::State& s) { + switch (*s.cur) { + case '"': return read_string(s); + case ':': return read_keyword(s); + case '-': case '+': return read_maybe_signed_number(s); - case Token::sharp_apostrophe: - return parse_function(++cur, last); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return read_maybe_natural(s); - case Token::sharp_open_paren: - return parse_vector(++cur, last); - - case Token::apostrophe: - return parse_quote(++cur, last); - - case Token::open_paren: - return parse_list(++cur, last); - - case Token::sharp_plus: - return parse_include(++cur, last); - - case Token::sharp_minus: - return parse_exclude(++cur, last); - - case Token::sharp_dot: - return parse_eval(++cur, last); - - case Token::backquote: - return parse_antiquote(++cur, last); - - case Token::comma: - return parse_expand(++cur, last); + default: + if (identifier_part(*s.cur)) + return read_identifier(s); + invalid_character(s); + ++s.cur; + return nullptr; + } + } - case Token::comma_at: - return parse_splice(++cur, last); + // Parse a quote expression. + static const Syntax* + read_quote(Reader::State& s) { + ++s.cur; // skip the quote character + auto x = read_sexpr(s); + if (x == nullptr) + syntax_error("end of input reached after quote sign"); + return s.alloc.make_quote(x); + } + + // Parse a backquote expression. 
+ static const Syntax* + read_backquote(Reader::State& s) { + ++s.cur; // skip the backquote character + auto x = read_sexpr(s); + if (x == nullptr) + syntax_error("end of input reached after backquote sign"); + return s.alloc.make_antiquote(x); + } + + // We've just seen "#(" indicating the start of a literal + // vector. Read the elements and return the corresponding form. + static const Syntax* + finish_literal_vector(Reader::State& s) { + ++s.cur; // Skip the open paren. + std::vector elts { }; + while (skip_blank(s) and *s.cur != ')') { + if (auto x = read_sexpr(s)) + elts.push_back(x); + else + syntax_error("syntax error while reading vector elements"); + } + if (s.cur >= s.end) + syntax_error("unfinished literal vector"); + else + ++s.cur; + return s.alloc.make_vector(elts); + } + + // We've just seen the sharp sign followed by a digit. We assume + // we are about to read an anchor or a back reference. + static const Syntax* + finish_anchor_or_reference(Reader::State& s) { + auto start = s.cur; + advance_while(s, isdigit); + if (s.cur >= s.end) + syntax_error("end-of-input after sharp-number sign"); + const Byte c = *s.cur; + if (c != '#' and c != '=') + syntax_error("syntax error after sharp-number sign"); + Lexeme t = { { start, s.cur }, s.lineno }; + auto n = natural_value(start, s.cur); + ++s.cur; + if (c == '#') + return s.alloc.make_reference(n, t); + auto x = read_sexpr(s); + if (x == nullptr) + syntax_error("syntax error after sharp-number-equal sign"); + return s.alloc.make_anchor(n, x); + } + + static const Syntax* + finish_function(Reader::State& s) { + ++s.cur; // skip quote sign. + auto x = read_sexpr(s); + if (x == nullptr) + syntax_error("missing function designator after sharp-quote sign"); + return s.alloc.make_function(x); + } + + static const Syntax* + finish_uninterned_symbol(Reader::State& s) { + ++s.cur; // skip colon sign. 
+ auto start = s.cur; + advance_while(s, identifier_part); + Lexeme t = { { start, s.cur }, s.lineno }; + return s.alloc.make_symbol(SymbolSyntax::uninterned, t); + } + + static const Syntax* + finish_readtime_eval(Reader::State& s) { + ++s.cur; // skip dot sign. + auto x = read_sexpr(s); + if (x == nullptr) + syntax_error("parse error after sharp-dot sign"); + return s.alloc.make_eval(x); + } + + static const Syntax* + finish_character(Reader::State& s) { + ++s.cur; // skip backslash sign + auto start = s.cur; + advance_while(s, identifier_part); + Lexeme t = { { start, s.cur }, s.lineno }; + return s.alloc.make_character(t); + } + + static const Syntax* + read_sharp_et_al(Reader::State& s) { + if (++s.cur >= s.end) + syntax_error("end-of-input reached after sharp sign"); + switch (*s.cur) { + case '(': return finish_literal_vector(s); + case '\'': return finish_function(s); + case ':': return finish_uninterned_symbol(s); + case '.': return finish_readtime_eval(s); + case '\\': return finish_character(s); default: - parse_error(std::string("parse error before ") - + to_string(cur->lexeme)); - return 0; // never executed + if (isdigit(*s.cur)) + return finish_anchor_or_reference(s); + syntax_error("syntax error after sharp-sign"); } - } + return nullptr; + } + + // We have just seen a dot; read the tail and the closing parenthesis. + static const Syntax* + finish_dotted_list(Reader::State& s, std::vector& elts) { + ++s.cur; // Skip dot sign. 
+ auto x = read_sexpr(s); + if (x == nullptr) + syntax_error("missing expression after dot sign"); + if (not skip_blank(s) or *s.cur != ')') + syntax_error("missing closing parenthesis"); + ++s.cur; + elts.push_back(x); + return s.alloc.make_list(elts, true); + } + + static const Syntax* + read_pair(Reader::State& s) { + ++s.cur; // skip opening parenthesis + std::vector elts { }; + while (skip_blank(s)) + switch (*s.cur) { + case '.': + if (elts.empty()) + syntax_error("missing expression before dot sign."); + return finish_dotted_list(s, elts); + + case ')': + ++s.cur; + return s.alloc.make_list(elts); - const Token* - Parser::parse(const Token* cur, const Token* last) { - while (cur < last) - if (const Syntax* s = parse_syntax(cur, last)) - syns.push_back(s); - return cur; + default: + if (auto x = read_sexpr(s)) + elts.push_back(x); + else + syntax_error("unfinished pair expression"); + break; + } + syntax_error("end-of-input while looking for closing parenthesis"); + return nullptr; + } + + static const Syntax* + read_sexpr(Reader::State& s) { + while (skip_blank(s)) + switch (*s.cur) { + case ';': skip_to_eol(s); break; + case '\'': return read_quote(s); + case '`': return read_backquote(s); + case '|': return read_absolute_symbol(s); + case '#': return read_sharp_et_al(s); + case '(': return read_pair(s); + default: return read_atom(s); + } + return nullptr; } - Module::Module(const std::string& s) : nm(s) { - std::vector tokens; - Memory::FileMapping input(s); - Lexer lexer(raw_strs, tokens); - const Byte* rest = lexer.tokenize(input.begin(), input.end()); - if (rest != input.end()) - syntax_error("syntax error"); - Parser parser(allocator, *this); - const Token* tok = parser.parse(begin_ptr(tokens), end_ptr(tokens)); - if (tok != end_ptr(tokens)) - parse_error("parse error"); + const Syntax* + Reader::read() { + return read_sexpr(st); } + } } diff --git a/src/utils/hammer.cc b/src/utils/hammer.cc index 1c7e050b..f4241aaf 100644 --- a/src/utils/hammer.cc 
+++ b/src/utils/hammer.cc @@ -69,7 +69,7 @@ namespace OpenAxiom { BasicText(const Byte* f, const Byte* l) : span(f, l) { } // Pointer to the start of this basic text element const Byte* begin() const { return span.first; } - // Oone-past-the-end of the this basic text element. + // One-past-the-end of this basic text element. const Byte* end() const { return span.second; } private: std::pair span; -- cgit v1.2.3