From 7f57a915cee3c91cddd166fe9964655696666c4b Mon Sep 17 00:00:00 2001 From: dos-reis Date: Wed, 26 Jun 2013 11:43:56 +0000 Subject: Rewrite s-expression reader. --- src/include/diagnostics.H | 8 ++ src/include/sexpr.H | 295 +++++++++++++++++----------------------------- src/include/structure.H | 2 - 3 files changed, 119 insertions(+), 186 deletions(-) (limited to 'src/include') diff --git a/src/include/diagnostics.H b/src/include/diagnostics.H index 8f877e2f..9cb0fce1 100644 --- a/src/include/diagnostics.H +++ b/src/include/diagnostics.H @@ -33,6 +33,7 @@ #ifndef OPENAXIOM_DIAGNOSTICS_included #define OPENAXIOM_DIAGNOSTICS_included +#include #include namespace OpenAxiom { @@ -49,6 +50,13 @@ namespace OpenAxiom { std::ostream* out; std::ostream* err; }; + + struct BasicError { + explicit BasicError(const std::string& s) : msg(s) { } + const std::string& message() const { return msg; } + protected: + std::string msg; + }; } } diff --git a/src/include/sexpr.H b/src/include/sexpr.H index a9371139..73e21f31 100644 --- a/src/include/sexpr.H +++ b/src/include/sexpr.H @@ -45,22 +45,14 @@ #include #include #include -#include #include namespace OpenAxiom { namespace Sexpr { - struct BasicError { - explicit BasicError(const std::string& s) : msg(s) { } - const std::string& message() const { return msg; } - protected: - std::string msg; - }; - - // ----------- - // -- Token -- - // ----------- - struct Token { + // ------------ + // -- Lexeme -- + // ------------ + struct Lexeme { enum Type { unknown, // unidentified token semicolon = token::value(";"), // comment @@ -87,35 +79,8 @@ namespace OpenAxiom { sharp_integer_sharp // back reference, #n# }; - Type type; // class of this token - BasicString lexeme; // characters making up this token - }; - - // Print a token object on an output stream. - // Note: this function is for debugging purpose; in particular - // it does not `prettyprint' tokens. - std::ostream& operator<<(std::ostream&, const Token&); - - // ----------- - // -- Lexer -- - // ----------- - // An object of this type transforms a sequence of characters - // into a sequence of tokens as defined above. - // A lexer does not manage memory itself. Rather, it delegates - // storage allocation for lexemes and tokens to specialized - // agents used to construct it. - struct Lexer { - Lexer(StringPool& pool, std::vector& toks) - : strings(pool), tokens(toks) { } - - const Byte* tokenize(const Byte*, const Byte*); - BasicString intern(const Byte* s, size_t n) { - return strings.intern(s, n); - } - - private: - StringPool& strings; // where to allocate lexemes from - std::vector& tokens; // where to deposite tokens. + std::pair boundary; + Ordinal line; }; // ------------ @@ -127,59 +92,59 @@ namespace OpenAxiom { virtual void accept(Visitor&) const = 0; }; - // ---------- - // -- Atom -- - // ---------- + // ---------------- + // -- AtomSyntax -- + // ---------------- // An atom is a syntax object consisting of exatly one token. // This should not be confused with the notion of atom // in Lisp languages. - struct Atom : Syntax { - const Token& token() const { return tok; } - BasicString lexeme() const { return tok.lexeme; } + struct AtomSyntax : Syntax { + const Lexeme& lexeme() const { return lex; } void accept(Visitor&) const; protected: - const Token tok; - Atom(const Token&); + Lexeme lex; + explicit AtomSyntax(const Lexeme&); }; - // ------------- - // -- Integer -- - // ------------- + // ------------------- + // -- IntegerSyntax -- + // ------------------- // Integer literal syntax objects - struct Integer : Atom { - explicit Integer(const Token&); + struct IntegerSyntax : AtomSyntax { + explicit IntegerSyntax(const Lexeme&); void accept(Visitor&) const; }; - // --------------- - // -- Character -- - // --------------- + // --------------------- + // -- CharacterSyntax -- + // --------------------- // Character literal syntax objects. - struct Character : Atom { - explicit Character(const Token&); + struct CharacterSyntax : AtomSyntax { + explicit CharacterSyntax(const Lexeme&); void accept(Visitor&) const; }; - // ------------ - // -- String -- - // ------------ + // ------------------ + // -- StringSyntax -- + // ------------------ // Striing literal syntax objjects. - struct String : Atom { - explicit String(const Token&); + struct StringSyntax : AtomSyntax { + explicit StringSyntax(const Lexeme&); void accept(Visitor&) const; }; - // ------------ - // -- Symbol -- - // ------------ - struct Symbol : Atom { + // ------------------ + // -- SymbolSyntax -- + // ------------------ + struct SymbolSyntax : AtomSyntax { enum Kind { uninterned, // uninterned symbol ordinary, // an interned symbol + absolute, // case-sensitive symbol keyword // a keyword symbol }; - Symbol(const Token&, Kind); - Kind kin() const { return sort; } + SymbolSyntax(const Lexeme&, Kind); + Kind kind() const { return sort; } void accept(Visitor&) const; private: const Kind sort; @@ -189,20 +154,20 @@ namespace OpenAxiom { // -- Reference -- // --------------- // Back reference object to a syntax object. - struct Reference : Atom { - Reference(const Token&, size_t); + struct Reference : AtomSyntax { + Reference(const Lexeme&, Ordinal); size_t tag() const { return pos; } void accept(Visitor&) const; private: - const size_t pos; + Ordinal pos; }; - // ------------ - // -- Anchor -- - // ------------ + // ------------------ + // -- AnchorSyntax -- + // ------------------ // Base anchor syntax object. - struct Anchor : Syntax { - Anchor(size_t, const Syntax*); + struct AnchorSyntax : Syntax { + AnchorSyntax(size_t, const Syntax*); size_t ref() const { return tag; } const Syntax* value() const { return val; } void accept(Visitor&) const; @@ -222,20 +187,20 @@ namespace OpenAxiom { const Syntax* const form; }; - // ----------- - // -- Quote -- - // ----------- + // ----------------- + // -- QuoteSyntax -- + // ----------------- // Quotation syntax object. - struct Quote : unary_form { - explicit Quote(const Syntax*); + struct QuoteSyntax : unary_form { + explicit QuoteSyntax(const Syntax*); }; - // --------------- - // -- Antiquote -- - // --------------- + // --------------------- + // -- AntiquoteSyntax -- + // --------------------- // Quasi-quotation syntax object. - struct Antiquote : unary_form { - explicit Antiquote(const Syntax*); + struct AntiquoteSyntax : unary_form { + explicit AntiquoteSyntax(const Syntax*); }; // ------------ @@ -270,15 +235,6 @@ namespace OpenAxiom { explicit Function(const Syntax*); }; - // ------------- - // -- DotTail -- - // ------------- - // Objects of this type represents the tail of syntactic - // objects denoting dotted pair syntax `(a . b)'. - struct DotTail : unary_form { - explicit DotTail(const Syntax*); - }; - // ------------- // -- Include -- // ------------- @@ -296,10 +252,10 @@ namespace OpenAxiom { }; // ---------- - // -- List -- + // -- ListSyntax -- // ---------- // List syntax objects. - struct List : Syntax, private std::vector { + struct ListSyntax : Syntax, private std::vector { typedef std::vector base; using base::const_iterator; using base::begin; @@ -307,17 +263,20 @@ namespace OpenAxiom { using base::size; using base::empty; - List(); - explicit List(const base&); - ~List(); + ListSyntax(); + ListSyntax(const base&, bool); + ~ListSyntax(); void accept(Visitor&) const; + bool dotted() const { return dot; } + private: + bool dot; }; // ------------ - // -- Vector -- + // -- VectorSyntax -- // ------------ - // Vector syntax objects. - struct Vector : Syntax, private std::vector { + // VectorSyntax syntax objects. + struct VectorSyntax : Syntax, private std::vector { typedef std::vector base; using base::const_iterator; using base::begin; @@ -326,9 +285,9 @@ namespace OpenAxiom { using base::operator[]; using base::empty; - Vector(); - explicit Vector(const base&); - ~Vector(); + VectorSyntax(); + explicit VectorSyntax(const base&); + ~VectorSyntax(); void accept(Visitor&) const; }; @@ -336,24 +295,23 @@ namespace OpenAxiom { // -- Syntax::Visitor -- // --------------------- struct Syntax::Visitor { - virtual void visit(const Atom&) = 0; - virtual void visit(const Integer&); - virtual void visit(const Character&); - virtual void visit(const String&); - virtual void visit(const Symbol&); + virtual void visit(const AtomSyntax&) = 0; + virtual void visit(const IntegerSyntax&); + virtual void visit(const CharacterSyntax&); + virtual void visit(const StringSyntax&); + virtual void visit(const SymbolSyntax&); virtual void visit(const Reference&); - virtual void visit(const Anchor&) = 0; - virtual void visit(const Quote&) = 0; - virtual void visit(const Antiquote&) = 0; + virtual void visit(const AnchorSyntax&) = 0; + virtual void visit(const QuoteSyntax&) = 0; + virtual void visit(const AntiquoteSyntax&) = 0; virtual void visit(const Expand&) = 0; virtual void visit(const Eval&) = 0; virtual void visit(const Splice&) = 0; virtual void visit(const Function&) = 0; virtual void visit(const Include&) = 0; virtual void visit(const Exclude&) = 0; - virtual void visit(const DotTail&) = 0; - virtual void visit(const List&) = 0; - virtual void visit(const Vector&) = 0; + virtual void visit(const ListSyntax&) = 0; + virtual void visit(const VectorSyntax&) = 0; }; template @@ -370,90 +328,59 @@ namespace OpenAxiom { Allocator(); ~Allocator(); - const Integer* make_integer(const Token&); - const Character* make_character(const Token&); - const String* make_string(const Token&); - const Symbol* make_symbol(const Token&, Symbol::Kind); - const Reference* make_reference(const Token&, size_t); - const Anchor* make_anchor(size_t, const Syntax*); - const Quote* make_quote(const Syntax*); - const Antiquote* make_antiquote(const Syntax*); + const IntegerSyntax* make_integer(const Lexeme&); + const CharacterSyntax* make_character(const Lexeme&); + const StringSyntax* make_string(const Lexeme&); + const SymbolSyntax* make_symbol(SymbolSyntax::Kind, const Lexeme&); + const Reference* make_reference(size_t, const Lexeme&); + const AnchorSyntax* make_anchor(size_t, const Syntax*); + const QuoteSyntax* make_quote(const Syntax*); + const AntiquoteSyntax* make_antiquote(const Syntax*); const Expand* make_expand(const Syntax*); const Eval* make_eval(const Syntax*); const Splice* make_splice(const Syntax*); const Function* make_function(const Syntax*); const Include* make_include(const Syntax*); const Exclude* make_exclude(const Syntax*); - const DotTail* make_dot_tail(const Syntax*); - const List* make_list(const std::vector&); - const Vector* make_vector(const std::vector&); + const ListSyntax* make_list(const std::vector&, bool = false); + const VectorSyntax* make_vector(const std::vector&); private: - Memory::Factory ints; - Memory::Factory chars; - Memory::Factory strs; - Memory::Factory syms; - Memory::Factory ancs; + Memory::Factory ints; + Memory::Factory chars; + Memory::Factory strs; + Memory::Factory syms; + Memory::Factory ancs; Memory::Factory refs; - Memory::Factory quotes; - Memory::Factory antis; + Memory::Factory quotes; + Memory::Factory antis; Memory::Factory exps; Memory::Factory funs; Memory::Factory incs; Memory::Factory excs; Memory::Factory evls; Memory::Factory spls; - Memory::Factory tails; - Memory::Factory lists; - Memory::Factory vectors; - List empty_list; - Vector empty_vector; + Memory::Factory lists; + Memory::Factory vectors; + ListSyntax empty_list; + VectorSyntax empty_vector; }; - // ------------ - // -- Parser -- - // ------------ - // An object of this type transforms a sequence of tokens - // into a sequence of syntax objects. - // A parser object does not manage memory itself. Rather, it delegates - // storage allocation for syntax objects to specialized - // agents used to construct it. - struct Parser { - Parser(Allocator&, std::vector&); - const Token* parse(const Token*, const Token*); - private: - Allocator& alloc; - std::vector& syns; - - const Symbol* parse_symbol(const Token*&, const Token*); - const Character* parse_character(const Token*&, const Token*); - const Anchor* parse_anchor(const Token*&, const Token*); - const Reference* parse_reference(const Token*&, const Token*); - const Symbol* parse_uninterned(const Token*&, const Token*); - const Function* parse_function(const Token*&, const Token*); - const Quote* parse_quote(const Token*&, const Token*); - const Antiquote* parse_antiquote(const Token*&, const Token*); - const Include* parse_include(const Token*&, const Token*); - const Exclude* parse_exclude(const Token*&, const Token*); - const Expand* parse_expand(const Token*&, const Token*); - const Eval* parse_eval(const Token*&, const Token*); - const Splice* parse_splice(const Token*&, const Token*); - const Vector* parse_vector(const Token*&, const Token*); - const List* parse_list(const Token*&, const Token*); - const Syntax* parse_syntax(const Token*&, const Token*); - }; + // -- Reader -- + struct Reader { + struct State { + const Byte* start; + const Byte* end; + const Byte* cur; + const Byte* line; + Ordinal lineno; + Allocator alloc; + }; - // ------------ - // -- Module -- - // ------------ - // Entire s-expression input file. - struct Module : std::vector { - explicit Module(const std::string&); - const std::string& name() const { return nm; } + Reader(const Byte*, const Byte*); + const Syntax* read(); private: - const std::string nm; - StringPool raw_strs; - Allocator allocator; + State st; }; } } diff --git a/src/include/structure.H b/src/include/structure.H index d9434423..33c084f2 100644 --- a/src/include/structure.H +++ b/src/include/structure.H @@ -33,8 +33,6 @@ #ifndef OPENAXIOM_STRUCTURE_included #define OPENAXIOM_STRUCTURE_included -#include - namespace OpenAxiom { // -- helper classes for structural abstractions -- namespace structure { -- cgit v1.2.3