From f9e4a03a220766099d6b5fc683a58185c4805a05 Mon Sep 17 00:00:00 2001 From: dos-reis Date: Tue, 25 Jun 2013 04:08:39 +0000 Subject: * include/sexpr.H: Move from utils. (UniqueAllocator): Remove. (SyntaxComparator): Likewise. * syntax/sexpr.cc: Move from utils. --- src/ChangeLog | 7 + src/include/sexpr.H | 461 ++++++++++++++++++++++++ src/syntax/Makefile.am | 2 +- src/syntax/Makefile.in | 20 +- src/syntax/sexpr.cc | 934 +++++++++++++++++++++++++++++++++++++++++++++++++ src/utils/Makefile.in | 6 +- src/utils/sexpr.H | 495 -------------------------- src/utils/sexpr.cc | 934 ------------------------------------------------- 8 files changed, 1424 insertions(+), 1435 deletions(-) create mode 100644 src/include/sexpr.H create mode 100644 src/syntax/sexpr.cc delete mode 100644 src/utils/sexpr.H delete mode 100644 src/utils/sexpr.cc diff --git a/src/ChangeLog b/src/ChangeLog index 88d35204..e72747a2 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,10 @@ +2013-06-24 Gabriel Dos Reis + + * include/sexpr.H: Move from utils. + (UniqueAllocator): Remove. + (SyntaxComparator): Likewise. + * syntax/sexpr.cc: Move from utils. + 2013-06-24 Gabriel Dos Reis * interp/cattable.boot (squeezeList): Remove. diff --git a/src/include/sexpr.H b/src/include/sexpr.H new file mode 100644 index 00000000..a9371139 --- /dev/null +++ b/src/include/sexpr.H @@ -0,0 +1,461 @@ +// Copyright (C) 2010-2013, Gabriel Dos Reis. +// All rights reserved. +// Written by Gabriel Dos Reis. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of The Numerical Algorithms Group Ltd. nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef OPENAXIOM_SEXPR_included +#define OPENAXIOM_SEXPR_included + +// --% Author: Gabriel Dos Reis. +// --% Description: +// --% A simple support for s-expressions. By design, no ambition +// --% for full-fledged Common Lisp reader capability. Rather, +// --% the aim is a simple data structure for exchanging data +// --% between several components of the OpenAxiom system. +// --% Users interested in fullblown Lisp syntax should seek +// --% to acquire Lisp systems, many of which are freely available. + +#include +#include +#include +#include +#include + +namespace OpenAxiom { + namespace Sexpr { + struct BasicError { + explicit BasicError(const std::string& s) : msg(s) { } + const std::string& message() const { return msg; } + protected: + std::string msg; + }; + + // ----------- + // -- Token -- + // ----------- + struct Token { + enum Type { + unknown, // unidentified token + semicolon = token::value(";"), // comment + dot = token::value("."), + comma = token::value(","), + open_paren = token::value("("), + close_paren = token::value(")"), + apostrophe = token::value("'"), + backquote = token::value("`"), + backslash = token::value("\\"), + sharp_open_paren = token::value("#("), + sharp_apostrophe = token::value("#'"), + sharp_colon = token::value("#:"), + sharp_plus = token::value("#+"), + sharp_minus = token::value("#-"), + sharp_dot = token::value("#."), + comma_at = token::value(",@"), + digraph_end = token::value(0xff,0xff), + integer, // integer literal + character, // character literal + string, // string literal + identifier, // plain identifier + sharp_integer_equal, // anchor definition, #n=
+ sharp_integer_sharp // back reference, #n# + }; + + Type type; // class of this token + BasicString lexeme; // characters making up this token + }; + + // Print a token object on an output stream. + // Note: this function is for debugging purpose; in particular + // it does not `prettyprint' tokens. + std::ostream& operator<<(std::ostream&, const Token&); + + // ----------- + // -- Lexer -- + // ----------- + // An object of this type transforms a sequence of characters + // into a sequence of tokens as defined above. + // A lexer does not manage memory itself. Rather, it delegates + // storage allocation for lexemes and tokens to specialized + // agents used to construct it. + struct Lexer { + Lexer(StringPool& pool, std::vector& toks) + : strings(pool), tokens(toks) { } + + const Byte* tokenize(const Byte*, const Byte*); + BasicString intern(const Byte* s, size_t n) { + return strings.intern(s, n); + } + + private: + StringPool& strings; // where to allocate lexemes from + std::vector& tokens; // where to deposite tokens. + }; + + // ------------ + // -- Syntax -- + // ------------ + // Base class of syntax object classes. + struct Syntax { + struct Visitor; // base class of syntax visitors + virtual void accept(Visitor&) const = 0; + }; + + // ---------- + // -- Atom -- + // ---------- + // An atom is a syntax object consisting of exatly one token. + // This should not be confused with the notion of atom + // in Lisp languages. + struct Atom : Syntax { + const Token& token() const { return tok; } + BasicString lexeme() const { return tok.lexeme; } + void accept(Visitor&) const; + protected: + const Token tok; + Atom(const Token&); + }; + + // ------------- + // -- Integer -- + // ------------- + // Integer literal syntax objects + struct Integer : Atom { + explicit Integer(const Token&); + void accept(Visitor&) const; + }; + + // --------------- + // -- Character -- + // --------------- + // Character literal syntax objects. + struct Character : Atom { + explicit Character(const Token&); + void accept(Visitor&) const; + }; + + // ------------ + // -- String -- + // ------------ + // Striing literal syntax objjects. + struct String : Atom { + explicit String(const Token&); + void accept(Visitor&) const; + }; + + // ------------ + // -- Symbol -- + // ------------ + struct Symbol : Atom { + enum Kind { + uninterned, // uninterned symbol + ordinary, // an interned symbol + keyword // a keyword symbol + }; + Symbol(const Token&, Kind); + Kind kin() const { return sort; } + void accept(Visitor&) const; + private: + const Kind sort; + }; + + // --------------- + // -- Reference -- + // --------------- + // Back reference object to a syntax object. + struct Reference : Atom { + Reference(const Token&, size_t); + size_t tag() const { return pos; } + void accept(Visitor&) const; + private: + const size_t pos; + }; + + // ------------ + // -- Anchor -- + // ------------ + // Base anchor syntax object. + struct Anchor : Syntax { + Anchor(size_t, const Syntax*); + size_t ref() const { return tag; } + const Syntax* value() const { return val; } + void accept(Visitor&) const; + private: + const size_t tag; + const Syntax* const val; + }; + + // -- Abstract over common implementation of unary special operators. + template + struct unary_form : Syntax { + const Syntax* body() const { return form; } + void accept(Visitor&) const; + protected: + unary_form(const Syntax* f) : form(f) { } + private: + const Syntax* const form; + }; + + // ----------- + // -- Quote -- + // ----------- + // Quotation syntax object. + struct Quote : unary_form { + explicit Quote(const Syntax*); + }; + + // --------------- + // -- Antiquote -- + // --------------- + // Quasi-quotation syntax object. + struct Antiquote : unary_form { + explicit Antiquote(const Syntax*); + }; + + // ------------ + // -- Expand -- + // ------------ + // Expansion request inside a quasi-quotation. + struct Expand : unary_form { + explicit Expand(const Syntax*); + }; + + // ---------- + // -- Eval -- + // ---------- + // Read-time evaluation request syntax object. + struct Eval : unary_form { + explicit Eval(const Syntax*); + }; + + // ------------ + // -- Splice -- + // ------------ + // Splice request syntax object inside a quasi-quotation. + struct Splice : unary_form { + explicit Splice(const Syntax*); + }; + + // -------------- + // -- Function -- + // -------------- + // Function literal syntax object. + struct Function : unary_form { + explicit Function(const Syntax*); + }; + + // ------------- + // -- DotTail -- + // ------------- + // Objects of this type represents the tail of syntactic + // objects denoting dotted pair syntax `(a . b)'. + struct DotTail : unary_form { + explicit DotTail(const Syntax*); + }; + + // ------------- + // -- Include -- + // ------------- + // Conditional inclusion syntax object + struct Include : unary_form { + explicit Include(const Syntax*); + }; + + // ------------- + // -- Exclude -- + // ------------- + // Conditional exclusion syntax object + struct Exclude : unary_form { + explicit Exclude(const Syntax*); + }; + + // ---------- + // -- List -- + // ---------- + // List syntax objects. + struct List : Syntax, private std::vector { + typedef std::vector base; + using base::const_iterator; + using base::begin; + using base::end; + using base::size; + using base::empty; + + List(); + explicit List(const base&); + ~List(); + void accept(Visitor&) const; + }; + + // ------------ + // -- Vector -- + // ------------ + // Vector syntax objects. + struct Vector : Syntax, private std::vector { + typedef std::vector base; + using base::const_iterator; + using base::begin; + using base::end; + using base::size; + using base::operator[]; + using base::empty; + + Vector(); + explicit Vector(const base&); + ~Vector(); + void accept(Visitor&) const; + }; + + // --------------------- + // -- Syntax::Visitor -- + // --------------------- + struct Syntax::Visitor { + virtual void visit(const Atom&) = 0; + virtual void visit(const Integer&); + virtual void visit(const Character&); + virtual void visit(const String&); + virtual void visit(const Symbol&); + virtual void visit(const Reference&); + virtual void visit(const Anchor&) = 0; + virtual void visit(const Quote&) = 0; + virtual void visit(const Antiquote&) = 0; + virtual void visit(const Expand&) = 0; + virtual void visit(const Eval&) = 0; + virtual void visit(const Splice&) = 0; + virtual void visit(const Function&) = 0; + virtual void visit(const Include&) = 0; + virtual void visit(const Exclude&) = 0; + virtual void visit(const DotTail&) = 0; + virtual void visit(const List&) = 0; + virtual void visit(const Vector&) = 0; + }; + + template + void + unary_form::accept(Visitor& v) const { + v.visit(static_cast(*this)); + } + + // --------------- + // -- Allocator -- + // --------------- + // Allocator of syntax objects. + struct Allocator { + Allocator(); + ~Allocator(); + + const Integer* make_integer(const Token&); + const Character* make_character(const Token&); + const String* make_string(const Token&); + const Symbol* make_symbol(const Token&, Symbol::Kind); + const Reference* make_reference(const Token&, size_t); + const Anchor* make_anchor(size_t, const Syntax*); + const Quote* make_quote(const Syntax*); + const Antiquote* make_antiquote(const Syntax*); + const Expand* make_expand(const Syntax*); + const Eval* make_eval(const Syntax*); + const Splice* make_splice(const Syntax*); + const Function* make_function(const Syntax*); + const Include* make_include(const Syntax*); + const Exclude* make_exclude(const Syntax*); + const DotTail* make_dot_tail(const Syntax*); + const List* make_list(const std::vector&); + const Vector* make_vector(const std::vector&); + + private: + Memory::Factory ints; + Memory::Factory chars; + Memory::Factory strs; + Memory::Factory syms; + Memory::Factory ancs; + Memory::Factory refs; + Memory::Factory quotes; + Memory::Factory antis; + Memory::Factory exps; + Memory::Factory funs; + Memory::Factory incs; + Memory::Factory excs; + Memory::Factory evls; + Memory::Factory spls; + Memory::Factory tails; + Memory::Factory lists; + Memory::Factory vectors; + List empty_list; + Vector empty_vector; + }; + + // ------------ + // -- Parser -- + // ------------ + // An object of this type transforms a sequence of tokens + // into a sequence of syntax objects. + // A parser object does not manage memory itself. Rather, it delegates + // storage allocation for syntax objects to specialized + // agents used to construct it. + struct Parser { + Parser(Allocator&, std::vector&); + const Token* parse(const Token*, const Token*); + private: + Allocator& alloc; + std::vector& syns; + + const Symbol* parse_symbol(const Token*&, const Token*); + const Character* parse_character(const Token*&, const Token*); + const Anchor* parse_anchor(const Token*&, const Token*); + const Reference* parse_reference(const Token*&, const Token*); + const Symbol* parse_uninterned(const Token*&, const Token*); + const Function* parse_function(const Token*&, const Token*); + const Quote* parse_quote(const Token*&, const Token*); + const Antiquote* parse_antiquote(const Token*&, const Token*); + const Include* parse_include(const Token*&, const Token*); + const Exclude* parse_exclude(const Token*&, const Token*); + const Expand* parse_expand(const Token*&, const Token*); + const Eval* parse_eval(const Token*&, const Token*); + const Splice* parse_splice(const Token*&, const Token*); + const Vector* parse_vector(const Token*&, const Token*); + const List* parse_list(const Token*&, const Token*); + const Syntax* parse_syntax(const Token*&, const Token*); + }; + + // ------------ + // -- Module -- + // ------------ + // Entire s-expression input file. + struct Module : std::vector { + explicit Module(const std::string&); + const std::string& name() const { return nm; } + private: + const std::string nm; + StringPool raw_strs; + Allocator allocator; + }; + } +} + +#endif // OPENAXIOM_SEXPR_included diff --git a/src/syntax/Makefile.am b/src/syntax/Makefile.am index a731618f..964f7e54 100644 --- a/src/syntax/Makefile.am +++ b/src/syntax/Makefile.am @@ -32,7 +32,7 @@ noinst_LIBRARIES = libsyntax.a -libsyntax_a_SOURCES = token.cc +libsyntax_a_SOURCES = token.cc sexpr.cc oa_target_incdir = $(top_builddir)/$(target)/include diff --git a/src/syntax/Makefile.in b/src/syntax/Makefile.in index 8982abc0..f492a7f7 100644 --- a/src/syntax/Makefile.in +++ b/src/syntax/Makefile.in @@ -136,7 +136,8 @@ am__v_AR_0 = @echo " AR " $@; am__v_AR_1 = libsyntax_a_AR = $(AR) $(ARFLAGS) libsyntax_a_LIBADD = -am_libsyntax_a_OBJECTS = libsyntax_a-token.$(OBJEXT) +am_libsyntax_a_OBJECTS = libsyntax_a-token.$(OBJEXT) \ + libsyntax_a-sexpr.$(OBJEXT) libsyntax_a_OBJECTS = $(am_libsyntax_a_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -375,7 +376,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ void_type = @void_type@ noinst_LIBRARIES = libsyntax.a -libsyntax_a_SOURCES = token.cc +libsyntax_a_SOURCES = token.cc sexpr.cc oa_target_incdir = $(top_builddir)/$(target)/include libsyntax_a_CPPFLAGS = -I$(top_srcdir)/src/include -I$(oa_target_incdir) all: all-am @@ -427,6 +428,7 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libsyntax_a-sexpr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libsyntax_a-token.Po@am__quote@ .cc.o: @@ -464,6 +466,20 @@ libsyntax_a-token.obj: token.cc @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libsyntax_a_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o libsyntax_a-token.obj `if test -f 'token.cc'; then $(CYGPATH_W) 'token.cc'; else $(CYGPATH_W) '$(srcdir)/token.cc'; fi` +libsyntax_a-sexpr.o: sexpr.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libsyntax_a_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT libsyntax_a-sexpr.o -MD -MP -MF $(DEPDIR)/libsyntax_a-sexpr.Tpo -c -o libsyntax_a-sexpr.o `test -f 'sexpr.cc' || echo '$(srcdir)/'`sexpr.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libsyntax_a-sexpr.Tpo $(DEPDIR)/libsyntax_a-sexpr.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='sexpr.cc' object='libsyntax_a-sexpr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libsyntax_a_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o libsyntax_a-sexpr.o `test -f 'sexpr.cc' || echo '$(srcdir)/'`sexpr.cc + +libsyntax_a-sexpr.obj: sexpr.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libsyntax_a_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT libsyntax_a-sexpr.obj -MD -MP -MF $(DEPDIR)/libsyntax_a-sexpr.Tpo -c -o libsyntax_a-sexpr.obj `if test -f 'sexpr.cc'; then $(CYGPATH_W) 'sexpr.cc'; else $(CYGPATH_W) '$(srcdir)/sexpr.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libsyntax_a-sexpr.Tpo $(DEPDIR)/libsyntax_a-sexpr.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='sexpr.cc' object='libsyntax_a-sexpr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libsyntax_a_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o libsyntax_a-sexpr.obj `if test -f 'sexpr.cc'; then $(CYGPATH_W) 'sexpr.cc'; else $(CYGPATH_W) '$(srcdir)/sexpr.cc'; fi` + mostlyclean-libtool: -rm -f *.lo diff --git a/src/syntax/sexpr.cc b/src/syntax/sexpr.cc new file mode 100644 index 00000000..14113164 --- /dev/null +++ b/src/syntax/sexpr.cc @@ -0,0 +1,934 @@ +// Copyright (C) 2010-2013, Gabriel Dos Reis. +// All rights reserved. +// Written by Gabriel Dos Reis. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of The Numerical Algorithms Group Ltd. nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --% Author: Gabriel Dos Reis. + +#include +#include +#include +#include +#include +#include + +namespace OpenAxiom { + namespace Sexpr { + template + static inline int + length(const T(&)[N]) { + return N; + } + + template + static inline typename Sequence::const_pointer + begin_ptr(const Sequence& s) { + return &*s.begin(); + } + + template + static inline typename Sequence::const_pointer + end_ptr(const Sequence& s) { + return s.empty() ? 0 : &*s.begin() + s.size(); + } + + std::ostream& + operator<<(std::ostream& os, const Token& t) { + switch (t.type) { + case Token::semicolon: os << "SEMICOLON"; break; + case Token::dot: os << "DOT"; break; + case Token::comma: os << "COMMA"; break; + case Token::open_paren: os << "OPEN_PAREN"; break; + case Token::close_paren: os << "CLOSE_PAREN"; break; + case Token::apostrophe: os << "APOSTROPHE"; break; + case Token::backquote: os << "BACKQUOTE"; break; + case Token::backslash: os << "BACKSLASH"; break; + case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break; + case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break; + case Token::sharp_colon: os << "SHARP_COLON"; break; + case Token::sharp_plus: os << "SHARP_PLUS"; break; + case Token::sharp_minus: os << "SHARP_MINUS"; break; + case Token::sharp_dot: os << "SHARP_DOT"; break; + case Token::comma_at: os << "COMMA_AT"; break; + case Token::integer: os << "INTEGER"; break; + case Token::character: os << "CHARACTER"; break; + case Token::string: os << "STRING"; break; + case Token::identifier: os << "IDENTIFIER"; break; + case Token::sharp_integer_sharp: + os << "SHARP_INTEGER_SHARP"; break; + case Token::sharp_integer_equal: + os << "SHARP_INTEGER_EQUAL"; break; + default: os << "UNKNOWN"; break; + } + os << '('; + if (t.lexeme != 0) { + os << '"'; + std::copy(t.lexeme->begin(), t.lexeme->end(), + std::ostream_iterator(os)); + os << '"'; + } + else + os << ""; + return os << ')'; + } + + // ----------- + // -- Lexer -- + // ----------- + static void + syntax_error(const std::string& s) { + throw BasicError(s); + } + + // Return true if character `c' introduces a blank. + static bool + is_blank(char c) { + return c == ' ' or c == '\t' or c == '\v' + or c == '\n' or c == '\f' or c == '\r'; + } + + // Return true if the character `c' introduces a delimiter. + static bool + is_delimiter(char c) { + return is_blank(c) + or c == '(' or c == ')' or c == '\'' + or c == '`' or c == '#'; + } + + // Move `cur' past all consecutive blank characters, and + // return the new position. + static const Byte* + skip_blank(const Byte*& cur, const Byte* end) { + while (cur < end and is_blank(*cur)) + ++cur; + return cur; + } + + // Move `cur' to end-of-line marker. + static const Byte* + skip_to_eol(const Byte*& cur, const Byte* end) { + // FIXME: properly handle CR+LF. + while (cur < end and *cur != '\n') + ++cur; + return cur; + } + + // Move `cur' until a word boundary is reached. + static const Byte* + skip_to_word_boundary(const Byte*& cur, const Byte* end) { + bool saw_escape = false; + for (; cur < end; ++cur) { + if (saw_escape) + saw_escape = false; + else if (*cur == '\\') + saw_escape = true; + else if (is_delimiter(*cur)) + break; + } + return cur; + } + + // Move `cur' one-past a non-esacaped character `c'. + // Return true if the character was seen. + static bool + skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) { + bool saw_escape = false; + for (; cur < end; ++cur) + if (saw_escape) + saw_escape = false; + else if (*cur == '\\') + saw_escape = true; + else if (*cur == c) { + ++cur; + return true; + } + return false; + } + + // Move `cur' past the closing quote of string literal. + // Return true if the closing fence was effectively seen. + static inline bool + skip_to_quote(const Byte*& cur, const Byte* end) { + return skip_to_nonescaped_char(cur, end, '"'); + } + + // Return true if the character `c' be part of a non-absolute + // identifier. + static bool + identifier_part(char c) { + switch (c) { + case '+': case '-': case '*': case '/': case '%': case '^': + case '~': case '@': case '$': case '&': case '=': + case '<': case '>': case '?': case '!': case '_': + case '[': case ']': case '{': case '}': + return true; + default: + return isalnum(c); + } + } + + // Return true if the character `c' has a special meaning after + // the sharp character. + static bool + special_after_sharp(char c) { + return c == '(' or c == '\'' or c == ':' + or c == '+' or c == '-' or c == '.'; + } + + // Return true if the sequence `[cur, end)' has a prefix that is + // an integer followrd by the equal sign or the sharp sign. + // `cur' is moved along the way. + static bool + only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) { + while (cur < end and isdigit(*cur)) + ++cur; + return cur < end and (*cur == '#' or *cur == '='); + } + + // The token `t' was thought to designate an identifier. + // Reclassify it as an integer if, in fact, its lexeme consists + // entirely of digits. + static void + maybe_reclassify(Token& t) { + const Byte* cur = t.lexeme->begin(); + const Byte* end = t.lexeme->end(); + while (cur < end and isdigit(*cur)) + ++cur; + if (cur == end) + t.type = Token::integer; + } + + // Returns true if the first characters in the range + // [cur, last) start an identifier. + static bool + start_symbol(const Byte* cur, const Byte* last) { + if (cur >= last) + return false; + return identifier_part(*cur) + or *cur == '|' or *cur == ':'; + } + + // We are processing a symbol token. Accumulate all + // legitimate characters till the end of the token. + static void + skip_to_end_of_symbol(const Byte*& cur, const Byte* end) { + const char c = *cur; + if (*cur == '|') + skip_to_nonescaped_char(++cur, end, c); + else + skip_to_word_boundary(cur, end); + if (cur < end and *cur == ':') + skip_to_end_of_symbol(cur, end); + } + + static Token + match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) { + Token t = { Token::identifier, 0 }; + const Byte* start = cur; + skip_to_end_of_symbol(cur, end); + t.lexeme = lexer->intern(start, cur - start); + maybe_reclassify(t); + return t; + } + + const Byte* + Lexer::tokenize(const Byte* cur, const Byte* end) { + while (skip_blank(cur, end) < end) { + Token t = { Token::unknown, 0 }; + switch (*cur) { + case ';': { + const Byte* start = cur; + t.type = Token::semicolon; + skip_to_eol(cur, end); + t.lexeme = intern(start, cur - start); + break; + } + + case '.': case '(': case ')': case '\'': case '`': + t.type = Token::Type(token::value(*cur)); + t.lexeme = intern(cur, 1); + ++cur; + break; + + case ',': { + const Byte* start = cur; + if (++cur < end and *cur == '@') { + t.type = Token::comma_at; + ++cur; + } + else + t.type = Token::comma; + t.lexeme = intern(start, cur - start); + break; + } + + case '\\': + t = match_maybe_symbol(this, cur, end); + break; + + case '#': { + const Byte* start = cur; + if (cur + 1 < end and special_after_sharp(cur[1])) { + t.type = Token::Type(token::value(cur[0], cur[1])); + t.lexeme = intern(cur, 2); + cur += 2; + } + else if (cur + 1 < end and cur[1] == '\\') { + start = cur += 2; + if (not isalnum(*cur)) + ++cur; + else + skip_to_word_boundary(cur, end); + t.type = Token::character; + t.lexeme = intern(start, cur - start); + } + else if (only_digits_before_equal_or_shap(++cur, end)) { + t.type = *cur == '#' + ? Token::sharp_integer_sharp + : Token::sharp_integer_equal; + t.lexeme = intern(start, cur - start + 1); + ++cur; + } + else { + skip_to_word_boundary(cur, end); + t.lexeme = intern(start, cur - start); + } + break; + } + + case '"': { + const Byte* start = cur; + skip_to_quote(++cur, end); + t.type = Token::string; + t.lexeme = intern(start, cur - start); + break; + } + + default: + if (start_symbol(cur, end)) + t = match_maybe_symbol(this, cur, end); + else { + const Byte* start = cur; + skip_to_word_boundary(++cur, end); + t.lexeme = intern(start, cur - start); + } + break; + } + tokens.push_back(t); + } + return cur; + } + + // ---------- + // -- Atom -- + // ---------- + Atom::Atom(const Token& t) : tok(t) { } + + void + Atom::accept(Visitor& v) const { + v.visit(*this); + } + + // ------------- + // -- Integer -- + // ------------- + Integer::Integer(const Token& t) : Atom(t) { } + + void + Integer::accept(Visitor& v) const { + v.visit(*this); + } + + // --------------- + // -- Character -- + // --------------- + Character::Character(const Token& t) : Atom(t) { } + + void + Character::accept(Visitor& v) const { + v.visit(*this); + } + + // ------------ + // -- String -- + // ------------ + String::String(const Token& t) : Atom(t) { } + + void + String::accept(Visitor& v) const { + v.visit(*this); + } + + // ------------ + // -- Symbol -- + // ------------ + Symbol::Symbol(const Token& t, Kind k) : Atom(t), sort(k) { } + + void + Symbol::accept(Visitor& v) const { + v.visit(*this); + } + + // ------------ + // -- Anchor -- + // ------------ + Anchor::Anchor(size_t t, const Syntax* s) : tag(t), val(s) { } + + void + Anchor::accept(Visitor& v) const { + v.visit(*this); + } + + // --------------- + // -- Reference -- + // --------------- + Reference::Reference(const Token& t, size_t v) : Atom(t), pos(v) { } + + void + Reference::accept(Visitor& v) const { + v.visit(*this); + } + + // ----------- + // -- Quote -- + // ----------- + Quote::Quote(const Syntax* s) : unary_form(s) { } + + // --------------- + // -- Antiquote -- + // --------------- + Antiquote::Antiquote(const Syntax* s) : unary_form(s) { } + + // ------------ + // -- Expand -- + // ------------ + Expand::Expand(const Syntax* s) : unary_form(s) { } + + // ---------- + // -- Eval -- + // ---------- + Eval::Eval(const Syntax* s) : unary_form(s) { } + + // ------------ + // -- Splice -- + // ------------ + Splice::Splice(const Syntax* s) : unary_form(s) { } + + // -------------- + // -- Function -- + // -------------- + Function::Function(const Syntax* s) : unary_form(s) { } + + // ------------- + // -- Include -- + Include::Include(const Syntax* s) : unary_form(s) { } + + // ------------- + // -- Exclude -- + Exclude::Exclude(const Syntax* s) : unary_form(s) { } + + // ------------- + // -- DotTail -- + // ------------- + DotTail::DotTail(const Syntax* f) : unary_form(f) { } + + // ---------- + // -- List -- + // ---------- + List::List() { } + + List::List(const base& elts) : base(elts) { } + + List::~List() { } + + void + List::accept(Visitor& v) const { + v.visit(*this); + } + + // ------------ + // -- Vector -- + // ------------ + Vector::Vector() { } + + Vector::Vector(const base& elts) : base(elts) { } + + Vector::~Vector() { } + + void + Vector::accept(Visitor& v) const { + v.visit(*this); + } + + // --------------------- + // -- Syntax::Visitor -- + // --------------------- + + // implicitly convert a reference to `T' to a reference to `S'. + template + inline const S& + as(const T& t) { + return t; + } + + void + Syntax::Visitor::visit(const Integer& i) { + visit(as(i)); + } + + void + Syntax::Visitor::visit(const Character& c) { + visit(as(c)); + } + + void + Syntax::Visitor::visit(const String& s) { + visit(as(s)); + } + + void + Syntax::Visitor::visit(const Symbol& s) { + visit(as(s)); + } + + void + Syntax::Visitor::visit(const Reference& r) { + visit(as(r)); + } + + // --------------- + // -- Allocator -- + // --------------- + Allocator::Allocator() { } + + // This destructor is defined here so that it provides + // a single instantiation point for destructors of all + // used templates floating around. + Allocator::~Allocator() { } + + const Character* + Allocator::make_character(const Token& t) { + return chars.make(t); + } + + const Integer* + Allocator::make_integer(const Token& t) { + return ints.make(t); + } + + const String* + Allocator::make_string(const Token& t) { + return strs.make(t); + } + + const Symbol* + Allocator::make_symbol(const Token& t, Symbol::Kind k) { + return syms.make(t, k); + } + + const Anchor* + Allocator::make_anchor(size_t t, const Syntax* s) { + return ancs.make(t, s); + } + + const Reference* + Allocator::make_reference(const Token& t, size_t i) { + return refs.make(t, i); + } + + const Quote* + Allocator::make_quote(const Syntax* s) { + return quotes.make(s); + } + + const Antiquote* + Allocator::make_antiquote(const Syntax* s) { + return antis.make(s); + } + + const Expand* + Allocator::make_expand(const Syntax* s) { + return exps.make(s); + } + + const Eval* + Allocator::make_eval(const Syntax* s) { + return evls.make(s); + } + + const Splice* + Allocator::make_splice(const Syntax* s) { + return spls.make(s); + } + + const Function* + Allocator::make_function(const Syntax* s) { + return funs.make(s); + } + + const Include* + Allocator::make_include(const Syntax* s) { + return incs.make(s); + } + + const Exclude* + Allocator::make_exclude(const Syntax* s) { + return excs.make(s); + } + + const DotTail* + Allocator::make_dot_tail(const Syntax* f) { + return tails.make(f); + } + + const List* + Allocator::make_list(const std::vector& elts) { + if (elts.empty()) + return &empty_list; + return lists.make(elts); + } + + const Vector* + Allocator::make_vector(const std::vector& elts) { + if (elts.empty()) + return &empty_vector; + return vectors.make(elts); + } + + // ------------ + // -- Parser -- + // ------------ + + // Signal a parse error + static void + parse_error(const std::string& s) { + throw BasicError(s); + } + + // Signal that an expected syntax object was missing + static void + expected_syntax(const std::string& s) { + parse_error("expected " + s); + } + + // Signal an abrupt end of input + static void + unexpected_end_of_input(const std::string& s) { + parse_error("unexpected end of input after " + s); + } + + // Signal a missing closing parenthesis + static void + missing_closer_for(const std::string& s) { + parse_error("missing closing parenthesis for " + s); + } + + // The sequence of characters in [cur, last) consists + // entirely of digits. Return the corresponding natural value. + static size_t + natural_value(const Byte* cur, const Byte* last) { + size_t n = 0; + for (; cur < last; ++cur) + // FIXME: check for overflow. + n = 10 * n + (*cur - '0'); + return n; + } + + // Parse a plain identifier or a Lisp-style keyword identifier. + const Symbol* + Parser::parse_symbol(const Token*& cur, const Token* last) { + Symbol::Kind kind = *cur->lexeme->begin() == ':' + ? Symbol::keyword + : Symbol::ordinary; + return alloc.make_symbol(*cur++, kind); + } + + // List of lower case character names + static const char* charname[] = { + "newline", "space", "page", "tab", + "backspace", "return", "linefeed" + }; + + static bool + equal_character_name(BasicString lhs, const char* rhs) { + if (lhs->size() != strlen(rhs)) + return false; + for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur) + if (tolower(*cur) != *rhs++) + return false; + return true; + } + + static bool + valid_character_name(BasicString s) { + for (int i = 0; i < length(charname); ++i) + if (equal_character_name(s, charname[i])) + return true; + return false; + } + + const Character* + Parser::parse_character(const Token*& cur, const Token* last) { + if (cur->lexeme->size() != 1 + and not valid_character_name(cur->lexeme)) + parse_error("invalid literal character syntax"); + return alloc.make_character(*cur++); + } + + // Parse an anchor definition of the form #n= + const Anchor* + Parser::parse_anchor(const Token*& cur, const Token* last) { + const size_t n = natural_value(cur->lexeme->begin() + 1, + cur->lexeme->end() - 1); + if (++cur == last) + unexpected_end_of_input("sharp-integer-equal sign"); + return alloc.make_anchor(n, parse_syntax(cur, last)); + } + + // Parse a reference to an anchor, #n# + const Reference* + Parser::parse_reference(const Token*& cur, const Token* last) { + const size_t n = natural_value(cur->lexeme->begin() + 1, + cur->lexeme->end() - 1); + return alloc.make_reference(*cur++, n); + } + + // Parse an uninterned symbol #: + const Symbol* + Parser::parse_uninterned(const Token*& cur, const Token* last) { + if (cur == last or cur->type != Token::identifier) + expected_syntax("symbol after sharp-colon sign"); + // FIXME: check that the identifier is not a keyword. + return alloc.make_symbol(*cur++, Symbol::uninterned); + } + + // Parse a function syntax: #' + const Function* + Parser::parse_function(const Token*& cur, const Token* last) { + if (cur == last) + unexpected_end_of_input("sharp-quote sign"); + return alloc.make_function(parse_syntax(cur, last)); + } + + // Parse a quotation + const Quote* + Parser::parse_quote(const Token*& cur, const Token* last) { + if (cur == last) + unexpected_end_of_input("quote sign"); + return alloc.make_quote(parse_syntax(cur, last)); + } + + // Parse an antiquotation + const Antiquote* + Parser::parse_antiquote(const Token*& cur, const Token* last) { + if (cur == last) + unexpected_end_of_input("backquote sign"); + return alloc.make_antiquote(parse_syntax(cur, last)); + } + + // Parse an expansion request form + const Expand* + Parser::parse_expand(const Token*& cur, const Token* last) { + const Syntax* s = parse_syntax(cur, last); + if (s == 0) + unexpected_end_of_input("comma sign"); + return alloc.make_expand(s); + } + + // Parse conditional inclusions + const Include* + Parser::parse_include(const Token*& cur, const Token* last) { + const Syntax* s = parse_syntax(cur, last); + if (s == 0) + unexpected_end_of_input("sharp-plus sign"); + return alloc.make_include(s); + } + + const Exclude* + Parser::parse_exclude(const Token*& cur, const Token* last) { + const Syntax* s = parse_syntax(cur, last); + if (s == 0) + unexpected_end_of_input("sharp-minus sign"); + return alloc.make_exclude(s); + } + + const Eval* + Parser::parse_eval(const Token*& cur, const Token* last) { + const Syntax* s = parse_syntax(cur, last); + if (s == 0) + unexpected_end_of_input("sharp-dot sign"); + return alloc.make_eval(s); + } + + const Splice* + Parser::parse_splice(const Token*& cur, const Token* last) { + const Syntax* s = parse_syntax(cur, last); + if (s == 0) + unexpected_end_of_input("comma-at sign"); + return alloc.make_splice(s); + } + + // Skip tokens that are semantically blanks, e.g. comments. + // Return true if not at end of tokens. + static bool + skip_ignorable_tokens(const Token*& cur, const Token* last) { + while (cur < last and cur->type == Token::semicolon) + ++cur; + return cur != last; + } + + // Parse a vector of syntax objects: #(s .. s) + const Vector* + Parser::parse_vector(const Token*& cur, const Token* last) { + std::vector elts; + while (skip_ignorable_tokens(cur, last) + and cur->type != Token::close_paren) + elts.push_back(parse_syntax(cur, last)); + if (cur == last) + missing_closer_for("vector"); + ++cur; + return alloc.make_vector(elts); + } + + // Constructs a pair or a list syntax object. + const List* + Parser::parse_list(const Token*& cur, const Token* last) { + std::vector elts; + while (skip_ignorable_tokens(cur, last) + and cur->type != Token::close_paren) { + if (cur->type == Token::dot) { + skip_ignorable_tokens(++cur, last); + if (const Syntax* s = parse_syntax(cur, last)) { + elts.push_back(alloc.make_dot_tail(s)); + break; + } + } + elts.push_back(parse_syntax(cur, last)); + } + if (cur == last or cur->type != Token::close_paren) + missing_closer_for("list"); + ++cur; + return alloc.make_list(elts); + } + + Parser::Parser(Allocator& a, std::vector& v) + : alloc(a), syns(v) { } + + static std::string + to_string(BasicString s) { + return { s->begin(), s->end() }; + } + + const Syntax* + Parser::parse_syntax(const Token*& cur, const Token* last) { + if (not skip_ignorable_tokens(cur, last)) + return 0; + + switch (cur->type) { + case Token::integer: + return alloc.make_integer(*cur++); + + case Token::character: + return parse_character(cur, last); + + case Token::string: + return alloc.make_string(*cur++); + + case Token::identifier: + return parse_symbol(cur, last); + + case Token::sharp_integer_equal: + return parse_anchor(cur, last); + + case Token::sharp_integer_sharp: + return parse_reference(cur, last); + + case Token::sharp_colon: + return parse_uninterned(++cur, last); + + case Token::sharp_apostrophe: + return parse_function(++cur, last); + + case Token::sharp_open_paren: + return parse_vector(++cur, last); + + case Token::apostrophe: + return parse_quote(++cur, last); + + case Token::open_paren: + return parse_list(++cur, last); + + case Token::sharp_plus: + return parse_include(++cur, last); + + case Token::sharp_minus: + return parse_exclude(++cur, last); + + case Token::sharp_dot: + return parse_eval(++cur, last); + + case Token::backquote: + return parse_antiquote(++cur, last); + + case Token::comma: + return parse_expand(++cur, last); + + case Token::comma_at: + return parse_splice(++cur, last); + + default: + parse_error(std::string("parse error before ") + + to_string(cur->lexeme)); + return 0; // never executed + } + } + + const Token* + Parser::parse(const Token* cur, const Token* last) { + while (cur < last) + if (const Syntax* s = parse_syntax(cur, last)) + syns.push_back(s); + return cur; + } + + Module::Module(const std::string& s) : nm(s) { + std::vector tokens; + Memory::FileMapping input(s); + Lexer lexer(raw_strs, tokens); + const Byte* rest = lexer.tokenize(input.begin(), input.end()); + if (rest != input.end()) + syntax_error("syntax error"); + Parser parser(allocator, *this); + const Token* tok = parser.parse(begin_ptr(tokens), end_ptr(tokens)); + if (tok != end_ptr(tokens)) + parse_error("parse error"); + } + } +} diff --git a/src/utils/Makefile.in b/src/utils/Makefile.in index a3021748..0ac6b300 100644 --- a/src/utils/Makefile.in +++ b/src/utils/Makefile.in @@ -36,14 +36,14 @@ hammer_SOURCES = hammer.cc hammer_OBJECTS = $(hammer_SOURCES:.cc=.lo) hammer_LDADD = -L. -lOpenAxiom -libOpenAxiom_HEADERS = hash-table.H string-pool.H sexpr.H vm.H +libOpenAxiom_HEADERS = hash-table.H string-pool.H vm.H libOpenAxiom_SOURCES = \ - storage.cc string-pool.cc sexpr.cc command.cc \ + storage.cc string-pool.cc command.cc \ filesystem.cc vm.cc libOpenAxiom_OBJECTS = $(libOpenAxiom_SOURCES:.cc=.lo) -oa_public_headers = hash-table string-pool sexpr vm +oa_public_headers = hash-table string-pool vm ## Where we store public header files oa_target_headerdir = $(oa_target_includedir)/open-axiom diff --git a/src/utils/sexpr.H b/src/utils/sexpr.H deleted file mode 100644 index 5c9934d5..00000000 --- a/src/utils/sexpr.H +++ /dev/null @@ -1,495 +0,0 @@ -// Copyright (C) 2010-2013, Gabriel Dos Reis. -// All rights reserved. -// Written by Gabriel Dos Reis. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in -// the documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of The Numerical Algorithms Group Ltd. nor the -// names of its contributors may be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef OPENAXIOM_SEXPR_INCLUDED -#define OPENAXIOM_SEXPR_INCLUDED - -// --% Author: Gabriel Dos Reis. -// --% Description: -// --% A simple support for s-expressions. By design, no ambition -// --% for full-fledged Common Lisp reader capability. Rather, -// --% the aim is a simple data structure for exchanging data -// --% between several components of the OpenAxiom system. -// --% Users interested in fullblown Lisp syntax should seek -// --% to acquire Lisp systems, many of which are freely available. - -#include -#include -#include -#include -#include - -namespace OpenAxiom { - namespace Sexpr { - struct BasicError { - explicit BasicError(const std::string& s) : msg(s) { } - const std::string& message() const { return msg; } - protected: - std::string msg; - }; - - // ----------- - // -- Token -- - // ----------- - struct Token { - enum Type { - unknown, // unidentified token - semicolon = token::value(";"), // comment - dot = token::value("."), - comma = token::value(","), - open_paren = token::value("("), - close_paren = token::value(")"), - apostrophe = token::value("'"), - backquote = token::value("`"), - backslash = token::value("\\"), - sharp_open_paren = token::value("#("), - sharp_apostrophe = token::value("#'"), - sharp_colon = token::value("#:"), - sharp_plus = token::value("#+"), - sharp_minus = token::value("#-"), - sharp_dot = token::value("#."), - comma_at = token::value(",@"), - digraph_end = token::value(0xff,0xff), - integer, // integer literal - character, // character literal - string, // string literal - identifier, // plain identifier - sharp_integer_equal, // anchor definition, #n= - sharp_integer_sharp // back reference, #n# - }; - - Type type; // class of this token - BasicString lexeme; // characters making up this token - }; - - // Print a token object on an output stream. - // Note: this function is for debugging purpose; in particular - // it does not `prettyprint' tokens. - std::ostream& operator<<(std::ostream&, const Token&); - - // ----------- - // -- Lexer -- - // ----------- - // An object of this type transforms a sequence of characters - // into a sequence of tokens as defined above. - // A lexer does not manage memory itself. Rather, it delegates - // storage allocation for lexemes and tokens to specialized - // agents used to construct it. - struct Lexer { - Lexer(StringPool& pool, std::vector& toks) - : strings(pool), tokens(toks) { } - - const Byte* tokenize(const Byte*, const Byte*); - BasicString intern(const Byte* s, size_t n) { - return strings.intern(s, n); - } - - private: - StringPool& strings; // where to allocate lexemes from - std::vector& tokens; // where to deposite tokens. - }; - - // ------------ - // -- Syntax -- - // ------------ - // Base class of syntax object classes. - struct Syntax { - struct Visitor; // base class of syntax visitors - virtual void accept(Visitor&) const = 0; - }; - - // ---------- - // -- Atom -- - // ---------- - // An atom is a syntax object consisting of exatly one token. - // This should not be confused with the notion of atom - // in Lisp languages. - struct Atom : Syntax { - const Token& token() const { return tok; } - BasicString lexeme() const { return tok.lexeme; } - void accept(Visitor&) const; - protected: - const Token tok; - Atom(const Token&); - }; - - // ------------- - // -- Integer -- - // ------------- - // Integer literal syntax objects - struct Integer : Atom { - explicit Integer(const Token&); - void accept(Visitor&) const; - }; - - // --------------- - // -- Character -- - // --------------- - // Character literal syntax objects. - struct Character : Atom { - explicit Character(const Token&); - void accept(Visitor&) const; - }; - - // ------------ - // -- String -- - // ------------ - // Striing literal syntax objjects. - struct String : Atom { - explicit String(const Token&); - void accept(Visitor&) const; - }; - - // ------------ - // -- Symbol -- - // ------------ - struct Symbol : Atom { - enum Kind { - uninterned, // uninterned symbol - ordinary, // an interned symbol - keyword // a keyword symbol - }; - Symbol(const Token&, Kind); - Kind kin() const { return sort; } - void accept(Visitor&) const; - private: - const Kind sort; - }; - - // --------------- - // -- Reference -- - // --------------- - // Back reference object to a syntax object. - struct Reference : Atom { - Reference(const Token&, size_t); - size_t tag() const { return pos; } - void accept(Visitor&) const; - private: - const size_t pos; - }; - - // ------------ - // -- Anchor -- - // ------------ - // Base anchor syntax object. - struct Anchor : Syntax { - Anchor(size_t, const Syntax*); - size_t ref() const { return tag; } - const Syntax* value() const { return val; } - void accept(Visitor&) const; - private: - const size_t tag; - const Syntax* const val; - }; - - // -- Abstract over common implementation of unary special operators. - template - struct unary_form : Syntax { - const Syntax* body() const { return form; } - void accept(Visitor&) const; - protected: - unary_form(const Syntax* f) : form(f) { } - private: - const Syntax* const form; - }; - - // ----------- - // -- Quote -- - // ----------- - // Quotation syntax object. - struct Quote : unary_form { - explicit Quote(const Syntax*); - }; - - // --------------- - // -- Antiquote -- - // --------------- - // Quasi-quotation syntax object. - struct Antiquote : unary_form { - explicit Antiquote(const Syntax*); - }; - - // ------------ - // -- Expand -- - // ------------ - // Expansion request inside a quasi-quotation. - struct Expand : unary_form { - explicit Expand(const Syntax*); - }; - - // ---------- - // -- Eval -- - // ---------- - // Read-time evaluation request syntax object. - struct Eval : unary_form { - explicit Eval(const Syntax*); - }; - - // ------------ - // -- Splice -- - // ------------ - // Splice request syntax object inside a quasi-quotation. - struct Splice : unary_form { - explicit Splice(const Syntax*); - }; - - // -------------- - // -- Function -- - // -------------- - // Function literal syntax object. - struct Function : unary_form { - explicit Function(const Syntax*); - }; - - // ------------- - // -- DotTail -- - // ------------- - // Objects of this type represents the tail of syntactic - // objects denoting dotted pair syntax `(a . b)'. - struct DotTail : unary_form { - explicit DotTail(const Syntax*); - }; - - // ------------- - // -- Include -- - // ------------- - // Conditional inclusion syntax object - struct Include : unary_form { - explicit Include(const Syntax*); - }; - - // ------------- - // -- Exclude -- - // ------------- - // Conditional exclusion syntax object - struct Exclude : unary_form { - explicit Exclude(const Syntax*); - }; - - // ---------- - // -- List -- - // ---------- - // List syntax objects. - struct List : Syntax, private std::vector { - typedef std::vector base; - using base::const_iterator; - using base::begin; - using base::end; - using base::size; - using base::empty; - - List(); - explicit List(const base&); - ~List(); - void accept(Visitor&) const; - }; - - // ------------ - // -- Vector -- - // ------------ - // Vector syntax objects. - struct Vector : Syntax, private std::vector { - typedef std::vector base; - using base::const_iterator; - using base::begin; - using base::end; - using base::size; - using base::operator[]; - using base::empty; - - Vector(); - explicit Vector(const base&); - ~Vector(); - void accept(Visitor&) const; - }; - - // --------------------- - // -- Syntax::Visitor -- - // --------------------- - struct Syntax::Visitor { - virtual void visit(const Atom&) = 0; - virtual void visit(const Integer&); - virtual void visit(const Character&); - virtual void visit(const String&); - virtual void visit(const Symbol&); - virtual void visit(const Reference&); - virtual void visit(const Anchor&) = 0; - virtual void visit(const Quote&) = 0; - virtual void visit(const Antiquote&) = 0; - virtual void visit(const Expand&) = 0; - virtual void visit(const Eval&) = 0; - virtual void visit(const Splice&) = 0; - virtual void visit(const Function&) = 0; - virtual void visit(const Include&) = 0; - virtual void visit(const Exclude&) = 0; - virtual void visit(const DotTail&) = 0; - virtual void visit(const List&) = 0; - virtual void visit(const Vector&) = 0; - }; - - template - void - unary_form::accept(Visitor& v) const { - v.visit(static_cast(*this)); - } - - // --------------- - // -- Allocator -- - // --------------- - - // The next two classes are helper classes for the main - // allocation class Allocator. We use std::set as allocator - // that guarantee uuniqueness of atomic syntax object with - // respect to the constituent token. That container needs - // a relational comparator. In an ideal world, this class - // should not exist. - struct SyntaxComparator { - bool operator()(const Atom& lhs, const Atom& rhs) const { - return std::less()(lhs.lexeme(), rhs.lexeme()); - } - - template - bool - operator()(const unary_form& lhs, const unary_form& rhs) const { - return std::less()(lhs.body(), rhs.body()); - } - - bool operator()(const Anchor& lhs, const Anchor& rhs) const { - return std::less()(lhs.ref(), rhs.ref()); - } - }; - - template - struct UniqueAllocator : std::set { - typedef std::set base; - typedef typename base::const_iterator const_iterator; - - template - const T* allocate(const Args&... args) { - return &*this->insert(T(args...)).first; - } - }; - - // Allocator of syntax objects. - struct Allocator { - Allocator(); - ~Allocator(); - - const Integer* make_integer(const Token&); - const Character* make_character(const Token&); - const String* make_string(const Token&); - const Symbol* make_symbol(const Token&, Symbol::Kind); - const Reference* make_reference(const Token&, size_t); - const Anchor* make_anchor(size_t, const Syntax*); - const Quote* make_quote(const Syntax*); - const Antiquote* make_antiquote(const Syntax*); - const Expand* make_expand(const Syntax*); - const Eval* make_eval(const Syntax*); - const Splice* make_splice(const Syntax*); - const Function* make_function(const Syntax*); - const Include* make_include(const Syntax*); - const Exclude* make_exclude(const Syntax*); - const DotTail* make_dot_tail(const Syntax*); - const List* make_list(const std::vector&); - const Vector* make_vector(const std::vector&); - - private: - UniqueAllocator ints; - UniqueAllocator chars; - UniqueAllocator strs; - UniqueAllocator syms; - UniqueAllocator ancs; - UniqueAllocator refs; - UniqueAllocator quotes; - UniqueAllocator antis; - UniqueAllocator exps; - UniqueAllocator funs; - UniqueAllocator incs; - UniqueAllocator excs; - UniqueAllocator evls; - UniqueAllocator spls; - UniqueAllocator tails; - Memory::Factory lists; - Memory::Factory vectors; - List empty_list; - Vector empty_vector; - }; - - // ------------ - // -- Parser -- - // ------------ - // An object of this type transforms a sequence of tokens - // into a sequence of syntax objects. - // A parser object does not manage memory itself. Rather, it delegates - // storage allocation for syntax objects to specialized - // agents used to construct it. - struct Parser { - Parser(Allocator&, std::vector&); - const Token* parse(const Token*, const Token*); - private: - Allocator& alloc; - std::vector& syns; - - const Symbol* parse_symbol(const Token*&, const Token*); - const Character* parse_character(const Token*&, const Token*); - const Anchor* parse_anchor(const Token*&, const Token*); - const Reference* parse_reference(const Token*&, const Token*); - const Symbol* parse_uninterned(const Token*&, const Token*); - const Function* parse_function(const Token*&, const Token*); - const Quote* parse_quote(const Token*&, const Token*); - const Antiquote* parse_antiquote(const Token*&, const Token*); - const Include* parse_include(const Token*&, const Token*); - const Exclude* parse_exclude(const Token*&, const Token*); - const Expand* parse_expand(const Token*&, const Token*); - const Eval* parse_eval(const Token*&, const Token*); - const Splice* parse_splice(const Token*&, const Token*); - const Vector* parse_vector(const Token*&, const Token*); - const List* parse_list(const Token*&, const Token*); - const Syntax* parse_syntax(const Token*&, const Token*); - }; - - // ------------ - // -- Module -- - // ------------ - // Entire s-expression input file. - struct Module : std::vector { - explicit Module(const std::string&); - const std::string& name() const { return nm; } - private: - const std::string nm; - StringPool raw_strs; - Allocator allocator; - }; - } -} - -#endif // OPENAXIOM_SEXPR_INCLUDED diff --git a/src/utils/sexpr.cc b/src/utils/sexpr.cc deleted file mode 100644 index 3e6b5e90..00000000 --- a/src/utils/sexpr.cc +++ /dev/null @@ -1,934 +0,0 @@ -// Copyright (C) 2010-2013, Gabriel Dos Reis. -// All rights reserved. -// Written by Gabriel Dos Reis. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in -// the documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of The Numerical Algorithms Group Ltd. nor the -// names of its contributors may be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --% Author: Gabriel Dos Reis. - -#include -#include -#include -#include -#include -#include - -namespace OpenAxiom { - namespace Sexpr { - template - static inline int - length(const T(&)[N]) { - return N; - } - - template - static inline typename Sequence::const_pointer - begin_ptr(const Sequence& s) { - return &*s.begin(); - } - - template - static inline typename Sequence::const_pointer - end_ptr(const Sequence& s) { - return s.empty() ? 0 : &*s.begin() + s.size(); - } - - std::ostream& - operator<<(std::ostream& os, const Token& t) { - switch (t.type) { - case Token::semicolon: os << "SEMICOLON"; break; - case Token::dot: os << "DOT"; break; - case Token::comma: os << "COMMA"; break; - case Token::open_paren: os << "OPEN_PAREN"; break; - case Token::close_paren: os << "CLOSE_PAREN"; break; - case Token::apostrophe: os << "APOSTROPHE"; break; - case Token::backquote: os << "BACKQUOTE"; break; - case Token::backslash: os << "BACKSLASH"; break; - case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break; - case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break; - case Token::sharp_colon: os << "SHARP_COLON"; break; - case Token::sharp_plus: os << "SHARP_PLUS"; break; - case Token::sharp_minus: os << "SHARP_MINUS"; break; - case Token::sharp_dot: os << "SHARP_DOT"; break; - case Token::comma_at: os << "COMMA_AT"; break; - case Token::integer: os << "INTEGER"; break; - case Token::character: os << "CHARACTER"; break; - case Token::string: os << "STRING"; break; - case Token::identifier: os << "IDENTIFIER"; break; - case Token::sharp_integer_sharp: - os << "SHARP_INTEGER_SHARP"; break; - case Token::sharp_integer_equal: - os << "SHARP_INTEGER_EQUAL"; break; - default: os << "UNKNOWN"; break; - } - os << '('; - if (t.lexeme != 0) { - os << '"'; - std::copy(t.lexeme->begin(), t.lexeme->end(), - std::ostream_iterator(os)); - os << '"'; - } - else - os << ""; - return os << ')'; - } - - // ----------- - // -- Lexer -- - // ----------- - static void - syntax_error(const std::string& s) { - throw BasicError(s); - } - - // Return true if character `c' introduces a blank. - static bool - is_blank(char c) { - return c == ' ' or c == '\t' or c == '\v' - or c == '\n' or c == '\f' or c == '\r'; - } - - // Return true if the character `c' introduces a delimiter. - static bool - is_delimiter(char c) { - return is_blank(c) - or c == '(' or c == ')' or c == '\'' - or c == '`' or c == '#'; - } - - // Move `cur' past all consecutive blank characters, and - // return the new position. - static const Byte* - skip_blank(const Byte*& cur, const Byte* end) { - while (cur < end and is_blank(*cur)) - ++cur; - return cur; - } - - // Move `cur' to end-of-line marker. - static const Byte* - skip_to_eol(const Byte*& cur, const Byte* end) { - // FIXME: properly handle CR+LF. - while (cur < end and *cur != '\n') - ++cur; - return cur; - } - - // Move `cur' until a word boundary is reached. - static const Byte* - skip_to_word_boundary(const Byte*& cur, const Byte* end) { - bool saw_escape = false; - for (; cur < end; ++cur) { - if (saw_escape) - saw_escape = false; - else if (*cur == '\\') - saw_escape = true; - else if (is_delimiter(*cur)) - break; - } - return cur; - } - - // Move `cur' one-past a non-esacaped character `c'. - // Return true if the character was seen. - static bool - skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) { - bool saw_escape = false; - for (; cur < end; ++cur) - if (saw_escape) - saw_escape = false; - else if (*cur == '\\') - saw_escape = true; - else if (*cur == c) { - ++cur; - return true; - } - return false; - } - - // Move `cur' past the closing quote of string literal. - // Return true if the closing fence was effectively seen. - static inline bool - skip_to_quote(const Byte*& cur, const Byte* end) { - return skip_to_nonescaped_char(cur, end, '"'); - } - - // Return true if the character `c' be part of a non-absolute - // identifier. - static bool - identifier_part(char c) { - switch (c) { - case '+': case '-': case '*': case '/': case '%': case '^': - case '~': case '@': case '$': case '&': case '=': - case '<': case '>': case '?': case '!': case '_': - case '[': case ']': case '{': case '}': - return true; - default: - return isalnum(c); - } - } - - // Return true if the character `c' has a special meaning after - // the sharp character. - static bool - special_after_sharp(char c) { - return c == '(' or c == '\'' or c == ':' - or c == '+' or c == '-' or c == '.'; - } - - // Return true if the sequence `[cur, end)' has a prefix that is - // an integer followrd by the equal sign or the sharp sign. - // `cur' is moved along the way. - static bool - only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) { - while (cur < end and isdigit(*cur)) - ++cur; - return cur < end and (*cur == '#' or *cur == '='); - } - - // The token `t' was thought to designate an identifier. - // Reclassify it as an integer if, in fact, its lexeme consists - // entirely of digits. - static void - maybe_reclassify(Token& t) { - const Byte* cur = t.lexeme->begin(); - const Byte* end = t.lexeme->end(); - while (cur < end and isdigit(*cur)) - ++cur; - if (cur == end) - t.type = Token::integer; - } - - // Returns true if the first characters in the range - // [cur, last) start an identifier. - static bool - start_symbol(const Byte* cur, const Byte* last) { - if (cur >= last) - return false; - return identifier_part(*cur) - or *cur == '|' or *cur == ':'; - } - - // We are processing a symbol token. Accumulate all - // legitimate characters till the end of the token. - static void - skip_to_end_of_symbol(const Byte*& cur, const Byte* end) { - const char c = *cur; - if (*cur == '|') - skip_to_nonescaped_char(++cur, end, c); - else - skip_to_word_boundary(cur, end); - if (cur < end and *cur == ':') - skip_to_end_of_symbol(cur, end); - } - - static Token - match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) { - Token t = { Token::identifier, 0 }; - const Byte* start = cur; - skip_to_end_of_symbol(cur, end); - t.lexeme = lexer->intern(start, cur - start); - maybe_reclassify(t); - return t; - } - - const Byte* - Lexer::tokenize(const Byte* cur, const Byte* end) { - while (skip_blank(cur, end) < end) { - Token t = { Token::unknown, 0 }; - switch (*cur) { - case ';': { - const Byte* start = cur; - t.type = Token::semicolon; - skip_to_eol(cur, end); - t.lexeme = intern(start, cur - start); - break; - } - - case '.': case '(': case ')': case '\'': case '`': - t.type = Token::Type(token::value(*cur)); - t.lexeme = intern(cur, 1); - ++cur; - break; - - case ',': { - const Byte* start = cur; - if (++cur < end and *cur == '@') { - t.type = Token::comma_at; - ++cur; - } - else - t.type = Token::comma; - t.lexeme = intern(start, cur - start); - break; - } - - case '\\': - t = match_maybe_symbol(this, cur, end); - break; - - case '#': { - const Byte* start = cur; - if (cur + 1 < end and special_after_sharp(cur[1])) { - t.type = Token::Type(token::value(cur[0], cur[1])); - t.lexeme = intern(cur, 2); - cur += 2; - } - else if (cur + 1 < end and cur[1] == '\\') { - start = cur += 2; - if (not isalnum(*cur)) - ++cur; - else - skip_to_word_boundary(cur, end); - t.type = Token::character; - t.lexeme = intern(start, cur - start); - } - else if (only_digits_before_equal_or_shap(++cur, end)) { - t.type = *cur == '#' - ? Token::sharp_integer_sharp - : Token::sharp_integer_equal; - t.lexeme = intern(start, cur - start + 1); - ++cur; - } - else { - skip_to_word_boundary(cur, end); - t.lexeme = intern(start, cur - start); - } - break; - } - - case '"': { - const Byte* start = cur; - skip_to_quote(++cur, end); - t.type = Token::string; - t.lexeme = intern(start, cur - start); - break; - } - - default: - if (start_symbol(cur, end)) - t = match_maybe_symbol(this, cur, end); - else { - const Byte* start = cur; - skip_to_word_boundary(++cur, end); - t.lexeme = intern(start, cur - start); - } - break; - } - tokens.push_back(t); - } - return cur; - } - - // ---------- - // -- Atom -- - // ---------- - Atom::Atom(const Token& t) : tok(t) { } - - void - Atom::accept(Visitor& v) const { - v.visit(*this); - } - - // ------------- - // -- Integer -- - // ------------- - Integer::Integer(const Token& t) : Atom(t) { } - - void - Integer::accept(Visitor& v) const { - v.visit(*this); - } - - // --------------- - // -- Character -- - // --------------- - Character::Character(const Token& t) : Atom(t) { } - - void - Character::accept(Visitor& v) const { - v.visit(*this); - } - - // ------------ - // -- String -- - // ------------ - String::String(const Token& t) : Atom(t) { } - - void - String::accept(Visitor& v) const { - v.visit(*this); - } - - // ------------ - // -- Symbol -- - // ------------ - Symbol::Symbol(const Token& t, Kind k) : Atom(t), sort(k) { } - - void - Symbol::accept(Visitor& v) const { - v.visit(*this); - } - - // ------------ - // -- Anchor -- - // ------------ - Anchor::Anchor(size_t t, const Syntax* s) : tag(t), val(s) { } - - void - Anchor::accept(Visitor& v) const { - v.visit(*this); - } - - // --------------- - // -- Reference -- - // --------------- - Reference::Reference(const Token& t, size_t v) : Atom(t), pos(v) { } - - void - Reference::accept(Visitor& v) const { - v.visit(*this); - } - - // ----------- - // -- Quote -- - // ----------- - Quote::Quote(const Syntax* s) : unary_form(s) { } - - // --------------- - // -- Antiquote -- - // --------------- - Antiquote::Antiquote(const Syntax* s) : unary_form(s) { } - - // ------------ - // -- Expand -- - // ------------ - Expand::Expand(const Syntax* s) : unary_form(s) { } - - // ---------- - // -- Eval -- - // ---------- - Eval::Eval(const Syntax* s) : unary_form(s) { } - - // ------------ - // -- Splice -- - // ------------ - Splice::Splice(const Syntax* s) : unary_form(s) { } - - // -------------- - // -- Function -- - // -------------- - Function::Function(const Syntax* s) : unary_form(s) { } - - // ------------- - // -- Include -- - Include::Include(const Syntax* s) : unary_form(s) { } - - // ------------- - // -- Exclude -- - Exclude::Exclude(const Syntax* s) : unary_form(s) { } - - // ------------- - // -- DotTail -- - // ------------- - DotTail::DotTail(const Syntax* f) : unary_form(f) { } - - // ---------- - // -- List -- - // ---------- - List::List() { } - - List::List(const base& elts) : base(elts) { } - - List::~List() { } - - void - List::accept(Visitor& v) const { - v.visit(*this); - } - - // ------------ - // -- Vector -- - // ------------ - Vector::Vector() { } - - Vector::Vector(const base& elts) : base(elts) { } - - Vector::~Vector() { } - - void - Vector::accept(Visitor& v) const { - v.visit(*this); - } - - // --------------------- - // -- Syntax::Visitor -- - // --------------------- - - // implicitly convert a reference to `T' to a reference to `S'. - template - inline const S& - as(const T& t) { - return t; - } - - void - Syntax::Visitor::visit(const Integer& i) { - visit(as(i)); - } - - void - Syntax::Visitor::visit(const Character& c) { - visit(as(c)); - } - - void - Syntax::Visitor::visit(const String& s) { - visit(as(s)); - } - - void - Syntax::Visitor::visit(const Symbol& s) { - visit(as(s)); - } - - void - Syntax::Visitor::visit(const Reference& r) { - visit(as(r)); - } - - // --------------- - // -- Allocator -- - // --------------- - Allocator::Allocator() { } - - // This destructor is defined here so that it provides - // a single instantiation point for destructors of all - // used templates floating around. - Allocator::~Allocator() { } - - const Character* - Allocator::make_character(const Token& t) { - return chars.allocate(t); - } - - const Integer* - Allocator::make_integer(const Token& t) { - return ints.allocate(t); - } - - const String* - Allocator::make_string(const Token& t) { - return strs.allocate(t); - } - - const Symbol* - Allocator::make_symbol(const Token& t, Symbol::Kind k) { - return syms.allocate(t, k); - } - - const Anchor* - Allocator::make_anchor(size_t t, const Syntax* s) { - return ancs.allocate(t, s); - } - - const Reference* - Allocator::make_reference(const Token& t, size_t i) { - return refs.allocate(t, i); - } - - const Quote* - Allocator::make_quote(const Syntax* s) { - return quotes.allocate(s); - } - - const Antiquote* - Allocator::make_antiquote(const Syntax* s) { - return antis.allocate(s); - } - - const Expand* - Allocator::make_expand(const Syntax* s) { - return exps.allocate(s); - } - - const Eval* - Allocator::make_eval(const Syntax* s) { - return evls.allocate(s); - } - - const Splice* - Allocator::make_splice(const Syntax* s) { - return spls.allocate(s); - } - - const Function* - Allocator::make_function(const Syntax* s) { - return funs.allocate(s); - } - - const Include* - Allocator::make_include(const Syntax* s) { - return incs.allocate(s); - } - - const Exclude* - Allocator::make_exclude(const Syntax* s) { - return excs.allocate(s); - } - - const DotTail* - Allocator::make_dot_tail(const Syntax* f) { - return tails.allocate(f); - } - - const List* - Allocator::make_list(const std::vector& elts) { - if (elts.empty()) - return &empty_list; - return lists.make(elts); - } - - const Vector* - Allocator::make_vector(const std::vector& elts) { - if (elts.empty()) - return &empty_vector; - return vectors.make(elts); - } - - // ------------ - // -- Parser -- - // ------------ - - // Signal a parse error - static void - parse_error(const std::string& s) { - throw BasicError(s); - } - - // Signal that an expected syntax object was missing - static void - expected_syntax(const std::string& s) { - parse_error("expected " + s); - } - - // Signal an abrupt end of input - static void - unexpected_end_of_input(const std::string& s) { - parse_error("unexpected end of input after " + s); - } - - // Signal a missing closing parenthesis - static void - missing_closer_for(const std::string& s) { - parse_error("missing closing parenthesis for " + s); - } - - // The sequence of characters in [cur, last) consists - // entirely of digits. Return the corresponding natural value. - static size_t - natural_value(const Byte* cur, const Byte* last) { - size_t n = 0; - for (; cur < last; ++cur) - // FIXME: check for overflow. - n = 10 * n + (*cur - '0'); - return n; - } - - // Parse a plain identifier or a Lisp-style keyword identifier. - const Symbol* - Parser::parse_symbol(const Token*& cur, const Token* last) { - Symbol::Kind kind = *cur->lexeme->begin() == ':' - ? Symbol::keyword - : Symbol::ordinary; - return alloc.make_symbol(*cur++, kind); - } - - // List of lower case character names - static const char* charname[] = { - "newline", "space", "page", "tab", - "backspace", "return", "linefeed" - }; - - static bool - equal_character_name(BasicString lhs, const char* rhs) { - if (lhs->size() != strlen(rhs)) - return false; - for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur) - if (tolower(*cur) != *rhs++) - return false; - return true; - } - - static bool - valid_character_name(BasicString s) { - for (int i = 0; i < length(charname); ++i) - if (equal_character_name(s, charname[i])) - return true; - return false; - } - - const Character* - Parser::parse_character(const Token*& cur, const Token* last) { - if (cur->lexeme->size() != 1 - and not valid_character_name(cur->lexeme)) - parse_error("invalid literal character syntax"); - return alloc.make_character(*cur++); - } - - // Parse an anchor definition of the form #n= - const Anchor* - Parser::parse_anchor(const Token*& cur, const Token* last) { - const size_t n = natural_value(cur->lexeme->begin() + 1, - cur->lexeme->end() - 1); - if (++cur == last) - unexpected_end_of_input("sharp-integer-equal sign"); - return alloc.make_anchor(n, parse_syntax(cur, last)); - } - - // Parse a reference to an anchor, #n# - const Reference* - Parser::parse_reference(const Token*& cur, const Token* last) { - const size_t n = natural_value(cur->lexeme->begin() + 1, - cur->lexeme->end() - 1); - return alloc.make_reference(*cur++, n); - } - - // Parse an uninterned symbol #: - const Symbol* - Parser::parse_uninterned(const Token*& cur, const Token* last) { - if (cur == last or cur->type != Token::identifier) - expected_syntax("symbol after sharp-colon sign"); - // FIXME: check that the identifier is not a keyword. - return alloc.make_symbol(*cur++, Symbol::uninterned); - } - - // Parse a function syntax: #' - const Function* - Parser::parse_function(const Token*& cur, const Token* last) { - if (cur == last) - unexpected_end_of_input("sharp-quote sign"); - return alloc.make_function(parse_syntax(cur, last)); - } - - // Parse a quotation - const Quote* - Parser::parse_quote(const Token*& cur, const Token* last) { - if (cur == last) - unexpected_end_of_input("quote sign"); - return alloc.make_quote(parse_syntax(cur, last)); - } - - // Parse an antiquotation - const Antiquote* - Parser::parse_antiquote(const Token*& cur, const Token* last) { - if (cur == last) - unexpected_end_of_input("backquote sign"); - return alloc.make_antiquote(parse_syntax(cur, last)); - } - - // Parse an expansion request form - const Expand* - Parser::parse_expand(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("comma sign"); - return alloc.make_expand(s); - } - - // Parse conditional inclusions - const Include* - Parser::parse_include(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("sharp-plus sign"); - return alloc.make_include(s); - } - - const Exclude* - Parser::parse_exclude(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("sharp-minus sign"); - return alloc.make_exclude(s); - } - - const Eval* - Parser::parse_eval(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("sharp-dot sign"); - return alloc.make_eval(s); - } - - const Splice* - Parser::parse_splice(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("comma-at sign"); - return alloc.make_splice(s); - } - - // Skip tokens that are semantically blanks, e.g. comments. - // Return true if not at end of tokens. - static bool - skip_ignorable_tokens(const Token*& cur, const Token* last) { - while (cur < last and cur->type == Token::semicolon) - ++cur; - return cur != last; - } - - // Parse a vector of syntax objects: #(s .. s) - const Vector* - Parser::parse_vector(const Token*& cur, const Token* last) { - std::vector elts; - while (skip_ignorable_tokens(cur, last) - and cur->type != Token::close_paren) - elts.push_back(parse_syntax(cur, last)); - if (cur == last) - missing_closer_for("vector"); - ++cur; - return alloc.make_vector(elts); - } - - // Constructs a pair or a list syntax object. - const List* - Parser::parse_list(const Token*& cur, const Token* last) { - std::vector elts; - while (skip_ignorable_tokens(cur, last) - and cur->type != Token::close_paren) { - if (cur->type == Token::dot) { - skip_ignorable_tokens(++cur, last); - if (const Syntax* s = parse_syntax(cur, last)) { - elts.push_back(alloc.make_dot_tail(s)); - break; - } - } - elts.push_back(parse_syntax(cur, last)); - } - if (cur == last or cur->type != Token::close_paren) - missing_closer_for("list"); - ++cur; - return alloc.make_list(elts); - } - - Parser::Parser(Allocator& a, std::vector& v) - : alloc(a), syns(v) { } - - static std::string - to_string(BasicString s) { - return { s->begin(), s->end() }; - } - - const Syntax* - Parser::parse_syntax(const Token*& cur, const Token* last) { - if (not skip_ignorable_tokens(cur, last)) - return 0; - - switch (cur->type) { - case Token::integer: - return alloc.make_integer(*cur++); - - case Token::character: - return parse_character(cur, last); - - case Token::string: - return alloc.make_string(*cur++); - - case Token::identifier: - return parse_symbol(cur, last); - - case Token::sharp_integer_equal: - return parse_anchor(cur, last); - - case Token::sharp_integer_sharp: - return parse_reference(cur, last); - - case Token::sharp_colon: - return parse_uninterned(++cur, last); - - case Token::sharp_apostrophe: - return parse_function(++cur, last); - - case Token::sharp_open_paren: - return parse_vector(++cur, last); - - case Token::apostrophe: - return parse_quote(++cur, last); - - case Token::open_paren: - return parse_list(++cur, last); - - case Token::sharp_plus: - return parse_include(++cur, last); - - case Token::sharp_minus: - return parse_exclude(++cur, last); - - case Token::sharp_dot: - return parse_eval(++cur, last); - - case Token::backquote: - return parse_antiquote(++cur, last); - - case Token::comma: - return parse_expand(++cur, last); - - case Token::comma_at: - return parse_splice(++cur, last); - - default: - parse_error(std::string("parse error before ") - + to_string(cur->lexeme)); - return 0; // never executed - } - } - - const Token* - Parser::parse(const Token* cur, const Token* last) { - while (cur < last) - if (const Syntax* s = parse_syntax(cur, last)) - syns.push_back(s); - return cur; - } - - Module::Module(const std::string& s) : nm(s) { - std::vector tokens; - Memory::FileMapping input(s); - Lexer lexer(raw_strs, tokens); - const Byte* rest = lexer.tokenize(input.begin(), input.end()); - if (rest != input.end()) - syntax_error("syntax error"); - Parser parser(allocator, *this); - const Token* tok = parser.parse(begin_ptr(tokens), end_ptr(tokens)); - if (tok != end_ptr(tokens)) - parse_error("parse error"); - } - } -} -- cgit v1.2.3