* include/sexpr.H: Move from utils.

(UniqueAllocator): Remove. (SyntaxComparator): Likewise. * syntax/sexpr.cc: Move from utils.
author: dos-reis <gdr@axiomatics.org> 2013-06-25 04:08:39 +0000
committer: dos-reis <gdr@axiomatics.org> 2013-06-25 04:08:39 +0000
commit: f9e4a03a220766099d6b5fc683a58185c4805a05 (patch)
tree: 9a7eb92745c5feeed83d37aff61123f6db4fd7cc /src/utils
parent: bba0c431de24f2291011be1960eeb1f5d15dfb8b (diff)
download: open-axiom-f9e4a03a220766099d6b5fc683a58185c4805a05.tar.gz
3 files changed, 3 insertions, 1432 deletions
diff --git a/src/utils/Makefile.in b/src/utils/Makefile.in
index a3021748..0ac6b300 100644
--- a/src/utils/Makefile.in
+++ b/src/utils/Makefile.in
@@ -36,14 +36,14 @@ hammer_SOURCES = hammer.cc
 hammer_OBJECTS = $(hammer_SOURCES:.cc=.lo)
 hammer_LDADD = -L. -lOpenAxiom
 
-libOpenAxiom_HEADERS = hash-table.H string-pool.H sexpr.H vm.H
+libOpenAxiom_HEADERS = hash-table.H string-pool.H vm.H
 libOpenAxiom_SOURCES = \
-	storage.cc string-pool.cc sexpr.cc command.cc \
+	storage.cc string-pool.cc command.cc \
 	filesystem.cc vm.cc
 
 libOpenAxiom_OBJECTS = $(libOpenAxiom_SOURCES:.cc=.lo)
 
-oa_public_headers = hash-table string-pool sexpr vm
+oa_public_headers = hash-table string-pool vm
 
 ## Where we store public header files
 oa_target_headerdir = $(oa_target_includedir)/open-axiom
diff --git a/src/utils/sexpr.H b/src/utils/sexpr.H
deleted file mode 100644
index 5c9934d5..00000000
--- a/src/utils/sexpr.H
+++ /dev/null
@@ -1,495 +0,0 @@
-// Copyright (C) 2010-2013, Gabriel Dos Reis.
-// All rights reserved.
-// Written by Gabriel Dos Reis.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     - Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//
-//     - Redistributions in binary form must reproduce the above copyright
-//       notice, this list of conditions and the following disclaimer in
-//       the documentation and/or other materials provided with the
-//       distribution.
-//
-//     - Neither the name of The Numerical Algorithms Group Ltd. nor the
-//       names of its contributors may be used to endorse or promote products
-//       derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef OPENAXIOM_SEXPR_INCLUDED
-#define OPENAXIOM_SEXPR_INCLUDED
-
-// --% Author: Gabriel Dos Reis.
-// --% Description:
-// --%   A simple support for s-expressions.  By design, no ambition
-// --%   for full-fledged Common Lisp reader capability.  Rather,
-// --%   the aim is a simple data structure for exchanging data
-// --%   between several components of the OpenAxiom system.
-// --%   Users interested in fullblown Lisp syntax should seek
-// --%   to acquire Lisp systems, many of which are freely available.
-
-#include <iosfwd>
-#include <vector>
-#include <set>
-#include <open-axiom/string-pool>
-#include <open-axiom/token>
-
-namespace OpenAxiom {
-   namespace Sexpr {
-      struct BasicError {
-         explicit BasicError(const std::string& s) : msg(s) { }
-         const std::string& message() const { return msg; }
-      protected:
-         std::string msg;
-      };
-
-      // -----------
-      // -- Token --
-      // -----------
-      struct Token {
-         enum Type {
-            unknown,                 // unidentified token
-            semicolon        = token::value(";"), // comment
-            dot              = token::value("."),
-            comma            = token::value(","),
-            open_paren       = token::value("("),
-            close_paren      = token::value(")"),
-            apostrophe       = token::value("'"),
-            backquote        = token::value("`"),
-            backslash        = token::value("\\"),
-            sharp_open_paren = token::value("#("),
-            sharp_apostrophe = token::value("#'"),
-            sharp_colon      = token::value("#:"),
-            sharp_plus       = token::value("#+"),
-            sharp_minus      = token::value("#-"),
-            sharp_dot        = token::value("#."),
-            comma_at         = token::value(",@"),
-            digraph_end      = token::value(0xff,0xff),
-            integer,                // integer literal
-            character,              // character literal
-            string,                 // string literal
-            identifier,             // plain identifier
-            sharp_integer_equal,    // anchor definition, #n=<form>
-            sharp_integer_sharp     // back reference, #n#
-         };
-
-         Type type;             // class of this token
-         BasicString lexeme;    // characters making up this token
-      };
-
-      // Print a token object on an output stream.
-      // Note: this function is for debugging purpose; in particular
-      // it does not `prettyprint' tokens.
-      std::ostream& operator<<(std::ostream&, const Token&);
-
-      // -----------
-      // -- Lexer --
-      // -----------
-      // An object of this type transforms a sequence of characters
-      // into a sequence of tokens as defined above.
-      // A lexer does not manage memory itself.  Rather, it delegates
-      // storage allocation for lexemes and tokens to specialized
-      // agents used to construct it.
-      struct Lexer {
-         Lexer(StringPool& pool, std::vector<Token>& toks)
-               : strings(pool), tokens(toks) { }
-
-         const Byte* tokenize(const Byte*, const Byte*);
-         BasicString intern(const Byte* s, size_t n) {
-            return strings.intern(s, n);
-         }
-
-      private:
-         StringPool& strings;        // where to allocate lexemes from
-         std::vector<Token>& tokens; // where to deposite tokens.
-      };
-
-      // ------------
-      // -- Syntax --
-      // ------------
-      // Base class of syntax object classes.
-      struct Syntax {
-         struct Visitor;        // base class of syntax visitors
-         virtual void accept(Visitor&) const = 0;
-      };
-
-      // ----------
-      // -- Atom --
-      // ----------
-      // An atom is a syntax object consisting of exatly one token.
-      // This should not be confused with the notion of atom
-      // in Lisp languages. 
-      struct Atom : Syntax {
-         const Token& token() const { return tok; }
-         BasicString lexeme() const { return tok.lexeme; }
-         void accept(Visitor&) const;
-      protected:
-         const Token tok;
-         Atom(const Token&);
-      };
-
-      // -------------
-      // -- Integer --
-      // -------------
-      // Integer literal syntax objects
-      struct Integer : Atom {
-         explicit Integer(const Token&);
-         void accept(Visitor&) const;
-      };
-
-      // ---------------
-      // -- Character --
-      // ---------------
-      // Character literal syntax objects.
-      struct Character : Atom {
-         explicit Character(const Token&);
-         void accept(Visitor&) const;
-      };
-
-      // ------------
-      // -- String --
-      // ------------
-      // Striing literal syntax objjects.
-      struct String : Atom {
-         explicit String(const Token&);
-         void accept(Visitor&) const;
-      };
-
-      // ------------
-      // -- Symbol --
-      // ------------
-      struct Symbol : Atom {
-         enum Kind {
-            uninterned,         // uninterned symbol
-            ordinary,           // an interned symbol
-            keyword             // a keyword symbol
-         };
-         Symbol(const Token&, Kind);
-         Kind kin() const { return sort; }
-         void accept(Visitor&) const;
-      private:
-         const Kind sort;
-      };
-
-      // ---------------
-      // -- Reference --
-      // ---------------
-      // Back reference object to a syntax object.
-      struct Reference : Atom {
-         Reference(const Token&, size_t);
-         size_t tag() const { return pos; }
-         void accept(Visitor&) const;
-      private:
-         const size_t pos;
-      };
-
-      // ------------
-      // -- Anchor --
-      // ------------
-      // Base anchor syntax object.
-      struct Anchor : Syntax {
-         Anchor(size_t, const Syntax*);
-         size_t ref() const { return tag; }
-         const Syntax* value() const { return val; }
-         void accept(Visitor&) const;
-      private:
-         const size_t tag;
-         const Syntax* const val;
-      };
-
-      // -- Abstract over common implementation of unary special operators.
-      template<typename T>
-      struct unary_form : Syntax {
-         const Syntax* body() const { return form; }
-         void accept(Visitor&) const;
-      protected:
-         unary_form(const Syntax* f) : form(f) { }
-      private:
-         const Syntax* const form;
-      };
-
-      // -----------
-      // -- Quote --
-      // -----------
-      // Quotation syntax object.
-      struct Quote : unary_form<Quote> {
-         explicit Quote(const Syntax*);
-      };
-      
-      // ---------------
-      // -- Antiquote --
-      // ---------------
-      // Quasi-quotation syntax object.
-      struct Antiquote : unary_form<Antiquote> {
-         explicit Antiquote(const Syntax*);
-      };
-      
-      // ------------
-      // -- Expand --
-      // ------------
-      // Expansion request inside a quasi-quotation.
-      struct Expand : unary_form<Expand> {
-         explicit Expand(const Syntax*);
-      };
-      
-      // ----------
-      // -- Eval --
-      // ----------
-      // Read-time evaluation request syntax object.
-      struct Eval : unary_form<Eval> {
-         explicit Eval(const Syntax*);
-      };
-      
-      // ------------
-      // -- Splice --
-      // ------------
-      // Splice request syntax object inside a quasi-quotation.
-      struct Splice : unary_form<Splice> {
-         explicit Splice(const Syntax*);
-      };
-      
-      // --------------
-      // -- Function --
-      // --------------
-      // Function literal syntax object.
-      struct Function : unary_form<Function> {
-         explicit Function(const Syntax*);
-      };
-
-      // -------------
-      // -- DotTail --
-      // -------------
-      // Objects of this type represents the tail of syntactic
-      // objects denoting dotted pair syntax `(a . b)'.
-      struct DotTail : unary_form<DotTail> {
-         explicit DotTail(const Syntax*);
-      };
-
-      // -------------
-      // -- Include --
-      // -------------
-      // Conditional inclusion syntax object
-      struct Include : unary_form<Include> {
-         explicit Include(const Syntax*);
-      };
-
-      // -------------
-      // -- Exclude --
-      // -------------
-      // Conditional exclusion syntax object
-      struct Exclude : unary_form<Exclude> {
-         explicit Exclude(const Syntax*);
-      };
-
-      // ----------
-      // -- List --
-      // ----------
-      // List syntax objects.
-      struct List : Syntax, private std::vector<const Syntax*> {
-         typedef std::vector<const Syntax*> base;
-         using base::const_iterator;
-         using base::begin;
-         using base::end;
-         using base::size;
-         using base::empty;
-
-         List();
-         explicit List(const base&);
-         ~List();
-         void accept(Visitor&) const;
-      };
-      
-      // ------------
-      // -- Vector --
-      // ------------
-      // Vector syntax objects.
-      struct Vector : Syntax, private std::vector<const Syntax*> {
-         typedef std::vector<const Syntax*> base;
-         using base::const_iterator;
-         using base::begin;
-         using base::end;
-         using base::size;
-         using base::operator[];
-         using base::empty;
-
-         Vector();
-         explicit Vector(const base&);
-         ~Vector();
-         void accept(Visitor&) const;
-      };
-
-      // ---------------------
-      // -- Syntax::Visitor --
-      // ---------------------
-      struct Syntax::Visitor {
-         virtual void visit(const Atom&) = 0;
-         virtual void visit(const Integer&);
-         virtual void visit(const Character&);
-         virtual void visit(const String&);
-         virtual void visit(const Symbol&);
-         virtual void visit(const Reference&);
-         virtual void visit(const Anchor&) = 0;
-         virtual void visit(const Quote&) = 0;
-         virtual void visit(const Antiquote&) = 0;
-         virtual void visit(const Expand&) = 0;
-         virtual void visit(const Eval&) = 0;
-         virtual void visit(const Splice&) = 0;
-         virtual void visit(const Function&) = 0;
-         virtual void visit(const Include&) = 0;
-         virtual void visit(const Exclude&) = 0;
-         virtual void visit(const DotTail&) = 0;
-         virtual void visit(const List&) = 0;
-         virtual void visit(const Vector&) = 0;
-      };
-
-      template<typename T>
-      void
-      unary_form<T>::accept(Visitor& v) const {
-         v.visit(static_cast<const T&>(*this));
-      }
-
-      // ---------------
-      // -- Allocator --
-      // ---------------
-
-      // The next two classes are helper classes for the main
-      // allocation class Allocator.  We use std::set as allocator
-      // that guarantee uuniqueness of atomic syntax object with
-      // respect to the constituent token.  That container needs
-      // a relational comparator.  In an ideal world, this class
-      // should not exist.
-      struct SyntaxComparator {
-         bool operator()(const Atom& lhs, const Atom& rhs) const {
-            return std::less<BasicString>()(lhs.lexeme(), rhs.lexeme());
-         }
-
-         template<typename T>
-         bool
-         operator()(const unary_form<T>& lhs, const unary_form<T>& rhs) const {
-            return std::less<const void*>()(lhs.body(), rhs.body());
-         }
-
-         bool operator()(const Anchor& lhs, const Anchor& rhs) const {
-            return std::less<size_t>()(lhs.ref(), rhs.ref());
-         }
-      };
-
-      template<typename T>
-      struct UniqueAllocator : std::set<T, SyntaxComparator> {
-         typedef std::set<T, SyntaxComparator> base;
-         typedef typename base::const_iterator const_iterator;
-
-         template<typename... Args>
-         const T* allocate(const Args&... args) {
-            return &*this->insert(T(args...)).first;
-         }
-      };
-      
-      // Allocator of syntax objects.
-      struct Allocator {
-         Allocator();
-         ~Allocator();
-
-         const Integer* make_integer(const Token&);
-         const Character* make_character(const Token&);
-         const String* make_string(const Token&);
-         const Symbol* make_symbol(const Token&, Symbol::Kind);
-         const Reference* make_reference(const Token&, size_t);
-         const Anchor* make_anchor(size_t, const Syntax*);
-         const Quote* make_quote(const Syntax*);
-         const Antiquote* make_antiquote(const Syntax*);
-         const Expand* make_expand(const Syntax*);
-         const Eval* make_eval(const Syntax*);
-         const Splice* make_splice(const Syntax*);
-         const Function* make_function(const Syntax*);
-         const Include* make_include(const Syntax*);
-         const Exclude* make_exclude(const Syntax*);
-         const DotTail* make_dot_tail(const Syntax*);
-         const List* make_list(const std::vector<const Syntax*>&);
-         const Vector* make_vector(const std::vector<const Syntax*>&);
-         
-      private:
-         UniqueAllocator<Integer> ints;
-         UniqueAllocator<Character> chars;
-         UniqueAllocator<String> strs;
-         UniqueAllocator<Symbol> syms;
-         UniqueAllocator<Anchor> ancs;
-         UniqueAllocator<Reference> refs;
-         UniqueAllocator<Quote> quotes;
-         UniqueAllocator<Antiquote> antis;
-         UniqueAllocator<Expand> exps;
-         UniqueAllocator<Function> funs;
-         UniqueAllocator<Include> incs;
-         UniqueAllocator<Exclude> excs;
-         UniqueAllocator<Eval> evls;
-         UniqueAllocator<Splice> spls;
-         UniqueAllocator<DotTail> tails;
-         Memory::Factory<List> lists;
-         Memory::Factory<Vector> vectors;
-         List empty_list;
-         Vector empty_vector;
-      };
-
-      // ------------
-      // -- Parser --
-      // ------------
-      // An object of this type transforms a sequence of tokens
-      // into a sequence of syntax objects.
-      // A parser object does not manage memory itself.  Rather, it delegates
-      // storage allocation for syntax objects to specialized
-      // agents used to construct it.
-      struct Parser {
-         Parser(Allocator&, std::vector<const Syntax*>&);
-         const Token* parse(const Token*, const Token*);
-      private:
-         Allocator& alloc;
-         std::vector<const Syntax*>& syns;
-
-         const Symbol* parse_symbol(const Token*&, const Token*);
-         const Character* parse_character(const Token*&, const Token*);
-         const Anchor* parse_anchor(const Token*&, const Token*);
-         const Reference* parse_reference(const Token*&, const Token*);
-         const Symbol* parse_uninterned(const Token*&, const Token*);
-         const Function* parse_function(const Token*&, const Token*);
-         const Quote* parse_quote(const Token*&, const Token*);
-         const Antiquote* parse_antiquote(const Token*&, const Token*);
-         const Include* parse_include(const Token*&, const Token*);
-         const Exclude* parse_exclude(const Token*&, const Token*);
-         const Expand* parse_expand(const Token*&, const Token*);
-         const Eval* parse_eval(const Token*&, const Token*);
-         const Splice* parse_splice(const Token*&, const Token*);
-         const Vector* parse_vector(const Token*&, const Token*);
-         const List* parse_list(const Token*&, const Token*);
-         const Syntax* parse_syntax(const Token*&, const Token*);
-      };
-
-      // ------------
-      // -- Module --
-      // ------------
-      // Entire s-expression input file.
-      struct Module : std::vector<const Syntax*> {
-         explicit Module(const std::string&);
-         const std::string& name() const { return nm; }
-      private:
-         const std::string nm;
-         StringPool raw_strs;
-         Allocator allocator;
-      };
-   }
-}
-
-#endif  // OPENAXIOM_SEXPR_INCLUDED
diff --git a/src/utils/sexpr.cc b/src/utils/sexpr.cc
deleted file mode 100644
index 3e6b5e90..00000000
--- a/src/utils/sexpr.cc
+++ /dev/null
@@ -1,934 +0,0 @@
-// Copyright (C) 2010-2013, Gabriel Dos Reis.
-// All rights reserved.
-// Written by Gabriel Dos Reis.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     - Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//
-//     - Redistributions in binary form must reproduce the above copyright
-//       notice, this list of conditions and the following disclaimer in
-//       the documentation and/or other materials provided with the
-//       distribution.
-//
-//     - Neither the name of The Numerical Algorithms Group Ltd. nor the
-//       names of its contributors may be used to endorse or promote products
-//       derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// --% Author: Gabriel Dos Reis.
-
-#include <ctype.h>
-#include <string.h>
-#include <iostream>
-#include <iterator>
-#include <open-axiom/sexpr>
-#include <open-axiom/FileMapping>
-
-namespace OpenAxiom {
-   namespace Sexpr {
-      template<typename T, int N>
-      static inline int
-      length(const T(&)[N]) {
-         return N;
-      }
-
-      template<typename Sequence>
-      static inline typename Sequence::const_pointer
-      begin_ptr(const Sequence& s) {
-         return &*s.begin();
-      }
-
-      template<typename Sequence>
-      static inline typename Sequence::const_pointer
-      end_ptr(const Sequence& s) {
-         return s.empty() ? 0 : &*s.begin() + s.size();
-      }
-
-      std::ostream&
-      operator<<(std::ostream& os, const Token& t) {
-         switch (t.type) {
-         case Token::semicolon: os << "SEMICOLON"; break;
-         case Token::dot: os << "DOT"; break;
-         case Token::comma: os << "COMMA"; break;
-         case Token::open_paren: os << "OPEN_PAREN"; break;
-         case Token::close_paren: os << "CLOSE_PAREN"; break;
-         case Token::apostrophe: os << "APOSTROPHE"; break;
-         case Token::backquote: os << "BACKQUOTE"; break;
-         case Token::backslash: os << "BACKSLASH"; break;
-         case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break;
-         case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break;
-         case Token::sharp_colon: os << "SHARP_COLON"; break;
-         case Token::sharp_plus: os << "SHARP_PLUS"; break;
-         case Token::sharp_minus: os << "SHARP_MINUS"; break;
-         case Token::sharp_dot: os << "SHARP_DOT"; break;
-         case Token::comma_at: os << "COMMA_AT"; break;
-         case Token::integer: os << "INTEGER"; break;
-         case Token::character: os << "CHARACTER"; break;
-         case Token::string: os << "STRING"; break;
-         case Token::identifier: os << "IDENTIFIER"; break;
-         case Token::sharp_integer_sharp:
-            os << "SHARP_INTEGER_SHARP"; break;
-         case Token::sharp_integer_equal:
-            os << "SHARP_INTEGER_EQUAL"; break;
-         default: os << "UNKNOWN"; break;
-         }
-         os << '(';
-         if (t.lexeme != 0) {
-            os << '"';
-            std::copy(t.lexeme->begin(), t.lexeme->end(),
-                      std::ostream_iterator<char>(os));
-            os << '"';
-         }
-         else
-            os << "<missing>";
-         return os << ')';
-      }
-
-      // -----------
-      // -- Lexer --
-      // -----------
-      static void
-      syntax_error(const std::string& s) {
-         throw BasicError(s);
-      }
-
-      // Return true if character `c' introduces a blank.
-      static bool
-      is_blank(char c) {
-         return c == ' ' or c == '\t' or c == '\v'
-            or c == '\n' or c == '\f' or c == '\r';
-      }
-      
-      // Return true if the character `c' introduces a delimiter.
-      static bool
-      is_delimiter(char c) {
-         return is_blank(c)
-            or c == '(' or c == ')' or c == '\''
-            or c == '`' or c == '#';
-      }
-
-      // Move `cur' past all consecutive blank characters, and
-      // return the new position.
-      static const Byte* 
-      skip_blank(const Byte*& cur, const Byte* end) {
-         while (cur < end and is_blank(*cur))
-            ++cur;
-         return cur;
-      }
-
-      // Move `cur' to end-of-line marker.
-      static const Byte*
-      skip_to_eol(const Byte*& cur, const Byte* end) {
-         // FIXME: properly handle CR+LF.
-         while (cur < end and *cur != '\n')
-            ++cur;
-         return cur;
-      }
-
-      // Move `cur' until a word boundary is reached.
-      static const Byte*
-      skip_to_word_boundary(const Byte*& cur, const Byte* end) {
-         bool saw_escape = false;
-         for (; cur < end; ++cur) {
-            if (saw_escape)
-               saw_escape = false;
-            else if (*cur == '\\')
-               saw_escape = true;
-            else if (is_delimiter(*cur))
-               break;
-         }
-         return cur;
-      }
-
-      // Move `cur' one-past a non-esacaped character `c'.
-      // Return true if the character was seen.
-      static bool
-      skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) {
-         bool saw_escape = false;
-         for (; cur < end; ++cur)
-            if (saw_escape)
-               saw_escape = false;
-            else if (*cur == '\\')
-               saw_escape = true;
-            else if (*cur == c) {
-               ++cur;
-               return true;
-            }
-         return false;
-      }
-
-      // Move `cur' past the closing quote of string literal.
-      // Return true if the closing fence was effectively seen.
-      static inline bool
-      skip_to_quote(const Byte*& cur, const Byte* end) {
-         return skip_to_nonescaped_char(cur, end, '"');
-      }
-
-      // Return true if the character `c' be part of a non-absolute
-      // identifier.
-      static bool
-      identifier_part(char c) {
-         switch (c) {
-         case '+': case '-': case '*': case '/': case '%': case '^':
-         case '~': case '@': case '$': case '&': case '=':
-         case '<': case '>': case '?': case '!': case '_':
-         case '[': case ']': case '{': case '}':
-            return true;
-         default:
-            return isalnum(c);
-         }
-      }
-
-      // Return true if the character `c' has a special meaning after
-      // the sharp character.
-      static bool
-      special_after_sharp(char c) {
-         return c == '(' or c == '\'' or c == ':'
-            or c == '+' or c == '-' or c == '.';
-      }
-
-      // Return true if the sequence `[cur, end)' has a prefix that is 
-      // an integer followrd by the equal sign or the sharp sign.
-      // `cur' is moved along the way.
-      static bool
-      only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) {
-         while (cur < end and isdigit(*cur))
-            ++cur;
-         return cur < end and (*cur == '#' or *cur == '=');
-      }
-
-      // The token `t' was thought to designate an identifier.
-      // Reclassify it as an integer if, in fact, its lexeme consists
-      // entirely of digits.
-      static void
-      maybe_reclassify(Token& t) {
-         const Byte* cur = t.lexeme->begin();
-         const Byte* end = t.lexeme->end();
-         while (cur < end and isdigit(*cur))
-            ++cur;
-         if (cur == end)
-            t.type = Token::integer;
-      }
-
-      // Returns true if the first characters in the range
-      // [cur, last) start an identifier.
-      static bool
-      start_symbol(const Byte* cur, const Byte* last) {
-         if (cur >= last)
-            return false;
-         return identifier_part(*cur)
-            or *cur == '|' or *cur == ':';
-      }
-
-      // We are processing a symbol token.  Accumulate all
-      // legitimate characters till the end of the token.
-      static void
-      skip_to_end_of_symbol(const Byte*& cur, const Byte* end) {
-         const char c = *cur;
-         if (*cur == '|') 
-            skip_to_nonescaped_char(++cur, end, c);
-         else
-            skip_to_word_boundary(cur, end);
-         if (cur < end and *cur == ':')
-            skip_to_end_of_symbol(cur, end);
-      }
-
-      static Token
-      match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) {
-         Token t = { Token::identifier, 0 };
-         const Byte* start = cur;
-         skip_to_end_of_symbol(cur, end);
-         t.lexeme = lexer->intern(start, cur - start);
-         maybe_reclassify(t);
-         return t;
-      }
-
-      const Byte*
-      Lexer::tokenize(const Byte* cur, const Byte* end) {
-         while (skip_blank(cur, end) < end) {
-            Token t = { Token::unknown, 0 };
-            switch (*cur) {
-            case ';': {
-               const Byte* start = cur;
-               t.type = Token::semicolon;
-               skip_to_eol(cur, end);
-               t.lexeme = intern(start, cur - start);
-               break;
-            }
-               
-            case '.': case '(': case ')': case '\'': case '`':
-               t.type = Token::Type(token::value(*cur));
-               t.lexeme = intern(cur, 1);
-               ++cur;
-               break;
-
-            case ',': {
-               const Byte* start = cur;
-               if (++cur < end and *cur == '@') {
-                  t.type = Token::comma_at;
-                  ++cur;
-               }
-               else
-                  t.type = Token::comma;
-               t.lexeme = intern(start, cur - start);
-               break;
-            }
-
-            case '\\':
-               t = match_maybe_symbol(this, cur, end);
-               break;
-
-            case '#': {
-               const Byte* start = cur;
-               if (cur + 1 < end and special_after_sharp(cur[1])) {
-                  t.type = Token::Type(token::value(cur[0], cur[1]));
-                  t.lexeme = intern(cur, 2);
-                  cur += 2;
-               }
-               else if (cur + 1 < end and cur[1] == '\\') {
-                  start = cur += 2;
-                  if (not isalnum(*cur))
-                     ++cur;
-                  else
-                     skip_to_word_boundary(cur, end);
-                  t.type = Token::character;
-                  t.lexeme = intern(start, cur - start);
-               }
-               else if (only_digits_before_equal_or_shap(++cur, end)) {
-                  t.type = *cur == '#'
-                     ? Token::sharp_integer_sharp
-                     : Token::sharp_integer_equal;
-                  t.lexeme = intern(start, cur - start + 1);
-                  ++cur;
-               }
-               else {
-                  skip_to_word_boundary(cur, end);
-                  t.lexeme = intern(start, cur - start);
-               }
-               break;
-            }
-
-            case '"': {
-               const Byte* start = cur;
-               skip_to_quote(++cur, end);
-               t.type = Token::string;
-               t.lexeme = intern(start, cur - start);
-               break;
-            }
-
-            default:
-               if (start_symbol(cur, end))
-                  t = match_maybe_symbol(this, cur, end);
-               else {
-                  const Byte* start = cur;
-                  skip_to_word_boundary(++cur, end);
-                  t.lexeme = intern(start, cur - start);
-               }
-               break;
-            }
-            tokens.push_back(t);
-         }
-         return cur;
-      }
-
-      // ----------
-      // -- Atom --
-      // ----------
-      Atom::Atom(const Token& t) : tok(t) { }
-
-      void
-      Atom::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -------------
-      // -- Integer --
-      // -------------
-      Integer::Integer(const Token& t) : Atom(t) { }
-
-      void
-      Integer::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // ---------------
-      // -- Character --
-      // ---------------
-      Character::Character(const Token& t) : Atom(t) { }
-
-      void
-      Character::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // ------------
-      // -- String --
-      // ------------
-      String::String(const Token& t) : Atom(t) { }
-
-      void
-      String::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // ------------
-      // -- Symbol --
-      // ------------
-      Symbol::Symbol(const Token& t, Kind k) : Atom(t), sort(k) { }
-
-      void
-      Symbol::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // ------------
-      // -- Anchor --
-      // ------------
-      Anchor::Anchor(size_t t, const Syntax* s) : tag(t), val(s) { }
-
-      void
-      Anchor::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // ---------------
-      // -- Reference --
-      // ---------------
-      Reference::Reference(const Token& t, size_t v) : Atom(t), pos(v) { }
-
-      void
-      Reference::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -----------
-      // -- Quote --
-      // -----------
-      Quote::Quote(const Syntax* s) : unary_form<Quote>(s) { }
-
-      // ---------------
-      // -- Antiquote --
-      // ---------------
-      Antiquote::Antiquote(const Syntax* s) : unary_form<Antiquote>(s) { }
-
-      // ------------
-      // -- Expand --
-      // ------------
-      Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { }
-
-      // ----------
-      // -- Eval --
-      // ----------
-      Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { }
-
-      // ------------
-      // -- Splice --
-      // ------------
-      Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { }
-
-      // --------------
-      // -- Function --
-      // --------------
-      Function::Function(const Syntax* s) : unary_form<Function>(s) { }
-
-      // -------------
-      // -- Include --
-      Include::Include(const Syntax* s) : unary_form<Include>(s) { }
-
-      // -------------
-      // -- Exclude --
-      Exclude::Exclude(const Syntax* s) : unary_form<Exclude>(s) { }
-
-      // -------------
-      // -- DotTail --
-      // -------------
-      DotTail::DotTail(const Syntax* f) : unary_form<DotTail>(f) { }
-
-      // ----------
-      // -- List --
-      // ----------
-      List::List() { }
-
-      List::List(const base& elts) : base(elts) { }
-
-      List::~List() { }
-
-      void
-      List::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // ------------
-      // -- Vector --
-      // ------------
-      Vector::Vector() { }
-
-      Vector::Vector(const base& elts) : base(elts) { }
-
-      Vector::~Vector() { }
-      
-      void
-      Vector::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // ---------------------
-      // -- Syntax::Visitor --
-      // ---------------------
-
-      // implicitly convert a reference to `T' to a reference to `S'.
-      template<typename S, typename T>
-      inline const S&
-      as(const T& t) {
-         return t;
-      }
-
-      void
-      Syntax::Visitor::visit(const Integer& i) {
-         visit(as<Atom>(i));
-      }
-
-      void
-      Syntax::Visitor::visit(const Character& c) {
-         visit(as<Atom>(c));
-      }
-
-      void
-      Syntax::Visitor::visit(const String& s) {
-         visit(as<Atom>(s));
-      }
-
-      void
-      Syntax::Visitor::visit(const Symbol& s) {
-         visit(as<Atom>(s));
-      }
-
-      void
-      Syntax::Visitor::visit(const Reference& r) {
-         visit(as<Atom>(r));
-      }
-
-      // ---------------
-      // -- Allocator --
-      // ---------------
-      Allocator::Allocator() { }
-
-      // This destructor is defined here so that it provides
-      // a single instantiation point for destructors of all
-      // used templates floating around.
-      Allocator::~Allocator() { }
-
-      const Character*
-      Allocator::make_character(const Token& t) {
-         return chars.allocate(t);
-      }
-
-      const Integer*
-      Allocator::make_integer(const Token& t) {
-         return ints.allocate(t);
-      }
-
-      const String*
-      Allocator::make_string(const Token& t) {
-         return strs.allocate(t);
-      }
-
-      const Symbol*
-      Allocator::make_symbol(const Token& t, Symbol::Kind k) {
-         return syms.allocate(t, k);
-      }
-
-      const Anchor*
-      Allocator::make_anchor(size_t t, const Syntax* s) {
-         return ancs.allocate(t, s);
-      }
-
-      const Reference*
-      Allocator::make_reference(const Token& t, size_t i) {
-         return refs.allocate(t, i);
-      }
-
-      const Quote*
-      Allocator::make_quote(const Syntax* s) {
-         return quotes.allocate(s);
-      }
-
-      const Antiquote*
-      Allocator::make_antiquote(const Syntax* s) {
-         return antis.allocate(s);
-      }
-
-      const Expand*
-      Allocator::make_expand(const Syntax* s) {
-         return exps.allocate(s);
-      }
-
-      const Eval*
-      Allocator::make_eval(const Syntax* s) {
-         return evls.allocate(s);
-      }
-
-      const Splice*
-      Allocator::make_splice(const Syntax* s) {
-         return spls.allocate(s);
-      }
-
-      const Function*
-      Allocator::make_function(const Syntax* s) {
-         return funs.allocate(s);
-      }
-
-      const Include*
-      Allocator::make_include(const Syntax* s) {
-         return incs.allocate(s);
-      }
-
-      const Exclude*
-      Allocator::make_exclude(const Syntax* s) {
-         return excs.allocate(s);
-      }
-
-      const DotTail*
-      Allocator::make_dot_tail(const Syntax* f) {
-         return tails.allocate(f);
-      }
-
-      const List*
-      Allocator::make_list(const std::vector<const Syntax*>& elts) {
-         if (elts.empty())
-            return &empty_list;
-         return lists.make(elts);
-      }
-
-      const Vector*
-      Allocator::make_vector(const std::vector<const Syntax*>& elts) {
-         if (elts.empty())
-            return &empty_vector;
-         return vectors.make(elts);
-      }
-
-      // ------------
-      // -- Parser --
-      // ------------
-
-      // Signal a parse error
-      static void
-      parse_error(const std::string& s) {
-         throw BasicError(s);
-      }
-
-      // Signal that an expected syntax object was missing
-      static void
-      expected_syntax(const std::string& s) {
-         parse_error("expected " + s);
-      }
-
-      // Signal an abrupt end of input
-      static void
-      unexpected_end_of_input(const std::string& s) {
-         parse_error("unexpected end of input after " + s);
-      }
-
-      // Signal a missing closing parenthesis
-      static void
-      missing_closer_for(const std::string& s) {
-         parse_error("missing closing parenthesis for " + s);
-      }
-
-      // The sequence of characters in [cur, last) consists
-      // entirely of digits.  Return the corresponding natural value.
-      static size_t
-      natural_value(const Byte* cur, const Byte* last) {
-         size_t n = 0;
-         for (; cur < last; ++cur)
-            // FIXME: check for overflow.
-            n = 10 * n + (*cur - '0');
-         return n;
-      }
-
-      // Parse a plain identifier or a Lisp-style keyword identifier.
-      const Symbol*
-      Parser::parse_symbol(const Token*& cur, const Token* last) {
-         Symbol::Kind kind = *cur->lexeme->begin() == ':'
-            ? Symbol::keyword
-            : Symbol::ordinary;
-         return alloc.make_symbol(*cur++, kind);
-      }
-
-      // List of lower case character names
-      static const char* charname[] = {
-         "newline", "space", "page", "tab",
-         "backspace", "return", "linefeed"
-      };
-
-      static bool
-      equal_character_name(BasicString lhs, const char* rhs) {
-         if (lhs->size() != strlen(rhs))
-            return false;
-         for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur)
-            if (tolower(*cur) != *rhs++)
-               return false;
-         return true;
-      }
-
-      static bool
-      valid_character_name(BasicString s) {
-         for (int i = 0; i < length(charname); ++i)
-            if (equal_character_name(s, charname[i]))
-               return true;
-         return false;
-      }
-
-      const Character*
-      Parser::parse_character(const Token*& cur, const Token* last) {
-         if (cur->lexeme->size() != 1
-             and not valid_character_name(cur->lexeme))
-            parse_error("invalid literal character syntax");
-         return alloc.make_character(*cur++);
-      }
-
-      // Parse an anchor definition of the form #n=<syntax>
-      const Anchor*
-      Parser::parse_anchor(const Token*& cur, const Token* last) {
-         const size_t n = natural_value(cur->lexeme->begin() + 1,
-                                        cur->lexeme->end() - 1);
-         if (++cur == last)
-            unexpected_end_of_input("sharp-integer-equal sign");
-         return alloc.make_anchor(n, parse_syntax(cur, last));
-      }
-
-      // Parse a reference to an anchor, #n#
-      const Reference*
-      Parser::parse_reference(const Token*& cur, const Token* last) {
-         const size_t n = natural_value(cur->lexeme->begin() + 1,
-                                        cur->lexeme->end() - 1);
-         return alloc.make_reference(*cur++, n);
-      }
-
-      // Parse an uninterned symbol #:<identifier>
-      const Symbol*
-      Parser::parse_uninterned(const Token*& cur, const Token* last) {
-         if (cur == last or cur->type != Token::identifier)
-            expected_syntax("symbol after sharp-colon sign");
-         // FIXME: check that the identifier is not a keyword.
-         return alloc.make_symbol(*cur++, Symbol::uninterned);
-      }
-
-      // Parse a function syntax: #'<syntax>
-      const Function*
-      Parser::parse_function(const Token*& cur, const Token* last) {
-         if (cur == last)
-            unexpected_end_of_input("sharp-quote sign");
-         return alloc.make_function(parse_syntax(cur, last));
-      }
-
-      // Parse a quotation
-      const Quote*
-      Parser::parse_quote(const Token*& cur, const Token* last) {
-         if (cur == last)
-            unexpected_end_of_input("quote sign");
-         return alloc.make_quote(parse_syntax(cur, last));
-      }
-
-      // Parse an antiquotation
-      const Antiquote*
-      Parser::parse_antiquote(const Token*& cur, const Token* last) {
-         if (cur == last)
-            unexpected_end_of_input("backquote sign");
-         return alloc.make_antiquote(parse_syntax(cur, last));
-      }
-
-      // Parse an expansion request form
-      const Expand*
-      Parser::parse_expand(const Token*& cur, const Token* last) {
-         const Syntax* s = parse_syntax(cur, last);
-         if (s == 0)
-            unexpected_end_of_input("comma sign");
-         return alloc.make_expand(s);
-      }
-
-      // Parse conditional inclusions
-      const Include*
-      Parser::parse_include(const Token*& cur, const Token* last) {
-         const Syntax* s = parse_syntax(cur, last);
-         if (s == 0)
-            unexpected_end_of_input("sharp-plus sign");
-         return alloc.make_include(s);
-      }
-
-      const Exclude*
-      Parser::parse_exclude(const Token*& cur, const Token* last) {
-         const Syntax* s = parse_syntax(cur, last);
-         if (s == 0)
-            unexpected_end_of_input("sharp-minus sign");
-         return alloc.make_exclude(s);
-      }
-
-      const Eval*
-      Parser::parse_eval(const Token*& cur, const Token* last) {
-         const Syntax* s = parse_syntax(cur, last);
-         if (s == 0)
-            unexpected_end_of_input("sharp-dot sign");
-         return alloc.make_eval(s);
-      }
-
-      const Splice*
-      Parser::parse_splice(const Token*& cur, const Token* last) {
-         const Syntax* s = parse_syntax(cur, last);
-         if (s == 0)
-            unexpected_end_of_input("comma-at sign");
-         return alloc.make_splice(s);
-      }
-
-      // Skip tokens that are semantically blanks, e.g. comments.
-      // Return true if not at end of tokens.
-      static bool
-      skip_ignorable_tokens(const Token*& cur, const Token* last) {
-         while (cur < last and cur->type == Token::semicolon)
-            ++cur;
-         return cur != last;
-      }
-
-      // Parse a vector of syntax objects: #(s .. s)
-      const Vector*
-      Parser::parse_vector(const Token*& cur, const Token* last) {
-         std::vector<const Syntax*> elts;
-         while (skip_ignorable_tokens(cur, last)
-                and cur->type != Token::close_paren)
-            elts.push_back(parse_syntax(cur, last));
-         if (cur == last)
-            missing_closer_for("vector");
-         ++cur;
-         return alloc.make_vector(elts);
-      }
-
-      // Constructs a pair or a list syntax object.
-      const List*
-      Parser::parse_list(const Token*& cur, const Token* last) {
-         std::vector<const Syntax*> elts;
-         while (skip_ignorable_tokens(cur, last)
-                and cur->type != Token::close_paren) {
-            if (cur->type == Token::dot) {
-               skip_ignorable_tokens(++cur, last);
-               if (const Syntax* s = parse_syntax(cur, last)) {
-                  elts.push_back(alloc.make_dot_tail(s));
-                  break;
-               }
-            }
-            elts.push_back(parse_syntax(cur, last));
-         }
-         if (cur == last or cur->type != Token::close_paren)
-            missing_closer_for("list");
-         ++cur;
-         return alloc.make_list(elts);
-      }
-
-      Parser::Parser(Allocator& a, std::vector<const Syntax*>& v)
-            : alloc(a), syns(v) { }
-
-      static std::string
-      to_string(BasicString s) {
-         return { s->begin(), s->end() };
-      }
-
-      const Syntax*
-      Parser::parse_syntax(const Token*& cur, const Token* last) {
-         if (not skip_ignorable_tokens(cur, last))
-            return 0;
-
-         switch (cur->type) {
-         case Token::integer:
-            return alloc.make_integer(*cur++);
-
-         case Token::character:
-            return parse_character(cur, last);
-            
-         case Token::string:
-            return alloc.make_string(*cur++);
-               
-         case Token::identifier:
-            return parse_symbol(cur, last);
-
-         case Token::sharp_integer_equal:
-            return parse_anchor(cur, last);
-
-         case Token::sharp_integer_sharp:
-            return parse_reference(cur, last);
-
-         case Token::sharp_colon:
-            return parse_uninterned(++cur, last);
-
-         case Token::sharp_apostrophe:
-            return parse_function(++cur, last);
-
-         case Token::sharp_open_paren:
-            return parse_vector(++cur, last);
-
-         case Token::apostrophe:
-            return parse_quote(++cur, last);
-
-         case Token::open_paren:
-            return parse_list(++cur, last);
-
-         case Token::sharp_plus:
-            return parse_include(++cur, last);
-
-         case Token::sharp_minus:
-            return parse_exclude(++cur, last);
-
-         case Token::sharp_dot:
-            return parse_eval(++cur, last);
-
-         case Token::backquote:
-            return parse_antiquote(++cur, last);
-
-         case Token::comma:
-            return parse_expand(++cur, last);
-
-         case Token::comma_at:
-            return parse_splice(++cur, last);
-
-         default:
-            parse_error(std::string("parse error before ")
-                        + to_string(cur->lexeme));
-            return 0;           // never executed
-         }
-      }
-
-      const Token*
-      Parser::parse(const Token* cur, const Token* last) {
-         while (cur < last)
-            if (const Syntax* s = parse_syntax(cur, last))
-               syns.push_back(s);
-         return cur;
-      }
-
-      Module::Module(const std::string& s) : nm(s) {
-         std::vector<Token> tokens;
-         Memory::FileMapping input(s);
-         Lexer lexer(raw_strs, tokens);
-         const Byte* rest = lexer.tokenize(input.begin(), input.end());
-         if (rest != input.end())
-            syntax_error("syntax error");
-         Parser parser(allocator, *this);
-         const Token* tok = parser.parse(begin_ptr(tokens), end_ptr(tokens));
-         if (tok != end_ptr(tokens))
-            parse_error("parse error");
-      }
-   }
-}
author	dos-reis <gdr@axiomatics.org>	2013-06-25 04:08:39 +0000
committer	dos-reis <gdr@axiomatics.org>	2013-06-25 04:08:39 +0000
commit	f9e4a03a220766099d6b5fc683a58185c4805a05 (patch)
tree	9a7eb92745c5feeed83d37aff61123f6db4fd7cc /src/utils
parent	bba0c431de24f2291011be1960eeb1f5d15dfb8b (diff)
download	open-axiom-f9e4a03a220766099d6b5fc683a58185c4805a05.tar.gz