aboutsummaryrefslogtreecommitdiff
path: root/src/utils
diff options
context:
space:
mode:
authordos-reis <gdr@axiomatics.org>2013-06-25 04:08:39 +0000
committerdos-reis <gdr@axiomatics.org>2013-06-25 04:08:39 +0000
commitf9e4a03a220766099d6b5fc683a58185c4805a05 (patch)
tree9a7eb92745c5feeed83d37aff61123f6db4fd7cc /src/utils
parentbba0c431de24f2291011be1960eeb1f5d15dfb8b (diff)
downloadopen-axiom-f9e4a03a220766099d6b5fc683a58185c4805a05.tar.gz
* include/sexpr.H: Move from utils.
(UniqueAllocator): Remove. (SyntaxComparator): Likewise. * syntax/sexpr.cc: Move from utils.
Diffstat (limited to 'src/utils')
-rw-r--r--src/utils/Makefile.in6
-rw-r--r--src/utils/sexpr.H495
-rw-r--r--src/utils/sexpr.cc934
3 files changed, 3 insertions, 1432 deletions
diff --git a/src/utils/Makefile.in b/src/utils/Makefile.in
index a3021748..0ac6b300 100644
--- a/src/utils/Makefile.in
+++ b/src/utils/Makefile.in
@@ -36,14 +36,14 @@ hammer_SOURCES = hammer.cc
hammer_OBJECTS = $(hammer_SOURCES:.cc=.lo)
hammer_LDADD = -L. -lOpenAxiom
-libOpenAxiom_HEADERS = hash-table.H string-pool.H sexpr.H vm.H
+libOpenAxiom_HEADERS = hash-table.H string-pool.H vm.H
libOpenAxiom_SOURCES = \
- storage.cc string-pool.cc sexpr.cc command.cc \
+ storage.cc string-pool.cc command.cc \
filesystem.cc vm.cc
libOpenAxiom_OBJECTS = $(libOpenAxiom_SOURCES:.cc=.lo)
-oa_public_headers = hash-table string-pool sexpr vm
+oa_public_headers = hash-table string-pool vm
## Where we store public header files
oa_target_headerdir = $(oa_target_includedir)/open-axiom
diff --git a/src/utils/sexpr.H b/src/utils/sexpr.H
deleted file mode 100644
index 5c9934d5..00000000
--- a/src/utils/sexpr.H
+++ /dev/null
@@ -1,495 +0,0 @@
-// Copyright (C) 2010-2013, Gabriel Dos Reis.
-// All rights reserved.
-// Written by Gabriel Dos Reis.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// - Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in
-// the documentation and/or other materials provided with the
-// distribution.
-//
-// - Neither the name of The Numerical Algorithms Group Ltd. nor the
-// names of its contributors may be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef OPENAXIOM_SEXPR_INCLUDED
-#define OPENAXIOM_SEXPR_INCLUDED
-
-// --% Author: Gabriel Dos Reis.
-// --% Description:
-// --% A simple support for s-expressions. By design, no ambition
-// --% for full-fledged Common Lisp reader capability. Rather,
-// --% the aim is a simple data structure for exchanging data
-// --% between several components of the OpenAxiom system.
-// --% Users interested in fullblown Lisp syntax should seek
-// --% to acquire Lisp systems, many of which are freely available.
-
-#include <iosfwd>
-#include <vector>
-#include <set>
-#include <open-axiom/string-pool>
-#include <open-axiom/token>
-
-namespace OpenAxiom {
- namespace Sexpr {
- struct BasicError {
- explicit BasicError(const std::string& s) : msg(s) { }
- const std::string& message() const { return msg; }
- protected:
- std::string msg;
- };
-
- // -----------
- // -- Token --
- // -----------
- struct Token {
- enum Type {
- unknown, // unidentified token
- semicolon = token::value(";"), // comment
- dot = token::value("."),
- comma = token::value(","),
- open_paren = token::value("("),
- close_paren = token::value(")"),
- apostrophe = token::value("'"),
- backquote = token::value("`"),
- backslash = token::value("\\"),
- sharp_open_paren = token::value("#("),
- sharp_apostrophe = token::value("#'"),
- sharp_colon = token::value("#:"),
- sharp_plus = token::value("#+"),
- sharp_minus = token::value("#-"),
- sharp_dot = token::value("#."),
- comma_at = token::value(",@"),
- digraph_end = token::value(0xff,0xff),
- integer, // integer literal
- character, // character literal
- string, // string literal
- identifier, // plain identifier
- sharp_integer_equal, // anchor definition, #n=<form>
- sharp_integer_sharp // back reference, #n#
- };
-
- Type type; // class of this token
- BasicString lexeme; // characters making up this token
- };
-
- // Print a token object on an output stream.
- // Note: this function is for debugging purpose; in particular
- // it does not `prettyprint' tokens.
- std::ostream& operator<<(std::ostream&, const Token&);
-
- // -----------
- // -- Lexer --
- // -----------
- // An object of this type transforms a sequence of characters
- // into a sequence of tokens as defined above.
- // A lexer does not manage memory itself. Rather, it delegates
- // storage allocation for lexemes and tokens to specialized
- // agents used to construct it.
- struct Lexer {
- Lexer(StringPool& pool, std::vector<Token>& toks)
- : strings(pool), tokens(toks) { }
-
- const Byte* tokenize(const Byte*, const Byte*);
- BasicString intern(const Byte* s, size_t n) {
- return strings.intern(s, n);
- }
-
- private:
- StringPool& strings; // where to allocate lexemes from
- std::vector<Token>& tokens; // where to deposite tokens.
- };
-
- // ------------
- // -- Syntax --
- // ------------
- // Base class of syntax object classes.
- struct Syntax {
- struct Visitor; // base class of syntax visitors
- virtual void accept(Visitor&) const = 0;
- };
-
- // ----------
- // -- Atom --
- // ----------
- // An atom is a syntax object consisting of exatly one token.
- // This should not be confused with the notion of atom
- // in Lisp languages.
- struct Atom : Syntax {
- const Token& token() const { return tok; }
- BasicString lexeme() const { return tok.lexeme; }
- void accept(Visitor&) const;
- protected:
- const Token tok;
- Atom(const Token&);
- };
-
- // -------------
- // -- Integer --
- // -------------
- // Integer literal syntax objects
- struct Integer : Atom {
- explicit Integer(const Token&);
- void accept(Visitor&) const;
- };
-
- // ---------------
- // -- Character --
- // ---------------
- // Character literal syntax objects.
- struct Character : Atom {
- explicit Character(const Token&);
- void accept(Visitor&) const;
- };
-
- // ------------
- // -- String --
- // ------------
- // Striing literal syntax objjects.
- struct String : Atom {
- explicit String(const Token&);
- void accept(Visitor&) const;
- };
-
- // ------------
- // -- Symbol --
- // ------------
- struct Symbol : Atom {
- enum Kind {
- uninterned, // uninterned symbol
- ordinary, // an interned symbol
- keyword // a keyword symbol
- };
- Symbol(const Token&, Kind);
- Kind kin() const { return sort; }
- void accept(Visitor&) const;
- private:
- const Kind sort;
- };
-
- // ---------------
- // -- Reference --
- // ---------------
- // Back reference object to a syntax object.
- struct Reference : Atom {
- Reference(const Token&, size_t);
- size_t tag() const { return pos; }
- void accept(Visitor&) const;
- private:
- const size_t pos;
- };
-
- // ------------
- // -- Anchor --
- // ------------
- // Base anchor syntax object.
- struct Anchor : Syntax {
- Anchor(size_t, const Syntax*);
- size_t ref() const { return tag; }
- const Syntax* value() const { return val; }
- void accept(Visitor&) const;
- private:
- const size_t tag;
- const Syntax* const val;
- };
-
- // -- Abstract over common implementation of unary special operators.
- template<typename T>
- struct unary_form : Syntax {
- const Syntax* body() const { return form; }
- void accept(Visitor&) const;
- protected:
- unary_form(const Syntax* f) : form(f) { }
- private:
- const Syntax* const form;
- };
-
- // -----------
- // -- Quote --
- // -----------
- // Quotation syntax object.
- struct Quote : unary_form<Quote> {
- explicit Quote(const Syntax*);
- };
-
- // ---------------
- // -- Antiquote --
- // ---------------
- // Quasi-quotation syntax object.
- struct Antiquote : unary_form<Antiquote> {
- explicit Antiquote(const Syntax*);
- };
-
- // ------------
- // -- Expand --
- // ------------
- // Expansion request inside a quasi-quotation.
- struct Expand : unary_form<Expand> {
- explicit Expand(const Syntax*);
- };
-
- // ----------
- // -- Eval --
- // ----------
- // Read-time evaluation request syntax object.
- struct Eval : unary_form<Eval> {
- explicit Eval(const Syntax*);
- };
-
- // ------------
- // -- Splice --
- // ------------
- // Splice request syntax object inside a quasi-quotation.
- struct Splice : unary_form<Splice> {
- explicit Splice(const Syntax*);
- };
-
- // --------------
- // -- Function --
- // --------------
- // Function literal syntax object.
- struct Function : unary_form<Function> {
- explicit Function(const Syntax*);
- };
-
- // -------------
- // -- DotTail --
- // -------------
- // Objects of this type represents the tail of syntactic
- // objects denoting dotted pair syntax `(a . b)'.
- struct DotTail : unary_form<DotTail> {
- explicit DotTail(const Syntax*);
- };
-
- // -------------
- // -- Include --
- // -------------
- // Conditional inclusion syntax object
- struct Include : unary_form<Include> {
- explicit Include(const Syntax*);
- };
-
- // -------------
- // -- Exclude --
- // -------------
- // Conditional exclusion syntax object
- struct Exclude : unary_form<Exclude> {
- explicit Exclude(const Syntax*);
- };
-
- // ----------
- // -- List --
- // ----------
- // List syntax objects.
- struct List : Syntax, private std::vector<const Syntax*> {
- typedef std::vector<const Syntax*> base;
- using base::const_iterator;
- using base::begin;
- using base::end;
- using base::size;
- using base::empty;
-
- List();
- explicit List(const base&);
- ~List();
- void accept(Visitor&) const;
- };
-
- // ------------
- // -- Vector --
- // ------------
- // Vector syntax objects.
- struct Vector : Syntax, private std::vector<const Syntax*> {
- typedef std::vector<const Syntax*> base;
- using base::const_iterator;
- using base::begin;
- using base::end;
- using base::size;
- using base::operator[];
- using base::empty;
-
- Vector();
- explicit Vector(const base&);
- ~Vector();
- void accept(Visitor&) const;
- };
-
- // ---------------------
- // -- Syntax::Visitor --
- // ---------------------
- struct Syntax::Visitor {
- virtual void visit(const Atom&) = 0;
- virtual void visit(const Integer&);
- virtual void visit(const Character&);
- virtual void visit(const String&);
- virtual void visit(const Symbol&);
- virtual void visit(const Reference&);
- virtual void visit(const Anchor&) = 0;
- virtual void visit(const Quote&) = 0;
- virtual void visit(const Antiquote&) = 0;
- virtual void visit(const Expand&) = 0;
- virtual void visit(const Eval&) = 0;
- virtual void visit(const Splice&) = 0;
- virtual void visit(const Function&) = 0;
- virtual void visit(const Include&) = 0;
- virtual void visit(const Exclude&) = 0;
- virtual void visit(const DotTail&) = 0;
- virtual void visit(const List&) = 0;
- virtual void visit(const Vector&) = 0;
- };
-
- template<typename T>
- void
- unary_form<T>::accept(Visitor& v) const {
- v.visit(static_cast<const T&>(*this));
- }
-
- // ---------------
- // -- Allocator --
- // ---------------
-
- // The next two classes are helper classes for the main
- // allocation class Allocator. We use std::set as allocator
- // that guarantee uuniqueness of atomic syntax object with
- // respect to the constituent token. That container needs
- // a relational comparator. In an ideal world, this class
- // should not exist.
- struct SyntaxComparator {
- bool operator()(const Atom& lhs, const Atom& rhs) const {
- return std::less<BasicString>()(lhs.lexeme(), rhs.lexeme());
- }
-
- template<typename T>
- bool
- operator()(const unary_form<T>& lhs, const unary_form<T>& rhs) const {
- return std::less<const void*>()(lhs.body(), rhs.body());
- }
-
- bool operator()(const Anchor& lhs, const Anchor& rhs) const {
- return std::less<size_t>()(lhs.ref(), rhs.ref());
- }
- };
-
- template<typename T>
- struct UniqueAllocator : std::set<T, SyntaxComparator> {
- typedef std::set<T, SyntaxComparator> base;
- typedef typename base::const_iterator const_iterator;
-
- template<typename... Args>
- const T* allocate(const Args&... args) {
- return &*this->insert(T(args...)).first;
- }
- };
-
- // Allocator of syntax objects.
- struct Allocator {
- Allocator();
- ~Allocator();
-
- const Integer* make_integer(const Token&);
- const Character* make_character(const Token&);
- const String* make_string(const Token&);
- const Symbol* make_symbol(const Token&, Symbol::Kind);
- const Reference* make_reference(const Token&, size_t);
- const Anchor* make_anchor(size_t, const Syntax*);
- const Quote* make_quote(const Syntax*);
- const Antiquote* make_antiquote(const Syntax*);
- const Expand* make_expand(const Syntax*);
- const Eval* make_eval(const Syntax*);
- const Splice* make_splice(const Syntax*);
- const Function* make_function(const Syntax*);
- const Include* make_include(const Syntax*);
- const Exclude* make_exclude(const Syntax*);
- const DotTail* make_dot_tail(const Syntax*);
- const List* make_list(const std::vector<const Syntax*>&);
- const Vector* make_vector(const std::vector<const Syntax*>&);
-
- private:
- UniqueAllocator<Integer> ints;
- UniqueAllocator<Character> chars;
- UniqueAllocator<String> strs;
- UniqueAllocator<Symbol> syms;
- UniqueAllocator<Anchor> ancs;
- UniqueAllocator<Reference> refs;
- UniqueAllocator<Quote> quotes;
- UniqueAllocator<Antiquote> antis;
- UniqueAllocator<Expand> exps;
- UniqueAllocator<Function> funs;
- UniqueAllocator<Include> incs;
- UniqueAllocator<Exclude> excs;
- UniqueAllocator<Eval> evls;
- UniqueAllocator<Splice> spls;
- UniqueAllocator<DotTail> tails;
- Memory::Factory<List> lists;
- Memory::Factory<Vector> vectors;
- List empty_list;
- Vector empty_vector;
- };
-
- // ------------
- // -- Parser --
- // ------------
- // An object of this type transforms a sequence of tokens
- // into a sequence of syntax objects.
- // A parser object does not manage memory itself. Rather, it delegates
- // storage allocation for syntax objects to specialized
- // agents used to construct it.
- struct Parser {
- Parser(Allocator&, std::vector<const Syntax*>&);
- const Token* parse(const Token*, const Token*);
- private:
- Allocator& alloc;
- std::vector<const Syntax*>& syns;
-
- const Symbol* parse_symbol(const Token*&, const Token*);
- const Character* parse_character(const Token*&, const Token*);
- const Anchor* parse_anchor(const Token*&, const Token*);
- const Reference* parse_reference(const Token*&, const Token*);
- const Symbol* parse_uninterned(const Token*&, const Token*);
- const Function* parse_function(const Token*&, const Token*);
- const Quote* parse_quote(const Token*&, const Token*);
- const Antiquote* parse_antiquote(const Token*&, const Token*);
- const Include* parse_include(const Token*&, const Token*);
- const Exclude* parse_exclude(const Token*&, const Token*);
- const Expand* parse_expand(const Token*&, const Token*);
- const Eval* parse_eval(const Token*&, const Token*);
- const Splice* parse_splice(const Token*&, const Token*);
- const Vector* parse_vector(const Token*&, const Token*);
- const List* parse_list(const Token*&, const Token*);
- const Syntax* parse_syntax(const Token*&, const Token*);
- };
-
- // ------------
- // -- Module --
- // ------------
- // Entire s-expression input file.
- struct Module : std::vector<const Syntax*> {
- explicit Module(const std::string&);
- const std::string& name() const { return nm; }
- private:
- const std::string nm;
- StringPool raw_strs;
- Allocator allocator;
- };
- }
-}
-
-#endif // OPENAXIOM_SEXPR_INCLUDED
diff --git a/src/utils/sexpr.cc b/src/utils/sexpr.cc
deleted file mode 100644
index 3e6b5e90..00000000
--- a/src/utils/sexpr.cc
+++ /dev/null
@@ -1,934 +0,0 @@
-// Copyright (C) 2010-2013, Gabriel Dos Reis.
-// All rights reserved.
-// Written by Gabriel Dos Reis.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// - Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in
-// the documentation and/or other materials provided with the
-// distribution.
-//
-// - Neither the name of The Numerical Algorithms Group Ltd. nor the
-// names of its contributors may be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// --% Author: Gabriel Dos Reis.
-
-#include <ctype.h>
-#include <string.h>
-#include <iostream>
-#include <iterator>
-#include <open-axiom/sexpr>
-#include <open-axiom/FileMapping>
-
-namespace OpenAxiom {
- namespace Sexpr {
- template<typename T, int N>
- static inline int
- length(const T(&)[N]) {
- return N;
- }
-
- template<typename Sequence>
- static inline typename Sequence::const_pointer
- begin_ptr(const Sequence& s) {
- return &*s.begin();
- }
-
- template<typename Sequence>
- static inline typename Sequence::const_pointer
- end_ptr(const Sequence& s) {
- return s.empty() ? 0 : &*s.begin() + s.size();
- }
-
- std::ostream&
- operator<<(std::ostream& os, const Token& t) {
- switch (t.type) {
- case Token::semicolon: os << "SEMICOLON"; break;
- case Token::dot: os << "DOT"; break;
- case Token::comma: os << "COMMA"; break;
- case Token::open_paren: os << "OPEN_PAREN"; break;
- case Token::close_paren: os << "CLOSE_PAREN"; break;
- case Token::apostrophe: os << "APOSTROPHE"; break;
- case Token::backquote: os << "BACKQUOTE"; break;
- case Token::backslash: os << "BACKSLASH"; break;
- case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break;
- case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break;
- case Token::sharp_colon: os << "SHARP_COLON"; break;
- case Token::sharp_plus: os << "SHARP_PLUS"; break;
- case Token::sharp_minus: os << "SHARP_MINUS"; break;
- case Token::sharp_dot: os << "SHARP_DOT"; break;
- case Token::comma_at: os << "COMMA_AT"; break;
- case Token::integer: os << "INTEGER"; break;
- case Token::character: os << "CHARACTER"; break;
- case Token::string: os << "STRING"; break;
- case Token::identifier: os << "IDENTIFIER"; break;
- case Token::sharp_integer_sharp:
- os << "SHARP_INTEGER_SHARP"; break;
- case Token::sharp_integer_equal:
- os << "SHARP_INTEGER_EQUAL"; break;
- default: os << "UNKNOWN"; break;
- }
- os << '(';
- if (t.lexeme != 0) {
- os << '"';
- std::copy(t.lexeme->begin(), t.lexeme->end(),
- std::ostream_iterator<char>(os));
- os << '"';
- }
- else
- os << "<missing>";
- return os << ')';
- }
-
- // -----------
- // -- Lexer --
- // -----------
- static void
- syntax_error(const std::string& s) {
- throw BasicError(s);
- }
-
- // Return true if character `c' introduces a blank.
- static bool
- is_blank(char c) {
- return c == ' ' or c == '\t' or c == '\v'
- or c == '\n' or c == '\f' or c == '\r';
- }
-
- // Return true if the character `c' introduces a delimiter.
- static bool
- is_delimiter(char c) {
- return is_blank(c)
- or c == '(' or c == ')' or c == '\''
- or c == '`' or c == '#';
- }
-
- // Move `cur' past all consecutive blank characters, and
- // return the new position.
- static const Byte*
- skip_blank(const Byte*& cur, const Byte* end) {
- while (cur < end and is_blank(*cur))
- ++cur;
- return cur;
- }
-
- // Move `cur' to end-of-line marker.
- static const Byte*
- skip_to_eol(const Byte*& cur, const Byte* end) {
- // FIXME: properly handle CR+LF.
- while (cur < end and *cur != '\n')
- ++cur;
- return cur;
- }
-
- // Move `cur' until a word boundary is reached.
- static const Byte*
- skip_to_word_boundary(const Byte*& cur, const Byte* end) {
- bool saw_escape = false;
- for (; cur < end; ++cur) {
- if (saw_escape)
- saw_escape = false;
- else if (*cur == '\\')
- saw_escape = true;
- else if (is_delimiter(*cur))
- break;
- }
- return cur;
- }
-
- // Move `cur' one-past a non-esacaped character `c'.
- // Return true if the character was seen.
- static bool
- skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) {
- bool saw_escape = false;
- for (; cur < end; ++cur)
- if (saw_escape)
- saw_escape = false;
- else if (*cur == '\\')
- saw_escape = true;
- else if (*cur == c) {
- ++cur;
- return true;
- }
- return false;
- }
-
- // Move `cur' past the closing quote of string literal.
- // Return true if the closing fence was effectively seen.
- static inline bool
- skip_to_quote(const Byte*& cur, const Byte* end) {
- return skip_to_nonescaped_char(cur, end, '"');
- }
-
- // Return true if the character `c' be part of a non-absolute
- // identifier.
- static bool
- identifier_part(char c) {
- switch (c) {
- case '+': case '-': case '*': case '/': case '%': case '^':
- case '~': case '@': case '$': case '&': case '=':
- case '<': case '>': case '?': case '!': case '_':
- case '[': case ']': case '{': case '}':
- return true;
- default:
- return isalnum(c);
- }
- }
-
- // Return true if the character `c' has a special meaning after
- // the sharp character.
- static bool
- special_after_sharp(char c) {
- return c == '(' or c == '\'' or c == ':'
- or c == '+' or c == '-' or c == '.';
- }
-
- // Return true if the sequence `[cur, end)' has a prefix that is
- // an integer followrd by the equal sign or the sharp sign.
- // `cur' is moved along the way.
- static bool
- only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) {
- while (cur < end and isdigit(*cur))
- ++cur;
- return cur < end and (*cur == '#' or *cur == '=');
- }
-
- // The token `t' was thought to designate an identifier.
- // Reclassify it as an integer if, in fact, its lexeme consists
- // entirely of digits.
- static void
- maybe_reclassify(Token& t) {
- const Byte* cur = t.lexeme->begin();
- const Byte* end = t.lexeme->end();
- while (cur < end and isdigit(*cur))
- ++cur;
- if (cur == end)
- t.type = Token::integer;
- }
-
- // Returns true if the first characters in the range
- // [cur, last) start an identifier.
- static bool
- start_symbol(const Byte* cur, const Byte* last) {
- if (cur >= last)
- return false;
- return identifier_part(*cur)
- or *cur == '|' or *cur == ':';
- }
-
- // We are processing a symbol token. Accumulate all
- // legitimate characters till the end of the token.
- static void
- skip_to_end_of_symbol(const Byte*& cur, const Byte* end) {
- const char c = *cur;
- if (*cur == '|')
- skip_to_nonescaped_char(++cur, end, c);
- else
- skip_to_word_boundary(cur, end);
- if (cur < end and *cur == ':')
- skip_to_end_of_symbol(cur, end);
- }
-
- static Token
- match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) {
- Token t = { Token::identifier, 0 };
- const Byte* start = cur;
- skip_to_end_of_symbol(cur, end);
- t.lexeme = lexer->intern(start, cur - start);
- maybe_reclassify(t);
- return t;
- }
-
- const Byte*
- Lexer::tokenize(const Byte* cur, const Byte* end) {
- while (skip_blank(cur, end) < end) {
- Token t = { Token::unknown, 0 };
- switch (*cur) {
- case ';': {
- const Byte* start = cur;
- t.type = Token::semicolon;
- skip_to_eol(cur, end);
- t.lexeme = intern(start, cur - start);
- break;
- }
-
- case '.': case '(': case ')': case '\'': case '`':
- t.type = Token::Type(token::value(*cur));
- t.lexeme = intern(cur, 1);
- ++cur;
- break;
-
- case ',': {
- const Byte* start = cur;
- if (++cur < end and *cur == '@') {
- t.type = Token::comma_at;
- ++cur;
- }
- else
- t.type = Token::comma;
- t.lexeme = intern(start, cur - start);
- break;
- }
-
- case '\\':
- t = match_maybe_symbol(this, cur, end);
- break;
-
- case '#': {
- const Byte* start = cur;
- if (cur + 1 < end and special_after_sharp(cur[1])) {
- t.type = Token::Type(token::value(cur[0], cur[1]));
- t.lexeme = intern(cur, 2);
- cur += 2;
- }
- else if (cur + 1 < end and cur[1] == '\\') {
- start = cur += 2;
- if (not isalnum(*cur))
- ++cur;
- else
- skip_to_word_boundary(cur, end);
- t.type = Token::character;
- t.lexeme = intern(start, cur - start);
- }
- else if (only_digits_before_equal_or_shap(++cur, end)) {
- t.type = *cur == '#'
- ? Token::sharp_integer_sharp
- : Token::sharp_integer_equal;
- t.lexeme = intern(start, cur - start + 1);
- ++cur;
- }
- else {
- skip_to_word_boundary(cur, end);
- t.lexeme = intern(start, cur - start);
- }
- break;
- }
-
- case '"': {
- const Byte* start = cur;
- skip_to_quote(++cur, end);
- t.type = Token::string;
- t.lexeme = intern(start, cur - start);
- break;
- }
-
- default:
- if (start_symbol(cur, end))
- t = match_maybe_symbol(this, cur, end);
- else {
- const Byte* start = cur;
- skip_to_word_boundary(++cur, end);
- t.lexeme = intern(start, cur - start);
- }
- break;
- }
- tokens.push_back(t);
- }
- return cur;
- }
-
- // ----------
- // -- Atom --
- // ----------
- Atom::Atom(const Token& t) : tok(t) { }
-
- void
- Atom::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // -------------
- // -- Integer --
- // -------------
- Integer::Integer(const Token& t) : Atom(t) { }
-
- void
- Integer::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // ---------------
- // -- Character --
- // ---------------
- Character::Character(const Token& t) : Atom(t) { }
-
- void
- Character::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // ------------
- // -- String --
- // ------------
- String::String(const Token& t) : Atom(t) { }
-
- void
- String::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // ------------
- // -- Symbol --
- // ------------
- Symbol::Symbol(const Token& t, Kind k) : Atom(t), sort(k) { }
-
- void
- Symbol::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // ------------
- // -- Anchor --
- // ------------
- Anchor::Anchor(size_t t, const Syntax* s) : tag(t), val(s) { }
-
- void
- Anchor::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // ---------------
- // -- Reference --
- // ---------------
- Reference::Reference(const Token& t, size_t v) : Atom(t), pos(v) { }
-
- void
- Reference::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // -----------
- // -- Quote --
- // -----------
- Quote::Quote(const Syntax* s) : unary_form<Quote>(s) { }
-
- // ---------------
- // -- Antiquote --
- // ---------------
- Antiquote::Antiquote(const Syntax* s) : unary_form<Antiquote>(s) { }
-
- // ------------
- // -- Expand --
- // ------------
- Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { }
-
- // ----------
- // -- Eval --
- // ----------
- Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { }
-
- // ------------
- // -- Splice --
- // ------------
- Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { }
-
- // --------------
- // -- Function --
- // --------------
- Function::Function(const Syntax* s) : unary_form<Function>(s) { }
-
- // -------------
- // -- Include --
- Include::Include(const Syntax* s) : unary_form<Include>(s) { }
-
- // -------------
- // -- Exclude --
- Exclude::Exclude(const Syntax* s) : unary_form<Exclude>(s) { }
-
- // -------------
- // -- DotTail --
- // -------------
- DotTail::DotTail(const Syntax* f) : unary_form<DotTail>(f) { }
-
- // ----------
- // -- List --
- // ----------
- List::List() { }
-
- List::List(const base& elts) : base(elts) { }
-
- List::~List() { }
-
- void
- List::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // ------------
- // -- Vector --
- // ------------
- Vector::Vector() { }
-
- Vector::Vector(const base& elts) : base(elts) { }
-
- Vector::~Vector() { }
-
- void
- Vector::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // ---------------------
- // -- Syntax::Visitor --
- // ---------------------
-
- // implicitly convert a reference to `T' to a reference to `S'.
- template<typename S, typename T>
- inline const S&
- as(const T& t) {
- return t;
- }
-
- void
- Syntax::Visitor::visit(const Integer& i) {
- visit(as<Atom>(i));
- }
-
- void
- Syntax::Visitor::visit(const Character& c) {
- visit(as<Atom>(c));
- }
-
- void
- Syntax::Visitor::visit(const String& s) {
- visit(as<Atom>(s));
- }
-
- void
- Syntax::Visitor::visit(const Symbol& s) {
- visit(as<Atom>(s));
- }
-
- void
- Syntax::Visitor::visit(const Reference& r) {
- visit(as<Atom>(r));
- }
-
- // ---------------
- // -- Allocator --
- // ---------------
- Allocator::Allocator() { }
-
- // This destructor is defined here so that it provides
- // a single instantiation point for destructors of all
- // used templates floating around.
- Allocator::~Allocator() { }
-
- const Character*
- Allocator::make_character(const Token& t) {
- return chars.allocate(t);
- }
-
- const Integer*
- Allocator::make_integer(const Token& t) {
- return ints.allocate(t);
- }
-
- const String*
- Allocator::make_string(const Token& t) {
- return strs.allocate(t);
- }
-
- const Symbol*
- Allocator::make_symbol(const Token& t, Symbol::Kind k) {
- return syms.allocate(t, k);
- }
-
- const Anchor*
- Allocator::make_anchor(size_t t, const Syntax* s) {
- return ancs.allocate(t, s);
- }
-
- const Reference*
- Allocator::make_reference(const Token& t, size_t i) {
- return refs.allocate(t, i);
- }
-
- const Quote*
- Allocator::make_quote(const Syntax* s) {
- return quotes.allocate(s);
- }
-
- const Antiquote*
- Allocator::make_antiquote(const Syntax* s) {
- return antis.allocate(s);
- }
-
- const Expand*
- Allocator::make_expand(const Syntax* s) {
- return exps.allocate(s);
- }
-
- const Eval*
- Allocator::make_eval(const Syntax* s) {
- return evls.allocate(s);
- }
-
- const Splice*
- Allocator::make_splice(const Syntax* s) {
- return spls.allocate(s);
- }
-
- const Function*
- Allocator::make_function(const Syntax* s) {
- return funs.allocate(s);
- }
-
- const Include*
- Allocator::make_include(const Syntax* s) {
- return incs.allocate(s);
- }
-
- const Exclude*
- Allocator::make_exclude(const Syntax* s) {
- return excs.allocate(s);
- }
-
- const DotTail*
- Allocator::make_dot_tail(const Syntax* f) {
- return tails.allocate(f);
- }
-
- const List*
- Allocator::make_list(const std::vector<const Syntax*>& elts) {
- if (elts.empty())
- return &empty_list;
- return lists.make(elts);
- }
-
- const Vector*
- Allocator::make_vector(const std::vector<const Syntax*>& elts) {
- if (elts.empty())
- return &empty_vector;
- return vectors.make(elts);
- }
-
- // ------------
- // -- Parser --
- // ------------
-
- // Signal a parse error
- static void
- parse_error(const std::string& s) {
- throw BasicError(s);
- }
-
- // Signal that an expected syntax object was missing
- static void
- expected_syntax(const std::string& s) {
- parse_error("expected " + s);
- }
-
- // Signal an abrupt end of input
- static void
- unexpected_end_of_input(const std::string& s) {
- parse_error("unexpected end of input after " + s);
- }
-
- // Signal a missing closing parenthesis
- static void
- missing_closer_for(const std::string& s) {
- parse_error("missing closing parenthesis for " + s);
- }
-
- // The sequence of characters in [cur, last) consists
- // entirely of digits. Return the corresponding natural value.
- static size_t
- natural_value(const Byte* cur, const Byte* last) {
- size_t n = 0;
- for (; cur < last; ++cur)
- // FIXME: check for overflow.
- n = 10 * n + (*cur - '0');
- return n;
- }
-
- // Parse a plain identifier or a Lisp-style keyword identifier.
- const Symbol*
- Parser::parse_symbol(const Token*& cur, const Token* last) {
- Symbol::Kind kind = *cur->lexeme->begin() == ':'
- ? Symbol::keyword
- : Symbol::ordinary;
- return alloc.make_symbol(*cur++, kind);
- }
-
- // List of lower case character names
- static const char* charname[] = {
- "newline", "space", "page", "tab",
- "backspace", "return", "linefeed"
- };
-
- static bool
- equal_character_name(BasicString lhs, const char* rhs) {
- if (lhs->size() != strlen(rhs))
- return false;
- for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur)
- if (tolower(*cur) != *rhs++)
- return false;
- return true;
- }
-
- static bool
- valid_character_name(BasicString s) {
- for (int i = 0; i < length(charname); ++i)
- if (equal_character_name(s, charname[i]))
- return true;
- return false;
- }
-
- const Character*
- Parser::parse_character(const Token*& cur, const Token* last) {
- if (cur->lexeme->size() != 1
- and not valid_character_name(cur->lexeme))
- parse_error("invalid literal character syntax");
- return alloc.make_character(*cur++);
- }
-
- // Parse an anchor definition of the form #n=<syntax>
- const Anchor*
- Parser::parse_anchor(const Token*& cur, const Token* last) {
- const size_t n = natural_value(cur->lexeme->begin() + 1,
- cur->lexeme->end() - 1);
- if (++cur == last)
- unexpected_end_of_input("sharp-integer-equal sign");
- return alloc.make_anchor(n, parse_syntax(cur, last));
- }
-
- // Parse a reference to an anchor, #n#
- const Reference*
- Parser::parse_reference(const Token*& cur, const Token* last) {
- const size_t n = natural_value(cur->lexeme->begin() + 1,
- cur->lexeme->end() - 1);
- return alloc.make_reference(*cur++, n);
- }
-
- // Parse an uninterned symbol #:<identifier>
- const Symbol*
- Parser::parse_uninterned(const Token*& cur, const Token* last) {
- if (cur == last or cur->type != Token::identifier)
- expected_syntax("symbol after sharp-colon sign");
- // FIXME: check that the identifier is not a keyword.
- return alloc.make_symbol(*cur++, Symbol::uninterned);
- }
-
- // Parse a function syntax: #'<syntax>
- const Function*
- Parser::parse_function(const Token*& cur, const Token* last) {
- if (cur == last)
- unexpected_end_of_input("sharp-quote sign");
- return alloc.make_function(parse_syntax(cur, last));
- }
-
- // Parse a quotation
- const Quote*
- Parser::parse_quote(const Token*& cur, const Token* last) {
- if (cur == last)
- unexpected_end_of_input("quote sign");
- return alloc.make_quote(parse_syntax(cur, last));
- }
-
- // Parse an antiquotation
- const Antiquote*
- Parser::parse_antiquote(const Token*& cur, const Token* last) {
- if (cur == last)
- unexpected_end_of_input("backquote sign");
- return alloc.make_antiquote(parse_syntax(cur, last));
- }
-
- // Parse an expansion request form
- const Expand*
- Parser::parse_expand(const Token*& cur, const Token* last) {
- const Syntax* s = parse_syntax(cur, last);
- if (s == 0)
- unexpected_end_of_input("comma sign");
- return alloc.make_expand(s);
- }
-
- // Parse conditional inclusions
- const Include*
- Parser::parse_include(const Token*& cur, const Token* last) {
- const Syntax* s = parse_syntax(cur, last);
- if (s == 0)
- unexpected_end_of_input("sharp-plus sign");
- return alloc.make_include(s);
- }
-
- const Exclude*
- Parser::parse_exclude(const Token*& cur, const Token* last) {
- const Syntax* s = parse_syntax(cur, last);
- if (s == 0)
- unexpected_end_of_input("sharp-minus sign");
- return alloc.make_exclude(s);
- }
-
- const Eval*
- Parser::parse_eval(const Token*& cur, const Token* last) {
- const Syntax* s = parse_syntax(cur, last);
- if (s == 0)
- unexpected_end_of_input("sharp-dot sign");
- return alloc.make_eval(s);
- }
-
- const Splice*
- Parser::parse_splice(const Token*& cur, const Token* last) {
- const Syntax* s = parse_syntax(cur, last);
- if (s == 0)
- unexpected_end_of_input("comma-at sign");
- return alloc.make_splice(s);
- }
-
- // Skip tokens that are semantically blanks, e.g. comments.
- // Return true if not at end of tokens.
- static bool
- skip_ignorable_tokens(const Token*& cur, const Token* last) {
- while (cur < last and cur->type == Token::semicolon)
- ++cur;
- return cur != last;
- }
-
- // Parse a vector of syntax objects: #(s .. s)
- const Vector*
- Parser::parse_vector(const Token*& cur, const Token* last) {
- std::vector<const Syntax*> elts;
- while (skip_ignorable_tokens(cur, last)
- and cur->type != Token::close_paren)
- elts.push_back(parse_syntax(cur, last));
- if (cur == last)
- missing_closer_for("vector");
- ++cur;
- return alloc.make_vector(elts);
- }
-
- // Constructs a pair or a list syntax object.
- const List*
- Parser::parse_list(const Token*& cur, const Token* last) {
- std::vector<const Syntax*> elts;
- while (skip_ignorable_tokens(cur, last)
- and cur->type != Token::close_paren) {
- if (cur->type == Token::dot) {
- skip_ignorable_tokens(++cur, last);
- if (const Syntax* s = parse_syntax(cur, last)) {
- elts.push_back(alloc.make_dot_tail(s));
- break;
- }
- }
- elts.push_back(parse_syntax(cur, last));
- }
- if (cur == last or cur->type != Token::close_paren)
- missing_closer_for("list");
- ++cur;
- return alloc.make_list(elts);
- }
-
- Parser::Parser(Allocator& a, std::vector<const Syntax*>& v)
- : alloc(a), syns(v) { }
-
- static std::string
- to_string(BasicString s) {
- return { s->begin(), s->end() };
- }
-
- const Syntax*
- Parser::parse_syntax(const Token*& cur, const Token* last) {
- if (not skip_ignorable_tokens(cur, last))
- return 0;
-
- switch (cur->type) {
- case Token::integer:
- return alloc.make_integer(*cur++);
-
- case Token::character:
- return parse_character(cur, last);
-
- case Token::string:
- return alloc.make_string(*cur++);
-
- case Token::identifier:
- return parse_symbol(cur, last);
-
- case Token::sharp_integer_equal:
- return parse_anchor(cur, last);
-
- case Token::sharp_integer_sharp:
- return parse_reference(cur, last);
-
- case Token::sharp_colon:
- return parse_uninterned(++cur, last);
-
- case Token::sharp_apostrophe:
- return parse_function(++cur, last);
-
- case Token::sharp_open_paren:
- return parse_vector(++cur, last);
-
- case Token::apostrophe:
- return parse_quote(++cur, last);
-
- case Token::open_paren:
- return parse_list(++cur, last);
-
- case Token::sharp_plus:
- return parse_include(++cur, last);
-
- case Token::sharp_minus:
- return parse_exclude(++cur, last);
-
- case Token::sharp_dot:
- return parse_eval(++cur, last);
-
- case Token::backquote:
- return parse_antiquote(++cur, last);
-
- case Token::comma:
- return parse_expand(++cur, last);
-
- case Token::comma_at:
- return parse_splice(++cur, last);
-
- default:
- parse_error(std::string("parse error before ")
- + to_string(cur->lexeme));
- return 0; // never executed
- }
- }
-
- const Token*
- Parser::parse(const Token* cur, const Token* last) {
- while (cur < last)
- if (const Syntax* s = parse_syntax(cur, last))
- syns.push_back(s);
- return cur;
- }
-
- Module::Module(const std::string& s) : nm(s) {
- std::vector<Token> tokens;
- Memory::FileMapping input(s);
- Lexer lexer(raw_strs, tokens);
- const Byte* rest = lexer.tokenize(input.begin(), input.end());
- if (rest != input.end())
- syntax_error("syntax error");
- Parser parser(allocator, *this);
- const Token* tok = parser.parse(begin_ptr(tokens), end_ptr(tokens));
- if (tok != end_ptr(tokens))
- parse_error("parse error");
- }
- }
-}