diff options
author | dos-reis <gdr@axiomatics.org> | 2013-06-25 04:08:39 +0000 |
---|---|---|
committer | dos-reis <gdr@axiomatics.org> | 2013-06-25 04:08:39 +0000 |
commit | f9e4a03a220766099d6b5fc683a58185c4805a05 (patch) | |
tree | 9a7eb92745c5feeed83d37aff61123f6db4fd7cc /src/utils | |
parent | bba0c431de24f2291011be1960eeb1f5d15dfb8b (diff) | |
download | open-axiom-f9e4a03a220766099d6b5fc683a58185c4805a05.tar.gz |
* include/sexpr.H: Move from utils.
(UniqueAllocator): Remove.
(SyntaxComparator): Likewise.
* syntax/sexpr.cc: Move from utils.
Diffstat (limited to 'src/utils')
-rw-r--r-- | src/utils/Makefile.in | 6 | ||||
-rw-r--r-- | src/utils/sexpr.H | 495 | ||||
-rw-r--r-- | src/utils/sexpr.cc | 934 |
3 files changed, 3 insertions, 1432 deletions
diff --git a/src/utils/Makefile.in b/src/utils/Makefile.in index a3021748..0ac6b300 100644 --- a/src/utils/Makefile.in +++ b/src/utils/Makefile.in @@ -36,14 +36,14 @@ hammer_SOURCES = hammer.cc hammer_OBJECTS = $(hammer_SOURCES:.cc=.lo) hammer_LDADD = -L. -lOpenAxiom -libOpenAxiom_HEADERS = hash-table.H string-pool.H sexpr.H vm.H +libOpenAxiom_HEADERS = hash-table.H string-pool.H vm.H libOpenAxiom_SOURCES = \ - storage.cc string-pool.cc sexpr.cc command.cc \ + storage.cc string-pool.cc command.cc \ filesystem.cc vm.cc libOpenAxiom_OBJECTS = $(libOpenAxiom_SOURCES:.cc=.lo) -oa_public_headers = hash-table string-pool sexpr vm +oa_public_headers = hash-table string-pool vm ## Where we store public header files oa_target_headerdir = $(oa_target_includedir)/open-axiom diff --git a/src/utils/sexpr.H b/src/utils/sexpr.H deleted file mode 100644 index 5c9934d5..00000000 --- a/src/utils/sexpr.H +++ /dev/null @@ -1,495 +0,0 @@ -// Copyright (C) 2010-2013, Gabriel Dos Reis. -// All rights reserved. -// Written by Gabriel Dos Reis. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in -// the documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of The Numerical Algorithms Group Ltd. nor the -// names of its contributors may be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef OPENAXIOM_SEXPR_INCLUDED -#define OPENAXIOM_SEXPR_INCLUDED - -// --% Author: Gabriel Dos Reis. -// --% Description: -// --% A simple support for s-expressions. By design, no ambition -// --% for full-fledged Common Lisp reader capability. Rather, -// --% the aim is a simple data structure for exchanging data -// --% between several components of the OpenAxiom system. -// --% Users interested in fullblown Lisp syntax should seek -// --% to acquire Lisp systems, many of which are freely available. - -#include <iosfwd> -#include <vector> -#include <set> -#include <open-axiom/string-pool> -#include <open-axiom/token> - -namespace OpenAxiom { - namespace Sexpr { - struct BasicError { - explicit BasicError(const std::string& s) : msg(s) { } - const std::string& message() const { return msg; } - protected: - std::string msg; - }; - - // ----------- - // -- Token -- - // ----------- - struct Token { - enum Type { - unknown, // unidentified token - semicolon = token::value(";"), // comment - dot = token::value("."), - comma = token::value(","), - open_paren = token::value("("), - close_paren = token::value(")"), - apostrophe = token::value("'"), - backquote = token::value("`"), - backslash = token::value("\\"), - sharp_open_paren = token::value("#("), - sharp_apostrophe = token::value("#'"), - sharp_colon = token::value("#:"), - sharp_plus = token::value("#+"), - sharp_minus = token::value("#-"), - sharp_dot = token::value("#."), - comma_at = token::value(",@"), - digraph_end = token::value(0xff,0xff), - integer, // integer literal - character, // character literal - string, // string literal - identifier, // plain identifier - sharp_integer_equal, // anchor definition, #n=<form> - sharp_integer_sharp // back reference, #n# - }; - - Type type; // class of this token - BasicString lexeme; // characters making up this token - }; - - // Print a token object on an output stream. - // Note: this function is for debugging purpose; in particular - // it does not `prettyprint' tokens. - std::ostream& operator<<(std::ostream&, const Token&); - - // ----------- - // -- Lexer -- - // ----------- - // An object of this type transforms a sequence of characters - // into a sequence of tokens as defined above. - // A lexer does not manage memory itself. Rather, it delegates - // storage allocation for lexemes and tokens to specialized - // agents used to construct it. - struct Lexer { - Lexer(StringPool& pool, std::vector<Token>& toks) - : strings(pool), tokens(toks) { } - - const Byte* tokenize(const Byte*, const Byte*); - BasicString intern(const Byte* s, size_t n) { - return strings.intern(s, n); - } - - private: - StringPool& strings; // where to allocate lexemes from - std::vector<Token>& tokens; // where to deposite tokens. - }; - - // ------------ - // -- Syntax -- - // ------------ - // Base class of syntax object classes. - struct Syntax { - struct Visitor; // base class of syntax visitors - virtual void accept(Visitor&) const = 0; - }; - - // ---------- - // -- Atom -- - // ---------- - // An atom is a syntax object consisting of exatly one token. - // This should not be confused with the notion of atom - // in Lisp languages. - struct Atom : Syntax { - const Token& token() const { return tok; } - BasicString lexeme() const { return tok.lexeme; } - void accept(Visitor&) const; - protected: - const Token tok; - Atom(const Token&); - }; - - // ------------- - // -- Integer -- - // ------------- - // Integer literal syntax objects - struct Integer : Atom { - explicit Integer(const Token&); - void accept(Visitor&) const; - }; - - // --------------- - // -- Character -- - // --------------- - // Character literal syntax objects. - struct Character : Atom { - explicit Character(const Token&); - void accept(Visitor&) const; - }; - - // ------------ - // -- String -- - // ------------ - // Striing literal syntax objjects. - struct String : Atom { - explicit String(const Token&); - void accept(Visitor&) const; - }; - - // ------------ - // -- Symbol -- - // ------------ - struct Symbol : Atom { - enum Kind { - uninterned, // uninterned symbol - ordinary, // an interned symbol - keyword // a keyword symbol - }; - Symbol(const Token&, Kind); - Kind kin() const { return sort; } - void accept(Visitor&) const; - private: - const Kind sort; - }; - - // --------------- - // -- Reference -- - // --------------- - // Back reference object to a syntax object. - struct Reference : Atom { - Reference(const Token&, size_t); - size_t tag() const { return pos; } - void accept(Visitor&) const; - private: - const size_t pos; - }; - - // ------------ - // -- Anchor -- - // ------------ - // Base anchor syntax object. - struct Anchor : Syntax { - Anchor(size_t, const Syntax*); - size_t ref() const { return tag; } - const Syntax* value() const { return val; } - void accept(Visitor&) const; - private: - const size_t tag; - const Syntax* const val; - }; - - // -- Abstract over common implementation of unary special operators. - template<typename T> - struct unary_form : Syntax { - const Syntax* body() const { return form; } - void accept(Visitor&) const; - protected: - unary_form(const Syntax* f) : form(f) { } - private: - const Syntax* const form; - }; - - // ----------- - // -- Quote -- - // ----------- - // Quotation syntax object. - struct Quote : unary_form<Quote> { - explicit Quote(const Syntax*); - }; - - // --------------- - // -- Antiquote -- - // --------------- - // Quasi-quotation syntax object. - struct Antiquote : unary_form<Antiquote> { - explicit Antiquote(const Syntax*); - }; - - // ------------ - // -- Expand -- - // ------------ - // Expansion request inside a quasi-quotation. - struct Expand : unary_form<Expand> { - explicit Expand(const Syntax*); - }; - - // ---------- - // -- Eval -- - // ---------- - // Read-time evaluation request syntax object. - struct Eval : unary_form<Eval> { - explicit Eval(const Syntax*); - }; - - // ------------ - // -- Splice -- - // ------------ - // Splice request syntax object inside a quasi-quotation. - struct Splice : unary_form<Splice> { - explicit Splice(const Syntax*); - }; - - // -------------- - // -- Function -- - // -------------- - // Function literal syntax object. - struct Function : unary_form<Function> { - explicit Function(const Syntax*); - }; - - // ------------- - // -- DotTail -- - // ------------- - // Objects of this type represents the tail of syntactic - // objects denoting dotted pair syntax `(a . b)'. - struct DotTail : unary_form<DotTail> { - explicit DotTail(const Syntax*); - }; - - // ------------- - // -- Include -- - // ------------- - // Conditional inclusion syntax object - struct Include : unary_form<Include> { - explicit Include(const Syntax*); - }; - - // ------------- - // -- Exclude -- - // ------------- - // Conditional exclusion syntax object - struct Exclude : unary_form<Exclude> { - explicit Exclude(const Syntax*); - }; - - // ---------- - // -- List -- - // ---------- - // List syntax objects. - struct List : Syntax, private std::vector<const Syntax*> { - typedef std::vector<const Syntax*> base; - using base::const_iterator; - using base::begin; - using base::end; - using base::size; - using base::empty; - - List(); - explicit List(const base&); - ~List(); - void accept(Visitor&) const; - }; - - // ------------ - // -- Vector -- - // ------------ - // Vector syntax objects. - struct Vector : Syntax, private std::vector<const Syntax*> { - typedef std::vector<const Syntax*> base; - using base::const_iterator; - using base::begin; - using base::end; - using base::size; - using base::operator[]; - using base::empty; - - Vector(); - explicit Vector(const base&); - ~Vector(); - void accept(Visitor&) const; - }; - - // --------------------- - // -- Syntax::Visitor -- - // --------------------- - struct Syntax::Visitor { - virtual void visit(const Atom&) = 0; - virtual void visit(const Integer&); - virtual void visit(const Character&); - virtual void visit(const String&); - virtual void visit(const Symbol&); - virtual void visit(const Reference&); - virtual void visit(const Anchor&) = 0; - virtual void visit(const Quote&) = 0; - virtual void visit(const Antiquote&) = 0; - virtual void visit(const Expand&) = 0; - virtual void visit(const Eval&) = 0; - virtual void visit(const Splice&) = 0; - virtual void visit(const Function&) = 0; - virtual void visit(const Include&) = 0; - virtual void visit(const Exclude&) = 0; - virtual void visit(const DotTail&) = 0; - virtual void visit(const List&) = 0; - virtual void visit(const Vector&) = 0; - }; - - template<typename T> - void - unary_form<T>::accept(Visitor& v) const { - v.visit(static_cast<const T&>(*this)); - } - - // --------------- - // -- Allocator -- - // --------------- - - // The next two classes are helper classes for the main - // allocation class Allocator. We use std::set as allocator - // that guarantee uuniqueness of atomic syntax object with - // respect to the constituent token. That container needs - // a relational comparator. In an ideal world, this class - // should not exist. - struct SyntaxComparator { - bool operator()(const Atom& lhs, const Atom& rhs) const { - return std::less<BasicString>()(lhs.lexeme(), rhs.lexeme()); - } - - template<typename T> - bool - operator()(const unary_form<T>& lhs, const unary_form<T>& rhs) const { - return std::less<const void*>()(lhs.body(), rhs.body()); - } - - bool operator()(const Anchor& lhs, const Anchor& rhs) const { - return std::less<size_t>()(lhs.ref(), rhs.ref()); - } - }; - - template<typename T> - struct UniqueAllocator : std::set<T, SyntaxComparator> { - typedef std::set<T, SyntaxComparator> base; - typedef typename base::const_iterator const_iterator; - - template<typename... Args> - const T* allocate(const Args&... args) { - return &*this->insert(T(args...)).first; - } - }; - - // Allocator of syntax objects. - struct Allocator { - Allocator(); - ~Allocator(); - - const Integer* make_integer(const Token&); - const Character* make_character(const Token&); - const String* make_string(const Token&); - const Symbol* make_symbol(const Token&, Symbol::Kind); - const Reference* make_reference(const Token&, size_t); - const Anchor* make_anchor(size_t, const Syntax*); - const Quote* make_quote(const Syntax*); - const Antiquote* make_antiquote(const Syntax*); - const Expand* make_expand(const Syntax*); - const Eval* make_eval(const Syntax*); - const Splice* make_splice(const Syntax*); - const Function* make_function(const Syntax*); - const Include* make_include(const Syntax*); - const Exclude* make_exclude(const Syntax*); - const DotTail* make_dot_tail(const Syntax*); - const List* make_list(const std::vector<const Syntax*>&); - const Vector* make_vector(const std::vector<const Syntax*>&); - - private: - UniqueAllocator<Integer> ints; - UniqueAllocator<Character> chars; - UniqueAllocator<String> strs; - UniqueAllocator<Symbol> syms; - UniqueAllocator<Anchor> ancs; - UniqueAllocator<Reference> refs; - UniqueAllocator<Quote> quotes; - UniqueAllocator<Antiquote> antis; - UniqueAllocator<Expand> exps; - UniqueAllocator<Function> funs; - UniqueAllocator<Include> incs; - UniqueAllocator<Exclude> excs; - UniqueAllocator<Eval> evls; - UniqueAllocator<Splice> spls; - UniqueAllocator<DotTail> tails; - Memory::Factory<List> lists; - Memory::Factory<Vector> vectors; - List empty_list; - Vector empty_vector; - }; - - // ------------ - // -- Parser -- - // ------------ - // An object of this type transforms a sequence of tokens - // into a sequence of syntax objects. - // A parser object does not manage memory itself. Rather, it delegates - // storage allocation for syntax objects to specialized - // agents used to construct it. - struct Parser { - Parser(Allocator&, std::vector<const Syntax*>&); - const Token* parse(const Token*, const Token*); - private: - Allocator& alloc; - std::vector<const Syntax*>& syns; - - const Symbol* parse_symbol(const Token*&, const Token*); - const Character* parse_character(const Token*&, const Token*); - const Anchor* parse_anchor(const Token*&, const Token*); - const Reference* parse_reference(const Token*&, const Token*); - const Symbol* parse_uninterned(const Token*&, const Token*); - const Function* parse_function(const Token*&, const Token*); - const Quote* parse_quote(const Token*&, const Token*); - const Antiquote* parse_antiquote(const Token*&, const Token*); - const Include* parse_include(const Token*&, const Token*); - const Exclude* parse_exclude(const Token*&, const Token*); - const Expand* parse_expand(const Token*&, const Token*); - const Eval* parse_eval(const Token*&, const Token*); - const Splice* parse_splice(const Token*&, const Token*); - const Vector* parse_vector(const Token*&, const Token*); - const List* parse_list(const Token*&, const Token*); - const Syntax* parse_syntax(const Token*&, const Token*); - }; - - // ------------ - // -- Module -- - // ------------ - // Entire s-expression input file. - struct Module : std::vector<const Syntax*> { - explicit Module(const std::string&); - const std::string& name() const { return nm; } - private: - const std::string nm; - StringPool raw_strs; - Allocator allocator; - }; - } -} - -#endif // OPENAXIOM_SEXPR_INCLUDED diff --git a/src/utils/sexpr.cc b/src/utils/sexpr.cc deleted file mode 100644 index 3e6b5e90..00000000 --- a/src/utils/sexpr.cc +++ /dev/null @@ -1,934 +0,0 @@ -// Copyright (C) 2010-2013, Gabriel Dos Reis. -// All rights reserved. -// Written by Gabriel Dos Reis. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in -// the documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of The Numerical Algorithms Group Ltd. nor the -// names of its contributors may be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --% Author: Gabriel Dos Reis. - -#include <ctype.h> -#include <string.h> -#include <iostream> -#include <iterator> -#include <open-axiom/sexpr> -#include <open-axiom/FileMapping> - -namespace OpenAxiom { - namespace Sexpr { - template<typename T, int N> - static inline int - length(const T(&)[N]) { - return N; - } - - template<typename Sequence> - static inline typename Sequence::const_pointer - begin_ptr(const Sequence& s) { - return &*s.begin(); - } - - template<typename Sequence> - static inline typename Sequence::const_pointer - end_ptr(const Sequence& s) { - return s.empty() ? 0 : &*s.begin() + s.size(); - } - - std::ostream& - operator<<(std::ostream& os, const Token& t) { - switch (t.type) { - case Token::semicolon: os << "SEMICOLON"; break; - case Token::dot: os << "DOT"; break; - case Token::comma: os << "COMMA"; break; - case Token::open_paren: os << "OPEN_PAREN"; break; - case Token::close_paren: os << "CLOSE_PAREN"; break; - case Token::apostrophe: os << "APOSTROPHE"; break; - case Token::backquote: os << "BACKQUOTE"; break; - case Token::backslash: os << "BACKSLASH"; break; - case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break; - case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break; - case Token::sharp_colon: os << "SHARP_COLON"; break; - case Token::sharp_plus: os << "SHARP_PLUS"; break; - case Token::sharp_minus: os << "SHARP_MINUS"; break; - case Token::sharp_dot: os << "SHARP_DOT"; break; - case Token::comma_at: os << "COMMA_AT"; break; - case Token::integer: os << "INTEGER"; break; - case Token::character: os << "CHARACTER"; break; - case Token::string: os << "STRING"; break; - case Token::identifier: os << "IDENTIFIER"; break; - case Token::sharp_integer_sharp: - os << "SHARP_INTEGER_SHARP"; break; - case Token::sharp_integer_equal: - os << "SHARP_INTEGER_EQUAL"; break; - default: os << "UNKNOWN"; break; - } - os << '('; - if (t.lexeme != 0) { - os << '"'; - std::copy(t.lexeme->begin(), t.lexeme->end(), - std::ostream_iterator<char>(os)); - os << '"'; - } - else - os << "<missing>"; - return os << ')'; - } - - // ----------- - // -- Lexer -- - // ----------- - static void - syntax_error(const std::string& s) { - throw BasicError(s); - } - - // Return true if character `c' introduces a blank. - static bool - is_blank(char c) { - return c == ' ' or c == '\t' or c == '\v' - or c == '\n' or c == '\f' or c == '\r'; - } - - // Return true if the character `c' introduces a delimiter. - static bool - is_delimiter(char c) { - return is_blank(c) - or c == '(' or c == ')' or c == '\'' - or c == '`' or c == '#'; - } - - // Move `cur' past all consecutive blank characters, and - // return the new position. - static const Byte* - skip_blank(const Byte*& cur, const Byte* end) { - while (cur < end and is_blank(*cur)) - ++cur; - return cur; - } - - // Move `cur' to end-of-line marker. - static const Byte* - skip_to_eol(const Byte*& cur, const Byte* end) { - // FIXME: properly handle CR+LF. - while (cur < end and *cur != '\n') - ++cur; - return cur; - } - - // Move `cur' until a word boundary is reached. - static const Byte* - skip_to_word_boundary(const Byte*& cur, const Byte* end) { - bool saw_escape = false; - for (; cur < end; ++cur) { - if (saw_escape) - saw_escape = false; - else if (*cur == '\\') - saw_escape = true; - else if (is_delimiter(*cur)) - break; - } - return cur; - } - - // Move `cur' one-past a non-esacaped character `c'. - // Return true if the character was seen. - static bool - skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) { - bool saw_escape = false; - for (; cur < end; ++cur) - if (saw_escape) - saw_escape = false; - else if (*cur == '\\') - saw_escape = true; - else if (*cur == c) { - ++cur; - return true; - } - return false; - } - - // Move `cur' past the closing quote of string literal. - // Return true if the closing fence was effectively seen. - static inline bool - skip_to_quote(const Byte*& cur, const Byte* end) { - return skip_to_nonescaped_char(cur, end, '"'); - } - - // Return true if the character `c' be part of a non-absolute - // identifier. - static bool - identifier_part(char c) { - switch (c) { - case '+': case '-': case '*': case '/': case '%': case '^': - case '~': case '@': case '$': case '&': case '=': - case '<': case '>': case '?': case '!': case '_': - case '[': case ']': case '{': case '}': - return true; - default: - return isalnum(c); - } - } - - // Return true if the character `c' has a special meaning after - // the sharp character. - static bool - special_after_sharp(char c) { - return c == '(' or c == '\'' or c == ':' - or c == '+' or c == '-' or c == '.'; - } - - // Return true if the sequence `[cur, end)' has a prefix that is - // an integer followrd by the equal sign or the sharp sign. - // `cur' is moved along the way. - static bool - only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) { - while (cur < end and isdigit(*cur)) - ++cur; - return cur < end and (*cur == '#' or *cur == '='); - } - - // The token `t' was thought to designate an identifier. - // Reclassify it as an integer if, in fact, its lexeme consists - // entirely of digits. - static void - maybe_reclassify(Token& t) { - const Byte* cur = t.lexeme->begin(); - const Byte* end = t.lexeme->end(); - while (cur < end and isdigit(*cur)) - ++cur; - if (cur == end) - t.type = Token::integer; - } - - // Returns true if the first characters in the range - // [cur, last) start an identifier. - static bool - start_symbol(const Byte* cur, const Byte* last) { - if (cur >= last) - return false; - return identifier_part(*cur) - or *cur == '|' or *cur == ':'; - } - - // We are processing a symbol token. Accumulate all - // legitimate characters till the end of the token. - static void - skip_to_end_of_symbol(const Byte*& cur, const Byte* end) { - const char c = *cur; - if (*cur == '|') - skip_to_nonescaped_char(++cur, end, c); - else - skip_to_word_boundary(cur, end); - if (cur < end and *cur == ':') - skip_to_end_of_symbol(cur, end); - } - - static Token - match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) { - Token t = { Token::identifier, 0 }; - const Byte* start = cur; - skip_to_end_of_symbol(cur, end); - t.lexeme = lexer->intern(start, cur - start); - maybe_reclassify(t); - return t; - } - - const Byte* - Lexer::tokenize(const Byte* cur, const Byte* end) { - while (skip_blank(cur, end) < end) { - Token t = { Token::unknown, 0 }; - switch (*cur) { - case ';': { - const Byte* start = cur; - t.type = Token::semicolon; - skip_to_eol(cur, end); - t.lexeme = intern(start, cur - start); - break; - } - - case '.': case '(': case ')': case '\'': case '`': - t.type = Token::Type(token::value(*cur)); - t.lexeme = intern(cur, 1); - ++cur; - break; - - case ',': { - const Byte* start = cur; - if (++cur < end and *cur == '@') { - t.type = Token::comma_at; - ++cur; - } - else - t.type = Token::comma; - t.lexeme = intern(start, cur - start); - break; - } - - case '\\': - t = match_maybe_symbol(this, cur, end); - break; - - case '#': { - const Byte* start = cur; - if (cur + 1 < end and special_after_sharp(cur[1])) { - t.type = Token::Type(token::value(cur[0], cur[1])); - t.lexeme = intern(cur, 2); - cur += 2; - } - else if (cur + 1 < end and cur[1] == '\\') { - start = cur += 2; - if (not isalnum(*cur)) - ++cur; - else - skip_to_word_boundary(cur, end); - t.type = Token::character; - t.lexeme = intern(start, cur - start); - } - else if (only_digits_before_equal_or_shap(++cur, end)) { - t.type = *cur == '#' - ? Token::sharp_integer_sharp - : Token::sharp_integer_equal; - t.lexeme = intern(start, cur - start + 1); - ++cur; - } - else { - skip_to_word_boundary(cur, end); - t.lexeme = intern(start, cur - start); - } - break; - } - - case '"': { - const Byte* start = cur; - skip_to_quote(++cur, end); - t.type = Token::string; - t.lexeme = intern(start, cur - start); - break; - } - - default: - if (start_symbol(cur, end)) - t = match_maybe_symbol(this, cur, end); - else { - const Byte* start = cur; - skip_to_word_boundary(++cur, end); - t.lexeme = intern(start, cur - start); - } - break; - } - tokens.push_back(t); - } - return cur; - } - - // ---------- - // -- Atom -- - // ---------- - Atom::Atom(const Token& t) : tok(t) { } - - void - Atom::accept(Visitor& v) const { - v.visit(*this); - } - - // ------------- - // -- Integer -- - // ------------- - Integer::Integer(const Token& t) : Atom(t) { } - - void - Integer::accept(Visitor& v) const { - v.visit(*this); - } - - // --------------- - // -- Character -- - // --------------- - Character::Character(const Token& t) : Atom(t) { } - - void - Character::accept(Visitor& v) const { - v.visit(*this); - } - - // ------------ - // -- String -- - // ------------ - String::String(const Token& t) : Atom(t) { } - - void - String::accept(Visitor& v) const { - v.visit(*this); - } - - // ------------ - // -- Symbol -- - // ------------ - Symbol::Symbol(const Token& t, Kind k) : Atom(t), sort(k) { } - - void - Symbol::accept(Visitor& v) const { - v.visit(*this); - } - - // ------------ - // -- Anchor -- - // ------------ - Anchor::Anchor(size_t t, const Syntax* s) : tag(t), val(s) { } - - void - Anchor::accept(Visitor& v) const { - v.visit(*this); - } - - // --------------- - // -- Reference -- - // --------------- - Reference::Reference(const Token& t, size_t v) : Atom(t), pos(v) { } - - void - Reference::accept(Visitor& v) const { - v.visit(*this); - } - - // ----------- - // -- Quote -- - // ----------- - Quote::Quote(const Syntax* s) : unary_form<Quote>(s) { } - - // --------------- - // -- Antiquote -- - // --------------- - Antiquote::Antiquote(const Syntax* s) : unary_form<Antiquote>(s) { } - - // ------------ - // -- Expand -- - // ------------ - Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { } - - // ---------- - // -- Eval -- - // ---------- - Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { } - - // ------------ - // -- Splice -- - // ------------ - Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { } - - // -------------- - // -- Function -- - // -------------- - Function::Function(const Syntax* s) : unary_form<Function>(s) { } - - // ------------- - // -- Include -- - Include::Include(const Syntax* s) : unary_form<Include>(s) { } - - // ------------- - // -- Exclude -- - Exclude::Exclude(const Syntax* s) : unary_form<Exclude>(s) { } - - // ------------- - // -- DotTail -- - // ------------- - DotTail::DotTail(const Syntax* f) : unary_form<DotTail>(f) { } - - // ---------- - // -- List -- - // ---------- - List::List() { } - - List::List(const base& elts) : base(elts) { } - - List::~List() { } - - void - List::accept(Visitor& v) const { - v.visit(*this); - } - - // ------------ - // -- Vector -- - // ------------ - Vector::Vector() { } - - Vector::Vector(const base& elts) : base(elts) { } - - Vector::~Vector() { } - - void - Vector::accept(Visitor& v) const { - v.visit(*this); - } - - // --------------------- - // -- Syntax::Visitor -- - // --------------------- - - // implicitly convert a reference to `T' to a reference to `S'. - template<typename S, typename T> - inline const S& - as(const T& t) { - return t; - } - - void - Syntax::Visitor::visit(const Integer& i) { - visit(as<Atom>(i)); - } - - void - Syntax::Visitor::visit(const Character& c) { - visit(as<Atom>(c)); - } - - void - Syntax::Visitor::visit(const String& s) { - visit(as<Atom>(s)); - } - - void - Syntax::Visitor::visit(const Symbol& s) { - visit(as<Atom>(s)); - } - - void - Syntax::Visitor::visit(const Reference& r) { - visit(as<Atom>(r)); - } - - // --------------- - // -- Allocator -- - // --------------- - Allocator::Allocator() { } - - // This destructor is defined here so that it provides - // a single instantiation point for destructors of all - // used templates floating around. - Allocator::~Allocator() { } - - const Character* - Allocator::make_character(const Token& t) { - return chars.allocate(t); - } - - const Integer* - Allocator::make_integer(const Token& t) { - return ints.allocate(t); - } - - const String* - Allocator::make_string(const Token& t) { - return strs.allocate(t); - } - - const Symbol* - Allocator::make_symbol(const Token& t, Symbol::Kind k) { - return syms.allocate(t, k); - } - - const Anchor* - Allocator::make_anchor(size_t t, const Syntax* s) { - return ancs.allocate(t, s); - } - - const Reference* - Allocator::make_reference(const Token& t, size_t i) { - return refs.allocate(t, i); - } - - const Quote* - Allocator::make_quote(const Syntax* s) { - return quotes.allocate(s); - } - - const Antiquote* - Allocator::make_antiquote(const Syntax* s) { - return antis.allocate(s); - } - - const Expand* - Allocator::make_expand(const Syntax* s) { - return exps.allocate(s); - } - - const Eval* - Allocator::make_eval(const Syntax* s) { - return evls.allocate(s); - } - - const Splice* - Allocator::make_splice(const Syntax* s) { - return spls.allocate(s); - } - - const Function* - Allocator::make_function(const Syntax* s) { - return funs.allocate(s); - } - - const Include* - Allocator::make_include(const Syntax* s) { - return incs.allocate(s); - } - - const Exclude* - Allocator::make_exclude(const Syntax* s) { - return excs.allocate(s); - } - - const DotTail* - Allocator::make_dot_tail(const Syntax* f) { - return tails.allocate(f); - } - - const List* - Allocator::make_list(const std::vector<const Syntax*>& elts) { - if (elts.empty()) - return &empty_list; - return lists.make(elts); - } - - const Vector* - Allocator::make_vector(const std::vector<const Syntax*>& elts) { - if (elts.empty()) - return &empty_vector; - return vectors.make(elts); - } - - // ------------ - // -- Parser -- - // ------------ - - // Signal a parse error - static void - parse_error(const std::string& s) { - throw BasicError(s); - } - - // Signal that an expected syntax object was missing - static void - expected_syntax(const std::string& s) { - parse_error("expected " + s); - } - - // Signal an abrupt end of input - static void - unexpected_end_of_input(const std::string& s) { - parse_error("unexpected end of input after " + s); - } - - // Signal a missing closing parenthesis - static void - missing_closer_for(const std::string& s) { - parse_error("missing closing parenthesis for " + s); - } - - // The sequence of characters in [cur, last) consists - // entirely of digits. Return the corresponding natural value. - static size_t - natural_value(const Byte* cur, const Byte* last) { - size_t n = 0; - for (; cur < last; ++cur) - // FIXME: check for overflow. - n = 10 * n + (*cur - '0'); - return n; - } - - // Parse a plain identifier or a Lisp-style keyword identifier. - const Symbol* - Parser::parse_symbol(const Token*& cur, const Token* last) { - Symbol::Kind kind = *cur->lexeme->begin() == ':' - ? Symbol::keyword - : Symbol::ordinary; - return alloc.make_symbol(*cur++, kind); - } - - // List of lower case character names - static const char* charname[] = { - "newline", "space", "page", "tab", - "backspace", "return", "linefeed" - }; - - static bool - equal_character_name(BasicString lhs, const char* rhs) { - if (lhs->size() != strlen(rhs)) - return false; - for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur) - if (tolower(*cur) != *rhs++) - return false; - return true; - } - - static bool - valid_character_name(BasicString s) { - for (int i = 0; i < length(charname); ++i) - if (equal_character_name(s, charname[i])) - return true; - return false; - } - - const Character* - Parser::parse_character(const Token*& cur, const Token* last) { - if (cur->lexeme->size() != 1 - and not valid_character_name(cur->lexeme)) - parse_error("invalid literal character syntax"); - return alloc.make_character(*cur++); - } - - // Parse an anchor definition of the form #n=<syntax> - const Anchor* - Parser::parse_anchor(const Token*& cur, const Token* last) { - const size_t n = natural_value(cur->lexeme->begin() + 1, - cur->lexeme->end() - 1); - if (++cur == last) - unexpected_end_of_input("sharp-integer-equal sign"); - return alloc.make_anchor(n, parse_syntax(cur, last)); - } - - // Parse a reference to an anchor, #n# - const Reference* - Parser::parse_reference(const Token*& cur, const Token* last) { - const size_t n = natural_value(cur->lexeme->begin() + 1, - cur->lexeme->end() - 1); - return alloc.make_reference(*cur++, n); - } - - // Parse an uninterned symbol #:<identifier> - const Symbol* - Parser::parse_uninterned(const Token*& cur, const Token* last) { - if (cur == last or cur->type != Token::identifier) - expected_syntax("symbol after sharp-colon sign"); - // FIXME: check that the identifier is not a keyword. - return alloc.make_symbol(*cur++, Symbol::uninterned); - } - - // Parse a function syntax: #'<syntax> - const Function* - Parser::parse_function(const Token*& cur, const Token* last) { - if (cur == last) - unexpected_end_of_input("sharp-quote sign"); - return alloc.make_function(parse_syntax(cur, last)); - } - - // Parse a quotation - const Quote* - Parser::parse_quote(const Token*& cur, const Token* last) { - if (cur == last) - unexpected_end_of_input("quote sign"); - return alloc.make_quote(parse_syntax(cur, last)); - } - - // Parse an antiquotation - const Antiquote* - Parser::parse_antiquote(const Token*& cur, const Token* last) { - if (cur == last) - unexpected_end_of_input("backquote sign"); - return alloc.make_antiquote(parse_syntax(cur, last)); - } - - // Parse an expansion request form - const Expand* - Parser::parse_expand(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("comma sign"); - return alloc.make_expand(s); - } - - // Parse conditional inclusions - const Include* - Parser::parse_include(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("sharp-plus sign"); - return alloc.make_include(s); - } - - const Exclude* - Parser::parse_exclude(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("sharp-minus sign"); - return alloc.make_exclude(s); - } - - const Eval* - Parser::parse_eval(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("sharp-dot sign"); - return alloc.make_eval(s); - } - - const Splice* - Parser::parse_splice(const Token*& cur, const Token* last) { - const Syntax* s = parse_syntax(cur, last); - if (s == 0) - unexpected_end_of_input("comma-at sign"); - return alloc.make_splice(s); - } - - // Skip tokens that are semantically blanks, e.g. comments. - // Return true if not at end of tokens. - static bool - skip_ignorable_tokens(const Token*& cur, const Token* last) { - while (cur < last and cur->type == Token::semicolon) - ++cur; - return cur != last; - } - - // Parse a vector of syntax objects: #(s .. s) - const Vector* - Parser::parse_vector(const Token*& cur, const Token* last) { - std::vector<const Syntax*> elts; - while (skip_ignorable_tokens(cur, last) - and cur->type != Token::close_paren) - elts.push_back(parse_syntax(cur, last)); - if (cur == last) - missing_closer_for("vector"); - ++cur; - return alloc.make_vector(elts); - } - - // Constructs a pair or a list syntax object. - const List* - Parser::parse_list(const Token*& cur, const Token* last) { - std::vector<const Syntax*> elts; - while (skip_ignorable_tokens(cur, last) - and cur->type != Token::close_paren) { - if (cur->type == Token::dot) { - skip_ignorable_tokens(++cur, last); - if (const Syntax* s = parse_syntax(cur, last)) { - elts.push_back(alloc.make_dot_tail(s)); - break; - } - } - elts.push_back(parse_syntax(cur, last)); - } - if (cur == last or cur->type != Token::close_paren) - missing_closer_for("list"); - ++cur; - return alloc.make_list(elts); - } - - Parser::Parser(Allocator& a, std::vector<const Syntax*>& v) - : alloc(a), syns(v) { } - - static std::string - to_string(BasicString s) { - return { s->begin(), s->end() }; - } - - const Syntax* - Parser::parse_syntax(const Token*& cur, const Token* last) { - if (not skip_ignorable_tokens(cur, last)) - return 0; - - switch (cur->type) { - case Token::integer: - return alloc.make_integer(*cur++); - - case Token::character: - return parse_character(cur, last); - - case Token::string: - return alloc.make_string(*cur++); - - case Token::identifier: - return parse_symbol(cur, last); - - case Token::sharp_integer_equal: - return parse_anchor(cur, last); - - case Token::sharp_integer_sharp: - return parse_reference(cur, last); - - case Token::sharp_colon: - return parse_uninterned(++cur, last); - - case Token::sharp_apostrophe: - return parse_function(++cur, last); - - case Token::sharp_open_paren: - return parse_vector(++cur, last); - - case Token::apostrophe: - return parse_quote(++cur, last); - - case Token::open_paren: - return parse_list(++cur, last); - - case Token::sharp_plus: - return parse_include(++cur, last); - - case Token::sharp_minus: - return parse_exclude(++cur, last); - - case Token::sharp_dot: - return parse_eval(++cur, last); - - case Token::backquote: - return parse_antiquote(++cur, last); - - case Token::comma: - return parse_expand(++cur, last); - - case Token::comma_at: - return parse_splice(++cur, last); - - default: - parse_error(std::string("parse error before ") - + to_string(cur->lexeme)); - return 0; // never executed - } - } - - const Token* - Parser::parse(const Token* cur, const Token* last) { - while (cur < last) - if (const Syntax* s = parse_syntax(cur, last)) - syns.push_back(s); - return cur; - } - - Module::Module(const std::string& s) : nm(s) { - std::vector<Token> tokens; - Memory::FileMapping input(s); - Lexer lexer(raw_strs, tokens); - const Byte* rest = lexer.tokenize(input.begin(), input.end()); - if (rest != input.end()) - syntax_error("syntax error"); - Parser parser(allocator, *this); - const Token* tok = parser.parse(begin_ptr(tokens), end_ptr(tokens)); - if (tok != end_ptr(tokens)) - parse_error("parse error"); - } - } -} |