aboutsummaryrefslogtreecommitdiff
path: root/src/syntax/sexpr.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/syntax/sexpr.cc')
-rw-r--r--src/syntax/sexpr.cc934
1 files changed, 934 insertions, 0 deletions
diff --git a/src/syntax/sexpr.cc b/src/syntax/sexpr.cc
new file mode 100644
index 00000000..14113164
--- /dev/null
+++ b/src/syntax/sexpr.cc
@@ -0,0 +1,934 @@
+// Copyright (C) 2010-2013, Gabriel Dos Reis.
+// All rights reserved.
+// Written by Gabriel Dos Reis.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+//
+// - Neither the name of The Numerical Algorithms Group Ltd. nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// --% Author: Gabriel Dos Reis.
+
+#include <ctype.h>
+#include <string.h>
+#include <iostream>
+#include <iterator>
+#include <open-axiom/sexpr>
+#include <open-axiom/FileMapping>
+
+namespace OpenAxiom {
+ namespace Sexpr {
+ template<typename T, int N>
+ static inline int
+ length(const T(&)[N]) {
+ return N;
+ }
+
+ template<typename Sequence>
+ static inline typename Sequence::const_pointer
+ begin_ptr(const Sequence& s) {
+ return &*s.begin();
+ }
+
+ template<typename Sequence>
+ static inline typename Sequence::const_pointer
+ end_ptr(const Sequence& s) {
+ return s.empty() ? 0 : &*s.begin() + s.size();
+ }
+
+ std::ostream&
+ operator<<(std::ostream& os, const Token& t) {
+ switch (t.type) {
+ case Token::semicolon: os << "SEMICOLON"; break;
+ case Token::dot: os << "DOT"; break;
+ case Token::comma: os << "COMMA"; break;
+ case Token::open_paren: os << "OPEN_PAREN"; break;
+ case Token::close_paren: os << "CLOSE_PAREN"; break;
+ case Token::apostrophe: os << "APOSTROPHE"; break;
+ case Token::backquote: os << "BACKQUOTE"; break;
+ case Token::backslash: os << "BACKSLASH"; break;
+ case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break;
+ case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break;
+ case Token::sharp_colon: os << "SHARP_COLON"; break;
+ case Token::sharp_plus: os << "SHARP_PLUS"; break;
+ case Token::sharp_minus: os << "SHARP_MINUS"; break;
+ case Token::sharp_dot: os << "SHARP_DOT"; break;
+ case Token::comma_at: os << "COMMA_AT"; break;
+ case Token::integer: os << "INTEGER"; break;
+ case Token::character: os << "CHARACTER"; break;
+ case Token::string: os << "STRING"; break;
+ case Token::identifier: os << "IDENTIFIER"; break;
+ case Token::sharp_integer_sharp:
+ os << "SHARP_INTEGER_SHARP"; break;
+ case Token::sharp_integer_equal:
+ os << "SHARP_INTEGER_EQUAL"; break;
+ default: os << "UNKNOWN"; break;
+ }
+ os << '(';
+ if (t.lexeme != 0) {
+ os << '"';
+ std::copy(t.lexeme->begin(), t.lexeme->end(),
+ std::ostream_iterator<char>(os));
+ os << '"';
+ }
+ else
+ os << "<missing>";
+ return os << ')';
+ }
+
+ // -----------
+ // -- Lexer --
+ // -----------
+ static void
+ syntax_error(const std::string& s) {
+ throw BasicError(s);
+ }
+
+ // Return true if character `c' introduces a blank.
+ static bool
+ is_blank(char c) {
+ return c == ' ' or c == '\t' or c == '\v'
+ or c == '\n' or c == '\f' or c == '\r';
+ }
+
+ // Return true if the character `c' introduces a delimiter.
+ static bool
+ is_delimiter(char c) {
+ return is_blank(c)
+ or c == '(' or c == ')' or c == '\''
+ or c == '`' or c == '#';
+ }
+
+ // Move `cur' past all consecutive blank characters, and
+ // return the new position.
+ static const Byte*
+ skip_blank(const Byte*& cur, const Byte* end) {
+ while (cur < end and is_blank(*cur))
+ ++cur;
+ return cur;
+ }
+
+ // Move `cur' to end-of-line marker.
+ static const Byte*
+ skip_to_eol(const Byte*& cur, const Byte* end) {
+ // FIXME: properly handle CR+LF.
+ while (cur < end and *cur != '\n')
+ ++cur;
+ return cur;
+ }
+
+ // Move `cur' until a word boundary is reached.
+ static const Byte*
+ skip_to_word_boundary(const Byte*& cur, const Byte* end) {
+ bool saw_escape = false;
+ for (; cur < end; ++cur) {
+ if (saw_escape)
+ saw_escape = false;
+ else if (*cur == '\\')
+ saw_escape = true;
+ else if (is_delimiter(*cur))
+ break;
+ }
+ return cur;
+ }
+
+ // Move `cur' one-past a non-esacaped character `c'.
+ // Return true if the character was seen.
+ static bool
+ skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) {
+ bool saw_escape = false;
+ for (; cur < end; ++cur)
+ if (saw_escape)
+ saw_escape = false;
+ else if (*cur == '\\')
+ saw_escape = true;
+ else if (*cur == c) {
+ ++cur;
+ return true;
+ }
+ return false;
+ }
+
+ // Move `cur' past the closing quote of string literal.
+ // Return true if the closing fence was effectively seen.
+ static inline bool
+ skip_to_quote(const Byte*& cur, const Byte* end) {
+ return skip_to_nonescaped_char(cur, end, '"');
+ }
+
+ // Return true if the character `c' be part of a non-absolute
+ // identifier.
+ static bool
+ identifier_part(char c) {
+ switch (c) {
+ case '+': case '-': case '*': case '/': case '%': case '^':
+ case '~': case '@': case '$': case '&': case '=':
+ case '<': case '>': case '?': case '!': case '_':
+ case '[': case ']': case '{': case '}':
+ return true;
+ default:
+ return isalnum(c);
+ }
+ }
+
+ // Return true if the character `c' has a special meaning after
+ // the sharp character.
+ static bool
+ special_after_sharp(char c) {
+ return c == '(' or c == '\'' or c == ':'
+ or c == '+' or c == '-' or c == '.';
+ }
+
+ // Return true if the sequence `[cur, end)' has a prefix that is
+ // an integer followrd by the equal sign or the sharp sign.
+ // `cur' is moved along the way.
+ static bool
+ only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) {
+ while (cur < end and isdigit(*cur))
+ ++cur;
+ return cur < end and (*cur == '#' or *cur == '=');
+ }
+
+ // The token `t' was thought to designate an identifier.
+ // Reclassify it as an integer if, in fact, its lexeme consists
+ // entirely of digits.
+ static void
+ maybe_reclassify(Token& t) {
+ const Byte* cur = t.lexeme->begin();
+ const Byte* end = t.lexeme->end();
+ while (cur < end and isdigit(*cur))
+ ++cur;
+ if (cur == end)
+ t.type = Token::integer;
+ }
+
+ // Returns true if the first characters in the range
+ // [cur, last) start an identifier.
+ static bool
+ start_symbol(const Byte* cur, const Byte* last) {
+ if (cur >= last)
+ return false;
+ return identifier_part(*cur)
+ or *cur == '|' or *cur == ':';
+ }
+
+ // We are processing a symbol token. Accumulate all
+ // legitimate characters till the end of the token.
+ static void
+ skip_to_end_of_symbol(const Byte*& cur, const Byte* end) {
+ const char c = *cur;
+ if (*cur == '|')
+ skip_to_nonescaped_char(++cur, end, c);
+ else
+ skip_to_word_boundary(cur, end);
+ if (cur < end and *cur == ':')
+ skip_to_end_of_symbol(cur, end);
+ }
+
+ static Token
+ match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) {
+ Token t = { Token::identifier, 0 };
+ const Byte* start = cur;
+ skip_to_end_of_symbol(cur, end);
+ t.lexeme = lexer->intern(start, cur - start);
+ maybe_reclassify(t);
+ return t;
+ }
+
+ const Byte*
+ Lexer::tokenize(const Byte* cur, const Byte* end) {
+ while (skip_blank(cur, end) < end) {
+ Token t = { Token::unknown, 0 };
+ switch (*cur) {
+ case ';': {
+ const Byte* start = cur;
+ t.type = Token::semicolon;
+ skip_to_eol(cur, end);
+ t.lexeme = intern(start, cur - start);
+ break;
+ }
+
+ case '.': case '(': case ')': case '\'': case '`':
+ t.type = Token::Type(token::value(*cur));
+ t.lexeme = intern(cur, 1);
+ ++cur;
+ break;
+
+ case ',': {
+ const Byte* start = cur;
+ if (++cur < end and *cur == '@') {
+ t.type = Token::comma_at;
+ ++cur;
+ }
+ else
+ t.type = Token::comma;
+ t.lexeme = intern(start, cur - start);
+ break;
+ }
+
+ case '\\':
+ t = match_maybe_symbol(this, cur, end);
+ break;
+
+ case '#': {
+ const Byte* start = cur;
+ if (cur + 1 < end and special_after_sharp(cur[1])) {
+ t.type = Token::Type(token::value(cur[0], cur[1]));
+ t.lexeme = intern(cur, 2);
+ cur += 2;
+ }
+ else if (cur + 1 < end and cur[1] == '\\') {
+ start = cur += 2;
+ if (not isalnum(*cur))
+ ++cur;
+ else
+ skip_to_word_boundary(cur, end);
+ t.type = Token::character;
+ t.lexeme = intern(start, cur - start);
+ }
+ else if (only_digits_before_equal_or_shap(++cur, end)) {
+ t.type = *cur == '#'
+ ? Token::sharp_integer_sharp
+ : Token::sharp_integer_equal;
+ t.lexeme = intern(start, cur - start + 1);
+ ++cur;
+ }
+ else {
+ skip_to_word_boundary(cur, end);
+ t.lexeme = intern(start, cur - start);
+ }
+ break;
+ }
+
+ case '"': {
+ const Byte* start = cur;
+ skip_to_quote(++cur, end);
+ t.type = Token::string;
+ t.lexeme = intern(start, cur - start);
+ break;
+ }
+
+ default:
+ if (start_symbol(cur, end))
+ t = match_maybe_symbol(this, cur, end);
+ else {
+ const Byte* start = cur;
+ skip_to_word_boundary(++cur, end);
+ t.lexeme = intern(start, cur - start);
+ }
+ break;
+ }
+ tokens.push_back(t);
+ }
+ return cur;
+ }
+
+ // ----------
+ // -- Atom --
+ // ----------
+ Atom::Atom(const Token& t) : tok(t) { }
+
+ void
+ Atom::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // -------------
+ // -- Integer --
+ // -------------
+ Integer::Integer(const Token& t) : Atom(t) { }
+
+ void
+ Integer::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ---------------
+ // -- Character --
+ // ---------------
+ Character::Character(const Token& t) : Atom(t) { }
+
+ void
+ Character::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ------------
+ // -- String --
+ // ------------
+ String::String(const Token& t) : Atom(t) { }
+
+ void
+ String::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ------------
+ // -- Symbol --
+ // ------------
+ Symbol::Symbol(const Token& t, Kind k) : Atom(t), sort(k) { }
+
+ void
+ Symbol::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ------------
+ // -- Anchor --
+ // ------------
+ Anchor::Anchor(size_t t, const Syntax* s) : tag(t), val(s) { }
+
+ void
+ Anchor::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ---------------
+ // -- Reference --
+ // ---------------
+ Reference::Reference(const Token& t, size_t v) : Atom(t), pos(v) { }
+
+ void
+ Reference::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // -----------
+ // -- Quote --
+ // -----------
+ Quote::Quote(const Syntax* s) : unary_form<Quote>(s) { }
+
+ // ---------------
+ // -- Antiquote --
+ // ---------------
+ Antiquote::Antiquote(const Syntax* s) : unary_form<Antiquote>(s) { }
+
+ // ------------
+ // -- Expand --
+ // ------------
+ Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { }
+
+ // ----------
+ // -- Eval --
+ // ----------
+ Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { }
+
+ // ------------
+ // -- Splice --
+ // ------------
+ Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { }
+
+ // --------------
+ // -- Function --
+ // --------------
+ Function::Function(const Syntax* s) : unary_form<Function>(s) { }
+
+ // -------------
+ // -- Include --
+ Include::Include(const Syntax* s) : unary_form<Include>(s) { }
+
+ // -------------
+ // -- Exclude --
+ Exclude::Exclude(const Syntax* s) : unary_form<Exclude>(s) { }
+
+ // -------------
+ // -- DotTail --
+ // -------------
+ DotTail::DotTail(const Syntax* f) : unary_form<DotTail>(f) { }
+
+ // ----------
+ // -- List --
+ // ----------
+ List::List() { }
+
+ List::List(const base& elts) : base(elts) { }
+
+ List::~List() { }
+
+ void
+ List::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ------------
+ // -- Vector --
+ // ------------
+ Vector::Vector() { }
+
+ Vector::Vector(const base& elts) : base(elts) { }
+
+ Vector::~Vector() { }
+
+ void
+ Vector::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ---------------------
+ // -- Syntax::Visitor --
+ // ---------------------
+
+ // implicitly convert a reference to `T' to a reference to `S'.
+ template<typename S, typename T>
+ inline const S&
+ as(const T& t) {
+ return t;
+ }
+
+ void
+ Syntax::Visitor::visit(const Integer& i) {
+ visit(as<Atom>(i));
+ }
+
+ void
+ Syntax::Visitor::visit(const Character& c) {
+ visit(as<Atom>(c));
+ }
+
+ void
+ Syntax::Visitor::visit(const String& s) {
+ visit(as<Atom>(s));
+ }
+
+ void
+ Syntax::Visitor::visit(const Symbol& s) {
+ visit(as<Atom>(s));
+ }
+
+ void
+ Syntax::Visitor::visit(const Reference& r) {
+ visit(as<Atom>(r));
+ }
+
+ // ---------------
+ // -- Allocator --
+ // ---------------
+ Allocator::Allocator() { }
+
+ // This destructor is defined here so that it provides
+ // a single instantiation point for destructors of all
+ // used templates floating around.
+ Allocator::~Allocator() { }
+
+ const Character*
+ Allocator::make_character(const Token& t) {
+ return chars.make(t);
+ }
+
+ const Integer*
+ Allocator::make_integer(const Token& t) {
+ return ints.make(t);
+ }
+
+ const String*
+ Allocator::make_string(const Token& t) {
+ return strs.make(t);
+ }
+
+ const Symbol*
+ Allocator::make_symbol(const Token& t, Symbol::Kind k) {
+ return syms.make(t, k);
+ }
+
+ const Anchor*
+ Allocator::make_anchor(size_t t, const Syntax* s) {
+ return ancs.make(t, s);
+ }
+
+ const Reference*
+ Allocator::make_reference(const Token& t, size_t i) {
+ return refs.make(t, i);
+ }
+
+ const Quote*
+ Allocator::make_quote(const Syntax* s) {
+ return quotes.make(s);
+ }
+
+ const Antiquote*
+ Allocator::make_antiquote(const Syntax* s) {
+ return antis.make(s);
+ }
+
+ const Expand*
+ Allocator::make_expand(const Syntax* s) {
+ return exps.make(s);
+ }
+
+ const Eval*
+ Allocator::make_eval(const Syntax* s) {
+ return evls.make(s);
+ }
+
+ const Splice*
+ Allocator::make_splice(const Syntax* s) {
+ return spls.make(s);
+ }
+
+ const Function*
+ Allocator::make_function(const Syntax* s) {
+ return funs.make(s);
+ }
+
+ const Include*
+ Allocator::make_include(const Syntax* s) {
+ return incs.make(s);
+ }
+
+ const Exclude*
+ Allocator::make_exclude(const Syntax* s) {
+ return excs.make(s);
+ }
+
+ const DotTail*
+ Allocator::make_dot_tail(const Syntax* f) {
+ return tails.make(f);
+ }
+
+ const List*
+ Allocator::make_list(const std::vector<const Syntax*>& elts) {
+ if (elts.empty())
+ return &empty_list;
+ return lists.make(elts);
+ }
+
+ const Vector*
+ Allocator::make_vector(const std::vector<const Syntax*>& elts) {
+ if (elts.empty())
+ return &empty_vector;
+ return vectors.make(elts);
+ }
+
+ // ------------
+ // -- Parser --
+ // ------------
+
+ // Signal a parse error
+ static void
+ parse_error(const std::string& s) {
+ throw BasicError(s);
+ }
+
+ // Signal that an expected syntax object was missing
+ static void
+ expected_syntax(const std::string& s) {
+ parse_error("expected " + s);
+ }
+
+ // Signal an abrupt end of input
+ static void
+ unexpected_end_of_input(const std::string& s) {
+ parse_error("unexpected end of input after " + s);
+ }
+
+ // Signal a missing closing parenthesis
+ static void
+ missing_closer_for(const std::string& s) {
+ parse_error("missing closing parenthesis for " + s);
+ }
+
+ // The sequence of characters in [cur, last) consists
+ // entirely of digits. Return the corresponding natural value.
+ static size_t
+ natural_value(const Byte* cur, const Byte* last) {
+ size_t n = 0;
+ for (; cur < last; ++cur)
+ // FIXME: check for overflow.
+ n = 10 * n + (*cur - '0');
+ return n;
+ }
+
+ // Parse a plain identifier or a Lisp-style keyword identifier.
+ const Symbol*
+ Parser::parse_symbol(const Token*& cur, const Token* last) {
+ Symbol::Kind kind = *cur->lexeme->begin() == ':'
+ ? Symbol::keyword
+ : Symbol::ordinary;
+ return alloc.make_symbol(*cur++, kind);
+ }
+
+ // List of lower case character names
+ static const char* charname[] = {
+ "newline", "space", "page", "tab",
+ "backspace", "return", "linefeed"
+ };
+
+ static bool
+ equal_character_name(BasicString lhs, const char* rhs) {
+ if (lhs->size() != strlen(rhs))
+ return false;
+ for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur)
+ if (tolower(*cur) != *rhs++)
+ return false;
+ return true;
+ }
+
+ static bool
+ valid_character_name(BasicString s) {
+ for (int i = 0; i < length(charname); ++i)
+ if (equal_character_name(s, charname[i]))
+ return true;
+ return false;
+ }
+
+ const Character*
+ Parser::parse_character(const Token*& cur, const Token* last) {
+ if (cur->lexeme->size() != 1
+ and not valid_character_name(cur->lexeme))
+ parse_error("invalid literal character syntax");
+ return alloc.make_character(*cur++);
+ }
+
+ // Parse an anchor definition of the form #n=<syntax>
+ const Anchor*
+ Parser::parse_anchor(const Token*& cur, const Token* last) {
+ const size_t n = natural_value(cur->lexeme->begin() + 1,
+ cur->lexeme->end() - 1);
+ if (++cur == last)
+ unexpected_end_of_input("sharp-integer-equal sign");
+ return alloc.make_anchor(n, parse_syntax(cur, last));
+ }
+
+ // Parse a reference to an anchor, #n#
+ const Reference*
+ Parser::parse_reference(const Token*& cur, const Token* last) {
+ const size_t n = natural_value(cur->lexeme->begin() + 1,
+ cur->lexeme->end() - 1);
+ return alloc.make_reference(*cur++, n);
+ }
+
+ // Parse an uninterned symbol #:<identifier>
+ const Symbol*
+ Parser::parse_uninterned(const Token*& cur, const Token* last) {
+ if (cur == last or cur->type != Token::identifier)
+ expected_syntax("symbol after sharp-colon sign");
+ // FIXME: check that the identifier is not a keyword.
+ return alloc.make_symbol(*cur++, Symbol::uninterned);
+ }
+
+ // Parse a function syntax: #'<syntax>
+ const Function*
+ Parser::parse_function(const Token*& cur, const Token* last) {
+ if (cur == last)
+ unexpected_end_of_input("sharp-quote sign");
+ return alloc.make_function(parse_syntax(cur, last));
+ }
+
+ // Parse a quotation
+ const Quote*
+ Parser::parse_quote(const Token*& cur, const Token* last) {
+ if (cur == last)
+ unexpected_end_of_input("quote sign");
+ return alloc.make_quote(parse_syntax(cur, last));
+ }
+
+ // Parse an antiquotation
+ const Antiquote*
+ Parser::parse_antiquote(const Token*& cur, const Token* last) {
+ if (cur == last)
+ unexpected_end_of_input("backquote sign");
+ return alloc.make_antiquote(parse_syntax(cur, last));
+ }
+
+ // Parse an expansion request form
+ const Expand*
+ Parser::parse_expand(const Token*& cur, const Token* last) {
+ const Syntax* s = parse_syntax(cur, last);
+ if (s == 0)
+ unexpected_end_of_input("comma sign");
+ return alloc.make_expand(s);
+ }
+
+ // Parse conditional inclusions
+ const Include*
+ Parser::parse_include(const Token*& cur, const Token* last) {
+ const Syntax* s = parse_syntax(cur, last);
+ if (s == 0)
+ unexpected_end_of_input("sharp-plus sign");
+ return alloc.make_include(s);
+ }
+
+ const Exclude*
+ Parser::parse_exclude(const Token*& cur, const Token* last) {
+ const Syntax* s = parse_syntax(cur, last);
+ if (s == 0)
+ unexpected_end_of_input("sharp-minus sign");
+ return alloc.make_exclude(s);
+ }
+
+ const Eval*
+ Parser::parse_eval(const Token*& cur, const Token* last) {
+ const Syntax* s = parse_syntax(cur, last);
+ if (s == 0)
+ unexpected_end_of_input("sharp-dot sign");
+ return alloc.make_eval(s);
+ }
+
+ const Splice*
+ Parser::parse_splice(const Token*& cur, const Token* last) {
+ const Syntax* s = parse_syntax(cur, last);
+ if (s == 0)
+ unexpected_end_of_input("comma-at sign");
+ return alloc.make_splice(s);
+ }
+
+ // Skip tokens that are semantically blanks, e.g. comments.
+ // Return true if not at end of tokens.
+ static bool
+ skip_ignorable_tokens(const Token*& cur, const Token* last) {
+ while (cur < last and cur->type == Token::semicolon)
+ ++cur;
+ return cur != last;
+ }
+
+ // Parse a vector of syntax objects: #(s .. s)
+ const Vector*
+ Parser::parse_vector(const Token*& cur, const Token* last) {
+ std::vector<const Syntax*> elts;
+ while (skip_ignorable_tokens(cur, last)
+ and cur->type != Token::close_paren)
+ elts.push_back(parse_syntax(cur, last));
+ if (cur == last)
+ missing_closer_for("vector");
+ ++cur;
+ return alloc.make_vector(elts);
+ }
+
+ // Constructs a pair or a list syntax object.
+ const List*
+ Parser::parse_list(const Token*& cur, const Token* last) {
+ std::vector<const Syntax*> elts;
+ while (skip_ignorable_tokens(cur, last)
+ and cur->type != Token::close_paren) {
+ if (cur->type == Token::dot) {
+ skip_ignorable_tokens(++cur, last);
+ if (const Syntax* s = parse_syntax(cur, last)) {
+ elts.push_back(alloc.make_dot_tail(s));
+ break;
+ }
+ }
+ elts.push_back(parse_syntax(cur, last));
+ }
+ if (cur == last or cur->type != Token::close_paren)
+ missing_closer_for("list");
+ ++cur;
+ return alloc.make_list(elts);
+ }
+
+ Parser::Parser(Allocator& a, std::vector<const Syntax*>& v)
+ : alloc(a), syns(v) { }
+
+ static std::string
+ to_string(BasicString s) {
+ return { s->begin(), s->end() };
+ }
+
+ const Syntax*
+ Parser::parse_syntax(const Token*& cur, const Token* last) {
+ if (not skip_ignorable_tokens(cur, last))
+ return 0;
+
+ switch (cur->type) {
+ case Token::integer:
+ return alloc.make_integer(*cur++);
+
+ case Token::character:
+ return parse_character(cur, last);
+
+ case Token::string:
+ return alloc.make_string(*cur++);
+
+ case Token::identifier:
+ return parse_symbol(cur, last);
+
+ case Token::sharp_integer_equal:
+ return parse_anchor(cur, last);
+
+ case Token::sharp_integer_sharp:
+ return parse_reference(cur, last);
+
+ case Token::sharp_colon:
+ return parse_uninterned(++cur, last);
+
+ case Token::sharp_apostrophe:
+ return parse_function(++cur, last);
+
+ case Token::sharp_open_paren:
+ return parse_vector(++cur, last);
+
+ case Token::apostrophe:
+ return parse_quote(++cur, last);
+
+ case Token::open_paren:
+ return parse_list(++cur, last);
+
+ case Token::sharp_plus:
+ return parse_include(++cur, last);
+
+ case Token::sharp_minus:
+ return parse_exclude(++cur, last);
+
+ case Token::sharp_dot:
+ return parse_eval(++cur, last);
+
+ case Token::backquote:
+ return parse_antiquote(++cur, last);
+
+ case Token::comma:
+ return parse_expand(++cur, last);
+
+ case Token::comma_at:
+ return parse_splice(++cur, last);
+
+ default:
+ parse_error(std::string("parse error before ")
+ + to_string(cur->lexeme));
+ return 0; // never executed
+ }
+ }
+
+ const Token*
+ Parser::parse(const Token* cur, const Token* last) {
+ while (cur < last)
+ if (const Syntax* s = parse_syntax(cur, last))
+ syns.push_back(s);
+ return cur;
+ }
+
+ Module::Module(const std::string& s) : nm(s) {
+ std::vector<Token> tokens;
+ Memory::FileMapping input(s);
+ Lexer lexer(raw_strs, tokens);
+ const Byte* rest = lexer.tokenize(input.begin(), input.end());
+ if (rest != input.end())
+ syntax_error("syntax error");
+ Parser parser(allocator, *this);
+ const Token* tok = parser.parse(begin_ptr(tokens), end_ptr(tokens));
+ if (tok != end_ptr(tokens))
+ parse_error("parse error");
+ }
+ }
+}