From cea95f2cc443a5bee5cb69476ae73fd99d32fc0d Mon Sep 17 00:00:00 2001
From: Gabriel Dos Reis <gdr@axiomatics.org>
Date: Sun, 15 Jan 2017 09:14:39 -0800
Subject: Add s-expression parsing to libopen-axiom-core.a

---
 src/syntax/sexpr.cc  | 670 ---------------------------------------------------
 src/syntax/sexpr.cxx | 670 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 670 insertions(+), 670 deletions(-)
 delete mode 100644 src/syntax/sexpr.cc
 create mode 100644 src/syntax/sexpr.cxx

(limited to 'src/syntax')
diff --git a/src/syntax/sexpr.cc b/src/syntax/sexpr.cc
deleted file mode 100644
index f9d76825..00000000
--- a/src/syntax/sexpr.cc
+++ /dev/null
@@ -1,670 +0,0 @@
-// Copyright (C) 2010-2013, Gabriel Dos Reis.
-// All rights reserved.
-// Written by Gabriel Dos Reis.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     - Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//
-//     - Redistributions in binary form must reproduce the above copyright
-//       notice, this list of conditions and the following disclaimer in
-//       the documentation and/or other materials provided with the
-//       distribution.
-//
-//     - Neither the name of The Numerical Algorithms Group Ltd. nor the
-//       names of its contributors may be used to endorse or promote products
-//       derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// --% Author: Gabriel Dos Reis.
-
-#include <ctype.h>
-#include <string.h>
-#include <iostream>
-#include <iterator>
-#include <open-axiom/sexpr>
-#include <open-axiom/FileMapping>
-#include <open-axiom/diagnostics>
-
-namespace OpenAxiom {
-   namespace Sexpr {
-      static void
-      invalid_character(Reader::State& s) {
-         auto line = std::to_string(s.bytes.lineno);
-         auto column = std::to_string(s.cur - s.line);
-         auto msg = "invalid character on line " + line +
-            " and column " + column;
-         if (isprint(*s.cur))
-            throw Diagnostics::BasicError(msg + ": " + std::string(1, *s.cur));
-         throw Diagnostics::BasicError(msg + " with code " + std::to_string(*s.cur));
-      }
-      
-      static void
-      syntax_error(const std::string& s) {
-         throw Diagnostics::BasicError(s);
-      }
-
-      // Return true if character `c' introduces a blank.
-      static bool
-      is_blank(char c) {
-         return c == ' ' or c == '\t' or c == '\v'
-            or c == '\n' or c == '\f' or c == '\r';
-      }
-      
-      // Return true if the character `c' introduces a delimiter.
-      static bool
-      is_delimiter(char c) {
-         return is_blank(c)
-            or c == '(' or c == ')' or c == '\''
-            or c == '`' or c == '#';
-      }
-
-      // Move the cursor past all consecutive blank characters, and
-      // return true if there are more input characters to consider.
-      static bool
-      skip_blank(Reader::State& s) {
-         for (bool done = false; s.cur < s.bytes.end and not done; )
-            switch (*s.cur) {
-            case '\n':
-               ++s.bytes.lineno;
-               s.line = ++s.cur;
-               break;
-            case ' ': case '\t': case '\v': case '\r': case '\f':
-               ++s.cur;
-               break;
-            default: done = true; break;
-            }
-         return s.cur < s.bytes.end;
-      }
-
-      // Move `cur' to end-of-line marker.
-      static void
-      skip_to_eol(Reader::State& s) {
-         // FIXME: properly handle CR+LF.
-         while (s.cur < s.bytes.end and *s.cur != '\n')
-            ++s.cur;
-      }
-
-      // Move `cur' one-past a non-esacaped character `c'.
-      // Return true if the character was seen.
-      static bool
-      skip_to_nonescaped_char(Reader::State& s, char c) {
-         for (bool saw_escape = false; s.cur < s.bytes.end; ++s.cur)
-            if (saw_escape)
-               saw_escape = false;
-            else if (*s.cur == '\\')
-               saw_escape = true;
-            else if (*s.cur == c) {
-               ++s.cur;
-               return true;
-            }
-         return false;
-      }
-
-      // Move the cursor past the closing quote of string literal.
-      // Return true if the closing quote was effectively seen.
-      static inline bool
-      skip_to_quote(Reader::State& s) {
-         return skip_to_nonescaped_char(s, '"');
-      }
-
-      template<typename Pred>
-      static bool
-      advance_while(Reader::State& s, Pred p) {
-         while (s.cur < s.bytes.end and p(*s.cur))
-            ++s.cur;
-         return s.cur < s.bytes.end;
-      }
-
-      // Return true if the character `c' be part of a non-absolute
-      // identifier.
-      static bool
-      identifier_part(Byte c) {
-         switch (c) {
-         case '+': case '-': case '*': case '/': case '%': case '^':
-         case '~': case '@': case '$': case '&': case '=':
-         case '<': case '>': case '?': case '!': case '_':
-         case '[': case ']': case '{': case '}':
-            return true;
-         default:
-            return isalnum(c);
-         }
-      }
-
-      // -- AtomSyntax --
-      AtomSyntax::AtomSyntax(const Lexeme& t) : lex(t) { }
-
-      // -- IntegerSyntax --
-      IntegerSyntax::IntegerSyntax(const Lexeme& t) : AtomSyntax(t) { }
-
-      void
-      IntegerSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- CharacterSyntax --
-      CharacterSyntax::CharacterSyntax(const Lexeme& t) : AtomSyntax(t) { }
-
-      void
-      CharacterSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- StringSyntax --
-      StringSyntax::StringSyntax(const Lexeme& t) : AtomSyntax(t) { }
-
-      void
-      StringSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- SymbolSyntax --
-      SymbolSyntax::SymbolSyntax(const Lexeme& t, Kind k)
-            : AtomSyntax(t), sort(k)
-      { }
-
-      void
-      SymbolSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- AnchorSyntax --
-      AnchorSyntax::AnchorSyntax(size_t t, const Syntax* s) : tag(t), val(s) { }
-
-      void
-      AnchorSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- ReferenceSyntax --
-       ReferenceSyntax::ReferenceSyntax(const Lexeme& t, Ordinal n)
-             : AtomSyntax(t), pos(n)
-      { }
-
-      void
-      ReferenceSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- QuoteSyntax --
-      QuoteSyntax::QuoteSyntax(const Syntax* s)
-            : unary_form<QuoteSyntax>(s)
-      { }
-
-      // -- AntiquoteSyntax --
-      AntiquoteSyntax::AntiquoteSyntax(const Syntax* s)
-            : unary_form<AntiquoteSyntax>(s)
-      { }
-
-      // -- Expand --
-      Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { }
-
-      // -- Eval --
-      Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { }
-
-      // -- Splice --
-      Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { }
-
-      // -- Function --
-      Function::Function(const Syntax* s) : unary_form<Function>(s) { }
-
-      // -- Include --
-      Include::Include(const Syntax* c, const Syntax* s)
-            : binary_form<Include>(c, s)
-      { }
-
-      // -- Exclude --
-      Exclude::Exclude(const Syntax* c, const Syntax* s)
-            : binary_form<Exclude>(c, s)
-      { }
-
-      // -- ListSyntax --
-      ListSyntax::ListSyntax() : dot(false) { }
-
-      ListSyntax::ListSyntax(const base& elts, bool d)
-            : base(elts), dot(d)
-      { }
-
-      ListSyntax::~ListSyntax() { }
-
-      void
-      ListSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- VectorSyntax --
-      VectorSyntax::VectorSyntax() { }
-
-      VectorSyntax::VectorSyntax(const base& elts) : base(elts) { }
-
-      VectorSyntax::~VectorSyntax() { }
-      
-      void
-      VectorSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // ---------------
-      // -- Allocator --
-      // ---------------
-      Allocator::Allocator() { }
-
-      // This destructor is defined here so that it provides
-      // a single instantiation point for destructors of all
-      // used templates floating around.
-      Allocator::~Allocator() { }
-
-      const CharacterSyntax*
-      Allocator::make_character(const Lexeme& t) {
-         return chars.make(t);
-      }
-
-      const IntegerSyntax*
-      Allocator::make_integer(const Lexeme& t) {
-         return ints.make(t);
-      }
-
-      const StringSyntax*
-      Allocator::make_string(const Lexeme& t) {
-         return strs.make(t);
-      }
-
-      const SymbolSyntax*
-      Allocator::make_symbol(SymbolSyntax::Kind k, const Lexeme& t) {
-         return syms.make(t, k);
-      }
-
-      const ReferenceSyntax*
-      Allocator::make_reference(size_t i, const Lexeme& t) {
-         return refs.make(t, i);
-      }
-
-      const AnchorSyntax*
-      Allocator::make_anchor(size_t t, const Syntax* s) {
-         return ancs.make(t, s);
-      }
-
-      const QuoteSyntax*
-      Allocator::make_quote(const Syntax* s) {
-         return quotes.make(s);
-      }
-
-      const AntiquoteSyntax*
-      Allocator::make_antiquote(const Syntax* s) {
-         return antis.make(s);
-      }
-
-      const Expand*
-      Allocator::make_expand(const Syntax* s) {
-         return exps.make(s);
-      }
-
-      const Eval*
-      Allocator::make_eval(const Syntax* s) {
-         return evls.make(s);
-      }
-
-      const Splice*
-      Allocator::make_splice(const Syntax* s) {
-         return spls.make(s);
-      }
-
-      const Function*
-      Allocator::make_function(const Syntax* s) {
-         return funs.make(s);
-      }
-
-      const Include*
-      Allocator::make_include(const Syntax* c, const Syntax* s) {
-         return incs.make(c, s);
-      }
-
-      const Exclude*
-      Allocator::make_exclude(const Syntax* c, const Syntax* s) {
-         return excs.make(c, s);
-      }
-
-      const ListSyntax*
-      Allocator::make_list(const std::vector<const Syntax*>& elts, bool dot) {
-         if (elts.empty())
-            return &empty_list;
-         return lists.make(elts, dot);
-      }
-
-      const VectorSyntax*
-      Allocator::make_vector(const std::vector<const Syntax*>& elts) {
-         if (elts.empty())
-            return &empty_vector;
-         return vectors.make(elts);
-      }
-
-      // The sequence of characters in [cur, last) consists
-      // entirely of digits.  Return the corresponding natural value.
-      static size_t
-      natural_value(const Byte* cur, const Byte* last) {
-         size_t n = 0;
-         for (; cur < last; ++cur)
-            // FIXME: check for overflow.
-            n = 10 * n + (*cur - '0');
-         return n;
-      }
-
-      // -- Reader --
-      Reader::Reader(const Byte* f, const Byte* l)
-            : st{ { f, l, 1 }, f, f }
-      { }
-
-      Reader::Reader(const RawInput& ri)
-            : st { ri, ri.start, ri.start }
-      { }
-
-      static const Syntax* read_sexpr(Reader::State&);
-
-      // Parse a string literal
-      static const Syntax*
-      read_string(Reader::State& s) {
-         auto start = s.cur++;
-         if (not skip_to_quote(s))
-            syntax_error("missing closing quote sign for string literal");
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_string(t);
-      }
-
-      // Parse an absolute identifier.
-      static const Syntax*
-      read_absolute_symbol(Reader::State& s) {
-         auto start = ++s.cur;
-         if (not skip_to_nonescaped_char(s, '|'))
-            syntax_error("missing closing bar sign for an absolute symbol");
-         Lexeme t = { { start, s.cur - 1 }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::absolute, t);
-      }
-
-      // Read an atom starting with digits.
-      static const Syntax*
-      read_maybe_natural(Reader::State& s) {
-         auto start = s.cur;
-         advance_while (s, isdigit);
-         if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
-            Lexeme t = { { start, s.cur }, s.bytes.lineno };
-            return s.alloc.make_integer(t);
-         }
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
-      }
-
-      // Read an identifier.
-      static const Syntax*
-      read_identifier(Reader::State& s) {
-         auto start = s.cur;
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
-      }
-
-      // Read an atom starting with a '+' or '-' sign; this
-      // should be identifier, or a signed integer.
-      static const Syntax*
-      read_maybe_signed_number(Reader::State& s) {
-         auto start = s.cur++;
-         if (s.cur < s.bytes.end and isdigit(*s.cur)) {
-            advance_while(s, isdigit);
-            if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
-               Lexeme t = { { start, s.cur }, s.bytes.lineno };
-               return s.alloc.make_integer(t);
-            }
-         }
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
-      }
-
-      static const Syntax*
-      read_keyword(Reader::State& s) {
-         auto start = s.cur++;
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::keyword, t);
-      }
-
-      // Read an atom.
-      static const Syntax*
-      read_atom(Reader::State& s) {
-         switch (*s.cur) {
-         case '"': return read_string(s);
-         case ':': return read_keyword(s);
-         case '-': case '+': return read_maybe_signed_number(s);
-
-         case '0': case '1': case '2': case '3': case '4':
-         case '5': case '6': case '7': case '8': case '9':
-            return read_maybe_natural(s);
-
-         default:
-            if (identifier_part(*s.cur))
-               return read_identifier(s);
-            invalid_character(s);
-            ++s.cur;
-            return nullptr;
-         }
-      }
-
-      // Parse a quote expression.
-      static const Syntax*
-      read_quote(Reader::State& s) {
-         ++s.cur;               // skip the quote character
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("end of input reached after quote sign");
-         return s.alloc.make_quote(x);
-      }
-
-      // Parse a backquote expression.
-      static const Syntax*
-      read_backquote(Reader::State& s) {
-         ++s.cur;               // skip the backquote character
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("end of input reached after backquote sign");
-         return s.alloc.make_antiquote(x);
-      }
-
-      // We've just seen "#(" indicating the start of a literal
-      // vector.  Read the elements and return the corresponding form.
-      static const Syntax*
-      finish_literal_vector(Reader::State& s) {
-         ++s.cur;               // Skip the open paren.
-         std::vector<const Syntax*> elts { };
-         while (skip_blank(s) and *s.cur != ')') {
-            if (auto x = read_sexpr(s))
-               elts.push_back(x);
-            else
-               syntax_error("syntax error while reading vector elements");
-         }
-         if (s.cur >= s.bytes.end)
-            syntax_error("unfinished literal vector");
-         else
-            ++s.cur;
-         return s.alloc.make_vector(elts);
-      }
-
-      // We've just seen the sharp sign followed by a digit.  We assume
-      // we are about to read an anchor or a back reference.
-      static const Syntax*
-      finish_anchor_or_reference(Reader::State& s) {
-         auto start = s.cur;
-         advance_while(s, isdigit);
-         if (s.cur >= s.bytes.end)
-            syntax_error("end-of-input after sharp-number sign");
-         const Byte c = *s.cur;
-         if (c != '#' and c != '=')
-            syntax_error("syntax error after sharp-number-equal sign");
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         auto n = natural_value(start, s.cur);
-         ++s.cur;
-         if (c == '#')
-            return s.alloc.make_reference(n, t);
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("syntax error after sharp-number-equal sign");
-         return s.alloc.make_anchor(n, x);
-      }
-
-      static const Syntax*
-      finish_function(Reader::State& s) {
-         ++s.cur;               // skip quote sign.
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("missing function designator after sharp-quote sign");
-         return s.alloc.make_function(x);
-      }
-
-      static const Syntax*
-      finish_uninterned_symbol(Reader::State& s) {
-         ++s.cur;               // skip colon sign.
-         auto start = s.cur;
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::uninterned, t);
-      }
-
-      static const Syntax*
-      finish_readtime_eval(Reader::State& s) {
-         ++s.cur;               // skip dot sign.
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("parse error after sharp-dot sign");
-         return s.alloc.make_eval(x);
-      }
-
-      static const Syntax*
-      finish_character(Reader::State& s) {
-         ++s.cur;               // skip backslash sign
-         auto start = s.cur;
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_character(t);
-      }
-
-      static const Syntax*
-      finish_include(Reader::State& s) {
-         ++s.cur;
-         auto cond = read_sexpr(s);
-         auto form = read_sexpr(s);
-         return s.alloc.make_include(cond, form);
-      }
-
-      static const Syntax*
-      finish_exclude(Reader::State& s) {
-         ++s.cur;
-         auto cond = read_sexpr(s);
-         auto form = read_sexpr(s);
-         return s.alloc.make_exclude(cond, form);
-      }
-
-      static const Syntax*
-      read_sharp_et_al(Reader::State& s) {
-         if (++s.cur >= s.bytes.end)
-            syntax_error("end-of-input reached after sharp sign");
-         switch (*s.cur) {
-         case '(':  return finish_literal_vector(s);
-         case '\'': return finish_function(s);
-         case ':': return finish_uninterned_symbol(s);
-         case '.': return finish_readtime_eval(s);
-         case '\\': return finish_character(s);
-         case '+': return finish_include(s);
-         case '-': return finish_exclude(s);
-
-         default:
-            if (isdigit(*s.cur))
-               return finish_anchor_or_reference(s);
-            syntax_error("syntax error after sharp-sign");
-         }
-         return nullptr;
-      }
-
-      // We have just seen a dot; read the tail and the closing parenthesis.
-      static const Syntax*
-      finish_dotted_list(Reader::State& s, std::vector<const Syntax*>& elts) {
-         ++s.cur;               // Skip dot sign.
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("missing expression after dot sign");
-         if (not skip_blank(s) or *s.cur != ')')
-            syntax_error("missing closing parenthesis");
-         ++s.cur;
-         elts.push_back(x);
-         return s.alloc.make_list(elts, true);
-      }
-
-      static const Syntax*
-      read_pair(Reader::State& s) {
-         ++s.cur;               // skip opening parenthesis
-         std::vector<const Syntax*> elts { };
-         while (skip_blank(s))
-            switch (*s.cur) {
-            case '.':
-               if (elts.empty())
-                  syntax_error("missing expression before dot sign.");
-               return finish_dotted_list(s, elts);
-
-            case ')':
-               ++s.cur;
-               return s.alloc.make_list(elts);
-
-            default:
-               if (auto x = read_sexpr(s))
-                  elts.push_back(x);
-               else
-                  syntax_error("unfinished pair expression");
-               break;
-            }
-         syntax_error("end-of-input while looking for closing parenthesis");
-         return nullptr;
-      }
-
-      static const Syntax*
-      read_sexpr(Reader::State& s) {
-         while (skip_blank(s))
-            switch (*s.cur) {
-            case ';': skip_to_eol(s); break;
-            case '\'': return read_quote(s);
-            case '`': return read_backquote(s);
-            case '|': return read_absolute_symbol(s);
-            case '#': return read_sharp_et_al(s);
-            case '(': return read_pair(s);
-            default: return read_atom(s);
-            }
-         return nullptr;
-      }
-
-      const Syntax*
-      Reader::read() {
-         return read_sexpr(st);
-      }
-
-      const Byte*
-      Reader::position(Ordinal p) {
-         st.cur = st.bytes.start + p;
-         st.line = st.cur;
-         // while (st.line > st.start and st.line[-1] != '\n')
-         //    --st.line;
-         return st.cur;
-      }
-
-   }
-}
diff --git a/src/syntax/sexpr.cxx b/src/syntax/sexpr.cxx
new file mode 100644
index 00000000..f9d76825
--- /dev/null
+++ b/src/syntax/sexpr.cxx
@@ -0,0 +1,670 @@
+// Copyright (C) 2010-2013, Gabriel Dos Reis.
+// All rights reserved.
+// Written by Gabriel Dos Reis.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     - Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//
+//     - Redistributions in binary form must reproduce the above copyright
+//       notice, this list of conditions and the following disclaimer in
+//       the documentation and/or other materials provided with the
+//       distribution.
+//
+//     - Neither the name of The Numerical Algorithms Group Ltd. nor the
+//       names of its contributors may be used to endorse or promote products
+//       derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// --% Author: Gabriel Dos Reis.
+
+#include <ctype.h>
+#include <string.h>
+#include <iostream>
+#include <iterator>
+#include <open-axiom/sexpr>
+#include <open-axiom/FileMapping>
+#include <open-axiom/diagnostics>
+
+namespace OpenAxiom {
+   namespace Sexpr {
+      static void
+      invalid_character(Reader::State& s) {
+         auto line = std::to_string(s.bytes.lineno);
+         auto column = std::to_string(s.cur - s.line);
+         auto msg = "invalid character on line " + line +
+            " and column " + column;
+         if (isprint(*s.cur))
+            throw Diagnostics::BasicError(msg + ": " + std::string(1, *s.cur));
+         throw Diagnostics::BasicError(msg + " with code " + std::to_string(*s.cur));
+      }
+      
+      static void
+      syntax_error(const std::string& s) {
+         throw Diagnostics::BasicError(s);
+      }
+
+      // Return true if character `c' introduces a blank.
+      static bool
+      is_blank(char c) {
+         return c == ' ' or c == '\t' or c == '\v'
+            or c == '\n' or c == '\f' or c == '\r';
+      }
+      
+      // Return true if the character `c' introduces a delimiter.
+      static bool
+      is_delimiter(char c) {
+         return is_blank(c)
+            or c == '(' or c == ')' or c == '\''
+            or c == '`' or c == '#';
+      }
+
+      // Move the cursor past all consecutive blank characters, and
+      // return true if there are more input characters to consider.
+      static bool
+      skip_blank(Reader::State& s) {
+         for (bool done = false; s.cur < s.bytes.end and not done; )
+            switch (*s.cur) {
+            case '\n':
+               ++s.bytes.lineno;
+               s.line = ++s.cur;
+               break;
+            case ' ': case '\t': case '\v': case '\r': case '\f':
+               ++s.cur;
+               break;
+            default: done = true; break;
+            }
+         return s.cur < s.bytes.end;
+      }
+
+      // Move `cur' to end-of-line marker.
+      static void
+      skip_to_eol(Reader::State& s) {
+         // FIXME: properly handle CR+LF.
+         while (s.cur < s.bytes.end and *s.cur != '\n')
+            ++s.cur;
+      }
+
+      // Move `cur' one-past a non-esacaped character `c'.
+      // Return true if the character was seen.
+      static bool
+      skip_to_nonescaped_char(Reader::State& s, char c) {
+         for (bool saw_escape = false; s.cur < s.bytes.end; ++s.cur)
+            if (saw_escape)
+               saw_escape = false;
+            else if (*s.cur == '\\')
+               saw_escape = true;
+            else if (*s.cur == c) {
+               ++s.cur;
+               return true;
+            }
+         return false;
+      }
+
+      // Move the cursor past the closing quote of string literal.
+      // Return true if the closing quote was effectively seen.
+      static inline bool
+      skip_to_quote(Reader::State& s) {
+         return skip_to_nonescaped_char(s, '"');
+      }
+
+      template<typename Pred>
+      static bool
+      advance_while(Reader::State& s, Pred p) {
+         while (s.cur < s.bytes.end and p(*s.cur))
+            ++s.cur;
+         return s.cur < s.bytes.end;
+      }
+
+      // Return true if the character `c' be part of a non-absolute
+      // identifier.
+      static bool
+      identifier_part(Byte c) {
+         switch (c) {
+         case '+': case '-': case '*': case '/': case '%': case '^':
+         case '~': case '@': case '$': case '&': case '=':
+         case '<': case '>': case '?': case '!': case '_':
+         case '[': case ']': case '{': case '}':
+            return true;
+         default:
+            return isalnum(c);
+         }
+      }
+
+      // -- AtomSyntax --
+      AtomSyntax::AtomSyntax(const Lexeme& t) : lex(t) { }
+
+      // -- IntegerSyntax --
+      IntegerSyntax::IntegerSyntax(const Lexeme& t) : AtomSyntax(t) { }
+
+      void
+      IntegerSyntax::accept(Visitor& v) const {
+         v.visit(*this);
+      }
+
+      // -- CharacterSyntax --
+      CharacterSyntax::CharacterSyntax(const Lexeme& t) : AtomSyntax(t) { }
+
+      void
+      CharacterSyntax::accept(Visitor& v) const {
+         v.visit(*this);
+      }
+
+      // -- StringSyntax --
+      StringSyntax::StringSyntax(const Lexeme& t) : AtomSyntax(t) { }
+
+      void
+      StringSyntax::accept(Visitor& v) const {
+         v.visit(*this);
+      }
+
+      // -- SymbolSyntax --
+      SymbolSyntax::SymbolSyntax(const Lexeme& t, Kind k)
+            : AtomSyntax(t), sort(k)
+      { }
+
+      void
+      SymbolSyntax::accept(Visitor& v) const {
+         v.visit(*this);
+      }
+
+      // -- AnchorSyntax --
+      AnchorSyntax::AnchorSyntax(size_t t, const Syntax* s) : tag(t), val(s) { }
+
+      void
+      AnchorSyntax::accept(Visitor& v) const {
+         v.visit(*this);
+      }
+
+      // -- ReferenceSyntax --
+       ReferenceSyntax::ReferenceSyntax(const Lexeme& t, Ordinal n)
+             : AtomSyntax(t), pos(n)
+      { }
+
+      void
+      ReferenceSyntax::accept(Visitor& v) const {
+         v.visit(*this);
+      }
+
+      // -- QuoteSyntax --
+      QuoteSyntax::QuoteSyntax(const Syntax* s)
+            : unary_form<QuoteSyntax>(s)
+      { }
+
+      // -- AntiquoteSyntax --
+      AntiquoteSyntax::AntiquoteSyntax(const Syntax* s)
+            : unary_form<AntiquoteSyntax>(s)
+      { }
+
+      // -- Expand --
+      Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { }
+
+      // -- Eval --
+      Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { }
+
+      // -- Splice --
+      Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { }
+
+      // -- Function --
+      Function::Function(const Syntax* s) : unary_form<Function>(s) { }
+
+      // -- Include --
+      Include::Include(const Syntax* c, const Syntax* s)
+            : binary_form<Include>(c, s)
+      { }
+
+      // -- Exclude --
+      Exclude::Exclude(const Syntax* c, const Syntax* s)
+            : binary_form<Exclude>(c, s)
+      { }
+
+      // -- ListSyntax --
+      ListSyntax::ListSyntax() : dot(false) { }
+
+      ListSyntax::ListSyntax(const base& elts, bool d)
+            : base(elts), dot(d)
+      { }
+
+      ListSyntax::~ListSyntax() { }
+
+      void
+      ListSyntax::accept(Visitor& v) const {
+         v.visit(*this);
+      }
+
+      // -- VectorSyntax --
+      VectorSyntax::VectorSyntax() { }
+
+      VectorSyntax::VectorSyntax(const base& elts) : base(elts) { }
+
+      VectorSyntax::~VectorSyntax() { }
+      
+      void
+      VectorSyntax::accept(Visitor& v) const {
+         v.visit(*this);
+      }
+
+      // ---------------
+      // -- Allocator --
+      // ---------------
+      Allocator::Allocator() { }
+
+      // This destructor is defined here so that it provides
+      // a single instantiation point for destructors of all
+      // used templates floating around.
+      Allocator::~Allocator() { }
+
+      const CharacterSyntax*
+      Allocator::make_character(const Lexeme& t) {
+         return chars.make(t);
+      }
+
+      const IntegerSyntax*
+      Allocator::make_integer(const Lexeme& t) {
+         return ints.make(t);
+      }
+
+      const StringSyntax*
+      Allocator::make_string(const Lexeme& t) {
+         return strs.make(t);
+      }
+
+      const SymbolSyntax*
+      Allocator::make_symbol(SymbolSyntax::Kind k, const Lexeme& t) {
+         return syms.make(t, k);
+      }
+
+      const ReferenceSyntax*
+      Allocator::make_reference(size_t i, const Lexeme& t) {
+         return refs.make(t, i);
+      }
+
+      const AnchorSyntax*
+      Allocator::make_anchor(size_t t, const Syntax* s) {
+         return ancs.make(t, s);
+      }
+
+      const QuoteSyntax*
+      Allocator::make_quote(const Syntax* s) {
+         return quotes.make(s);
+      }
+
+      const AntiquoteSyntax*
+      Allocator::make_antiquote(const Syntax* s) {
+         return antis.make(s);
+      }
+
+      const Expand*
+      Allocator::make_expand(const Syntax* s) {
+         return exps.make(s);
+      }
+
+      const Eval*
+      Allocator::make_eval(const Syntax* s) {
+         return evls.make(s);
+      }
+
+      const Splice*
+      Allocator::make_splice(const Syntax* s) {
+         return spls.make(s);
+      }
+
+      const Function*
+      Allocator::make_function(const Syntax* s) {
+         return funs.make(s);
+      }
+
+      const Include*
+      Allocator::make_include(const Syntax* c, const Syntax* s) {
+         return incs.make(c, s);
+      }
+
+      const Exclude*
+      Allocator::make_exclude(const Syntax* c, const Syntax* s) {
+         return excs.make(c, s);
+      }
+
+      const ListSyntax*
+      Allocator::make_list(const std::vector<const Syntax*>& elts, bool dot) {
+         if (elts.empty())
+            return &empty_list;
+         return lists.make(elts, dot);
+      }
+
+      const VectorSyntax*
+      Allocator::make_vector(const std::vector<const Syntax*>& elts) {
+         if (elts.empty())
+            return &empty_vector;
+         return vectors.make(elts);
+      }
+
+      // The sequence of characters in [cur, last) consists
+      // entirely of digits.  Return the corresponding natural value.
+      static size_t
+      natural_value(const Byte* cur, const Byte* last) {
+         size_t n = 0;
+         for (; cur < last; ++cur)
+            // FIXME: check for overflow.
+            n = 10 * n + (*cur - '0');
+         return n;
+      }
+
+      // -- Reader --
+      Reader::Reader(const Byte* f, const Byte* l)
+            : st{ { f, l, 1 }, f, f }
+      { }
+
+      Reader::Reader(const RawInput& ri)
+            : st { ri, ri.start, ri.start }
+      { }
+
+      static const Syntax* read_sexpr(Reader::State&);
+
+      // Parse a string literal
+      static const Syntax*
+      read_string(Reader::State& s) {
+         auto start = s.cur++;
+         if (not skip_to_quote(s))
+            syntax_error("missing closing quote sign for string literal");
+         Lexeme t = { { start, s.cur }, s.bytes.lineno };
+         return s.alloc.make_string(t);
+      }
+
+      // Parse an absolute identifier.
+      static const Syntax*
+      read_absolute_symbol(Reader::State& s) {
+         auto start = ++s.cur;
+         if (not skip_to_nonescaped_char(s, '|'))
+            syntax_error("missing closing bar sign for an absolute symbol");
+         Lexeme t = { { start, s.cur - 1 }, s.bytes.lineno };
+         return s.alloc.make_symbol(SymbolSyntax::absolute, t);
+      }
+
+      // Read an atom starting with digits.
+      static const Syntax*
+      read_maybe_natural(Reader::State& s) {
+         auto start = s.cur;
+         advance_while (s, isdigit);
+         if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
+            Lexeme t = { { start, s.cur }, s.bytes.lineno };
+            return s.alloc.make_integer(t);
+         }
+         advance_while(s, identifier_part);
+         Lexeme t = { { start, s.cur }, s.bytes.lineno };
+         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
+      }
+
+      // Read an identifier.
+      static const Syntax*
+      read_identifier(Reader::State& s) {
+         auto start = s.cur;
+         advance_while(s, identifier_part);
+         Lexeme t = { { start, s.cur }, s.bytes.lineno };
+         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
+      }
+
+      // Read an atom starting with a '+' or '-' sign; this
+      // should be identifier, or a signed integer.
+      static const Syntax*
+      read_maybe_signed_number(Reader::State& s) {
+         auto start = s.cur++;
+         if (s.cur < s.bytes.end and isdigit(*s.cur)) {
+            advance_while(s, isdigit);
+            if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
+               Lexeme t = { { start, s.cur }, s.bytes.lineno };
+               return s.alloc.make_integer(t);
+            }
+         }
+         advance_while(s, identifier_part);
+         Lexeme t = { { start, s.cur }, s.bytes.lineno };
+         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
+      }
+
+      static const Syntax*
+      read_keyword(Reader::State& s) {
+         auto start = s.cur++;
+         advance_while(s, identifier_part);
+         Lexeme t = { { start, s.cur }, s.bytes.lineno };
+         return s.alloc.make_symbol(SymbolSyntax::keyword, t);
+      }
+
+      // Read an atom.
+      static const Syntax*
+      read_atom(Reader::State& s) {
+         switch (*s.cur) {
+         case '"': return read_string(s);
+         case ':': return read_keyword(s);
+         case '-': case '+': return read_maybe_signed_number(s);
+
+         case '0': case '1': case '2': case '3': case '4':
+         case '5': case '6': case '7': case '8': case '9':
+            return read_maybe_natural(s);
+
+         default:
+            if (identifier_part(*s.cur))
+               return read_identifier(s);
+            invalid_character(s);
+            ++s.cur;
+            return nullptr;
+         }
+      }
+
+      // Parse a quote expression.
+      static const Syntax*
+      read_quote(Reader::State& s) {
+         ++s.cur;               // skip the quote character
+         auto x = read_sexpr(s);
+         if (x == nullptr)
+            syntax_error("end of input reached after quote sign");
+         return s.alloc.make_quote(x);
+      }
+
+      // Parse a backquote expression.
+      static const Syntax*
+      read_backquote(Reader::State& s) {
+         ++s.cur;               // skip the backquote character
+         auto x = read_sexpr(s);
+         if (x == nullptr)
+            syntax_error("end of input reached after backquote sign");
+         return s.alloc.make_antiquote(x);
+      }
+
+      // We've just seen "#(" indicating the start of a literal
+      // vector.  Read the elements and return the corresponding form.
+      static const Syntax*
+      finish_literal_vector(Reader::State& s) {
+         ++s.cur;               // Skip the open paren.
+         std::vector<const Syntax*> elts { };
+         while (skip_blank(s) and *s.cur != ')') {
+            if (auto x = read_sexpr(s))
+               elts.push_back(x);
+            else
+               syntax_error("syntax error while reading vector elements");
+         }
+         if (s.cur >= s.bytes.end)
+            syntax_error("unfinished literal vector");
+         else
+            ++s.cur;
+         return s.alloc.make_vector(elts);
+      }
+
+      // We've just seen the sharp sign followed by a digit.  We assume
+      // we are about to read an anchor or a back reference.
+      static const Syntax*
+      finish_anchor_or_reference(Reader::State& s) {
+         auto start = s.cur;
+         advance_while(s, isdigit);
+         if (s.cur >= s.bytes.end)
+            syntax_error("end-of-input after sharp-number sign");
+         const Byte c = *s.cur;
+         if (c != '#' and c != '=')
+            syntax_error("syntax error after sharp-number-equal sign");
+         Lexeme t = { { start, s.cur }, s.bytes.lineno };
+         auto n = natural_value(start, s.cur);
+         ++s.cur;
+         if (c == '#')
+            return s.alloc.make_reference(n, t);
+         auto x = read_sexpr(s);
+         if (x == nullptr)
+            syntax_error("syntax error after sharp-number-equal sign");
+         return s.alloc.make_anchor(n, x);
+      }
+
+      static const Syntax*
+      finish_function(Reader::State& s) {
+         ++s.cur;               // skip quote sign.
+         auto x = read_sexpr(s);
+         if (x == nullptr)
+            syntax_error("missing function designator after sharp-quote sign");
+         return s.alloc.make_function(x);
+      }
+
+      static const Syntax*
+      finish_uninterned_symbol(Reader::State& s) {
+         ++s.cur;               // skip colon sign.
+         auto start = s.cur;
+         advance_while(s, identifier_part);
+         Lexeme t = { { start, s.cur }, s.bytes.lineno };
+         return s.alloc.make_symbol(SymbolSyntax::uninterned, t);
+      }
+
+      static const Syntax*
+      finish_readtime_eval(Reader::State& s) {
+         ++s.cur;               // skip dot sign.
+         auto x = read_sexpr(s);
+         if (x == nullptr)
+            syntax_error("parse error after sharp-dot sign");
+         return s.alloc.make_eval(x);
+      }
+
+      static const Syntax*
+      finish_character(Reader::State& s) {
+         ++s.cur;               // skip backslash sign
+         auto start = s.cur;
+         advance_while(s, identifier_part);
+         Lexeme t = { { start, s.cur }, s.bytes.lineno };
+         return s.alloc.make_character(t);
+      }
+
+      static const Syntax*
+      finish_include(Reader::State& s) {
+         ++s.cur;
+         auto cond = read_sexpr(s);
+         auto form = read_sexpr(s);
+         return s.alloc.make_include(cond, form);
+      }
+
+      static const Syntax*
+      finish_exclude(Reader::State& s) {
+         ++s.cur;
+         auto cond = read_sexpr(s);
+         auto form = read_sexpr(s);
+         return s.alloc.make_exclude(cond, form);
+      }
+
+      static const Syntax*
+      read_sharp_et_al(Reader::State& s) {
+         if (++s.cur >= s.bytes.end)
+            syntax_error("end-of-input reached after sharp sign");
+         switch (*s.cur) {
+         case '(':  return finish_literal_vector(s);
+         case '\'': return finish_function(s);
+         case ':': return finish_uninterned_symbol(s);
+         case '.': return finish_readtime_eval(s);
+         case '\\': return finish_character(s);
+         case '+': return finish_include(s);
+         case '-': return finish_exclude(s);
+
+         default:
+            if (isdigit(*s.cur))
+               return finish_anchor_or_reference(s);
+            syntax_error("syntax error after sharp-sign");
+         }
+         return nullptr;
+      }
+
+      // We have just seen a dot; read the tail and the closing parenthesis.
+      static const Syntax*
+      finish_dotted_list(Reader::State& s, std::vector<const Syntax*>& elts) {
+         ++s.cur;               // Skip dot sign.
+         auto x = read_sexpr(s);
+         if (x == nullptr)
+            syntax_error("missing expression after dot sign");
+         if (not skip_blank(s) or *s.cur != ')')
+            syntax_error("missing closing parenthesis");
+         ++s.cur;
+         elts.push_back(x);
+         return s.alloc.make_list(elts, true);
+      }
+
+      static const Syntax*
+      read_pair(Reader::State& s) {
+         ++s.cur;               // skip opening parenthesis
+         std::vector<const Syntax*> elts { };
+         while (skip_blank(s))
+            switch (*s.cur) {
+            case '.':
+               if (elts.empty())
+                  syntax_error("missing expression before dot sign.");
+               return finish_dotted_list(s, elts);
+
+            case ')':
+               ++s.cur;
+               return s.alloc.make_list(elts);
+
+            default:
+               if (auto x = read_sexpr(s))
+                  elts.push_back(x);
+               else
+                  syntax_error("unfinished pair expression");
+               break;
+            }
+         syntax_error("end-of-input while looking for closing parenthesis");
+         return nullptr;
+      }
+
+      static const Syntax*
+      read_sexpr(Reader::State& s) {
+         while (skip_blank(s))
+            switch (*s.cur) {
+            case ';': skip_to_eol(s); break;
+            case '\'': return read_quote(s);
+            case '`': return read_backquote(s);
+            case '|': return read_absolute_symbol(s);
+            case '#': return read_sharp_et_al(s);
+            case '(': return read_pair(s);
+            default: return read_atom(s);
+            }
+         return nullptr;
+      }
+
+      const Syntax*
+      Reader::read() {
+         return read_sexpr(st);
+      }
+
+      const Byte*
+      Reader::position(Ordinal p) {
+         st.cur = st.bytes.start + p;
+         st.line = st.cur;
+         // while (st.line > st.start and st.line[-1] != '\n')
+         //    --st.line;
+         return st.cur;
+      }
+
+   }
+}
-- 
cgit v1.2.3