Add s-expression parsing to libopen-axiom-core.a

author: Gabriel Dos Reis <gdr@axiomatics.org> 2017-01-15 09:14:39 -0800
committer: Gabriel Dos Reis <gdr@axiomatics.org> 2017-01-15 09:14:39 -0800
commit: cea95f2cc443a5bee5cb69476ae73fd99d32fc0d (patch)
tree: ca8765a4f777fbc131423b13e17ca2be56b6a251 /src/syntax/sexpr.cc
parent: 77be242e6af75148771620fcbbaad191d1c11b60 (diff)
download: open-axiom-cea95f2cc443a5bee5cb69476ae73fd99d32fc0d.tar.gz
1 files changed, 0 insertions, 670 deletions
diff --git a/src/syntax/sexpr.cc b/src/syntax/sexpr.cc
deleted file mode 100644
index f9d76825..00000000
--- a/src/syntax/sexpr.cc
+++ /dev/null
@@ -1,670 +0,0 @@
-// Copyright (C) 2010-2013, Gabriel Dos Reis.
-// All rights reserved.
-// Written by Gabriel Dos Reis.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     - Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//
-//     - Redistributions in binary form must reproduce the above copyright
-//       notice, this list of conditions and the following disclaimer in
-//       the documentation and/or other materials provided with the
-//       distribution.
-//
-//     - Neither the name of The Numerical Algorithms Group Ltd. nor the
-//       names of its contributors may be used to endorse or promote products
-//       derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// --% Author: Gabriel Dos Reis.
-
-#include <ctype.h>
-#include <string.h>
-#include <iostream>
-#include <iterator>
-#include <open-axiom/sexpr>
-#include <open-axiom/FileMapping>
-#include <open-axiom/diagnostics>
-
-namespace OpenAxiom {
-   namespace Sexpr {
-      static void
-      invalid_character(Reader::State& s) {
-         auto line = std::to_string(s.bytes.lineno);
-         auto column = std::to_string(s.cur - s.line);
-         auto msg = "invalid character on line " + line +
-            " and column " + column;
-         if (isprint(*s.cur))
-            throw Diagnostics::BasicError(msg + ": " + std::string(1, *s.cur));
-         throw Diagnostics::BasicError(msg + " with code " + std::to_string(*s.cur));
-      }
-      
-      static void
-      syntax_error(const std::string& s) {
-         throw Diagnostics::BasicError(s);
-      }
-
-      // Return true if character `c' introduces a blank.
-      static bool
-      is_blank(char c) {
-         return c == ' ' or c == '\t' or c == '\v'
-            or c == '\n' or c == '\f' or c == '\r';
-      }
-      
-      // Return true if the character `c' introduces a delimiter.
-      static bool
-      is_delimiter(char c) {
-         return is_blank(c)
-            or c == '(' or c == ')' or c == '\''
-            or c == '`' or c == '#';
-      }
-
-      // Move the cursor past all consecutive blank characters, and
-      // return true if there are more input characters to consider.
-      static bool
-      skip_blank(Reader::State& s) {
-         for (bool done = false; s.cur < s.bytes.end and not done; )
-            switch (*s.cur) {
-            case '\n':
-               ++s.bytes.lineno;
-               s.line = ++s.cur;
-               break;
-            case ' ': case '\t': case '\v': case '\r': case '\f':
-               ++s.cur;
-               break;
-            default: done = true; break;
-            }
-         return s.cur < s.bytes.end;
-      }
-
-      // Move `cur' to end-of-line marker.
-      static void
-      skip_to_eol(Reader::State& s) {
-         // FIXME: properly handle CR+LF.
-         while (s.cur < s.bytes.end and *s.cur != '\n')
-            ++s.cur;
-      }
-
-      // Move `cur' one-past a non-esacaped character `c'.
-      // Return true if the character was seen.
-      static bool
-      skip_to_nonescaped_char(Reader::State& s, char c) {
-         for (bool saw_escape = false; s.cur < s.bytes.end; ++s.cur)
-            if (saw_escape)
-               saw_escape = false;
-            else if (*s.cur == '\\')
-               saw_escape = true;
-            else if (*s.cur == c) {
-               ++s.cur;
-               return true;
-            }
-         return false;
-      }
-
-      // Move the cursor past the closing quote of string literal.
-      // Return true if the closing quote was effectively seen.
-      static inline bool
-      skip_to_quote(Reader::State& s) {
-         return skip_to_nonescaped_char(s, '"');
-      }
-
-      template<typename Pred>
-      static bool
-      advance_while(Reader::State& s, Pred p) {
-         while (s.cur < s.bytes.end and p(*s.cur))
-            ++s.cur;
-         return s.cur < s.bytes.end;
-      }
-
-      // Return true if the character `c' be part of a non-absolute
-      // identifier.
-      static bool
-      identifier_part(Byte c) {
-         switch (c) {
-         case '+': case '-': case '*': case '/': case '%': case '^':
-         case '~': case '@': case '$': case '&': case '=':
-         case '<': case '>': case '?': case '!': case '_':
-         case '[': case ']': case '{': case '}':
-            return true;
-         default:
-            return isalnum(c);
-         }
-      }
-
-      // -- AtomSyntax --
-      AtomSyntax::AtomSyntax(const Lexeme& t) : lex(t) { }
-
-      // -- IntegerSyntax --
-      IntegerSyntax::IntegerSyntax(const Lexeme& t) : AtomSyntax(t) { }
-
-      void
-      IntegerSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- CharacterSyntax --
-      CharacterSyntax::CharacterSyntax(const Lexeme& t) : AtomSyntax(t) { }
-
-      void
-      CharacterSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- StringSyntax --
-      StringSyntax::StringSyntax(const Lexeme& t) : AtomSyntax(t) { }
-
-      void
-      StringSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- SymbolSyntax --
-      SymbolSyntax::SymbolSyntax(const Lexeme& t, Kind k)
-            : AtomSyntax(t), sort(k)
-      { }
-
-      void
-      SymbolSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- AnchorSyntax --
-      AnchorSyntax::AnchorSyntax(size_t t, const Syntax* s) : tag(t), val(s) { }
-
-      void
-      AnchorSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- ReferenceSyntax --
-       ReferenceSyntax::ReferenceSyntax(const Lexeme& t, Ordinal n)
-             : AtomSyntax(t), pos(n)
-      { }
-
-      void
-      ReferenceSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- QuoteSyntax --
-      QuoteSyntax::QuoteSyntax(const Syntax* s)
-            : unary_form<QuoteSyntax>(s)
-      { }
-
-      // -- AntiquoteSyntax --
-      AntiquoteSyntax::AntiquoteSyntax(const Syntax* s)
-            : unary_form<AntiquoteSyntax>(s)
-      { }
-
-      // -- Expand --
-      Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { }
-
-      // -- Eval --
-      Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { }
-
-      // -- Splice --
-      Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { }
-
-      // -- Function --
-      Function::Function(const Syntax* s) : unary_form<Function>(s) { }
-
-      // -- Include --
-      Include::Include(const Syntax* c, const Syntax* s)
-            : binary_form<Include>(c, s)
-      { }
-
-      // -- Exclude --
-      Exclude::Exclude(const Syntax* c, const Syntax* s)
-            : binary_form<Exclude>(c, s)
-      { }
-
-      // -- ListSyntax --
-      ListSyntax::ListSyntax() : dot(false) { }
-
-      ListSyntax::ListSyntax(const base& elts, bool d)
-            : base(elts), dot(d)
-      { }
-
-      ListSyntax::~ListSyntax() { }
-
-      void
-      ListSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // -- VectorSyntax --
-      VectorSyntax::VectorSyntax() { }
-
-      VectorSyntax::VectorSyntax(const base& elts) : base(elts) { }
-
-      VectorSyntax::~VectorSyntax() { }
-      
-      void
-      VectorSyntax::accept(Visitor& v) const {
-         v.visit(*this);
-      }
-
-      // ---------------
-      // -- Allocator --
-      // ---------------
-      Allocator::Allocator() { }
-
-      // This destructor is defined here so that it provides
-      // a single instantiation point for destructors of all
-      // used templates floating around.
-      Allocator::~Allocator() { }
-
-      const CharacterSyntax*
-      Allocator::make_character(const Lexeme& t) {
-         return chars.make(t);
-      }
-
-      const IntegerSyntax*
-      Allocator::make_integer(const Lexeme& t) {
-         return ints.make(t);
-      }
-
-      const StringSyntax*
-      Allocator::make_string(const Lexeme& t) {
-         return strs.make(t);
-      }
-
-      const SymbolSyntax*
-      Allocator::make_symbol(SymbolSyntax::Kind k, const Lexeme& t) {
-         return syms.make(t, k);
-      }
-
-      const ReferenceSyntax*
-      Allocator::make_reference(size_t i, const Lexeme& t) {
-         return refs.make(t, i);
-      }
-
-      const AnchorSyntax*
-      Allocator::make_anchor(size_t t, const Syntax* s) {
-         return ancs.make(t, s);
-      }
-
-      const QuoteSyntax*
-      Allocator::make_quote(const Syntax* s) {
-         return quotes.make(s);
-      }
-
-      const AntiquoteSyntax*
-      Allocator::make_antiquote(const Syntax* s) {
-         return antis.make(s);
-      }
-
-      const Expand*
-      Allocator::make_expand(const Syntax* s) {
-         return exps.make(s);
-      }
-
-      const Eval*
-      Allocator::make_eval(const Syntax* s) {
-         return evls.make(s);
-      }
-
-      const Splice*
-      Allocator::make_splice(const Syntax* s) {
-         return spls.make(s);
-      }
-
-      const Function*
-      Allocator::make_function(const Syntax* s) {
-         return funs.make(s);
-      }
-
-      const Include*
-      Allocator::make_include(const Syntax* c, const Syntax* s) {
-         return incs.make(c, s);
-      }
-
-      const Exclude*
-      Allocator::make_exclude(const Syntax* c, const Syntax* s) {
-         return excs.make(c, s);
-      }
-
-      const ListSyntax*
-      Allocator::make_list(const std::vector<const Syntax*>& elts, bool dot) {
-         if (elts.empty())
-            return &empty_list;
-         return lists.make(elts, dot);
-      }
-
-      const VectorSyntax*
-      Allocator::make_vector(const std::vector<const Syntax*>& elts) {
-         if (elts.empty())
-            return &empty_vector;
-         return vectors.make(elts);
-      }
-
-      // The sequence of characters in [cur, last) consists
-      // entirely of digits.  Return the corresponding natural value.
-      static size_t
-      natural_value(const Byte* cur, const Byte* last) {
-         size_t n = 0;
-         for (; cur < last; ++cur)
-            // FIXME: check for overflow.
-            n = 10 * n + (*cur - '0');
-         return n;
-      }
-
-      // -- Reader --
-      Reader::Reader(const Byte* f, const Byte* l)
-            : st{ { f, l, 1 }, f, f }
-      { }
-
-      Reader::Reader(const RawInput& ri)
-            : st { ri, ri.start, ri.start }
-      { }
-
-      static const Syntax* read_sexpr(Reader::State&);
-
-      // Parse a string literal
-      static const Syntax*
-      read_string(Reader::State& s) {
-         auto start = s.cur++;
-         if (not skip_to_quote(s))
-            syntax_error("missing closing quote sign for string literal");
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_string(t);
-      }
-
-      // Parse an absolute identifier.
-      static const Syntax*
-      read_absolute_symbol(Reader::State& s) {
-         auto start = ++s.cur;
-         if (not skip_to_nonescaped_char(s, '|'))
-            syntax_error("missing closing bar sign for an absolute symbol");
-         Lexeme t = { { start, s.cur - 1 }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::absolute, t);
-      }
-
-      // Read an atom starting with digits.
-      static const Syntax*
-      read_maybe_natural(Reader::State& s) {
-         auto start = s.cur;
-         advance_while (s, isdigit);
-         if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
-            Lexeme t = { { start, s.cur }, s.bytes.lineno };
-            return s.alloc.make_integer(t);
-         }
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
-      }
-
-      // Read an identifier.
-      static const Syntax*
-      read_identifier(Reader::State& s) {
-         auto start = s.cur;
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
-      }
-
-      // Read an atom starting with a '+' or '-' sign; this
-      // should be identifier, or a signed integer.
-      static const Syntax*
-      read_maybe_signed_number(Reader::State& s) {
-         auto start = s.cur++;
-         if (s.cur < s.bytes.end and isdigit(*s.cur)) {
-            advance_while(s, isdigit);
-            if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
-               Lexeme t = { { start, s.cur }, s.bytes.lineno };
-               return s.alloc.make_integer(t);
-            }
-         }
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
-      }
-
-      static const Syntax*
-      read_keyword(Reader::State& s) {
-         auto start = s.cur++;
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::keyword, t);
-      }
-
-      // Read an atom.
-      static const Syntax*
-      read_atom(Reader::State& s) {
-         switch (*s.cur) {
-         case '"': return read_string(s);
-         case ':': return read_keyword(s);
-         case '-': case '+': return read_maybe_signed_number(s);
-
-         case '0': case '1': case '2': case '3': case '4':
-         case '5': case '6': case '7': case '8': case '9':
-            return read_maybe_natural(s);
-
-         default:
-            if (identifier_part(*s.cur))
-               return read_identifier(s);
-            invalid_character(s);
-            ++s.cur;
-            return nullptr;
-         }
-      }
-
-      // Parse a quote expression.
-      static const Syntax*
-      read_quote(Reader::State& s) {
-         ++s.cur;               // skip the quote character
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("end of input reached after quote sign");
-         return s.alloc.make_quote(x);
-      }
-
-      // Parse a backquote expression.
-      static const Syntax*
-      read_backquote(Reader::State& s) {
-         ++s.cur;               // skip the backquote character
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("end of input reached after backquote sign");
-         return s.alloc.make_antiquote(x);
-      }
-
-      // We've just seen "#(" indicating the start of a literal
-      // vector.  Read the elements and return the corresponding form.
-      static const Syntax*
-      finish_literal_vector(Reader::State& s) {
-         ++s.cur;               // Skip the open paren.
-         std::vector<const Syntax*> elts { };
-         while (skip_blank(s) and *s.cur != ')') {
-            if (auto x = read_sexpr(s))
-               elts.push_back(x);
-            else
-               syntax_error("syntax error while reading vector elements");
-         }
-         if (s.cur >= s.bytes.end)
-            syntax_error("unfinished literal vector");
-         else
-            ++s.cur;
-         return s.alloc.make_vector(elts);
-      }
-
-      // We've just seen the sharp sign followed by a digit.  We assume
-      // we are about to read an anchor or a back reference.
-      static const Syntax*
-      finish_anchor_or_reference(Reader::State& s) {
-         auto start = s.cur;
-         advance_while(s, isdigit);
-         if (s.cur >= s.bytes.end)
-            syntax_error("end-of-input after sharp-number sign");
-         const Byte c = *s.cur;
-         if (c != '#' and c != '=')
-            syntax_error("syntax error after sharp-number-equal sign");
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         auto n = natural_value(start, s.cur);
-         ++s.cur;
-         if (c == '#')
-            return s.alloc.make_reference(n, t);
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("syntax error after sharp-number-equal sign");
-         return s.alloc.make_anchor(n, x);
-      }
-
-      static const Syntax*
-      finish_function(Reader::State& s) {
-         ++s.cur;               // skip quote sign.
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("missing function designator after sharp-quote sign");
-         return s.alloc.make_function(x);
-      }
-
-      static const Syntax*
-      finish_uninterned_symbol(Reader::State& s) {
-         ++s.cur;               // skip colon sign.
-         auto start = s.cur;
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_symbol(SymbolSyntax::uninterned, t);
-      }
-
-      static const Syntax*
-      finish_readtime_eval(Reader::State& s) {
-         ++s.cur;               // skip dot sign.
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("parse error after sharp-dot sign");
-         return s.alloc.make_eval(x);
-      }
-
-      static const Syntax*
-      finish_character(Reader::State& s) {
-         ++s.cur;               // skip backslash sign
-         auto start = s.cur;
-         advance_while(s, identifier_part);
-         Lexeme t = { { start, s.cur }, s.bytes.lineno };
-         return s.alloc.make_character(t);
-      }
-
-      static const Syntax*
-      finish_include(Reader::State& s) {
-         ++s.cur;
-         auto cond = read_sexpr(s);
-         auto form = read_sexpr(s);
-         return s.alloc.make_include(cond, form);
-      }
-
-      static const Syntax*
-      finish_exclude(Reader::State& s) {
-         ++s.cur;
-         auto cond = read_sexpr(s);
-         auto form = read_sexpr(s);
-         return s.alloc.make_exclude(cond, form);
-      }
-
-      static const Syntax*
-      read_sharp_et_al(Reader::State& s) {
-         if (++s.cur >= s.bytes.end)
-            syntax_error("end-of-input reached after sharp sign");
-         switch (*s.cur) {
-         case '(':  return finish_literal_vector(s);
-         case '\'': return finish_function(s);
-         case ':': return finish_uninterned_symbol(s);
-         case '.': return finish_readtime_eval(s);
-         case '\\': return finish_character(s);
-         case '+': return finish_include(s);
-         case '-': return finish_exclude(s);
-
-         default:
-            if (isdigit(*s.cur))
-               return finish_anchor_or_reference(s);
-            syntax_error("syntax error after sharp-sign");
-         }
-         return nullptr;
-      }
-
-      // We have just seen a dot; read the tail and the closing parenthesis.
-      static const Syntax*
-      finish_dotted_list(Reader::State& s, std::vector<const Syntax*>& elts) {
-         ++s.cur;               // Skip dot sign.
-         auto x = read_sexpr(s);
-         if (x == nullptr)
-            syntax_error("missing expression after dot sign");
-         if (not skip_blank(s) or *s.cur != ')')
-            syntax_error("missing closing parenthesis");
-         ++s.cur;
-         elts.push_back(x);
-         return s.alloc.make_list(elts, true);
-      }
-
-      static const Syntax*
-      read_pair(Reader::State& s) {
-         ++s.cur;               // skip opening parenthesis
-         std::vector<const Syntax*> elts { };
-         while (skip_blank(s))
-            switch (*s.cur) {
-            case '.':
-               if (elts.empty())
-                  syntax_error("missing expression before dot sign.");
-               return finish_dotted_list(s, elts);
-
-            case ')':
-               ++s.cur;
-               return s.alloc.make_list(elts);
-
-            default:
-               if (auto x = read_sexpr(s))
-                  elts.push_back(x);
-               else
-                  syntax_error("unfinished pair expression");
-               break;
-            }
-         syntax_error("end-of-input while looking for closing parenthesis");
-         return nullptr;
-      }
-
-      static const Syntax*
-      read_sexpr(Reader::State& s) {
-         while (skip_blank(s))
-            switch (*s.cur) {
-            case ';': skip_to_eol(s); break;
-            case '\'': return read_quote(s);
-            case '`': return read_backquote(s);
-            case '|': return read_absolute_symbol(s);
-            case '#': return read_sharp_et_al(s);
-            case '(': return read_pair(s);
-            default: return read_atom(s);
-            }
-         return nullptr;
-      }
-
-      const Syntax*
-      Reader::read() {
-         return read_sexpr(st);
-      }
-
-      const Byte*
-      Reader::position(Ordinal p) {
-         st.cur = st.bytes.start + p;
-         st.line = st.cur;
-         // while (st.line > st.start and st.line[-1] != '\n')
-         //    --st.line;
-         return st.cur;
-      }
-
-   }
-}
author	Gabriel Dos Reis <gdr@axiomatics.org>	2017-01-15 09:14:39 -0800
committer	Gabriel Dos Reis <gdr@axiomatics.org>	2017-01-15 09:14:39 -0800
commit	cea95f2cc443a5bee5cb69476ae73fd99d32fc0d (patch)
tree	ca8765a4f777fbc131423b13e17ca2be56b6a251 /src/syntax/sexpr.cc
parent	77be242e6af75148771620fcbbaad191d1c11b60 (diff)
download	open-axiom-cea95f2cc443a5bee5cb69476ae73fd99d32fc0d.tar.gz