aboutsummaryrefslogtreecommitdiff
path: root/src/syntax/sexpr.cc
diff options
context:
space:
mode:
authorGabriel Dos Reis <gdr@axiomatics.org>2017-01-15 09:14:39 -0800
committerGabriel Dos Reis <gdr@axiomatics.org>2017-01-15 09:14:39 -0800
commitcea95f2cc443a5bee5cb69476ae73fd99d32fc0d (patch)
treeca8765a4f777fbc131423b13e17ca2be56b6a251 /src/syntax/sexpr.cc
parent77be242e6af75148771620fcbbaad191d1c11b60 (diff)
downloadopen-axiom-cea95f2cc443a5bee5cb69476ae73fd99d32fc0d.tar.gz
Add s-expression parsing to libopen-axiom-core.a
Diffstat (limited to 'src/syntax/sexpr.cc')
-rw-r--r--src/syntax/sexpr.cc670
1 files changed, 0 insertions, 670 deletions
diff --git a/src/syntax/sexpr.cc b/src/syntax/sexpr.cc
deleted file mode 100644
index f9d76825..00000000
--- a/src/syntax/sexpr.cc
+++ /dev/null
@@ -1,670 +0,0 @@
-// Copyright (C) 2010-2013, Gabriel Dos Reis.
-// All rights reserved.
-// Written by Gabriel Dos Reis.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// - Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in
-// the documentation and/or other materials provided with the
-// distribution.
-//
-// - Neither the name of The Numerical Algorithms Group Ltd. nor the
-// names of its contributors may be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// --% Author: Gabriel Dos Reis.
-
-#include <ctype.h>
-#include <string.h>
-#include <iostream>
-#include <iterator>
-#include <open-axiom/sexpr>
-#include <open-axiom/FileMapping>
-#include <open-axiom/diagnostics>
-
-namespace OpenAxiom {
- namespace Sexpr {
- static void
- invalid_character(Reader::State& s) {
- auto line = std::to_string(s.bytes.lineno);
- auto column = std::to_string(s.cur - s.line);
- auto msg = "invalid character on line " + line +
- " and column " + column;
- if (isprint(*s.cur))
- throw Diagnostics::BasicError(msg + ": " + std::string(1, *s.cur));
- throw Diagnostics::BasicError(msg + " with code " + std::to_string(*s.cur));
- }
-
- static void
- syntax_error(const std::string& s) {
- throw Diagnostics::BasicError(s);
- }
-
- // Return true if character `c' introduces a blank.
- static bool
- is_blank(char c) {
- return c == ' ' or c == '\t' or c == '\v'
- or c == '\n' or c == '\f' or c == '\r';
- }
-
- // Return true if the character `c' introduces a delimiter.
- static bool
- is_delimiter(char c) {
- return is_blank(c)
- or c == '(' or c == ')' or c == '\''
- or c == '`' or c == '#';
- }
-
- // Move the cursor past all consecutive blank characters, and
- // return true if there are more input characters to consider.
- static bool
- skip_blank(Reader::State& s) {
- for (bool done = false; s.cur < s.bytes.end and not done; )
- switch (*s.cur) {
- case '\n':
- ++s.bytes.lineno;
- s.line = ++s.cur;
- break;
- case ' ': case '\t': case '\v': case '\r': case '\f':
- ++s.cur;
- break;
- default: done = true; break;
- }
- return s.cur < s.bytes.end;
- }
-
- // Move `cur' to end-of-line marker.
- static void
- skip_to_eol(Reader::State& s) {
- // FIXME: properly handle CR+LF.
- while (s.cur < s.bytes.end and *s.cur != '\n')
- ++s.cur;
- }
-
- // Move `cur' one-past a non-esacaped character `c'.
- // Return true if the character was seen.
- static bool
- skip_to_nonescaped_char(Reader::State& s, char c) {
- for (bool saw_escape = false; s.cur < s.bytes.end; ++s.cur)
- if (saw_escape)
- saw_escape = false;
- else if (*s.cur == '\\')
- saw_escape = true;
- else if (*s.cur == c) {
- ++s.cur;
- return true;
- }
- return false;
- }
-
- // Move the cursor past the closing quote of string literal.
- // Return true if the closing quote was effectively seen.
- static inline bool
- skip_to_quote(Reader::State& s) {
- return skip_to_nonescaped_char(s, '"');
- }
-
- template<typename Pred>
- static bool
- advance_while(Reader::State& s, Pred p) {
- while (s.cur < s.bytes.end and p(*s.cur))
- ++s.cur;
- return s.cur < s.bytes.end;
- }
-
- // Return true if the character `c' be part of a non-absolute
- // identifier.
- static bool
- identifier_part(Byte c) {
- switch (c) {
- case '+': case '-': case '*': case '/': case '%': case '^':
- case '~': case '@': case '$': case '&': case '=':
- case '<': case '>': case '?': case '!': case '_':
- case '[': case ']': case '{': case '}':
- return true;
- default:
- return isalnum(c);
- }
- }
-
- // -- AtomSyntax --
- AtomSyntax::AtomSyntax(const Lexeme& t) : lex(t) { }
-
- // -- IntegerSyntax --
- IntegerSyntax::IntegerSyntax(const Lexeme& t) : AtomSyntax(t) { }
-
- void
- IntegerSyntax::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // -- CharacterSyntax --
- CharacterSyntax::CharacterSyntax(const Lexeme& t) : AtomSyntax(t) { }
-
- void
- CharacterSyntax::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // -- StringSyntax --
- StringSyntax::StringSyntax(const Lexeme& t) : AtomSyntax(t) { }
-
- void
- StringSyntax::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // -- SymbolSyntax --
- SymbolSyntax::SymbolSyntax(const Lexeme& t, Kind k)
- : AtomSyntax(t), sort(k)
- { }
-
- void
- SymbolSyntax::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // -- AnchorSyntax --
- AnchorSyntax::AnchorSyntax(size_t t, const Syntax* s) : tag(t), val(s) { }
-
- void
- AnchorSyntax::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // -- ReferenceSyntax --
- ReferenceSyntax::ReferenceSyntax(const Lexeme& t, Ordinal n)
- : AtomSyntax(t), pos(n)
- { }
-
- void
- ReferenceSyntax::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // -- QuoteSyntax --
- QuoteSyntax::QuoteSyntax(const Syntax* s)
- : unary_form<QuoteSyntax>(s)
- { }
-
- // -- AntiquoteSyntax --
- AntiquoteSyntax::AntiquoteSyntax(const Syntax* s)
- : unary_form<AntiquoteSyntax>(s)
- { }
-
- // -- Expand --
- Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { }
-
- // -- Eval --
- Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { }
-
- // -- Splice --
- Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { }
-
- // -- Function --
- Function::Function(const Syntax* s) : unary_form<Function>(s) { }
-
- // -- Include --
- Include::Include(const Syntax* c, const Syntax* s)
- : binary_form<Include>(c, s)
- { }
-
- // -- Exclude --
- Exclude::Exclude(const Syntax* c, const Syntax* s)
- : binary_form<Exclude>(c, s)
- { }
-
- // -- ListSyntax --
- ListSyntax::ListSyntax() : dot(false) { }
-
- ListSyntax::ListSyntax(const base& elts, bool d)
- : base(elts), dot(d)
- { }
-
- ListSyntax::~ListSyntax() { }
-
- void
- ListSyntax::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // -- VectorSyntax --
- VectorSyntax::VectorSyntax() { }
-
- VectorSyntax::VectorSyntax(const base& elts) : base(elts) { }
-
- VectorSyntax::~VectorSyntax() { }
-
- void
- VectorSyntax::accept(Visitor& v) const {
- v.visit(*this);
- }
-
- // ---------------
- // -- Allocator --
- // ---------------
- Allocator::Allocator() { }
-
- // This destructor is defined here so that it provides
- // a single instantiation point for destructors of all
- // used templates floating around.
- Allocator::~Allocator() { }
-
- const CharacterSyntax*
- Allocator::make_character(const Lexeme& t) {
- return chars.make(t);
- }
-
- const IntegerSyntax*
- Allocator::make_integer(const Lexeme& t) {
- return ints.make(t);
- }
-
- const StringSyntax*
- Allocator::make_string(const Lexeme& t) {
- return strs.make(t);
- }
-
- const SymbolSyntax*
- Allocator::make_symbol(SymbolSyntax::Kind k, const Lexeme& t) {
- return syms.make(t, k);
- }
-
- const ReferenceSyntax*
- Allocator::make_reference(size_t i, const Lexeme& t) {
- return refs.make(t, i);
- }
-
- const AnchorSyntax*
- Allocator::make_anchor(size_t t, const Syntax* s) {
- return ancs.make(t, s);
- }
-
- const QuoteSyntax*
- Allocator::make_quote(const Syntax* s) {
- return quotes.make(s);
- }
-
- const AntiquoteSyntax*
- Allocator::make_antiquote(const Syntax* s) {
- return antis.make(s);
- }
-
- const Expand*
- Allocator::make_expand(const Syntax* s) {
- return exps.make(s);
- }
-
- const Eval*
- Allocator::make_eval(const Syntax* s) {
- return evls.make(s);
- }
-
- const Splice*
- Allocator::make_splice(const Syntax* s) {
- return spls.make(s);
- }
-
- const Function*
- Allocator::make_function(const Syntax* s) {
- return funs.make(s);
- }
-
- const Include*
- Allocator::make_include(const Syntax* c, const Syntax* s) {
- return incs.make(c, s);
- }
-
- const Exclude*
- Allocator::make_exclude(const Syntax* c, const Syntax* s) {
- return excs.make(c, s);
- }
-
- const ListSyntax*
- Allocator::make_list(const std::vector<const Syntax*>& elts, bool dot) {
- if (elts.empty())
- return &empty_list;
- return lists.make(elts, dot);
- }
-
- const VectorSyntax*
- Allocator::make_vector(const std::vector<const Syntax*>& elts) {
- if (elts.empty())
- return &empty_vector;
- return vectors.make(elts);
- }
-
- // The sequence of characters in [cur, last) consists
- // entirely of digits. Return the corresponding natural value.
- static size_t
- natural_value(const Byte* cur, const Byte* last) {
- size_t n = 0;
- for (; cur < last; ++cur)
- // FIXME: check for overflow.
- n = 10 * n + (*cur - '0');
- return n;
- }
-
- // -- Reader --
- Reader::Reader(const Byte* f, const Byte* l)
- : st{ { f, l, 1 }, f, f }
- { }
-
- Reader::Reader(const RawInput& ri)
- : st { ri, ri.start, ri.start }
- { }
-
- static const Syntax* read_sexpr(Reader::State&);
-
- // Parse a string literal
- static const Syntax*
- read_string(Reader::State& s) {
- auto start = s.cur++;
- if (not skip_to_quote(s))
- syntax_error("missing closing quote sign for string literal");
- Lexeme t = { { start, s.cur }, s.bytes.lineno };
- return s.alloc.make_string(t);
- }
-
- // Parse an absolute identifier.
- static const Syntax*
- read_absolute_symbol(Reader::State& s) {
- auto start = ++s.cur;
- if (not skip_to_nonescaped_char(s, '|'))
- syntax_error("missing closing bar sign for an absolute symbol");
- Lexeme t = { { start, s.cur - 1 }, s.bytes.lineno };
- return s.alloc.make_symbol(SymbolSyntax::absolute, t);
- }
-
- // Read an atom starting with digits.
- static const Syntax*
- read_maybe_natural(Reader::State& s) {
- auto start = s.cur;
- advance_while (s, isdigit);
- if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
- Lexeme t = { { start, s.cur }, s.bytes.lineno };
- return s.alloc.make_integer(t);
- }
- advance_while(s, identifier_part);
- Lexeme t = { { start, s.cur }, s.bytes.lineno };
- return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
- }
-
- // Read an identifier.
- static const Syntax*
- read_identifier(Reader::State& s) {
- auto start = s.cur;
- advance_while(s, identifier_part);
- Lexeme t = { { start, s.cur }, s.bytes.lineno };
- return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
- }
-
- // Read an atom starting with a '+' or '-' sign; this
- // should be identifier, or a signed integer.
- static const Syntax*
- read_maybe_signed_number(Reader::State& s) {
- auto start = s.cur++;
- if (s.cur < s.bytes.end and isdigit(*s.cur)) {
- advance_while(s, isdigit);
- if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
- Lexeme t = { { start, s.cur }, s.bytes.lineno };
- return s.alloc.make_integer(t);
- }
- }
- advance_while(s, identifier_part);
- Lexeme t = { { start, s.cur }, s.bytes.lineno };
- return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
- }
-
- static const Syntax*
- read_keyword(Reader::State& s) {
- auto start = s.cur++;
- advance_while(s, identifier_part);
- Lexeme t = { { start, s.cur }, s.bytes.lineno };
- return s.alloc.make_symbol(SymbolSyntax::keyword, t);
- }
-
- // Read an atom.
- static const Syntax*
- read_atom(Reader::State& s) {
- switch (*s.cur) {
- case '"': return read_string(s);
- case ':': return read_keyword(s);
- case '-': case '+': return read_maybe_signed_number(s);
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- return read_maybe_natural(s);
-
- default:
- if (identifier_part(*s.cur))
- return read_identifier(s);
- invalid_character(s);
- ++s.cur;
- return nullptr;
- }
- }
-
- // Parse a quote expression.
- static const Syntax*
- read_quote(Reader::State& s) {
- ++s.cur; // skip the quote character
- auto x = read_sexpr(s);
- if (x == nullptr)
- syntax_error("end of input reached after quote sign");
- return s.alloc.make_quote(x);
- }
-
- // Parse a backquote expression.
- static const Syntax*
- read_backquote(Reader::State& s) {
- ++s.cur; // skip the backquote character
- auto x = read_sexpr(s);
- if (x == nullptr)
- syntax_error("end of input reached after backquote sign");
- return s.alloc.make_antiquote(x);
- }
-
- // We've just seen "#(" indicating the start of a literal
- // vector. Read the elements and return the corresponding form.
- static const Syntax*
- finish_literal_vector(Reader::State& s) {
- ++s.cur; // Skip the open paren.
- std::vector<const Syntax*> elts { };
- while (skip_blank(s) and *s.cur != ')') {
- if (auto x = read_sexpr(s))
- elts.push_back(x);
- else
- syntax_error("syntax error while reading vector elements");
- }
- if (s.cur >= s.bytes.end)
- syntax_error("unfinished literal vector");
- else
- ++s.cur;
- return s.alloc.make_vector(elts);
- }
-
- // We've just seen the sharp sign followed by a digit. We assume
- // we are about to read an anchor or a back reference.
- static const Syntax*
- finish_anchor_or_reference(Reader::State& s) {
- auto start = s.cur;
- advance_while(s, isdigit);
- if (s.cur >= s.bytes.end)
- syntax_error("end-of-input after sharp-number sign");
- const Byte c = *s.cur;
- if (c != '#' and c != '=')
- syntax_error("syntax error after sharp-number-equal sign");
- Lexeme t = { { start, s.cur }, s.bytes.lineno };
- auto n = natural_value(start, s.cur);
- ++s.cur;
- if (c == '#')
- return s.alloc.make_reference(n, t);
- auto x = read_sexpr(s);
- if (x == nullptr)
- syntax_error("syntax error after sharp-number-equal sign");
- return s.alloc.make_anchor(n, x);
- }
-
- static const Syntax*
- finish_function(Reader::State& s) {
- ++s.cur; // skip quote sign.
- auto x = read_sexpr(s);
- if (x == nullptr)
- syntax_error("missing function designator after sharp-quote sign");
- return s.alloc.make_function(x);
- }
-
- static const Syntax*
- finish_uninterned_symbol(Reader::State& s) {
- ++s.cur; // skip colon sign.
- auto start = s.cur;
- advance_while(s, identifier_part);
- Lexeme t = { { start, s.cur }, s.bytes.lineno };
- return s.alloc.make_symbol(SymbolSyntax::uninterned, t);
- }
-
- static const Syntax*
- finish_readtime_eval(Reader::State& s) {
- ++s.cur; // skip dot sign.
- auto x = read_sexpr(s);
- if (x == nullptr)
- syntax_error("parse error after sharp-dot sign");
- return s.alloc.make_eval(x);
- }
-
- static const Syntax*
- finish_character(Reader::State& s) {
- ++s.cur; // skip backslash sign
- auto start = s.cur;
- advance_while(s, identifier_part);
- Lexeme t = { { start, s.cur }, s.bytes.lineno };
- return s.alloc.make_character(t);
- }
-
- static const Syntax*
- finish_include(Reader::State& s) {
- ++s.cur;
- auto cond = read_sexpr(s);
- auto form = read_sexpr(s);
- return s.alloc.make_include(cond, form);
- }
-
- static const Syntax*
- finish_exclude(Reader::State& s) {
- ++s.cur;
- auto cond = read_sexpr(s);
- auto form = read_sexpr(s);
- return s.alloc.make_exclude(cond, form);
- }
-
- static const Syntax*
- read_sharp_et_al(Reader::State& s) {
- if (++s.cur >= s.bytes.end)
- syntax_error("end-of-input reached after sharp sign");
- switch (*s.cur) {
- case '(': return finish_literal_vector(s);
- case '\'': return finish_function(s);
- case ':': return finish_uninterned_symbol(s);
- case '.': return finish_readtime_eval(s);
- case '\\': return finish_character(s);
- case '+': return finish_include(s);
- case '-': return finish_exclude(s);
-
- default:
- if (isdigit(*s.cur))
- return finish_anchor_or_reference(s);
- syntax_error("syntax error after sharp-sign");
- }
- return nullptr;
- }
-
- // We have just seen a dot; read the tail and the closing parenthesis.
- static const Syntax*
- finish_dotted_list(Reader::State& s, std::vector<const Syntax*>& elts) {
- ++s.cur; // Skip dot sign.
- auto x = read_sexpr(s);
- if (x == nullptr)
- syntax_error("missing expression after dot sign");
- if (not skip_blank(s) or *s.cur != ')')
- syntax_error("missing closing parenthesis");
- ++s.cur;
- elts.push_back(x);
- return s.alloc.make_list(elts, true);
- }
-
- static const Syntax*
- read_pair(Reader::State& s) {
- ++s.cur; // skip opening parenthesis
- std::vector<const Syntax*> elts { };
- while (skip_blank(s))
- switch (*s.cur) {
- case '.':
- if (elts.empty())
- syntax_error("missing expression before dot sign.");
- return finish_dotted_list(s, elts);
-
- case ')':
- ++s.cur;
- return s.alloc.make_list(elts);
-
- default:
- if (auto x = read_sexpr(s))
- elts.push_back(x);
- else
- syntax_error("unfinished pair expression");
- break;
- }
- syntax_error("end-of-input while looking for closing parenthesis");
- return nullptr;
- }
-
- static const Syntax*
- read_sexpr(Reader::State& s) {
- while (skip_blank(s))
- switch (*s.cur) {
- case ';': skip_to_eol(s); break;
- case '\'': return read_quote(s);
- case '`': return read_backquote(s);
- case '|': return read_absolute_symbol(s);
- case '#': return read_sharp_et_al(s);
- case '(': return read_pair(s);
- default: return read_atom(s);
- }
- return nullptr;
- }
-
- const Syntax*
- Reader::read() {
- return read_sexpr(st);
- }
-
- const Byte*
- Reader::position(Ordinal p) {
- st.cur = st.bytes.start + p;
- st.line = st.cur;
- // while (st.line > st.start and st.line[-1] != '\n')
- // --st.line;
- return st.cur;
- }
-
- }
-}