about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: dos-reis <gdr@axiomatics.org> 2013-06-26 11:43:56 +0000
committer: dos-reis <gdr@axiomatics.org> 2013-06-26 11:43:56 +0000
commit: 7f57a915cee3c91cddd166fe9964655696666c4b (patch)
tree: d00aa4323f8dc4280e736c5e15ee8479f89eb31b /src
parent: f9e4a03a220766099d6b5fc683a58185c4805a05 (diff)
download: open-axiom-7f57a915cee3c91cddd166fe9964655696666c4b.tar.gz
Rewrite s-expression reader.
Diffstat (limited to 'src')
-rw-r--r-- src/ChangeLog 13
-rw-r--r-- src/Makefile.am 3
-rw-r--r-- src/Makefile.in 3
-rw-r--r-- src/gui/gui.pro.in 3
-rw-r--r-- src/gui/main-window.cc 24
-rw-r--r-- src/gui/main-window.h 5
-rw-r--r-- src/include/diagnostics.H 8
-rw-r--r-- src/include/sexpr.H 295
-rw-r--r-- src/include/structure.H 2
-rw-r--r-- src/syntax/sexpr.cc 1014
-rw-r--r-- src/utils/hammer.cc 2
11 files changed, 545 insertions, 827 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index e72747a2..47561b81 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,16 @@
+2013-06-26 Gabriel Dos Reis <gdr@integrable-solutions.net>
+
+ * include/sexpr.H (Lexer): Remove.
+ (Parser): Likewise.
+ (Reader): New.
+ * syntax/sexpr.cc: Propagate changes.
+ * gui/main-window.h (MainWindow::read_databases): Declare.
+ (MainWindow::display_error): Likewise.
+ * gui/main-window.cc: Implement.
+ * gui/gui.pro.in (LIBS): Include syntax library.
+ * Makefile.am (oa_src_include_headers): Add sexpr.H. Fix build
+ failure from previous commit.
+
2013-06-24 Gabriel Dos Reis <gdr@integrable-solutions.net>
* include/sexpr.H: Move from utils.
diff --git a/src/Makefile.am b/src/Makefile.am
index 9f1f04c5..d4084173 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -57,7 +57,8 @@ oa_src_include_headers = \
Input.H \
diagnostics.H \
dialect.H \
- token.H
+ token.H \
+ sexpr.H
if OA_BUILD_SMAN
OA_SMAN_TARGETS = all-sman all-clef
diff --git a/src/Makefile.in b/src/Makefile.in
index 9e37cafb..b8e9d18b 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -402,7 +402,8 @@ oa_src_include_headers = \
Input.H \
diagnostics.H \
dialect.H \
- token.H
+ token.H \
+ sexpr.H
@OA_BUILD_SMAN_TRUE@OA_SMAN_TARGETS = all-sman all-clef
@OA_BUILD_GRAPHICS_TRUE@OA_GRAPHICS_GRAPH_TARGET = all-graph
diff --git a/src/gui/gui.pro.in b/src/gui/gui.pro.in
index 6969b454..d118c08c 100644
--- a/src/gui/gui.pro.in
+++ b/src/gui/gui.pro.in
@@ -8,6 +8,7 @@ oa_targetdir = @top_builddir@/@target@
OA_INC = $${oa_targetdir}/include
OA_LIB = -L@top_builddir@/@target@/lib -lOpenAxiom
OA_IOLIB = -L@top_builddir@/src/io -lio
+OA_SYNTAX_LIB = -L@top_builddir@/src/syntax -lsyntax
## We build in release mode.
CONFIG += release
@@ -38,7 +39,7 @@ DEPENDPATH += @srcdir@
SOURCES += server.cc conversation.cc main-window.cc debate.cc main.cc
## Additional support libraries
-LIBS += $$OA_LIB $$OA_IOLIB
+LIBS += $$OA_SYNTAX_LIB $$OA_LIB $$OA_IOLIB
## C++ compiler
#QMAKE_CXX = @CXX@
diff --git a/src/gui/main-window.cc b/src/gui/main-window.cc
index 6d3eb205..1ef6ec56 100644
--- a/src/gui/main-window.cc
+++ b/src/gui/main-window.cc
@@ -36,10 +36,33 @@
#include <QMessageBox>
#include <QScrollBar>
+#include <open-axiom/diagnostics>
+#include <open-axiom/sexpr>
+#include <open-axiom/FileMapping>
#include "debate.h"
#include "main-window.h"
namespace OpenAxiom {
+ void // Report the message `s` to the user through a critical message box.
+ MainWindow::display_error(const std::string& s) {
+ QMessageBox::critical(this, tr("System error"), QString(s.c_str())); // box is parented to this window and titled "System error"
+ }
+
+ void // Run the s-expression reader over the whole interp.daase file; reader errors are shown to the user.
+ MainWindow::read_databases() {
+ try {
+ const auto& fs = server()->system_root();
+ Memory::FileMapping db { fs.dbdir() + "/interp.daase" }; // the file is looked up in the system root's database directory
+ Sexpr::Reader rd { db.begin(), db.end() }; // reader over the byte range exposed by `db`
+ while (rd.read()) // results are discarded; the loop stops when read() returns a null pointer
+ ;
+ }
+ catch(const Diagnostics::BasicError& e) {
+ display_error(e.message()); // show the diagnostic in an error box rather than letting it propagate
+ }
+ }
+
+
static void connect_server_io(MainWindow* win, Debate* debate) {
QObject::connect(win->server(), SIGNAL(readyReadStandardError()),
win, SLOT(display_error()));
@@ -70,6 +93,7 @@ namespace OpenAxiom {
// wait to be pinged before displaying a prompt. This is
// an unfortunate result of a rather awkward hack.
server()->input("");
+ read_databases();
}
MainWindow::~MainWindow() {
diff --git a/src/gui/main-window.h b/src/gui/main-window.h
index 5e08067e..499e5f66 100644
--- a/src/gui/main-window.h
+++ b/src/gui/main-window.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2011, Gabriel Dos Reis.
+// Copyright (C) 2011-2013, Gabriel Dos Reis.
// All rights reserved.
// Written by Gabriel Dos Reis.
//
@@ -48,6 +48,7 @@ namespace OpenAxiom {
~MainWindow();
Server* server() { return &srv; }
+ void display_error(const std::string&);
private slots:
void done(int, QProcess::ExitStatus);
@@ -56,6 +57,8 @@ namespace OpenAxiom {
private:
Server srv;
QTabWidget tabs;
+
+ void read_databases();
};
}
diff --git a/src/include/diagnostics.H b/src/include/diagnostics.H
index 8f877e2f..9cb0fce1 100644
--- a/src/include/diagnostics.H
+++ b/src/include/diagnostics.H
@@ -33,6 +33,7 @@
#ifndef OPENAXIOM_DIAGNOSTICS_included
#define OPENAXIOM_DIAGNOSTICS_included
+#include <string>
#include <iostream>
namespace OpenAxiom {
@@ -49,6 +50,13 @@ namespace OpenAxiom {
std::ostream* out;
std::ostream* err;
};
+
+ struct BasicError { // Error object that carries a textual message; it is the type thrown for s-expression syntax errors.
+ explicit BasicError(const std::string& s) : msg(s) { } // keeps its own copy of `s`
+ const std::string& message() const { return msg; } // read-only access to the stored message
+ protected:
+ std::string msg; // protected rather than private, so derived types can access it
+ };
}
}
diff --git a/src/include/sexpr.H b/src/include/sexpr.H
index a9371139..73e21f31 100644
--- a/src/include/sexpr.H
+++ b/src/include/sexpr.H
@@ -45,22 +45,14 @@
#include <iosfwd>
#include <vector>
#include <open-axiom/storage>
-#include <open-axiom/string-pool>
#include <open-axiom/token>
namespace OpenAxiom {
namespace Sexpr {
- struct BasicError {
- explicit BasicError(const std::string& s) : msg(s) { }
- const std::string& message() const { return msg; }
- protected:
- std::string msg;
- };
-
- // -----------
- // -- Token --
- // -----------
- struct Token {
+ // ------------
+ // -- Lexeme --
+ // ------------
+ struct Lexeme {
enum Type {
unknown, // unidentified token
semicolon = token::value(";"), // comment
@@ -87,35 +79,8 @@ namespace OpenAxiom {
sharp_integer_sharp // back reference, #n#
};
- Type type; // class of this token
- BasicString lexeme; // characters making up this token
- };
-
- // Print a token object on an output stream.
- // Note: this function is for debugging purpose; in particular
- // it does not `prettyprint' tokens.
- std::ostream& operator<<(std::ostream&, const Token&);
-
- // -----------
- // -- Lexer --
- // -----------
- // An object of this type transforms a sequence of characters
- // into a sequence of tokens as defined above.
- // A lexer does not manage memory itself. Rather, it delegates
- // storage allocation for lexemes and tokens to specialized
- // agents used to construct it.
- struct Lexer {
- Lexer(StringPool& pool, std::vector<Token>& toks)
- : strings(pool), tokens(toks) { }
-
- const Byte* tokenize(const Byte*, const Byte*);
- BasicString intern(const Byte* s, size_t n) {
- return strings.intern(s, n);
- }
-
- private:
- StringPool& strings; // where to allocate lexemes from
- std::vector<Token>& tokens; // where to deposite tokens.
+ std::pair<const Byte*, const Byte*> boundary;
+ Ordinal line;
};
// ------------
@@ -127,59 +92,59 @@ namespace OpenAxiom {
virtual void accept(Visitor&) const = 0;
};
- // ----------
- // -- Atom --
- // ----------
+ // ----------------
+ // -- AtomSyntax --
+ // ----------------
// An atom is a syntax object consisting of exatly one token.
// This should not be confused with the notion of atom
// in Lisp languages.
- struct Atom : Syntax {
- const Token& token() const { return tok; }
- BasicString lexeme() const { return tok.lexeme; }
+ struct AtomSyntax : Syntax {
+ const Lexeme& lexeme() const { return lex; }
void accept(Visitor&) const;
protected:
- const Token tok;
- Atom(const Token&);
+ Lexeme lex;
+ explicit AtomSyntax(const Lexeme&);
};
- // -------------
- // -- Integer --
- // -------------
+ // -------------------
+ // -- IntegerSyntax --
+ // -------------------
// Integer literal syntax objects
- struct Integer : Atom {
- explicit Integer(const Token&);
+ struct IntegerSyntax : AtomSyntax {
+ explicit IntegerSyntax(const Lexeme&);
void accept(Visitor&) const;
};
- // ---------------
- // -- Character --
- // ---------------
+ // ---------------------
+ // -- CharacterSyntax --
+ // ---------------------
// Character literal syntax objects.
- struct Character : Atom {
- explicit Character(const Token&);
+ struct CharacterSyntax : AtomSyntax {
+ explicit CharacterSyntax(const Lexeme&);
void accept(Visitor&) const;
};
- // ------------
- // -- String --
- // ------------
+ // ------------------
+ // -- StringSyntax --
+ // ------------------
// Striing literal syntax objjects.
- struct String : Atom {
- explicit String(const Token&);
+ struct StringSyntax : AtomSyntax {
+ explicit StringSyntax(const Lexeme&);
void accept(Visitor&) const;
};
- // ------------
- // -- Symbol --
- // ------------
- struct Symbol : Atom {
+ // ------------------
+ // -- SymbolSyntax --
+ // ------------------
+ struct SymbolSyntax : AtomSyntax {
enum Kind {
uninterned, // uninterned symbol
ordinary, // an interned symbol
+ absolute, // case-sensitive symbol
keyword // a keyword symbol
};
- Symbol(const Token&, Kind);
- Kind kin() const { return sort; }
+ SymbolSyntax(const Lexeme&, Kind);
+ Kind kind() const { return sort; }
void accept(Visitor&) const;
private:
const Kind sort;
@@ -189,20 +154,20 @@ namespace OpenAxiom {
// -- Reference --
// ---------------
// Back reference object to a syntax object.
- struct Reference : Atom {
- Reference(const Token&, size_t);
+ struct Reference : AtomSyntax {
+ Reference(const Lexeme&, Ordinal);
size_t tag() const { return pos; }
void accept(Visitor&) const;
private:
- const size_t pos;
+ Ordinal pos;
};
- // ------------
- // -- Anchor --
- // ------------
+ // ------------------
+ // -- AnchorSyntax --
+ // ------------------
// Base anchor syntax object.
- struct Anchor : Syntax {
- Anchor(size_t, const Syntax*);
+ struct AnchorSyntax : Syntax {
+ AnchorSyntax(size_t, const Syntax*);
size_t ref() const { return tag; }
const Syntax* value() const { return val; }
void accept(Visitor&) const;
@@ -222,20 +187,20 @@ namespace OpenAxiom {
const Syntax* const form;
};
- // -----------
- // -- Quote --
- // -----------
+ // -----------------
+ // -- QuoteSyntax --
+ // -----------------
// Quotation syntax object.
- struct Quote : unary_form<Quote> {
- explicit Quote(const Syntax*);
+ struct QuoteSyntax : unary_form<QuoteSyntax> {
+ explicit QuoteSyntax(const Syntax*);
};
- // ---------------
- // -- Antiquote --
- // ---------------
+ // ---------------------
+ // -- AntiquoteSyntax --
+ // ---------------------
// Quasi-quotation syntax object.
- struct Antiquote : unary_form<Antiquote> {
- explicit Antiquote(const Syntax*);
+ struct AntiquoteSyntax : unary_form<AntiquoteSyntax> {
+ explicit AntiquoteSyntax(const Syntax*);
};
// ------------
@@ -271,15 +236,6 @@ namespace OpenAxiom {
};
// -------------
- // -- DotTail --
- // -------------
- // Objects of this type represents the tail of syntactic
- // objects denoting dotted pair syntax `(a . b)'.
- struct DotTail : unary_form<DotTail> {
- explicit DotTail(const Syntax*);
- };
-
- // -------------
// -- Include --
// -------------
// Conditional inclusion syntax object
@@ -296,10 +252,10 @@ namespace OpenAxiom {
};
// ----------
- // -- List --
+ // -- ListSyntax --
// ----------
// List syntax objects.
- struct List : Syntax, private std::vector<const Syntax*> {
+ struct ListSyntax : Syntax, private std::vector<const Syntax*> {
typedef std::vector<const Syntax*> base;
using base::const_iterator;
using base::begin;
@@ -307,17 +263,20 @@ namespace OpenAxiom {
using base::size;
using base::empty;
- List();
- explicit List(const base&);
- ~List();
+ ListSyntax();
+ ListSyntax(const base&, bool);
+ ~ListSyntax();
void accept(Visitor&) const;
+ bool dotted() const { return dot; }
+ private:
+ bool dot;
};
// ------------
- // -- Vector --
+ // -- VectorSyntax --
// ------------
- // Vector syntax objects.
- struct Vector : Syntax, private std::vector<const Syntax*> {
+ // VectorSyntax syntax objects.
+ struct VectorSyntax : Syntax, private std::vector<const Syntax*> {
typedef std::vector<const Syntax*> base;
using base::const_iterator;
using base::begin;
@@ -326,9 +285,9 @@ namespace OpenAxiom {
using base::operator[];
using base::empty;
- Vector();
- explicit Vector(const base&);
- ~Vector();
+ VectorSyntax();
+ explicit VectorSyntax(const base&);
+ ~VectorSyntax();
void accept(Visitor&) const;
};
@@ -336,24 +295,23 @@ namespace OpenAxiom {
// -- Syntax::Visitor --
// ---------------------
struct Syntax::Visitor {
- virtual void visit(const Atom&) = 0;
- virtual void visit(const Integer&);
- virtual void visit(const Character&);
- virtual void visit(const String&);
- virtual void visit(const Symbol&);
+ virtual void visit(const AtomSyntax&) = 0;
+ virtual void visit(const IntegerSyntax&);
+ virtual void visit(const CharacterSyntax&);
+ virtual void visit(const StringSyntax&);
+ virtual void visit(const SymbolSyntax&);
virtual void visit(const Reference&);
- virtual void visit(const Anchor&) = 0;
- virtual void visit(const Quote&) = 0;
- virtual void visit(const Antiquote&) = 0;
+ virtual void visit(const AnchorSyntax&) = 0;
+ virtual void visit(const QuoteSyntax&) = 0;
+ virtual void visit(const AntiquoteSyntax&) = 0;
virtual void visit(const Expand&) = 0;
virtual void visit(const Eval&) = 0;
virtual void visit(const Splice&) = 0;
virtual void visit(const Function&) = 0;
virtual void visit(const Include&) = 0;
virtual void visit(const Exclude&) = 0;
- virtual void visit(const DotTail&) = 0;
- virtual void visit(const List&) = 0;
- virtual void visit(const Vector&) = 0;
+ virtual void visit(const ListSyntax&) = 0;
+ virtual void visit(const VectorSyntax&) = 0;
};
template<typename T>
@@ -370,90 +328,59 @@ namespace OpenAxiom {
Allocator();
~Allocator();
- const Integer* make_integer(const Token&);
- const Character* make_character(const Token&);
- const String* make_string(const Token&);
- const Symbol* make_symbol(const Token&, Symbol::Kind);
- const Reference* make_reference(const Token&, size_t);
- const Anchor* make_anchor(size_t, const Syntax*);
- const Quote* make_quote(const Syntax*);
- const Antiquote* make_antiquote(const Syntax*);
+ const IntegerSyntax* make_integer(const Lexeme&);
+ const CharacterSyntax* make_character(const Lexeme&);
+ const StringSyntax* make_string(const Lexeme&);
+ const SymbolSyntax* make_symbol(SymbolSyntax::Kind, const Lexeme&);
+ const Reference* make_reference(size_t, const Lexeme&);
+ const AnchorSyntax* make_anchor(size_t, const Syntax*);
+ const QuoteSyntax* make_quote(const Syntax*);
+ const AntiquoteSyntax* make_antiquote(const Syntax*);
const Expand* make_expand(const Syntax*);
const Eval* make_eval(const Syntax*);
const Splice* make_splice(const Syntax*);
const Function* make_function(const Syntax*);
const Include* make_include(const Syntax*);
const Exclude* make_exclude(const Syntax*);
- const DotTail* make_dot_tail(const Syntax*);
- const List* make_list(const std::vector<const Syntax*>&);
- const Vector* make_vector(const std::vector<const Syntax*>&);
+ const ListSyntax* make_list(const std::vector<const Syntax*>&, bool = false);
+ const VectorSyntax* make_vector(const std::vector<const Syntax*>&);
private:
- Memory::Factory<Integer> ints;
- Memory::Factory<Character> chars;
- Memory::Factory<String> strs;
- Memory::Factory<Symbol> syms;
- Memory::Factory<Anchor> ancs;
+ Memory::Factory<IntegerSyntax> ints;
+ Memory::Factory<CharacterSyntax> chars;
+ Memory::Factory<StringSyntax> strs;
+ Memory::Factory<SymbolSyntax> syms;
+ Memory::Factory<AnchorSyntax> ancs;
Memory::Factory<Reference> refs;
- Memory::Factory<Quote> quotes;
- Memory::Factory<Antiquote> antis;
+ Memory::Factory<QuoteSyntax> quotes;
+ Memory::Factory<AntiquoteSyntax> antis;
Memory::Factory<Expand> exps;
Memory::Factory<Function> funs;
Memory::Factory<Include> incs;
Memory::Factory<Exclude> excs;
Memory::Factory<Eval> evls;
Memory::Factory<Splice> spls;
- Memory::Factory<DotTail> tails;
- Memory::Factory<List> lists;
- Memory::Factory<Vector> vectors;
- List empty_list;
- Vector empty_vector;
+ Memory::Factory<ListSyntax> lists;
+ Memory::Factory<VectorSyntax> vectors;
+ ListSyntax empty_list;
+ VectorSyntax empty_vector;
};
- // ------------
- // -- Parser --
- // ------------
- // An object of this type transforms a sequence of tokens
- // into a sequence of syntax objects.
- // A parser object does not manage memory itself. Rather, it delegates
- // storage allocation for syntax objects to specialized
- // agents used to construct it.
- struct Parser {
- Parser(Allocator&, std::vector<const Syntax*>&);
- const Token* parse(const Token*, const Token*);
- private:
- Allocator& alloc;
- std::vector<const Syntax*>& syns;
-
- const Symbol* parse_symbol(const Token*&, const Token*);
- const Character* parse_character(const Token*&, const Token*);
- const Anchor* parse_anchor(const Token*&, const Token*);
- const Reference* parse_reference(const Token*&, const Token*);
- const Symbol* parse_uninterned(const Token*&, const Token*);
- const Function* parse_function(const Token*&, const Token*);
- const Quote* parse_quote(const Token*&, const Token*);
- const Antiquote* parse_antiquote(const Token*&, const Token*);
- const Include* parse_include(const Token*&, const Token*);
- const Exclude* parse_exclude(const Token*&, const Token*);
- const Expand* parse_expand(const Token*&, const Token*);
- const Eval* parse_eval(const Token*&, const Token*);
- const Splice* parse_splice(const Token*&, const Token*);
- const Vector* parse_vector(const Token*&, const Token*);
- const List* parse_list(const Token*&, const Token*);
- const Syntax* parse_syntax(const Token*&, const Token*);
- };
+ // -- Reader --
+ struct Reader {
+ struct State {
+ const Byte* start;
+ const Byte* end;
+ const Byte* cur;
+ const Byte* line;
+ Ordinal lineno;
+ Allocator alloc;
+ };
- // ------------
- // -- Module --
- // ------------
- // Entire s-expression input file.
- struct Module : std::vector<const Syntax*> {
- explicit Module(const std::string&);
- const std::string& name() const { return nm; }
+ Reader(const Byte*, const Byte*);
+ const Syntax* read();
private:
- const std::string nm;
- StringPool raw_strs;
- Allocator allocator;
+ State st;
};
}
}
diff --git a/src/include/structure.H b/src/include/structure.H
index d9434423..33c084f2 100644
--- a/src/include/structure.H
+++ b/src/include/structure.H
@@ -33,8 +33,6 @@
#ifndef OPENAXIOM_STRUCTURE_included
#define OPENAXIOM_STRUCTURE_included
-#include <iterator>
-
namespace OpenAxiom {
// -- helper classes for structural abstractions --
namespace structure {
diff --git a/src/syntax/sexpr.cc b/src/syntax/sexpr.cc
index 14113164..0a3b8071 100644
--- a/src/syntax/sexpr.cc
+++ b/src/syntax/sexpr.cc
@@ -38,73 +38,24 @@
#include <iterator>
#include <open-axiom/sexpr>
#include <open-axiom/FileMapping>
+#include <open-axiom/diagnostics>
namespace OpenAxiom {
namespace Sexpr {
- template<typename T, int N>
- static inline int
- length(const T(&)[N]) {
- return N;
- }
-
- template<typename Sequence>
- static inline typename Sequence::const_pointer
- begin_ptr(const Sequence& s) {
- return &*s.begin();
- }
-
- template<typename Sequence>
- static inline typename Sequence::const_pointer
- end_ptr(const Sequence& s) {
- return s.empty() ? 0 : &*s.begin() + s.size();
- }
-
- std::ostream&
- operator<<(std::ostream& os, const Token& t) {
- switch (t.type) {
- case Token::semicolon: os << "SEMICOLON"; break;
- case Token::dot: os << "DOT"; break;
- case Token::comma: os << "COMMA"; break;
- case Token::open_paren: os << "OPEN_PAREN"; break;
- case Token::close_paren: os << "CLOSE_PAREN"; break;
- case Token::apostrophe: os << "APOSTROPHE"; break;
- case Token::backquote: os << "BACKQUOTE"; break;
- case Token::backslash: os << "BACKSLASH"; break;
- case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break;
- case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break;
- case Token::sharp_colon: os << "SHARP_COLON"; break;
- case Token::sharp_plus: os << "SHARP_PLUS"; break;
- case Token::sharp_minus: os << "SHARP_MINUS"; break;
- case Token::sharp_dot: os << "SHARP_DOT"; break;
- case Token::comma_at: os << "COMMA_AT"; break;
- case Token::integer: os << "INTEGER"; break;
- case Token::character: os << "CHARACTER"; break;
- case Token::string: os << "STRING"; break;
- case Token::identifier: os << "IDENTIFIER"; break;
- case Token::sharp_integer_sharp:
- os << "SHARP_INTEGER_SHARP"; break;
- case Token::sharp_integer_equal:
- os << "SHARP_INTEGER_EQUAL"; break;
- default: os << "UNKNOWN"; break;
- }
- os << '(';
- if (t.lexeme != 0) {
- os << '"';
- std::copy(t.lexeme->begin(), t.lexeme->end(),
- std::ostream_iterator<char>(os));
- os << '"';
- }
- else
- os << "<missing>";
- return os << ')';
+ static void // Always throws Diagnostics::BasicError describing the character under the cursor and where it is.
+ invalid_character(Reader::State& s) {
+ auto line = std::to_string(s.lineno);
+ auto column = std::to_string(s.cur - s.line); // 0-based offset of the cursor from `s.line`
+ auto msg = "invalid character on line " + line +
+ " and column " + column;
+ if (isprint(*s.cur)) // NOTE(review): `*s.cur` is read without a bounds check; presumably callers guarantee s.cur < s.end (confirm)
+ throw Diagnostics::BasicError(msg + ": " + std::string(1, *s.cur)); // printable: include the character itself
+ throw Diagnostics::BasicError(msg + " with code " + std::to_string(*s.cur)); // non-printable: include its numeric code instead
+ }
-
- // -----------
- // -- Lexer --
- // -----------
+
static void
syntax_error(const std::string& s) {
- throw BasicError(s);
+ throw Diagnostics::BasicError(s);
}
// Return true if character `c' introduces a blank.
@@ -122,67 +73,67 @@ namespace OpenAxiom {
or c == '`' or c == '#';
}
- // Move `cur' past all consecutive blank characters, and
- // return the new position.
- static const Byte*
- skip_blank(const Byte*& cur, const Byte* end) {
- while (cur < end and is_blank(*cur))
- ++cur;
- return cur;
+ // Move the cursor past all consecutive blank characters, and
+ // return true if there are more input characters to consider.
+ static bool
+ skip_blank(Reader::State& s) {
+ for (bool done = false; s.cur < s.end and not done; ) // stop at end of input or at the first non-blank character
+ switch (*s.cur) {
+ case '\n': // newline: bump the line counter and record where the next line starts
+ ++s.lineno;
+ s.line = ++s.cur;
+ break;
+ case ' ': case '\t': case '\v': case '\r': case '\f': // other blank characters are just consumed
+ ++s.cur;
+ break;
+ default: done = true; break; // non-blank: leave the cursor on it and end the loop
+ }
+ return s.cur < s.end; // true when input remains after the skipped blanks
+ }
// Move `cur' to end-of-line marker.
- static const Byte*
- skip_to_eol(const Byte*& cur, const Byte* end) {
+ static void
+ skip_to_eol(Reader::State& s) {
// FIXME: properly handle CR+LF.
- while (cur < end and *cur != '\n')
- ++cur;
- return cur;
- }
-
- // Move `cur' until a word boundary is reached.
- static const Byte*
- skip_to_word_boundary(const Byte*& cur, const Byte* end) {
- bool saw_escape = false;
- for (; cur < end; ++cur) {
- if (saw_escape)
- saw_escape = false;
- else if (*cur == '\\')
- saw_escape = true;
- else if (is_delimiter(*cur))
- break;
- }
- return cur;
+ while (s.cur < s.end and *s.cur != '\n')
+ ++s.cur;
}
// Move `cur' one-past a non-esacaped character `c'.
// Return true if the character was seen.
static bool
- skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) {
- bool saw_escape = false;
- for (; cur < end; ++cur)
+ skip_to_nonescaped_char(Reader::State& s, char c) {
+ for (bool saw_escape = false; s.cur < s.end; ++s.cur)
if (saw_escape)
saw_escape = false;
- else if (*cur == '\\')
+ else if (*s.cur == '\\')
saw_escape = true;
- else if (*cur == c) {
- ++cur;
+ else if (*s.cur == c) {
+ ++s.cur;
return true;
}
return false;
}
- // Move `cur' past the closing quote of string literal.
- // Return true if the closing fence was effectively seen.
+ // Move the cursor past the closing quote of string literal.
+ // Return true if the closing quote was effectively seen.
static inline bool
- skip_to_quote(const Byte*& cur, const Byte* end) {
- return skip_to_nonescaped_char(cur, end, '"');
+ skip_to_quote(Reader::State& s) {
+ return skip_to_nonescaped_char(s, '"');
+ }
+
+ template<typename Pred> // Pred is called with the byte under the cursor; its result is used as a condition
+ static bool
+ advance_while(Reader::State& s, Pred p) {
+ while (s.cur < s.end and p(*s.cur)) // advance the cursor for as long as `p` accepts the current byte
+ ++s.cur;
+ return s.cur < s.end; // true if the scan stopped before the end of input
+ }
// Return true if the character `c' be part of a non-absolute
// identifier.
static bool
- identifier_part(char c) {
+ identifier_part(Byte c) {
switch (c) {
case '+': case '-': case '*': case '/': case '%': case '^':
case '~': case '@': case '$': case '&': case '=':
@@ -194,296 +145,117 @@ namespace OpenAxiom {
}
}
- // Return true if the character `c' has a special meaning after
- // the sharp character.
- static bool
- special_after_sharp(char c) {
- return c == '(' or c == '\'' or c == ':'
- or c == '+' or c == '-' or c == '.';
- }
-
- // Return true if the sequence `[cur, end)' has a prefix that is
- // an integer followrd by the equal sign or the sharp sign.
- // `cur' is moved along the way.
- static bool
- only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) {
- while (cur < end and isdigit(*cur))
- ++cur;
- return cur < end and (*cur == '#' or *cur == '=');
- }
-
- // The token `t' was thought to designate an identifier.
- // Reclassify it as an integer if, in fact, its lexeme consists
- // entirely of digits.
- static void
- maybe_reclassify(Token& t) {
- const Byte* cur = t.lexeme->begin();
- const Byte* end = t.lexeme->end();
- while (cur < end and isdigit(*cur))
- ++cur;
- if (cur == end)
- t.type = Token::integer;
- }
-
- // Returns true if the first characters in the range
- // [cur, last) start an identifier.
- static bool
- start_symbol(const Byte* cur, const Byte* last) {
- if (cur >= last)
- return false;
- return identifier_part(*cur)
- or *cur == '|' or *cur == ':';
- }
-
- // We are processing a symbol token. Accumulate all
- // legitimate characters till the end of the token.
- static void
- skip_to_end_of_symbol(const Byte*& cur, const Byte* end) {
- const char c = *cur;
- if (*cur == '|')
- skip_to_nonescaped_char(++cur, end, c);
- else
- skip_to_word_boundary(cur, end);
- if (cur < end and *cur == ':')
- skip_to_end_of_symbol(cur, end);
- }
-
- static Token
- match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) {
- Token t = { Token::identifier, 0 };
- const Byte* start = cur;
- skip_to_end_of_symbol(cur, end);
- t.lexeme = lexer->intern(start, cur - start);
- maybe_reclassify(t);
- return t;
- }
-
- const Byte*
- Lexer::tokenize(const Byte* cur, const Byte* end) {
- while (skip_blank(cur, end) < end) {
- Token t = { Token::unknown, 0 };
- switch (*cur) {
- case ';': {
- const Byte* start = cur;
- t.type = Token::semicolon;
- skip_to_eol(cur, end);
- t.lexeme = intern(start, cur - start);
- break;
- }
-
- case '.': case '(': case ')': case '\'': case '`':
- t.type = Token::Type(token::value(*cur));
- t.lexeme = intern(cur, 1);
- ++cur;
- break;
-
- case ',': {
- const Byte* start = cur;
- if (++cur < end and *cur == '@') {
- t.type = Token::comma_at;
- ++cur;
- }
- else
- t.type = Token::comma;
- t.lexeme = intern(start, cur - start);
- break;
- }
-
- case '\\':
- t = match_maybe_symbol(this, cur, end);
- break;
-
- case '#': {
- const Byte* start = cur;
- if (cur + 1 < end and special_after_sharp(cur[1])) {
- t.type = Token::Type(token::value(cur[0], cur[1]));
- t.lexeme = intern(cur, 2);
- cur += 2;
- }
- else if (cur + 1 < end and cur[1] == '\\') {
- start = cur += 2;
- if (not isalnum(*cur))
- ++cur;
- else
- skip_to_word_boundary(cur, end);
- t.type = Token::character;
- t.lexeme = intern(start, cur - start);
- }
- else if (only_digits_before_equal_or_shap(++cur, end)) {
- t.type = *cur == '#'
- ? Token::sharp_integer_sharp
- : Token::sharp_integer_equal;
- t.lexeme = intern(start, cur - start + 1);
- ++cur;
- }
- else {
- skip_to_word_boundary(cur, end);
- t.lexeme = intern(start, cur - start);
- }
- break;
- }
-
- case '"': {
- const Byte* start = cur;
- skip_to_quote(++cur, end);
- t.type = Token::string;
- t.lexeme = intern(start, cur - start);
- break;
- }
-
- default:
- if (start_symbol(cur, end))
- t = match_maybe_symbol(this, cur, end);
- else {
- const Byte* start = cur;
- skip_to_word_boundary(++cur, end);
- t.lexeme = intern(start, cur - start);
- }
- break;
- }
- tokens.push_back(t);
- }
- return cur;
- }
-
- // ----------
- // -- Atom --
- // ----------
- Atom::Atom(const Token& t) : tok(t) { }
+ // -- AtomSyntax --
+ AtomSyntax::AtomSyntax(const Lexeme& t) : lex(t) { }
void
- Atom::accept(Visitor& v) const {
+ AtomSyntax::accept(Visitor& v) const {
v.visit(*this);
}
- // -------------
- // -- Integer --
- // -------------
- Integer::Integer(const Token& t) : Atom(t) { }
+ // -- IntegerSyntax --
+ IntegerSyntax::IntegerSyntax(const Lexeme& t) : AtomSyntax(t) { }
void
- Integer::accept(Visitor& v) const {
+ IntegerSyntax::accept(Visitor& v) const {
v.visit(*this);
}
- // ---------------
- // -- Character --
- // ---------------
- Character::Character(const Token& t) : Atom(t) { }
+ // -- CharacterSyntax --
+ CharacterSyntax::CharacterSyntax(const Lexeme& t) : AtomSyntax(t) { }
void
- Character::accept(Visitor& v) const {
+ CharacterSyntax::accept(Visitor& v) const {
v.visit(*this);
}
- // ------------
- // -- String --
- // ------------
- String::String(const Token& t) : Atom(t) { }
+ // -- StringSyntax --
+ StringSyntax::StringSyntax(const Lexeme& t) : AtomSyntax(t) { }
void
- String::accept(Visitor& v) const {
+ StringSyntax::accept(Visitor& v) const {
v.visit(*this);
}
- // ------------
- // -- Symbol --
- // ------------
- Symbol::Symbol(const Token& t, Kind k) : Atom(t), sort(k) { }
+ // -- SymbolSyntax --
+ SymbolSyntax::SymbolSyntax(const Lexeme& t, Kind k)
+ : AtomSyntax(t), sort(k)
+ { }
void
- Symbol::accept(Visitor& v) const {
+ SymbolSyntax::accept(Visitor& v) const {
v.visit(*this);
}
- // ------------
- // -- Anchor --
- // ------------
- Anchor::Anchor(size_t t, const Syntax* s) : tag(t), val(s) { }
+ // -- AnchorSyntax --
+ AnchorSyntax::AnchorSyntax(size_t t, const Syntax* s) : tag(t), val(s) { }
void
- Anchor::accept(Visitor& v) const {
+ AnchorSyntax::accept(Visitor& v) const {
v.visit(*this);
}
- // ---------------
// -- Reference --
- // ---------------
- Reference::Reference(const Token& t, size_t v) : Atom(t), pos(v) { }
+ Reference::Reference(const Lexeme& t, Ordinal n)
+ : AtomSyntax(t), pos(n)
+ { }
void
Reference::accept(Visitor& v) const {
v.visit(*this);
}
- // -----------
- // -- Quote --
- // -----------
- Quote::Quote(const Syntax* s) : unary_form<Quote>(s) { }
+ // -- QuoteSyntax --
+ QuoteSyntax::QuoteSyntax(const Syntax* s)
+ : unary_form<QuoteSyntax>(s)
+ { }
- // ---------------
- // -- Antiquote --
- // ---------------
- Antiquote::Antiquote(const Syntax* s) : unary_form<Antiquote>(s) { }
+ // -- AntiquoteSyntax --
+ AntiquoteSyntax::AntiquoteSyntax(const Syntax* s)
+ : unary_form<AntiquoteSyntax>(s)
+ { }
- // ------------
// -- Expand --
- // ------------
Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { }
- // ----------
// -- Eval --
- // ----------
Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { }
- // ------------
// -- Splice --
- // ------------
Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { }
- // --------------
// -- Function --
- // --------------
Function::Function(const Syntax* s) : unary_form<Function>(s) { }
- // -------------
// -- Include --
Include::Include(const Syntax* s) : unary_form<Include>(s) { }
- // -------------
// -- Exclude --
Exclude::Exclude(const Syntax* s) : unary_form<Exclude>(s) { }
- // -------------
- // -- DotTail --
- // -------------
- DotTail::DotTail(const Syntax* f) : unary_form<DotTail>(f) { }
+ // -- ListSyntax --
+ ListSyntax::ListSyntax() : dot(false) { }
- // ----------
- // -- List --
- // ----------
- List::List() { }
+ ListSyntax::ListSyntax(const base& elts, bool d)
+ : base(elts), dot(d)
+ { }
- List::List(const base& elts) : base(elts) { }
-
- List::~List() { }
+ ListSyntax::~ListSyntax() { }
void
- List::accept(Visitor& v) const {
+ ListSyntax::accept(Visitor& v) const {
v.visit(*this);
}
- // ------------
- // -- Vector --
- // ------------
- Vector::Vector() { }
+ // -- VectorSyntax --
+ VectorSyntax::VectorSyntax() { }
- Vector::Vector(const base& elts) : base(elts) { }
+ VectorSyntax::VectorSyntax(const base& elts) : base(elts) { }
- Vector::~Vector() { }
+ VectorSyntax::~VectorSyntax() { }
void
- Vector::accept(Visitor& v) const {
+ VectorSyntax::accept(Visitor& v) const {
v.visit(*this);
}
@@ -499,28 +271,28 @@ namespace OpenAxiom {
}
void
- Syntax::Visitor::visit(const Integer& i) {
- visit(as<Atom>(i));
+ Syntax::Visitor::visit(const IntegerSyntax& i) { // default handling for integer nodes
+ visit(as<AtomSyntax>(i)); // re-dispatch the node viewed as an AtomSyntax
}
void
- Syntax::Visitor::visit(const Character& c) {
- visit(as<Atom>(c));
+ Syntax::Visitor::visit(const CharacterSyntax& c) { // default handling for character nodes
+ visit(as<AtomSyntax>(c)); // re-dispatch the node viewed as an AtomSyntax
}
void
- Syntax::Visitor::visit(const String& s) {
- visit(as<Atom>(s));
+ Syntax::Visitor::visit(const StringSyntax& s) { // default handling for string nodes
+ visit(as<AtomSyntax>(s)); // re-dispatch the node viewed as an AtomSyntax
}
void
- Syntax::Visitor::visit(const Symbol& s) {
- visit(as<Atom>(s));
+ Syntax::Visitor::visit(const SymbolSyntax& s) { // default handling for symbol nodes
+ visit(as<AtomSyntax>(s)); // re-dispatch the node viewed as an AtomSyntax
}
void
Syntax::Visitor::visit(const Reference& r) {
- visit(as<Atom>(r));
+ visit(as<AtomSyntax>(r)); // default for references: re-dispatch the node viewed as an AtomSyntax
}
// ---------------
@@ -533,42 +305,42 @@ namespace OpenAxiom {
// used templates floating around.
Allocator::~Allocator() { }
- const Character*
- Allocator::make_character(const Token& t) {
+ const CharacterSyntax*
+ Allocator::make_character(const Lexeme& t) { // character node for lexeme t, produced by the `chars` member
return chars.make(t);
}
- const Integer*
- Allocator::make_integer(const Token& t) {
+ const IntegerSyntax*
+ Allocator::make_integer(const Lexeme& t) { // integer node for lexeme t, produced by the `ints` member
return ints.make(t);
}
- const String*
- Allocator::make_string(const Token& t) {
+ const StringSyntax*
+ Allocator::make_string(const Lexeme& t) { // string node for lexeme t, produced by the `strs` member
return strs.make(t);
}
- const Symbol*
- Allocator::make_symbol(const Token& t, Symbol::Kind k) {
+ const SymbolSyntax*
+ Allocator::make_symbol(SymbolSyntax::Kind k, const Lexeme& t) { // note: kind comes first here, but syms.make takes (lexeme, kind)
return syms.make(t, k);
}
- const Anchor*
- Allocator::make_anchor(size_t t, const Syntax* s) {
- return ancs.make(t, s);
- }
-
const Reference*
- Allocator::make_reference(const Token& t, size_t i) {
+ Allocator::make_reference(size_t i, const Lexeme& t) { // note: index comes first here, but refs.make takes (lexeme, index)
return refs.make(t, i);
}
- const Quote*
+ const AnchorSyntax*
+ Allocator::make_anchor(size_t t, const Syntax* s) { // anchor node pairing the number t with the syntax object s
+ return ancs.make(t, s);
+ }
+
+ const QuoteSyntax*
Allocator::make_quote(const Syntax* s) {
return quotes.make(s); // quote node wrapping s, produced by the `quotes` member
}
- const Antiquote*
+ const AntiquoteSyntax*
Allocator::make_antiquote(const Syntax* s) {
return antis.make(s); // antiquote node wrapping s, produced by the `antis` member
}
@@ -603,53 +375,20 @@ namespace OpenAxiom {
return excs.make(s);
}
- const DotTail*
- Allocator::make_dot_tail(const Syntax* f) {
- return tails.make(f);
- }
-
- const List*
- Allocator::make_list(const std::vector<const Syntax*>& elts) {
+ const ListSyntax*
+ Allocator::make_list(const std::vector<const Syntax*>& elts, bool dot) { // `dot` is forwarded to the ListSyntax; an empty `elts` ignores it
if (elts.empty())
return &empty_list;
- return lists.make(elts);
+ return lists.make(elts, dot); // NOTE(review): read_pair calls make_list(elts) with one argument, so `dot` presumably has a default in the header -- confirm
}
- const Vector*
+ const VectorSyntax*
Allocator::make_vector(const std::vector<const Syntax*>& elts) {
if (elts.empty())
return &empty_vector; // every empty vector is represented by the one `empty_vector` member
return vectors.make(elts);
}
- // ------------
- // -- Parser --
- // ------------
-
- // Signal a parse error
- static void
- parse_error(const std::string& s) {
- throw BasicError(s);
- }
-
- // Signal that an expected syntax object was missing
- static void
- expected_syntax(const std::string& s) {
- parse_error("expected " + s);
- }
-
- // Signal an abrupt end of input
- static void
- unexpected_end_of_input(const std::string& s) {
- parse_error("unexpected end of input after " + s);
- }
-
- // Signal a missing closing parenthesis
- static void
- missing_closer_for(const std::string& s) {
- parse_error("missing closing parenthesis for " + s);
- }
-
// The sequence of characters in [cur, last) consists
// entirely of digits. Return the corresponding natural value.
static size_t
@@ -661,274 +400,277 @@ namespace OpenAxiom {
return n;
}
- // Parse a plain identifier or a Lisp-style keyword identifier.
- const Symbol*
- Parser::parse_symbol(const Token*& cur, const Token* last) {
- Symbol::Kind kind = *cur->lexeme->begin() == ':'
- ? Symbol::keyword
- : Symbol::ordinary;
- return alloc.make_symbol(*cur++, kind);
- }
-
- // List of lower case character names
- static const char* charname[] = {
- "newline", "space", "page", "tab",
- "backspace", "return", "linefeed"
- };
-
- static bool
- equal_character_name(BasicString lhs, const char* rhs) {
- if (lhs->size() != strlen(rhs))
- return false;
- for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur)
- if (tolower(*cur) != *rhs++)
- return false;
- return true;
- }
-
- static bool
- valid_character_name(BasicString s) {
- for (int i = 0; i < length(charname); ++i)
- if (equal_character_name(s, charname[i]))
- return true;
- return false;
- }
-
- const Character*
- Parser::parse_character(const Token*& cur, const Token* last) {
- if (cur->lexeme->size() != 1
- and not valid_character_name(cur->lexeme))
- parse_error("invalid literal character syntax");
- return alloc.make_character(*cur++);
- }
-
- // Parse an anchor definition of the form #n=<syntax>
- const Anchor*
- Parser::parse_anchor(const Token*& cur, const Token* last) {
- const size_t n = natural_value(cur->lexeme->begin() + 1,
- cur->lexeme->end() - 1);
- if (++cur == last)
- unexpected_end_of_input("sharp-integer-equal sign");
- return alloc.make_anchor(n, parse_syntax(cur, last));
- }
-
- // Parse a reference to an anchor, #n#
- const Reference*
- Parser::parse_reference(const Token*& cur, const Token* last) {
- const size_t n = natural_value(cur->lexeme->begin() + 1,
- cur->lexeme->end() - 1);
- return alloc.make_reference(*cur++, n);
- }
-
- // Parse an uninterned symbol #:<identifier>
- const Symbol*
- Parser::parse_uninterned(const Token*& cur, const Token* last) {
- if (cur == last or cur->type != Token::identifier)
- expected_syntax("symbol after sharp-colon sign");
- // FIXME: check that the identifier is not a keyword.
- return alloc.make_symbol(*cur++, Symbol::uninterned);
- }
-
- // Parse a function syntax: #'<syntax>
- const Function*
- Parser::parse_function(const Token*& cur, const Token* last) {
- if (cur == last)
- unexpected_end_of_input("sharp-quote sign");
- return alloc.make_function(parse_syntax(cur, last));
- }
-
- // Parse a quotation
- const Quote*
- Parser::parse_quote(const Token*& cur, const Token* last) {
- if (cur == last)
- unexpected_end_of_input("quote sign");
- return alloc.make_quote(parse_syntax(cur, last));
- }
-
- // Parse an antiquotation
- const Antiquote*
- Parser::parse_antiquote(const Token*& cur, const Token* last) {
- if (cur == last)
- unexpected_end_of_input("backquote sign");
- return alloc.make_antiquote(parse_syntax(cur, last));
- }
-
- // Parse an expansion request form
- const Expand*
- Parser::parse_expand(const Token*& cur, const Token* last) {
- const Syntax* s = parse_syntax(cur, last);
- if (s == 0)
- unexpected_end_of_input("comma sign");
- return alloc.make_expand(s);
- }
-
- // Parse conditional inclusions
- const Include*
- Parser::parse_include(const Token*& cur, const Token* last) {
- const Syntax* s = parse_syntax(cur, last);
- if (s == 0)
- unexpected_end_of_input("sharp-plus sign");
- return alloc.make_include(s);
- }
-
- const Exclude*
- Parser::parse_exclude(const Token*& cur, const Token* last) {
- const Syntax* s = parse_syntax(cur, last);
- if (s == 0)
- unexpected_end_of_input("sharp-minus sign");
- return alloc.make_exclude(s);
- }
-
- const Eval*
- Parser::parse_eval(const Token*& cur, const Token* last) {
- const Syntax* s = parse_syntax(cur, last);
- if (s == 0)
- unexpected_end_of_input("sharp-dot sign");
- return alloc.make_eval(s);
- }
-
- const Splice*
- Parser::parse_splice(const Token*& cur, const Token* last) {
- const Syntax* s = parse_syntax(cur, last);
- if (s == 0)
- unexpected_end_of_input("comma-at sign");
- return alloc.make_splice(s);
- }
-
- // Skip tokens that are semantically blanks, e.g. comments.
- // Return true if not at end of tokens.
- static bool
- skip_ignorable_tokens(const Token*& cur, const Token* last) {
- while (cur < last and cur->type == Token::semicolon)
- ++cur;
- return cur != last;
- }
-
- // Parse a vector of syntax objects: #(s .. s)
- const Vector*
- Parser::parse_vector(const Token*& cur, const Token* last) {
- std::vector<const Syntax*> elts;
- while (skip_ignorable_tokens(cur, last)
- and cur->type != Token::close_paren)
- elts.push_back(parse_syntax(cur, last));
- if (cur == last)
- missing_closer_for("vector");
- ++cur;
- return alloc.make_vector(elts);
- }
-
- // Constructs a pair or a list syntax object.
- const List*
- Parser::parse_list(const Token*& cur, const Token* last) {
- std::vector<const Syntax*> elts;
- while (skip_ignorable_tokens(cur, last)
- and cur->type != Token::close_paren) {
- if (cur->type == Token::dot) {
- skip_ignorable_tokens(++cur, last);
- if (const Syntax* s = parse_syntax(cur, last)) {
- elts.push_back(alloc.make_dot_tail(s));
- break;
- }
+ // -- Reader: reads s-expressions from the byte range [f, l) --
+ Reader::Reader(const Byte* f, const Byte* l)
+ : st{ f, l, f, f, 1, } // NOTE(review): presumably start, end, cursor, line start and line number 1 -- confirm against the field order of Reader::State
+ { }
+
+ static const Syntax* read_sexpr(Reader::State&); // forward declaration: the readers below call it recursively
+
+ // Parse a string literal; read_atom calls this with s.cur on the opening double quote.
+ static const Syntax*
+ read_string(Reader::State& s) {
+ auto start = s.cur++; // remember where the literal begins, then step past the opening quote
+ if (not skip_to_quote(s)) // presumably moves s.cur to the end of the literal and returns false if none is found -- confirm
+ syntax_error("missing closing quote sign for string literal");
+ Lexeme t = { { start, s.cur }, s.lineno }; // lexeme spans start..s.cur and carries s.lineno
+ return s.alloc.make_string(t);
+ }
+
+ // Parse an absolute identifier, i.e. a symbol written between bar signs; s.cur is on the opening bar.
+ static const Syntax*
+ read_absolute_symbol(Reader::State& s) {
+ auto start = ++s.cur; // step past the opening bar first, so the lexeme excludes it
+ if (not skip_to_nonescaped_char(s, '|'))
+ syntax_error("missing closing bar sign for an absolute symbol");
+ Lexeme t = { { start, s.cur - 1 }, s.lineno }; // NOTE(review): the "- 1" assumes s.cur now sits one past the closing bar -- confirm against skip_to_nonescaped_char
+ return s.alloc.make_symbol(SymbolSyntax::absolute, t);
+ }
+
+ // Read an atom starting with digits: an integer if only digits precede a delimiter or the end of input, an ordinary symbol otherwise.
+ static const Syntax*
+ read_maybe_natural(Reader::State& s) {
+ auto start = s.cur;
+ advance_while (s, isdigit); // presumably moves s.cur past the leading digits -- confirm
+ if (s.cur >= s.end or is_delimiter(*s.cur)) { // digits only: this is an integer
+ Lexeme t = { { start, s.cur }, s.lineno };
+ return s.alloc.make_integer(t);
+ }
+ advance_while(s, identifier_part); // something else follows the digits: read the rest as part of a symbol
+ Lexeme t = { { start, s.cur }, s.lineno };
+ return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
+ }
+
+ // Read an identifier and return it as an ordinary symbol.
+ static const Syntax*
+ read_identifier(Reader::State& s) {
+ auto start = s.cur;
+ advance_while(s, identifier_part); // presumably moves s.cur past every identifier_part character -- confirm
+ Lexeme t = { { start, s.cur }, s.lineno }; // lexeme spans start..s.cur and carries s.lineno
+ return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
+ }
+
+ // Read an atom starting with a '+' or '-' sign; the result is
+ // either a signed integer or an ordinary symbol.
+ static const Syntax*
+ read_maybe_signed_number(Reader::State& s) {
+ auto start = s.cur++; // the lexeme keeps the sign; continue after it
+ if (s.cur < s.end and isdigit(*s.cur)) { // a digit right after the sign: try an integer
+ advance_while(s, isdigit);
+ if (s.cur >= s.end or is_delimiter(*s.cur)) { // digits run up to a delimiter or the end of input
+ Lexeme t = { { start, s.cur }, s.lineno };
+ return s.alloc.make_integer(t);
}
- elts.push_back(parse_syntax(cur, last));
}
- if (cur == last or cur->type != Token::close_paren)
- missing_closer_for("list");
- ++cur;
- return alloc.make_list(elts);
+ advance_while(s, identifier_part); // not an integer: read the rest as part of a symbol
+ Lexeme t = { { start, s.cur }, s.lineno };
+ return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
}
- Parser::Parser(Allocator& a, std::vector<const Syntax*>& v)
- : alloc(a), syns(v) { }
-
- static std::string
- to_string(BasicString s) {
- return { s->begin(), s->end() };
+ static const Syntax* // keyword symbol; read_atom calls this with s.cur on the leading ':'
+ read_keyword(Reader::State& s) {
+ auto start = s.cur++; // the lexeme keeps the leading colon
+ advance_while(s, identifier_part);
+ Lexeme t = { { start, s.cur }, s.lineno };
+ return s.alloc.make_symbol(SymbolSyntax::keyword, t);
}
- const Syntax*
- Parser::parse_syntax(const Token*& cur, const Token* last) {
- if (not skip_ignorable_tokens(cur, last))
- return 0;
-
- switch (cur->type) {
- case Token::integer:
- return alloc.make_integer(*cur++);
-
- case Token::character:
- return parse_character(cur, last);
-
- case Token::string:
- return alloc.make_string(*cur++);
-
- case Token::identifier:
- return parse_symbol(cur, last);
-
- case Token::sharp_integer_equal:
- return parse_anchor(cur, last);
-
- case Token::sharp_integer_sharp:
- return parse_reference(cur, last);
-
- case Token::sharp_colon:
- return parse_uninterned(++cur, last);
+ // Read an atom, choosing the reader from the character at s.cur; the caller must ensure s.cur is before s.end.
+ static const Syntax*
+ read_atom(Reader::State& s) {
+ switch (*s.cur) {
+ case '"': return read_string(s);
+ case ':': return read_keyword(s);
+ case '-': case '+': return read_maybe_signed_number(s);
- case Token::sharp_apostrophe:
- return parse_function(++cur, last);
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return read_maybe_natural(s);
- case Token::sharp_open_paren:
- return parse_vector(++cur, last);
-
- case Token::apostrophe:
- return parse_quote(++cur, last);
-
- case Token::open_paren:
- return parse_list(++cur, last);
-
- case Token::sharp_plus:
- return parse_include(++cur, last);
-
- case Token::sharp_minus:
- return parse_exclude(++cur, last);
-
- case Token::sharp_dot:
- return parse_eval(++cur, last);
-
- case Token::backquote:
- return parse_antiquote(++cur, last);
-
- case Token::comma:
- return parse_expand(++cur, last);
+ default:
+ if (identifier_part(*s.cur))
+ return read_identifier(s);
+ invalid_character(s); // report the offending character
+ ++s.cur; // step over it so the reader does not stay stuck on the same byte
+ return nullptr; // NOTE(review): callers also read nullptr as "no expression / end of input" -- confirm this overlap is intended
+ }
+ }
- case Token::comma_at:
- return parse_splice(++cur, last);
+ // Parse a quote expression: a quote character followed by one s-expression.
+ static const Syntax*
+ read_quote(Reader::State& s) {
+ ++s.cur; // skip the quote character
+ auto x = read_sexpr(s); // the quoted expression
+ if (x == nullptr) // read_sexpr produced no expression
+ syntax_error("end of input reached after quote sign");
+ return s.alloc.make_quote(x);
+ }
+
+ // Parse a backquote expression: a backquote character followed by one s-expression.
+ static const Syntax*
+ read_backquote(Reader::State& s) {
+ ++s.cur; // skip the backquote character
+ auto x = read_sexpr(s); // the backquoted expression
+ if (x == nullptr) // read_sexpr produced no expression
+ syntax_error("end of input reached after backquote sign");
+ return s.alloc.make_antiquote(x); // backquote forms are represented as AntiquoteSyntax
+ }
+
+ // We've just seen "#(" indicating the start of a literal
+ // vector. Read the elements up to the closing parenthesis and return the vector form.
+ static const Syntax*
+ finish_literal_vector(Reader::State& s) {
+ ++s.cur; // Skip the open paren.
+ std::vector<const Syntax*> elts { };
+ while (skip_blank(s) and *s.cur != ')') { // skip_blank presumably returns false at end of input -- confirm
+ if (auto x = read_sexpr(s))
+ elts.push_back(x);
+ else
+ syntax_error("syntax error while reading vector elements");
+ }
+ if (s.cur >= s.end) // the loop ended without finding ')'
+ syntax_error("unfinished literal vector");
+ else
+ ++s.cur; // step past the closing parenthesis
+ return s.alloc.make_vector(elts);
+ }
+
+ // We've just seen the sharp sign followed by a digit. We assume
+ // we are about to read a back reference #n# or an anchor #n=<syntax>.
+ static const Syntax*
+ finish_anchor_or_reference(Reader::State& s) {
+ auto start = s.cur; // first digit of the number
+ advance_while(s, isdigit);
+ if (s.cur >= s.end)
+ syntax_error("end-of-input after sharp-number sign");
+ const Byte c = *s.cur; // the character after the digits selects reference ('#') or anchor ('=')
+ if (c != '#' and c != '=')
+ syntax_error("syntax error after sharp-number sign"); // no equal sign has been read at this point
+ Lexeme t = { { start, s.cur }, s.lineno }; // lexeme covers the digits only
+ auto n = natural_value(start, s.cur);
+ ++s.cur; // step past the '#' or '='
+ if (c == '#')
+ return s.alloc.make_reference(n, t);
+ auto x = read_sexpr(s); // anchor: the expression being labelled follows the equal sign
+ if (x == nullptr)
+ syntax_error("syntax error after sharp-number-equal sign");
+ return s.alloc.make_anchor(n, x);
+ }
+
+ static const Syntax* // called by read_sharp_et_al after a sharp sign, with s.cur on the quote sign
+ finish_function(Reader::State& s) {
+ ++s.cur; // skip quote sign.
+ auto x = read_sexpr(s); // the function designator
+ if (x == nullptr)
+ syntax_error("missing function designator after sharp-quote sign");
+ return s.alloc.make_function(x);
+ }
+
+ static const Syntax* // called by read_sharp_et_al after a sharp sign, with s.cur on the colon
+ finish_uninterned_symbol(Reader::State& s) {
+ ++s.cur; // skip colon sign.
+ auto start = s.cur; // the lexeme excludes both the sharp and the colon
+ advance_while(s, identifier_part); // NOTE(review): nothing checks that a character was consumed, so the lexeme may be empty -- confirm this is acceptable
+ Lexeme t = { { start, s.cur }, s.lineno };
+ return s.alloc.make_symbol(SymbolSyntax::uninterned, t);
+ }
+
+ static const Syntax* // called by read_sharp_et_al after a sharp sign, with s.cur on the dot
+ finish_readtime_eval(Reader::State& s) {
+ ++s.cur; // skip dot sign.
+ auto x = read_sexpr(s); // the expression wrapped by the Eval form
+ if (x == nullptr)
+ syntax_error("parse error after sharp-dot sign");
+ return s.alloc.make_eval(x);
+ }
+
+ static const Syntax* // called by read_sharp_et_al after a sharp sign, with s.cur on the backslash
+ finish_character(Reader::State& s) {
+ ++s.cur; // skip backslash sign
+ auto start = s.cur; // the lexeme is the character or character name after the backslash
+ advance_while(s, identifier_part); // NOTE(review): if identifier_part rejects the character after the backslash (e.g. a parenthesis), the lexeme is empty -- confirm
+ Lexeme t = { { start, s.cur }, s.lineno };
+ return s.alloc.make_character(t);
+ }
+
+ static const Syntax* // reads a form introduced by a sharp sign; s.cur is on the sharp sign
+ read_sharp_et_al(Reader::State& s) {
+ if (++s.cur >= s.end) // step past the sharp sign; something must follow it
+ syntax_error("end-of-input reached after sharp sign");
+ switch (*s.cur) { // NOTE(review): no case for '+' or '-', although Include/Exclude forms exist in this file -- confirm
+ case '(': return finish_literal_vector(s);
+ case '\'': return finish_function(s);
+ case ':': return finish_uninterned_symbol(s);
+ case '.': return finish_readtime_eval(s);
+ case '\\': return finish_character(s);
default:
- parse_error(std::string("parse error before ")
- + to_string(cur->lexeme));
- return 0; // never executed
+ if (isdigit(*s.cur)) // sharp followed by a number: anchor or back reference
+ return finish_anchor_or_reference(s);
+ syntax_error("syntax error after sharp-sign");
}
- }
+ return nullptr; // reached only if syntax_error returns -- TODO confirm whether it does
+ }
+
+ // We have just seen a dot; read the tail and the closing parenthesis, then build the list with its dot flag set.
+ static const Syntax*
+ finish_dotted_list(Reader::State& s, std::vector<const Syntax*>& elts) {
+ ++s.cur; // Skip dot sign.
+ auto x = read_sexpr(s); // the tail expression after the dot
+ if (x == nullptr)
+ syntax_error("missing expression after dot sign");
+ if (not skip_blank(s) or *s.cur != ')') // exactly one expression may follow the dot
+ syntax_error("missing closing parenthesis");
+ ++s.cur; // step past the closing parenthesis
+ elts.push_back(x); // the tail is stored as the last element
+ return s.alloc.make_list(elts, true);
+ }
+
+ static const Syntax* // reads a parenthesized list, plain or dotted; s.cur is on the opening parenthesis
+ read_pair(Reader::State& s) {
+ ++s.cur; // skip opening parenthesis
+ std::vector<const Syntax*> elts { };
+ while (skip_blank(s)) // skip_blank presumably returns false at end of input -- confirm
+ switch (*s.cur) {
+ case '.': // NOTE(review): any element that merely starts with '.' also lands here -- confirm intended
+ if (elts.empty())
+ syntax_error("missing expression before dot sign.");
+ return finish_dotted_list(s, elts);
+
+ case ')':
+ ++s.cur; // step past the closing parenthesis
+ return s.alloc.make_list(elts); // one-argument call: relies on a default for the dot parameter
- const Token*
- Parser::parse(const Token* cur, const Token* last) {
- while (cur < last)
- if (const Syntax* s = parse_syntax(cur, last))
- syns.push_back(s);
- return cur;
+ default:
+ if (auto x = read_sexpr(s)) // any other character starts the next element
+ elts.push_back(x);
+ else
+ syntax_error("unfinished pair expression");
+ break;
+ }
+ syntax_error("end-of-input while looking for closing parenthesis"); // the loop only ends when skip_blank returns false
+ return nullptr;
+ }
+
+ static const Syntax* // reads one s-expression; returns nullptr when skip_blank finds nothing left
+ read_sexpr(Reader::State& s) {
+ while (skip_blank(s))
+ switch (*s.cur) { // NOTE(review): no case for ',' here, although Expand/Splice forms exist in this file -- confirm
+ case ';': skip_to_eol(s); break; // skip_to_eol presumably skips the rest of the line; the loop then continues
+ case '\'': return read_quote(s);
+ case '`': return read_backquote(s);
+ case '|': return read_absolute_symbol(s);
+ case '#': return read_sharp_et_al(s);
+ case '(': return read_pair(s);
+ default: return read_atom(s);
+ }
+ return nullptr;
}
- Module::Module(const std::string& s) : nm(s) {
- std::vector<Token> tokens;
- Memory::FileMapping input(s);
- Lexer lexer(raw_strs, tokens);
- const Byte* rest = lexer.tokenize(input.begin(), input.end());
- if (rest != input.end())
- syntax_error("syntax error");
- Parser parser(allocator, *this);
- const Token* tok = parser.parse(begin_ptr(tokens), end_ptr(tokens));
- if (tok != end_ptr(tokens))
- parse_error("parse error");
+ const Syntax* // the next s-expression, or nullptr when read_sexpr yields none
+ Reader::read() {
+ return read_sexpr(st); // reading continues from the position kept in the reader state
}
+
}
}
diff --git a/src/utils/hammer.cc b/src/utils/hammer.cc
index 1c7e050b..f4241aaf 100644
--- a/src/utils/hammer.cc
+++ b/src/utils/hammer.cc
@@ -69,7 +69,7 @@ namespace OpenAxiom {
BasicText(const Byte* f, const Byte* l) : span(f, l) { }
// Pointer to the start of this basic text element
const Byte* begin() const { return span.first; }
- // Oone-past-the-end of the this basic text element.
+ // One-past-the-end of this basic text element.
const Byte* end() const { return span.second; }
private:
std::pair<const Byte*, const Byte*> span;