aboutsummaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
author dos-reis <gdr@axiomatics.org> 2013-06-26 11:43:56 +0000
committer dos-reis <gdr@axiomatics.org> 2013-06-26 11:43:56 +0000
commit 7f57a915cee3c91cddd166fe9964655696666c4b (patch)
tree d00aa4323f8dc4280e736c5e15ee8479f89eb31b /src/include
parent f9e4a03a220766099d6b5fc683a58185c4805a05 (diff)
download open-axiom-7f57a915cee3c91cddd166fe9964655696666c4b.tar.gz
Rewrite s-expression reader.
Diffstat (limited to 'src/include')
-rw-r--r-- src/include/diagnostics.H 8
-rw-r--r-- src/include/sexpr.H 295
-rw-r--r-- src/include/structure.H 2
3 files changed, 119 insertions, 186 deletions
diff --git a/src/include/diagnostics.H b/src/include/diagnostics.H
index 8f877e2f..9cb0fce1 100644
--- a/src/include/diagnostics.H
+++ b/src/include/diagnostics.H
@@ -33,6 +33,7 @@
#ifndef OPENAXIOM_DIAGNOSTICS_included
#define OPENAXIOM_DIAGNOSTICS_included
+#include <string>
#include <iostream>
namespace OpenAxiom {
@@ -49,6 +50,13 @@ namespace OpenAxiom {
std::ostream* out;
std::ostream* err;
};
+
+ struct BasicError {
+ explicit BasicError(const std::string& s) : msg(s) { }
+ const std::string& message() const { return msg; }
+ protected:
+ std::string msg;
+ };
}
}
diff --git a/src/include/sexpr.H b/src/include/sexpr.H
index a9371139..73e21f31 100644
--- a/src/include/sexpr.H
+++ b/src/include/sexpr.H
@@ -45,22 +45,14 @@
#include <iosfwd>
#include <vector>
#include <open-axiom/storage>
-#include <open-axiom/string-pool>
#include <open-axiom/token>
namespace OpenAxiom {
namespace Sexpr {
- struct BasicError {
- explicit BasicError(const std::string& s) : msg(s) { }
- const std::string& message() const { return msg; }
- protected:
- std::string msg;
- };
-
- // -----------
- // -- Token --
- // -----------
- struct Token {
+ // ------------
+ // -- Lexeme --
+ // ------------
+ struct Lexeme {
enum Type {
unknown, // unidentified token
semicolon = token::value(";"), // comment
@@ -87,35 +79,8 @@ namespace OpenAxiom {
sharp_integer_sharp // back reference, #n#
};
- Type type; // class of this token
- BasicString lexeme; // characters making up this token
- };
-
- // Print a token object on an output stream.
- // Note: this function is for debugging purpose; in particular
- // it does not `prettyprint' tokens.
- std::ostream& operator<<(std::ostream&, const Token&);
-
- // -----------
- // -- Lexer --
- // -----------
- // An object of this type transforms a sequence of characters
- // into a sequence of tokens as defined above.
- // A lexer does not manage memory itself. Rather, it delegates
- // storage allocation for lexemes and tokens to specialized
- // agents used to construct it.
- struct Lexer {
- Lexer(StringPool& pool, std::vector<Token>& toks)
- : strings(pool), tokens(toks) { }
-
- const Byte* tokenize(const Byte*, const Byte*);
- BasicString intern(const Byte* s, size_t n) {
- return strings.intern(s, n);
- }
-
- private:
- StringPool& strings; // where to allocate lexemes from
- std::vector<Token>& tokens; // where to deposite tokens.
+ std::pair<const Byte*, const Byte*> boundary;
+ Ordinal line;
};
// ------------
@@ -127,59 +92,59 @@ namespace OpenAxiom {
virtual void accept(Visitor&) const = 0;
};
- // ----------
- // -- Atom --
- // ----------
+ // ----------------
+ // -- AtomSyntax --
+ // ----------------
// An atom is a syntax object consisting of exatly one token.
// This should not be confused with the notion of atom
// in Lisp languages.
- struct Atom : Syntax {
- const Token& token() const { return tok; }
- BasicString lexeme() const { return tok.lexeme; }
+ struct AtomSyntax : Syntax {
+ const Lexeme& lexeme() const { return lex; }
void accept(Visitor&) const;
protected:
- const Token tok;
- Atom(const Token&);
+ Lexeme lex;
+ explicit AtomSyntax(const Lexeme&);
};
- // -------------
- // -- Integer --
- // -------------
+ // -------------------
+ // -- IntegerSyntax --
+ // -------------------
// Integer literal syntax objects
- struct Integer : Atom {
- explicit Integer(const Token&);
+ struct IntegerSyntax : AtomSyntax {
+ explicit IntegerSyntax(const Lexeme&);
void accept(Visitor&) const;
};
- // ---------------
- // -- Character --
- // ---------------
+ // ---------------------
+ // -- CharacterSyntax --
+ // ---------------------
// Character literal syntax objects.
- struct Character : Atom {
- explicit Character(const Token&);
+ struct CharacterSyntax : AtomSyntax {
+ explicit CharacterSyntax(const Lexeme&);
void accept(Visitor&) const;
};
- // ------------
- // -- String --
- // ------------
+ // ------------------
+ // -- StringSyntax --
+ // ------------------
// Striing literal syntax objjects.
- struct String : Atom {
- explicit String(const Token&);
+ struct StringSyntax : AtomSyntax {
+ explicit StringSyntax(const Lexeme&);
void accept(Visitor&) const;
};
- // ------------
- // -- Symbol --
- // ------------
- struct Symbol : Atom {
+ // ------------------
+ // -- SymbolSyntax --
+ // ------------------
+ struct SymbolSyntax : AtomSyntax {
enum Kind {
uninterned, // uninterned symbol
ordinary, // an interned symbol
+ absolute, // case-sensitive symbol
keyword // a keyword symbol
};
- Symbol(const Token&, Kind);
- Kind kin() const { return sort; }
+ SymbolSyntax(const Lexeme&, Kind);
+ Kind kind() const { return sort; }
void accept(Visitor&) const;
private:
const Kind sort;
@@ -189,20 +154,20 @@ namespace OpenAxiom {
// -- Reference --
// ---------------
// Back reference object to a syntax object.
- struct Reference : Atom {
- Reference(const Token&, size_t);
+ struct Reference : AtomSyntax {
+ Reference(const Lexeme&, Ordinal);
size_t tag() const { return pos; }
void accept(Visitor&) const;
private:
- const size_t pos;
+ Ordinal pos;
};
- // ------------
- // -- Anchor --
- // ------------
+ // ------------------
+ // -- AnchorSyntax --
+ // ------------------
// Base anchor syntax object.
- struct Anchor : Syntax {
- Anchor(size_t, const Syntax*);
+ struct AnchorSyntax : Syntax {
+ AnchorSyntax(size_t, const Syntax*);
size_t ref() const { return tag; }
const Syntax* value() const { return val; }
void accept(Visitor&) const;
@@ -222,20 +187,20 @@ namespace OpenAxiom {
const Syntax* const form;
};
- // -----------
- // -- Quote --
- // -----------
+ // -----------------
+ // -- QuoteSyntax --
+ // -----------------
// Quotation syntax object.
- struct Quote : unary_form<Quote> {
- explicit Quote(const Syntax*);
+ struct QuoteSyntax : unary_form<QuoteSyntax> {
+ explicit QuoteSyntax(const Syntax*);
};
- // ---------------
- // -- Antiquote --
- // ---------------
+ // ---------------------
+ // -- AntiquoteSyntax --
+ // ---------------------
// Quasi-quotation syntax object.
- struct Antiquote : unary_form<Antiquote> {
- explicit Antiquote(const Syntax*);
+ struct AntiquoteSyntax : unary_form<AntiquoteSyntax> {
+ explicit AntiquoteSyntax(const Syntax*);
};
// ------------
@@ -271,15 +236,6 @@ namespace OpenAxiom {
};
// -------------
- // -- DotTail --
- // -------------
- // Objects of this type represents the tail of syntactic
- // objects denoting dotted pair syntax `(a . b)'.
- struct DotTail : unary_form<DotTail> {
- explicit DotTail(const Syntax*);
- };
-
- // -------------
// -- Include --
// -------------
// Conditional inclusion syntax object
@@ -296,10 +252,10 @@ namespace OpenAxiom {
};
// ----------
- // -- List --
+ // -- ListSyntax --
// ----------
// List syntax objects.
- struct List : Syntax, private std::vector<const Syntax*> {
+ struct ListSyntax : Syntax, private std::vector<const Syntax*> {
typedef std::vector<const Syntax*> base;
using base::const_iterator;
using base::begin;
@@ -307,17 +263,20 @@ namespace OpenAxiom {
using base::size;
using base::empty;
- List();
- explicit List(const base&);
- ~List();
+ ListSyntax();
+ ListSyntax(const base&, bool);
+ ~ListSyntax();
void accept(Visitor&) const;
+ bool dotted() const { return dot; }
+ private:
+ bool dot;
};
// ------------
- // -- Vector --
+ // -- VectorSyntax --
// ------------
- // Vector syntax objects.
- struct Vector : Syntax, private std::vector<const Syntax*> {
+ // Vector syntax objects.
+ struct VectorSyntax : Syntax, private std::vector<const Syntax*> {
typedef std::vector<const Syntax*> base;
using base::const_iterator;
using base::begin;
@@ -326,9 +285,9 @@ namespace OpenAxiom {
using base::operator[];
using base::empty;
- Vector();
- explicit Vector(const base&);
- ~Vector();
+ VectorSyntax();
+ explicit VectorSyntax(const base&);
+ ~VectorSyntax();
void accept(Visitor&) const;
};
@@ -336,24 +295,23 @@ namespace OpenAxiom {
// -- Syntax::Visitor --
// ---------------------
struct Syntax::Visitor {
- virtual void visit(const Atom&) = 0;
- virtual void visit(const Integer&);
- virtual void visit(const Character&);
- virtual void visit(const String&);
- virtual void visit(const Symbol&);
+ virtual void visit(const AtomSyntax&) = 0;
+ virtual void visit(const IntegerSyntax&);
+ virtual void visit(const CharacterSyntax&);
+ virtual void visit(const StringSyntax&);
+ virtual void visit(const SymbolSyntax&);
virtual void visit(const Reference&);
- virtual void visit(const Anchor&) = 0;
- virtual void visit(const Quote&) = 0;
- virtual void visit(const Antiquote&) = 0;
+ virtual void visit(const AnchorSyntax&) = 0;
+ virtual void visit(const QuoteSyntax&) = 0;
+ virtual void visit(const AntiquoteSyntax&) = 0;
virtual void visit(const Expand&) = 0;
virtual void visit(const Eval&) = 0;
virtual void visit(const Splice&) = 0;
virtual void visit(const Function&) = 0;
virtual void visit(const Include&) = 0;
virtual void visit(const Exclude&) = 0;
- virtual void visit(const DotTail&) = 0;
- virtual void visit(const List&) = 0;
- virtual void visit(const Vector&) = 0;
+ virtual void visit(const ListSyntax&) = 0;
+ virtual void visit(const VectorSyntax&) = 0;
};
template<typename T>
@@ -370,90 +328,59 @@ namespace OpenAxiom {
Allocator();
~Allocator();
- const Integer* make_integer(const Token&);
- const Character* make_character(const Token&);
- const String* make_string(const Token&);
- const Symbol* make_symbol(const Token&, Symbol::Kind);
- const Reference* make_reference(const Token&, size_t);
- const Anchor* make_anchor(size_t, const Syntax*);
- const Quote* make_quote(const Syntax*);
- const Antiquote* make_antiquote(const Syntax*);
+ const IntegerSyntax* make_integer(const Lexeme&);
+ const CharacterSyntax* make_character(const Lexeme&);
+ const StringSyntax* make_string(const Lexeme&);
+ const SymbolSyntax* make_symbol(SymbolSyntax::Kind, const Lexeme&);
+ const Reference* make_reference(size_t, const Lexeme&);
+ const AnchorSyntax* make_anchor(size_t, const Syntax*);
+ const QuoteSyntax* make_quote(const Syntax*);
+ const AntiquoteSyntax* make_antiquote(const Syntax*);
const Expand* make_expand(const Syntax*);
const Eval* make_eval(const Syntax*);
const Splice* make_splice(const Syntax*);
const Function* make_function(const Syntax*);
const Include* make_include(const Syntax*);
const Exclude* make_exclude(const Syntax*);
- const DotTail* make_dot_tail(const Syntax*);
- const List* make_list(const std::vector<const Syntax*>&);
- const Vector* make_vector(const std::vector<const Syntax*>&);
+ const ListSyntax* make_list(const std::vector<const Syntax*>&, bool = false);
+ const VectorSyntax* make_vector(const std::vector<const Syntax*>&);
private:
- Memory::Factory<Integer> ints;
- Memory::Factory<Character> chars;
- Memory::Factory<String> strs;
- Memory::Factory<Symbol> syms;
- Memory::Factory<Anchor> ancs;
+ Memory::Factory<IntegerSyntax> ints;
+ Memory::Factory<CharacterSyntax> chars;
+ Memory::Factory<StringSyntax> strs;
+ Memory::Factory<SymbolSyntax> syms;
+ Memory::Factory<AnchorSyntax> ancs;
Memory::Factory<Reference> refs;
- Memory::Factory<Quote> quotes;
- Memory::Factory<Antiquote> antis;
+ Memory::Factory<QuoteSyntax> quotes;
+ Memory::Factory<AntiquoteSyntax> antis;
Memory::Factory<Expand> exps;
Memory::Factory<Function> funs;
Memory::Factory<Include> incs;
Memory::Factory<Exclude> excs;
Memory::Factory<Eval> evls;
Memory::Factory<Splice> spls;
- Memory::Factory<DotTail> tails;
- Memory::Factory<List> lists;
- Memory::Factory<Vector> vectors;
- List empty_list;
- Vector empty_vector;
+ Memory::Factory<ListSyntax> lists;
+ Memory::Factory<VectorSyntax> vectors;
+ ListSyntax empty_list;
+ VectorSyntax empty_vector;
};
- // ------------
- // -- Parser --
- // ------------
- // An object of this type transforms a sequence of tokens
- // into a sequence of syntax objects.
- // A parser object does not manage memory itself. Rather, it delegates
- // storage allocation for syntax objects to specialized
- // agents used to construct it.
- struct Parser {
- Parser(Allocator&, std::vector<const Syntax*>&);
- const Token* parse(const Token*, const Token*);
- private:
- Allocator& alloc;
- std::vector<const Syntax*>& syns;
-
- const Symbol* parse_symbol(const Token*&, const Token*);
- const Character* parse_character(const Token*&, const Token*);
- const Anchor* parse_anchor(const Token*&, const Token*);
- const Reference* parse_reference(const Token*&, const Token*);
- const Symbol* parse_uninterned(const Token*&, const Token*);
- const Function* parse_function(const Token*&, const Token*);
- const Quote* parse_quote(const Token*&, const Token*);
- const Antiquote* parse_antiquote(const Token*&, const Token*);
- const Include* parse_include(const Token*&, const Token*);
- const Exclude* parse_exclude(const Token*&, const Token*);
- const Expand* parse_expand(const Token*&, const Token*);
- const Eval* parse_eval(const Token*&, const Token*);
- const Splice* parse_splice(const Token*&, const Token*);
- const Vector* parse_vector(const Token*&, const Token*);
- const List* parse_list(const Token*&, const Token*);
- const Syntax* parse_syntax(const Token*&, const Token*);
- };
+ // -- Reader --
+ struct Reader {
+ struct State {
+ const Byte* start;
+ const Byte* end;
+ const Byte* cur;
+ const Byte* line;
+ Ordinal lineno;
+ Allocator alloc;
+ };
- // ------------
- // -- Module --
- // ------------
- // Entire s-expression input file.
- struct Module : std::vector<const Syntax*> {
- explicit Module(const std::string&);
- const std::string& name() const { return nm; }
+ Reader(const Byte*, const Byte*);
+ const Syntax* read();
private:
- const std::string nm;
- StringPool raw_strs;
- Allocator allocator;
+ State st;
};
}
}
diff --git a/src/include/structure.H b/src/include/structure.H
index d9434423..33c084f2 100644
--- a/src/include/structure.H
+++ b/src/include/structure.H
@@ -33,8 +33,6 @@
#ifndef OPENAXIOM_STRUCTURE_included
#define OPENAXIOM_STRUCTURE_included
-#include <iterator>
-
namespace OpenAxiom {
// -- helper classes for structural abstractions --
namespace structure {