// -*- C++ -*-
// Copyright (C) 2010-2017, Gabriel Dos Reis.
// All rights reserved.
// Written by Gabriel Dos Reis.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     - Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//
//     - Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in
//       the documentation and/or other materials provided with the
//       distribution.
//
//     - Neither the name of The Numerical Algorithms Group Ltd. nor the
//       names of its contributors may be used to endorse or promote products
//       derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef OPENAXIOM_SEXPR_included
#define OPENAXIOM_SEXPR_included

// --% Author: Gabriel Dos Reis.
// --% Description:
// --%   A simple support for s-expressions.  By design, no ambition
// --%   for full-fledged Common Lisp reader capability.
Rather, // --% the aim is a simple data structure for exchanging data // --% between several components of the OpenAxiom system. // --% Users interested in fullblown Lisp syntax should seek // --% to acquire Lisp systems, many of which are freely available. #include #include #include #include namespace OpenAxiom { namespace Sexpr { // ------------ // -- Lexeme -- // ------------ struct Lexeme { enum Type { unknown, // unidentified token semicolon, // ";" for comment dot, // "." comma, // "," open_paren, // "(" close_paren, // ")" apostrophe, // "'" backquote, // "`" backslash, // "\\" sharp_open_paren , // "#(" sharp_apostrophe, // "#'" sharp_colon, // "#:" sharp_plus, // "#+" sharp_minus, // "#-" sharp_dot, // "#." comma_at, // ",@" integer, // integer literal character, // character literal string, // string literal identifier, // plain identifier sharp_integer_equal, // anchor definition, #n=
sharp_integer_sharp // back reference, #n# }; std::pair boundary; Ordinal line; const Byte* begin() const { return boundary.first; } const Byte* end() const { return boundary.second; } Ordinal size() const { return end() - begin(); } }; // ------------ // -- Syntax -- // ------------ // Base class of syntax object classes. struct Syntax { struct Visitor; // base class of syntax visitors virtual void accept(Visitor&) const = 0; }; // ---------------- // -- AtomSyntax -- // ---------------- // An atom is a syntax object consisting of exatly one token. // This should not be confused with the notion of atom // in Lisp languages. struct AtomSyntax : Syntax { const Lexeme& lexeme() const { return lex; } protected: Lexeme lex; explicit AtomSyntax(const Lexeme&); }; // ------------------- // -- IntegerSyntax -- // ------------------- // Integer literal syntax objects struct IntegerSyntax : AtomSyntax { explicit IntegerSyntax(const Lexeme&); void accept(Visitor&) const; }; // --------------------- // -- CharacterSyntax -- // --------------------- // Character literal syntax objects. struct CharacterSyntax : AtomSyntax { explicit CharacterSyntax(const Lexeme&); void accept(Visitor&) const; }; // ------------------ // -- StringSyntax -- // ------------------ // Striing literal syntax objjects. 
struct StringSyntax : AtomSyntax { explicit StringSyntax(const Lexeme&); void accept(Visitor&) const; }; // ------------------ // -- SymbolSyntax -- // ------------------ struct SymbolSyntax : AtomSyntax { enum Kind { ordinary = 0x0, // an interned symbol uninterned = 0x1, // uninterned symbol absolute = 0x2, // case-sensitive symbol keyword = 0x4, // a keyword symbol }; SymbolSyntax(const Lexeme&, Kind); Kind kind() const { return sort; } const Byte* begin() const { return lexeme().begin(); } const Byte* end() const { return lexeme().end(); } std::size_t size() const { return lexeme().size(); } Byte operator[](std::size_t i) const { return begin()[i]; } void accept(Visitor&) const; private: const Kind sort; }; // --------------------- // -- ReferenceSyntax -- // --------------------- // Back reference object to a syntax object. struct ReferenceSyntax : AtomSyntax { ReferenceSyntax(const Lexeme&, Ordinal); size_t tag() const { return pos; } void accept(Visitor&) const; private: Ordinal pos; }; // ------------------ // -- AnchorSyntax -- // ------------------ // Base anchor syntax object. struct AnchorSyntax : Syntax { AnchorSyntax(size_t, const Syntax*); size_t ref() const { return tag; } const Syntax* value() const { return val; } void accept(Visitor&) const; private: const size_t tag; const Syntax* const val; }; // -- Abstract over common implementation of unary special operators. template struct unary_form : Syntax { const Syntax* body() const { return form; } void accept(Visitor&) const; protected: unary_form(const Syntax* f) : form(f) { } private: const Syntax* const form; }; template struct binary_form : Syntax { const Syntax* first() const { return rep.first; } const Syntax* second() const { return rep.second; } void accept(Visitor&) const; protected: binary_form(const Syntax* f, const Syntax* s) : rep(f, s) { } private: std::pair rep; }; // ----------------- // -- QuoteSyntax -- // ----------------- // Quotation syntax object. 
struct QuoteSyntax : unary_form { explicit QuoteSyntax(const Syntax*); }; // --------------------- // -- AntiquoteSyntax -- // --------------------- // Quasi-quotation syntax object. struct AntiquoteSyntax : unary_form { explicit AntiquoteSyntax(const Syntax*); }; // ------------ // -- Expand -- // ------------ // Expansion request inside a quasi-quotation. struct Expand : unary_form { explicit Expand(const Syntax*); }; // ---------- // -- Eval -- // ---------- // Read-time evaluation request syntax object. struct Eval : unary_form { explicit Eval(const Syntax*); }; // ------------ // -- Splice -- // ------------ // Splice request syntax object inside a quasi-quotation. struct Splice : unary_form { explicit Splice(const Syntax*); }; // -------------- // -- Function -- // -------------- // Function literal syntax object. struct Function : unary_form { explicit Function(const Syntax*); }; // ------------- // -- Include -- // ------------- // Conditional inclusion syntax object struct Include : binary_form { Include(const Syntax*, const Syntax*); }; // ------------- // -- Exclude -- // ------------- // Conditional exclusion syntax object struct Exclude : binary_form { Exclude(const Syntax*, const Syntax*); }; // ---------------- // -- ListSyntax -- // ---------------- // List syntax objects. struct ListSyntax : Syntax, private std::vector { typedef std::vector base; using base::const_iterator; using base::begin; using base::end; using base::rbegin; using base::rend; using base::size; using base::empty; using base::front; ListSyntax(); ListSyntax(const base&, bool); ~ListSyntax(); void accept(Visitor&) const; bool dotted() const { return dot; } private: bool dot; }; // ------------------ // -- VectorSyntax -- // ------------------ // VectorSyntax syntax objects. 
struct VectorSyntax : Syntax, private std::vector { typedef std::vector base; using base::const_iterator; using base::begin; using base::end; using base::size; using base::operator[]; using base::empty; VectorSyntax(); explicit VectorSyntax(const base&); ~VectorSyntax(); void accept(Visitor&) const; }; // --------------------- // -- Syntax::Visitor -- // --------------------- struct Syntax::Visitor { virtual void visit(const IntegerSyntax&) = 0; virtual void visit(const CharacterSyntax&) = 0; virtual void visit(const StringSyntax&) = 0; virtual void visit(const SymbolSyntax&) = 0; virtual void visit(const ReferenceSyntax&) = 0; virtual void visit(const AnchorSyntax&) = 0; virtual void visit(const QuoteSyntax&) = 0; virtual void visit(const AntiquoteSyntax&) = 0; virtual void visit(const Expand&) = 0; virtual void visit(const Eval&) = 0; virtual void visit(const Splice&) = 0; virtual void visit(const Function&) = 0; virtual void visit(const Include&) = 0; virtual void visit(const Exclude&) = 0; virtual void visit(const ListSyntax&) = 0; virtual void visit(const VectorSyntax&) = 0; }; template void unary_form::accept(Visitor& v) const { v.visit(static_cast(*this)); } template void binary_form::accept(Visitor& v) const { v.visit(static_cast(*this)); } // --------------- // -- Allocator -- // --------------- // Allocator of syntax objects. 
struct Allocator { Allocator(); ~Allocator(); const IntegerSyntax* make_integer(const Lexeme&); const CharacterSyntax* make_character(const Lexeme&); const StringSyntax* make_string(const Lexeme&); const SymbolSyntax* make_symbol(SymbolSyntax::Kind, const Lexeme&); const ReferenceSyntax* make_reference(size_t, const Lexeme&); const AnchorSyntax* make_anchor(size_t, const Syntax*); const QuoteSyntax* make_quote(const Syntax*); const AntiquoteSyntax* make_antiquote(const Syntax*); const Expand* make_expand(const Syntax*); const Eval* make_eval(const Syntax*); const Splice* make_splice(const Syntax*); const Function* make_function(const Syntax*); const Include* make_include(const Syntax*, const Syntax*); const Exclude* make_exclude(const Syntax*, const Syntax*); const ListSyntax* make_list(const std::vector&, bool = false); const VectorSyntax* make_vector(const std::vector&); private: Memory::Factory ints; Memory::Factory chars; Memory::Factory strs; Memory::Factory syms; Memory::Factory ancs; Memory::Factory refs; Memory::Factory quotes; Memory::Factory antis; Memory::Factory exps; Memory::Factory funs; Memory::Factory incs; Memory::Factory excs; Memory::Factory evls; Memory::Factory spls; Memory::Factory lists; Memory::Factory vectors; ListSyntax empty_list; VectorSyntax empty_vector; }; // -- Reader -- struct RawInput { const Byte* start; const Byte* end; Ordinal lineno; }; struct Reader { struct State { RawInput bytes; const Byte* cur; const Byte* line; Allocator alloc; }; explicit Reader(const RawInput&); Reader(const Byte*, const Byte*); const Byte* position(Ordinal); bool at_start() const { return st.cur == st.bytes.start; } const Syntax* read(); private: State st; }; } } #endif // OPENAXIOM_SEXPR_included