diff options
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/open-axiom/InputFragment | 51 | ||||
-rw-r--r-- | src/include/open-axiom/sexpr | 4 | ||||
-rw-r--r-- | src/include/open-axiom/token | 188 |
3 files changed, 117 insertions, 126 deletions
diff --git a/src/include/open-axiom/InputFragment b/src/include/open-axiom/InputFragment index e55cd0a6..63139db4 100644 --- a/src/include/open-axiom/InputFragment +++ b/src/include/open-axiom/InputFragment @@ -36,9 +36,22 @@ #ifndef OPENAXIOM_INPUTFRAGMENT_included #define OPENAXIOM_INPUTFRAGMENT_included -#include <open-axiom/token> +#include <vector> +#include <string> +#include <stack> namespace OpenAxiom { + // Datatypes for locating lines and columns. + using LineNumber = std::size_t; + using ColumnIndex = std::size_t; + + enum class LineKind : uint8_t { + Ordinary, // Ordinary input line + Description, // Documentation commentary lines. + Meta, // Input to the attention of the reader + Ignorable, // Ignorable commentary line + }; + // A physical line is just raw text, coupled with location // information such as line and indentation column. struct Line : std::string { @@ -51,6 +64,34 @@ namespace OpenAxiom { } }; + // Cursor into a fragment. + struct FragmentCursor { + std::size_t line; // index of a line in a fragment + std::size_t column; // column number at line. + + inline FragmentCursor& operator++() { + ++column; + return *this; + } + + inline FragmentCursor operator++(int) { + auto tmp = *this; + ++*this; + return tmp; + } + + inline FragmentCursor& operator--() { + --column; + return *this; + } + + inline FragmentCursor operator--(int) { + auto tmp = *this; + --*this; + return tmp; + } + }; + // A program fragment is a logical line, composed of possibly // several physical lines subject to the off-side rule. As a // special case, a line ending with the underbar character @@ -63,19 +104,19 @@ namespace OpenAxiom { } using std::vector<Line>::operator[]; // Reference a line given by a position into this fragment. - const Line& operator()(const OpenAxiom::FragmentCursor& pos) const { + const Line& operator()(const FragmentCursor& pos) const { return (*this)[pos.line]; } // Reference a character code unit at the position into this fragment. - uint8_t operator[](const OpenAxiom::FragmentCursor& pos) const { + uint8_t operator[](const FragmentCursor& pos) const { return (*this)[pos.line][pos.column]; } // Advance the cursor position to the next character code unit. - uint8_t advance(OpenAxiom::FragmentCursor& pos) const { + uint8_t advance(FragmentCursor& pos) const { return (*this)[pos.line][pos.column++]; } // This predicate holds if this fragment covers the cursor position. - bool covering(const OpenAxiom::FragmentCursor& pos) const { + bool covering(const FragmentCursor& pos) const { return pos.column < (*this)[pos.line].size(); } }; diff --git a/src/include/open-axiom/sexpr b/src/include/open-axiom/sexpr index 8d8eea0b..c7dff7c5 100644 --- a/src/include/open-axiom/sexpr +++ b/src/include/open-axiom/sexpr @@ -1,5 +1,5 @@ // -*- C++ -*- -// Copyright (C) 2010-2014, Gabriel Dos Reis. +// Copyright (C) 2010-2017, Gabriel Dos Reis. // All rights reserved. // Written by Gabriel Dos Reis. // @@ -46,7 +46,7 @@ #include <iosfwd> #include <vector> #include <open-axiom/storage> -#include <open-axiom/token> +#include <open-axiom/Input> namespace OpenAxiom { namespace Sexpr { diff --git a/src/include/open-axiom/token b/src/include/open-axiom/token index 56cea373..f487cb3b 100644 --- a/src/include/open-axiom/token +++ b/src/include/open-axiom/token @@ -37,17 +37,10 @@ #include <stdint.h> #include <stack> #include <iosfwd> -#include <open-axiom/Input> #include <open-axiom/dialect> +#include <open-axiom/InputFragment> namespace OpenAxiom { - enum class LineKind : uint8_t { - Ordinary, // Ordinary input line - Description, // Documentation commentary lines. - Meta, // Input to the attention of the reader - Ignorable, // Ignorable commentary line - }; - // Categorization of Boot and Spad tokens. enum class TokenCategory : uint8_t { Unclassified, // token of unknown class @@ -79,10 +72,6 @@ namespace OpenAxiom { std::ostream& operator<<(std::ostream&, TokenValue); - // Datatypes for locating lines and columns. - using LineNumber = std::size_t; - using ColumnIndex = std::size_t; - struct Locus { LineNumber line; ColumnIndex column; @@ -114,34 +103,6 @@ namespace OpenAxiom { using Location = Locus; }; - // Cursor into a fragment. - struct FragmentCursor { - std::size_t line; // index of a line in a fragment - std::size_t column; // column number at line. - - inline FragmentCursor& operator++() { - ++column; - return *this; - } - - inline FragmentCursor operator++(int) { - auto tmp = *this; - ++*this; - return tmp; - } - - inline FragmentCursor& operator--() { - --column; - return *this; - } - - inline FragmentCursor operator--(int) { - auto tmp = *this; - --*this; - return tmp; - } - }; - // -- Exception types struct EndOfStringUnseen { LineNumber line; @@ -155,9 +116,9 @@ namespace OpenAxiom { // Object of this datatype decompose a program fragment into a // token stream. The tokens are of type indicated by Tok. - template<typename Frag, typename Tok> + template<typename Tok> struct Tokenizer { - Tokenizer(Frag& f) + Tokenizer(Fragment& f) : frag(f), pos{ 0, frag.front().indent } { @@ -170,9 +131,9 @@ namespace OpenAxiom { Tok get(Language = Language::Spad); private: - Frag& frag; + Fragment& frag; FragmentCursor pos; - std::stack<Locus> indents; + std::stack<FragmentCursor> indents; std::size_t line_length() const { return frag(pos).size(); } @@ -211,22 +172,22 @@ namespace OpenAxiom { } template<typename T> - inline T& eos_token(T& t, const FragmentCursor& pos) { + inline T& eos_token(T& t) { t.category = TokenCategory::EOS; t.value = TokenValue::EndOfStream; - t.end = pos; + t.end = t.start; return t; } template<typename T> - inline T& ws_token(T& t, const FragmentCursor& pos) { + inline T& ws_token(T& t, const Locus& loc) { t.category = TokenCategory::Whitespace; - t.end = pos; + t.end = loc; return t; } template<typename L, typename T> - static void junk(L& line, ColumnIndex& idx, T& t) { + void junk(L& line, ColumnIndex& idx, T& t) { while (idx < line.size() and not separator_or_punctuator(line[idx])) ++idx; t.category = TokenCategory::Junk; @@ -239,8 +200,8 @@ namespace OpenAxiom { ++idx; } - template<typename Frag, typename Tok> - void string_literal(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void string_literal(Fragment& frag, FragmentCursor& pos, Tok& t) { bool done = false; bool escape = false; while (frag.covering(pos) && not done) { @@ -270,8 +231,7 @@ namespace OpenAxiom { ++idx; } - template<typename Frag> - bool next_line(Frag& frag, FragmentCursor& pos) { + static bool next_line(Fragment& frag, FragmentCursor& pos) { if (++pos.line < frag.size()) { pos.column = frag(pos).indent; return true; @@ -362,9 +322,8 @@ namespace OpenAxiom { return t; } - template<typename Frag, typename Tok> - static void - left_paren_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void left_paren_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { punctuator_token(t, TokenValue::OpenParen); if (frag.covering(pos) and frag[pos] == '|') { ++pos; @@ -372,9 +331,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - left_brace_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void left_brace_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { punctuator_token(t, TokenValue::OpenBrace); if (frag.covering(pos) and frag[pos] == '|') { ++pos; @@ -382,9 +340,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - left_bracket_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void left_bracket_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { punctuator_token(t, TokenValue::OpenBracket); if (frag.covering(pos) and frag[pos] == '|') { ++pos; @@ -392,9 +349,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - colon_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void colon_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Colon); if (frag.covering(pos)) switch (frag[pos]) { @@ -405,9 +361,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - star_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void star_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Star); if (frag.covering(pos) and frag[pos] == '*') { t.value = TokenValue::StarStar; @@ -415,9 +370,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - slash_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void slash_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Slash); if (frag.covering(pos)) switch (frag[pos]) { @@ -427,9 +381,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - backslash_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void backslash_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Backslash); if (frag.covering(pos)) switch (frag[pos]) { @@ -439,9 +392,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - less_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void less_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Less); if (frag.covering(pos)) switch (frag[pos]) { @@ -458,9 +410,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - equal_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void equal_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Eq); if (frag.covering(pos)) switch (frag[pos]) { @@ -476,9 +427,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - tilde_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void tilde_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Tilde); if (frag.covering(pos) and frag[pos] == '=') { t.value = TokenValue::TildeEq; @@ -486,9 +436,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - greater_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void greater_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Greater); if (frag.covering(pos)) switch (frag[pos]) { @@ -497,9 +446,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - bar_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void bar_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { punctuator_token(t, TokenValue::Bar); if (frag.covering(pos)) switch (frag[pos]) { @@ -510,30 +458,28 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - minus_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void minus_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Minus); if (frag.covering(pos)) switch (frag[pos]) { case '>': t.value = TokenValue::RightArrow; ++pos; break; case '-': comment_token(t, TokenValue::Wisecrack); - next_line(frag, pos); + pos.column = frag(pos).length(); break; } } - template<typename Frag, typename Tok> - static void - plus_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void plus_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Plus); if (frag.covering(pos)) switch (frag[pos]) { case '+': comment_token(t, TokenValue::Commentary); - next_line(frag, pos); + pos.column = frag(pos).length(); break; case '-': if (pos.column + 1 < frag(pos).size() @@ -546,9 +492,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - dot_et_al(Frag& frag, FragmentCursor& pos, Tok& t) { + template<typename Tok> + void dot_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) { operator_token(t, TokenValue::Dot); if (frag.covering(pos) and frag[pos] == '.') { t.value = TokenValue::DotDot; @@ -556,9 +501,9 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - static void - dollar_et_al(Frag& frag, FragmentCursor& pos, Tok& t, Language dialect) { + template<typename Tok> + void + dollar_et_al(Fragment& frag, FragmentCursor& pos, Tok& t, Language dialect) { if (dialect != Language::Boot or not frag.covering(pos) or separator_or_punctuator(frag[pos])) operator_token(t, TokenValue::Dollar); @@ -566,9 +511,9 @@ namespace OpenAxiom { identifier(frag(pos), pos.column, t, dialect); } - template<typename Frag, typename Tok> - static void - sharp_et_al(Frag& frag, FragmentCursor& pos, Tok& t, Language dialect) { + template<typename Tok> + void + sharp_et_al(Fragment& frag, FragmentCursor& pos, Tok& t, Language dialect) { if (dialect != Language::Lisp) operator_token(t, TokenValue::Sharp); else if (frag.covering(pos)) @@ -583,8 +528,8 @@ namespace OpenAxiom { } } - template<typename Frag, typename Tok> - Tok Tokenizer<Frag, Tok>::finish(Tok& t, Language dialect) { + template<typename Tok> + Tok Tokenizer<Tok>::finish(Tok& t, Language dialect) { switch (auto c = frag.advance(pos)) { case '#': sharp_et_al(frag, pos, t, dialect); break; case '@': operator_token(t, TokenValue::At); break; @@ -630,26 +575,32 @@ namespace OpenAxiom { return t; } - template<typename Frag, typename Tok> - Tok Tokenizer<Frag, Tok>::get(Language dialect) { + inline Locus location(const Fragment& frag, const FragmentCursor& pos) { + if (pos.line < frag.size()) + return { frag[pos.line].number, pos.column }; + return { frag.back().number + 1, { } }; + } + + template<typename Tok> + Tok Tokenizer<Tok>::get(Language dialect) { Tok t { }; - t.start = pos; + t.start = location(frag, pos); if (eos()) - return eos_token(t, pos); + return eos_token(t); else if (isblank(frag[pos])) { skip_whitespace(frag(pos), pos.column); - return ws_token(t, pos); + return ws_token(t, location(frag, pos)); } else if (line_continuation()) { if (next_line(frag, pos)) return finish(t, dialect); - return eos_token(t, pos); + return eos_token(t); } else if (pos.column >= line_length()) { if (not next_line(frag, pos)) - return eos_token(t, pos); - t.start = t.end = pos; + return eos_token(t); + t.start = t.end = location(frag, pos); auto indent = indents.top(); if (indent.column < pos.column) { indents.push(pos); @@ -669,9 +620,8 @@ namespace OpenAxiom { // -- Token streams. template<typename T> struct TokenStream : std::vector<T> { - template<typename Frag> - explicit TokenStream(Frag& f, Language dialect = Language::Spad) { - Tokenizer<Frag, T> lex { f }; + explicit TokenStream(Fragment& f, Language dialect = Language::Spad) { + Tokenizer<T> lex { f }; while (auto t = lex.get(dialect)) this->push_back(t); } |