about | summary | refs | log | tree | commit | diff
path: root/src/include/open-axiom/token
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/open-axiom/token')
-rw-r--r-- src/include/open-axiom/token | 188
1 files changed, 69 insertions, 119 deletions
diff --git a/src/include/open-axiom/token b/src/include/open-axiom/token
index 56cea373..f487cb3b 100644
--- a/src/include/open-axiom/token
+++ b/src/include/open-axiom/token
@@ -37,17 +37,10 @@
#include <stdint.h>
#include <stack>
#include <iosfwd>
-#include <open-axiom/Input>
#include <open-axiom/dialect>
+#include <open-axiom/InputFragment>
namespace OpenAxiom {
- enum class LineKind : uint8_t {
- Ordinary, // Ordinary input line
- Description, // Documentation commentary lines.
- Meta, // Input to the attention of the reader
- Ignorable, // Ignorable commentary line
- };
-
// Categorization of Boot and Spad tokens.
enum class TokenCategory : uint8_t {
Unclassified, // token of unknown class
@@ -79,10 +72,6 @@ namespace OpenAxiom {
std::ostream& operator<<(std::ostream&, TokenValue);
- // Datatypes for locating lines and columns.
- using LineNumber = std::size_t;
- using ColumnIndex = std::size_t;
-
struct Locus {
LineNumber line;
ColumnIndex column;
@@ -114,34 +103,6 @@ namespace OpenAxiom {
using Location = Locus;
};
- // Cursor into a fragment.
- struct FragmentCursor {
- std::size_t line; // index of a line in a fragment
- std::size_t column; // column number at line.
-
- inline FragmentCursor& operator++() {
- ++column;
- return *this;
- }
-
- inline FragmentCursor operator++(int) {
- auto tmp = *this;
- ++*this;
- return tmp;
- }
-
- inline FragmentCursor& operator--() {
- --column;
- return *this;
- }
-
- inline FragmentCursor operator--(int) {
- auto tmp = *this;
- --*this;
- return tmp;
- }
- };
-
// -- Exception types
struct EndOfStringUnseen {
LineNumber line;
@@ -155,9 +116,9 @@ namespace OpenAxiom {
// Object of this datatype decompose a program fragment into a
// token stream. The tokens are of type indicated by Tok.
- template<typename Frag, typename Tok>
+ template<typename Tok>
struct Tokenizer {
- Tokenizer(Frag& f)
+ Tokenizer(Fragment& f)
: frag(f),
pos{ 0, frag.front().indent }
{
@@ -170,9 +131,9 @@ namespace OpenAxiom {
Tok get(Language = Language::Spad);
private:
- Frag& frag;
+ Fragment& frag;
FragmentCursor pos;
- std::stack<Locus> indents;
+ std::stack<FragmentCursor> indents;
std::size_t line_length() const { return frag(pos).size(); }
@@ -211,22 +172,22 @@ namespace OpenAxiom {
}
template<typename T>
- inline T& eos_token(T& t, const FragmentCursor& pos) {
+ inline T& eos_token(T& t) {
t.category = TokenCategory::EOS;
t.value = TokenValue::EndOfStream;
- t.end = pos;
+ t.end = t.start;
return t;
}
template<typename T>
- inline T& ws_token(T& t, const FragmentCursor& pos) {
+ inline T& ws_token(T& t, const Locus& loc) {
t.category = TokenCategory::Whitespace;
- t.end = pos;
+ t.end = loc;
return t;
}
template<typename L, typename T>
- static void junk(L& line, ColumnIndex& idx, T& t) {
+ void junk(L& line, ColumnIndex& idx, T& t) {
while (idx < line.size() and not separator_or_punctuator(line[idx]))
++idx;
t.category = TokenCategory::Junk;
@@ -239,8 +200,8 @@ namespace OpenAxiom {
++idx;
}
- template<typename Frag, typename Tok>
- void string_literal(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void string_literal(Fragment& frag, FragmentCursor& pos, Tok& t) {
bool done = false;
bool escape = false;
while (frag.covering(pos) && not done) {
@@ -270,8 +231,7 @@ namespace OpenAxiom {
++idx;
}
- template<typename Frag>
- bool next_line(Frag& frag, FragmentCursor& pos) {
+ static bool next_line(Fragment& frag, FragmentCursor& pos) {
if (++pos.line < frag.size()) {
pos.column = frag(pos).indent;
return true;
@@ -362,9 +322,8 @@ namespace OpenAxiom {
return t;
}
- template<typename Frag, typename Tok>
- static void
- left_paren_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void left_paren_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
punctuator_token(t, TokenValue::OpenParen);
if (frag.covering(pos) and frag[pos] == '|') {
++pos;
@@ -372,9 +331,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- left_brace_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void left_brace_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
punctuator_token(t, TokenValue::OpenBrace);
if (frag.covering(pos) and frag[pos] == '|') {
++pos;
@@ -382,9 +340,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- left_bracket_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void left_bracket_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
punctuator_token(t, TokenValue::OpenBracket);
if (frag.covering(pos) and frag[pos] == '|') {
++pos;
@@ -392,9 +349,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- colon_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void colon_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Colon);
if (frag.covering(pos))
switch (frag[pos]) {
@@ -405,9 +361,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- star_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void star_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Star);
if (frag.covering(pos) and frag[pos] == '*') {
t.value = TokenValue::StarStar;
@@ -415,9 +370,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- slash_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void slash_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Slash);
if (frag.covering(pos))
switch (frag[pos]) {
@@ -427,9 +381,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- backslash_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void backslash_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Backslash);
if (frag.covering(pos))
switch (frag[pos]) {
@@ -439,9 +392,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- less_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void less_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Less);
if (frag.covering(pos))
switch (frag[pos]) {
@@ -458,9 +410,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- equal_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void equal_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Eq);
if (frag.covering(pos))
switch (frag[pos]) {
@@ -476,9 +427,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- tilde_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void tilde_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Tilde);
if (frag.covering(pos) and frag[pos] == '=') {
t.value = TokenValue::TildeEq;
@@ -486,9 +436,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- greater_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void greater_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Greater);
if (frag.covering(pos))
switch (frag[pos]) {
@@ -497,9 +446,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- bar_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void bar_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
punctuator_token(t, TokenValue::Bar);
if (frag.covering(pos))
switch (frag[pos]) {
@@ -510,30 +458,28 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- minus_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void minus_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Minus);
if (frag.covering(pos))
switch (frag[pos]) {
case '>': t.value = TokenValue::RightArrow; ++pos; break;
case '-':
comment_token(t, TokenValue::Wisecrack);
- next_line(frag, pos);
+ pos.column = frag(pos).length();
break;
}
}
- template<typename Frag, typename Tok>
- static void
- plus_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void plus_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Plus);
if (frag.covering(pos))
switch (frag[pos]) {
case '+':
comment_token(t, TokenValue::Commentary);
- next_line(frag, pos);
+ pos.column = frag(pos).length();
break;
case '-':
if (pos.column + 1 < frag(pos).size()
@@ -546,9 +492,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- dot_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+ template<typename Tok>
+ void dot_et_al(Fragment& frag, FragmentCursor& pos, Tok& t) {
operator_token(t, TokenValue::Dot);
if (frag.covering(pos) and frag[pos] == '.') {
t.value = TokenValue::DotDot;
@@ -556,9 +501,9 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- static void
- dollar_et_al(Frag& frag, FragmentCursor& pos, Tok& t, Language dialect) {
+ template<typename Tok>
+ void
+ dollar_et_al(Fragment& frag, FragmentCursor& pos, Tok& t, Language dialect) {
if (dialect != Language::Boot or not frag.covering(pos)
or separator_or_punctuator(frag[pos]))
operator_token(t, TokenValue::Dollar);
@@ -566,9 +511,9 @@ namespace OpenAxiom {
identifier(frag(pos), pos.column, t, dialect);
}
- template<typename Frag, typename Tok>
- static void
- sharp_et_al(Frag& frag, FragmentCursor& pos, Tok& t, Language dialect) {
+ template<typename Tok>
+ void
+ sharp_et_al(Fragment& frag, FragmentCursor& pos, Tok& t, Language dialect) {
if (dialect != Language::Lisp)
operator_token(t, TokenValue::Sharp);
else if (frag.covering(pos))
@@ -583,8 +528,8 @@ namespace OpenAxiom {
}
}
- template<typename Frag, typename Tok>
- Tok Tokenizer<Frag, Tok>::finish(Tok& t, Language dialect) {
+ template<typename Tok>
+ Tok Tokenizer<Tok>::finish(Tok& t, Language dialect) {
switch (auto c = frag.advance(pos)) {
case '#': sharp_et_al(frag, pos, t, dialect); break;
case '@': operator_token(t, TokenValue::At); break;
@@ -630,26 +575,32 @@ namespace OpenAxiom {
return t;
}
- template<typename Frag, typename Tok>
- Tok Tokenizer<Frag, Tok>::get(Language dialect) {
+ inline Locus location(const Fragment& frag, const FragmentCursor& pos) {
+ if (pos.line < frag.size())
+ return { frag[pos.line].number, pos.column };
+ return { frag.back().number + 1, { } };
+ }
+
+ template<typename Tok>
+ Tok Tokenizer<Tok>::get(Language dialect) {
Tok t { };
- t.start = pos;
+ t.start = location(frag, pos);
if (eos())
- return eos_token(t, pos);
+ return eos_token(t);
else if (isblank(frag[pos])) {
skip_whitespace(frag(pos), pos.column);
- return ws_token(t, pos);
+ return ws_token(t, location(frag, pos));
}
else if (line_continuation()) {
if (next_line(frag, pos))
return finish(t, dialect);
- return eos_token(t, pos);
+ return eos_token(t);
}
else if (pos.column >= line_length()) {
if (not next_line(frag, pos))
- return eos_token(t, pos);
- t.start = t.end = pos;
+ return eos_token(t);
+ t.start = t.end = location(frag, pos);
auto indent = indents.top();
if (indent.column < pos.column) {
indents.push(pos);
@@ -669,9 +620,8 @@ namespace OpenAxiom {
// -- Token streams.
template<typename T>
struct TokenStream : std::vector<T> {
- template<typename Frag>
- explicit TokenStream(Frag& f, Language dialect = Language::Spad) {
- Tokenizer<Frag, T> lex { f };
+ explicit TokenStream(Fragment& f, Language dialect = Language::Spad) {
+ Tokenizer<T> lex { f };
while (auto t = lex.get(dialect))
this->push_back(t);
}