about summary refs log tree commit diff
path: root/src/include/token.H
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/token.H')
-rw-r--r-- src/include/token.H 581
1 files changed, 295 insertions, 286 deletions
diff --git a/src/include/token.H b/src/include/token.H
index 3b3b2950..84862e89 100644
--- a/src/include/token.H
+++ b/src/include/token.H
@@ -92,6 +92,17 @@ namespace OpenAxiom {
using LineNumber = std::size_t;
using ColumnIndex = std::size_t;
+ // Cursor into a fragment: a (line, column) pair designating one character position.
+ struct FragmentCursor {
+ std::size_t line; // index of a line in a fragment (a position within the fragment, not the line's own number)
+ std::size_t column; // column number at line; this is the member advanced by operator++
+ };
+
+ // Pre-increment: move the cursor one column forward on its current line
+ // and yield the same cursor. The line index is left untouched and no
+ // bounds check is performed here.
+ inline FragmentCursor& operator++(FragmentCursor& p) {
+    p.column += 1;
+    return p;
+ }
+
// -- Exception types
struct EndOfStringUnseen {
LineNumber line;
@@ -109,42 +120,40 @@ namespace OpenAxiom {
struct TokenStream {
TokenStream(Frag& f)
: frag(f),
- line(),
- idx(frag.front().indent)
+ pos{ 0, frag.front().indent }
{
- indents.push(idx);
+ indents.push(pos.column);
}
bool eos() const {
- return line >= frag.size()
- or (line + 1 == frag.size() and idx >= frag.back().size());
+ return pos.line >= frag.size()
+ or (pos.line + 1 == frag.size() and pos.column >= frag.back().size());
}
Tok get(Language = Language::Spad);
private:
Frag& frag;
- std::size_t line;
- std::size_t idx;
+ FragmentCursor pos;
std::stack<ColumnIndex> indents;
- std::size_t line_length() const { return frag[line].size(); }
+ std::size_t line_length() const { return frag(pos).size(); }
LineNumber next_line_number() const {
- return line + 1 < frag.size()
- ? frag[line + 1].number
+ return pos.line + 1 < frag.size()
+ ? frag[pos.line + 1].number
: frag.back().number + 1;
}
ColumnIndex next_indentation() const {
- return line + 1 < frag.size() ? frag[line + 1].indent : 0;
+ return pos.line + 1 < frag.size() ? frag[pos.line + 1].indent : 0;
}
LineNumber line_number() const {
- return line < frag.size()
- ? frag[line].number
+ return pos.line < frag.size()
+ ? frag(pos).number
: frag.back().number + 1;
}
ColumnIndex column_number() const {
- return line < frag.size() ? idx : 0;
+ return pos.line < frag.size() ? pos.column : 0;
}
using Locus = typename Tok::Location;
@@ -155,6 +164,24 @@ namespace OpenAxiom {
bool separator_or_punctuator(uint8_t);
+ // Mark `t` as a comment token carrying the value `v`.
+ // Only the `value` and `category` members of `t` are written.
+ template<typename T>
+ inline void comment_token(T& t, TokenValue v) {
+    t.value = v;
+    t.category = TokenCategory::Comment;
+ }
+
+ // Mark `t` as an operator token carrying the value `v`.
+ // Only the `value` and `category` members of `t` are written.
+ template<typename T>
+ inline void operator_token(T& t, TokenValue v) {
+    t.value = v;
+    t.category = TokenCategory::Operator;
+ }
+
+ // Mark `t` as a punctuator token carrying the value `v`.
+ // Only the `value` and `category` members of `t` are written.
+ template<typename T>
+ inline void punctuator_token(T& t, TokenValue v) {
+    t.value = v;
+    t.category = TokenCategory::Punctuator;
+ }
+
template<typename L, typename T>
static void junk(L& line, ColumnIndex& idx, T& t) {
while (idx < line.size() and not separator_or_punctuator(line[idx]))
@@ -169,12 +196,12 @@ namespace OpenAxiom {
++idx;
}
- template<typename L, typename T>
- void string(L& line, ColumnIndex& idx, T& t) {
+ template<typename Frag, typename Tok>
+ void string_literal(Frag& frag, FragmentCursor& pos, Tok& t) {
bool done = false;
bool escape = false;
- while (idx < line.size() && not done) {
- switch (line[idx++]) {
+ while (frag.covering(pos) && not done) {
+ switch (frag(pos)[pos.column++]) {
case '_': escape = !escape; break;
case '"': done = !escape;
// fallthrough
@@ -182,7 +209,7 @@ namespace OpenAxiom {
}
}
if (not done)
- throw EndOfStringUnseen{ line.number, idx };
+ throw EndOfStringUnseen{ frag(pos).number, pos.column };
t.category = TokenCategory::String;
}
@@ -274,6 +301,211 @@ namespace OpenAxiom {
}
+ // Classify the token introduced by '(' (used by the '(' case of
+ // TokenStream::get). The result is the punctuator OpenParen, unless
+ // frag.covering(pos) holds and frag[pos] is '|': then that character is
+ // consumed too and the token value is OpenMetaParen.
template<typename Frag, typename Tok>
+ static void
+ left_paren_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    const bool meta = frag.covering(pos) and frag[pos] == '|';
+    punctuator_token(t, meta ? TokenValue::OpenMetaParen
+                             : TokenValue::OpenParen);
+    if (meta)
+       ++pos;
+ }
+
+ // Classify the token introduced by '{' (used by the '{' case of
+ // TokenStream::get). The result is the punctuator OpenBrace, unless
+ // frag.covering(pos) holds and frag[pos] is '|': then that character is
+ // consumed too and the token value is OpenMetaBrace.
+ template<typename Frag, typename Tok>
+ static void
+ left_brace_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    punctuator_token(t, TokenValue::OpenBrace);
+    // Plain brace: nothing further to consume.
+    if (not frag.covering(pos) or frag[pos] != '|')
+       return;
+    t.value = TokenValue::OpenMetaBrace;
+    ++pos;
+ }
+
+ // Classify the token introduced by '[' (used by the '[' case of
+ // TokenStream::get). The result is the punctuator OpenBracket, unless
+ // frag.covering(pos) holds and frag[pos] is '|': then that character is
+ // consumed too and the token value is OpenMetaBracket.
+ template<typename Frag, typename Tok>
+ static void
+ left_bracket_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    const bool meta = frag.covering(pos) and frag[pos] == '|';
+    punctuator_token(t, meta ? TokenValue::OpenMetaBracket
+                             : TokenValue::OpenBracket);
+    if (meta)
+       ++pos;
+ }
+
+ // Classify the operator introduced by ':' (used by the ':' case of
+ // TokenStream::get). Defaults to Colon; when frag.covering(pos) holds,
+ // a following ':', '=' or '-' is consumed and yields ColonColon,
+ // ColonEq or ColonDash respectively.
+ template<typename Frag, typename Tok>
+ static void
+ colon_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    operator_token(t, TokenValue::Colon);
+    if (not frag.covering(pos))
+       return;
+    const auto next = frag[pos];
+    if (next == ':') {
+       t.value = TokenValue::ColonColon;
+       ++pos;
+    }
+    else if (next == '=') {
+       t.value = TokenValue::ColonEq;
+       ++pos;
+    }
+    else if (next == '-') {
+       t.value = TokenValue::ColonDash;
+       ++pos;
+    }
+ }
+
+ // Classify the operator introduced by '*' (used by the '*' case of
+ // TokenStream::get): Star, or StarStar when frag.covering(pos) holds
+ // and frag[pos] is another '*', which is then consumed.
+ template<typename Frag, typename Tok>
+ static void
+ star_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    const bool doubled = frag.covering(pos) and frag[pos] == '*';
+    operator_token(t, doubled ? TokenValue::StarStar : TokenValue::Star);
+    if (doubled)
+       ++pos;
+ }
+
+ // Classify the operator introduced by '/' (used by the '/' case of
+ // TokenStream::get). Defaults to Slash; when frag.covering(pos) holds,
+ // a following '/' yields SlashSlash and a following '\' yields
+ // SlashBackslash, the extra character being consumed in both cases.
+ template<typename Frag, typename Tok>
+ static void
+ slash_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    operator_token(t, TokenValue::Slash);
+    if (not frag.covering(pos))
+       return;
+    const auto next = frag[pos];
+    if (next == '/') {
+       t.value = TokenValue::SlashSlash;
+       ++pos;
+    }
+    else if (next == '\\') {
+       t.value = TokenValue::SlashBackslash;
+       ++pos;
+    }
+ }
+
+ // Classify the operator introduced by '\' (used by the '\\' case of
+ // TokenStream::get). Defaults to Backslash; when frag.covering(pos)
+ // holds, a following '\' yields BackslashBackslash and a following '/'
+ // yields BackslashSlash, the extra character being consumed.
+ template<typename Frag, typename Tok>
+ static void
+ backslash_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    operator_token(t, TokenValue::Backslash);
+    if (not frag.covering(pos))
+       return;
+    // Record the two-character operator and step over its second character.
+    const auto take = [&](TokenValue v) { t.value = v; ++pos; };
+    const auto next = frag[pos];
+    if (next == '\\')
+       take(TokenValue::BackslashBackslash);
+    else if (next == '/')
+       take(TokenValue::BackslashSlash);
+ }
+
+ // Classify the operator introduced by '<' (used by the '<' case of
+ // TokenStream::get). Defaults to Less. When frag.covering(pos) holds:
+ //   '-'  -> LeftArrow
+ //   '<'  -> OpenChevron
+ //   '='  -> LessEq, upgraded to Equiv if a '>' follows the '='
+ // Every character that takes part in the operator is consumed.
+ template<typename Frag, typename Tok>
+ static void
+ less_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    operator_token(t, TokenValue::Less);
+    if (not frag.covering(pos))
+       return;
+    const auto next = frag[pos];
+    if (next == '-') {
+       t.value = TokenValue::LeftArrow;
+       ++pos;
+    }
+    else if (next == '<') {
+       t.value = TokenValue::OpenChevron;
+       ++pos;
+    }
+    else if (next == '=') {
+       t.value = TokenValue::LessEq;
+       ++pos;                    // step over '=' before looking for '>'
+       if (frag.covering(pos) and frag[pos] == '>') {
+          t.value = TokenValue::Equiv;
+          ++pos;
+       }
+    }
+ }
+
+ // Classify the operator introduced by '=' (used by the '=' case of
+ // TokenStream::get). Defaults to Eq. When frag.covering(pos) holds:
+ //   '>'  -> Implies
+ //   '='  -> EqEq, upgraded to FatArrow if a '>' follows the second '='
+ // Every character that takes part in the operator is consumed.
+ template<typename Frag, typename Tok>
+ static void
+ equal_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    operator_token(t, TokenValue::Eq);
+    if (not frag.covering(pos))
+       return;
+    const auto next = frag[pos];
+    if (next == '>') {
+       t.value = TokenValue::Implies;
+       ++pos;
+    }
+    else if (next == '=') {
+       t.value = TokenValue::EqEq;
+       ++pos;                    // step over '=' before looking for '>'
+       if (frag.covering(pos) and frag[pos] == '>') {
+          t.value = TokenValue::FatArrow;
+          ++pos;
+       }
+    }
+ }
+
+ // Classify the operator introduced by '~' (used by the '~' case of
+ // TokenStream::get): Tilde, or TildeEq when frag.covering(pos) holds
+ // and frag[pos] is '=', which is then consumed.
+ template<typename Frag, typename Tok>
+ static void
+ tilde_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    operator_token(t, TokenValue::Tilde);
+    // A lone tilde: nothing further to consume.
+    if (not frag.covering(pos) or frag[pos] != '=')
+       return;
+    t.value = TokenValue::TildeEq;
+    ++pos;
+ }
+
+ // Classify the operator introduced by '>' (used by the '>' case of
+ // TokenStream::get). Defaults to Greater; when frag.covering(pos) holds,
+ // a following '=' yields GreaterEq and a following '>' yields
+ // CloseChevron, the extra character being consumed in both cases.
+ template<typename Frag, typename Tok>
+ static void
+ greater_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    operator_token(t, TokenValue::Greater);
+    if (not frag.covering(pos))
+       return;
+    const auto next = frag[pos];
+    if (next == '=') {
+       t.value = TokenValue::GreaterEq;
+       ++pos;
+    }
+    else if (next == '>') {
+       t.value = TokenValue::CloseChevron;
+       ++pos;
+    }
+ }
+
+ // Classify the punctuator introduced by '|' (used by the '|' case of
+ // TokenStream::get). Defaults to Bar; when frag.covering(pos) holds,
+ // a following ']', '}' or ')' is consumed and yields CloseMetaBracket,
+ // CloseMetaBrace or CloseMetaParen respectively.
+ template<typename Frag, typename Tok>
+ static void
+ bar_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    punctuator_token(t, TokenValue::Bar);
+    if (not frag.covering(pos))
+       return;
+    // Record the closing meta-delimiter and step over its second character.
+    const auto take = [&](TokenValue v) { t.value = v; ++pos; };
+    const auto next = frag[pos];
+    if (next == ']')
+       take(TokenValue::CloseMetaBracket);
+    else if (next == '}')
+       take(TokenValue::CloseMetaBrace);
+    else if (next == ')')
+       take(TokenValue::CloseMetaParen);
+ }
+
+ // Classify the token introduced by '-' (used by the '-' case of
+ // TokenStream::get). Defaults to the operator Minus. When
+ // frag.covering(pos) holds:
+ //   '>'  -> RightArrow, the '>' being consumed
+ //   '-'  -> the token becomes a comment with value Wisecrack and the
+ //           cursor column is set to the size of the current line.
+ template<typename Frag, typename Tok>
+ static void
+ minus_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    operator_token(t, TokenValue::Minus);
+    if (not frag.covering(pos))
+       return;
+    const auto next = frag[pos];
+    if (next == '>') {
+       t.value = TokenValue::RightArrow;
+       ++pos;
+    }
+    else if (next == '-') {
+       comment_token(t, TokenValue::Wisecrack);
+       pos.column = frag(pos).size();
+    }
+ }
+
+
+ // Classify the token introduced by '+' (used by the '+' case of
+ // TokenStream::get). Defaults to the operator Plus. When
+ // frag.covering(pos) holds:
+ //   '+'   -> the token becomes a comment with value Commentary and the
+ //            cursor column is set to the size of the current line
+ //   '->'  -> MapsTo, both characters being consumed
+ // A '-' that is not followed by '>' is left unconsumed and the token
+ // stays Plus.
+ template<typename Frag, typename Tok>
+ static void
+ plus_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    operator_token(t, TokenValue::Plus);
+    if (not frag.covering(pos))
+       return;
+    const auto next = frag[pos];
+    if (next == '+') {
+       comment_token(t, TokenValue::Commentary);
+       pos.column = frag(pos).size();
+    }
+    else if (next == '-') {
+       const auto after = pos.column + 1;
+       if (after < frag(pos).size() and frag(pos)[after] == '>') {
+          t.value = TokenValue::MapsTo;
+          pos.column += 2;
+       }
+    }
+ }
+
+ // Classify the operator introduced by '.' (used by the '.' case of
+ // TokenStream::get): Dot, or DotDot when frag.covering(pos) holds and
+ // frag[pos] is another '.', which is then consumed.
+ template<typename Frag, typename Tok>
+ static void
+ dot_et_al(Frag& frag, FragmentCursor& pos, Tok& t) {
+    const bool doubled = frag.covering(pos) and frag[pos] == '.';
+    operator_token(t, doubled ? TokenValue::DotDot : TokenValue::Dot);
+    if (doubled)
+       ++pos;
+ }
+
+ // Classify the token introduced by '$' (used by the '$' case of
+ // TokenStream::get). Only when the dialect is Boot, frag.covering(pos)
+ // holds, and frag[pos] is not a separator or punctuator does the work
+ // get delegated to identifier() on the current line; in every other
+ // situation the token is the operator Dollar.
+ template<typename Frag, typename Tok>
+ static void
+ dollar_et_al(Frag& frag, FragmentCursor& pos, Tok& t, Language dialect) {
+    const bool boot_name = dialect == Language::Boot
+       and frag.covering(pos)
+       and not separator_or_punctuator(frag[pos]);
+    if (boot_name)
+       identifier(frag(pos), pos.column, t, dialect);
+    else
+       operator_token(t, TokenValue::Dollar);
+ }
+
+
+ template<typename Frag, typename Tok>
Tok TokenStream<Frag, Tok>::get(Language dialect) {
Tok t { };
t.start = current_locus();
@@ -283,24 +515,24 @@ namespace OpenAxiom {
t.end = current_locus();
return t;
}
- else if (isspace(frag[line][idx])) {
- skip_whitespace(frag[line], idx);
+ else if (isspace(frag[pos])) {
+ skip_whitespace(frag(pos), pos.column);
t.category = TokenCategory::Whitespace;
t.end = current_locus();
return t;
}
- else if (idx == line_length() - 1 and frag[line].back() == '_') {
- ++line;
- idx = frag[line].indent;
+ else if (pos.column == line_length() - 1 and frag(pos).back() == '_') {
+ ++pos.line;
+ pos.column = frag(pos).indent;
}
- else if (idx == line_length()) {
+ else if (pos.column == line_length()) {
auto indent = indents.top();
auto next_indent = next_indentation();
t.start = t.end = { next_line_number(), next_indent };
if (indent < next_indent) {
indents.push(next_indent);
- ++line;
- idx = next_indent;
+ ++pos.line;
+ pos.column = next_indent;
t.category = TokenCategory::Formatting;
t.value = TokenValue::Indent;
}
@@ -310,279 +542,56 @@ namespace OpenAxiom {
t.value = TokenValue::Unindent;
}
else {
- ++line;
- idx = next_indent;
+ ++pos.line;
+ pos.column = next_indent;
t.category = TokenCategory::Formatting;
t.value = TokenValue::Justify;
}
return t;
}
- switch (auto c = frag[line][idx++]) {
- case '#':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Pound;
- break;
-
- case '@':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::At;
- break;
-
- case '^':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Caret;
- break;
-
- case '&':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::Ampersand;
- break;
-
- case '!':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::Exclamation;
- break;
-
- case '\'':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::Apostrophe;
- break;
- case ',':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::Comma;
- break;
-
- case ';':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::Semicolon;
- break;
-
- case '`':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::Backquote;
- break;
-
- case '(':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::OpenParen;
- if (idx < line_length() and frag[line][idx] == '|') {
- ++idx;
- t.value = TokenValue::OpenMetaParen;
- }
- break;
-
- case ')':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::CloseParen;
- break;
-
- case '{':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::OpenBrace;
- if (idx < line_length() and frag[line][idx] == '|') {
- ++idx;
- t.value = TokenValue::OpenMetaBrace;
- }
- break;
-
- case '}':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::CloseBrace;
- break;
-
- case '[':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::OpenBracket;
- if (idx < line_length() and frag[line][idx] == '|') {
- ++idx;
- t.value = TokenValue::OpenMetaBracket;
- }
- break;
-
- case ']':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::CloseBracket;
- break;
-
- case ':':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Colon;
- if (idx < line_length())
- switch (frag[line][idx]) {
- case ':': t.value = TokenValue::ColonColon; ++idx; break;
- case '=': t.value = TokenValue::ColonEq; ++idx; break;
- case '-': t.value = TokenValue::ColonDash; ++idx; break;
- default: break;
- }
- break;
-
- case '*':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Star;
- if (idx < line_length() and frag[line][idx] == '*') {
- t.value = TokenValue::StarStar;
- ++idx;
- }
- break;
-
- case '/':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Slash;
- if (idx < line_length())
- switch (frag[line][idx]) {
- case '/': t.value = TokenValue::SlashSlash; ++idx; break;
- case '\\': t.value = TokenValue::SlashBackslash; ++idx; break;
- default: break;
- }
- break;
-
- case '\\':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Backslash;
- if (idx < line_length())
- switch (frag[line][idx]) {
- case '\\': t.value = TokenValue::BackslashBackslash; ++idx; break;
- case '/': t.value = TokenValue::BackslashSlash; ++idx; break;
- default: break;
- }
- break;
-
- case '<':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Less;
- if (idx < line_length())
- switch (frag[line][idx]) {
- case '-': t.value = TokenValue::LeftArrow; ++idx; break;
- case '<': t.value = TokenValue::OpenChevron; ++idx; break;
- case '=':
- t.value = TokenValue::LessEq;
- if (++idx < line_length() and frag[line][idx] == '>') {
- t.value = TokenValue::Equiv;
- ++idx;
- }
- break;
- default: break;
- }
- break;
-
- case '=':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Eq;
- if (idx < line_length())
- switch (frag[line][idx]) {
- case '>': t.value = TokenValue::Implies; ++idx; break;
- case '=':
- t.value = TokenValue::EqEq;
- if (++idx < line_length() and frag[line][idx] == '>') {
- t.value = TokenValue::FatArrow;
- ++idx;
- }
- break;
- default: break;
- }
- break;
-
- case '~':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Tilde;
- if (idx < line_length() and frag[line][idx] == '=') {
- t.value = TokenValue::TildeEq;
- ++idx;
- }
- break;
-
- case '>':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Greater;
- if (idx < line_length())
- switch (frag[line][idx]) {
- case '=': t.value = TokenValue::GreaterEq; ++idx; break;
- case '>': t.value = TokenValue::CloseChevron; ++idx; break;
- }
- break;
-
- case '|':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Bar;
- if (idx < line_length())
- switch (frag[line][idx]) {
- case ']': t.value = TokenValue::CloseMetaBracket; ++idx; break;
- case '}': t.value = TokenValue::CloseMetaBrace; ++idx; break;
- case ')': t.value = TokenValue::CloseMetaParen; ++idx; break;
- default: break;
- }
- break;
-
- case '-':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Minus;
- if (idx < line_length())
- switch (frag[line][idx]) {
- case '>': t.value = TokenValue::RightArrow; ++idx; break;
- case '-':
- t.category = TokenCategory::Comment;
- t.value = TokenValue::Wisecrack;
- idx = frag[line].size();
- break;
- }
- break;
-
- case '+':
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Plus;
- if (idx < line_length())
- switch (frag[line][idx]) {
- case '+':
- t.category = TokenCategory::Comment;
- t.value = TokenValue::Commentary;
- idx = frag[line].size();
- break;
- case '-':
- if (idx + 1 < line_length() and frag[line][idx+1] == '>') {
- t.value = TokenValue::MapsTo;
- idx += 2;
- }
- break;
- default: break;
- }
- break;
-
- case '.':
- t.category = TokenCategory::Punctuator;
- t.value = TokenValue::Dot;
- if (idx < line_length() and frag[line][idx] == '.') {
- t.category = TokenCategory::Operator;
- t.value = TokenValue::DotDot;
- ++idx;
- }
- break;
-
- case '"':
- string(frag[line], idx, t);
- break;
-
- case '$':
- if (dialect != Language::Boot or idx >= line_length()
- or separator_or_punctuator(frag[line][idx])) {
- t.category = TokenCategory::Operator;
- t.value = TokenValue::Dollar;
- }
- else
- identifier(frag[line], idx, t, dialect);
- break;
+ switch (auto c = frag.advance(pos)) {
+ case '#': operator_token(t, TokenValue::Pound); break;
+ case '@': operator_token(t, TokenValue::At); break;
+ case '^': operator_token(t, TokenValue::Caret); break;
+ case '&': punctuator_token(t, TokenValue::Ampersand); break;
+ case '!': punctuator_token(t, TokenValue::Exclamation); break;
+ case '\'': punctuator_token(t, TokenValue::Apostrophe); break;
+ case ',': punctuator_token(t, TokenValue::Comma); break;
+ case ';': punctuator_token(t, TokenValue::Semicolon); break;
+ case '`': punctuator_token(t, TokenValue::Backquote); break;
+ case '(': left_paren_et_al(frag, pos, t); break;
+ case ')': punctuator_token(t, TokenValue::CloseParen); break;
+ case '{': left_brace_et_al(frag, pos, t); break;
+ case '}': punctuator_token(t, TokenValue::CloseBrace); break;
+ case '[': left_bracket_et_al(frag, pos, t); break;
+ case ']': punctuator_token(t, TokenValue::CloseBracket); break;
+ case ':': colon_et_al(frag, pos, t); break;
+ case '*': star_et_al(frag, pos, t); break;
+ case '/': slash_et_al(frag, pos, t); break;
+ case '\\': backslash_et_al(frag, pos, t); break;
+ case '<': less_et_al(frag, pos, t); break;
+ case '=': equal_et_al(frag, pos, t); break;
+ case '~': tilde_et_al(frag, pos, t); break;
+ case '>': greater_et_al(frag, pos, t); break;
+ case '|': bar_et_al(frag, pos, t); break;
+ case '-': minus_et_al(frag, pos, t); break;
+ case '+': plus_et_al(frag, pos, t); break;
+ case '.': dot_et_al(frag, pos, t); break;
+ case '"': string_literal(frag, pos, t); break;
+ case '$': dollar_et_al(frag, pos, t, dialect); break;
default:
if (isdigit(c))
- number(frag[line], idx, t);
+ number(frag(pos), pos.column, t);
else if (identifier_head(c))
- identifier(frag[line], idx, t, dialect);
+ identifier(frag(pos), pos.column, t, dialect);
else
- junk(frag[line], idx, t);
+ junk(frag(pos), pos.column, t);
break;
}
- t.end = { frag[line].number, idx };
+ t.end = { frag(pos).number, pos.column };
return t;
}
}