From 529fe9030468f5d2a2cb0ae376fe9c9b8be3185c Mon Sep 17 00:00:00 2001
From: Gabriel Dos Reis
Date: Mon, 16 Jan 2017 21:17:52 -0800
Subject: Split Tokenizer::get into two functions.

---
 src/include/open-axiom/token | 95 ++++++++++++++++++++++++--------------------
 src/syntax/Parser.cxx        |  5 ++-
 2 files changed, 55 insertions(+), 45 deletions(-)

diff --git a/src/include/open-axiom/token b/src/include/open-axiom/token
index b45adfa5..32bb8fd9 100644
--- a/src/include/open-axiom/token
+++ b/src/include/open-axiom/token
@@ -110,7 +110,7 @@ namespace OpenAxiom {
    TokenClassification classify(const std::string&);
 
    // Token data structure: a region of text with a classification.
-   struct Token : TokenClassification, Region {
+   struct Token : Region, TokenClassification {
       using Location = Locus;
    };
 
@@ -199,6 +199,8 @@ namespace OpenAxiom {
       Locus current_locus() {
          return { line_number(), column_number() };
       }
+
+      Tok finish(Tok&, Language);
    };
 
    bool separator_or_punctuator(uint8_t);
@@ -569,6 +571,52 @@ namespace OpenAxiom {
       }
    }
 
+   template
+   Tok Tokenizer::finish(Tok& t, Language dialect) {
+      switch (auto c = frag.advance(pos)) {
+      case '#': sharp_et_al(frag, pos, t, dialect); break;
+      case '@': operator_token(t, TokenValue::At); break;
+      case '^': operator_token(t, TokenValue::Caret); break;
+      case '&': punctuator_token(t, TokenValue::Ampersand); break;
+      case '!': punctuator_token(t, TokenValue::Exclamation); break;
+      case '\'': punctuator_token(t, TokenValue::Apostrophe); break;
+      case ',': punctuator_token(t, TokenValue::Comma); break;
+      case ';': punctuator_token(t, TokenValue::Semicolon); break;
+      case '`': punctuator_token(t, TokenValue::Backquote); break;
+      case '(': left_paren_et_al(frag, pos, t); break;
+      case ')': punctuator_token(t, TokenValue::CloseParen); break;
+      case '{': left_brace_et_al(frag, pos, t); break;
+      case '}': punctuator_token(t, TokenValue::CloseBrace); break;
+      case '[': left_bracket_et_al(frag, pos, t); break;
+      case ']': punctuator_token(t, TokenValue::CloseBracket); break;
+      case ':': colon_et_al(frag, pos, t); break;
+      case '*': star_et_al(frag, pos, t); break;
+      case '/': slash_et_al(frag, pos, t); break;
+      case '\\': backslash_et_al(frag, pos, t); break;
+      case '<': less_et_al(frag, pos, t); break;
+      case '=': equal_et_al(frag, pos, t); break;
+      case '~': tilde_et_al(frag, pos, t); break;
+      case '>': greater_et_al(frag, pos, t); break;
+      case '|': bar_et_al(frag, pos, t); break;
+      case '-': minus_et_al(frag, pos, t); break;
+      case '+': plus_et_al(frag, pos, t); break;
+      case '.': dot_et_al(frag, pos, t); break;
+      case '"': string_literal(frag, pos, t); break;
+      case '$': dollar_et_al(frag, pos, t, dialect); break;
+
+      default:
+         if (isdigit(c))
+            number(frag(pos), pos.column, t);
+         else if (identifier_head(c))
+            identifier(frag(pos), pos.column, t, dialect);
+         else
+            junk(frag(pos), pos.column, t);
+         break;
+      }
+
+      t.end = { frag(pos).number, pos.column };
+      return t;
+   }
 
    template
    Tok Tokenizer::get(Language dialect) {
@@ -589,6 +637,7 @@ namespace OpenAxiom {
       else if (pos.column == line_length() - 1 and frag(pos).back() == '_') {
          ++pos.line;
          pos.column = frag(pos).indent;
+         return finish(t, dialect);
       }
       else if (pos.column == line_length()) {
          auto indent = indents.top();
@@ -615,49 +664,7 @@ namespace OpenAxiom {
          return t;
       }
 
-      switch (auto c = frag.advance(pos)) {
-      case '#': sharp_et_al(frag, pos, t, dialect); break;
-      case '@': operator_token(t, TokenValue::At); break;
-      case '^': operator_token(t, TokenValue::Caret); break;
-      case '&': punctuator_token(t, TokenValue::Ampersand); break;
-      case '!': punctuator_token(t, TokenValue::Exclamation); break;
-      case '\'': punctuator_token(t, TokenValue::Apostrophe); break;
-      case ',': punctuator_token(t, TokenValue::Comma); break;
-      case ';': punctuator_token(t, TokenValue::Semicolon); break;
-      case '`': punctuator_token(t, TokenValue::Backquote); break;
-      case '(': left_paren_et_al(frag, pos, t); break;
-      case ')': punctuator_token(t, TokenValue::CloseParen); break;
-      case '{': left_brace_et_al(frag, pos, t); break;
-      case '}': punctuator_token(t, TokenValue::CloseBrace); break;
-      case '[': left_bracket_et_al(frag, pos, t); break;
-      case ']': punctuator_token(t, TokenValue::CloseBracket); break;
-      case ':': colon_et_al(frag, pos, t); break;
-      case '*': star_et_al(frag, pos, t); break;
-      case '/': slash_et_al(frag, pos, t); break;
-      case '\\': backslash_et_al(frag, pos, t); break;
-      case '<': less_et_al(frag, pos, t); break;
-      case '=': equal_et_al(frag, pos, t); break;
-      case '~': tilde_et_al(frag, pos, t); break;
-      case '>': greater_et_al(frag, pos, t); break;
-      case '|': bar_et_al(frag, pos, t); break;
-      case '-': minus_et_al(frag, pos, t); break;
-      case '+': plus_et_al(frag, pos, t); break;
-      case '.': dot_et_al(frag, pos, t); break;
-      case '"': string_literal(frag, pos, t); break;
-      case '$': dollar_et_al(frag, pos, t, dialect); break;
-
-      default:
-         if (isdigit(c))
-            number(frag(pos), pos.column, t);
-         else if (identifier_head(c))
-            identifier(frag(pos), pos.column, t, dialect);
-         else
-            junk(frag(pos), pos.column, t);
-         break;
-      }
-
-      t.end = { frag(pos).number, pos.column };
-      return t;
+      return finish(t, dialect);
    }
 
    // -- Token streams.
diff --git a/src/syntax/Parser.cxx b/src/syntax/Parser.cxx
index d78d3157..df856d89 100644
--- a/src/syntax/Parser.cxx
+++ b/src/syntax/Parser.cxx
@@ -108,7 +108,10 @@ namespace {
 
    // Helper function for streaming out details of tokens.
    std::ostream& operator<<(std::ostream& os, const Token& t) {
-      os << t.category << '{' << t.start << '-' << t.end << '}';
+      os << t.category << '{'
+         << t.start << '-' << t.end
+         << ", " << t.value
+         << '}';
       return os;
    }
 
-- 
cgit v1.2.3