From b56562693a88f88e7c290de9e1dc18d96a0da792 Mon Sep 17 00:00:00 2001 From: Gabriel Dos Reis Date: Mon, 2 Jan 2017 14:03:55 -0800 Subject: Include a native entry point for parsing Boot source files, and for transpiling to Lisp. --- src/syntax/Parser.cxx | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/syntax/token.cc | 116 ------------------------------------------- src/syntax/token.cxx | 116 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 250 insertions(+), 116 deletions(-) create mode 100644 src/syntax/Parser.cxx delete mode 100644 src/syntax/token.cc create mode 100644 src/syntax/token.cxx (limited to 'src/syntax') diff --git a/src/syntax/Parser.cxx b/src/syntax/Parser.cxx new file mode 100644 index 00000000..e8759d2b --- /dev/null +++ b/src/syntax/Parser.cxx @@ -0,0 +1,134 @@ +// -*- C++ -*- +// Copyright (C) 2014-2017, Gabriel Dos Reis. +// All rights reserved. +// Written by Gabriel Dos Reis. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of OpenAxiom. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace { + using namespace OpenAxiom; + + using TokenSequence = TokenStream; + + // Simple wrapper around standard file streams, along with the pathname + // to the file. + template + struct FileAs { + const char* path; + T stream; + FileAs(const char* p, std::ios_base::openmode flags) + : path{ p }, stream{ p, flags } + { + if (!stream) + throw Error::CannotOpenFile{ path }; + } + }; + + using InputFile = FileAs; + using OutputFile = FileAs; + + // Helper function for streaming out details of tokens. + std::ostream& operator<<(std::ostream& os, const Token& t) { + os << t.category << '{' << t.start << '-' << t.end << '}'; + return os; + } + + // FIXME: This is just a stub to get a native parsing entry point + // into the bootsys and interpsys images. + void transpile_boot_to_lisp(InputFile& in, OutputFile& out) { + SourceInput src { in.stream }; + while (auto f = src.get()) { + out.stream << "================================================\n"; + out.stream << f; + try { + TokenSequence ts { f, Language::Boot }; + for (auto& t : ts) { + out.stream << '\t' << t; + switch (t.category) { + case TokenCategory::Junk: + case TokenCategory::Unclassified: + out.stream //<< f[t.start.line].sub_string(t.start.column, t.end.column) + << " in file " << in.path + << " at line " << t.start.line + << ", column " << t.start.column; + break; + default: + break; + } + out.stream << '\n'; + } + } + catch(const EndOfStringUnseen& e) { + std::cerr << in.path << ": syntax error: " + << "premature end of line before matching quote " + << "of string literal on line " << e.line + << " at column " << e.column + << std::endl; + } + catch (const MissingExponent& e) { + std::cerr << in.path << ": syntax error: " + << "missing exponent of floating point constant " + << "on line " << e.line + << ", column " << e.column + << std::endl; + } + out.stream << "================================================\n"; + } + out.stream << std::flush; + } +} + +namespace OpenAxiom { + + int boot_to_lisp(const char* boot_path, const char* lisp_path) try { + InputFile in { boot_path, std::ios_base::binary }; + OutputFile out { lisp_path, std::ios_base::binary }; + transpile_boot_to_lisp(in, out); + return 0; + } + catch (const Error::CannotOpenFile& e) { + Diagnostics::StandardStream diagnostics { }; + diagnostics.error() << "error: could not open file `" + << e.path << "'\n"; + return -1; + } +} diff --git a/src/syntax/token.cc b/src/syntax/token.cc deleted file mode 100644 index 74b58fa2..00000000 --- a/src/syntax/token.cc +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright (C) 2013-2014, Gabriel Dos Reis. -// All rights reserved. -// Written by Gabriel Dos Reis. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in -// the documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of OpenAxiom. nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include -#include - -namespace OpenAxiom { - std::ostream& operator<<(std::ostream& os, const Locus& l) { - return os << '{' << l.line << ", " << l.column << '}'; - } - - std::ostream& - operator<<(std::ostream& os, TokenCategory tc) { - switch (tc) { - case TokenCategory::Unclassified: os << "UNCLASSIFIED"; break; - case TokenCategory::Whitespace: os << "WHITESPACE"; break; - case TokenCategory::Comment: os << "COMMENT"; break; - case TokenCategory::Punctuator: os << "PUNCTUATOR"; break; - case TokenCategory::Operator: os << "OPERATOR"; break; - case TokenCategory::Integer: os << "INTEGER"; break; - case TokenCategory::FloatingPoint: os << "FLOATINGPOINT"; break; - case TokenCategory::String: os << "STRING"; break; - case TokenCategory::Keyword: os << "KEYWORD"; break; - case TokenCategory::Identifier: os << "IDENTIFIER"; break; - case TokenCategory::Formatting: os << "FORMATTING"; break; - case TokenCategory::Junk: os << "JUNK"; break; - default: os << "????"; break; - } - return os; - } - - - bool separator_or_punctuator(uint8_t c) { - switch (c) { - case '.': case '`': case '^': case '&': case '~': case '*': - case '-': case '+': case ';': case ',': case '@': case '|': - case '\'': case ':': case '=': case '\\': case '"': case '/': - case '(': case ')': case '{': case '}': case '[': case ']': - case '<': case '>': case '#': case ' ': - return true; - default: - return false; - } - } - - namespace { - struct TokenMapEntry { - const char* const text; - const TokenCategory category; - const TokenValue value; - const Language dialect; // = Language::Spad - }; - } - - const TokenMapEntry token_map[] { -#undef OPENAXIOM_DEFINE_TOKEN -#define OPENAXIOM_DEFINE_TOKEN(T, N, C, ...) \ - { N, TokenCategory::C, TokenValue::T, __VA_ARGS__ }, -#include -#undef OPENAXIOM_DEFINE_TOKEN - }; - - TokenClassification - classify(const std::string& s) { - for (auto& t : token_map) { - if (t.text == s) - return { t.category, t.value }; - } - return { TokenCategory::Identifier, TokenValue::Unknown }; - } - - std::ostream& - operator<<(std::ostream& os, TokenValue tv) { - if (tv < TokenValue::Artificial) - os << token_map[uint8_t(tv)].text; - else switch (tv) { - case TokenValue::Indent: os << "%INDENT"; break; - case TokenValue::Unindent: os << "%UNIDENT"; break; - case TokenValue::Justify: os << "%JUSTIFY"; break; - default: os << "%ALIEN"; break; - } - - return os; - } - -} diff --git a/src/syntax/token.cxx b/src/syntax/token.cxx new file mode 100644 index 00000000..74b58fa2 --- /dev/null +++ b/src/syntax/token.cxx @@ -0,0 +1,116 @@ +// Copyright (C) 2013-2014, Gabriel Dos Reis. +// All rights reserved. +// Written by Gabriel Dos Reis. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of OpenAxiom. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include +#include + +namespace OpenAxiom { + std::ostream& operator<<(std::ostream& os, const Locus& l) { + return os << '{' << l.line << ", " << l.column << '}'; + } + + std::ostream& + operator<<(std::ostream& os, TokenCategory tc) { + switch (tc) { + case TokenCategory::Unclassified: os << "UNCLASSIFIED"; break; + case TokenCategory::Whitespace: os << "WHITESPACE"; break; + case TokenCategory::Comment: os << "COMMENT"; break; + case TokenCategory::Punctuator: os << "PUNCTUATOR"; break; + case TokenCategory::Operator: os << "OPERATOR"; break; + case TokenCategory::Integer: os << "INTEGER"; break; + case TokenCategory::FloatingPoint: os << "FLOATINGPOINT"; break; + case TokenCategory::String: os << "STRING"; break; + case TokenCategory::Keyword: os << "KEYWORD"; break; + case TokenCategory::Identifier: os << "IDENTIFIER"; break; + case TokenCategory::Formatting: os << "FORMATTING"; break; + case TokenCategory::Junk: os << "JUNK"; break; + default: os << "????"; break; + } + return os; + } + + + bool separator_or_punctuator(uint8_t c) { + switch (c) { + case '.': case '`': case '^': case '&': case '~': case '*': + case '-': case '+': case ';': case ',': case '@': case '|': + case '\'': case ':': case '=': case '\\': case '"': case '/': + case '(': case ')': case '{': case '}': case '[': case ']': + case '<': case '>': case '#': case ' ': + return true; + default: + return false; + } + } + + namespace { + struct TokenMapEntry { + const char* const text; + const TokenCategory category; + const TokenValue value; + const Language dialect; // = Language::Spad + }; + } + + const TokenMapEntry token_map[] { +#undef OPENAXIOM_DEFINE_TOKEN +#define OPENAXIOM_DEFINE_TOKEN(T, N, C, ...) \ + { N, TokenCategory::C, TokenValue::T, __VA_ARGS__ }, +#include +#undef OPENAXIOM_DEFINE_TOKEN + }; + + TokenClassification + classify(const std::string& s) { + for (auto& t : token_map) { + if (t.text == s) + return { t.category, t.value }; + } + return { TokenCategory::Identifier, TokenValue::Unknown }; + } + + std::ostream& + operator<<(std::ostream& os, TokenValue tv) { + if (tv < TokenValue::Artificial) + os << token_map[uint8_t(tv)].text; + else switch (tv) { + case TokenValue::Indent: os << "%INDENT"; break; + case TokenValue::Unindent: os << "%UNIDENT"; break; + case TokenValue::Justify: os << "%JUSTIFY"; break; + default: os << "%ALIEN"; break; + } + + return os; + } + +} -- cgit v1.2.3