diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/ChangeLog | 18 | ||||
-rw-r--r-- | src/Makefile.am | 34 | ||||
-rw-r--r-- | src/Makefile.in | 31 | ||||
-rw-r--r-- | src/boot/Makefile.am | 18 | ||||
-rw-r--r-- | src/boot/Makefile.in | 84 | ||||
-rw-r--r-- | src/boot/bemol.cc | 278 | ||||
-rw-r--r-- | src/include/dialect.H | 8 | ||||
-rw-r--r-- | src/include/sexpr.H | 45 | ||||
-rw-r--r-- | src/include/token-value.def | 138 | ||||
-rw-r--r-- | src/include/token.H | 691 | ||||
-rw-r--r-- | src/interp/simpbool.boot | 4 | ||||
-rw-r--r-- | src/syntax/token.cc | 122 | ||||
-rw-r--r-- | src/utils/Makefile.in | 22 |
13 files changed, 1181 insertions, 312 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 9630e63d..f3318a06 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,4 +1,20 @@ -2014-08-17 Gabriel Dos Reis <gdr@axiomatics.org> +2014-08-26 Gabriel Dos Reis <gdr@axiomatics.org> + + * Makefile.am (oa_src_include_headers): Remove. These headers are + now linked by configure. + * boot/Makefile.am (noinst_PROGRAMS): Add bemol. + (bemol_SOURCES): New. + (bemol_LDADD): Likewise. + (AM_CXXFLAGS): Set. + * boot/bemol.cc: New file. Initial tokenizer. + * include/sexpr.H: Tidy. + * include/token-value.def: New file. + * include/token.H: Expand. Add generic tokenizer. + * syntax/token.cc: Rework. + * interp/simpbool.boot (list1): Escape leading question mark of ?ORDER. + (ordUnion): Likewise. + +2014-08-17 Gabriel Dos Reis <gdr@integrable-solutions.net> * boot/Makefile.am: New. * boot/Makefile.in: Regenerate. diff --git a/src/Makefile.am b/src/Makefile.am index 5f40cf76..78ea4e62 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -44,27 +44,6 @@ subdirs = \ oa_targetdir = $(top_builddir)/$(target) -## Where do we put installed include header files? 
-oa_incdir = $(oa_targetdir)/include/open-axiom - -oa_src_include_headers = \ - defaults.H \ - structure.H \ - iterator.H \ - storage.H \ - Charset.H \ - FileMapping.H \ - SourceFile.H \ - Input.H \ - diagnostics.H \ - dialect.H \ - vm.H \ - token.H \ - sexpr.H \ - Lisp.H \ - Constructor.H \ - Database.H - if OA_BUILD_SMAN OA_SMAN_TARGETS = all-sman all-clef endif @@ -138,7 +117,7 @@ all-lib: stamp-subdirs all-utils all-lisp: all-lib all-driver cd lisp && $(MAKE) $(AM_MAKEFLAGS) $@ -all-boot: all-lisp +all-boot: all-syntax all-lisp cd boot && $(MAKE) $(AM_MAKEFLAGS) $@ all-interpsys: all-boot all-hyper-pre all-utils @@ -183,22 +162,11 @@ all-hyper-post: all-algebra all-doc all-graph: all-lib all-utils cd graph && $(MAKE) $(AM_MAKEFLAGS) $@ -## Install include headers -.PHONY: all-headers -all-headers: $(patsubst %.H,$(oa_incdir)/%,$(oa_src_include_headers)) - -$(oa_incdir)/%: $(srcdir)/include/%.H | $(oa_incdir) - $(RM) $@ && cp -p $< $@ - -$(oa_incdir): - $(MKDIR_P) $@ - .PHONY: all-check all-check: cd input && $(MAKE) $(AM_MAKEFLAGS) all-check mostlyclean-local: - rm -fr $(oa_incdir) for d in $(subdirs); do \ (cd $$d && $(MAKE) $(AM_MAKEFLAGS) mostlyclean); \ done diff --git a/src/Makefile.in b/src/Makefile.in index a09f81d7..bcea90a4 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -408,25 +408,6 @@ subdirs = \ etc graph hyper input interp \ lib io lisp share sman utils -oa_incdir = $(oa_targetdir)/include/open-axiom -oa_src_include_headers = \ - defaults.H \ - structure.H \ - iterator.H \ - storage.H \ - Charset.H \ - FileMapping.H \ - SourceFile.H \ - Input.H \ - diagnostics.H \ - dialect.H \ - vm.H \ - token.H \ - sexpr.H \ - Lisp.H \ - Constructor.H \ - Database.H - @OA_BUILD_SMAN_TRUE@OA_SMAN_TARGETS = all-sman all-clef @OA_BUILD_GRAPHICS_TRUE@OA_GRAPHICS_GRAPH_TARGET = all-graph @OA_BUILD_GUI_TRUE@OA_GRAPHICS_GUI_TARGET = all-gui @@ -804,7 +785,7 @@ all-lib: stamp-subdirs all-utils all-lisp: all-lib all-driver cd lisp && $(MAKE) $(AM_MAKEFLAGS) $@ 
-all-boot: all-lisp +all-boot: all-syntax all-lisp cd boot && $(MAKE) $(AM_MAKEFLAGS) $@ all-interpsys: all-boot all-hyper-pre all-utils @@ -849,21 +830,11 @@ all-hyper-post: all-algebra all-doc all-graph: all-lib all-utils cd graph && $(MAKE) $(AM_MAKEFLAGS) $@ -.PHONY: all-headers -all-headers: $(patsubst %.H,$(oa_incdir)/%,$(oa_src_include_headers)) - -$(oa_incdir)/%: $(srcdir)/include/%.H | $(oa_incdir) - $(RM) $@ && cp -p $< $@ - -$(oa_incdir): - $(MKDIR_P) $@ - .PHONY: all-check all-check: cd input && $(MAKE) $(AM_MAKEFLAGS) all-check mostlyclean-local: - rm -fr $(oa_incdir) for d in $(subdirs); do \ (cd $$d && $(MAKE) $(AM_MAKEFLAGS) mostlyclean); \ done diff --git a/src/boot/Makefile.am b/src/boot/Makefile.am index b24e923e..23af6c5d 100644 --- a/src/boot/Makefile.am +++ b/src/boot/Makefile.am @@ -58,7 +58,7 @@ LISP_LINK = \ # We use a noinst_ primary because we take care of installation # procedure ourselves. -noinst_PROGRAMS = bootsys +noinst_PROGRAMS = bootsys bemol bootsys_SOURCES = \ utility.boot \ @@ -70,6 +70,14 @@ bootsys_SOURCES = \ parser.boot \ translator.boot +bemol_SOURCES = \ + bemol.cc + +bemol_LDADD = \ + -L$(oa_target_libdir) -lOpenAxiom \ + -L$(top_builddir)/src/syntax -lsyntax \ + -L$(top_builddir)/src/io -lio + oa_target_bootdir = $(oa_targetdir)/boot if OA_ECL_RT oa_bootsys_linkset = $(oa_target_bootdir)/linkset @@ -89,11 +97,19 @@ bootsys_fn = $(bootsys_SOURCES:.boot=.fn) STAMP = touch +AM_CXXFLAGS = \ + -I$(top_srcdir)/src/include \ + -I$(oa_target_includedir) \ + -I$(top_builddir)/config \ + -DOPENAXIOM_ROOT_DIRECTORY="\"$(open_axiom_installdir)\"" + + # Make rule toplevel entry points. .PHONY: all-boot all-am: all-boot all-boot: $(oa_target_bindir)/bootsys$(EXEEXT) $(oa_bootsys_linkset) +all-boot: bemol$(EXEEXT) # The final `bootsys' image. 
$(oa_target_bindir)/bootsys$(EXEEXT): stage2/bootsys$(EXEEXT) diff --git a/src/boot/Makefile.in b/src/boot/Makefile.in index d0c35a72..c55e08d3 100644 --- a/src/boot/Makefile.in +++ b/src/boot/Makefile.in @@ -128,10 +128,11 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ -noinst_PROGRAMS = bootsys$(EXEEXT) +noinst_PROGRAMS = bootsys$(EXEEXT) bemol$(EXEEXT) subdir = src/boot DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ - $(top_srcdir)/config/mkinstalldirs + $(top_srcdir)/config/mkinstalldirs \ + $(top_srcdir)/config/depcomp ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \ $(top_srcdir)/config/ltoptions.m4 \ @@ -146,13 +147,16 @@ CONFIG_HEADER = $(top_builddir)/config/openaxiom-c-macros.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = PROGRAMS = $(noinst_PROGRAMS) -am_bootsys_OBJECTS = -bootsys_OBJECTS = $(am_bootsys_OBJECTS) -bootsys_LDADD = $(LDADD) +am_bemol_OBJECTS = bemol.$(OBJEXT) +bemol_OBJECTS = $(am_bemol_OBJECTS) +bemol_DEPENDENCIES = AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = +am_bootsys_OBJECTS = +bootsys_OBJECTS = $(am_bootsys_OBJECTS) +bootsys_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false @@ -166,6 +170,27 @@ am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/config +depcomp = $(SHELL) $(top_srcdir)/config/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = 
@echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ @@ -184,8 +209,8 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = -SOURCES = $(bootsys_SOURCES) -DIST_SOURCES = $(bootsys_SOURCES) +SOURCES = $(bemol_SOURCES) $(bootsys_SOURCES) +DIST_SOURCES = $(bemol_SOURCES) $(bootsys_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -419,6 +444,14 @@ bootsys_SOURCES = \ parser.boot \ translator.boot +bemol_SOURCES = \ + bemol.cc + +bemol_LDADD = \ + -L$(oa_target_libdir) -lOpenAxiom \ + -L$(top_builddir)/src/syntax -lsyntax \ + -L$(top_builddir)/src/io -lio + oa_target_bootdir = $(oa_targetdir)/boot @OA_ECL_RT_FALSE@oa_bootsys_linkset = @OA_ECL_RT_TRUE@oa_bootsys_linkset = $(oa_target_bootdir)/linkset @@ -432,6 +465,12 @@ bootsys_objects = $(bootsys_SOURCES:.boot=.$(LNKEXT)) bootsys_data = $(bootsys_SOURCES:.boot=.data) bootsys_fn = $(bootsys_SOURCES:.boot=.fn) STAMP = touch +AM_CXXFLAGS = \ + -I$(top_srcdir)/src/include \ + -I$(oa_target_includedir) \ + -I$(top_builddir)/config \ + -DOPENAXIOM_ROOT_DIRECTORY="\"$(open_axiom_installdir)\"" + # # The bootstrapping `bootsys' image. 
@@ -461,6 +500,7 @@ LISP_COMPILE = \ all: all-am .SUFFIXES: +.SUFFIXES: .cc .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ @@ -500,6 +540,10 @@ clean-noinstPROGRAMS: echo " rm -f" $$list; \ rm -f $$list +bemol$(EXEEXT): $(bemol_OBJECTS) $(bemol_DEPENDENCIES) $(EXTRA_bemol_DEPENDENCIES) + @rm -f bemol$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(bemol_OBJECTS) $(bemol_LDADD) $(LIBS) + bootsys$(EXEEXT): $(bootsys_OBJECTS) $(bootsys_DEPENDENCIES) $(EXTRA_bootsys_DEPENDENCIES) @rm -f bootsys$(EXEEXT) $(AM_V_CCLD)$(LINK) $(bootsys_OBJECTS) $(bootsys_LDADD) $(LIBS) @@ -510,6 +554,29 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bemol.Po@am__quote@ + +.cc.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cc.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + mostlyclean-libtool: -rm -f *.lo @@ -638,6 +705,7 @@ clean-am: clean-generic clean-libtool clean-local clean-noinstPROGRAMS \ mostlyclean-am distclean: distclean-am + -rm -rf ./$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-local distclean-tags @@ -683,6 +751,7 @@ install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic @@ -723,6 +792,7 @@ uninstall-am: all-am: all-boot all-boot: $(oa_target_bindir)/bootsys$(EXEEXT) $(oa_bootsys_linkset) +all-boot: bemol$(EXEEXT) # The final `bootsys' image. $(oa_target_bindir)/bootsys$(EXEEXT): stage2/bootsys$(EXEEXT) diff --git a/src/boot/bemol.cc b/src/boot/bemol.cc new file mode 100644 index 00000000..0399bb44 --- /dev/null +++ b/src/boot/bemol.cc @@ -0,0 +1,278 @@ +// -*- C++ -*- +// Copyright (C) 2014, Gabriel Dos Reis. +// All rights reserved. +// Written by Gabriel Dos Reis. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of OpenAxiom. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --% Author: Gabriel Dos Reis +// --% Description: + +#include <open-axiom/diagnostics> +#include <open-axiom/token> +#include <iostream> +#include <fstream> +#include <vector> +#include <string> +#include <stack> +#include <iterator> +#include <ctype.h> + +using namespace OpenAxiom; + +// +// -- Reading input source files -- +// + +// A physical line is just raw text, coupled with location +// information such as line and indentation column. +struct Line : std::string { + LineNumber number; + ColumnIndex indent; + Line() : number(), indent() { } + + std::string sub_string(ColumnIndex s, ColumnIndex e) const { + return substr(s, e - s); + } +}; + +// A program fragment is a logical line, composed of possibly +// several physical lines subject to the off-side rule. As a +// special case, a line ending with the underbar character +// continues to the next line with disregard to the off-side rule. +struct Fragment : std::vector<Line> { + explicit operator bool() const { return not empty(); } + bool line_continuation() const { + return not empty() and back().back() == '_'; + } + ColumnIndex last_indent() const { + return empty() ?
0 : back().indent; + } +}; + +// Formatting program fragments. +static std::ostream& +operator<<(std::ostream& os, const Fragment& f) { + std::copy(f.begin(), f.end(), + std::ostream_iterator<std::string>(std::cout, "\n")); + return os; +} + + +// A source input transform a character stream into a program fragment +// stream, delivering a fragment one at a time. +struct SourceInput { + SourceInput(std::istream& is) : input(is) { } + Fragment get(); + +private: + std::istream& input; + Line line; +}; + +// Return the indentation level of a line. +// FIXME: reject or expand tabs as appropriate. +static ColumnIndex +indentation(const Line& line) { + ColumnIndex idx { }; + for (auto c : line) { + if (not isspace(c)) + break; + ++idx; + } + return idx; +} + +// Remove trailing white-space characters from the line. +static Line& +trim_right(Line& line) { + auto n = line.length(); + while (n > 0 and isspace(line[n-1])) + --n; + line.resize(n); + return line; +} + +// Clean up and dress up the line with indentation information. +static Line& +prop_up(Line& line) { + line.indent = indentation(trim_right(line)); + return line; +} + +// Return true if line is entirely a negative comment. +static bool +negative_comment(const Line& line) { + if (line.indent + 1 >= line.length()) + return false; + return line[line.indent] == '-' and line[line.indent + 1] == '-'; +} + +// Return true if line is either empty or a negative comment. +static bool +blank(const Line& line) { + return line.empty() or negative_comment(line); +} + +// Return true if line is entirely a positive comment, i.e. a description. +static bool +positive_comment(const Line& line) { + if (line.indent + 1 >= line.length()) + return false; + return line[line.indent] == '+' and line[line.indent + 1] == '+'; +} + +// Decompose the input souce file into fragments, and return one +// fragment at a time. 
+Fragment +SourceInput::get() { + Fragment fragment; + std::stack<ColumnIndex> indents; + + // Start with the line saved by the previous call, if any. + if (not line.empty()) { + indents.push(line.indent); + fragment.push_back(line); + } + + while (std::getline(input, line)) { + ++line.number; + if (blank(prop_up(line))) + continue; // Don't bother with ignorable comments. + else if (fragment.line_continuation()) + ; + else if (indents.empty()) { + if (fragment.empty() and line.indent != 0) + std::cout << "warning: white space at begining of fragment" + << " on line " << line.number << '\n'; + indents.push(line.indent); + } + else if (line.indent == 0 and not positive_comment(fragment.back())) + break; // A completely new line; save for later. + else if (line.indent > indents.top()) + indents.push(line.indent); + else { + // Unwind to the enclosing indentation level; never inspect the + // top of an empty stack. + while (not indents.empty() and line.indent < indents.top()) + indents.pop(); + if (indents.empty()) + indents.push(line.indent); + } + fragment.push_back(line); + } + // A failed read at end of input may leave the previous text in `line' + // (e.g. when the file lacks a final newline); discard it so that it is + // not mistaken for a saved line on the next call. + if (not input) + line.clear(); + return fragment; +} + +// +// -- Decomposing source files into lexical units of information -- +// + +struct Locus { + OpenAxiom::LineNumber line; + OpenAxiom::ColumnIndex column; +}; + +static std::ostream& +operator<<(std::ostream& os, const Locus& l) +{ + os << '{' << l.line << ", " << l.column << '}'; + return os; +} + +struct BemolToken { + using Location = ::Locus; + OpenAxiom::TokenCategory category; + OpenAxiom::TokenValue value; + Locus start; + Locus end; + + explicit operator bool() const { return category != TokenCategory::EOS; } +}; + +static std::ostream& +operator<<(std::ostream& os, const BemolToken& t) { + os << t.category << '{' << t.start << '-' << t.end << '}'; + return os; +} + +static void +translate_source_file(SourceInput& src, std::ostream& out, const char* path) { + while (auto f = src.get()) { + out << "================================================\n"; + out << f; + OpenAxiom::TokenStream<Fragment, BemolToken> ts { f }; + try { + while (auto t = ts.get(OpenAxiom::Language::Boot)) { + out << '\t' << t; + switch (t.category) { + case TokenCategory::Junk: + case TokenCategory::Unclassified:
+ out //<< f[t.start.line].sub_string(t.start.column, t.end.column) + << " in file " << path + << " at line " << t.start.line + << ", column " << t.start.column; + break; + default: + break; + } + out << '\n'; + } + } + catch(const OpenAxiom::EndOfStringUnseen& e) { + std::cerr << path << ": syntax error: " + << "premature end of line before matching quote " + << "of string literal on line " << e.line + << " at column " << e.column + << std::endl; + } + catch (const OpenAxiom::MissingExponent& e) { + std::cerr << path << ": syntax error: " + << "missing exponent of floating point constant " + << "on line " << e.line + << ", column " << e.column + << std::endl; + } + out << "================================================\n"; + } + out << std::flush; +} + +static void +process_file(const char* path) { + std::ifstream in { path }; + if (!in) { + std::cerr << "error: could not open file `" << path << "'" + << std::endl; + return; + } + SourceInput src { in }; + translate_source_file(src, std::cout, path); +} + +int main(int argc, char* argv[]) { + for (int i = 1; i < argc; ++i) { + process_file(argv[i]); + } +} diff --git a/src/include/dialect.H b/src/include/dialect.H index f63eac04..bcfddd04 100644 --- a/src/include/dialect.H +++ b/src/include/dialect.H @@ -1,4 +1,4 @@ -// Copyright (C) 2013, Gabriel Dos Reis. +// Copyright (C) 2013-2014, Gabriel Dos Reis. // All rights reserved. // Written by Gabriel Dos Reis. // @@ -36,7 +36,11 @@ namespace OpenAxiom { // Languages for which we have parsers. enum class Language { - Spad, Boot, Lisp + Spad = 0x1, + Boot = 0x2, + Lisp = 0x4, + BootSpad = Spad | Boot, + All = Spad | Boot | Lisp, }; } diff --git a/src/include/sexpr.H b/src/include/sexpr.H index d425b6d8..84513a8b 100644 --- a/src/include/sexpr.H +++ b/src/include/sexpr.H @@ -1,4 +1,4 @@ -// Copyright (C) 2010-2013, Gabriel Dos Reis. +// Copyright (C) 2010-2014, Gabriel Dos Reis. // All rights reserved. // Written by Gabriel Dos Reis. 
// @@ -55,28 +55,27 @@ namespace OpenAxiom { struct Lexeme { enum Type { unknown, // unidentified token - semicolon = token::value(";"), // comment - dot = token::value("."), - comma = token::value(","), - open_paren = token::value("("), - close_paren = token::value(")"), - apostrophe = token::value("'"), - backquote = token::value("`"), - backslash = token::value("\\"), - sharp_open_paren = token::value("#("), - sharp_apostrophe = token::value("#'"), - sharp_colon = token::value("#:"), - sharp_plus = token::value("#+"), - sharp_minus = token::value("#-"), - sharp_dot = token::value("#."), - comma_at = token::value(",@"), - digraph_end = token::value(0xff,0xff), - integer, // integer literal - character, // character literal - string, // string literal - identifier, // plain identifier - sharp_integer_equal, // anchor definition, #n=<form> - sharp_integer_sharp // back reference, #n# + semicolon, // ";" for comment + dot, // "." + comma, // "," + open_paren, // "(" + close_paren, // ")" + apostrophe, // "'" + backquote, // "`" + backslash, // "\\" + sharp_open_paren , // "#(" + sharp_apostrophe, // "#'" + sharp_colon, // "#:" + sharp_plus, // "#+" + sharp_minus, // "#-" + sharp_dot, // "#." + comma_at, // ",@" + integer, // integer literal + character, // character literal + string, // string literal + identifier, // plain identifier + sharp_integer_equal, // anchor definition, #n=<form> + sharp_integer_sharp // back reference, #n# }; std::pair<const Byte*, const Byte*> boundary; diff --git a/src/include/token-value.def b/src/include/token-value.def new file mode 100644 index 00000000..ea79c9a5 --- /dev/null +++ b/src/include/token-value.def @@ -0,0 +1,138 @@ +// Copyright (C) 2014, Gabriel Dos Reis. +// All rights reserved. +// Written by Gabriel Dos Reis. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of OpenAxiom. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + + +OPENAXIOM_DEFINE_TOKEN(Unknown, "<unknown>", Unclassified, Language::All) +OPENAXIOM_DEFINE_TOKEN(Apostrophe, "'", Punctuator, Language::All) +OPENAXIOM_DEFINE_TOKEN(Backquote, "`", Punctuator, Language::All) +OPENAXIOM_DEFINE_TOKEN(Bar, "|", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Dot, ".", Punctuator, Language::All) +OPENAXIOM_DEFINE_TOKEN(DotDot, "..", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Colon, ":", Punctuator, Language::All) +OPENAXIOM_DEFINE_TOKEN(ColonColon, "::", Operator, Language::All) +OPENAXIOM_DEFINE_TOKEN(ColonDash, ":-", Operator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(ColonEq, ":=", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(At, "@", Operator, Language::All) +OPENAXIOM_DEFINE_TOKEN(Exclamation, "!", Punctuator, Language::Boot) +OPENAXIOM_DEFINE_TOKEN(Comma, ",", Punctuator, Language::All) +OPENAXIOM_DEFINE_TOKEN(Semicolon, ";", Punctuator, Language::All) +OPENAXIOM_DEFINE_TOKEN(Star, "*", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(StarStar, "**", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Plus, "+", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Minus, "-", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Slash, "/", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(SlashSlash, "//", Operator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(SlashBackslash, "/\\", Operator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(Backslash, "\\", Operator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(BackslashSlash, "\\/", Operator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(BackslashBackslash, "\\\\", Operator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(Less, "<", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(LessEq, "<=", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Greater, ">", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(GreaterEq, ">=", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Eq, "=", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(EqEq, "==", 
Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Tilde, "~", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(TildeEq, "~=", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Caret, "^", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Pound, "#", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Dollar, "$", Operator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(Ampersand, "&", Operator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(RightArrow, "->", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(LeftArrow, "<-", Operator, Language::Boot) +OPENAXIOM_DEFINE_TOKEN(Implies, "=>", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Equiv, "<=>", Keyword, Language::Boot) +OPENAXIOM_DEFINE_TOKEN(MapsTo, "+->", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(FatArrow, "==>", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(OpenParen, "(", Punctuator, Language::All) +OPENAXIOM_DEFINE_TOKEN(CloseParen, ")", Punctuator, Language::All) +OPENAXIOM_DEFINE_TOKEN(OpenMetaParen, "(|", Punctuator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(CloseMetaParen, "|)", Punctuator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(OpenBracket, "[", Punctuator, Language::All) +OPENAXIOM_DEFINE_TOKEN(CloseBracket, "]", Punctuator, Language::All) +OPENAXIOM_DEFINE_TOKEN(OpenMetaBracket, "[|", Punctuator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(CloseMetaBracket, "|]", Punctuator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(OpenBrace, "{", Punctuator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(CloseBrace, "}", Punctuator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(OpenMetaBrace, "{|", Punctuator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(CloseMetaBrace, "|}", Punctuator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(OpenChevron, "<<", Operator, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(CloseChevron, ">>", Operator, Language::Spad) + +OPENAXIOM_DEFINE_TOKEN(Wisecrack, "--", Comment, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Commentary, "++", Comment, Language::BootSpad) + 
+OPENAXIOM_DEFINE_TOKEN(Add, "add", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(And, "and", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Assume, "assume", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(Break, "break", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(By, "by", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Case, "case", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Catch, "catch", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Cross, "cross", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Do, "do", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Else, "else", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Exists, "exists", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(Finally, "finally", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(For, "for", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Forall, "forall", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(From, "from", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(Function, "function", Keyword, Language::Boot) +OPENAXIOM_DEFINE_TOKEN(Has, "has", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(If, "if", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Import, "import", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(In, "in", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Inline, "inline", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(Is, "is", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Isnt, "isnt", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Iterate, "iterate", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Leave, "leave", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Macro, "macro", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Mod, "mod", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Namespace, "namespace", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Of, "of", Keyword, Language::Boot) +OPENAXIOM_DEFINE_TOKEN(Or, "or", Operator, 
Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Pretend, "pretend", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Quo, "quo", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Rem, "rem", Operator, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Repeat, "repeat", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Return, "return", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Rule, "rule", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Structure, "structure", Keyword, Language::Boot) +OPENAXIOM_DEFINE_TOKEN(Then, "then", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(Throw, "throw", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(Try, "try", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(Until, "until", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(With, "with", Keyword, Language::Spad) +OPENAXIOM_DEFINE_TOKEN(Where, "where", Keyword, Language::BootSpad) +OPENAXIOM_DEFINE_TOKEN(While, "while", Keyword, Language::BootSpad) diff --git a/src/include/token.H b/src/include/token.H index ef203b12..3b3b2950 100644 --- a/src/include/token.H +++ b/src/include/token.H @@ -1,4 +1,4 @@ -// Copyright (C) 2013, Gabriel Dos Reis. +// Copyright (C) 2013-2014, Gabriel Dos Reis. // All rights reserved. // Written by Gabriel Dos Reis. // @@ -34,151 +34,556 @@ #define OPENAXIOM_TOKEN_included #include <stdint.h> +#include <stack> +#include <iosfwd> #include <open-axiom/Input> +#include <open-axiom/dialect> namespace OpenAxiom { - namespace token { - // -- Underlying representation of a token class. - using base_type = uint32_t; - - // -- 8-bit byte data type - using u8 = uint8_t; - - constexpr base_type value(u8 c) { return c; } - constexpr base_type value(u8 hi, u8 lo) { return (hi << 8) | lo; } - constexpr base_type value(u8 hi, u8 mi, u8 lo) { - return (value(hi, mi) << 8) | lo; - } - - // -- Type of literal strings of given number of characters. 
- template<int N> - using text_chunk = const char(&)[N+1]; - - // -- Return the token value of certain literal strings. - constexpr base_type value(text_chunk<0>) { return u8(); } - constexpr base_type value(text_chunk<1> s) { - return value(s[0]); - } - constexpr base_type value(text_chunk<2> s) { - return value(s[0], s[1]); - } - constexpr base_type value(text_chunk<3> s) { - return value(s[0], s[1], s[2]); - } - - // -- Abstract values of tokens. - enum Value : base_type { - Unknown = value(""), - Bar = value("|"), - Dot = value("."), - DotDot = value(".."), - Colon = value(":"), - ColonColon = value("::"), - ColonDash = value(":-"), - ColonEq = value(":="), - At = value("@"), - Comma = value(","), - Semicolon = value(";"), - Star = value("*"), - Plus = value("+"), - Minus = value("-"), - Slash = value("/"), - Backslash = value("\\"), - SlashSlash = value("//"), - BackslashBackslash = value("\\\\"), - BackslashSlash = value("\\/"), - SlashBackslash = value("/\\"), - Less = value("<"), - LessEq = value("<="), - Greater = value(">"), - GreaterEq = value(">="), - Eq = value("="), - EqEq = value("=="), - Tilde = value("~"), - TildeEq = value("~="), - Caret = value("^"), - Pound = value("#"), - Dollar = value("$"), - Ampersand = value("&"), - OpenParen = value("("), - CloseParen = value(")"), - OpenBracket = value("["), - CloseBracket = value("]"), - OpenBrace = value("{"), - CloseBrace = value("}"), - OpenMetParen = value("(|"), - CloseMetaParen = value("|)"), - OpenMetaBracket = value("[|"), - CloseMetaBracket = value("|]"), - OpenMetaBrace = value("{|"), - CloseMetaBrace = value("|}"), - Apostrophe = value("'"), - Backquote = value("`"), - StarStar = value("**"), - Implies = value("=>"), - RightArrow = value("->"), - LeftArrow = value("<-"), - OpenChevron = value("<<"), - CloseChevron = value(">>"), - FatArrow = value("==>"), - Equiv = value("<=>"), - MapsTo = value("+->"), - - Add = value("add"), - And = value("and"), - By = value("by"), - Do = value("do"), - For 
= value("for"),
-         Has = value("has"),
-         If = value("if"),
-         In = value("in"),
-         Is = value("is"),
-         Mod = value("mod"),
-         Of = value("of"), // -- Boot only
-         Or = value("or"),
-         Quo = value("quo"),
-         Rem = value("rem"),
-         Try = value("try"),
-         LastTrigraph = 0xffffff,
-
-         Assume, // "assume"
-         Break, // "break"
-         Case, // "case"
-         Catch, // "catch"
-         Cross, // "cross"
-         Else, // "else"
-         Exists, // "exists"
-         Finally, // "finally"
-         From, // "from"
-         Forall, // "forall"
-         Function, // "function" -- Boot only
-         Import, // "import"
-         Inline, // "inline"
-         Isnt, // "isnt"
-         Iterate, // "iterate"
-         Leave, // "leave"
-         Macro, // "macro"
-         Module, // "module" -- Boot only
-         Namespace, // "namespace" -- Boot only
-         Pretend, // "pretend"
-         Repeat, // "repeat"
-         Return, // "return"
-         Rule, // "rule"
-         Structure, // "structure" -- Boot only
-         Then, // "then"
-         Throw, // "throw"
-         Until, // "until"
-         With, // "with"
-         Where, // "where"
-         While, // "while"
-
-         IntegerLiteral, // integer literal
-         StringLiteral, // string literal
-         FPLiteral, // floating point literal
-         Indent, // new line indentation, greater than previous
-         Unindent, // new line indentation, less than previous
-         Justify, // align indentation with preceding line.
-      };
+   // Categorization of Boot and Spad tokens.
+   // Each token carries exactly one category; the category is stored
+   // in a single byte.
+   enum class TokenCategory : uint8_t {
+      Unclassified,             // token of unknown class
+      Whitespace,               // sequence of white-space characters
+      Comment,                  // an ignorable comment
+      Punctuator,               // a punctuator character
+      Operator,                 // an operator, symbolic or alphabetic
+      Integer,                  // an integer literal
+      FloatingPoint,            // a floating-point literal
+      String,                   // a string literal
+      Keyword,                  // a reserved word, symbolic or alphabetic
+      Identifier,               // an identifier
+      Formatting,               // a layout formatting token
+      Junk,                     // invalid/malformed token
+      EOS                       // end-of-token-stream indicator
+   };
+
+   // Stream insertion for a token category.
+   std::ostream& operator<<(std::ostream&, TokenCategory);
+
+   // The abstract value associated with a token.
+   enum class TokenValue : uint8_t {
+      // One enumerator per OPENAXIOM_DEFINE_TOKEN entry of
+      // <open-axiom/token-value>, in the order they appear there.
+#undef OPENAXIOM_DEFINE_TOKEN
+#define OPENAXIOM_DEFINE_TOKEN(T, ...) T,
+#include <open-axiom/token-value>
+#undef OPENAXIOM_DEFINE_TOKEN
+      Artificial,     // Tokens after this are artificial
+      Indent,         // new line indentation, greater than previous
+      Unindent,       // new line indentation, less than previous
+      Justify,        // align indentation with preceding line.
+
+      EndOfStream     // end of token stream
+   };
+
+   // Stream insertion for a token value.
+   std::ostream& operator<<(std::ostream&, TokenValue);
+
+   // Given a symbolic or alphabetic token, retrieve its category
+   // and associated abstract value.
+   struct TokenClassification {
+      TokenCategory category;
+      TokenValue value;
+
+      // True unless the category is TokenCategory::Unclassified.
+      explicit operator bool() const {
+         return category != TokenCategory::Unclassified;
+      }
+   };
+
+   // Look up the classification of the token spelled by the argument.
+   TokenClassification classify(const std::string&);
+
+   // Datatypes for locating lines and columns.
+   using LineNumber = std::size_t;
+   using ColumnIndex = std::size_t;
+
+   // -- Exception types
+   // Thrown by string() when a line ends before the closing quote.
+   struct EndOfStringUnseen {
+      LineNumber line;
+      ColumnIndex column;
+   };
+
+   // Thrown by number() when an exponent marker has no digits after it.
+   struct MissingExponent {
+      LineNumber line;
+      ColumnIndex column;
+   };
+
+   // Objects of this datatype decompose a program fragment into a
+   // token stream.  The tokens are of type indicated by Tok.
+   // Frag is indexed by line; each line provides size(), back(),
+   // operator[], and the members `indent' and `number'.
+   template<typename Frag, typename Tok>
+   struct TokenStream {
+      // Start at the indentation column of the first line.
+      // The fragment must not be empty: frag.front() is read here.
+      TokenStream(Frag& f)
+            : frag(f),
+              line(),
+              idx(frag.front().indent)
+      {
+         indents.push(idx);
+      }
+
+      // True once every line is consumed, or the cursor is at or past
+      // the end of the last line.
+      bool eos() const {
+         return line >= frag.size()
+            or (line + 1 == frag.size() and idx >= frag.back().size());
+      }
+
+      // Extract the next token, lexing according to the given dialect.
+      Tok get(Language = Language::Spad);
+   private:
+      Frag& frag;                       // fragment being tokenized
+      std::size_t line;                 // index of the current line
+      std::size_t idx;                  // column cursor in that line
+      std::stack<ColumnIndex> indents;  // enclosing indentation levels
+
+      std::size_t line_length() const { return frag[line].size(); }
+      // Number of the line after the current one; one past the last
+      // line's number when there is no such line.
+      LineNumber next_line_number() const {
+         return line + 1 < frag.size()
+            ? frag[line + 1].number
+            : frag.back().number + 1;
+      }
+      // Indentation of the line after the current one; 0 if none.
+      ColumnIndex next_indentation() const {
+         return line + 1 < frag.size() ? frag[line + 1].indent : 0;
+      }
+
+      LineNumber line_number() const {
+         return line < frag.size()
+            ? frag[line].number
+            : frag.back().number + 1;
+      }
+
+      ColumnIndex column_number() const {
+         return line < frag.size() ? idx : 0;
+      }
+
+      using Locus = typename Tok::Location;
+      Locus current_locus() {
+         return { line_number(), column_number() };
+      }
+   };
+
+   bool separator_or_punctuator(uint8_t);
+
+   // Consume characters up to the next separator or punctuator and
+   // mark the token as junk.
+   template<typename L, typename T>
+   static void junk(L& line, ColumnIndex& idx, T& t) {
+      while (idx < line.size() and not separator_or_punctuator(line[idx]))
+         ++idx;
+      t.category = TokenCategory::Junk;
+   }
+
+   // Advance idx past a run of white-space characters.
+   // NOTE(review): isspace/isdigit receive line[idx] as is; if the
+   // element type is a plain signed char this needs a cast to
+   // unsigned char -- confirm against the line type.
+   template<typename L>
+   inline void
+   skip_whitespace(L& line, ColumnIndex& idx) {
+      while (idx < line.size() and isspace(line[idx]))
+         ++idx;
+   }
+
+   // Lex the remainder of a string literal; idx is just past the
+   // opening quote.  An underscore escapes the character after it.
+   // Throws EndOfStringUnseen if the line ends before the closing quote.
+   template<typename L, typename T>
+   void string(L& line, ColumnIndex& idx, T& t) {
+      bool done = false;
+      bool escape = false;
+      while (idx < line.size() && not done) {
+         switch (line[idx++]) {
+         case '_': escape = !escape; break;
+         case '"': done = !escape;
+            // fallthrough
+         default: escape = false; break;
+         }
+      }
+      if (not done)
+         throw EndOfStringUnseen{ line.number, idx };
+      t.category = TokenCategory::String;
+   }
+
+   // Advance idx past a run of decimal digits.
+   template<typename L>
+   void skip_to_end_of_integer(L& line, ColumnIndex& idx) {
+      while (idx < line.size() and isdigit(line[idx]))
+         ++idx;
+   }
+
+   // Lex an integer literal.
+   template<typename L, typename T>
+   void integer(L& line, ColumnIndex& idx, T& t) {
+      skip_to_end_of_integer(line, idx);
+      t.category = TokenCategory::Integer;
+   }
+
+   // Lex an integer or floating-point literal.  A '.' belongs to the
+   // number only when a digit follows it.  Throws MissingExponent if
+   // 'e'/'E' (with optional sign) is not followed by a digit.
+   template<typename L, typename T>
+   T& number(L& line, ColumnIndex& idx, T& t) {
+      integer(line, idx, t);
+      if (idx >= line.size() or line[idx] != '.')
+         return t;
+      if (++idx >= line.size() or not isdigit(line[idx])) {
+         --idx;                 // give the '.' back; it is not ours.
+         return t;
+      }
+
+      t.category = TokenCategory::FloatingPoint;
+      skip_to_end_of_integer(line, idx);
+      if (idx >= line.size() or (line[idx] != 'e' and line[idx] != 'E'))
+         return t;
+      if (++idx < line.size() and (line[idx] == '+' or line[idx] == '-'))
+         ++idx;
+      if (idx >= line.size() or not isdigit(line[idx]))
+         throw MissingExponent{ line.number, idx };
+      skip_to_end_of_integer(line, idx);
+      return t;
+   }
+
+   // Characters that may start an identifier.
+   inline bool
+   identifier_head(uint8_t c) {
+      return isalpha(c) or c == '%' or c == '_';
+   }
+
+   // Characters that may continue an identifier.
+   inline bool
+   identifier_part(uint8_t c) {
+      return identifier_head(c) or isdigit(c);
+   }
+
+   // Characters allowed as trailing decoration of an identifier.
+   inline bool
+   identifier_suffix(uint8_t c) {
+      return c == '!' or c == '?';
+   }
+
+   // Characters that prefix internal names (honored for Boot only,
+   // see identifier()).
+   inline bool internal_prefix(uint8_t c) {
+      return c == '%' or c == '$';
+   }
+
+   // Advance idx past a run of the character c.
+   template<typename L>
+   inline void
+   skip_prefix(L& line, ColumnIndex& idx, uint8_t c) {
+      while (idx < line.size() and line[idx] == c)
+         ++idx;
+   }
+
+   // Lex an identifier, keyword, or alphabetic operator.  On entry idx
+   // is one past the first character.  A character preceded by '_' is
+   // accepted whatever it is; a spelling containing '_' is never looked
+   // up in the token table and stays an identifier.
+   template<typename L, typename T>
+   T& identifier(L& line, ColumnIndex& idx, T& t, Language dialect) {
+      t.category = TokenCategory::Identifier;
+
+      ColumnIndex start = --idx; // idx was ahead by 1.
+      if (dialect == Language::Boot and internal_prefix(line[idx]))
+         skip_prefix(line, idx, line[idx]);
+      bool saw_escape = false;
+      while (idx < line.size()) {
+         if (not identifier_part(line[idx]) and line[idx - 1] != '_')
+            break;
+         else if (line[idx] == '_')
+            saw_escape = true;
+         ++idx;
+      }
+      while (idx < line.size() and identifier_suffix(line[idx]))
+         ++idx;
+
+      if (saw_escape)
+         t.category = TokenCategory::Identifier;
+      else if (auto info = classify(line.sub_string(start, idx))) {
+         t.category = info.category;
+         t.value = info.value;
+      }
+      return t;
+   }
+
+   // Return the next token of the stream: EOS at the end, a Whitespace
+   // token for a run of blanks, a Formatting token (Indent, Unindent,
+   // Justify) at each end of line, otherwise the lexeme at the cursor.
+   template<typename Frag, typename Tok>
+   Tok TokenStream<Frag, Tok>::get(Language dialect) {
+      Tok t { };
+      t.start = current_locus();
+
+      if (eos()) {
+         t.category = TokenCategory::EOS;
+         t.end = current_locus();
+         return t;
+      }
+      else if (isspace(frag[line][idx])) {
+         skip_whitespace(frag[line], idx);
+         t.category = TokenCategory::Whitespace;
+         t.end = current_locus();
+         return t;
+      }
+      // A trailing underscore continues the logical line on the next
+      // physical line -- but only if there is a next line.  On the last
+      // line the underscore is lexed as an ordinary character below.
+      else if (line + 1 < frag.size()
+               and idx == line_length() - 1 and frag[line].back() == '_') {
+         ++line;
+         idx = frag[line].indent;
+         // NOTE(review): if the continuation line has no character at
+         // its indentation column, the switch below reads past its
+         // end -- confirm whether Frag can contain such lines.
+      }
+      else if (idx == line_length()) {
+         // End of a non-final line: report how the next line is laid
+         // out relative to the current indentation level.
+         auto indent = indents.top();
+         auto next_indent = next_indentation();
+         t.start = t.end = { next_line_number(), next_indent };
+         if (indent < next_indent) {
+            indents.push(next_indent);
+            ++line;
+            idx = next_indent;
+            t.category = TokenCategory::Formatting;
+            t.value = TokenValue::Indent;
+         }
+         else if (indent > next_indent) {
+            // The cursor stays put, so successive calls pop one level
+            // at a time.
+            // NOTE(review): nothing keeps the stack non-empty when the
+            // next line is indented less than the first line; a later
+            // indents.top() would then be undefined -- confirm.
+            indents.pop();
+            t.category = TokenCategory::Formatting;
+            t.value = TokenValue::Unindent;
+         }
+         else {
+            ++line;
+            idx = next_indent;
+            t.category = TokenCategory::Formatting;
+            t.value = TokenValue::Justify;
+         }
+         return t;
+      }
+
+      switch (auto c = frag[line][idx++]) {
+      case '#':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Pound;
+         break;
+
+      case '@':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::At;
+         break;
+
+      case '^':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Caret;
+         break;
+
+      case '&':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::Ampersand;
+         break;
+
+      case '!':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::Exclamation;
+         break;
+
+      case '\'':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::Apostrophe;
+         break;
+      case ',':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::Comma;
+         break;
+
+      case ';':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::Semicolon;
+         break;
+
+      case '`':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::Backquote;
+         break;
+
+      case '(':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::OpenParen;
+         if (idx < line_length() and frag[line][idx] == '|') {
+            ++idx;
+            t.value = TokenValue::OpenMetaParen;
+         }
+         break;
+
+      case ')':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::CloseParen;
+         break;
+
+      case '{':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::OpenBrace;
+         if (idx < line_length() and frag[line][idx] == '|') {
+            ++idx;
+            t.value = TokenValue::OpenMetaBrace;
+         }
+         break;
+
+      case '}':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::CloseBrace;
+         break;
+
+      case '[':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::OpenBracket;
+         if (idx < line_length() and frag[line][idx] == '|') {
+            ++idx;
+            t.value = TokenValue::OpenMetaBracket;
+         }
+         break;
+
+      case ']':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::CloseBracket;
+         break;
+
+      case ':':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Colon;
+         if (idx < line_length())
+            switch (frag[line][idx]) {
+            case ':': t.value = TokenValue::ColonColon; ++idx; break;
+            case '=': t.value = TokenValue::ColonEq; ++idx; break;
+            case '-': t.value = TokenValue::ColonDash; ++idx; break;
+            default: break;
+            }
+         break;
+
+      case '*':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Star;
+         if (idx < line_length() and frag[line][idx] == '*') {
+            t.value = TokenValue::StarStar;
+            ++idx;
+         }
+         break;
+
+      case '/':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Slash;
+         if (idx < line_length())
+            switch (frag[line][idx]) {
+            case '/': t.value = TokenValue::SlashSlash; ++idx; break;
+            case '\\': t.value = TokenValue::SlashBackslash; ++idx; break;
+            default: break;
+            }
+         break;
+
+      case '\\':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Backslash;
+         if (idx < line_length())
+            switch (frag[line][idx]) {
+            case '\\': t.value = TokenValue::BackslashBackslash; ++idx; break;
+            case '/': t.value = TokenValue::BackslashSlash; ++idx; break;
+            default: break;
+            }
+         break;
+
+      case '<':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Less;
+         if (idx < line_length())
+            switch (frag[line][idx]) {
+            case '-': t.value = TokenValue::LeftArrow; ++idx; break;
+            case '<': t.value = TokenValue::OpenChevron; ++idx; break;
+            case '=':
+               t.value = TokenValue::LessEq;
+               if (++idx < line_length() and frag[line][idx] == '>') {
+                  t.value = TokenValue::Equiv;
+                  ++idx;
+               }
+               break;
+            default: break;
+            }
+         break;
+
+      case '=':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Eq;
+         if (idx < line_length())
+            switch (frag[line][idx]) {
+            case '>': t.value = TokenValue::Implies; ++idx; break;
+            case '=':
+               t.value = TokenValue::EqEq;
+               if (++idx < line_length() and frag[line][idx] == '>') {
+                  t.value = TokenValue::FatArrow;
+                  ++idx;
+               }
+               break;
+            default: break;
+            }
+         break;
+
+      case '~':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Tilde;
+         if (idx < line_length() and frag[line][idx] == '=') {
+            t.value = TokenValue::TildeEq;
+            ++idx;
+         }
+         break;
+
+      case '>':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Greater;
+         if (idx < line_length())
+            switch (frag[line][idx]) {
+            case '=': t.value = TokenValue::GreaterEq; ++idx; break;
+            case '>': t.value = TokenValue::CloseChevron; ++idx; break;
+            }
+         break;
+
+      case '|':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Bar;
+         if (idx < line_length())
+            switch (frag[line][idx]) {
+            case ']': t.value = TokenValue::CloseMetaBracket; ++idx; break;
+            case '}': t.value = TokenValue::CloseMetaBrace; ++idx; break;
+            case ')': t.value = TokenValue::CloseMetaParen; ++idx; break;
+            default: break;
+            }
+         break;
+
+      case '-':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Minus;
+         if (idx < line_length())
+            switch (frag[line][idx]) {
+            case '>': t.value = TokenValue::RightArrow; ++idx; break;
+            case '-':
+               // "--" comment: swallow the rest of the line.
+               t.category = TokenCategory::Comment;
+               t.value = TokenValue::Wisecrack;
+               idx = frag[line].size();
+               break;
+            }
+         break;
+
+      case '+':
+         t.category = TokenCategory::Operator;
+         t.value = TokenValue::Plus;
+         if (idx < line_length())
+            switch (frag[line][idx]) {
+            case '+':
+               // "++" comment: swallow the rest of the line.
+               t.category = TokenCategory::Comment;
+               t.value = TokenValue::Commentary;
+               idx = frag[line].size();
+               break;
+            case '-':
+               // "+->" needs two characters of lookahead.
+               if (idx + 1 < line_length() and frag[line][idx+1] == '>') {
+                  t.value = TokenValue::MapsTo;
+                  idx += 2;
+               }
+               break;
+            default: break;
+            }
+         break;
+
+      case '.':
+         t.category = TokenCategory::Punctuator;
+         t.value = TokenValue::Dot;
+         if (idx < line_length() and frag[line][idx] == '.') {
+            t.category = TokenCategory::Operator;
+            t.value = TokenValue::DotDot;
+            ++idx;
+         }
+         break;
+
+      case '"':
+         string(frag[line], idx, t);
+         break;
+
+      case '$':
+         // In Boot, '$' glued to a following name starts an identifier;
+         // everywhere else it is the Dollar operator.
+         if (dialect != Language::Boot or idx >= line_length()
+             or separator_or_punctuator(frag[line][idx])) {
+            t.category = TokenCategory::Operator;
+            t.value = TokenValue::Dollar;
+         }
+         else
+            identifier(frag[line], idx, t, dialect);
+         break;
+
+      default:
+         if (isdigit(c))
+            number(frag[line], idx, t);
+         else if (identifier_head(c))
+            identifier(frag[line], idx, t, dialect);
+         else
+            junk(frag[line], idx, t);
+         break;
+      }
+
+      t.end = { frag[line].number, idx };
+      return t;
    }
 }
diff --git a/src/interp/simpbool.boot b/src/interp/simpbool.boot
index 3432317d..e904869a 100644
--- a/src/interp/simpbool.boot
+++ b/src/interp/simpbool.boot
@@ -175,7 +175,7 @@ list3 l ==
 orDel(a,l) ==
 l is [h,:t] =>
 a = h => t
- ?ORDER(a,h) => [a,:l]
+ _?ORDER(a,h) => [a,:l]
 [h,:orDel(a,t)]
 [a]
 ordList l ==
@@ -185,7 +185,7 @@ ordUnion(a,b) ==
 a isnt [c,:r] => b
 b isnt [d,:s] => a
 c=d => [c,:ordUnion(r,s)]
- ?ORDER(a,b) => [c,:ordUnion(r,b)]
+ _?ORDER(a,b) => [c,:ordUnion(r,b)]
 [d,:ordUnion(s,a)]
 ordIntersection(a,b) ==
 a isnt [h,:t] => nil
diff --git a/src/syntax/token.cc b/src/syntax/token.cc
index b1db7ec1..fe3cd7a7 100644
--- a/src/syntax/token.cc
+++ b/src/syntax/token.cc
@@ -31,58 +31,82 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <open-axiom/token> +#include <ostream> +#include <iostream> namespace OpenAxiom { - namespace token { - struct Keyword { - const char* const key; - const Value value; - }; + std::ostream& + operator<<(std::ostream& os, TokenCategory tc) { + switch (tc) { + case TokenCategory::Unclassified: os << "UNCLASSIFIED"; break; + case TokenCategory::Whitespace: os << "WHITESPACE"; break; + case TokenCategory::Comment: os << "COMMENT"; break; + case TokenCategory::Punctuator: os << "PUNCTUATOR"; break; + case TokenCategory::Operator: os << "OPERATOR"; break; + case TokenCategory::Integer: os << "INTEGER"; break; + case TokenCategory::FloatingPoint: os << "FLOATINGPOINT"; break; + case TokenCategory::String: os << "STRING"; break; + case TokenCategory::Keyword: os << "KEYWORD"; break; + case TokenCategory::Identifier: os << "IDENTIFIER"; break; + case TokenCategory::Formatting: os << "FORMATTING"; break; + case TokenCategory::Junk: os << "JUNK"; break; + default: os << "????"; break; + } + return os; + } + - const Keyword keyword_map[] = { - { "add", Value::Add }, - { "and", Value::And }, - { "assume", Value::Assume }, - { "break", Value::Break }, - { "by", Value::By }, - { "case", Value::Case }, - { "catch", Value::Catch }, - { "do", Value::Do }, - { "else", Value::Else }, - { "exist", Value::Exists }, - { "finally", Value::Finally }, - { "for", Value::For }, - { "from", Value::From }, - { "function", Value::Function }, - { "has", Value::Has }, - { "if", Value::If }, - { "import", Value::Import }, - { "in", Value::In }, - { "inline", Value::Inline }, - { "is", Value::Is }, - { "isnt", Value::Isnt }, - { "iterate", Value::Iterate }, - { "leave", Value::Leave }, - { "macro", Value::Macro }, - { "mod", Value::Mod }, - { "module", Value::Module }, - { "namespace", Value::Namespace }, - { "of", Value::Of }, - { "or", Value::Or }, - { "pretend", Value::Pretend }, - { "quo", Value::Quo }, - { "rem", Value::Rem }, - { "repeat", Value::Repeat }, - { "return", Value::Return }, 
-         { "rule", Value::Rule },
-         { "structure", Value::Structure },
-         { "then", Value::Then },
-         { "throw", Value::Throw },
-         { "try", Value::Try },
-         { "until", Value::Until },
-         { "with", Value::With },
-         { "where", Value::Where },
-         { "while", Value::While }
+   // True for the blank and for every character that starts a
+   // punctuator or symbolic operator; these end junk runs and decide
+   // whether a Boot '$' stands alone.
+   bool separator_or_punctuator(uint8_t c) {
+      switch (c) {
+      case '.': case '`': case '^': case '&': case '~': case '*':
+      case '-': case '+': case ';': case ',': case '@': case '|':
+      case '\'': case ':': case '=': case '\\': case '"': case '/':
+      case '(': case ')': case '{': case '}': case '[': case ']':
+      case '<': case '>': case '#': case ' ':
+         return true;
+      default:
+         return false;
+      }
+   }
+
+   namespace {
+      // One row of the token table: spelling, category, abstract
+      // value, and the dialect(s) the token belongs to.
+      struct TokenMapEntry {
+         const char* const text;
+         const TokenCategory category;
+         const TokenValue value;
+         const Language dialect; // = Language::Spad
       };
    }
+
+   // Built from the same <open-axiom/token-value> list, in the same
+   // order, as the leading enumerators of TokenValue; operator<<
+   // below relies on that to index the table by enumerator value.
+   const TokenMapEntry token_map[] {
+#undef OPENAXIOM_DEFINE_TOKEN
+#define OPENAXIOM_DEFINE_TOKEN(T, N, C, ...) \
+      { N, TokenCategory::C, TokenValue::T, __VA_ARGS__ },
+#include <open-axiom/token-value>
+#undef OPENAXIOM_DEFINE_TOKEN
+   };
+
+   // Linear search of the token table for an exact spelling match.
+   // Returns an Unclassified/Unknown classification when s is not a
+   // known token.  The dialect column is not consulted.
+   TokenClassification
+   classify(const std::string& s) {
+      for (auto& t : token_map) {
+         if (t.text == s)
+            return { t.category, t.value };
+      }
+      return { TokenCategory::Unclassified, TokenValue::Unknown };
+   }
+
+   // Print the spelling of a token value: the table text for values
+   // below Artificial, a %-prefixed name for the layout tokens, and
+   // "%ALIEN" for anything else.
+   std::ostream&
+   operator<<(std::ostream& os, TokenValue tv) {
+      if (tv < TokenValue::Artificial)
+         os << token_map[uint8_t(tv)].text;
+      else switch (tv) {
+         case TokenValue::Indent: os << "%INDENT"; break;
+         case TokenValue::Unindent: os << "%UNINDENT"; break;
+         case TokenValue::Justify: os << "%JUSTIFY"; break;
+         default: os << "%ALIEN"; break;
+         }
+
+      return os;
+   }
+
 }
diff --git a/src/utils/Makefile.in b/src/utils/Makefile.in
index 653b3cb5..3f70fda8 100644
--- a/src/utils/Makefile.in
+++ b/src/utils/Makefile.in
@@ -36,7 +36,6 @@ hammer_SOURCES = hammer.cc
 hammer_OBJECTS = $(hammer_SOURCES:.cc=.lo)
 hammer_LDADD = -L.
-lOpenAxiom -libOpenAxiom_HEADERS = hash-table.H string-pool.H libOpenAxiom_SOURCES = \ storage.cc string-pool.cc command.cc \ filesystem.cc @@ -75,37 +74,18 @@ $(oa_target_oalib): libOpenAxiom.$(LIBEXT) fi cp $< $@ -stamp-headers: $(libOpenAxiom_HEADERS) Makefile - rm -f stamp-headers - if [ ! -d $(oa_target_headerdir) ]; then \ - mkdir -p -- $(oa_target_headerdir) || exit 1; \ - fi ; \ - c1=$(top_builddir)/config/openaxiom-c-macros.h; \ - c2=$(oa_target_headerdir)/config; \ - if [ ! -r $$c2 ] || ! cmp -s $$c1 $$c2 ; then \ - cp -p $$c1 $$c2; \ - fi ; \ - for h in $(oa_public_headers); do \ - f1=$(srcdir)/$$h.H; f2=$(oa_target_headerdir)/$$h; \ - if [ ! -r $$f2 ] || ! cmp -s $$f1 $$f2 ; then \ - cp -p $$f1 $$f2; \ - fi; \ - done ; \ - $(STAMP) stamp-headers - hammer$(EXEEXT): $(hammer_OBJECTS) libOpenAxiom.$(LIBEXT) $(LINK) -o $@ $(hammer_OBJECTS) $(hammer_LDADD) $(LDFLAGS) libOpenAxiom.$(LIBEXT): $(libOpenAxiom_OBJECTS) $(LINK) -o $@ $(libOpenAxiom_OBJECTS) -%.lo: %.cc stamp-headers +%.lo: %.cc $(COMPILE) ${CXXFLAGS} $(oa_include_flags) -o $@ $< mostlyclean-local: @rm -rf .libs - @rm -rf $(oa_include_headerdir) stamp-headers @rm -f $(oa_target_oalib) @rm -f $(libOpenAxiom_OBJECTS) @rm -f *~ core |