aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog5
-rw-r--r--config/open-axiom.m420
-rw-r--r--config/openaxiom-c-macros.h.in3
-rwxr-xr-xconfigure91
-rw-r--r--configure.ac2
-rw-r--r--src/utils/Makefile.in4
-rw-r--r--src/utils/hammer.cc16
-rw-r--r--src/utils/hash-table.H82
-rw-r--r--src/utils/sexpr.H385
-rw-r--r--src/utils/sexpr.cc641
-rw-r--r--src/utils/storage.H226
-rw-r--r--src/utils/storage.cc37
-rw-r--r--src/utils/string-pool.H88
-rw-r--r--src/utils/string-pool.cc96
14 files changed, 1674 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 384da487..8ec27773 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2010-08-24 Gabriel Dos Reis <gdr@cs.tamu.edu>
+
+ * config/open-axiom.m4 (OPENAXIOM_ALIGNMENT_OPERATOR): New. Check
+ for alignment operator.
+
2010-08-20 Gabriel Dos Reis <gdr@cs.tamu.edu>
External tool noweb is no longer required.
diff --git a/config/open-axiom.m4 b/config/open-axiom.m4
index a2e26e8b..d7de600a 100644
--- a/config/open-axiom.m4
+++ b/config/open-axiom.m4
@@ -1010,10 +1010,30 @@ if test x"$ac_cv_header_sys_mman_h" = xyes; then
fi
])
+dnl ----------------------------------
+dnl -- OPENAXIOM_ALIGNMENT_OPERATOR --
+dnl ----------------------------------
+dnl Check that the C/C++ compiler understand
+dnl alignment operator, i.e. either `alignof',
+dnl or vendor lock-ins such as `__alignof'.
+AC_DEFUN([OPENAXIOM_ALIGNMENT_OPERATOR],[
+AC_MSG_CHECKING([name of alignment query operator])
+oa_alignment=
+AC_COMPILE_IFELSE([int a = __alignof(int);],
+ [oa_alignment="__alignof"],
+ [AC_COMPILE_IFELSE([int a = alignof(int);],
+ [oa_alignment="alignof"],
+ [AC_MSG_ERROR([C/C++ compiler does not support alignment query operator])])])
+AC_DEFINE_UNQUOTED([openaxiom_alignment],[$oa_alignment],
+ [Alignment query operator])
+AC_MSG_RESULT([$oa_alignment])
+])
+
dnl --------------------------
dnl -- OPENAXIOM_CHECK_MISC --
dnl --------------------------
AC_DEFUN([OPENAXIOM_CHECK_MISC],[
+OPENAXIOM_ALIGNMENT_OPERATOR
case $GCC in
yes)
CFLAGS="$CFLAGS -O2 -Wall"
diff --git a/config/openaxiom-c-macros.h.in b/config/openaxiom-c-macros.h.in
index c92cc8ac..41b47835 100644
--- a/config/openaxiom-c-macros.h.in
+++ b/config/openaxiom-c-macros.h.in
@@ -207,6 +207,9 @@
if such a type exists, and if the system does not define it. */
#undef intptr_t
+/* Alignment query operator */
+#undef openaxiom_alignment
+
/* Define to the type of an unsigned integer type of width exactly 16 bits if
such a type exists and the standard includes do not define it. */
#undef uint16_t
diff --git a/configure b/configure
index 02574f0c..d12ab19f 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for OpenAxiom 1.4.0-2010-08-20.
+# Generated by GNU Autoconf 2.63 for OpenAxiom 1.4.0-2010-08-24.
#
# Report bugs to <open-axiom-bugs@lists.sf.net>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='OpenAxiom'
PACKAGE_TARNAME='openaxiom'
-PACKAGE_VERSION='1.4.0-2010-08-20'
-PACKAGE_STRING='OpenAxiom 1.4.0-2010-08-20'
+PACKAGE_VERSION='1.4.0-2010-08-24'
+PACKAGE_STRING='OpenAxiom 1.4.0-2010-08-24'
PACKAGE_BUGREPORT='open-axiom-bugs@lists.sf.net'
ac_unique_file="src/Makefile.pamphlet"
@@ -1509,7 +1509,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures OpenAxiom 1.4.0-2010-08-20 to adapt to many kinds of systems.
+\`configure' configures OpenAxiom 1.4.0-2010-08-24 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1579,7 +1579,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of OpenAxiom 1.4.0-2010-08-20:";;
+ short | recursive ) echo "Configuration of OpenAxiom 1.4.0-2010-08-24:";;
esac
cat <<\_ACEOF
@@ -1687,7 +1687,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-OpenAxiom configure 1.4.0-2010-08-20
+OpenAxiom configure 1.4.0-2010-08-24
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1701,7 +1701,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by OpenAxiom $as_me 1.4.0-2010-08-20, which was
+It was created by OpenAxiom $as_me 1.4.0-2010-08-24, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -22193,6 +22193,79 @@ _ACEOF
fi
+
+{ $as_echo "$as_me:$LINENO: checking name of alignment query operator" >&5
+$as_echo_n "checking name of alignment query operator... " >&6; }
+oa_alignment=
+cat >conftest.$ac_ext <<_ACEOF
+int a = __alignof(int);
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ oa_alignment="__alignof"
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ cat >conftest.$ac_ext <<_ACEOF
+int a = alignof(int);
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ oa_alignment="alignof"
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { { $as_echo "$as_me:$LINENO: error: C/C++ compiler does not support alignment query operator" >&5
+$as_echo "$as_me: error: C/C++ compiler does not support alignment query operator" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+cat >>confdefs.h <<_ACEOF
+#define openaxiom_alignment $oa_alignment
+_ACEOF
+
+{ $as_echo "$as_me:$LINENO: result: $oa_alignment" >&5
+$as_echo "$oa_alignment" >&6; }
+
case $GCC in
yes)
CFLAGS="$CFLAGS -O2 -Wall"
@@ -22651,7 +22724,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by OpenAxiom $as_me 1.4.0-2010-08-20, which was
+This file was extended by OpenAxiom $as_me 1.4.0-2010-08-24, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -22714,7 +22787,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-OpenAxiom config.status 1.4.0-2010-08-20
+OpenAxiom config.status 1.4.0-2010-08-24
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/configure.ac b/configure.ac
index b4fe9057..1a7aafe7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -37,7 +37,7 @@ dnl Most of the macros used in this configure.ac are defined in files
dnl located in the subdirectory config/
sinclude(config/open-axiom.m4)
sinclude(config/aclocal.m4)
-AC_INIT([OpenAxiom], [1.4.0-2010-08-20],
+AC_INIT([OpenAxiom], [1.4.0-2010-08-24],
[open-axiom-bugs@lists.sf.net])
AC_CONFIG_AUX_DIR(config)
diff --git a/src/utils/Makefile.in b/src/utils/Makefile.in
index 024af5dc..b3adb6b9 100644
--- a/src/utils/Makefile.in
+++ b/src/utils/Makefile.in
@@ -36,8 +36,8 @@ hammer_SOURCES = hammer.cc
hammer_OBJECTS = $(hammer_SOURCES:.cc=.lo)
hammer_LDADD = -L. -lOpenAxiom
-libOpenAxiom_HEADERS = storage.H
-libOpenAxiom_SOURCES = storage.cc
+libOpenAxiom_HEADERS = storage.H hash-table.H string-pool.H sexpr.H
+libOpenAxiom_SOURCES = storage.cc string-pool.cc sexpr.cc
libOpenAxiom_OBJECTS = $(libOpenAxiom_SOURCES:.cc=.lo)
.PHONY: all all-ax all-utils
diff --git a/src/utils/hammer.cc b/src/utils/hammer.cc
index 17c5cc73..35ef5c1f 100644
--- a/src/utils/hammer.cc
+++ b/src/utils/hammer.cc
@@ -29,6 +29,15 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// --% Author: Gabriel Dos Reis.
+// --% Description:
+// --% This program implements basic functionalities for untangling
+// --% algebra source code from the pamphlets. The syntax is that
+// --% of `noweb'. A chunk definition starts with a pattern
+// --% <<name>>= on a line by itself, and ends with `@' by itself
+// --% on a line. A chunk can refer to another chunk through
+// --% a pattern of the form `<<name>>'.
+
#include <string.h>
#include <stdlib.h>
#include <utility>
@@ -41,13 +50,6 @@
#include <map>
#include "storage.H"
-// -- This program implements basic functionalities for untangling
-// -- algebra source code from the pamphlets. The syntax is that
-// -- of `noweb'. A chunk definition starts with a pattern
-// -- <<name>>= on a line by itself, and ends with `@' by itself
-// -- on a line. A chunk can refer to another chunk through
-// -- a pattern of the form `<<name>>'.
-
namespace OpenAxiom {
namespace Hammer {
// -------------
diff --git a/src/utils/hash-table.H b/src/utils/hash-table.H
new file mode 100644
index 00000000..75b485ff
--- /dev/null
+++ b/src/utils/hash-table.H
@@ -0,0 +1,82 @@
+// Copyright (C) 2010, Gabriel Dos Reis.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+//
+// - Neither the name of The Numerical Algorithms Group Ltd. nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef OPENAXIOM_HASH_TABLE_INCLUDED
+#define OPENAXIOM_HASH_TABLE_INCLUDED
+
+// --% Author: Gabriel Dos Reis.
+// --% Description:
+// --% Simple hash table facility. To be replaced by C++0x
+// --% hash tables when C++0x compilers become common place.
+
+#include "storage.H"
+
+namespace OpenAxiom {
+ // --------------------
+ // -- HashTableEntry --
+ // --------------------
+ // Datatype for entries in a parameterized hash table.
+ // The type parameter is required to be a value-construcitble datatype.
+ template<typename T>
+ struct HashTableEntry : T {
+ HashTableEntry* chain; // previous item in the same bucket chain
+ size_t hash; // hash code of stored data
+ };
+
+ // --------------------
+ // -- BasicHashTable --
+ // --------------------
+ // A simple hash table data structure. Ideally, we would like to use
+ // standard C++ types, but hash tables were only in a C++ 2003 TR,
+ // officially part of C++0x standard library. We still don't have
+ // wide-spread C++0x compilers.
+ template<typename T>
+ struct BasicHashTable : private Memory::Arena<HashTableEntry<T> > {
+ typedef HashTableEntry<T> EntryType;
+ explicit BasicHashTable(size_t n)
+ : Memory::Arena<HashTableEntry<T> >(n),
+ buckets(this->allocate(n)), nbuckets(n) { }
+
+ EntryType* hash_chain(size_t h) const {
+ return buckets + (h % nbuckets);
+ }
+
+ EntryType* new_bucket() {
+ return this->allocate(1);
+ }
+
+ private:
+ HashTableEntry<T>* const buckets;
+ const size_t nbuckets;
+ };
+}
+
+#endif // OPENAXIOM_HASH_TABLE_INCLUDED
diff --git a/src/utils/sexpr.H b/src/utils/sexpr.H
new file mode 100644
index 00000000..5139b453
--- /dev/null
+++ b/src/utils/sexpr.H
@@ -0,0 +1,385 @@
+// Copyright (C) 2010, Gabriel Dos Reis.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+//
+// - Neither the name of The Numerical Algorithms Group Ltd. nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef OPENAXIOM_SEXPR_INCLUDED
+#define OPENAXIOM_SEXPR_INCLUDED
+
+// --% Author: Gabriel Dos Reis.
+// --% Description:
+// --% A simple support for s-expressions. By design, no ambition
+// --% for full-fledged Common Lisp reader capability. Rather,
+// --% the aim is a simple data structure for exchanging data
+// --% between several components of the OpenAxiom system.
+// --% Users interested in fullblown Lisp syntax should seek
+// --% to acquire Lisp systems, many of which are freely available.
+
+#include <iosfwd>
+#include <vector>
+#include <set>
+#include "string-pool.H"
+
+// Helpers for defining token type values for lexeme with more
+// than characters.
+#define OPENAXIOM_SEXPR_TOKEN1(C) (C)
+#define OPENAXIOM_SEXPR_TOKEN2(C1,C2) (C1 * 256 + C2)
+
+namespace OpenAxiom {
+ namespace Sexpr {
+ // -----------
+ // -- Token --
+ // -----------
+ struct Token {
+ enum Type {
+ unknown, // unidentified token
+ dot = OPENAXIOM_SEXPR_TOKEN1('.'), // "."
+ comma = OPENAXIOM_SEXPR_TOKEN1(','), // ","
+ open_paren = OPENAXIOM_SEXPR_TOKEN1('('), // "("
+ close_paren = OPENAXIOM_SEXPR_TOKEN1(')'), // ")"
+ apostrophe = OPENAXIOM_SEXPR_TOKEN1('\''), // "'"
+ backquote = OPENAXIOM_SEXPR_TOKEN1('`'), // "`"
+ backslash = OPENAXIOM_SEXPR_TOKEN1('\\'), // "\\"
+ sharp_open_paren = OPENAXIOM_SEXPR_TOKEN2('#','('), // "#("
+ sharp_apostrophe = OPENAXIOM_SEXPR_TOKEN2('#','\''), // "#'"
+ sharp_colon = OPENAXIOM_SEXPR_TOKEN2('#',':'), // "#:"
+ digraph_end = OPENAXIOM_SEXPR_TOKEN2(256,256),
+ integer, // integer literal
+ string, // string literal
+ identifier, // plain identifier
+ sharp_integer_equal, // anchor definition, #n=<form>
+ sharp_integer_sharp // back reference, #n#
+ };
+
+ Type type; // class of this token
+ BasicString lexeme; // characters making up this token
+ };
+
+ // Print a token object on an output stream.
+ // Note: this function is for debugging purpose; in particular
+ // it does not `prettyprint' tokens.
+ std::ostream& operator<<(std::ostream&, const Token&);
+
+ // -----------
+ // -- Lexer --
+ // -----------
+ // An object of this type transforms a sequence of characters
+ // into a sequence of tokens as defined above.
+ // A lexer does not manage memory itself. Rather, it delegates
+ // storage allocation for lexemes and tokens to specialized
+ // agents used to construct it.
+ struct Lexer {
+ Lexer(StringPool& pool, std::vector<Token>& toks)
+ : strings(pool), tokens(toks) { }
+
+ const char* tokenize(const char*, const char*);
+
+ private:
+ StringPool& strings; // where to allocate lexemes from
+ std::vector<Token>& tokens; // where to deposite tokens.
+ };
+
+ // ------------
+ // -- Syntax --
+ // ------------
+ struct Syntax {
+ struct Visitor; // base class of syntax visitors
+ virtual void accept(Visitor&) const = 0;
+ };
+
+ // ----------
+ // -- Atom --
+ // ----------
+ // An atom is a syntax object consisting of exatly one token.
+ // This should not be confused with the notion of atom
+ // in Lisp languages.
+ struct Atom : Syntax {
+ const Token& token() const { return tok; }
+ BasicString lexeme() const { return tok.lexeme; }
+ void accept(Visitor&) const;
+ protected:
+ const Token tok;
+ Atom(const Token&);
+ };
+
+ // -------------
+ // -- Integer --
+ // -------------
+ struct Integer : Atom {
+ explicit Integer(const Token&);
+ void accept(Visitor&) const;
+ };
+
+ // ------------
+ // -- String --
+ // ------------
+ struct String : Atom {
+ explicit String(const Token&);
+ void accept(Visitor&) const;
+ };
+
+ // ------------
+ // -- Symbol --
+ // ------------
+ struct Symbol : Atom {
+ enum Kind {
+ uninterned, // uninterned symbol
+ ordinary, // an interned symbol
+ keyword // a keyword symbol
+ };
+ Symbol(const Token&, Kind);
+ Kind kin() const { return sort; }
+ void accept(Visitor&) const;
+ private:
+ const Kind sort;
+ };
+
+ // ---------------
+ // -- Reference --
+ // ---------------
+ struct Reference : Atom {
+ Reference(const Token&, size_t);
+ size_t tag() const { return pos; }
+ void accept(Visitor&) const;
+ private:
+ const size_t pos;
+ };
+
+ // ------------
+ // -- Anchor --
+ // ------------
+ struct Anchor : Syntax {
+ Anchor(size_t, const Syntax*);
+ size_t ref() const { return tag; }
+ const Syntax* value() const { return val; }
+ void accept(Visitor&) const;
+ private:
+ const size_t tag;
+ const Syntax* const val;
+ };
+
+ // -----------
+ // -- Quote --
+ // -----------
+ struct Quote : Syntax {
+ explicit Quote(const Syntax*);
+ const Syntax* body() const { return form; }
+ void accept(Visitor&) const;
+ private:
+ const Syntax* const form;
+ };
+
+ // --------------
+ // -- Function --
+ // --------------
+ struct Function : Syntax {
+ explicit Function(const Syntax*);
+ const Syntax* code() const { return form; }
+ void accept(Visitor&) const;
+ private:
+ const Syntax* const form;
+ };
+
+ // ----------
+ // -- Pair --
+ // ----------
+ struct Pair : Syntax {
+ Pair(const Syntax*, const Syntax*);
+ const Syntax* first() const { return elts.first; }
+ const Syntax* second() const { return elts.second; }
+ void accept(Visitor&) const;
+ private:
+ const std::pair<const Syntax*, const Syntax*> elts;
+ };
+
+ // ----------
+ // -- List --
+ // ----------
+ struct List : Syntax, private std::vector<const Syntax*> {
+ typedef std::vector<const Syntax*> base;
+ using base::const_iterator;
+ using base::begin;
+ using base::end;
+ using base::size;
+ using base::empty;
+
+ List();
+ explicit List(const base&);
+ ~List();
+ void accept(Visitor&) const;
+ };
+
+ // ------------
+ // -- Vector --
+ // ------------
+ struct Vector : Syntax, private std::vector<const Syntax*> {
+ typedef std::vector<const Syntax*> base;
+ using base::const_iterator;
+ using base::begin;
+ using base::end;
+ using base::size;
+ using base::operator[];
+ using base::empty;
+
+ Vector();
+ explicit Vector(const base&);
+ ~Vector();
+ void accept(Visitor&) const;
+ };
+
+ // ---------------------
+ // -- Syntax::Visitor --
+ // ---------------------
+ struct Syntax::Visitor {
+ virtual void visit(const Atom&) = 0;
+ virtual void visit(const Integer&);
+ virtual void visit(const String&);
+ virtual void visit(const Symbol&);
+ virtual void visit(const Reference&);
+ virtual void visit(const Anchor&) = 0;
+ virtual void visit(const Quote&) = 0;
+ virtual void visit(const Function&) = 0;
+ virtual void visit(const Pair&) = 0;
+ virtual void visit(const List&) = 0;
+ virtual void visit(const Vector&) = 0;
+ };
+
+ // ---------------
+ // -- Allocator --
+ // ---------------
+
+ // The next two classes are helper classes for the main
+ // allocation class Allocator. We use std::set as allocator
+ // that guarantee uuniqueness of atomic syntax object with
+ // respect to the constituent token. That container needs
+ // a relational comparator. In an ideal world, this class
+ // should not exist.
+ struct SyntaxComparator {
+ bool operator()(const Atom& lhs, const Atom& rhs) const {
+ return std::less<BasicString>()(lhs.lexeme(), rhs.lexeme());
+ }
+
+ bool operator()(const Quote& lhs, const Quote& rhs) const {
+ return std::less<const void*>()(lhs.body(), rhs.body());
+ }
+
+ bool operator()(const Anchor& lhs, const Anchor& rhs) const {
+ return std::less<size_t>()(lhs.ref(), rhs.ref());
+ }
+
+ bool operator()(const Function& lhs, const Function& rhs) const {
+ return std::less<const void*>()(lhs.code(), rhs.code());
+ }
+
+ bool operator()(const Pair& lhs, const Pair& rhs) const {
+ std::less<const void*> cmp;
+ if (cmp(lhs.first(), rhs.first()))
+ return true;
+ if (cmp(rhs.first(), lhs.first()))
+ return false;
+ return cmp(lhs.second(), rhs.second());
+ }
+ };
+
+ template<typename T>
+ struct UniqueAllocator : std::set<T, SyntaxComparator> {
+ typedef std::set<T, SyntaxComparator> base;
+ typedef typename base::const_iterator const_iterator;
+
+ template<typename U>
+ const T* allocate(const U& u) {
+ return &*this->insert(T(u)).first;
+ }
+
+ template<typename U, typename V>
+ const T* allocate(const U& u, const V& v) {
+ return &*this->insert(T(u, v)).first;
+ }
+ };
+
+ // Allocator of syntax objects.
+ struct Allocator {
+ Allocator();
+ ~Allocator();
+
+ const Integer* make_integer(const Token&);
+ const String* make_string(const Token&);
+ const Symbol* make_symbol(const Token&, Symbol::Kind);
+ const Reference* make_reference(const Token&, size_t);
+ const Anchor* make_anchor(size_t, const Syntax*);
+ const Quote* make_quote(const Syntax*);
+ const Function* make_function(const Syntax*);
+ const Pair* make_pair(const Syntax*, const Syntax*);
+ const List* make_list(const std::vector<const Syntax*>&);
+ const Vector* make_vector(const std::vector<const Syntax*>&);
+
+ private:
+ UniqueAllocator<Integer> ints;
+ UniqueAllocator<String> strs;
+ UniqueAllocator<Symbol> syms;
+ UniqueAllocator<Anchor> ancs;
+ UniqueAllocator<Reference> refs;
+ UniqueAllocator<Quote> quotes;
+ UniqueAllocator<Function> funs;
+ UniqueAllocator<Pair> pairs;
+ Memory::Arena<List> lists;
+ Memory::Arena<Vector> vectors;
+ List empty_list;
+ Vector empty_vector;
+ };
+
+ // ------------
+ // -- Parser --
+ // ------------
+ // An object of this type transforms a sequence of tokens
+ // into a sequence of syntax objects.
+ // A parser object does not manage memory itself. Rather, it delegates
+ // storage allocation for syntax objects to specialized
+ // agents used to construct it.
+ struct Parser {
+ Parser(Allocator&, std::vector<const Syntax*>&);
+ const Token* parse(const Token*, const Token*);
+ private:
+ Allocator& alloc;
+ std::vector<const Syntax*>& syns;
+
+ const Symbol* parse_symbol(const Token*&, const Token*);
+ const Anchor* parse_anchor(const Token*&, const Token*);
+ const Reference* parse_reference(const Token*&, const Token*);
+ const Symbol* parse_uninterned(const Token*&, const Token*);
+ const Function* parse_function(const Token*&, const Token*);
+ const Quote* parse_quote(const Token*&, const Token*);
+ const Vector* parse_vector(const Token*&, const Token*);
+ const Syntax* parse_list_or_pair(const Token*&, const Token*);
+ const Syntax* parse_syntax(const Token*&, const Token*);
+ };
+ }
+}
+
+#endif // OPENAXIOM_SEXPR_INCLUDED
diff --git a/src/utils/sexpr.cc b/src/utils/sexpr.cc
new file mode 100644
index 00000000..d9d38220
--- /dev/null
+++ b/src/utils/sexpr.cc
@@ -0,0 +1,641 @@
+// Copyright (C) 2010, Gabriel Dos Reis.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+//
+// - Neither the name of The Numerical Algorithms Group Ltd. nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// --% Author: Gabriel Dos Reis.
+
+#include <ctype.h>
+#include <iostream>
+#include <iterator>
+#include "sexpr.H"
+
+namespace OpenAxiom {
+ namespace Sexpr {
+ std::ostream&
+ operator<<(std::ostream& os, const Token& t) {
+ switch (t.type) {
+ case Token::dot: os << "DOT"; break;
+ case Token::comma: os << "COMMA"; break;
+ case Token::open_paren: os << "OPEN_PAREN"; break;
+ case Token::close_paren: os << "CLOSE_PAREN"; break;
+ case Token::apostrophe: os << "APOSTROPHE"; break;
+ case Token::backquote: os << "BACKQUOTE"; break;
+ case Token::backslash: os << "BACKSLASH"; break;
+ case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break;
+ case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break;
+ case Token::sharp_colon: os << "SHARP_COLON"; break;
+ case Token::integer: os << "INTEGER"; break;
+ case Token::string: os << "STRING"; break;
+ case Token::identifier: os << "IDENTIFIER"; break;
+ case Token::sharp_integer_sharp:
+ os << "SHARP_INTEGER_SHARP"; break;
+ case Token::sharp_integer_equal:
+ os << "SHARP_INTEGER_EQUAL"; break;
+ default: os << "UNKNOWN"; break;
+ }
+ os << '(';
+ if (t.lexeme != 0) {
+ os << '"';
+ std::copy(t.lexeme->begin(), t.lexeme->end(),
+ std::ostream_iterator<char>(os));
+ os << '"';
+ }
+ else
+ os << "<missing>";
+ return os << ')';
+ }
+
+ // Return true if character `c' introduces a blank.
+ static bool
+ is_blank(char c) {
+ return c == ' ' or c == '\t' or c == '\v'
+ or c == '\n' or c == '\f' or c == '\r';
+ }
+
+ // Return true if the character `c' introduces a delimiter.
+ static bool
+ is_delimiter(char c) {
+ return is_blank(c)
+ or c == '(' or c == ')' or c == '\''
+ or c == '`' or c == '\\' or c == '#';
+ }
+
+ // Move `cur' past all consecutive blank characters, and
+ // return the new position.
+ static const char*
+ skip_blank(const char*& cur, const char* end) {
+ while (cur < end and is_blank(*cur))
+ ++cur;
+ return cur;
+ }
+
+ // Move `cur' until a word boundary is reached.
+ static const char*
+ skip_to_word_boundary(const char*& cur, const char* end) {
+ while (cur < end and not is_delimiter(*cur))
+ ++cur;
+ return cur;
+ }
+
+ // Move `cur' one-past a non-esacaped character `c'.
+ // Return true if the character was seen.
+ static bool
+ skip_to_nonescaped_char(const char*& cur, const char* end, char c) {
+ for (; cur < end; ++cur)
+ if (cur[0] == c and cur[-1] != '\\') {
+ ++cur;
+ return true;
+ }
+ return false;
+ }
+
+ // Move `cur' past the closing fence of an absolute identifier.
+ // Return true if the closing fence was effectively seen.
+ static inline bool
+ skip_to_fence(const char*& cur, const char* end) {
+ return skip_to_nonescaped_char(cur, end, '|');
+ }
+
+ // Move `cur' past the closing quote of string literal.
+ // Return true if the closing fence was effectively seen.
+ static inline bool
+ skip_to_quote(const char*& cur, const char* end) {
+ return skip_to_nonescaped_char(cur, end, '"');
+ }
+
+ // Return true if the character `c' be part of a non-absolute
+ // identifier.
+ static bool
+ identifier_part(char c) {
+ switch (c) {
+ case '+': case '-': case '*': case '/': case '%': case '^':
+ case '~': case '@': case '$': case '&': case ':': case '=':
+ case '<': case '>': case '?': case '!': case '_':
+ return true;
+ default:
+ return isalnum(c);
+ }
+ }
+
+ // Return true if the character `c' has a special meaning after
+ // the sharp character.
+ static bool
+ special_after_sharp(char c) {
+ return c == '(' or c == '\'' or c == ':';
+ }
+
+ // Return true if the sequence `[cur, end)' has a prefix that is
+ // an integer followrd by the equal sign or the sharp sign.
+ // `cur' is moved along the way.
+ static bool
+ only_digits_before_equal_or_shap(const char*& cur, const char* end) {
+ while (cur < end and isdigit(*cur))
+ ++cur;
+ return cur < end and (*cur == '#' or *cur == '=');
+ }
+
+ // The token `t' was thought to designate an identifier.
+ // Reclassify it as an integer if, in fact, its lexeme consists
+ // entirely of digits.
+ static void
+ maybe_reclassify(Token& t) {
+ const char* cur = t.lexeme->begin();
+ const char* end = t.lexeme->end();
+ while (cur < end and isdigit(*cur))
+ ++cur;
+ if (cur == end)
+ t.type = Token::integer;
+ }
+
+ const char*
+ Lexer::tokenize(const char* cur, const char* end) {
+ while (skip_blank(cur, end) < end) {
+ Token t = { Token::unknown, 0 };
+ switch (*cur) {
+ case '.': case ',': case '(': case ')':
+ case '\'': case '\\':
+ t.type = Token::Type(OPENAXIOM_SEXPR_TOKEN1(*cur));
+ t.lexeme = strings.intern(cur, 1);
+ ++cur;
+ break;
+
+ case '#': {
+ const char* start = cur;
+ if (cur + 1 < end and special_after_sharp(cur[1])) {
+ t.type = Token::Type(OPENAXIOM_SEXPR_TOKEN2(cur[0], cur[1]));
+ t.lexeme = strings.intern(cur, 2);
+ cur += 2;
+ }
+ else if (only_digits_before_equal_or_shap(++cur, end)) {
+ t.type = *cur == '#'
+ ? Token::sharp_integer_sharp
+ : Token::sharp_integer_equal;
+ t.lexeme = strings.intern(start, cur - start + 1);
+ ++cur;
+ }
+ else {
+ skip_to_word_boundary(cur, end);
+ t.lexeme = strings.intern(start, cur - start);
+ }
+ break;
+ }
+
+ case '|': {
+ const char* start = cur;
+ skip_to_fence(++cur, end);
+ t.type = Token::identifier;
+ t.lexeme = strings.intern(start, cur - start);
+ break;
+ }
+
+ case '"': {
+ const char* start = cur;
+ skip_to_quote(++cur, end);
+ t.type = Token::string;
+ t.lexeme = strings.intern(start, cur - start);
+ break;
+ }
+
+ default:
+ if (identifier_part(*cur)) {
+ const char* start = cur;
+ skip_to_word_boundary(++cur, end);
+ t.type = Token::identifier;
+ t.lexeme = strings.intern(start, cur - start);
+ maybe_reclassify(t);
+ }
+ else {
+ const char* start = cur;
+ skip_to_word_boundary(++cur, end);
+ t.lexeme = strings.intern(start, cur - start);
+ }
+ break;
+ }
+ tokens.push_back(t);
+ }
+ return cur;
+ }
+
+ // ----------
+ // -- Atom --
+ // ----------
+ Atom::Atom(const Token& t) : tok(t) { }
+
+ void
+ Atom::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // -------------
+ // -- Integer --
+ // -------------
+ Integer::Integer(const Token& t) : Atom(t) { }
+
+ void
+ Integer::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ------------
+ // -- String --
+ // ------------
+ String::String(const Token& t) : Atom(t) { }
+
+ void
+ String::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ------------
+ // -- Symbol --
+ // ------------
+ Symbol::Symbol(const Token& t, Kind k) : Atom(t), sort(k) { }
+
+ void
+ Symbol::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ------------
+ // -- Anchor --
+ // ------------
+ Anchor::Anchor(size_t t, const Syntax* s) : tag(t), val(s) { }
+
+ void
+ Anchor::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ---------------
+ // -- Reference --
+ // ---------------
+ Reference::Reference(const Token& t, size_t v) : Atom(t), pos(v) { }
+
+ void
+ Reference::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // -----------
+ // -- Quote --
+ // -----------
+ Quote::Quote(const Syntax* s) : form(s) { }
+
+ void
+ Quote::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // --------------
+ // -- Function --
+ // --------------
+ Function::Function(const Syntax* s) : form(s) { }
+
+ void
+ Function::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ----------
+ // -- Pair --
+ // ----------
+ Pair::Pair(const Syntax* f, const Syntax* s) : elts(f, s) { }
+
+ void
+ Pair::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ----------
+ // -- List --
+ // ----------
+ List::List() { }
+
+ List::List(const base& elts) : base(elts) { }
+
+ List::~List() { }
+
+ void
+ List::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ------------
+ // -- Vector --
+ // ------------
+ Vector::Vector() { }
+
+ Vector::Vector(const base& elts) : base(elts) { }
+
+ Vector::~Vector() { }
+
+ void
+ Vector::accept(Visitor& v) const {
+ v.visit(*this);
+ }
+
+ // ---------------------
+ // -- Syntax::Visitor --
+ // ---------------------
+
+ // implicitly convert a reference to `T' to a reference to `S'.
+ template<typename S, typename T>
+ inline const S&
+ as(const T& t) {
+ return t;
+ }
+
+ void
+ Syntax::Visitor::visit(const Integer& i) {
+ visit(as<Atom>(i));
+ }
+
+ void
+ Syntax::Visitor::visit(const String& s) {
+ visit(as<Atom>(s));
+ }
+
+ void
+ Syntax::Visitor::visit(const Symbol& s) {
+ visit(as<Atom>(s));
+ }
+
+ void
+ Syntax::Visitor::visit(const Reference& r) {
+ visit(as<Atom>(r));
+ }
+
+ // ---------------
+ // -- Allocator --
+ // ---------------
+ Allocator::Allocator() : lists(4 * 1024), vectors(1024) { }
+
+ // This destructor is defined here so that it provides
+ // a single instantiation point for destructors of all
+ // used templates floating around.
+ Allocator::~Allocator() { }
+
+ const Integer*
+ Allocator::make_integer(const Token& t) {
+ return ints.allocate(t);
+ }
+
+ const String*
+ Allocator::make_string(const Token& t) {
+ return strs.allocate(t);
+ }
+
+ const Symbol*
+ Allocator::make_symbol(const Token& t, Symbol::Kind k) {
+ return syms.allocate(t, k);
+ }
+
+ const Anchor*
+ Allocator::make_anchor(size_t t, const Syntax* s) {
+ return ancs.allocate(t, s);
+ }
+
+ const Reference*
+ Allocator::make_reference(const Token& t, size_t i) {
+ return refs.allocate(t, i);
+ }
+
+ const Quote*
+ Allocator::make_quote(const Syntax* s) {
+ return quotes.allocate(s);
+ }
+
+ const Function*
+ Allocator::make_function(const Syntax* s) {
+ return funs.allocate(s);
+ }
+
+ const Pair*
+ Allocator::make_pair(const Syntax* f, const Syntax* s) {
+ return pairs.allocate(f, s);
+ }
+
+ const List*
+ Allocator::make_list(const std::vector<const Syntax*>& elts) {
+ if (elts.empty())
+ return &empty_list;
+ return new(lists.allocate(1)) List(elts);
+ }
+
+ const Vector*
+ Allocator::make_vector(const std::vector<const Syntax*>& elts) {
+ if (elts.empty())
+ return &empty_vector;
+ return new(vectors.allocate(1)) Vector(elts);
+ }
+
+ // ------------
+ // -- Parser --
+ // ------------
+
+ // Signal a parse error
+ static void
+ parse_error(const std::string& s) {
+ throw SystemError(s);
+ }
+
+ // Signal that an expected syntax object was missing
+ static void
+ expected_syntax(const std::string& s) {
+ parse_error("expected " + s);
+ }
+
+ // Signal an abrupt end of input
+ static void
+ unexpected_end_of_input(const std::string& s) {
+ parse_error("unexpected end of input after " + s);
+ }
+
+ // Signal a missing closing parenthesis
+ static void
+ missing_closer_for(const std::string& s) {
+ parse_error("missing closing parenthesis for " + s);
+ }
+
+ // The sequence of characters in [cur, last) consists
+ // entirely of digits. Return the corresponding natural value.
+ static size_t
+ natural_value(const char* cur, const char* last) {
+ size_t n = 0;
+ for (; cur < last; ++cur)
+ // FIXME: check for overflow.
+ n = 10 * n + (*cur - '0');
+ return n;
+ }
+
+ // Parse a plain identifier or a Lisp-style keyword identifier.
+ const Symbol*
+ Parser::parse_symbol(const Token*& cur, const Token* last) {
+ Symbol::Kind kind = *cur->lexeme->begin() == ':'
+ ? Symbol::keyword
+ : Symbol::ordinary;
+ return alloc.make_symbol(*cur++, kind);
+ }
+
+ // Parse an anchor definition of the form #n=<syntax>
+ const Anchor*
+ Parser::parse_anchor(const Token*& cur, const Token* last) {
+ const size_t n = natural_value(cur->lexeme->begin() + 1,
+ cur->lexeme->end() - 1);
+ if (++cur == last)
+ unexpected_end_of_input("sharp-integer-equal sign");
+ return alloc.make_anchor(n, parse_syntax(cur, last));
+ }
+
+ // Parse a reference to an anchor, #n#
+ const Reference*
+ Parser::parse_reference(const Token*& cur, const Token* last) {
+ const size_t n = natural_value(cur->lexeme->begin() + 1,
+ cur->lexeme->end() - 1);
+ return alloc.make_reference(*cur++, n);
+ }
+
+ // Parse an uninterned symbol #:<identifier>
+ const Symbol*
+ Parser::parse_uninterned(const Token*& cur, const Token* last) {
+ if (cur == last or cur->type != Token::identifier)
+ expected_syntax("symbol after sharp-colon sign");
+ // FIXME: check that the identifier is not a keyword.
+ return alloc.make_symbol(*cur++, Symbol::uninterned);
+ }
+
+ // Parse a function syntax: #'<syntax>
+ const Function*
+ Parser::parse_function(const Token*& cur, const Token* last) {
+ if (cur == last)
+ unexpected_end_of_input("sharp-quote sign");
+ return alloc.make_function(parse_syntax(cur, last));
+ }
+
+ // Parse a quotation
+ const Quote*
+ Parser::parse_quote(const Token*& cur, const Token* last) {
+ if (cur == last)
+ unexpected_end_of_input("quote sign");
+ return alloc.make_quote(parse_syntax(cur, last));
+ }
+
+ // Parse a vector of syntax objects: #(s .. s)
+ const Vector*
+ Parser::parse_vector(const Token*& cur, const Token* last) {
+ std::vector<const Syntax*> elts;
+ while (cur < last and cur->type != Token::close_paren)
+ elts.push_back(parse_syntax(cur, last));
+ if (cur == last)
+ missing_closer_for("vector");
+ ++cur;
+ return alloc.make_vector(elts);
+ }
+
+ // Constructs a pair or a list syntax object.
+ // This function is hairy for three reasons: (a) it is not known
+ // whether we list or a pair until after we have seen the
+ // enclosed tokens; (b) a dot is allowed at most once; (c) Lisp-style
+ // improper lists are not allowed.
+ const Syntax*
+ Parser::parse_list_or_pair(const Token*& cur, const Token* last) {
+ std::vector<const Syntax*> elts;
+ bool saw_dot = false;
+ while (cur < last and cur->type != Token::close_paren) {
+ if (cur->type == Token::dot) {
+ if (elts.size() != 1)
+ parse_error("unexpected dot sign");
+ saw_dot = true;
+ ++cur;
+ continue;
+ }
+ elts.push_back(parse_syntax(cur, last));
+ if (saw_dot && elts.size() == 2)
+ break;
+ }
+ if (cur == last or cur->type != Token::close_paren)
+ missing_closer_for(saw_dot ? "pair" : "list");
+ ++cur;
+ if (saw_dot)
+ return alloc.make_pair(elts.front(), elts.back());
+ return alloc.make_list(elts);
+ }
+
+ Parser::Parser(Allocator& a, std::vector<const Syntax*>& v)
+ : alloc(a), syns(v) { }
+
+ const Syntax*
+ Parser::parse_syntax(const Token*& cur, const Token* last) {
+ switch (cur->type) {
+ case Token::integer:
+ return alloc.make_integer(*cur++);
+
+ case Token::string:
+ return alloc.make_string(*cur++);
+
+ case Token::identifier:
+ return parse_symbol(cur, last);
+
+ case Token::sharp_integer_equal:
+ return parse_anchor(cur, last);
+
+ case Token::sharp_integer_sharp:
+ return parse_reference(cur, last);
+
+ case Token::sharp_colon:
+ return parse_uninterned(++cur, last);
+
+ case Token::sharp_apostrophe:
+ return parse_function(++cur, last);
+
+ case Token::sharp_open_paren:
+ return parse_vector(++cur, last);
+
+ case Token::apostrophe:
+ return parse_quote(++cur, last);
+
+ case Token::open_paren:
+ return parse_list_or_pair(++cur, last);
+
+ default:
+ parse_error(std::string("parse error before ")
+ + cur->lexeme->begin());
+ return 0; // never executed
+ }
+ }
+
+ const Token*
+ Parser::parse(const Token* cur, const Token* last) {
+ while (cur < last)
+ syns.push_back(parse_syntax(cur, last));
+ return cur;
+ }
+ }
+}
diff --git a/src/utils/storage.H b/src/utils/storage.H
index ba14ec97..60d5c0b4 100644
--- a/src/utils/storage.H
+++ b/src/utils/storage.H
@@ -29,9 +29,22 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// --% Author: Gabriel Dos Reis
+// --% Description:
+// --% Memory management facility. Acquire raw memory directly
+// --% directly for the host OS. Provide random access read to
+// --% files through file mapping.
+
+#ifndef OPENAXIOM_STORAGE_INCLUDED
+#define OPENAXIOM_STORAGE_INCLUDED
+
#include <stddef.h>
+#include <string.h>
+#include <cmath>
#include <string>
+#include "openaxiom-c-macros.h"
+
namespace OpenAxiom {
// -----------------
// -- SystemError --
@@ -47,18 +60,20 @@ namespace OpenAxiom {
const std::string text;
};
- // Report a file system erro
+ // Report a file system error
void filesystem_error(std::string);
namespace Memory {
+ // Datatype for the unit of storage.
+ typedef unsigned char Byte;
+
// Datatype for pointers to data.
typedef void* Pointer;
// Precision of the host OS storage page unit in byte count
size_t page_size();
- // Acquire raw memory from the host OS in multiple
- // of storage page nits.
+ // Acquire raw memory from the host OS.
Pointer os_acquire_raw_memory(size_t);
// Release raw storage to the hosting OS. The first operand must
@@ -66,6 +81,209 @@ namespace OpenAxiom {
// Otherwise, the result is undefined.
void os_release_raw_memory(Pointer, size_t);
+ // Acquire `n' pages of memory storage from the host OS.
+ inline Pointer
+ acquire_raw_pages(size_t n) {
+ return os_acquire_raw_memory(n * page_size());
+ }
+
+ // Release `n' pages of storage starting the location `p'.
+ inline void
+ release_raw_pages(Pointer p, size_t n) {
+ os_release_raw_memory(p, n * page_size());
+ }
+
+ // -------------
+ // -- Storage --
+ // -------------
+ // This class is a low-level abstraction intented for use
+ // to implement higher level storage abstraction.
+ struct Storage {
+ // Acquire storage chunk of `n' bytes, and align
+ // the first allocatable address to `a' booundary.
+ // The result is a pointer to a storage object. That object
+ // `result' is constructed such that `result->free' points
+ // to the next allocatable address, with alignment `a'.
+ static Storage* acquire(size_t a, size_t n);
+
+ // Return the storage pointed to by the operand. It
+ // must be a pointer value previously returned by `acquire'.
+ // Otherwise, the result is undefined.
+ static void release(Storage*);
+
+ // Count of bytes that can fit in this storage.
+ size_t capacity() const { return limit_bot - limit_top; }
+
+ // Count of avaliable allocatable bytes in this storage.
+ size_t room() const { return limit_bot - free; }
+
+ // Count of allocated storage in this storage.
+ size_t occupancy() const { return free - limit_top; }
+
+ // Align next allocatable address to a boundary (operand).
+ // Return true on success.
+ bool align_to(size_t);
+
+ // Allocate `n' bytes of storage. It is assumed that prior
+ // to calling this function, `n' is less than `room()'.
+ // The allocated storage is guaranteed to contain only zeros.
+ void* allocate(size_t n) {
+ void* result = free;
+ free += n;
+ return memset(result, 0, n);
+ }
+
+ // Next unused address
+ void* next_available() { return free; }
+
+ // address at offset `o' from the first allocatable address.
+ void* at_offset(size_t o) {
+ return limit_top + o;
+ }
+
+ // Round up `n' to a multiple of `a', a power of 2.
+ static size_t
+ round_up(size_t n, size_t a) {
+ return (n + a - 1) & ~(a - 1);
+ }
+
+ // Next address after `p' in this storage that has alignment `a'.
+ void*
+ round_up(void* p, size_t a) {
+ return base() + round_up(base() - static_cast<Byte*>(p), a);
+ }
+
+ protected:
+ Byte* limit_top; // first allocatable address
+ Byte* limit_bot; // one-past-the-end of valid allocatable
+ // address in this storage.
+ Byte* free; // first allocatable address suitably
+ // aligned at boundary specified at
+ // construction time.
+
+ Storage() { }
+
+ // Address of the host OS page holding this storage.
+ Byte* base() {
+ return reinterpret_cast<Byte*>(this);
+ }
+
+ size_t extent() {
+ return size_t(limit_bot - base());
+ }
+
+ private:
+ Storage(const Storage&); // not implemented
+ Storage& operator=(const Storage&); // idem.
+ };
+
+ // -----------
+ // -- Arena --
+ // -----------
+ // Extensible storage holding objects of a given type.
+ // The totality of all objects held in such a storage does not
+ // necessarily constitute a contiguous block. However,
+ // it is guaranteed that objects allocated in a single call
+ // to `allocate()' occupy a contiguous block of memory.
+ // Objects are destroyed when the arena is destroyed. So,
+ // allocators of this type implement a form of sticky object
+ // allocator.
+ template<typename T>
+ struct Arena {
+ // Acquire storage capable of holding `n' objects of type `T'.
+ explicit Arena(size_t);
+ // Destroy allocated objects when done.
+ ~Arena();
+ // allocate storage for `n' more objects of type `T'.
+ T* allocate(size_t);
+ // Number of objects of type `T' allocated in this storage.
+ size_t population() const;
+
+ protected:
+ // Address of the first object of type `T' in a storage.
+ static T* first_object(Storage* s) {
+ return static_cast<T*>
+ (s->round_up(&previous(s) + 1, openaxiom_alignment(T)));
+ }
+
+ // Address of one-past-the-end object of type `T' in this storage.
+ static T* last_object(Storage* s) {
+ return static_cast<T*>(s->next_available());
+ }
+
+ // Number of objects allocated in a storage.
+ static size_t object_count(Storage* s) {
+ return last_object(s) - first_object(s);
+ }
+
+ private:
+ Storage* store; // active storage to allocate from
+
+ // The `previous' link in the chain of storage.
+ static Storage*& previous(Storage* s) {
+ return *static_cast<Storage**>(s->at_offset(0));
+ }
+
+ // Acquire storage large enough to hold `n' objects of type `T'.
+ static Storage* acquire(size_t);
+ };
+
+ template<typename T>
+ size_t
+ Arena<T>::population() const {
+ size_t n = 0;
+ for (Storage* s = store; s != 0; s = previous(s))
+ n += object_count(s);
+ return n;
+ }
+
+ template<typename T>
+ T*
+ Arena<T>::allocate(size_t n) {
+ const size_t sz = n * sizeof(T);
+ if (store->room() < sz) {
+ Storage* s = acquire(std::max(n, object_count(store)));
+ previous(s) = store;
+ store = s;
+ }
+ return static_cast<T*>(store->allocate(sz));
+ }
+
+ template<typename T>
+ Arena<T>::Arena(size_t n) : store(acquire(n)) { }
+
+ template<typename T>
+ Arena<T>::~Arena() {
+ // Destroy objects in the reverse order of their
+ // their allocation.
+ while (store != 0) {
+ // The first allocated object is right after the `previous'
+ // link in the storage chain.
+ T* first = first_object(store);
+ T* last = last_object(store);
+ for (--last; first >= last; --last)
+ last->~T();
+ Storage* current = store;
+ store = previous(store);
+ Storage::release(current);
+ }
+ }
+
+ template<typename T>
+ Storage*
+ Arena<T>::acquire(size_t n) {
+ // We build single-linked list of Storage objects, so
+ // don't forget to account for the additional pointer,
+ // and necessary padding.
+ const size_t sz = n * sizeof(T)
+ + Storage::round_up(sizeof(Storage*), openaxiom_alignment(T));
+ Storage* s = Storage::acquire(openaxiom_alignment(Storage*), sz);
+ s->allocate(sizeof(Storage*));
+ previous(s) = 0;
+ s->align_to(openaxiom_alignment(T));
+ return s;
+ }
+
// -----------------
// -- FileMapping --
// -----------------
@@ -85,3 +303,5 @@ namespace OpenAxiom {
}
}
+
+#endif // OPENAXIOM_STORAGE_INCLUDED
diff --git a/src/utils/storage.cc b/src/utils/storage.cc
index 4db7f302..4883de6c 100644
--- a/src/utils/storage.cc
+++ b/src/utils/storage.cc
@@ -29,6 +29,8 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// --%: Gabriel Dos Reis.
+
#include "openaxiom-c-macros.h"
#ifdef HAVE_SYS_TYPES_H
@@ -53,6 +55,7 @@
#include <errno.h>
#include <stdlib.h>
#include <string.h>
+#include <new> // for placement new.
#include "storage.H"
namespace OpenAxiom {
@@ -124,6 +127,40 @@ namespace OpenAxiom {
#endif
}
+ // -------------
+ // -- Storage --
+ // -------------
+ Storage*
+ Storage::acquire(size_t alignment, size_t byte_count) {
+ // Adjust for overhead, and page boundary.
+ byte_count = round_up(byte_count + sizeof(Storage), page_size());
+ Storage* mem = new(os_acquire_raw_memory(byte_count)) Storage;
+ mem->limit_top = mem->base() + round_up(sizeof(Storage), alignment);
+ mem->limit_bot = mem->base() + byte_count;
+ mem->free = mem->limit_top;
+ return mem;
+ }
+
+ void
+ Storage::release(Storage* store) {
+ os_release_raw_memory(store, store->extent());
+ }
+
+ bool
+ Storage::align_to(size_t alignment) {
+ if (alignment == 0) // protect against nuts
+ return true;
+ if (alignment == 1) // no preferred alignment at all
+ return true;
+ Byte* b = base();
+ const size_t offset = round_up(free - b, alignment);
+ if (offset < size_t(limit_bot - b)) {
+ free = b + offset;
+ return true;
+ }
+ return false; // not enough room left
+ }
+
// -----------------
// -- FileMapping --
// -----------------
diff --git a/src/utils/string-pool.H b/src/utils/string-pool.H
new file mode 100644
index 00000000..f3692a79
--- /dev/null
+++ b/src/utils/string-pool.H
@@ -0,0 +1,88 @@
+// Copyright (C) 2010, Gabriel Dos Reis.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+//
+// - Neither the name of The Numerical Algorithms Group Ltd. nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef OPENAXIOM_STRING_POOL_INCLUDED
+#define OPENAXIOM_STRING_POOL_INCLUDED
+
+#include <string.h>
+#include "hash-table.H"
+
+// --% Author: Gabriel Dos Reis.
+// --% Description:
+// --% Basic persistent string facility.
+// --% A stringpool for allocating long-living string objects.
+
+namespace OpenAxiom {
+ struct StringPool;
+
+ // ----------------
+ // -- StringItem --
+ // ----------------
+ // String data allocated from a stringpool.
+ struct StringItem {
+ const char* begin() const { return text; }
+ const char* end() const { return text + length; }
+ size_t size() const { return length; }
+ bool equal(const char*, size_t) const;
+ protected:
+ const char* text; // pointer to the byte sequence
+ size_t length; // number of bytes in this string
+ friend class StringPool;
+ StringItem() : text(), length() { }
+ };
+
+ // ----------------
+ // -- StringPool --
+ // ----------------
+ // A stringpool object is a repository of long-living string objects.
+ // It contains no duplicates, therefore allowing fast string
+ // object comparison for equality.
+ struct StringPool : private BasicHashTable<StringItem> {
+ using BasicHashTable<StringItem>::EntryType;
+
+ StringPool();
+ // Intern a NUL-terminated sequence of characters.
+ EntryType* intern(const char* s) {
+ return intern(s, strlen(s));
+ }
+
+ // Intern a sequence of characters given by its start and length.
+ EntryType* intern(const char*, size_t);
+ private:
+ Memory::Arena<char> strings; // character blub
+ // Allocate a string from the internal arena.
+ const char* make_copy(const char*, size_t);
+ };
+
+ typedef const StringPool::EntryType* BasicString;
+}
+
+#endif // OPENAXIOM_STRING_POOL_INCLUDED
diff --git a/src/utils/string-pool.cc b/src/utils/string-pool.cc
new file mode 100644
index 00000000..5ae6b15d
--- /dev/null
+++ b/src/utils/string-pool.cc
@@ -0,0 +1,96 @@
+// Copyright (C) 2010, Gabriel Dos Reis.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+//
+// - Neither the name of The Numerical Algorithms Group Ltd. nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// --% Author: Gabriel Dos Reis
+
+#include "string-pool.H"
+
+namespace OpenAxiom {
+ // ----------------
+ // -- StringItem --
+ // ----------------
+ bool
+ StringItem::equal(const char* str, size_t sz) const {
+ if (length != sz)
+ return false;
+ for (size_t i = 0; i < sz; ++i)
+ if (text[i] != str[i])
+ return false;
+ return true;
+ }
+
+
+ // ----------------
+ // -- StringPool --
+ // ----------------
+ StringPool::StringPool()
+ : BasicHashTable<StringItem>(109),
+ strings(2 * Memory::page_size())
+ { }
+
+ // Return a hash for the string starting from `str'
+ // of length `sz'.
+ static size_t
+ hash(const char* str, size_t sz) {
+ size_t h = 0;
+ for(size_t i = 0; i < sz; ++i)
+ h = str[i] + (h << 6) + (h << 16) - h;
+ return h;
+ }
+
+ const char*
+ StringPool::make_copy(const char* f, size_t sz) {
+ char* s = strings.allocate(sz + 1);
+ memcpy(s, f, sz);
+ s[sz] = '\0';
+ return s;
+ }
+
+ StringPool::EntryType*
+ StringPool::intern(const char* src, size_t sz) {
+ const size_t h = hash(src, sz);
+ EntryType* e = hash_chain(h);
+ if (sz == 0)
+ return e;
+ for (; e->text != 0; e = e->chain) {
+ if (e->hash == h and e->equal(src, sz))
+ return e;
+ // If this is the last entry in this hash chain, allocate
+ // a new bucket to hold the information we want to store.
+ if (e->chain == 0)
+ e->chain = new_bucket();
+ }
+ e->text = make_copy(src, sz);
+ e->length = sz;
+ e->hash = h;
+ return e;
+ }
+}