diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | config/open-axiom.m4 | 20 | ||||
-rw-r--r-- | config/openaxiom-c-macros.h.in | 3 | ||||
-rwxr-xr-x | configure | 91 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | src/utils/Makefile.in | 4 | ||||
-rw-r--r-- | src/utils/hammer.cc | 16 | ||||
-rw-r--r-- | src/utils/hash-table.H | 82 | ||||
-rw-r--r-- | src/utils/sexpr.H | 385 | ||||
-rw-r--r-- | src/utils/sexpr.cc | 641 | ||||
-rw-r--r-- | src/utils/storage.H | 226 | ||||
-rw-r--r-- | src/utils/storage.cc | 37 | ||||
-rw-r--r-- | src/utils/string-pool.H | 88 | ||||
-rw-r--r-- | src/utils/string-pool.cc | 96 |
14 files changed, 1674 insertions, 22 deletions
@@ -1,3 +1,8 @@ +2010-08-24 Gabriel Dos Reis <gdr@cs.tamu.edu> + + * config/open-axiom.m4 (OPENAXIOM_ALIGNMENT_OPERATOR): New. Check + for alignment operator. + 2010-08-20 Gabriel Dos Reis <gdr@cs.tamu.edu> External tool noweb is no longer required. diff --git a/config/open-axiom.m4 b/config/open-axiom.m4 index a2e26e8b..d7de600a 100644 --- a/config/open-axiom.m4 +++ b/config/open-axiom.m4 @@ -1010,10 +1010,30 @@ if test x"$ac_cv_header_sys_mman_h" = xyes; then fi ]) +dnl ---------------------------------- +dnl -- OPENAXIOM_ALIGNMENT_OPERATOR -- +dnl ---------------------------------- +dnl Check that the C/C++ compiler understand +dnl alignment operator, i.e. either `alignof', +dnl or vendor lock-ins such as `__alignof'. +AC_DEFUN([OPENAXIOM_ALIGNMENT_OPERATOR],[ +AC_MSG_CHECKING([name of alignment query operator]) +oa_alignment= +AC_COMPILE_IFELSE([int a = __alignof(int);], + [oa_alignment="__alignof"], + [AC_COMPILE_IFELSE([int a = alignof(int);], + [oa_alignment="alignof"], + [AC_MSG_ERROR([C/C++ compiler does not support alignment query operator])])]) +AC_DEFINE_UNQUOTED([openaxiom_alignment],[$oa_alignment], + [Alignment query operator]) +AC_MSG_RESULT([$oa_alignment]) +]) + dnl -------------------------- dnl -- OPENAXIOM_CHECK_MISC -- dnl -------------------------- AC_DEFUN([OPENAXIOM_CHECK_MISC],[ +OPENAXIOM_ALIGNMENT_OPERATOR case $GCC in yes) CFLAGS="$CFLAGS -O2 -Wall" diff --git a/config/openaxiom-c-macros.h.in b/config/openaxiom-c-macros.h.in index c92cc8ac..41b47835 100644 --- a/config/openaxiom-c-macros.h.in +++ b/config/openaxiom-c-macros.h.in @@ -207,6 +207,9 @@ if such a type exists, and if the system does not define it. */ #undef intptr_t +/* Alignment query operator */ +#undef openaxiom_alignment + /* Define to the type of an unsigned integer type of width exactly 16 bits if such a type exists and the standard includes do not define it. */ #undef uint16_t @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.63 for OpenAxiom 1.4.0-2010-08-20. +# Generated by GNU Autoconf 2.63 for OpenAxiom 1.4.0-2010-08-24. # # Report bugs to <open-axiom-bugs@lists.sf.net>. # @@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='OpenAxiom' PACKAGE_TARNAME='openaxiom' -PACKAGE_VERSION='1.4.0-2010-08-20' -PACKAGE_STRING='OpenAxiom 1.4.0-2010-08-20' +PACKAGE_VERSION='1.4.0-2010-08-24' +PACKAGE_STRING='OpenAxiom 1.4.0-2010-08-24' PACKAGE_BUGREPORT='open-axiom-bugs@lists.sf.net' ac_unique_file="src/Makefile.pamphlet" @@ -1509,7 +1509,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures OpenAxiom 1.4.0-2010-08-20 to adapt to many kinds of systems. +\`configure' configures OpenAxiom 1.4.0-2010-08-24 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1579,7 +1579,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of OpenAxiom 1.4.0-2010-08-20:";; + short | recursive ) echo "Configuration of OpenAxiom 1.4.0-2010-08-24:";; esac cat <<\_ACEOF @@ -1687,7 +1687,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -OpenAxiom configure 1.4.0-2010-08-20 +OpenAxiom configure 1.4.0-2010-08-24 generated by GNU Autoconf 2.63 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1701,7 +1701,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by OpenAxiom $as_me 1.4.0-2010-08-20, which was +It was created by OpenAxiom $as_me 1.4.0-2010-08-24, which was generated by GNU Autoconf 2.63. Invocation command line was $ $0 $@ @@ -22193,6 +22193,79 @@ _ACEOF fi + +{ $as_echo "$as_me:$LINENO: checking name of alignment query operator" >&5 +$as_echo_n "checking name of alignment query operator... " >&6; } +oa_alignment= +cat >conftest.$ac_ext <<_ACEOF +int a = __alignof(int); +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + oa_alignment="__alignof" +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + cat >conftest.$ac_ext <<_ACEOF +int a = alignof(int); +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + oa_alignment="alignof" +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + { { $as_echo "$as_me:$LINENO: error: C/C++ compiler does not support alignment query operator" >&5 +$as_echo "$as_me: error: C/C++ compiler does not support alignment query operator" >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +cat >>confdefs.h <<_ACEOF +#define openaxiom_alignment $oa_alignment +_ACEOF + +{ $as_echo "$as_me:$LINENO: result: $oa_alignment" >&5 +$as_echo "$oa_alignment" >&6; } + case $GCC in yes) CFLAGS="$CFLAGS -O2 -Wall" @@ -22651,7 +22724,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by OpenAxiom $as_me 1.4.0-2010-08-20, which was +This file was extended by OpenAxiom $as_me 1.4.0-2010-08-24, which was generated by GNU Autoconf 2.63. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -22714,7 +22787,7 @@ Report bugs to <bug-autoconf@gnu.org>." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_version="\\ -OpenAxiom config.status 1.4.0-2010-08-20 +OpenAxiom config.status 1.4.0-2010-08-24 configured by $0, generated by GNU Autoconf 2.63, with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" diff --git a/configure.ac b/configure.ac index b4fe9057..1a7aafe7 100644 --- a/configure.ac +++ b/configure.ac @@ -37,7 +37,7 @@ dnl Most of the macros used in this configure.ac are defined in files dnl located in the subdirectory config/ sinclude(config/open-axiom.m4) sinclude(config/aclocal.m4) -AC_INIT([OpenAxiom], [1.4.0-2010-08-20], +AC_INIT([OpenAxiom], [1.4.0-2010-08-24], [open-axiom-bugs@lists.sf.net]) AC_CONFIG_AUX_DIR(config) diff --git a/src/utils/Makefile.in b/src/utils/Makefile.in index 024af5dc..b3adb6b9 100644 --- a/src/utils/Makefile.in +++ b/src/utils/Makefile.in @@ -36,8 +36,8 @@ hammer_SOURCES = hammer.cc hammer_OBJECTS = $(hammer_SOURCES:.cc=.lo) hammer_LDADD = -L. -lOpenAxiom -libOpenAxiom_HEADERS = storage.H -libOpenAxiom_SOURCES = storage.cc +libOpenAxiom_HEADERS = storage.H hash-table.H string-pool.H sexpr.H +libOpenAxiom_SOURCES = storage.cc string-pool.cc sexpr.cc libOpenAxiom_OBJECTS = $(libOpenAxiom_SOURCES:.cc=.lo) .PHONY: all all-ax all-utils diff --git a/src/utils/hammer.cc b/src/utils/hammer.cc index 17c5cc73..35ef5c1f 100644 --- a/src/utils/hammer.cc +++ b/src/utils/hammer.cc @@ -29,6 +29,15 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// --% Author: Gabriel Dos Reis. +// --% Description: +// --% This program implements basic functionalities for untangling +// --% algebra source code from the pamphlets. The syntax is that +// --% of `noweb'. A chunk definition starts with a pattern +// --% <<name>>= on a line by itself, and ends with `@' by itself +// --% on a line. A chunk can refer to another chunk through +// --% a pattern of the form `<<name>>'. + #include <string.h> #include <stdlib.h> #include <utility> @@ -41,13 +50,6 @@ #include <map> #include "storage.H" -// -- This program implements basic functionalities for untangling -// -- algebra source code from the pamphlets. The syntax is that -// -- of `noweb'. A chunk definition starts with a pattern -// -- <<name>>= on a line by itself, and ends with `@' by itself -// -- on a line. A chunk can refer to another chunk through -// -- a pattern of the form `<<name>>'. - namespace OpenAxiom { namespace Hammer { // ------------- diff --git a/src/utils/hash-table.H b/src/utils/hash-table.H new file mode 100644 index 00000000..75b485ff --- /dev/null +++ b/src/utils/hash-table.H @@ -0,0 +1,82 @@ +// Copyright (C) 2010, Gabriel Dos Reis. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of The Numerical Algorithms Group Ltd. nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef OPENAXIOM_HASH_TABLE_INCLUDED +#define OPENAXIOM_HASH_TABLE_INCLUDED + +// --% Author: Gabriel Dos Reis. +// --% Description: +// --% Simple hash table facility. To be replaced by C++0x +// --% hash tables when C++0x compilers become common place. + +#include "storage.H" + +namespace OpenAxiom { + // -------------------- + // -- HashTableEntry -- + // -------------------- + // Datatype for entries in a parameterized hash table. + // The type parameter is required to be a value-construcitble datatype. + template<typename T> + struct HashTableEntry : T { + HashTableEntry* chain; // previous item in the same bucket chain + size_t hash; // hash code of stored data + }; + + // -------------------- + // -- BasicHashTable -- + // -------------------- + // A simple hash table data structure. Ideally, we would like to use + // standard C++ types, but hash tables were only in a C++ 2003 TR, + // officially part of C++0x standard library. We still don't have + // wide-spread C++0x compilers. + template<typename T> + struct BasicHashTable : private Memory::Arena<HashTableEntry<T> > { + typedef HashTableEntry<T> EntryType; + explicit BasicHashTable(size_t n) + : Memory::Arena<HashTableEntry<T> >(n), + buckets(this->allocate(n)), nbuckets(n) { } + + EntryType* hash_chain(size_t h) const { + return buckets + (h % nbuckets); + } + + EntryType* new_bucket() { + return this->allocate(1); + } + + private: + HashTableEntry<T>* const buckets; + const size_t nbuckets; + }; +} + +#endif // OPENAXIOM_HASH_TABLE_INCLUDED diff --git a/src/utils/sexpr.H b/src/utils/sexpr.H new file mode 100644 index 00000000..5139b453 --- /dev/null +++ b/src/utils/sexpr.H @@ -0,0 +1,385 @@ +// Copyright (C) 2010, Gabriel Dos Reis. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of The Numerical Algorithms Group Ltd. nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef OPENAXIOM_SEXPR_INCLUDED +#define OPENAXIOM_SEXPR_INCLUDED + +// --% Author: Gabriel Dos Reis. +// --% Description: +// --% A simple support for s-expressions. By design, no ambition +// --% for full-fledged Common Lisp reader capability. Rather, +// --% the aim is a simple data structure for exchanging data +// --% between several components of the OpenAxiom system. +// --% Users interested in fullblown Lisp syntax should seek +// --% to acquire Lisp systems, many of which are freely available. + +#include <iosfwd> +#include <vector> +#include <set> +#include "string-pool.H" + +// Helpers for defining token type values for lexeme with more +// than characters. +#define OPENAXIOM_SEXPR_TOKEN1(C) (C) +#define OPENAXIOM_SEXPR_TOKEN2(C1,C2) (C1 * 256 + C2) + +namespace OpenAxiom { + namespace Sexpr { + // ----------- + // -- Token -- + // ----------- + struct Token { + enum Type { + unknown, // unidentified token + dot = OPENAXIOM_SEXPR_TOKEN1('.'), // "." + comma = OPENAXIOM_SEXPR_TOKEN1(','), // "," + open_paren = OPENAXIOM_SEXPR_TOKEN1('('), // "(" + close_paren = OPENAXIOM_SEXPR_TOKEN1(')'), // ")" + apostrophe = OPENAXIOM_SEXPR_TOKEN1('\''), // "'" + backquote = OPENAXIOM_SEXPR_TOKEN1('`'), // "`" + backslash = OPENAXIOM_SEXPR_TOKEN1('\\'), // "\\" + sharp_open_paren = OPENAXIOM_SEXPR_TOKEN2('#','('), // "#(" + sharp_apostrophe = OPENAXIOM_SEXPR_TOKEN2('#','\''), // "#'" + sharp_colon = OPENAXIOM_SEXPR_TOKEN2('#',':'), // "#:" + digraph_end = OPENAXIOM_SEXPR_TOKEN2(256,256), + integer, // integer literal + string, // string literal + identifier, // plain identifier + sharp_integer_equal, // anchor definition, #n=<form> + sharp_integer_sharp // back reference, #n# + }; + + Type type; // class of this token + BasicString lexeme; // characters making up this token + }; + + // Print a token object on an output stream. + // Note: this function is for debugging purpose; in particular + // it does not `prettyprint' tokens. + std::ostream& operator<<(std::ostream&, const Token&); + + // ----------- + // -- Lexer -- + // ----------- + // An object of this type transforms a sequence of characters + // into a sequence of tokens as defined above. + // A lexer does not manage memory itself. Rather, it delegates + // storage allocation for lexemes and tokens to specialized + // agents used to construct it. + struct Lexer { + Lexer(StringPool& pool, std::vector<Token>& toks) + : strings(pool), tokens(toks) { } + + const char* tokenize(const char*, const char*); + + private: + StringPool& strings; // where to allocate lexemes from + std::vector<Token>& tokens; // where to deposite tokens. + }; + + // ------------ + // -- Syntax -- + // ------------ + struct Syntax { + struct Visitor; // base class of syntax visitors + virtual void accept(Visitor&) const = 0; + }; + + // ---------- + // -- Atom -- + // ---------- + // An atom is a syntax object consisting of exatly one token. + // This should not be confused with the notion of atom + // in Lisp languages. + struct Atom : Syntax { + const Token& token() const { return tok; } + BasicString lexeme() const { return tok.lexeme; } + void accept(Visitor&) const; + protected: + const Token tok; + Atom(const Token&); + }; + + // ------------- + // -- Integer -- + // ------------- + struct Integer : Atom { + explicit Integer(const Token&); + void accept(Visitor&) const; + }; + + // ------------ + // -- String -- + // ------------ + struct String : Atom { + explicit String(const Token&); + void accept(Visitor&) const; + }; + + // ------------ + // -- Symbol -- + // ------------ + struct Symbol : Atom { + enum Kind { + uninterned, // uninterned symbol + ordinary, // an interned symbol + keyword // a keyword symbol + }; + Symbol(const Token&, Kind); + Kind kin() const { return sort; } + void accept(Visitor&) const; + private: + const Kind sort; + }; + + // --------------- + // -- Reference -- + // --------------- + struct Reference : Atom { + Reference(const Token&, size_t); + size_t tag() const { return pos; } + void accept(Visitor&) const; + private: + const size_t pos; + }; + + // ------------ + // -- Anchor -- + // ------------ + struct Anchor : Syntax { + Anchor(size_t, const Syntax*); + size_t ref() const { return tag; } + const Syntax* value() const { return val; } + void accept(Visitor&) const; + private: + const size_t tag; + const Syntax* const val; + }; + + // ----------- + // -- Quote -- + // ----------- + struct Quote : Syntax { + explicit Quote(const Syntax*); + const Syntax* body() const { return form; } + void accept(Visitor&) const; + private: + const Syntax* const form; + }; + + // -------------- + // -- Function -- + // -------------- + struct Function : Syntax { + explicit Function(const Syntax*); + const Syntax* code() const { return form; } + void accept(Visitor&) const; + private: + const Syntax* const form; + }; + + // ---------- + // -- Pair -- + // ---------- + struct Pair : Syntax { + Pair(const Syntax*, const Syntax*); + const Syntax* first() const { return elts.first; } + const Syntax* second() const { return elts.second; } + void accept(Visitor&) const; + private: + const std::pair<const Syntax*, const Syntax*> elts; + }; + + // ---------- + // -- List -- + // ---------- + struct List : Syntax, private std::vector<const Syntax*> { + typedef std::vector<const Syntax*> base; + using base::const_iterator; + using base::begin; + using base::end; + using base::size; + using base::empty; + + List(); + explicit List(const base&); + ~List(); + void accept(Visitor&) const; + }; + + // ------------ + // -- Vector -- + // ------------ + struct Vector : Syntax, private std::vector<const Syntax*> { + typedef std::vector<const Syntax*> base; + using base::const_iterator; + using base::begin; + using base::end; + using base::size; + using base::operator[]; + using base::empty; + + Vector(); + explicit Vector(const base&); + ~Vector(); + void accept(Visitor&) const; + }; + + // --------------------- + // -- Syntax::Visitor -- + // --------------------- + struct Syntax::Visitor { + virtual void visit(const Atom&) = 0; + virtual void visit(const Integer&); + virtual void visit(const String&); + virtual void visit(const Symbol&); + virtual void visit(const Reference&); + virtual void visit(const Anchor&) = 0; + virtual void visit(const Quote&) = 0; + virtual void visit(const Function&) = 0; + virtual void visit(const Pair&) = 0; + virtual void visit(const List&) = 0; + virtual void visit(const Vector&) = 0; + }; + + // --------------- + // -- Allocator -- + // --------------- + + // The next two classes are helper classes for the main + // allocation class Allocator. We use std::set as allocator + // that guarantee uuniqueness of atomic syntax object with + // respect to the constituent token. That container needs + // a relational comparator. In an ideal world, this class + // should not exist. + struct SyntaxComparator { + bool operator()(const Atom& lhs, const Atom& rhs) const { + return std::less<BasicString>()(lhs.lexeme(), rhs.lexeme()); + } + + bool operator()(const Quote& lhs, const Quote& rhs) const { + return std::less<const void*>()(lhs.body(), rhs.body()); + } + + bool operator()(const Anchor& lhs, const Anchor& rhs) const { + return std::less<size_t>()(lhs.ref(), rhs.ref()); + } + + bool operator()(const Function& lhs, const Function& rhs) const { + return std::less<const void*>()(lhs.code(), rhs.code()); + } + + bool operator()(const Pair& lhs, const Pair& rhs) const { + std::less<const void*> cmp; + if (cmp(lhs.first(), rhs.first())) + return true; + if (cmp(rhs.first(), lhs.first())) + return false; + return cmp(lhs.second(), rhs.second()); + } + }; + + template<typename T> + struct UniqueAllocator : std::set<T, SyntaxComparator> { + typedef std::set<T, SyntaxComparator> base; + typedef typename base::const_iterator const_iterator; + + template<typename U> + const T* allocate(const U& u) { + return &*this->insert(T(u)).first; + } + + template<typename U, typename V> + const T* allocate(const U& u, const V& v) { + return &*this->insert(T(u, v)).first; + } + }; + + // Allocator of syntax objects. + struct Allocator { + Allocator(); + ~Allocator(); + + const Integer* make_integer(const Token&); + const String* make_string(const Token&); + const Symbol* make_symbol(const Token&, Symbol::Kind); + const Reference* make_reference(const Token&, size_t); + const Anchor* make_anchor(size_t, const Syntax*); + const Quote* make_quote(const Syntax*); + const Function* make_function(const Syntax*); + const Pair* make_pair(const Syntax*, const Syntax*); + const List* make_list(const std::vector<const Syntax*>&); + const Vector* make_vector(const std::vector<const Syntax*>&); + + private: + UniqueAllocator<Integer> ints; + UniqueAllocator<String> strs; + UniqueAllocator<Symbol> syms; + UniqueAllocator<Anchor> ancs; + UniqueAllocator<Reference> refs; + UniqueAllocator<Quote> quotes; + UniqueAllocator<Function> funs; + UniqueAllocator<Pair> pairs; + Memory::Arena<List> lists; + Memory::Arena<Vector> vectors; + List empty_list; + Vector empty_vector; + }; + + // ------------ + // -- Parser -- + // ------------ + // An object of this type transforms a sequence of tokens + // into a sequence of syntax objects. + // A parser object does not manage memory itself. Rather, it delegates + // storage allocation for syntax objects to specialized + // agents used to construct it. + struct Parser { + Parser(Allocator&, std::vector<const Syntax*>&); + const Token* parse(const Token*, const Token*); + private: + Allocator& alloc; + std::vector<const Syntax*>& syns; + + const Symbol* parse_symbol(const Token*&, const Token*); + const Anchor* parse_anchor(const Token*&, const Token*); + const Reference* parse_reference(const Token*&, const Token*); + const Symbol* parse_uninterned(const Token*&, const Token*); + const Function* parse_function(const Token*&, const Token*); + const Quote* parse_quote(const Token*&, const Token*); + const Vector* parse_vector(const Token*&, const Token*); + const Syntax* parse_list_or_pair(const Token*&, const Token*); + const Syntax* parse_syntax(const Token*&, const Token*); + }; + } +} + +#endif // OPENAXIOM_SEXPR_INCLUDED diff --git a/src/utils/sexpr.cc b/src/utils/sexpr.cc new file mode 100644 index 00000000..d9d38220 --- /dev/null +++ b/src/utils/sexpr.cc @@ -0,0 +1,641 @@ +// Copyright (C) 2010, Gabriel Dos Reis. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of The Numerical Algorithms Group Ltd. nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --% Author: Gabriel Dos Reis. + +#include <ctype.h> +#include <iostream> +#include <iterator> +#include "sexpr.H" + +namespace OpenAxiom { + namespace Sexpr { + std::ostream& + operator<<(std::ostream& os, const Token& t) { + switch (t.type) { + case Token::dot: os << "DOT"; break; + case Token::comma: os << "COMMA"; break; + case Token::open_paren: os << "OPEN_PAREN"; break; + case Token::close_paren: os << "CLOSE_PAREN"; break; + case Token::apostrophe: os << "APOSTROPHE"; break; + case Token::backquote: os << "BACKQUOTE"; break; + case Token::backslash: os << "BACKSLASH"; break; + case Token::sharp_open_paren: os << "SHARP_OPEN_PAREN"; break; + case Token::sharp_apostrophe: os << "SHARP_APOSTROPHE"; break; + case Token::sharp_colon: os << "SHARP_COLON"; break; + case Token::integer: os << "INTEGER"; break; + case Token::string: os << "STRING"; break; + case Token::identifier: os << "IDENTIFIER"; break; + case Token::sharp_integer_sharp: + os << "SHARP_INTEGER_SHARP"; break; + case Token::sharp_integer_equal: + os << "SHARP_INTEGER_EQUAL"; break; + default: os << "UNKNOWN"; break; + } + os << '('; + if (t.lexeme != 0) { + os << '"'; + std::copy(t.lexeme->begin(), t.lexeme->end(), + std::ostream_iterator<char>(os)); + os << '"'; + } + else + os << "<missing>"; + return os << ')'; + } + + // Return true if character `c' introduces a blank. + static bool + is_blank(char c) { + return c == ' ' or c == '\t' or c == '\v' + or c == '\n' or c == '\f' or c == '\r'; + } + + // Return true if the character `c' introduces a delimiter. + static bool + is_delimiter(char c) { + return is_blank(c) + or c == '(' or c == ')' or c == '\'' + or c == '`' or c == '\\' or c == '#'; + } + + // Move `cur' past all consecutive blank characters, and + // return the new position. + static const char* + skip_blank(const char*& cur, const char* end) { + while (cur < end and is_blank(*cur)) + ++cur; + return cur; + } + + // Move `cur' until a word boundary is reached. + static const char* + skip_to_word_boundary(const char*& cur, const char* end) { + while (cur < end and not is_delimiter(*cur)) + ++cur; + return cur; + } + + // Move `cur' one-past a non-esacaped character `c'. + // Return true if the character was seen. + static bool + skip_to_nonescaped_char(const char*& cur, const char* end, char c) { + for (; cur < end; ++cur) + if (cur[0] == c and cur[-1] != '\\') { + ++cur; + return true; + } + return false; + } + + // Move `cur' past the closing fence of an absolute identifier. + // Return true if the closing fence was effectively seen. + static inline bool + skip_to_fence(const char*& cur, const char* end) { + return skip_to_nonescaped_char(cur, end, '|'); + } + + // Move `cur' past the closing quote of string literal. + // Return true if the closing fence was effectively seen. + static inline bool + skip_to_quote(const char*& cur, const char* end) { + return skip_to_nonescaped_char(cur, end, '"'); + } + + // Return true if the character `c' be part of a non-absolute + // identifier. + static bool + identifier_part(char c) { + switch (c) { + case '+': case '-': case '*': case '/': case '%': case '^': + case '~': case '@': case '$': case '&': case ':': case '=': + case '<': case '>': case '?': case '!': case '_': + return true; + default: + return isalnum(c); + } + } + + // Return true if the character `c' has a special meaning after + // the sharp character. + static bool + special_after_sharp(char c) { + return c == '(' or c == '\'' or c == ':'; + } + + // Return true if the sequence `[cur, end)' has a prefix that is + // an integer followrd by the equal sign or the sharp sign. + // `cur' is moved along the way. + static bool + only_digits_before_equal_or_shap(const char*& cur, const char* end) { + while (cur < end and isdigit(*cur)) + ++cur; + return cur < end and (*cur == '#' or *cur == '='); + } + + // The token `t' was thought to designate an identifier. + // Reclassify it as an integer if, in fact, its lexeme consists + // entirely of digits. + static void + maybe_reclassify(Token& t) { + const char* cur = t.lexeme->begin(); + const char* end = t.lexeme->end(); + while (cur < end and isdigit(*cur)) + ++cur; + if (cur == end) + t.type = Token::integer; + } + + const char* + Lexer::tokenize(const char* cur, const char* end) { + while (skip_blank(cur, end) < end) { + Token t = { Token::unknown, 0 }; + switch (*cur) { + case '.': case ',': case '(': case ')': + case '\'': case '\\': + t.type = Token::Type(OPENAXIOM_SEXPR_TOKEN1(*cur)); + t.lexeme = strings.intern(cur, 1); + ++cur; + break; + + case '#': { + const char* start = cur; + if (cur + 1 < end and special_after_sharp(cur[1])) { + t.type = Token::Type(OPENAXIOM_SEXPR_TOKEN2(cur[0], cur[1])); + t.lexeme = strings.intern(cur, 2); + cur += 2; + } + else if (only_digits_before_equal_or_shap(++cur, end)) { + t.type = *cur == '#' + ? Token::sharp_integer_sharp + : Token::sharp_integer_equal; + t.lexeme = strings.intern(start, cur - start + 1); + ++cur; + } + else { + skip_to_word_boundary(cur, end); + t.lexeme = strings.intern(start, cur - start); + } + break; + } + + case '|': { + const char* start = cur; + skip_to_fence(++cur, end); + t.type = Token::identifier; + t.lexeme = strings.intern(start, cur - start); + break; + } + + case '"': { + const char* start = cur; + skip_to_quote(++cur, end); + t.type = Token::string; + t.lexeme = strings.intern(start, cur - start); + break; + } + + default: + if (identifier_part(*cur)) { + const char* start = cur; + skip_to_word_boundary(++cur, end); + t.type = Token::identifier; + t.lexeme = strings.intern(start, cur - start); + maybe_reclassify(t); + } + else { + const char* start = cur; + skip_to_word_boundary(++cur, end); + t.lexeme = strings.intern(start, cur - start); + } + break; + } + tokens.push_back(t); + } + return cur; + } + + // ---------- + // -- Atom -- + // ---------- + Atom::Atom(const Token& t) : tok(t) { } + + void + Atom::accept(Visitor& v) const { + v.visit(*this); + } + + // ------------- + // -- Integer -- + // ------------- + Integer::Integer(const Token& t) : Atom(t) { } + + void + Integer::accept(Visitor& v) const { + v.visit(*this); + } + + // ------------ + // -- String -- + // ------------ + String::String(const Token& t) : Atom(t) { } + + void + String::accept(Visitor& v) const { + v.visit(*this); + } + + // ------------ + // -- Symbol -- + // ------------ + Symbol::Symbol(const Token& t, Kind k) : Atom(t), sort(k) { } + + void + Symbol::accept(Visitor& v) const { + v.visit(*this); + } + + // ------------ + // -- Anchor -- + // ------------ + Anchor::Anchor(size_t t, const Syntax* s) : tag(t), val(s) { } + + void + Anchor::accept(Visitor& v) const { + v.visit(*this); + } + + // --------------- + // -- Reference -- + // --------------- + Reference::Reference(const Token& t, size_t v) : Atom(t), pos(v) { } + + void + Reference::accept(Visitor& v) const { + v.visit(*this); + } + + // ----------- + // -- Quote -- + // ----------- + Quote::Quote(const Syntax* s) : form(s) { } + + void + Quote::accept(Visitor& v) const { + v.visit(*this); + } + + // -------------- + // -- Function -- + // -------------- + Function::Function(const Syntax* s) : form(s) { } + + void + Function::accept(Visitor& v) const { + v.visit(*this); + } + + // ---------- + // -- Pair -- + // ---------- + Pair::Pair(const Syntax* f, const Syntax* s) : elts(f, s) { } + + void + Pair::accept(Visitor& v) const { + v.visit(*this); + } + + // ---------- + // -- List -- + // ---------- + List::List() { } + + List::List(const base& elts) : base(elts) { } + + List::~List() { } + + void + List::accept(Visitor& v) const { + v.visit(*this); + } + + // ------------ + // -- Vector -- + // ------------ + Vector::Vector() { } + + Vector::Vector(const base& elts) : base(elts) { } + + Vector::~Vector() { } + + void + Vector::accept(Visitor& v) const { + v.visit(*this); + } + + // --------------------- + // -- Syntax::Visitor -- + // --------------------- + + // implicitly convert a reference to `T' to a reference to `S'. + template<typename S, typename T> + inline const S& + as(const T& t) { + return t; + } + + void + Syntax::Visitor::visit(const Integer& i) { + visit(as<Atom>(i)); + } + + void + Syntax::Visitor::visit(const String& s) { + visit(as<Atom>(s)); + } + + void + Syntax::Visitor::visit(const Symbol& s) { + visit(as<Atom>(s)); + } + + void + Syntax::Visitor::visit(const Reference& r) { + visit(as<Atom>(r)); + } + + // --------------- + // -- Allocator -- + // --------------- + Allocator::Allocator() : lists(4 * 1024), vectors(1024) { } + + // This destructor is defined here so that it provides + // a single instantiation point for destructors of all + // used templates floating around. + Allocator::~Allocator() { } + + const Integer* + Allocator::make_integer(const Token& t) { + return ints.allocate(t); + } + + const String* + Allocator::make_string(const Token& t) { + return strs.allocate(t); + } + + const Symbol* + Allocator::make_symbol(const Token& t, Symbol::Kind k) { + return syms.allocate(t, k); + } + + const Anchor* + Allocator::make_anchor(size_t t, const Syntax* s) { + return ancs.allocate(t, s); + } + + const Reference* + Allocator::make_reference(const Token& t, size_t i) { + return refs.allocate(t, i); + } + + const Quote* + Allocator::make_quote(const Syntax* s) { + return quotes.allocate(s); + } + + const Function* + Allocator::make_function(const Syntax* s) { + return funs.allocate(s); + } + + const Pair* + Allocator::make_pair(const Syntax* f, const Syntax* s) { + return pairs.allocate(f, s); + } + + const List* + Allocator::make_list(const std::vector<const Syntax*>& elts) { + if (elts.empty()) + return &empty_list; + return new(lists.allocate(1)) List(elts); + } + + const Vector* + Allocator::make_vector(const std::vector<const Syntax*>& elts) { + if (elts.empty()) + return &empty_vector; + return new(vectors.allocate(1)) Vector(elts); + } + + // ------------ + // -- Parser -- + // ------------ + + // Signal a parse error + static void + parse_error(const std::string& s) { + throw SystemError(s); + } + + // Signal that an expected syntax object was missing + static void + expected_syntax(const std::string& s) { + parse_error("expected " + s); + } + + // Signal an abrupt end of input + static void + unexpected_end_of_input(const std::string& s) { + parse_error("unexpected end of input after " + s); + } + + // Signal a missing closing parenthesis + static void + missing_closer_for(const std::string& s) { + parse_error("missing closing parenthesis for " + s); + } + + // The sequence of characters in [cur, last) consists + // entirely of digits. Return the corresponding natural value. + static size_t + natural_value(const char* cur, const char* last) { + size_t n = 0; + for (; cur < last; ++cur) + // FIXME: check for overflow. + n = 10 * n + (*cur - '0'); + return n; + } + + // Parse a plain identifier or a Lisp-style keyword identifier. + const Symbol* + Parser::parse_symbol(const Token*& cur, const Token* last) { + Symbol::Kind kind = *cur->lexeme->begin() == ':' + ? Symbol::keyword + : Symbol::ordinary; + return alloc.make_symbol(*cur++, kind); + } + + // Parse an anchor definition of the form #n=<syntax> + const Anchor* + Parser::parse_anchor(const Token*& cur, const Token* last) { + const size_t n = natural_value(cur->lexeme->begin() + 1, + cur->lexeme->end() - 1); + if (++cur == last) + unexpected_end_of_input("sharp-integer-equal sign"); + return alloc.make_anchor(n, parse_syntax(cur, last)); + } + + // Parse a reference to an anchor, #n# + const Reference* + Parser::parse_reference(const Token*& cur, const Token* last) { + const size_t n = natural_value(cur->lexeme->begin() + 1, + cur->lexeme->end() - 1); + return alloc.make_reference(*cur++, n); + } + + // Parse an uninterned symbol #:<identifier> + const Symbol* + Parser::parse_uninterned(const Token*& cur, const Token* last) { + if (cur == last or cur->type != Token::identifier) + expected_syntax("symbol after sharp-colon sign"); + // FIXME: check that the identifier is not a keyword. + return alloc.make_symbol(*cur++, Symbol::uninterned); + } + + // Parse a function syntax: #'<syntax> + const Function* + Parser::parse_function(const Token*& cur, const Token* last) { + if (cur == last) + unexpected_end_of_input("sharp-quote sign"); + return alloc.make_function(parse_syntax(cur, last)); + } + + // Parse a quotation + const Quote* + Parser::parse_quote(const Token*& cur, const Token* last) { + if (cur == last) + unexpected_end_of_input("quote sign"); + return alloc.make_quote(parse_syntax(cur, last)); + } + + // Parse a vector of syntax objects: #(s .. s) + const Vector* + Parser::parse_vector(const Token*& cur, const Token* last) { + std::vector<const Syntax*> elts; + while (cur < last and cur->type != Token::close_paren) + elts.push_back(parse_syntax(cur, last)); + if (cur == last) + missing_closer_for("vector"); + ++cur; + return alloc.make_vector(elts); + } + + // Constructs a pair or a list syntax object. + // This function is hairy for three reasons: (a) it is not known + // whether we list or a pair until after we have seen the + // enclosed tokens; (b) a dot is allowed at most once; (c) Lisp-style + // improper lists are not allowed. + const Syntax* + Parser::parse_list_or_pair(const Token*& cur, const Token* last) { + std::vector<const Syntax*> elts; + bool saw_dot = false; + while (cur < last and cur->type != Token::close_paren) { + if (cur->type == Token::dot) { + if (elts.size() != 1) + parse_error("unexpected dot sign"); + saw_dot = true; + ++cur; + continue; + } + elts.push_back(parse_syntax(cur, last)); + if (saw_dot && elts.size() == 2) + break; + } + if (cur == last or cur->type != Token::close_paren) + missing_closer_for(saw_dot ? "pair" : "list"); + ++cur; + if (saw_dot) + return alloc.make_pair(elts.front(), elts.back()); + return alloc.make_list(elts); + } + + Parser::Parser(Allocator& a, std::vector<const Syntax*>& v) + : alloc(a), syns(v) { } + + const Syntax* + Parser::parse_syntax(const Token*& cur, const Token* last) { + switch (cur->type) { + case Token::integer: + return alloc.make_integer(*cur++); + + case Token::string: + return alloc.make_string(*cur++); + + case Token::identifier: + return parse_symbol(cur, last); + + case Token::sharp_integer_equal: + return parse_anchor(cur, last); + + case Token::sharp_integer_sharp: + return parse_reference(cur, last); + + case Token::sharp_colon: + return parse_uninterned(++cur, last); + + case Token::sharp_apostrophe: + return parse_function(++cur, last); + + case Token::sharp_open_paren: + return parse_vector(++cur, last); + + case Token::apostrophe: + return parse_quote(++cur, last); + + case Token::open_paren: + return parse_list_or_pair(++cur, last); + + default: + parse_error(std::string("parse error before ") + + cur->lexeme->begin()); + return 0; // never executed + } + } + + const Token* + Parser::parse(const Token* cur, const Token* last) { + while (cur < last) + syns.push_back(parse_syntax(cur, last)); + return cur; + } + } +} diff --git a/src/utils/storage.H b/src/utils/storage.H index ba14ec97..60d5c0b4 100644 --- a/src/utils/storage.H +++ b/src/utils/storage.H @@ -29,9 +29,22 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// --% Author: Gabriel Dos Reis +// --% Description: +// --% Memory management facility. Acquire raw memory directly +// --% directly for the host OS. Provide random access read to +// --% files through file mapping. + +#ifndef OPENAXIOM_STORAGE_INCLUDED +#define OPENAXIOM_STORAGE_INCLUDED + #include <stddef.h> +#include <string.h> +#include <cmath> #include <string> +#include "openaxiom-c-macros.h" + namespace OpenAxiom { // ----------------- // -- SystemError -- @@ -47,18 +60,20 @@ namespace OpenAxiom { const std::string text; }; - // Report a file system erro + // Report a file system error void filesystem_error(std::string); namespace Memory { + // Datatype for the unit of storage. + typedef unsigned char Byte; + // Datatype for pointers to data. typedef void* Pointer; // Precision of the host OS storage page unit in byte count size_t page_size(); - // Acquire raw memory from the host OS in multiple - // of storage page nits. + // Acquire raw memory from the host OS. Pointer os_acquire_raw_memory(size_t); // Release raw storage to the hosting OS. The first operand must @@ -66,6 +81,209 @@ namespace OpenAxiom { // Otherwise, the result is undefined. void os_release_raw_memory(Pointer, size_t); + // Acquire `n' pages of memory storage from the host OS. + inline Pointer + acquire_raw_pages(size_t n) { + return os_acquire_raw_memory(n * page_size()); + } + + // Release `n' pages of storage starting the location `p'. + inline void + release_raw_pages(Pointer p, size_t n) { + os_release_raw_memory(p, n * page_size()); + } + + // ------------- + // -- Storage -- + // ------------- + // This class is a low-level abstraction intented for use + // to implement higher level storage abstraction. + struct Storage { + // Acquire storage chunk of `n' bytes, and align + // the first allocatable address to `a' booundary. + // The result is a pointer to a storage object. That object + // `result' is constructed such that `result->free' points + // to the next allocatable address, with alignment `a'. + static Storage* acquire(size_t a, size_t n); + + // Return the storage pointed to by the operand. It + // must be a pointer value previously returned by `acquire'. + // Otherwise, the result is undefined. + static void release(Storage*); + + // Count of bytes that can fit in this storage. + size_t capacity() const { return limit_bot - limit_top; } + + // Count of avaliable allocatable bytes in this storage. + size_t room() const { return limit_bot - free; } + + // Count of allocated storage in this storage. + size_t occupancy() const { return free - limit_top; } + + // Align next allocatable address to a boundary (operand). + // Return true on success. + bool align_to(size_t); + + // Allocate `n' bytes of storage. It is assumed that prior + // to calling this function, `n' is less than `room()'. + // The allocated storage is guaranteed to contain only zeros. + void* allocate(size_t n) { + void* result = free; + free += n; + return memset(result, 0, n); + } + + // Next unused address + void* next_available() { return free; } + + // address at offset `o' from the first allocatable address. + void* at_offset(size_t o) { + return limit_top + o; + } + + // Round up `n' to a multiple of `a', a power of 2. + static size_t + round_up(size_t n, size_t a) { + return (n + a - 1) & ~(a - 1); + } + + // Next address after `p' in this storage that has alignment `a'. + void* + round_up(void* p, size_t a) { + return base() + round_up(base() - static_cast<Byte*>(p), a); + } + + protected: + Byte* limit_top; // first allocatable address + Byte* limit_bot; // one-past-the-end of valid allocatable + // address in this storage. + Byte* free; // first allocatable address suitably + // aligned at boundary specified at + // construction time. + + Storage() { } + + // Address of the host OS page holding this storage. + Byte* base() { + return reinterpret_cast<Byte*>(this); + } + + size_t extent() { + return size_t(limit_bot - base()); + } + + private: + Storage(const Storage&); // not implemented + Storage& operator=(const Storage&); // idem. + }; + + // ----------- + // -- Arena -- + // ----------- + // Extensible storage holding objects of a given type. + // The totality of all objects held in such a storage does not + // necessarily constitute a contiguous block. However, + // it is guaranteed that objects allocated in a single call + // to `allocate()' occupy a contiguous block of memory. + // Objects are destroyed when the arena is destroyed. So, + // allocators of this type implement a form of sticky object + // allocator. + template<typename T> + struct Arena { + // Acquire storage capable of holding `n' objects of type `T'. + explicit Arena(size_t); + // Destroy allocated objects when done. + ~Arena(); + // allocate storage for `n' more objects of type `T'. + T* allocate(size_t); + // Number of objects of type `T' allocated in this storage. + size_t population() const; + + protected: + // Address of the first object of type `T' in a storage. + static T* first_object(Storage* s) { + return static_cast<T*> + (s->round_up(&previous(s) + 1, openaxiom_alignment(T))); + } + + // Address of one-past-the-end object of type `T' in this storage. + static T* last_object(Storage* s) { + return static_cast<T*>(s->next_available()); + } + + // Number of objects allocated in a storage. + static size_t object_count(Storage* s) { + return last_object(s) - first_object(s); + } + + private: + Storage* store; // active storage to allocate from + + // The `previous' link in the chain of storage. + static Storage*& previous(Storage* s) { + return *static_cast<Storage**>(s->at_offset(0)); + } + + // Acquire storage large enough to hold `n' objects of type `T'. + static Storage* acquire(size_t); + }; + + template<typename T> + size_t + Arena<T>::population() const { + size_t n = 0; + for (Storage* s = store; s != 0; s = previous(s)) + n += object_count(s); + return n; + } + + template<typename T> + T* + Arena<T>::allocate(size_t n) { + const size_t sz = n * sizeof(T); + if (store->room() < sz) { + Storage* s = acquire(std::max(n, object_count(store))); + previous(s) = store; + store = s; + } + return static_cast<T*>(store->allocate(sz)); + } + + template<typename T> + Arena<T>::Arena(size_t n) : store(acquire(n)) { } + + template<typename T> + Arena<T>::~Arena() { + // Destroy objects in the reverse order of their + // their allocation. + while (store != 0) { + // The first allocated object is right after the `previous' + // link in the storage chain. + T* first = first_object(store); + T* last = last_object(store); + for (--last; first >= last; --last) + last->~T(); + Storage* current = store; + store = previous(store); + Storage::release(current); + } + } + + template<typename T> + Storage* + Arena<T>::acquire(size_t n) { + // We build single-linked list of Storage objects, so + // don't forget to account for the additional pointer, + // and necessary padding. + const size_t sz = n * sizeof(T) + + Storage::round_up(sizeof(Storage*), openaxiom_alignment(T)); + Storage* s = Storage::acquire(openaxiom_alignment(Storage*), sz); + s->allocate(sizeof(Storage*)); + previous(s) = 0; + s->align_to(openaxiom_alignment(T)); + return s; + } + // ----------------- // -- FileMapping -- // ----------------- @@ -85,3 +303,5 @@ namespace OpenAxiom { } } + +#endif // OPENAXIOM_STORAGE_INCLUDED diff --git a/src/utils/storage.cc b/src/utils/storage.cc index 4db7f302..4883de6c 100644 --- a/src/utils/storage.cc +++ b/src/utils/storage.cc @@ -29,6 +29,8 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// --%: Gabriel Dos Reis. + #include "openaxiom-c-macros.h" #ifdef HAVE_SYS_TYPES_H @@ -53,6 +55,7 @@ #include <errno.h> #include <stdlib.h> #include <string.h> +#include <new> // for placement new. #include "storage.H" namespace OpenAxiom { @@ -124,6 +127,40 @@ namespace OpenAxiom { #endif } + // ------------- + // -- Storage -- + // ------------- + Storage* + Storage::acquire(size_t alignment, size_t byte_count) { + // Adjust for overhead, and page boundary. + byte_count = round_up(byte_count + sizeof(Storage), page_size()); + Storage* mem = new(os_acquire_raw_memory(byte_count)) Storage; + mem->limit_top = mem->base() + round_up(sizeof(Storage), alignment); + mem->limit_bot = mem->base() + byte_count; + mem->free = mem->limit_top; + return mem; + } + + void + Storage::release(Storage* store) { + os_release_raw_memory(store, store->extent()); + } + + bool + Storage::align_to(size_t alignment) { + if (alignment == 0) // protect against nuts + return true; + if (alignment == 1) // no preferred alignment at all + return true; + Byte* b = base(); + const size_t offset = round_up(free - b, alignment); + if (offset < size_t(limit_bot - b)) { + free = b + offset; + return true; + } + return false; // not enough room left + } + // ----------------- // -- FileMapping -- // ----------------- diff --git a/src/utils/string-pool.H b/src/utils/string-pool.H new file mode 100644 index 00000000..f3692a79 --- /dev/null +++ b/src/utils/string-pool.H @@ -0,0 +1,88 @@ +// Copyright (C) 2010, Gabriel Dos Reis. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of The Numerical Algorithms Group Ltd. nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef OPENAXIOM_STRING_POOL_INCLUDED +#define OPENAXIOM_STRING_POOL_INCLUDED + +#include <string.h> +#include "hash-table.H" + +// --% Author: Gabriel Dos Reis. +// --% Description: +// --% Basic persistent string facility. +// --% A stringpool for allocating long-living string objects. + +namespace OpenAxiom { + struct StringPool; + + // ---------------- + // -- StringItem -- + // ---------------- + // String data allocated from a stringpool. + struct StringItem { + const char* begin() const { return text; } + const char* end() const { return text + length; } + size_t size() const { return length; } + bool equal(const char*, size_t) const; + protected: + const char* text; // pointer to the byte sequence + size_t length; // number of bytes in this string + friend class StringPool; + StringItem() : text(), length() { } + }; + + // ---------------- + // -- StringPool -- + // ---------------- + // A stringpool object is a repository of long-living string objects. + // It contains no duplicates, therefore allowing fast string + // object comparison for equality. + struct StringPool : private BasicHashTable<StringItem> { + using BasicHashTable<StringItem>::EntryType; + + StringPool(); + // Intern a NUL-terminated sequence of characters. + EntryType* intern(const char* s) { + return intern(s, strlen(s)); + } + + // Intern a sequence of characters given by its start and length. + EntryType* intern(const char*, size_t); + private: + Memory::Arena<char> strings; // character blub + // Allocate a string from the internal arena. + const char* make_copy(const char*, size_t); + }; + + typedef const StringPool::EntryType* BasicString; +} + +#endif // OPENAXIOM_STRING_POOL_INCLUDED diff --git a/src/utils/string-pool.cc b/src/utils/string-pool.cc new file mode 100644 index 00000000..5ae6b15d --- /dev/null +++ b/src/utils/string-pool.cc @@ -0,0 +1,96 @@ +// Copyright (C) 2010, Gabriel Dos Reis. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of The Numerical Algorithms Group Ltd. nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --% Author: Gabriel Dos Reis + +#include "string-pool.H" + +namespace OpenAxiom { + // ---------------- + // -- StringItem -- + // ---------------- + bool + StringItem::equal(const char* str, size_t sz) const { + if (length != sz) + return false; + for (size_t i = 0; i < sz; ++i) + if (text[i] != str[i]) + return false; + return true; + } + + + // ---------------- + // -- StringPool -- + // ---------------- + StringPool::StringPool() + : BasicHashTable<StringItem>(109), + strings(2 * Memory::page_size()) + { } + + // Return a hash for the string starting from `str' + // of length `sz'. + static size_t + hash(const char* str, size_t sz) { + size_t h = 0; + for(size_t i = 0; i < sz; ++i) + h = str[i] + (h << 6) + (h << 16) - h; + return h; + } + + const char* + StringPool::make_copy(const char* f, size_t sz) { + char* s = strings.allocate(sz + 1); + memcpy(s, f, sz); + s[sz] = '\0'; + return s; + } + + StringPool::EntryType* + StringPool::intern(const char* src, size_t sz) { + const size_t h = hash(src, sz); + EntryType* e = hash_chain(h); + if (sz == 0) + return e; + for (; e->text != 0; e = e->chain) { + if (e->hash == h and e->equal(src, sz)) + return e; + // If this is the last entry in this hash chain, allocate + // a new bucket to hold the information we want to store. + if (e->chain == 0) + e->chain = new_bucket(); + } + e->text = make_copy(src, sz); + e->length = sz; + e->hash = h; + return e; + } +} |