diff options
Diffstat (limited to 'src/utils/hammer.cc')
-rw-r--r-- | src/utils/hammer.cc | 414 |
1 files changed, 414 insertions, 0 deletions
diff --git a/src/utils/hammer.cc b/src/utils/hammer.cc new file mode 100644 index 00000000..17c5cc73 --- /dev/null +++ b/src/utils/hammer.cc @@ -0,0 +1,414 @@ +// Copyright (C) 2010, Gabriel Dos Reis. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of The Numerical Algorithms Group Ltd. nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <string.h> +#include <stdlib.h> +#include <utility> +#include <string> +#include <iostream> +#include <fstream> +#include <iterator> +#include <list> +#include <vector> +#include <map> +#include "storage.H" + +// -- This program implements basic functionalities for untangling +// -- algebra source code from the pamphlets. The syntax is that +// -- of `noweb'. A chunk definition starts with a pattern +// -- <<name>>= on a line by itself, and ends with `@' by itself +// -- on a line. A chunk can refer to another chunk through +// -- a pattern of the form `<<name>>'. + +namespace OpenAxiom { + namespace Hammer { + // ------------- + // -- Element -- + // ------------- + // Base class of document elements. + struct Element { + virtual ~Element() { } + }; + + // --------------- + // -- BasicText -- + // --------------- + // Plain text, with no reference to any chunk. + struct BasicText : Element { + BasicText(const char* f, const char* l) : span(f, l) { } + // Pointer to the start of this basic text element + const char* begin() const { return span.first; } + // Oone-past-the-end of the this basic text element. + const char* end() const { return span.second; } + private: + std::pair<const char*, const char*> span; + }; + + // --------------- + // -- Reference -- + // --------------- + // Reference to a a chunk by name. + struct Reference : Element { + explicit Reference(const std::string& s) : label(s) { } + // Naame of the chunk referenced. + const std::string& name() const { return label; } + private: + const std::string label; + }; + + // ------------------- + // -- CompositeText -- + // ------------------- + // Sequence of basic elements and reference to chunks. + struct CompositeText: private std::vector<const Element*> { + typedef std::vector<const Element*> base; + using base::iterator; + using base::begin; + using base::end; + using base::size; + using base::operator[]; + + // Augment this chunk with a basic text in the open interval + // [f,l). + CompositeText& add_text(const char* f, const char* l) { + texts.push_back(BasicText(f, l)); + push_back(&texts.back()); + return *this; + } + + // Augment this chunk with a reference to another chunk + // named `n'. Note that we don't attempt to check for + // possible circularities. + CompositeText reference_chunk(const char* f, const char* l) { + refs.push_back(Reference(std::string(f, l))); + push_back(&refs.back()); + return *this; + } + + private: + std::list<BasicText> texts; + std::list<Reference> refs; + }; + + // -------------- + // -- Document -- + // -------------- + // A whole document; a sequence of chunks. + struct Document : std::list<CompositeText> { + Document(const Memory::FileMapping& file) + : active_chunk(&prose), text_start(file.begin()) { + parse(file); + } + + // Return a pointer to a document chunk name `n'. + // Otherwise, return null. + CompositeText* lookup_chunk(const std::string& n) const { + ChunkTable::const_iterator i = defs.find(n); + return i == defs.end() ? 0 : i->second; + } + + private: + typedef std::map<std::string, CompositeText*> ChunkTable; + CompositeText prose; // the prose around the chunks. + ChunkTable defs; // chunk definition table. + CompositeText* active_chunk; // chunk under construction. + const char* text_start; // begining of current basic text. + + // Append basic text in the range `[text_start,last)' + // to the current chunk. + void finish_chunk(const char* last) { + if (text_start != last) + active_chunk->add_text(text_start, last); + active_chunk = &prose; + text_start = last; + } + + // Start a new chunk or extend an existing chunk. + void begin_chunk(const std::string& name, const char* start) { + if (CompositeText* chunk = lookup_chunk(name)) + active_chunk = chunk; + else { + push_back(CompositeText()); + defs[name] = active_chunk = &back(); + } + text_start = start; + } + + // Parse a file mapping into this document. + void parse(const Memory::FileMapping&); + }; + + // Return true if the character `c' introduces a newline. + static inline bool + looking_at_newline(char c) { + return c == '\n' or c == '\r'; + } + + // Attempt to advance the cursor past newline marker. + // Return true on sucess. + static bool + saw_newline(const char*& cur, const char* end) { + if (*cur == '\n') { + ++cur; + return true; + } + else if (*cur == '\r') { + if (++cur < end and *cur == '\n') + ++cur; + return true; + } + return false; + } + + // Move `cur' to end of line or `end', whichever comes first. + // Return true if the area swept consisted only of blank characters. + static inline bool + trailing_blank(const char*& cur, const char* end) { + bool result = true; + for (; cur < end and not saw_newline(cur, end); ++cur) + result = isspace(*cur); + return result; + } + + // Attempt to advance `cur' past the double left angle brackets + // starting a chunk name. Returm true on success. + static bool + chunk_name_began(const char*& cur, const char* end) { + if (cur[0] == '<' and cur + 1 < end and cur[1] == '<') { + cur += 2; + return true; + } + return false; + } + + // Attempt to move `cur' past the double right angle brackets + // terminating a chunk name. Returm true on success. + static bool + chunk_name_ended(const char*& cur, const char* end) { + if (cur[0] == '>' and cur + 1 < end and cur[1] == '>') { + cur += 2; + return true; + } + return false; + } + + // We've just seen the start of a chunk reference; skip + // characters till we seen of the chunk's name. + static void + skip_to_end_of_chunk_name(const char*& cur, const char* end) { + while (cur < end) { + if (looking_at_newline(*cur) + or (cur + 1 < end and cur[0] == '>' and cur[1] == '>')) + return; + ++cur; + } + } + + // Move the cursor until end of line. + static void + skip_to_end_of_line(const char*& cur, const char* end) { + while (cur < end) { + if (saw_newline(cur, end)) + break; + ++cur; + } + } + + void + Document::parse(const Memory::FileMapping& file) { + const char* cur = text_start; + const char* last = file.end(); + // Process one line at a time. + while (cur < last) { + // 1. `@' ends previous chunk + if (*cur == '@') { + const char* p = cur; + if (trailing_blank(++cur, last)) + finish_chunk(p); + } + // 2. `<<' introduces a chunk reference or a chunk definition. + else if (chunk_name_began(cur, last)) { + const char* label_start = cur; + skip_to_end_of_chunk_name(cur, last); + if (chunk_name_ended(cur, last)) { + const char* label_end = cur - 2; + if (cur < last and *cur == '=') { + if (trailing_blank(++cur, last)) { + // chunk definition or extension + finish_chunk(label_start - 2); + begin_chunk(std::string(label_start, label_end), cur); + } + } + else if (trailing_blank(cur, last)) { + // This is just a reference to a chunk. + active_chunk->add_text(text_start, label_start - 2); + active_chunk->reference_chunk(label_start, label_end); + text_start = cur; + } + else + skip_to_end_of_line(cur, last); + } + } + else + skip_to_end_of_line(cur, last); + } + finish_chunk(cur); + } + + // Capture chunk resolution in a document. + struct resolve_chunk { + resolve_chunk(const std::string& s, const Document& f) + : name(s), doc(f) { } + const std::string name; // name of the chunk + const Document& doc; // document containing the chunk. + }; + + // Print the resolution of a chunk name onto an output stream. + std::ostream& + operator<<(std::ostream& os, const resolve_chunk& rc) { + // FIXME: no attempt at detecting circularities. + const CompositeText* doc = rc.doc.lookup_chunk(rc.name); + if (doc == 0) { + std::cerr << "chunk " << rc.name << " is undefined" << std::endl; + exit(1); + } + for (std::size_t i = 0; i < doc->size(); ++i) { + const Element* elt = (*doc)[i]; + if (const BasicText* t = dynamic_cast<const BasicText*>(elt)) + std::copy(t->begin(), t->end(), + std::ostream_iterator<char>(os)); + else if (const Reference* r = dynamic_cast<const Reference*>(elt)) + os << resolve_chunk(r->name(), rc.doc); + else { + std::cerr << "unknown document element" << std::endl; + exit(1); + } + } + + return os; + } + + // Return true if the `arg' is the option named`opt'. + static inline bool + is_option(const char* arg, const char* opt) { + return strcmp(arg, opt) == 0; + } + + // `arg' is a argument on the command line. If `arg' + // does not match option name `opt', return null. Otherwise, + // return a pointer to the terminating NUL character if there + // is no specified value for that option, or a pointer to the + // start of the value. + static const char* + is_named_arg(const char* arg, const char* opt) { + const int n = strlen(opt); + int i = 0; + // Get out if argion name does not match. + // Note: Ideally, we could use strncmp(). However, that + // function is not available in C++98, so we cannot depend on it. + for (; i < n ; ++i) + if (arg[i] != opt[i]) + return 0; + + if (arg[i] == '\0') + return arg + i; // no value for the option. + return arg + n + 1; // being of the value. + } + } +} + + +int +main(int argc, char* argv[]) { + using namespace OpenAxiom::Hammer; + int error_count = 0; + const char* chunk = 0; // chunck to tangle + const char* output_path = 0; // path to the output file + const char* input_path = 0; // path to the input file. + // 1. Process command line arguments. + for (int pos = 1; error_count == 0 and pos < argc; ++pos) { + if (const char* val = is_named_arg(argv[pos], "--tangle")) { + if (chunk != 0) { + std::cerr << "cannot tangle more than one chunk"; + ++error_count; + } + else + chunk = *val == 0 ? "*" : val; + } + else if (const char* val = is_named_arg(argv[pos], "--output")) { + if (*val == 0) { + std::cerr << "missing output file name" << std::endl; + ++error_count; + } + else + output_path = val; + } + else if (argv[pos][0] == '-' and argv[pos][1] == '-') { + std::cerr << "unknown option " << argv[pos] << std::endl; + ++error_count; + } + else if (input_path != 0) { + std::cerr << "there must be exactly one input file" << std::endl; + ++error_count; + } + else + input_path = argv[pos]; + } + + // 2. Basic sanity check. + if (input_path == 0) { + std::cerr << "missing input file" << std::endl; + return 1; + } + if (output_path == 0) { + std::cerr << "missing output file" << std::endl; + return 1; + } + if (chunk == 0) { + std::cerr << "missing chunk name" << std::endl; + return 1; + } + + if (error_count != 0) + return 1; + + // 3. Attempt to extract the chunk. + try { + OpenAxiom::Memory::FileMapping file(input_path); + std::ofstream os(output_path); + os << resolve_chunk(chunk, Document(file)); + } + catch(const OpenAxiom::SystemError& e) { + std::cerr << e.message() << std::endl; + exit(1); + } + return 0; +} |