// Copyright (C) 2013, Gabriel Dos Reis. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // - Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in // the documentation and/or other materials provided with the // distribution. // // - Neither the name of The Numerical Algorithms Group Ltd. nor the // names of its contributors may be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER // OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // --% Author: Gabriel Dos Reis. // --% Description: // --% This program implements basic functionalities for untangling // --% algebra source code from the pamphlets. The syntax is that // --% of `noweb'. A chunk definition starts with a pattern // --% <<name>>= on a line by itself, and ends with `@' by itself // --% on a line. A chunk can refer to another chunk through // --% a pattern of the form `<<name>>'. #include <string.h> #include <stdlib.h> #include <utility> #include <string> #include <iostream> #include <fstream> #include <iterator> #include <list> #include <vector> #include <map> #include <open-axiom/storage> #include <open-axiom/FileMapping> // Workaround lack of standard streaming operation for std::u8string. static std::ostream& print(std::ostream& os, const std::u8string& s) { constexpr auto cast = [](auto p) { return reinterpret_cast<const char*>(&*p); }; std::copy(cast(s.begin()), cast(s.end()), std::ostream_iterator<char>(os)); return os; } namespace OpenAxiom::Hammer { // ------------- // -- Element -- // ------------- // Base class of document elements. struct Element { virtual ~Element() = default; }; // --------------- // -- BasicText -- // --------------- // Plain text, with no reference to any chunk. struct BasicText : Element { BasicText(const char8_t* f, const char8_t* l) : span(f, l) { } // Pointer to the start of this basic text element const char8_t* begin() const { return span.first; } // One-past-the-end of the this basic text element. const char8_t* end() const { return span.second; } private: std::pair<const char8_t*, const char8_t*> span; }; // --------------- // -- Reference -- // --------------- // Reference to a a chunk by name. struct Reference : Element { explicit Reference(const std::u8string& s) : label(s) { } // Naame of the chunk referenced. const std::u8string& name() const { return label; } private: const std::u8string label; }; // ------------------- // -- CompositeText -- // ------------------- // Sequence of basic elements and reference to chunks. struct CompositeText: private std::vector<const Element*> { typedef std::vector<const Element*> base; using base::iterator; using base::begin; using base::end; using base::size; using base::operator[]; // Augment this chunk with a basic text in the open interval // [f,l). CompositeText& add_text(const char8_t* f, const char8_t* l) { texts.push_back(BasicText(f, l)); push_back(&texts.back()); return *this; } // Augment this chunk with a reference to another chunk // named `n'. Note that we don't attempt to check for // possible circularities. CompositeText reference_chunk(const char8_t* f, const char8_t* l) { refs.push_back(Reference(std::u8string(f, l))); push_back(&refs.back()); return *this; } private: std::list<BasicText> texts; std::list<Reference> refs; }; // -------------- // -- Document -- // -------------- // A whole document; a sequence of chunks. struct Document : std::list<CompositeText> { Document(const Memory::FileMapping& file) : active_chunk{&prose}, text_start{(reinterpret_cast<const char8_t*>(file.begin()))} { parse(file); } // Return a pointer to a document chunk name `n'. // Otherwise, return null. CompositeText* lookup_chunk(const std::u8string& n) const { ChunkTable::const_iterator i = defs.find(n); return i == defs.end() ? 0 : i->second; } private: typedef std::map<std::u8string, CompositeText*> ChunkTable; CompositeText prose; // the prose around the chunks. ChunkTable defs; // chunk definition table. CompositeText* active_chunk; // chunk under construction. const char8_t* text_start; // begining of current basic text. // Append basic text in the range `[text_start,last)' // to the current chunk. void finish_chunk(const char8_t* last) { if (text_start != last) active_chunk->add_text(text_start, last); active_chunk = &prose; text_start = last; } // Start a new chunk or extend an existing chunk. void begin_chunk(const std::u8string& name, const char8_t* start) { if (CompositeText* chunk = lookup_chunk(name)) active_chunk = chunk; else { push_back(CompositeText()); defs[name] = active_chunk = &back(); } text_start = start; } // Parse a file mapping into this document. void parse(const Memory::FileMapping&); }; // Return true if the character `c' introduces a newline. static inline bool looking_at_newline(char8_t c) { return c == u8'\n' or c == u8'\r'; } // Attempt to advance the cursor past newline marker. // Return true on sucess. static bool saw_newline(const char8_t*& cur, const char8_t* end) { if (*cur == u8'\n') { ++cur; return true; } else if (*cur == u8'\r') { if (++cur < end and *cur == u8'\n') ++cur; return true; } return false; } // Move `cur' to end of line or `end', whichever comes first. // Return true if the area swept consisted only of blank characters. static inline bool trailing_blank(const char8_t*& cur, const char8_t* end) { bool result = true; for (; cur < end and not saw_newline(cur, end); ++cur) result = isspace(*cur); return result; } // Attempt to advance `cur' past the double left angle brackets // starting a chunk name. Returm true on success. static bool chunk_name_began(const char8_t*& cur, const char8_t* end) { if (cur[0] == u8'<' and cur + 1 < end and cur[1] == u8'<') { cur += 2; return true; } return false; } // Attempt to move `cur' past the double right angle brackets // terminating a chunk name. Returm true on success. static bool chunk_name_ended(const char8_t*& cur, const char8_t* end) { if (cur[0] == u8'>' and cur + 1 < end and cur[1] == u8'>') { cur += 2; return true; } return false; } // We've just seen the start of a chunk reference; skip // characters till we seen of the chunk's name. static void skip_to_end_of_chunk_name(const char8_t*& cur, const char8_t* end) { while (cur < end) { if (looking_at_newline(*cur) or (cur + 1 < end and cur[0] == u8'>' and cur[1] == u8'>')) return; ++cur; } } // Move the cursor until end of line. static void skip_to_end_of_line(const char8_t*& cur, const char8_t* end) { while (cur < end) { if (saw_newline(cur, end)) break; ++cur; } } void Document::parse(const Memory::FileMapping& file) { auto cur = text_start; auto last = reinterpret_cast<const char8_t*>(file.end()); // Process one line at a time. while (cur < last) { // 1. `@' ends previous chunk if (*cur == u8'@') { auto p = cur; if (trailing_blank(++cur, last)) finish_chunk(p); } // 2. `<<' introduces a chunk reference or a chunk definition. else if (chunk_name_began(cur, last)) { auto label_start = cur; skip_to_end_of_chunk_name(cur, last); if (chunk_name_ended(cur, last)) { auto label_end = cur - 2; if (cur < last and *cur == u8'=') { if (trailing_blank(++cur, last)) { // chunk definition or extension finish_chunk(label_start - 2); begin_chunk(std::u8string(label_start, label_end), cur); } } else if (trailing_blank(cur, last)) { // This is just a reference to a chunk. active_chunk->add_text(text_start, label_start - 2); active_chunk->reference_chunk(label_start, label_end); text_start = cur; } else skip_to_end_of_line(cur, last); } } else skip_to_end_of_line(cur, last); } finish_chunk(cur); } // Capture chunk resolution in a document. struct resolve_chunk { resolve_chunk(const std::u8string& s, const Document& f) : name(s), doc(f) { } const std::u8string name; // name of the chunk const Document& doc; // document containing the chunk. }; // Print the resolution of a chunk name onto an output stream. std::ostream& operator<<(std::ostream& os, const resolve_chunk& rc) { // FIXME: no attempt at detecting circularities. const CompositeText* doc = rc.doc.lookup_chunk(rc.name); if (doc == 0) { print(std::cerr << "chunk ", rc.name) << " is undefined" << std::endl; exit(1); } for (std::size_t i = 0; i < doc->size(); ++i) { const Element* elt = (*doc)[i]; if (const BasicText* t = dynamic_cast<const BasicText*>(elt)) std::copy(t->begin(), t->end(), std::ostream_iterator<char>(os)); else if (const Reference* r = dynamic_cast<const Reference*>(elt)) os << resolve_chunk(r->name(), rc.doc); else { std::cerr << "unknown document element" << std::endl; exit(1); } } return os; } // Return true if the `arg' is the option named`opt'. static inline bool is_option(const char* arg, const char* opt) { return strcmp(arg, opt) == 0; } // `arg' is a argument on the command line. If `arg' // does not match option name `opt', return null. Otherwise, // return a pointer to the terminating NUL character if there // is no specified value for that option, or a pointer to the // start of the value. static const char* is_named_arg(const char* arg, const char* opt) { const int n = strlen(opt); int i = 0; // Get out if argion name does not match. // Note: Ideally, we could use strncmp(). However, that // function is not available in C++98, so we cannot depend on it. for (; i < n ; ++i) if (arg[i] != opt[i]) return 0; if (arg[i] == '\0') return arg + i; // no value for the option. return arg + n + 1; // being of the value. } } int main(int argc, char* argv[]) { using namespace OpenAxiom::Hammer; int error_count = 0; const char* chunk = nullptr; // chunck to tangle const char* output_path = nullptr; // path to the output file const char* input_path = nullptr; // path to the input file. // 1. Process command line arguments. for (int pos = 1; error_count == 0 and pos < argc; ++pos) { if (const char* val = is_named_arg(argv[pos], "--tangle")) { if (chunk != 0) { std::cerr << "cannot tangle more than one chunk"; ++error_count; } else chunk = *val == 0 ? "*" : val; } else if (const char* val = is_named_arg(argv[pos], "--output")) { if (*val == 0) { std::cerr << "missing output file name" << std::endl; ++error_count; } else output_path = val; } else if (argv[pos][0] == '-' and argv[pos][1] == '-') { std::cerr << "unknown option " << argv[pos] << std::endl; ++error_count; } else if (input_path != 0) { std::cerr << "there must be exactly one input file" << std::endl; ++error_count; } else input_path = argv[pos]; } // 2. Basic sanity check. if (input_path == 0) { std::cerr << "missing input file" << std::endl; return 1; } if (output_path == 0) { std::cerr << "missing output file" << std::endl; return 1; } if (chunk == 0) { std::cerr << "missing chunk name" << std::endl; return 1; } if (error_count != 0) return 1; // 3. Attempt to extract the chunk. try { OpenAxiom::Memory::FileMapping file(input_path); std::ofstream os(output_path); auto what = reinterpret_cast<const char8_t*>(chunk); os << resolve_chunk(what, Document(file)); } catch(const OpenAxiom::SystemError& e) { std::cerr << e.message() << std::endl; exit(1); } return 0; }