aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGabriel Dos Reis <GabrielDosReis@users.noreply.github.com>2022-08-13 15:46:19 -0700
committerGabriel Dos Reis <GabrielDosReis@users.noreply.github.com>2022-08-13 15:46:19 -0700
commitcc66f97cf1c45ad3af2456e3fe6d4d27ab80d643 (patch)
tree5bba37dee2d810bd46f6d360f3df7e60e21df9de
parentfcbc13e8dbdd934e02299ca31f001f95fcfdb45c (diff)
downloadopen-axiom-cc66f97cf1c45ad3af2456e3fe6d4d27ab80d643.tar.gz
Use `char8_t` in the tool hammer
-rw-r--r--src/utils/hammer.cc543
1 files changed, 276 insertions, 267 deletions
diff --git a/src/utils/hammer.cc b/src/utils/hammer.cc
index f4241aaf..003cd1f8 100644
--- a/src/utils/hammer.cc
+++ b/src/utils/hammer.cc
@@ -51,300 +51,308 @@
#include <open-axiom/storage>
#include <open-axiom/FileMapping>
-namespace OpenAxiom {
- namespace Hammer {
- // -------------
- // -- Element --
- // -------------
- // Base class of document elements.
- struct Element {
- virtual ~Element() { }
- };
-
- // ---------------
- // -- BasicText --
- // ---------------
- // Plain text, with no reference to any chunk.
- struct BasicText : Element {
- BasicText(const Byte* f, const Byte* l) : span(f, l) { }
- // Pointer to the start of this basic text element
- const Byte* begin() const { return span.first; }
- // One-past-the-end of the this basic text element.
- const Byte* end() const { return span.second; }
- private:
- std::pair<const Byte*, const Byte*> span;
- };
-
- // ---------------
- // -- Reference --
- // ---------------
- // Reference to a a chunk by name.
- struct Reference : Element {
- explicit Reference(const std::string& s) : label(s) { }
- // Naame of the chunk referenced.
- const std::string& name() const { return label; }
- private:
- const std::string label;
- };
-
- // -------------------
- // -- CompositeText --
- // -------------------
- // Sequence of basic elements and reference to chunks.
- struct CompositeText: private std::vector<const Element*> {
- typedef std::vector<const Element*> base;
- using base::iterator;
- using base::begin;
- using base::end;
- using base::size;
- using base::operator[];
-
- // Augment this chunk with a basic text in the open interval
- // [f,l).
- CompositeText& add_text(const Byte* f, const Byte* l) {
- texts.push_back(BasicText(f, l));
- push_back(&texts.back());
- return *this;
- }
+// Workaround lack of standard streaming operation for std::u8string.
+static std::ostream& print(std::ostream& os, const std::u8string& s)
+{
+ constexpr auto cast = [](auto p) { return reinterpret_cast<const char*>(&*p); };
+ std::copy(cast(s.begin()), cast(s.end()), std::ostream_iterator<char>(os));
+ return os;
+}
- // Augment this chunk with a reference to another chunk
- // named `n'. Note that we don't attempt to check for
- // possible circularities.
- CompositeText reference_chunk(const Byte* f, const Byte* l) {
- refs.push_back(Reference(std::string(f, l)));
- push_back(&refs.back());
- return *this;
- }
+namespace OpenAxiom::Hammer {
+ // -------------
+ // -- Element --
+ // -------------
+ // Base class of document elements.
+ struct Element {
+ virtual ~Element() = default;
+ };
+
+ // ---------------
+ // -- BasicText --
+ // ---------------
+ // Plain text, with no reference to any chunk.
+ struct BasicText : Element {
+ BasicText(const char8_t* f, const char8_t* l) : span(f, l) { }
+ // Pointer to the start of this basic text element
+ const char8_t* begin() const { return span.first; }
+ // One-past-the-end of this basic text element.
+ const char8_t* end() const { return span.second; }
+ private:
+ std::pair<const char8_t*, const char8_t*> span;
+ };
+
+ // ---------------
+ // -- Reference --
+ // ---------------
+ // Reference to a chunk by name.
+ struct Reference : Element {
+ explicit Reference(const std::u8string& s) : label(s) { }
+ // Name of the chunk referenced.
+ const std::u8string& name() const { return label; }
+ private:
+ const std::u8string label;
+ };
+
+ // -------------------
+ // -- CompositeText --
+ // -------------------
+ // Sequence of basic elements and reference to chunks.
+ struct CompositeText: private std::vector<const Element*> {
+ typedef std::vector<const Element*> base;
+ using base::iterator;
+ using base::begin;
+ using base::end;
+ using base::size;
+ using base::operator[];
+
+ // Augment this chunk with a basic text in the half-open interval
+ // [f,l).
+ CompositeText& add_text(const char8_t* f, const char8_t* l) {
+ texts.push_back(BasicText(f, l));
+ push_back(&texts.back());
+ return *this;
+ }
- private:
- std::list<BasicText> texts;
- std::list<Reference> refs;
- };
-
- // --------------
- // -- Document --
- // --------------
- // A whole document; a sequence of chunks.
- struct Document : std::list<CompositeText> {
- Document(const Memory::FileMapping& file)
- : active_chunk(&prose), text_start(file.begin()) {
- parse(file);
- }
+ // Augment this chunk with a reference to another chunk whose
+ // name is the text in `[f,l)'. Note that we don't attempt to
+ // check for possible circularities.
+ CompositeText reference_chunk(const char8_t* f, const char8_t* l) {
+ refs.push_back(Reference(std::u8string(f, l)));
+ push_back(&refs.back());
+ return *this;
+ }
- // Return a pointer to a document chunk name `n'.
- // Otherwise, return null.
- CompositeText* lookup_chunk(const std::string& n) const {
- ChunkTable::const_iterator i = defs.find(n);
- return i == defs.end() ? 0 : i->second;
- }
+ private:
+ std::list<BasicText> texts;
+ std::list<Reference> refs;
+ };
+
+ // --------------
+ // -- Document --
+ // --------------
+ // A whole document; a sequence of chunks.
+ struct Document : std::list<CompositeText> {
+ Document(const Memory::FileMapping& file)
+ : active_chunk{&prose},
+ text_start{(reinterpret_cast<const char8_t*>(file.begin()))}
+ {
+ parse(file);
+ }
- private:
- typedef std::map<std::string, CompositeText*> ChunkTable;
- CompositeText prose; // the prose around the chunks.
- ChunkTable defs; // chunk definition table.
- CompositeText* active_chunk; // chunk under construction.
- const Byte* text_start; // begining of current basic text.
-
- // Append basic text in the range `[text_start,last)'
- // to the current chunk.
- void finish_chunk(const Byte* last) {
- if (text_start != last)
- active_chunk->add_text(text_start, last);
- active_chunk = &prose;
- text_start = last;
- }
+ // Return a pointer to the document chunk named `n', or null
+ // if no chunk with that name has been defined.
+ CompositeText* lookup_chunk(const std::u8string& n) const {
+ ChunkTable::const_iterator i = defs.find(n);
+ return i == defs.end() ? 0 : i->second;
+ }
- // Start a new chunk or extend an existing chunk.
- void begin_chunk(const std::string& name, const Byte* start) {
- if (CompositeText* chunk = lookup_chunk(name))
- active_chunk = chunk;
- else {
- push_back(CompositeText());
- defs[name] = active_chunk = &back();
- }
- text_start = start;
+ private:
+ typedef std::map<std::u8string, CompositeText*> ChunkTable;
+ CompositeText prose; // the prose around the chunks.
+ ChunkTable defs; // chunk definition table.
+ CompositeText* active_chunk; // chunk under construction.
+ const char8_t* text_start; // beginning of current basic text.
+
+ // Append basic text in the range `[text_start,last)'
+ // to the current chunk.
+ void finish_chunk(const char8_t* last) {
+ if (text_start != last)
+ active_chunk->add_text(text_start, last);
+ active_chunk = &prose;
+ text_start = last;
+ }
+
+ // Start a new chunk or extend an existing chunk.
+ void begin_chunk(const std::u8string& name, const char8_t* start) {
+ if (CompositeText* chunk = lookup_chunk(name))
+ active_chunk = chunk;
+ else {
+ push_back(CompositeText());
+ defs[name] = active_chunk = &back();
}
+ text_start = start;
+ }
- // Parse a file mapping into this document.
- void parse(const Memory::FileMapping&);
- };
+ // Parse a file mapping into this document.
+ void parse(const Memory::FileMapping&);
+ };
- // Return true if the character `c' introduces a newline.
- static inline bool
- looking_at_newline(char c) {
- return c == '\n' or c == '\r';
- }
+ // Return true if the character `c' introduces a newline.
+ static inline bool
+ looking_at_newline(char8_t c) {
+ return c == u8'\n' or c == u8'\r';
+ }
- // Attempt to advance the cursor past newline marker.
- // Return true on sucess.
- static bool
- saw_newline(const Byte*& cur, const Byte* end) {
- if (*cur == '\n') {
+ // Attempt to advance the cursor past a newline marker.
+ // Return true on success.
+ static bool
+ saw_newline(const char8_t*& cur, const char8_t* end) {
+ if (*cur == u8'\n') {
+ ++cur;
+ return true;
+ }
+ else if (*cur == u8'\r') {
+ if (++cur < end and *cur == u8'\n')
++cur;
- return true;
- }
- else if (*cur == '\r') {
- if (++cur < end and *cur == '\n')
- ++cur;
- return true;
- }
- return false;
+ return true;
}
+ return false;
+ }
- // Move `cur' to end of line or `end', whichever comes first.
- // Return true if the area swept consisted only of blank characters.
- static inline bool
- trailing_blank(const Byte*& cur, const Byte* end) {
- bool result = true;
- for (; cur < end and not saw_newline(cur, end); ++cur)
- result = isspace(*cur);
- return result;
- }
+ // Move `cur' to end of line or `end', whichever comes first.
+ // Return true if the area swept consisted only of blank characters.
+ static inline bool
+ trailing_blank(const char8_t*& cur, const char8_t* end) {
+ bool result = true;
+ for (; cur < end and not saw_newline(cur, end); ++cur)
+ result = isspace(*cur);
+ return result;
+ }
- // Attempt to advance `cur' past the double left angle brackets
- // starting a chunk name. Returm true on success.
- static bool
- chunk_name_began(const Byte*& cur, const Byte* end) {
- if (cur[0] == '<' and cur + 1 < end and cur[1] == '<') {
- cur += 2;
- return true;
- }
- return false;
+ // Attempt to advance `cur' past the double left angle brackets
+ // starting a chunk name. Return true on success.
+ static bool
+ chunk_name_began(const char8_t*& cur, const char8_t* end) {
+ if (cur[0] == u8'<' and cur + 1 < end and cur[1] == u8'<') {
+ cur += 2;
+ return true;
}
+ return false;
+ }
- // Attempt to move `cur' past the double right angle brackets
- // terminating a chunk name. Returm true on success.
- static bool
- chunk_name_ended(const Byte*& cur, const Byte* end) {
- if (cur[0] == '>' and cur + 1 < end and cur[1] == '>') {
- cur += 2;
- return true;
- }
- return false;
+ // Attempt to move `cur' past the double right angle brackets
+ // terminating a chunk name. Return true on success.
+ static bool
+ chunk_name_ended(const char8_t*& cur, const char8_t* end) {
+ if (cur[0] == u8'>' and cur + 1 < end and cur[1] == u8'>') {
+ cur += 2;
+ return true;
}
+ return false;
+ }
- // We've just seen the start of a chunk reference; skip
- // characters till we seen of the chunk's name.
- static void
- skip_to_end_of_chunk_name(const Byte*& cur, const Byte* end) {
- while (cur < end) {
- if (looking_at_newline(*cur)
- or (cur + 1 < end and cur[0] == '>' and cur[1] == '>'))
- return;
- ++cur;
- }
+ // We've just seen the start of a chunk reference; skip
+ // characters until we reach the end of the chunk's name or a newline.
+ static void
+ skip_to_end_of_chunk_name(const char8_t*& cur, const char8_t* end) {
+ while (cur < end) {
+ if (looking_at_newline(*cur)
+ or (cur + 1 < end and cur[0] == u8'>' and cur[1] == u8'>'))
+ return;
+ ++cur;
}
+ }
- // Move the cursor until end of line.
- static void
- skip_to_end_of_line(const Byte*& cur, const Byte* end) {
- while (cur < end) {
- if (saw_newline(cur, end))
- break;
- ++cur;
- }
+ // Move the cursor until end of line.
+ static void
+ skip_to_end_of_line(const char8_t*& cur, const char8_t* end) {
+ while (cur < end) {
+ if (saw_newline(cur, end))
+ break;
+ ++cur;
}
-
- void
- Document::parse(const Memory::FileMapping& file) {
- auto cur = text_start;
- auto last = file.end();
- // Process one line at a time.
- while (cur < last) {
- // 1. `@' ends previous chunk
- if (*cur == '@') {
- auto p = cur;
- if (trailing_blank(++cur, last))
- finish_chunk(p);
- }
- // 2. `<<' introduces a chunk reference or a chunk definition.
- else if (chunk_name_began(cur, last)) {
- auto label_start = cur;
- skip_to_end_of_chunk_name(cur, last);
- if (chunk_name_ended(cur, last)) {
- auto label_end = cur - 2;
- if (cur < last and *cur == '=') {
- if (trailing_blank(++cur, last)) {
- // chunk definition or extension
- finish_chunk(label_start - 2);
- begin_chunk(std::string(label_start, label_end), cur);
- }
- }
- else if (trailing_blank(cur, last)) {
- // This is just a reference to a chunk.
- active_chunk->add_text(text_start, label_start - 2);
- active_chunk->reference_chunk(label_start, label_end);
- text_start = cur;
+ }
+
+ void
+ Document::parse(const Memory::FileMapping& file) {
+ auto cur = text_start;
+ auto last = reinterpret_cast<const char8_t*>(file.end());
+ // Process one line at a time.
+ while (cur < last) {
+ // 1. `@' ends previous chunk
+ if (*cur == u8'@') {
+ auto p = cur;
+ if (trailing_blank(++cur, last))
+ finish_chunk(p);
+ }
+ // 2. `<<' introduces a chunk reference or a chunk definition.
+ else if (chunk_name_began(cur, last)) {
+ auto label_start = cur;
+ skip_to_end_of_chunk_name(cur, last);
+ if (chunk_name_ended(cur, last)) {
+ auto label_end = cur - 2;
+ if (cur < last and *cur == u8'=') {
+ if (trailing_blank(++cur, last)) {
+ // chunk definition or extension
+ finish_chunk(label_start - 2);
+ begin_chunk(std::u8string(label_start, label_end), cur);
}
- else
- skip_to_end_of_line(cur, last);
}
+ else if (trailing_blank(cur, last)) {
+ // This is just a reference to a chunk.
+ active_chunk->add_text(text_start, label_start - 2);
+ active_chunk->reference_chunk(label_start, label_end);
+ text_start = cur;
+ }
+ else
+ skip_to_end_of_line(cur, last);
}
- else
- skip_to_end_of_line(cur, last);
}
- finish_chunk(cur);
+ else
+ skip_to_end_of_line(cur, last);
}
+ finish_chunk(cur);
+ }
- // Capture chunk resolution in a document.
- struct resolve_chunk {
- resolve_chunk(const std::string& s, const Document& f)
- : name(s), doc(f) { }
- const std::string name; // name of the chunk
- const Document& doc; // document containing the chunk.
- };
-
- // Print the resolution of a chunk name onto an output stream.
- std::ostream&
- operator<<(std::ostream& os, const resolve_chunk& rc) {
- // FIXME: no attempt at detecting circularities.
- const CompositeText* doc = rc.doc.lookup_chunk(rc.name);
- if (doc == 0) {
- std::cerr << "chunk " << rc.name << " is undefined" << std::endl;
+ // Capture chunk resolution in a document.
+ struct resolve_chunk {
+ resolve_chunk(const std::u8string& s, const Document& f)
+ : name(s), doc(f) { }
+ const std::u8string name; // name of the chunk
+ const Document& doc; // document containing the chunk.
+ };
+
+ // Print the resolution of a chunk name onto an output stream.
+ std::ostream&
+ operator<<(std::ostream& os, const resolve_chunk& rc) {
+ // FIXME: no attempt at detecting circularities.
+ const CompositeText* doc = rc.doc.lookup_chunk(rc.name);
+ if (doc == 0) {
+ print(std::cerr << "chunk ", rc.name) << " is undefined" << std::endl;
+ exit(1);
+ }
+ for (std::size_t i = 0; i < doc->size(); ++i) {
+ const Element* elt = (*doc)[i];
+ if (const BasicText* t = dynamic_cast<const BasicText*>(elt))
+ std::copy(t->begin(), t->end(),
+ std::ostream_iterator<char>(os));
+ else if (const Reference* r = dynamic_cast<const Reference*>(elt))
+ os << resolve_chunk(r->name(), rc.doc);
+ else {
+ std::cerr << "unknown document element" << std::endl;
exit(1);
}
- for (std::size_t i = 0; i < doc->size(); ++i) {
- const Element* elt = (*doc)[i];
- if (const BasicText* t = dynamic_cast<const BasicText*>(elt))
- std::copy(t->begin(), t->end(),
- std::ostream_iterator<char>(os));
- else if (const Reference* r = dynamic_cast<const Reference*>(elt))
- os << resolve_chunk(r->name(), rc.doc);
- else {
- std::cerr << "unknown document element" << std::endl;
- exit(1);
- }
- }
-
- return os;
}
- // Return true if the `arg' is the option named`opt'.
- static inline bool
- is_option(const char* arg, const char* opt) {
- return strcmp(arg, opt) == 0;
- }
+ return os;
+ }
- // `arg' is a argument on the command line. If `arg'
- // does not match option name `opt', return null. Otherwise,
- // return a pointer to the terminating NUL character if there
- // is no specified value for that option, or a pointer to the
- // start of the value.
- static const char*
- is_named_arg(const char* arg, const char* opt) {
- const int n = strlen(opt);
- int i = 0;
- // Get out if argion name does not match.
- // Note: Ideally, we could use strncmp(). However, that
- // function is not available in C++98, so we cannot depend on it.
- for (; i < n ; ++i)
- if (arg[i] != opt[i])
- return 0;
-
- if (arg[i] == '\0')
- return arg + i; // no value for the option.
- return arg + n + 1; // being of the value.
- }
+ // Return true if `arg' is the option named `opt'.
+ static inline bool
+ is_option(const char* arg, const char* opt) {
+ return strcmp(arg, opt) == 0;
+ }
+
+ // `arg' is an argument on the command line. If `arg'
+ // does not match option name `opt', return null. Otherwise,
+ // return a pointer to the terminating NUL character if there
+ // is no specified value for that option, or a pointer to the
+ // start of the value.
+ static const char*
+ is_named_arg(const char* arg, const char* opt) {
+ const int n = strlen(opt);
+ int i = 0;
+ // Get out if option name does not match.
+ // Note: Ideally, we could use strncmp(). However, that
+ // function is not available in C++98, so we cannot depend on it.
+ for (; i < n ; ++i)
+ if (arg[i] != opt[i])
+ return 0;
+
+ if (arg[i] == '\0')
+ return arg + i; // no value for the option.
+ return arg + n + 1; // beginning of the value.
}
}
@@ -353,9 +361,9 @@ int
main(int argc, char* argv[]) {
using namespace OpenAxiom::Hammer;
int error_count = 0;
- const char* chunk = 0; // chunck to tangle
- const char* output_path = 0; // path to the output file
- const char* input_path = 0; // path to the input file.
+ const char* chunk = nullptr; // chunk to tangle
+ const char* output_path = nullptr; // path to the output file
+ const char* input_path = nullptr; // path to the input file.
// 1. Process command line arguments.
for (int pos = 1; error_count == 0 and pos < argc; ++pos) {
if (const char* val = is_named_arg(argv[pos], "--tangle")) {
@@ -407,7 +415,8 @@ main(int argc, char* argv[]) {
try {
OpenAxiom::Memory::FileMapping file(input_path);
std::ofstream os(output_path);
- os << resolve_chunk(chunk, Document(file));
+ auto what = reinterpret_cast<const char8_t*>(chunk);
+ os << resolve_chunk(what, Document(file));
}
catch(const OpenAxiom::SystemError& e) {
std::cerr << e.message() << std::endl;