diff options
| -rw-r--r-- | src/utils/hammer.cc | 543 | 
1 files changed, 276 insertions, 267 deletions
| diff --git a/src/utils/hammer.cc b/src/utils/hammer.cc index f4241aaf..003cd1f8 100644 --- a/src/utils/hammer.cc +++ b/src/utils/hammer.cc @@ -51,300 +51,308 @@  #include <open-axiom/storage>  #include <open-axiom/FileMapping> -namespace OpenAxiom { -   namespace Hammer { -      // ------------- -      // -- Element -- -      // ------------- -      // Base class of document elements. -      struct Element { -         virtual ~Element() { } -      }; - -      // --------------- -      // -- BasicText -- -      // --------------- -      // Plain text, with no reference to any chunk.   -      struct BasicText : Element { -         BasicText(const Byte* f, const Byte* l) : span(f, l) { } -         // Pointer to the start of this basic text element -         const Byte* begin() const { return span.first; } -         // One-past-the-end of the this basic text element. -         const Byte* end() const { return span.second; } -      private: -         std::pair<const Byte*, const Byte*> span; -      }; - -      // --------------- -      // -- Reference -- -      // --------------- -      // Reference to a a chunk by name. -      struct Reference : Element { -         explicit Reference(const std::string& s) : label(s) { } -         // Naame of the chunk referenced. -         const std::string& name() const { return label; } -      private: -         const std::string label; -      }; - -      // ------------------- -      // -- CompositeText -- -      // ------------------- -      // Sequence of basic elements and reference to chunks. -      struct CompositeText: private std::vector<const Element*> { -         typedef std::vector<const Element*> base; -         using base::iterator; -         using base::begin; -         using base::end; -         using base::size; -         using base::operator[]; - -         // Augment this chunk with a basic text in the open interval -         // [f,l). -         CompositeText& add_text(const Byte* f, const Byte* l) { -            texts.push_back(BasicText(f, l)); -            push_back(&texts.back()); -            return *this; -         } +// Workaround lack of standard streaming operation for std::u8string. +static std::ostream& print(std::ostream& os, const std::u8string& s) +{ +   constexpr auto cast = [](auto p) { return reinterpret_cast<const char*>(&*p); }; +   std::copy(cast(s.begin()), cast(s.end()), std::ostream_iterator<char>(os)); +   return os; +} -         // Augment this chunk with a reference to another chunk -         // named `n'.  Note that we don't attempt to check for -         // possible circularities. -         CompositeText reference_chunk(const Byte* f, const Byte* l) { -            refs.push_back(Reference(std::string(f, l))); -            push_back(&refs.back()); -            return *this; -         } +namespace OpenAxiom::Hammer { +   // ------------- +   // -- Element -- +   // ------------- +   // Base class of document elements. +   struct Element { +      virtual ~Element() = default; +   }; + +   // --------------- +   // -- BasicText -- +   // --------------- +   // Plain text, with no reference to any chunk.   +   struct BasicText : Element { +      BasicText(const char8_t* f, const char8_t* l) : span(f, l) { } +      // Pointer to the start of this basic text element +      const char8_t* begin() const { return span.first; } +      // One-past-the-end of the this basic text element. +      const char8_t* end() const { return span.second; } +   private: +      std::pair<const char8_t*, const char8_t*> span; +   }; + +   // --------------- +   // -- Reference -- +   // --------------- +   // Reference to a a chunk by name. +   struct Reference : Element { +      explicit Reference(const std::u8string& s) : label(s) { } +      // Naame of the chunk referenced. +      const std::u8string& name() const { return label; } +   private: +      const std::u8string label; +   }; + +   // ------------------- +   // -- CompositeText -- +   // ------------------- +   // Sequence of basic elements and reference to chunks. +   struct CompositeText: private std::vector<const Element*> { +      typedef std::vector<const Element*> base; +      using base::iterator; +      using base::begin; +      using base::end; +      using base::size; +      using base::operator[]; + +      // Augment this chunk with a basic text in the open interval +      // [f,l). +      CompositeText& add_text(const char8_t* f, const char8_t* l) { +         texts.push_back(BasicText(f, l)); +         push_back(&texts.back()); +         return *this; +      } -      private: -         std::list<BasicText> texts; -         std::list<Reference> refs; -      }; - -      // -------------- -      // -- Document -- -      // -------------- -      // A whole document; a sequence of chunks. -      struct Document : std::list<CompositeText> { -         Document(const Memory::FileMapping& file) -               : active_chunk(&prose), text_start(file.begin()) { -            parse(file); -         } +      // Augment this chunk with a reference to another chunk +      // named `n'.  Note that we don't attempt to check for +      // possible circularities. +      CompositeText reference_chunk(const char8_t* f, const char8_t* l) { +         refs.push_back(Reference(std::u8string(f, l))); +         push_back(&refs.back()); +         return *this; +      } -         // Return a pointer to a document chunk name `n'. -         // Otherwise, return null. -         CompositeText* lookup_chunk(const std::string& n) const { -            ChunkTable::const_iterator i = defs.find(n); -            return i == defs.end() ? 0 : i->second; -         } +   private: +      std::list<BasicText> texts; +      std::list<Reference> refs; +   }; + +   // -------------- +   // -- Document -- +   // -------------- +   // A whole document; a sequence of chunks. +   struct Document : std::list<CompositeText> { +      Document(const Memory::FileMapping& file) +            : active_chunk{&prose},  +               text_start{(reinterpret_cast<const char8_t*>(file.begin()))} +      { +         parse(file); +      } -      private: -         typedef std::map<std::string, CompositeText*> ChunkTable; -         CompositeText prose;         // the prose around the chunks. -         ChunkTable defs;             // chunk definition table. -         CompositeText* active_chunk; // chunk under construction. -         const Byte* text_start;      // begining of current basic text. - -         // Append basic text in the range `[text_start,last)' -         // to the current chunk. -         void finish_chunk(const Byte* last) { -            if (text_start != last) -               active_chunk->add_text(text_start, last); -            active_chunk = &prose; -            text_start = last; -         } +      // Return a pointer to a document chunk name `n'. +      // Otherwise, return null. +      CompositeText* lookup_chunk(const std::u8string& n) const { +         ChunkTable::const_iterator i = defs.find(n); +         return i == defs.end() ? 0 : i->second; +      } -         // Start a new chunk or extend an existing chunk. -         void begin_chunk(const std::string& name, const Byte* start) { -            if (CompositeText* chunk = lookup_chunk(name)) -               active_chunk = chunk; -            else { -               push_back(CompositeText()); -               defs[name] = active_chunk = &back(); -            } -            text_start = start; +   private: +      typedef std::map<std::u8string, CompositeText*> ChunkTable; +      CompositeText prose;         // the prose around the chunks. +      ChunkTable defs;             // chunk definition table. +      CompositeText* active_chunk; // chunk under construction. +      const char8_t* text_start;      // begining of current basic text. + +      // Append basic text in the range `[text_start,last)' +      // to the current chunk. +      void finish_chunk(const char8_t* last) { +         if (text_start != last) +            active_chunk->add_text(text_start, last); +         active_chunk = &prose; +         text_start = last; +      } + +      // Start a new chunk or extend an existing chunk. +      void begin_chunk(const std::u8string& name, const char8_t* start) { +         if (CompositeText* chunk = lookup_chunk(name)) +            active_chunk = chunk; +         else { +            push_back(CompositeText()); +            defs[name] = active_chunk = &back();           } +         text_start = start; +      } -         // Parse a file mapping into this document. -         void parse(const Memory::FileMapping&); -      }; +      // Parse a file mapping into this document. +      void parse(const Memory::FileMapping&); +   }; -      // Return true if the character `c' introduces a newline. -      static inline bool -      looking_at_newline(char c) { -         return c == '\n' or c == '\r'; -      } +   // Return true if the character `c' introduces a newline. +   static inline bool +   looking_at_newline(char8_t c) { +      return c == u8'\n' or c == u8'\r'; +   } -      // Attempt to advance the cursor past newline marker. -      // Return true on sucess. -      static bool -      saw_newline(const Byte*& cur, const Byte* end) { -         if (*cur == '\n') { +   // Attempt to advance the cursor past newline marker. +   // Return true on sucess. +   static bool +   saw_newline(const char8_t*& cur, const char8_t* end) { +      if (*cur == u8'\n') { +         ++cur; +         return true; +      } +      else if (*cur == u8'\r') { +         if (++cur < end and *cur == u8'\n')              ++cur; -            return true; -         } -         else if (*cur == '\r') { -            if (++cur < end and *cur == '\n') -               ++cur; -            return true; -         } -         return false; +         return true;        } +      return false; +   } -      // Move `cur' to end of line or `end', whichever comes first. -      // Return true if the area swept consisted only of blank characters. -      static inline bool -      trailing_blank(const Byte*& cur, const Byte* end) { -         bool result = true; -         for (; cur < end and not saw_newline(cur, end); ++cur) -            result = isspace(*cur); -         return result; -      } +   // Move `cur' to end of line or `end', whichever comes first. +   // Return true if the area swept consisted only of blank characters. +   static inline bool +   trailing_blank(const char8_t*& cur, const char8_t* end) { +      bool result = true; +      for (; cur < end and not saw_newline(cur, end); ++cur) +         result = isspace(*cur); +      return result; +   } -      // Attempt to advance `cur' past the double left angle brackets -      // starting a chunk name.  Returm true on success. -      static bool -      chunk_name_began(const Byte*& cur, const Byte* end) { -         if (cur[0] == '<' and cur + 1 < end and cur[1] == '<') { -            cur += 2; -            return true; -         } -         return false; +   // Attempt to advance `cur' past the double left angle brackets +   // starting a chunk name.  Returm true on success. +   static bool +   chunk_name_began(const char8_t*& cur, const char8_t* end) { +      if (cur[0] == u8'<' and cur + 1 < end and cur[1] == u8'<') { +         cur += 2; +         return true;        } +      return false; +   } -      // Attempt to move `cur' past the double right angle brackets -      // terminating a chunk name.  Returm true on success. -      static bool -      chunk_name_ended(const Byte*& cur, const Byte* end) { -         if (cur[0] == '>' and cur + 1 < end and cur[1] == '>') { -            cur += 2; -            return true; -         } -         return false; +   // Attempt to move `cur' past the double right angle brackets +   // terminating a chunk name.  Returm true on success. +   static bool +   chunk_name_ended(const char8_t*& cur, const char8_t* end) { +      if (cur[0] == u8'>' and cur + 1 < end and cur[1] == u8'>') { +         cur += 2; +         return true;        } +      return false; +   } -      // We've just seen the start of a chunk reference; skip -      // characters till we seen of the chunk's name. -      static void -      skip_to_end_of_chunk_name(const Byte*& cur, const Byte* end) { -         while (cur < end) { -            if (looking_at_newline(*cur) -                or (cur + 1 < end and cur[0] == '>' and cur[1] == '>')) -               return; -            ++cur; -         } +   // We've just seen the start of a chunk reference; skip +   // characters till we seen of the chunk's name. +   static void +   skip_to_end_of_chunk_name(const char8_t*& cur, const char8_t* end) { +      while (cur < end) { +         if (looking_at_newline(*cur) +               or (cur + 1 < end and cur[0] == u8'>' and cur[1] == u8'>')) +            return; +         ++cur;        } +   } -      // Move the cursor until end of line. -      static void -      skip_to_end_of_line(const Byte*& cur, const Byte* end) { -         while (cur < end) { -            if (saw_newline(cur, end)) -               break; -            ++cur; -         } +   // Move the cursor until end of line. +   static void +   skip_to_end_of_line(const char8_t*& cur, const char8_t* end) { +      while (cur < end) { +         if (saw_newline(cur, end)) +            break; +         ++cur;        } -       -      void -      Document::parse(const Memory::FileMapping& file) { -         auto cur = text_start; -         auto last = file.end(); -         // Process one line at a time. -         while (cur < last) { -            // 1. `@' ends previous chunk -            if (*cur == '@') { -               auto p = cur; -               if (trailing_blank(++cur, last)) -                  finish_chunk(p); -            } -            // 2. `<<' introduces a chunk reference or a chunk definition. -            else if (chunk_name_began(cur, last)) { -               auto label_start = cur; -               skip_to_end_of_chunk_name(cur, last); -               if (chunk_name_ended(cur, last)) { -                  auto label_end = cur - 2; -                  if (cur < last and *cur == '=') { -                     if (trailing_blank(++cur, last)) { -                        // chunk definition or extension -                        finish_chunk(label_start - 2); -                        begin_chunk(std::string(label_start, label_end), cur); -                     } -                  } -                  else if (trailing_blank(cur, last)) { -                     // This is just a reference to a chunk. -                     active_chunk->add_text(text_start, label_start - 2); -                     active_chunk->reference_chunk(label_start, label_end); -                     text_start = cur; +   } +    +   void +   Document::parse(const Memory::FileMapping& file) { +      auto cur = text_start; +      auto last = reinterpret_cast<const char8_t*>(file.end()); +      // Process one line at a time. +      while (cur < last) { +         // 1. `@' ends previous chunk +         if (*cur == u8'@') { +            auto p = cur; +            if (trailing_blank(++cur, last)) +               finish_chunk(p); +         } +         // 2. `<<' introduces a chunk reference or a chunk definition. +         else if (chunk_name_began(cur, last)) { +            auto label_start = cur; +            skip_to_end_of_chunk_name(cur, last); +            if (chunk_name_ended(cur, last)) { +               auto label_end = cur - 2; +               if (cur < last and *cur == u8'=') { +                  if (trailing_blank(++cur, last)) { +                     // chunk definition or extension +                     finish_chunk(label_start - 2); +                     begin_chunk(std::u8string(label_start, label_end), cur);                    } -                  else -                     skip_to_end_of_line(cur, last);                 } +               else if (trailing_blank(cur, last)) { +                  // This is just a reference to a chunk. +                  active_chunk->add_text(text_start, label_start - 2); +                  active_chunk->reference_chunk(label_start, label_end); +                  text_start = cur; +               } +               else +                  skip_to_end_of_line(cur, last);              } -            else -               skip_to_end_of_line(cur, last);           } -         finish_chunk(cur); +         else +            skip_to_end_of_line(cur, last);        } +      finish_chunk(cur); +   } -      // Capture  chunk resolution in a document. -      struct resolve_chunk { -         resolve_chunk(const std::string& s, const Document& f) -               : name(s), doc(f) { } -         const std::string name; // name of the chunk -         const Document& doc;    // document containing the chunk. -      }; - -      // Print the resolution of a chunk name onto an output stream. -      std::ostream& -      operator<<(std::ostream& os, const resolve_chunk& rc) { -         // FIXME: no attempt at detecting circularities. -         const CompositeText* doc = rc.doc.lookup_chunk(rc.name); -         if (doc == 0) { -            std::cerr << "chunk " << rc.name << " is undefined" << std::endl; +   // Capture  chunk resolution in a document. +   struct resolve_chunk { +      resolve_chunk(const std::u8string& s, const Document& f) +            : name(s), doc(f) { } +      const std::u8string name; // name of the chunk +      const Document& doc;    // document containing the chunk. +   }; + +   // Print the resolution of a chunk name onto an output stream. +   std::ostream& +   operator<<(std::ostream& os, const resolve_chunk& rc) { +      // FIXME: no attempt at detecting circularities. +      const CompositeText* doc = rc.doc.lookup_chunk(rc.name); +      if (doc == 0) { +         print(std::cerr << "chunk ", rc.name) << " is undefined" << std::endl; +         exit(1); +      } +      for (std::size_t i = 0; i < doc->size(); ++i) { +         const Element* elt = (*doc)[i]; +         if (const BasicText* t = dynamic_cast<const BasicText*>(elt)) +            std::copy(t->begin(), t->end(), +                        std::ostream_iterator<char>(os)); +         else if (const Reference* r = dynamic_cast<const Reference*>(elt)) +            os << resolve_chunk(r->name(), rc.doc); +         else { +            std::cerr << "unknown document element" << std::endl;              exit(1);           } -         for (std::size_t i = 0; i < doc->size(); ++i) { -            const Element* elt = (*doc)[i]; -            if (const BasicText* t = dynamic_cast<const BasicText*>(elt)) -               std::copy(t->begin(), t->end(), -                         std::ostream_iterator<char>(os)); -            else if (const Reference* r = dynamic_cast<const Reference*>(elt)) -               os << resolve_chunk(r->name(), rc.doc); -            else { -               std::cerr << "unknown document element" << std::endl; -               exit(1); -            } -         } - -         return os;        } -      // Return true if the `arg' is the option named`opt'. -      static inline bool -      is_option(const char* arg, const char* opt) { -         return strcmp(arg, opt) == 0; -      } +      return os; +   } -      // `arg' is a argument on the command line.  If `arg' -      // does not match option name `opt', return null.  Otherwise, -      // return a pointer to the terminating NUL character if there -      // is no specified value for that option, or a pointer to the -      // start of the value. -      static const char* -      is_named_arg(const char* arg, const char* opt) { -         const int n = strlen(opt); -         int i = 0; -         // Get out if argion name does not match. -         // Note:  Ideally, we could use strncmp().  However, that -         // function is not available in C++98, so we cannot depend on it. -         for (; i < n ; ++i) -            if (arg[i] != opt[i]) -               return 0; - -         if (arg[i] == '\0') -            return arg + i;     // no value for the option. -         return arg + n + 1;    // being of the value. -      } +   // Return true if the `arg' is the option named`opt'. +   static inline bool +   is_option(const char* arg, const char* opt) { +      return strcmp(arg, opt) == 0; +   } + +   // `arg' is a argument on the command line.  If `arg' +   // does not match option name `opt', return null.  Otherwise, +   // return a pointer to the terminating NUL character if there +   // is no specified value for that option, or a pointer to the +   // start of the value. +   static const char* +   is_named_arg(const char* arg, const char* opt) { +      const int n = strlen(opt); +      int i = 0; +      // Get out if argion name does not match. +      // Note:  Ideally, we could use strncmp().  However, that +      // function is not available in C++98, so we cannot depend on it. +      for (; i < n ; ++i) +         if (arg[i] != opt[i]) +            return 0; + +      if (arg[i] == '\0') +         return arg + i;     // no value for the option. +      return arg + n + 1;    // being of the value.     }  } @@ -353,9 +361,9 @@ int  main(int argc, char* argv[]) {     using namespace OpenAxiom::Hammer;     int error_count = 0; -   const char* chunk = 0;      // chunck to tangle -   const char* output_path = 0; // path to the output file -   const char* input_path = 0;  // path to the input file. +   const char* chunk = nullptr;      // chunck to tangle +   const char* output_path = nullptr; // path to the output file +   const char* input_path = nullptr;  // path to the input file.     // 1. Process command line arguments.     for (int pos = 1; error_count == 0 and pos < argc; ++pos) {        if (const char* val = is_named_arg(argv[pos], "--tangle")) { @@ -407,7 +415,8 @@ main(int argc, char* argv[]) {     try {        OpenAxiom::Memory::FileMapping file(input_path);        std::ofstream os(output_path); -      os << resolve_chunk(chunk, Document(file)); +      auto what = reinterpret_cast<const char8_t*>(chunk); +      os << resolve_chunk(what, Document(file));     }     catch(const OpenAxiom::SystemError& e) {        std::cerr << e.message() << std::endl; | 
