// Copyright (C) 2013, Gabriel Dos Reis.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     - Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//
//     - Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in
//       the documentation and/or other materials provided with the
//       distribution.
//
//     - Neither the name of The Numerical Algorithms Group Ltd. nor the
//       names of its contributors may be used to endorse or promote products
//       derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// --% Author: Gabriel Dos Reis.
// --% Description:
// --%   This program implements basic functionalities for untangling
// --%   algebra source code from the pamphlets.  The syntax is that
// --%   of `noweb'.  A chunk definition starts with a pattern
// --%   <<name>>= on a line by itself, and ends with `@' by itself
// --%   on a line.  A chunk can refer to another chunk through
// --%   a pattern of the form `<<name>>'.

#include <string.h>
#include <stdlib.h>
#include <utility>
#include <string>
#include <iostream>
#include <fstream>
#include <iterator>
#include <list>
#include <vector>
#include <map>
#include <open-axiom/storage>
#include <open-axiom/FileMapping>

// Workaround lack of standard streaming operation for std::u8string.
static std::ostream& print(std::ostream& os, const std::u8string& s)
{
   constexpr auto cast = [](auto p) { return reinterpret_cast<const char*>(&*p); };
   std::copy(cast(s.begin()), cast(s.end()), std::ostream_iterator<char>(os));
   return os;
}

namespace OpenAxiom::Hammer {
   // -------------
   // -- Element --
   // -------------
   // Base class of document elements.
   struct Element {
      virtual ~Element() = default;
   };

   // ---------------
   // -- BasicText --
   // ---------------
   // Plain text, with no reference to any chunk.  
   struct BasicText : Element {
      BasicText(const char8_t* f, const char8_t* l) : span(f, l) { }
      // Pointer to the start of this basic text element
      const char8_t* begin() const { return span.first; }
      // One-past-the-end of the this basic text element.
      const char8_t* end() const { return span.second; }
   private:
      std::pair<const char8_t*, const char8_t*> span;
   };

   // ---------------
   // -- Reference --
   // ---------------
   // Reference to a a chunk by name.
   struct Reference : Element {
      explicit Reference(const std::u8string& s) : label(s) { }
      // Naame of the chunk referenced.
      const std::u8string& name() const { return label; }
   private:
      const std::u8string label;
   };

   // -------------------
   // -- CompositeText --
   // -------------------
   // Sequence of basic elements and reference to chunks.
   struct CompositeText: private std::vector<const Element*> {
      typedef std::vector<const Element*> base;
      using base::iterator;
      using base::begin;
      using base::end;
      using base::size;
      using base::operator[];

      // Augment this chunk with a basic text in the open interval
      // [f,l).
      CompositeText& add_text(const char8_t* f, const char8_t* l) {
         texts.push_back(BasicText(f, l));
         push_back(&texts.back());
         return *this;
      }

      // Augment this chunk with a reference to another chunk
      // named `n'.  Note that we don't attempt to check for
      // possible circularities.
      CompositeText reference_chunk(const char8_t* f, const char8_t* l) {
         refs.push_back(Reference(std::u8string(f, l)));
         push_back(&refs.back());
         return *this;
      }

   private:
      std::list<BasicText> texts;
      std::list<Reference> refs;
   };

   // --------------
   // -- Document --
   // --------------
   // A whole document; a sequence of chunks.
   struct Document : std::list<CompositeText> {
      Document(const Memory::FileMapping& file)
            : active_chunk{&prose}, 
               text_start{(reinterpret_cast<const char8_t*>(file.begin()))}
      {
         parse(file);
      }

      // Return a pointer to a document chunk name `n'.
      // Otherwise, return null.
      CompositeText* lookup_chunk(const std::u8string& n) const {
         ChunkTable::const_iterator i = defs.find(n);
         return i == defs.end() ? 0 : i->second;
      }

   private:
      typedef std::map<std::u8string, CompositeText*> ChunkTable;
      CompositeText prose;         // the prose around the chunks.
      ChunkTable defs;             // chunk definition table.
      CompositeText* active_chunk; // chunk under construction.
      const char8_t* text_start;      // begining of current basic text.

      // Append basic text in the range `[text_start,last)'
      // to the current chunk.
      void finish_chunk(const char8_t* last) {
         if (text_start != last)
            active_chunk->add_text(text_start, last);
         active_chunk = &prose;
         text_start = last;
      }

      // Start a new chunk or extend an existing chunk.
      void begin_chunk(const std::u8string& name, const char8_t* start) {
         if (CompositeText* chunk = lookup_chunk(name))
            active_chunk = chunk;
         else {
            push_back(CompositeText());
            defs[name] = active_chunk = &back();
         }
         text_start = start;
      }

      // Parse a file mapping into this document.
      void parse(const Memory::FileMapping&);
   };

   // Return true if the character `c' introduces a newline.
   static inline bool
   looking_at_newline(char8_t c) {
      return c == u8'\n' or c == u8'\r';
   }

   // Attempt to advance the cursor past newline marker.
   // Return true on sucess.
   static bool
   saw_newline(const char8_t*& cur, const char8_t* end) {
      if (*cur == u8'\n') {
         ++cur;
         return true;
      }
      else if (*cur == u8'\r') {
         if (++cur < end and *cur == u8'\n')
            ++cur;
         return true;
      }
      return false;
   }

   // Move `cur' to end of line or `end', whichever comes first.
   // Return true if the area swept consisted only of blank characters.
   static inline bool
   trailing_blank(const char8_t*& cur, const char8_t* end) {
      bool result = true;
      for (; cur < end and not saw_newline(cur, end); ++cur)
         result = isspace(*cur);
      return result;
   }

   // Attempt to advance `cur' past the double left angle brackets
   // starting a chunk name.  Returm true on success.
   static bool
   chunk_name_began(const char8_t*& cur, const char8_t* end) {
      if (cur[0] == u8'<' and cur + 1 < end and cur[1] == u8'<') {
         cur += 2;
         return true;
      }
      return false;
   }

   // Attempt to move `cur' past the double right angle brackets
   // terminating a chunk name.  Returm true on success.
   static bool
   chunk_name_ended(const char8_t*& cur, const char8_t* end) {
      if (cur[0] == u8'>' and cur + 1 < end and cur[1] == u8'>') {
         cur += 2;
         return true;
      }
      return false;
   }

   // We've just seen the start of a chunk reference; skip
   // characters till we seen of the chunk's name.
   static void
   skip_to_end_of_chunk_name(const char8_t*& cur, const char8_t* end) {
      while (cur < end) {
         if (looking_at_newline(*cur)
               or (cur + 1 < end and cur[0] == u8'>' and cur[1] == u8'>'))
            return;
         ++cur;
      }
   }

   // Move the cursor until end of line.
   static void
   skip_to_end_of_line(const char8_t*& cur, const char8_t* end) {
      while (cur < end) {
         if (saw_newline(cur, end))
            break;
         ++cur;
      }
   }
   
   void
   Document::parse(const Memory::FileMapping& file) {
      auto cur = text_start;
      auto last = reinterpret_cast<const char8_t*>(file.end());
      // Process one line at a time.
      while (cur < last) {
         // 1. `@' ends previous chunk
         if (*cur == u8'@') {
            auto p = cur;
            if (trailing_blank(++cur, last))
               finish_chunk(p);
         }
         // 2. `<<' introduces a chunk reference or a chunk definition.
         else if (chunk_name_began(cur, last)) {
            auto label_start = cur;
            skip_to_end_of_chunk_name(cur, last);
            if (chunk_name_ended(cur, last)) {
               auto label_end = cur - 2;
               if (cur < last and *cur == u8'=') {
                  if (trailing_blank(++cur, last)) {
                     // chunk definition or extension
                     finish_chunk(label_start - 2);
                     begin_chunk(std::u8string(label_start, label_end), cur);
                  }
               }
               else if (trailing_blank(cur, last)) {
                  // This is just a reference to a chunk.
                  active_chunk->add_text(text_start, label_start - 2);
                  active_chunk->reference_chunk(label_start, label_end);
                  text_start = cur;
               }
               else
                  skip_to_end_of_line(cur, last);
            }
         }
         else
            skip_to_end_of_line(cur, last);
      }
      finish_chunk(cur);
   }

   // Capture  chunk resolution in a document.
   struct resolve_chunk {
      resolve_chunk(const std::u8string& s, const Document& f)
            : name(s), doc(f) { }
      const std::u8string name; // name of the chunk
      const Document& doc;    // document containing the chunk.
   };

   // Print the resolution of a chunk name onto an output stream.
   std::ostream&
   operator<<(std::ostream& os, const resolve_chunk& rc) {
      // FIXME: no attempt at detecting circularities.
      const CompositeText* doc = rc.doc.lookup_chunk(rc.name);
      if (doc == 0) {
         print(std::cerr << "chunk ", rc.name) << " is undefined" << std::endl;
         exit(1);
      }
      for (std::size_t i = 0; i < doc->size(); ++i) {
         const Element* elt = (*doc)[i];
         if (const BasicText* t = dynamic_cast<const BasicText*>(elt))
            std::copy(t->begin(), t->end(),
                        std::ostream_iterator<char>(os));
         else if (const Reference* r = dynamic_cast<const Reference*>(elt))
            os << resolve_chunk(r->name(), rc.doc);
         else {
            std::cerr << "unknown document element" << std::endl;
            exit(1);
         }
      }

      return os;
   }

   // Return true if the `arg' is the option named`opt'.
   static inline bool
   is_option(const char* arg, const char* opt) {
      return strcmp(arg, opt) == 0;
   }

   // `arg' is a argument on the command line.  If `arg'
   // does not match option name `opt', return null.  Otherwise,
   // return a pointer to the terminating NUL character if there
   // is no specified value for that option, or a pointer to the
   // start of the value.
   static const char*
   is_named_arg(const char* arg, const char* opt) {
      const int n = strlen(opt);
      int i = 0;
      // Get out if argion name does not match.
      // Note:  Ideally, we could use strncmp().  However, that
      // function is not available in C++98, so we cannot depend on it.
      for (; i < n ; ++i)
         if (arg[i] != opt[i])
            return 0;

      if (arg[i] == '\0')
         return arg + i;     // no value for the option.
      return arg + n + 1;    // being of the value.
   }
}


int
main(int argc, char* argv[]) {
   using namespace OpenAxiom::Hammer;
   int error_count = 0;
   const char* chunk = nullptr;      // chunck to tangle
   const char* output_path = nullptr; // path to the output file
   const char* input_path = nullptr;  // path to the input file.
   // 1. Process command line arguments.
   for (int pos = 1; error_count == 0 and pos < argc; ++pos) {
      if (const char* val = is_named_arg(argv[pos], "--tangle")) {
         if (chunk != 0) {
            std::cerr << "cannot tangle more than one chunk";
            ++error_count;
         }
         else
            chunk = *val == 0 ? "*" : val;
      }
      else if (const char* val = is_named_arg(argv[pos], "--output")) {
         if (*val == 0) {
            std::cerr << "missing output file name" << std::endl;
            ++error_count;
         }
         else
            output_path = val;
      }
      else if (argv[pos][0] == '-' and argv[pos][1] == '-') {
         std::cerr << "unknown option " << argv[pos] << std::endl;
         ++error_count;
      }
      else if (input_path != 0) {
         std::cerr << "there must be exactly one input file" << std::endl;
         ++error_count;
      }
      else
         input_path = argv[pos];
   }

   // 2. Basic sanity check.
   if (input_path == 0) {
      std::cerr << "missing input file" << std::endl;
      return 1;
   }
   if (output_path == 0) {
      std::cerr << "missing output file" << std::endl;
      return 1;
   }
   if (chunk == 0) {
      std::cerr << "missing chunk name" << std::endl;
      return 1;
   }

   if (error_count != 0)
      return 1;

   // 3. Attempt to extract the chunk.
   try {
      OpenAxiom::Memory::FileMapping file(input_path);
      std::ofstream os(output_path);
      auto what = reinterpret_cast<const char8_t*>(chunk);
      os << resolve_chunk(what, Document(file));
   }
   catch(const OpenAxiom::SystemError& e) {
      std::cerr << e.message() << std::endl;
      exit(1);
   }
   return 0;
}