aboutsummaryrefslogtreecommitdiff
path: root/src/boot/bemol.cc
blob: 34870e1353da250e3579e8ff05ad4b8662cf6428 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
// -*- C++ -*-
// Copyright (C) 2014, Gabriel Dos Reis.
// All rights reserved.
// Written by Gabriel Dos Reis.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     - Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//
//     - Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in
//       the documentation and/or other materials provided with the
//       distribution.
//
//     - Neither the name of OpenAxiom. nor the names of its contributors
//       may be used to endorse or promote products derived from this
//       software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// --% Author: Gabriel Dos Reis
// --% Description:

#include <open-axiom/diagnostics>
#include <open-axiom/token>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <stack>
#include <iterator>
#include <ctype.h>

using namespace OpenAxiom;

// 
// -- Reading input source files --
// 

// A physical line is just raw text, with coupled with location
// information such as line and indentation column.
struct Line : std::string {
   LineNumber number;
   ColumnIndex indent;
   Line() : number(), indent() { }

   std::string sub_string(ColumnIndex s, ColumnIndex e) const {
      return substr(s, e - s);
   }
};

// A program fragment is a logical line, composed of possibly
// several physical lines subject to the off-side rule.  As a
// special case, a line ending with the underbar character
// continues to the next next with disregard to the off-side rule.
struct Fragment : std::vector<Line> {
   explicit operator bool() const { return not empty(); }
   bool line_continuation() const {
      return not empty() and back().back() == '_';
   }
   ColumnIndex last_indent() const {
      return empty() ? 0 : back().indent;
   }
   using std::vector<Line>::operator[];
   const Line& operator()(const OpenAxiom::FragmentCursor& pos) const {
      return (*this)[pos.line];
   }
   uint8_t operator[](const OpenAxiom::FragmentCursor& pos) const {
      return (*this)[pos.line][pos.column];
   }
   uint8_t advance(OpenAxiom::FragmentCursor& pos) const {
      return (*this)[pos.line][pos.column++];
   }

   bool covering(const OpenAxiom::FragmentCursor& pos) const {
      return pos.column < (*this)[pos.line].size();
   }
};

// Formatting program fragments.
static std::ostream&
operator<<(std::ostream& os, const Fragment& f) {
   std::copy(f.begin(), f.end(),
             std::ostream_iterator<std::string>(std::cout, "\n"));
   return os;
}


// A source input transform a character stream into a program fragment
// stream, delivering a fragment one at a time.
struct SourceInput {
   SourceInput(std::istream& is) : input(is) { }
   Fragment get();

private:
   std::istream& input;
   Line line;
};

// Return the indentation level of a line.
// FIXME: reject or expand tabs as appropriate.
static ColumnIndex
indentation(const Line& line) {
   ColumnIndex idx { };
   for (auto c : line) {
      if (not isspace(c))
         break;
      ++idx;
   }
   return idx;
}

// Remove trailing white-space characters from the line.
static Line&
trim_right(Line& line) {
   auto n = line.length();
   while (n > 0 and isspace(line[n-1]))
      --n;
   line.resize(n);
   return line;
}

// Clean up and dress up the line with indentation information.
static Line&
prop_up(Line& line) {
   line.indent = indentation(trim_right(line));
   return line;
}

// Return true if line is entirely a negative comment.
static bool
negative_comment(const Line& line) {
   if (line.indent + 1 >= line.length())
      return false;
   return line[line.indent] == '-' and line[line.indent + 1] == '-';
}

// Return true if line is either empty or a negative comment.
static bool
blank(const Line& line) {
   return line.empty() or negative_comment(line);
}

// Return true if line is entirely a positive comment, i.e. a description.
static bool
positive_comment(const Line& line) {
   if (line.indent + 1 >= line.length())
      return false;
   return line[line.indent] == '+' and line[line.indent + 1] == '+';
}

// Decompose the input souce file into fragments, and return one
// fragment at a time.
Fragment
SourceInput::get() {
   Fragment fragment;
   std::stack<ColumnIndex> indents;

   if (not line.empty()) {
      indents.push(line.indent);
      fragment.push_back(line);
   }

   while (std::getline(input, line)) {
      ++line.number;
      if (blank(prop_up(line)))
         continue;              // Don't bother with ignorable comments.
      else if (fragment.line_continuation())
         ;
      else if (indents.empty()) {
         if (fragment.empty() and line.indent != 0)
            std::cout << "warning: white space at begining of fragment"
                      << " on line " << line.number << '\n';
         indents.push(line.indent);
      }
      else if (line.indent == 0 and not positive_comment(fragment.back()))
         break;                 // A completely new line; save for later.
      else if (line.indent > indents.top())
         indents.push(line.indent);
      else {
         while (line.indent < indents.top())
            indents.pop();
      }
      fragment.push_back(line);
   }
   return fragment;
}

// 
// -- Decomposing source files into lexical units of information --
// 

struct Locus {
   OpenAxiom::LineNumber line;
   OpenAxiom::ColumnIndex column;
};

static std::ostream&
operator<<(std::ostream& os, const Locus& l)
{
   os << '{' << l.line << ", " << l.column << '}';
   return os;
}

struct BemolToken {
   using Location = ::Locus;
   OpenAxiom::TokenCategory category;
   OpenAxiom::TokenValue value;
   Locus start;
   Locus end;

   explicit operator bool() const { return category != TokenCategory::EOS; }
};

static std::ostream&
operator<<(std::ostream& os, const BemolToken& t) {
   os << t.category << '{' << t.start << '-' << t.end << '}';
   return os;
}

using TokenSequence = OpenAxiom::TokenStream<BemolToken>;

// --

static void
translate_source_file(SourceInput& src, std::ostream& out, const char* path) {
   while (auto f = src.get()) {
      out << "================================================\n";
      out << f;
      try {
         TokenSequence ts { f, OpenAxiom::Language::Boot };
         for (auto& t : ts) {
            out << '\t' << t;
            switch (t.category) {
            case TokenCategory::Junk:
            case TokenCategory::Unclassified:
               out //<< f[t.start.line].sub_string(t.start.column, t.end.column)
                   << " in file " << path
                   << " at line " << t.start.line
                   << ", column " << t.start.column;
               break;
            default:
               break;
            }
            out << '\n';
         }
      }
      catch(const OpenAxiom::EndOfStringUnseen& e) {
         std::cerr << path << ": syntax error: "
                   << "premature end of line before matching quote "
                   << "of string literal on line " << e.line
                   << " at column " << e.column
                   << std::endl;
      }
      catch (const OpenAxiom::MissingExponent& e) {
         std::cerr << path << ": syntax error: "
                   << "missing exponent of floating point constant "
                   << "on line " << e.line
                   << ", column " << e.column
                   << std::endl;
      }
      out << "================================================\n";
   }
   out << std::flush;
}

static void
process_file(const char* path) {
   std::ifstream in { path };
   if (!in) {
      std::cerr << "error: could not open file `" << path << "'"
                << std::endl;
      return;
   }
   SourceInput src { in };
   translate_source_file(src, std::cout, path);
}

int main(int argc, char* argv[]) {
   for (int i = 1; i < argc; ++i) {
      process_file(argv[i]);
   }
}