aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordos-reis <gdr@axiomatics.org>2013-06-22 02:59:16 +0000
committerdos-reis <gdr@axiomatics.org>2013-06-22 02:59:16 +0000
commit6d072bf729ad585b2c9ce1f95b062726bba2c896 (patch)
tree29cb572380fbf1f0f14163bb9542077497f90a55
parentb717872a67934dfb3a9782f58527aabe3d2a81b7 (diff)
downloadopen-axiom-6d072bf729ad585b2c9ce1f95b062726bba2c896.tar.gz
Use Byte for data from input source.
-rw-r--r--src/include/storage.H12
-rw-r--r--src/utils/hammer.cc42
-rw-r--r--src/utils/sexpr.H4
-rw-r--r--src/utils/sexpr.cc57
-rw-r--r--src/utils/storage.cc6
-rw-r--r--src/utils/string-pool.H14
-rw-r--r--src/utils/string-pool.cc14
7 files changed, 78 insertions, 71 deletions
diff --git a/src/include/storage.H b/src/include/storage.H
index b75ced9d..cd3a723a 100644
--- a/src/include/storage.H
+++ b/src/include/storage.H
@@ -46,6 +46,9 @@
#include <string>
namespace OpenAxiom {
+ // Datatype for the unit of storage.
+ using Byte = unsigned char;
+
// -----------------
// -- SystemError --
// -----------------
@@ -64,9 +67,6 @@ namespace OpenAxiom {
void filesystem_error(const std::string&);
namespace Memory {
- // Datatype for the unit of storage.
- using Byte = unsigned char;
-
// Datatype for pointers to data.
using Pointer = void*;
@@ -302,11 +302,11 @@ namespace OpenAxiom {
explicit FileMapping(std::string);
FileMapping(FileMapping&&);
~FileMapping();
- const char* begin() const { return static_cast<const char*>(start); }
- const char* end() const { return begin() + extent; }
+ const Byte* begin() const { return start; }
+ const Byte* end() const { return begin() + extent; }
std::size_t size() const { return extent; }
protected:
- Pointer start; // address at the mapped storage
+ Byte* start; // address at the mapped storage
size_t extent; // length (in bytes) of the storage
private:
FileMapping(const FileMapping&) = delete;
diff --git a/src/utils/hammer.cc b/src/utils/hammer.cc
index 3ab56c72..eb07b378 100644
--- a/src/utils/hammer.cc
+++ b/src/utils/hammer.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2010, Gabriel Dos Reis.
+// Copyright (C) 2013, Gabriel Dos Reis.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -65,13 +65,13 @@ namespace OpenAxiom {
// ---------------
// Plain text, with no reference to any chunk.
struct BasicText : Element {
- BasicText(const char* f, const char* l) : span(f, l) { }
+ BasicText(const Byte* f, const Byte* l) : span(f, l) { }
// Pointer to the start of this basic text element
- const char* begin() const { return span.first; }
+ const Byte* begin() const { return span.first; }
// One-past-the-end of this basic text element.
- const char* end() const { return span.second; }
+ const Byte* end() const { return span.second; }
private:
- std::pair<const char*, const char*> span;
+ std::pair<const Byte*, const Byte*> span;
};
// ---------------
@@ -100,7 +100,7 @@ namespace OpenAxiom {
// Augment this chunk with a basic text in the half-open interval
// [f,l).
- CompositeText& add_text(const char* f, const char* l) {
+ CompositeText& add_text(const Byte* f, const Byte* l) {
texts.push_back(BasicText(f, l));
push_back(&texts.back());
return *this;
@@ -109,7 +109,7 @@ namespace OpenAxiom {
// Augment this chunk with a reference to another chunk
// named `n'. Note that we don't attempt to check for
// possible circularities.
- CompositeText reference_chunk(const char* f, const char* l) {
+ CompositeText reference_chunk(const Byte* f, const Byte* l) {
refs.push_back(Reference(std::string(f, l)));
push_back(&refs.back());
return *this;
@@ -142,11 +142,11 @@ namespace OpenAxiom {
CompositeText prose; // the prose around the chunks.
ChunkTable defs; // chunk definition table.
CompositeText* active_chunk; // chunk under construction.
- const char* text_start; // begining of current basic text.
+ const Byte* text_start; // begining of current basic text.
// Append basic text in the range `[text_start,last)'
// to the current chunk.
- void finish_chunk(const char* last) {
+ void finish_chunk(const Byte* last) {
if (text_start != last)
active_chunk->add_text(text_start, last);
active_chunk = &prose;
@@ -154,7 +154,7 @@ namespace OpenAxiom {
}
// Start a new chunk or extend an existing chunk.
- void begin_chunk(const std::string& name, const char* start) {
+ void begin_chunk(const std::string& name, const Byte* start) {
if (CompositeText* chunk = lookup_chunk(name))
active_chunk = chunk;
else {
@@ -177,7 +177,7 @@ namespace OpenAxiom {
// Attempt to advance the cursor past newline marker.
// Return true on success.
static bool
- saw_newline(const char*& cur, const char* end) {
+ saw_newline(const Byte*& cur, const Byte* end) {
if (*cur == '\n') {
++cur;
return true;
@@ -193,7 +193,7 @@ namespace OpenAxiom {
// Move `cur' to end of line or `end', whichever comes first.
// Return true if the area swept consisted only of blank characters.
static inline bool
- trailing_blank(const char*& cur, const char* end) {
+ trailing_blank(const Byte*& cur, const Byte* end) {
bool result = true;
for (; cur < end and not saw_newline(cur, end); ++cur)
result = isspace(*cur);
@@ -203,7 +203,7 @@ namespace OpenAxiom {
// Attempt to advance `cur' past the double left angle brackets
// starting a chunk name. Return true on success.
static bool
- chunk_name_began(const char*& cur, const char* end) {
+ chunk_name_began(const Byte*& cur, const Byte* end) {
if (cur[0] == '<' and cur + 1 < end and cur[1] == '<') {
cur += 2;
return true;
@@ -214,7 +214,7 @@ namespace OpenAxiom {
// Attempt to move `cur' past the double right angle brackets
// terminating a chunk name. Return true on success.
static bool
- chunk_name_ended(const char*& cur, const char* end) {
+ chunk_name_ended(const Byte*& cur, const Byte* end) {
if (cur[0] == '>' and cur + 1 < end and cur[1] == '>') {
cur += 2;
return true;
@@ -225,7 +225,7 @@ namespace OpenAxiom {
// We've just seen the start of a chunk reference; skip
// characters until we see the end of the chunk's name.
static void
- skip_to_end_of_chunk_name(const char*& cur, const char* end) {
+ skip_to_end_of_chunk_name(const Byte*& cur, const Byte* end) {
while (cur < end) {
if (looking_at_newline(*cur)
or (cur + 1 < end and cur[0] == '>' and cur[1] == '>'))
@@ -236,7 +236,7 @@ namespace OpenAxiom {
// Move the cursor until end of line.
static void
- skip_to_end_of_line(const char*& cur, const char* end) {
+ skip_to_end_of_line(const Byte*& cur, const Byte* end) {
while (cur < end) {
if (saw_newline(cur, end))
break;
@@ -246,22 +246,22 @@ namespace OpenAxiom {
void
Document::parse(const Memory::FileMapping& file) {
- const char* cur = text_start;
- const char* last = file.end();
+ auto cur = text_start;
+ auto last = file.end();
// Process one line at a time.
while (cur < last) {
// 1. `@' ends previous chunk
if (*cur == '@') {
- const char* p = cur;
+ auto p = cur;
if (trailing_blank(++cur, last))
finish_chunk(p);
}
// 2. `<<' introduces a chunk reference or a chunk definition.
else if (chunk_name_began(cur, last)) {
- const char* label_start = cur;
+ auto label_start = cur;
skip_to_end_of_chunk_name(cur, last);
if (chunk_name_ended(cur, last)) {
- const char* label_end = cur - 2;
+ auto label_end = cur - 2;
if (cur < last and *cur == '=') {
if (trailing_blank(++cur, last)) {
// chunk definition or extension
diff --git a/src/utils/sexpr.H b/src/utils/sexpr.H
index 7b62a328..358dd506 100644
--- a/src/utils/sexpr.H
+++ b/src/utils/sexpr.H
@@ -111,8 +111,8 @@ namespace OpenAxiom {
Lexer(StringPool& pool, std::vector<Token>& toks)
: strings(pool), tokens(toks) { }
- const char* tokenize(const char*, const char*);
- BasicString intern(const char* s, size_t n) {
+ const Byte* tokenize(const Byte*, const Byte*);
+ BasicString intern(const Byte* s, size_t n) {
return strings.intern(s, n);
}
diff --git a/src/utils/sexpr.cc b/src/utils/sexpr.cc
index 9e57765f..77ed3060 100644
--- a/src/utils/sexpr.cc
+++ b/src/utils/sexpr.cc
@@ -122,16 +122,16 @@ namespace OpenAxiom {
// Move `cur' past all consecutive blank characters, and
// return the new position.
- static const char*
- skip_blank(const char*& cur, const char* end) {
+ static const Byte*
+ skip_blank(const Byte*& cur, const Byte* end) {
while (cur < end and is_blank(*cur))
++cur;
return cur;
}
// Move `cur' to end-of-line marker.
- static const char*
- skip_to_eol(const char*& cur, const char* end) {
+ static const Byte*
+ skip_to_eol(const Byte*& cur, const Byte* end) {
// FIXME: properly handle CR+LF.
while (cur < end and *cur != '\n')
++cur;
@@ -139,8 +139,8 @@ namespace OpenAxiom {
}
// Move `cur' until a word boundary is reached.
- static const char*
- skip_to_word_boundary(const char*& cur, const char* end) {
+ static const Byte*
+ skip_to_word_boundary(const Byte*& cur, const Byte* end) {
bool saw_escape = false;
for (; cur < end; ++cur) {
if (saw_escape)
@@ -156,7 +156,7 @@ namespace OpenAxiom {
// Move `cur' one-past a non-escaped character `c'.
// Return true if the character was seen.
static bool
- skip_to_nonescaped_char(const char*& cur, const char* end, char c) {
+ skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) {
bool saw_escape = false;
for (; cur < end; ++cur)
if (saw_escape)
@@ -173,7 +173,7 @@ namespace OpenAxiom {
// Move `cur' past the closing quote of string literal.
// Return true if the closing fence was effectively seen.
static inline bool
- skip_to_quote(const char*& cur, const char* end) {
+ skip_to_quote(const Byte*& cur, const Byte* end) {
return skip_to_nonescaped_char(cur, end, '"');
}
@@ -204,7 +204,7 @@ namespace OpenAxiom {
// an integer followed by the equal sign or the sharp sign.
// `cur' is moved along the way.
static bool
- only_digits_before_equal_or_shap(const char*& cur, const char* end) {
+ only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) {
while (cur < end and isdigit(*cur))
++cur;
return cur < end and (*cur == '#' or *cur == '=');
@@ -215,8 +215,8 @@ namespace OpenAxiom {
// entirely of digits.
static void
maybe_reclassify(Token& t) {
- const char* cur = t.lexeme->begin();
- const char* end = t.lexeme->end();
+ const Byte* cur = t.lexeme->begin();
+ const Byte* end = t.lexeme->end();
while (cur < end and isdigit(*cur))
++cur;
if (cur == end)
@@ -226,7 +226,7 @@ namespace OpenAxiom {
// Returns true if the first characters in the range
// [cur, last) start an identifier.
static bool
- start_symbol(const char* cur, const char* last) {
+ start_symbol(const Byte* cur, const Byte* last) {
if (cur >= last)
return false;
return identifier_part(*cur)
@@ -236,7 +236,7 @@ namespace OpenAxiom {
// We are processing a symbol token. Accumulate all
// legitimate characters till the end of the token.
static void
- skip_to_end_of_symbol(const char*& cur, const char* end) {
+ skip_to_end_of_symbol(const Byte*& cur, const Byte* end) {
const char c = *cur;
if (*cur == '|')
skip_to_nonescaped_char(++cur, end, c);
@@ -247,22 +247,22 @@ namespace OpenAxiom {
}
static Token
- match_maybe_symbol(Lexer* lexer, const char*& cur, const char* end) {
+ match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) {
Token t = { Token::identifier, 0 };
- const char* start = cur;
+ const Byte* start = cur;
skip_to_end_of_symbol(cur, end);
t.lexeme = lexer->intern(start, cur - start);
maybe_reclassify(t);
return t;
}
- const char*
- Lexer::tokenize(const char* cur, const char* end) {
+ const Byte*
+ Lexer::tokenize(const Byte* cur, const Byte* end) {
while (skip_blank(cur, end) < end) {
Token t = { Token::unknown, 0 };
switch (*cur) {
case ';': {
- const char* start = cur;
+ const Byte* start = cur;
t.type = Token::semicolon;
skip_to_eol(cur, end);
t.lexeme = intern(start, cur - start);
@@ -276,7 +276,7 @@ namespace OpenAxiom {
break;
case ',': {
- const char* start = cur;
+ const Byte* start = cur;
if (++cur < end and *cur == '@') {
t.type = Token::comma_at;
++cur;
@@ -292,7 +292,7 @@ namespace OpenAxiom {
break;
case '#': {
- const char* start = cur;
+ const Byte* start = cur;
if (cur + 1 < end and special_after_sharp(cur[1])) {
t.type = Token::Type(OPENAXIOM_SEXPR_TOKEN2(cur[0], cur[1]));
t.lexeme = intern(cur, 2);
@@ -322,7 +322,7 @@ namespace OpenAxiom {
}
case '"': {
- const char* start = cur;
+ const Byte* start = cur;
skip_to_quote(++cur, end);
t.type = Token::string;
t.lexeme = intern(start, cur - start);
@@ -333,7 +333,7 @@ namespace OpenAxiom {
if (start_symbol(cur, end))
t = match_maybe_symbol(this, cur, end);
else {
- const char* start = cur;
+ const Byte* start = cur;
skip_to_word_boundary(++cur, end);
t.lexeme = intern(start, cur - start);
}
@@ -651,7 +651,7 @@ namespace OpenAxiom {
// The sequence of characters in [cur, last) consists
// entirely of digits. Return the corresponding natural value.
static size_t
- natural_value(const char* cur, const char* last) {
+ natural_value(const Byte* cur, const Byte* last) {
size_t n = 0;
for (; cur < last; ++cur)
// FIXME: check for overflow.
@@ -678,7 +678,7 @@ namespace OpenAxiom {
equal_character_name(BasicString lhs, const char* rhs) {
if (lhs->size() != strlen(rhs))
return false;
- for (const char* cur = lhs->begin(); cur != lhs->end(); ++cur)
+ for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur)
if (tolower(*cur) != *rhs++)
return false;
return true;
@@ -839,6 +839,11 @@ namespace OpenAxiom {
Parser::Parser(Allocator& a, std::vector<const Syntax*>& v)
: alloc(a), syns(v) { }
+ static std::string
+ to_string(BasicString s) {
+ return { s->begin(), s->end() };
+ }
+
const Syntax*
Parser::parse_syntax(const Token*& cur, const Token* last) {
if (not skip_ignorable_tokens(cur, last))
@@ -898,7 +903,7 @@ namespace OpenAxiom {
default:
parse_error(std::string("parse error before ")
- + cur->lexeme->begin());
+ + to_string(cur->lexeme));
return 0; // never executed
}
}
@@ -915,7 +920,7 @@ namespace OpenAxiom {
std::vector<Token> tokens;
Memory::FileMapping input(s);
Lexer lexer(raw_strs, tokens);
- const char* rest = lexer.tokenize(input.begin(), input.end());
+ const Byte* rest = lexer.tokenize(input.begin(), input.end());
if (rest != input.end())
syntax_error("syntax error");
Parser parser(allocator, *this);
diff --git a/src/utils/storage.cc b/src/utils/storage.cc
index 54d14761..183dcd20 100644
--- a/src/utils/storage.cc
+++ b/src/utils/storage.cc
@@ -266,7 +266,8 @@ namespace OpenAxiom {
HANDLE mapping = CreateFileMapping(file, 0, PAGE_READONLY, 0, 0, 0);
if (mapping == 0)
filesystem_error("could not map file " + path);
- start = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+ start = static_cast<Byte*>
+ (MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0));
extent = GetFileSize(file, 0);
CloseHandle(mapping);
CloseHandle(file);
@@ -280,7 +281,8 @@ namespace OpenAxiom {
int fd = open(path.c_str(), O_RDONLY);
if (fd < 0)
filesystem_error("could not open " + path);
- start = mmap(Pointer(), s.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ start = static_cast<Byte*>
+ (mmap(Pointer(), s.st_size, PROT_READ, MAP_PRIVATE, fd, 0));
close(fd);
if (start == MAP_FAILED)
filesystem_error("could not map file " + path);
diff --git a/src/utils/string-pool.H b/src/utils/string-pool.H
index 311db1bf..5ed06fe3 100644
--- a/src/utils/string-pool.H
+++ b/src/utils/string-pool.H
@@ -47,12 +47,12 @@ namespace OpenAxiom {
// ----------------
// String data allocated from a stringpool.
struct StringItem {
- const char* begin() const { return text; }
- const char* end() const { return text + length; }
+ const Byte* begin() const { return text; }
+ const Byte* end() const { return text + length; }
size_t size() const { return length; }
- bool equal(const char*, size_t) const;
+ bool equal(const Byte*, size_t) const;
protected:
- const char* text; // pointer to the byte sequence
+ const Byte* text; // pointer to the byte sequence
size_t length; // number of bytes in this string
friend class StringPool;
StringItem() : text(), length() { }
@@ -72,11 +72,11 @@ namespace OpenAxiom {
EntryType* intern(const char*);
// Intern a sequence of characters given by its start and length.
- EntryType* intern(const char*, size_t);
+ EntryType* intern(const Byte*, size_t);
private:
- Memory::Arena<char> strings; // character blub
+ Memory::Arena<Byte> strings; // character blub
// Allocate a string from the internal arena.
- const char* make_copy(const char*, size_t);
+ const Byte* make_copy(const Byte*, size_t);
};
typedef const StringPool::EntryType* BasicString;
diff --git a/src/utils/string-pool.cc b/src/utils/string-pool.cc
index db5036a0..39186362 100644
--- a/src/utils/string-pool.cc
+++ b/src/utils/string-pool.cc
@@ -39,7 +39,7 @@ namespace OpenAxiom {
// -- StringItem --
// ----------------
bool
- StringItem::equal(const char* str, size_t sz) const {
+ StringItem::equal(const Byte* str, size_t sz) const {
if (length != sz)
return false;
for (size_t i = 0; i < sz; ++i)
@@ -60,23 +60,23 @@ namespace OpenAxiom {
// Return a hash for the string starting from `str'
// of length `sz'.
static size_t
- hash(const char* str, size_t sz) {
+ hash(const Byte* str, size_t sz) {
size_t h = 0;
for(size_t i = 0; i < sz; ++i)
h = str[i] + (h << 6) + (h << 16) - h;
return h;
}
- const char*
- StringPool::make_copy(const char* f, size_t sz) {
- char* s = strings.allocate(sz + 1);
+ const Byte*
+ StringPool::make_copy(const Byte* f, size_t sz) {
+ Byte* s = strings.allocate(sz + 1);
memcpy(s, f, sz);
s[sz] = '\0';
return s;
}
StringPool::EntryType*
- StringPool::intern(const char* src, size_t sz) {
+ StringPool::intern(const Byte* src, size_t sz) {
const size_t h = hash(src, sz);
EntryType* e = hash_chain(h);
if (sz == 0)
@@ -97,6 +97,6 @@ namespace OpenAxiom {
StringPool::EntryType*
StringPool::intern(const char* s) {
- return intern(s, strlen(s));
+ return intern(reinterpret_cast<const Byte*>(s), strlen(s));
}
}