diff options
author | dos-reis <gdr@axiomatics.org> | 2013-06-22 02:59:16 +0000 |
---|---|---|
committer | dos-reis <gdr@axiomatics.org> | 2013-06-22 02:59:16 +0000 |
commit | 6d072bf729ad585b2c9ce1f95b062726bba2c896 (patch) | |
tree | 29cb572380fbf1f0f14163bb9542077497f90a55 | |
parent | b717872a67934dfb3a9782f58527aabe3d2a81b7 (diff) | |
download | open-axiom-6d072bf729ad585b2c9ce1f95b062726bba2c896.tar.gz |
Use Byte for data from input source.
-rw-r--r-- | src/include/storage.H | 12 | ||||
-rw-r--r-- | src/utils/hammer.cc | 42 | ||||
-rw-r--r-- | src/utils/sexpr.H | 4 | ||||
-rw-r--r-- | src/utils/sexpr.cc | 57 | ||||
-rw-r--r-- | src/utils/storage.cc | 6 | ||||
-rw-r--r-- | src/utils/string-pool.H | 14 | ||||
-rw-r--r-- | src/utils/string-pool.cc | 14 |
7 files changed, 78 insertions, 71 deletions
diff --git a/src/include/storage.H b/src/include/storage.H index b75ced9d..cd3a723a 100644 --- a/src/include/storage.H +++ b/src/include/storage.H @@ -46,6 +46,9 @@ #include <string> namespace OpenAxiom { + // Datatype for the unit of storage. + using Byte = unsigned char; + // ----------------- // -- SystemError -- // ----------------- @@ -64,9 +67,6 @@ namespace OpenAxiom { void filesystem_error(const std::string&); namespace Memory { - // Datatype for the unit of storage. - using Byte = unsigned char; - // Datatype for pointers to data. using Pointer = void*; @@ -302,11 +302,11 @@ namespace OpenAxiom { explicit FileMapping(std::string); FileMapping(FileMapping&&); ~FileMapping(); - const char* begin() const { return static_cast<const char*>(start); } - const char* end() const { return begin() + extent; } + const Byte* begin() const { return start; } + const Byte* end() const { return begin() + extent; } std::size_t size() const { return extent; } protected: - Pointer start; // address at the mapped storage + Byte* start; // address at the mapped storage size_t extent; // length (in bytes) of the storage private: FileMapping(const FileMapping&) = delete; diff --git a/src/utils/hammer.cc b/src/utils/hammer.cc index 3ab56c72..eb07b378 100644 --- a/src/utils/hammer.cc +++ b/src/utils/hammer.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2010, Gabriel Dos Reis. +// Copyright (C) 2013, Gabriel Dos Reis. // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -65,13 +65,13 @@ namespace OpenAxiom { // --------------- // Plain text, with no reference to any chunk. struct BasicText : Element { - BasicText(const char* f, const char* l) : span(f, l) { } + BasicText(const Byte* f, const Byte* l) : span(f, l) { } // Pointer to the start of this basic text element - const char* begin() const { return span.first; } + const Byte* begin() const { return span.first; } // Oone-past-the-end of the this basic text element. - const char* end() const { return span.second; } + const Byte* end() const { return span.second; } private: - std::pair<const char*, const char*> span; + std::pair<const Byte*, const Byte*> span; }; // --------------- @@ -100,7 +100,7 @@ namespace OpenAxiom { // Augment this chunk with a basic text in the open interval // [f,l). - CompositeText& add_text(const char* f, const char* l) { + CompositeText& add_text(const Byte* f, const Byte* l) { texts.push_back(BasicText(f, l)); push_back(&texts.back()); return *this; @@ -109,7 +109,7 @@ namespace OpenAxiom { // Augment this chunk with a reference to another chunk // named `n'. Note that we don't attempt to check for // possible circularities. - CompositeText reference_chunk(const char* f, const char* l) { + CompositeText reference_chunk(const Byte* f, const Byte* l) { refs.push_back(Reference(std::string(f, l))); push_back(&refs.back()); return *this; @@ -142,11 +142,11 @@ namespace OpenAxiom { CompositeText prose; // the prose around the chunks. ChunkTable defs; // chunk definition table. CompositeText* active_chunk; // chunk under construction. - const char* text_start; // begining of current basic text. + const Byte* text_start; // begining of current basic text. // Append basic text in the range `[text_start,last)' // to the current chunk. - void finish_chunk(const char* last) { + void finish_chunk(const Byte* last) { if (text_start != last) active_chunk->add_text(text_start, last); active_chunk = &prose; @@ -154,7 +154,7 @@ namespace OpenAxiom { } // Start a new chunk or extend an existing chunk. - void begin_chunk(const std::string& name, const char* start) { + void begin_chunk(const std::string& name, const Byte* start) { if (CompositeText* chunk = lookup_chunk(name)) active_chunk = chunk; else { @@ -177,7 +177,7 @@ namespace OpenAxiom { // Attempt to advance the cursor past newline marker. // Return true on sucess. static bool - saw_newline(const char*& cur, const char* end) { + saw_newline(const Byte*& cur, const Byte* end) { if (*cur == '\n') { ++cur; return true; @@ -193,7 +193,7 @@ namespace OpenAxiom { // Move `cur' to end of line or `end', whichever comes first. // Return true if the area swept consisted only of blank characters. static inline bool - trailing_blank(const char*& cur, const char* end) { + trailing_blank(const Byte*& cur, const Byte* end) { bool result = true; for (; cur < end and not saw_newline(cur, end); ++cur) result = isspace(*cur); @@ -203,7 +203,7 @@ namespace OpenAxiom { // Attempt to advance `cur' past the double left angle brackets // starting a chunk name. Returm true on success. static bool - chunk_name_began(const char*& cur, const char* end) { + chunk_name_began(const Byte*& cur, const Byte* end) { if (cur[0] == '<' and cur + 1 < end and cur[1] == '<') { cur += 2; return true; @@ -214,7 +214,7 @@ namespace OpenAxiom { // Attempt to move `cur' past the double right angle brackets // terminating a chunk name. Returm true on success. static bool - chunk_name_ended(const char*& cur, const char* end) { + chunk_name_ended(const Byte*& cur, const Byte* end) { if (cur[0] == '>' and cur + 1 < end and cur[1] == '>') { cur += 2; return true; @@ -225,7 +225,7 @@ namespace OpenAxiom { // We've just seen the start of a chunk reference; skip // characters till we seen of the chunk's name. static void - skip_to_end_of_chunk_name(const char*& cur, const char* end) { + skip_to_end_of_chunk_name(const Byte*& cur, const Byte* end) { while (cur < end) { if (looking_at_newline(*cur) or (cur + 1 < end and cur[0] == '>' and cur[1] == '>')) @@ -236,7 +236,7 @@ namespace OpenAxiom { // Move the cursor until end of line. static void - skip_to_end_of_line(const char*& cur, const char* end) { + skip_to_end_of_line(const Byte*& cur, const Byte* end) { while (cur < end) { if (saw_newline(cur, end)) break; @@ -246,22 +246,22 @@ namespace OpenAxiom { void Document::parse(const Memory::FileMapping& file) { - const char* cur = text_start; - const char* last = file.end(); + auto cur = text_start; + auto last = file.end(); // Process one line at a time. while (cur < last) { // 1. `@' ends previous chunk if (*cur == '@') { - const char* p = cur; + auto p = cur; if (trailing_blank(++cur, last)) finish_chunk(p); } // 2. `<<' introduces a chunk reference or a chunk definition. else if (chunk_name_began(cur, last)) { - const char* label_start = cur; + auto label_start = cur; skip_to_end_of_chunk_name(cur, last); if (chunk_name_ended(cur, last)) { - const char* label_end = cur - 2; + auto label_end = cur - 2; if (cur < last and *cur == '=') { if (trailing_blank(++cur, last)) { // chunk definition or extension diff --git a/src/utils/sexpr.H b/src/utils/sexpr.H index 7b62a328..358dd506 100644 --- a/src/utils/sexpr.H +++ b/src/utils/sexpr.H @@ -111,8 +111,8 @@ namespace OpenAxiom { Lexer(StringPool& pool, std::vector<Token>& toks) : strings(pool), tokens(toks) { } - const char* tokenize(const char*, const char*); - BasicString intern(const char* s, size_t n) { + const Byte* tokenize(const Byte*, const Byte*); + BasicString intern(const Byte* s, size_t n) { return strings.intern(s, n); } diff --git a/src/utils/sexpr.cc b/src/utils/sexpr.cc index 9e57765f..77ed3060 100644 --- a/src/utils/sexpr.cc +++ b/src/utils/sexpr.cc @@ -122,16 +122,16 @@ namespace OpenAxiom { // Move `cur' past all consecutive blank characters, and // return the new position. - static const char* - skip_blank(const char*& cur, const char* end) { + static const Byte* + skip_blank(const Byte*& cur, const Byte* end) { while (cur < end and is_blank(*cur)) ++cur; return cur; } // Move `cur' to end-of-line marker. - static const char* - skip_to_eol(const char*& cur, const char* end) { + static const Byte* + skip_to_eol(const Byte*& cur, const Byte* end) { // FIXME: properly handle CR+LF. while (cur < end and *cur != '\n') ++cur; @@ -139,8 +139,8 @@ namespace OpenAxiom { } // Move `cur' until a word boundary is reached. - static const char* - skip_to_word_boundary(const char*& cur, const char* end) { + static const Byte* + skip_to_word_boundary(const Byte*& cur, const Byte* end) { bool saw_escape = false; for (; cur < end; ++cur) { if (saw_escape) @@ -156,7 +156,7 @@ namespace OpenAxiom { // Move `cur' one-past a non-esacaped character `c'. // Return true if the character was seen. static bool - skip_to_nonescaped_char(const char*& cur, const char* end, char c) { + skip_to_nonescaped_char(const Byte*& cur, const Byte* end, char c) { bool saw_escape = false; for (; cur < end; ++cur) if (saw_escape) @@ -173,7 +173,7 @@ namespace OpenAxiom { // Move `cur' past the closing quote of string literal. // Return true if the closing fence was effectively seen. static inline bool - skip_to_quote(const char*& cur, const char* end) { + skip_to_quote(const Byte*& cur, const Byte* end) { return skip_to_nonescaped_char(cur, end, '"'); } @@ -204,7 +204,7 @@ namespace OpenAxiom { // an integer followrd by the equal sign or the sharp sign. // `cur' is moved along the way. static bool - only_digits_before_equal_or_shap(const char*& cur, const char* end) { + only_digits_before_equal_or_shap(const Byte*& cur, const Byte* end) { while (cur < end and isdigit(*cur)) ++cur; return cur < end and (*cur == '#' or *cur == '='); @@ -215,8 +215,8 @@ namespace OpenAxiom { // entirely of digits. static void maybe_reclassify(Token& t) { - const char* cur = t.lexeme->begin(); - const char* end = t.lexeme->end(); + const Byte* cur = t.lexeme->begin(); + const Byte* end = t.lexeme->end(); while (cur < end and isdigit(*cur)) ++cur; if (cur == end) @@ -226,7 +226,7 @@ namespace OpenAxiom { // Returns true if the first characters in the range // [cur, last) start an identifier. static bool - start_symbol(const char* cur, const char* last) { + start_symbol(const Byte* cur, const Byte* last) { if (cur >= last) return false; return identifier_part(*cur) @@ -236,7 +236,7 @@ namespace OpenAxiom { // We are processing a symbol token. Accumulate all // legitimate characters till the end of the token. static void - skip_to_end_of_symbol(const char*& cur, const char* end) { + skip_to_end_of_symbol(const Byte*& cur, const Byte* end) { const char c = *cur; if (*cur == '|') skip_to_nonescaped_char(++cur, end, c); @@ -247,22 +247,22 @@ namespace OpenAxiom { } static Token - match_maybe_symbol(Lexer* lexer, const char*& cur, const char* end) { + match_maybe_symbol(Lexer* lexer, const Byte*& cur, const Byte* end) { Token t = { Token::identifier, 0 }; - const char* start = cur; + const Byte* start = cur; skip_to_end_of_symbol(cur, end); t.lexeme = lexer->intern(start, cur - start); maybe_reclassify(t); return t; } - const char* - Lexer::tokenize(const char* cur, const char* end) { + const Byte* + Lexer::tokenize(const Byte* cur, const Byte* end) { while (skip_blank(cur, end) < end) { Token t = { Token::unknown, 0 }; switch (*cur) { case ';': { - const char* start = cur; + const Byte* start = cur; t.type = Token::semicolon; skip_to_eol(cur, end); t.lexeme = intern(start, cur - start); @@ -276,7 +276,7 @@ namespace OpenAxiom { break; case ',': { - const char* start = cur; + const Byte* start = cur; if (++cur < end and *cur == '@') { t.type = Token::comma_at; ++cur; @@ -292,7 +292,7 @@ namespace OpenAxiom { break; case '#': { - const char* start = cur; + const Byte* start = cur; if (cur + 1 < end and special_after_sharp(cur[1])) { t.type = Token::Type(OPENAXIOM_SEXPR_TOKEN2(cur[0], cur[1])); t.lexeme = intern(cur, 2); @@ -322,7 +322,7 @@ namespace OpenAxiom { } case '"': { - const char* start = cur; + const Byte* start = cur; skip_to_quote(++cur, end); t.type = Token::string; t.lexeme = intern(start, cur - start); @@ -333,7 +333,7 @@ namespace OpenAxiom { if (start_symbol(cur, end)) t = match_maybe_symbol(this, cur, end); else { - const char* start = cur; + const Byte* start = cur; skip_to_word_boundary(++cur, end); t.lexeme = intern(start, cur - start); } @@ -651,7 +651,7 @@ namespace OpenAxiom { // The sequence of characters in [cur, last) consists // entirely of digits. Return the corresponding natural value. static size_t - natural_value(const char* cur, const char* last) { + natural_value(const Byte* cur, const Byte* last) { size_t n = 0; for (; cur < last; ++cur) // FIXME: check for overflow. @@ -678,7 +678,7 @@ namespace OpenAxiom { equal_character_name(BasicString lhs, const char* rhs) { if (lhs->size() != strlen(rhs)) return false; - for (const char* cur = lhs->begin(); cur != lhs->end(); ++cur) + for (const Byte* cur = lhs->begin(); cur != lhs->end(); ++cur) if (tolower(*cur) != *rhs++) return false; return true; @@ -839,6 +839,11 @@ namespace OpenAxiom { Parser::Parser(Allocator& a, std::vector<const Syntax*>& v) : alloc(a), syns(v) { } + static std::string + to_string(BasicString s) { + return { s->begin(), s->end() }; + } + const Syntax* Parser::parse_syntax(const Token*& cur, const Token* last) { if (not skip_ignorable_tokens(cur, last)) @@ -898,7 +903,7 @@ namespace OpenAxiom { default: parse_error(std::string("parse error before ") - + cur->lexeme->begin()); + + to_string(cur->lexeme)); return 0; // never executed } } @@ -915,7 +920,7 @@ namespace OpenAxiom { std::vector<Token> tokens; Memory::FileMapping input(s); Lexer lexer(raw_strs, tokens); - const char* rest = lexer.tokenize(input.begin(), input.end()); + const Byte* rest = lexer.tokenize(input.begin(), input.end()); if (rest != input.end()) syntax_error("syntax error"); Parser parser(allocator, *this); diff --git a/src/utils/storage.cc b/src/utils/storage.cc index 54d14761..183dcd20 100644 --- a/src/utils/storage.cc +++ b/src/utils/storage.cc @@ -266,7 +266,8 @@ namespace OpenAxiom { HANDLE mapping = CreateFileMapping(file, 0, PAGE_READONLY, 0, 0, 0); if (mapping == 0) filesystem_error("could not map file " + path); - start = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0); + start = static_cast<Byte*> + (MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0)); extent = GetFileSize(file, 0); CloseHandle(mapping); CloseHandle(file); @@ -280,7 +281,8 @@ namespace OpenAxiom { int fd = open(path.c_str(), O_RDONLY); if (fd < 0) filesystem_error("could not open " + path); - start = mmap(Pointer(), s.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + start = static_cast<Byte*> + (mmap(Pointer(), s.st_size, PROT_READ, MAP_PRIVATE, fd, 0)); close(fd); if (start == MAP_FAILED) filesystem_error("could not map file " + path); diff --git a/src/utils/string-pool.H b/src/utils/string-pool.H index 311db1bf..5ed06fe3 100644 --- a/src/utils/string-pool.H +++ b/src/utils/string-pool.H @@ -47,12 +47,12 @@ namespace OpenAxiom { // ---------------- // String data allocated from a stringpool. struct StringItem { - const char* begin() const { return text; } - const char* end() const { return text + length; } + const Byte* begin() const { return text; } + const Byte* end() const { return text + length; } size_t size() const { return length; } - bool equal(const char*, size_t) const; + bool equal(const Byte*, size_t) const; protected: - const char* text; // pointer to the byte sequence + const Byte* text; // pointer to the byte sequence size_t length; // number of bytes in this string friend class StringPool; StringItem() : text(), length() { } @@ -72,11 +72,11 @@ namespace OpenAxiom { EntryType* intern(const char*); // Intern a sequence of characters given by its start and length. - EntryType* intern(const char*, size_t); + EntryType* intern(const Byte*, size_t); private: - Memory::Arena<char> strings; // character blub + Memory::Arena<Byte> strings; // character blub // Allocate a string from the internal arena. - const char* make_copy(const char*, size_t); + const Byte* make_copy(const Byte*, size_t); }; typedef const StringPool::EntryType* BasicString; diff --git a/src/utils/string-pool.cc b/src/utils/string-pool.cc index db5036a0..39186362 100644 --- a/src/utils/string-pool.cc +++ b/src/utils/string-pool.cc @@ -39,7 +39,7 @@ namespace OpenAxiom { // -- StringItem -- // ---------------- bool - StringItem::equal(const char* str, size_t sz) const { + StringItem::equal(const Byte* str, size_t sz) const { if (length != sz) return false; for (size_t i = 0; i < sz; ++i) @@ -60,23 +60,23 @@ namespace OpenAxiom { // Return a hash for the string starting from `str' // of length `sz'. static size_t - hash(const char* str, size_t sz) { + hash(const Byte* str, size_t sz) { size_t h = 0; for(size_t i = 0; i < sz; ++i) h = str[i] + (h << 6) + (h << 16) - h; return h; } - const char* - StringPool::make_copy(const char* f, size_t sz) { - char* s = strings.allocate(sz + 1); + const Byte* + StringPool::make_copy(const Byte* f, size_t sz) { + Byte* s = strings.allocate(sz + 1); memcpy(s, f, sz); s[sz] = '\0'; return s; } StringPool::EntryType* - StringPool::intern(const char* src, size_t sz) { + StringPool::intern(const Byte* src, size_t sz) { const size_t h = hash(src, sz); EntryType* e = hash_chain(h); if (sz == 0) @@ -97,6 +97,6 @@ namespace OpenAxiom { StringPool::EntryType* StringPool::intern(const char* s) { - return intern(s, strlen(s)); + return intern(reinterpret_cast<const Byte*>(s), strlen(s)); } } |