aboutsummaryrefslogtreecommitdiff
path: root/src/hyper/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/hyper/lex.c')
-rw-r--r--src/hyper/lex.c877
1 files changed, 877 insertions, 0 deletions
diff --git a/src/hyper/lex.c b/src/hyper/lex.c
new file mode 100644
index 00000000..6b4bf6d5
--- /dev/null
+++ b/src/hyper/lex.c
@@ -0,0 +1,877 @@
+/*
+ Copyright (C) 1991-2002, The Numerical Algorithms Group Ltd.
+ All rights reserved.
+ Copyright (C) 2007-2008, Gabriel Dos Reis.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+ - Neither the name of The Numerical Algorithms Group Ltd. nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * Lexical analyzer stuff.  Exported functions:
+ *
+ *   parser_init()           -- initialize the parser tables with keywords
+ *   init_scanner()          -- initialize scanner for reading a new page
+ *   get_token()             -- set the "token" variable to be the next
+ *                              token in the current input stream
+ *   save_scanner_state()    -- save the current state of the scanner so
+ *                              that the scanner input mode may be switched
+ *   restore_scanner_state() -- undo the saved state
+ *
+ * Note: The scanner reads from four separate input locations depending on
+ * the value of the variable "input_type".  If this variable is:
+ *
+ *   FromFile       -- it reads from the file pointed to by "cfile"
+ *   FromString     -- it reads from the string "input_string"
+ *   FromSpadSocket -- it reads from the socket pointed to by spad_socket
+ *   FromUnixFD     -- it reads from the stdio stream "unixfd"
+ *
+ * The variable useAscii tells us if we should translate graphics
+ * characters on the fly.  It is initialised in init_scanner().
+ */
+#define _LEX_C
+#include "axiom-c-macros.h"
+#include "useproto.h"
+#include "debug.h"
+
+int useAscii;
+
+#define PARSER 1
+
+#include "hyper.h"
+#include "hterror.h"
+#include "lex.h"
+
+#include "all_hyper_proto.H1"
+#include "sockio-c.H1"
+
+
+#include <ctype.h>
+#include <setjmp.h>
+
+extern int gTtFontIs850; /* read by init_scanner() to choose the useAscii default */
+
+
+StateNode *top_state_node; /* most recent entry pushed by save_scanner_state() */
+HyperDocPage *gPageBeingParsed; /* page currently being parsed */
+extern jmp_buf jmpbuf; /* longjmp target used when an error is reported */
+extern char ebuffer[]; /* printed as the token name right after token_name() is called */
+short int gInSpadsrc = 0; /* tested by end_type() together with the Spadsrc token type */
+short int gInVerbatim; /* when set, begin_type()/end_type() skip their error reports */
+
+/* Parser variables */
+long fpos; /* Position of pointer in file in characters */
+long page_start_fpos; /* where the current page's fpos started */
+long keyword_fpos; /* fpos of beginning of most recent keyword */
+Token token; /* most recently read token */
+int last_token; /* nonzero when unget_token() has pushed a token back */
+int input_type; /* indicates where to read input */
+char *input_string; /* remaining input when reading FromString or FromSpadSocket */
+int last_ch; /* character pushed back by unget_char(), or NoChar */
+int last_command; /* the last socket command */
+int keyword; /* the last command was a keyword, or a group */
+int cfd; /* current file descriptor */
+FILE *cfile; /* currently active file pointer */
+FILE *unixfd; /* stream read when input_type is FromUnixFD */
+int line_number; /* incremented for each newline read, decremented when one is pushed back */
+
+char sock_buf[1024]; /* buffer for socket input */
+
+#define TokenHashSize 100
+
+static HashTable tokenHashTable; /* hash table of parser tokens */
+
+/*
+ * Debugging aid: print the type name and text of token t to stderr,
+ * prefixed with the name of the calling routine.
+ *
+ * get_token() sets the type to -1 at end of input, and a token may have
+ * no id at all, so both are checked before they are used.
+ * NOTE(review): types above the end of token_table (end_type() adds 3000
+ * to the type) are not guarded against here, because the table size is
+ * not visible in this file.
+ */
+void
+dumpToken(char *caller, Token t)
+{
+    const char *type_name = (t.type < 0) ? "(none)" : token_table[t.type];
+    const char *id = (t.id != NULL) ? t.id : "(null)";
+
+    fprintf(stderr, "%s:dumpToken type=%s id=%s\n", caller, type_name, id);
+}
+
+
+/*
+ * Build the keyword hash table: one heap-allocated Token for each entry
+ * of token_table with index 2 through NumberUserTokens inclusive, keyed
+ * by the keyword text.
+ */
+void
+parser_init(void)
+{
+    int type;
+
+    hash_init(&tokenHashTable, TokenHashSize,
+              (EqualFunction)string_equal,
+              (HashcodeFunction)string_hash);
+
+    type = 2;
+    while (type <= NumberUserTokens) {
+        Token *entry = (Token *) halloc(sizeof(Token), "Token");
+
+        entry->id = token_table[type];
+        entry->type = type;
+        hash_insert(&tokenHashTable, (char *)entry, entry->id);
+        type++;
+    }
+}
+
+/*
+ * Reset the scanner so that it reads a new page from a file.
+ *
+ * useAscii is taken from the HTASCII environment variable when it is set
+ * (true only for the exact value "yes").  Otherwise it is off when
+ * gTtFontIs850 is 1 and on in every other case.
+ */
+void
+init_scanner(void)
+{
+    char *ascii_env = getenv("HTASCII");
+
+    if (ascii_env != NULL)
+        useAscii = (strcmp(ascii_env, "yes") == 0);
+    else
+        useAscii = (gTtFontIs850 == 1) ? 0 : 1;
+
+    /* input source and position */
+    input_type = FromFile;
+    fpos = 0;
+    keyword_fpos = 0;
+    line_number = 1;
+
+    /* nothing pushed back, no keyword or socket command seen yet */
+    last_ch = NoChar;
+    last_token = 0;
+    keyword = 0;
+    last_command = -1;
+}
+
+/*
+ * Saved scanner states are kept on a stack (a linked list headed by
+ * top_state_node), so calls to save_scanner_state() may be nested.  Each
+ * restore_scanner_state() undoes the most recent save.
+ */
+
+/*
+ * Push a snapshot of the scanner variables (positions, pushed-back
+ * character and token, current token, input source, keyword flag) onto
+ * the list headed by top_state_node.
+ */
+void
+save_scanner_state(void)
+{
+    StateNode *snapshot = (StateNode *) halloc(sizeof(StateNode), "StateNode");
+
+    /* where the input comes from */
+    snapshot->input_type = input_type;
+    snapshot->input_string = input_string;
+    snapshot->cfile = cfile;
+
+    /* how far we have read */
+    snapshot->page_start_fpos = page_start_fpos;
+    snapshot->fpos = fpos;
+    snapshot->keyword_fpos = keyword_fpos;
+
+    /* current token and pushback state */
+    snapshot->token = token;
+    snapshot->last_token = last_token;
+    snapshot->last_ch = last_ch;
+    snapshot->keyword = keyword;
+
+    /* make the snapshot the new top of the stack */
+    snapshot->next = top_state_node;
+    top_state_node = snapshot;
+}
+
+/*
+ * Pop the most recent snapshot made by save_scanner_state() and copy it
+ * back into the scanner variables.  If the restored state has an open
+ * file, the file is repositioned to page_start_fpos + fpos, measured
+ * from the start of the file.
+ *
+ * Calling this with nothing saved is fatal: a message is printed and the
+ * process exits.
+ */
+void
+restore_scanner_state(void)
+{
+    StateNode *x = top_state_node;
+
+    if (top_state_node == NULL) {
+        fprintf(stderr, "Restore Scanner State: State empty\n");
+        exit(-1);
+    }
+    top_state_node = top_state_node->next;
+    page_start_fpos = x->page_start_fpos;
+    fpos = x->fpos;
+    keyword_fpos = x->keyword_fpos;
+    last_ch = x->last_ch;
+    last_token = x->last_token;
+    token = x->token;
+    input_type = x->input_type;
+    input_string = x->input_string;
+    cfile = x->cfile;
+    keyword = x->keyword;
+    if (cfile != NULL)
+        fseek(cfile, fpos + page_start_fpos, SEEK_SET);
+    /* the snapshot has been copied back, so the node can be released */
+    free(x);
+}
+
+/*
+ * Push c back so that the next get_char1() call returns it.  Only one
+ * character of pushback is kept (in last_ch).  For a newline the line
+ * counter is wound back, because get_char1() counts it again when the
+ * character is re-read.
+ */
+void
+unget_char(int c)
+{
+    last_ch = c;
+    if (c == '\n')
+        --line_number;
+}
+
+/*
+ * Return the next input character, as delivered by get_char1().
+ *
+ * When useAscii is set, the line-drawing bytes listed below are replaced
+ * by plain ASCII look-alikes.  The bytes are written as numeric values
+ * rather than as character literals so that the table does not depend on
+ * the encoding of this source file or on whether plain char is signed.
+ * The comparison is made on the unsigned byte value, so a byte matches
+ * whether it came from getc() (0..255) or from a char string (possibly
+ * sign-extended).  EOF is passed through untouched.
+ *
+ * NOTE(review): the byte values are presumably the box-drawing
+ * characters of IBM code page 850/437 (cf. gTtFontIs850) -- confirm
+ * against the font in use.
+ */
+int
+get_char(void)
+{
+    int c = get_char1();
+
+    if (!useAscii || c == EOF)
+        return c;
+
+    switch ((unsigned char) c) {
+    case 0xC4:                  /* horizontal line */
+    case 0xC2:                  /* tee pointing down */
+    case 0xC1:                  /* tee pointing up */
+        return '-';
+    case 0xDA:                  /* upper left corner */
+    case 0xC0:                  /* lower left corner */
+    case 0xBF:                  /* upper right corner */
+    case 0xD9:                  /* lower right corner */
+    case 0xC5:                  /* crossing */
+        return '+';
+    case 0xC3:                  /* tee pointing right */
+        return '[';
+    case 0xB4:                  /* tee pointing left */
+        return ']';
+    case 0xB3:                  /* vertical line */
+        return '|';
+    default:
+        return c;
+    }
+}
+
+char * read_again = 0;
+
+/*
+ * Return the next raw character of the input, or EOF.
+ *
+ * A character pushed back with unget_char() is returned first.
+ * Otherwise the source selected by input_type is read:
+ *   FromUnixFD     -- the stream unixfd
+ *   FromString     -- the string input_string, EOF at its terminating NUL
+ *   FromFile       -- the stream cfile; fpos is advanced on every read
+ *   FromSpadSocket -- the string input_string, refilled from spad_socket
+ *                     through sock_buf whenever it runs out
+ * Every newline returned bumps line_number.  An unknown input_type is
+ * reported on stderr and -1 is returned.
+ */
+static int
+get_char1(void)
+{
+    int ch;
+
+    if (last_ch != NoChar) {
+        /* hand back the character saved by unget_char() */
+        ch = last_ch;
+        last_ch = NoChar;
+    }
+    else if (input_type == FromUnixFD) {
+        ch = getc(unixfd);
+    }
+    else if (input_type == FromString) {
+        ch = (*input_string ? *input_string++ : EOF);
+    }
+    else if (input_type == FromFile) {
+        ch = getc(cfile);
+        fpos++;
+    }
+    else if (input_type == FromSpadSocket) {
+        /* refill from the socket until there is something to return;
+         * this should never be needed for the first character */
+        while (!*input_string) {
+            if (last_command == EndOfPage)
+                return EOF;
+            if (read_again == NULL) {
+                /* previous string was complete: a new command follows */
+                last_command = get_int(spad_socket);
+                if (last_command == EndOfPage)
+                    return EOF;
+#ifndef HTADD
+                if (last_command == SpadError)
+                    spad_error_handler();
+#endif
+            }
+            /* read_again will be null if this is the last time */
+            read_again = get_string_buf(spad_socket, sock_buf, 1023);
+            input_string = sock_buf;
+        }
+        ch = *input_string++;
+    }
+    else {
+        fprintf(stderr, "Get Char: Unknown type of input: %d\n", input_type);
+        return -1;
+    }
+
+    if (ch == '\n')
+        line_number++;
+    return ch;
+}
+
+
+/*
+ * Character classes used by get_token() to decide where a Word ends.
+ * Each macro evaluates its argument several times, so only pass
+ * expressions without side effects.
+ */
+
+/* characters with a meaning of their own in the input language */
+#define special(c) ((c) == '{' || (c) == '}' || (c) == '#' || (c) == '%' || \
+ (c) == '\\' || (c) == '[' || (c) == ']' || (c) == '_' || \
+ (c) == ' ' || (c) == '$' || (c) == '~' || (c) == '^' || \
+ (c) == '&')
+
+/*
+ * punctuation marks that end a Word.
+ * NOTE(review): get_token() also returns '!' as Punctuation, but '!' is
+ * not listed here and therefore does not end a Word -- confirm that this
+ * is intended.
+ */
+#define punctuation(c) ((c)== '`' || (c) == '\'' || (c) == ',' || \
+ (c) == '.' || (c) == '?' || (c) == '"' || \
+ (c) == ';' || (c) == ':' || (c) == '-')
+
+/* blank, tab or newline */
+#define whitespace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
+/* any character that terminates a Word */
+#define delim(c) \
+ (whitespace(c) || special(c) || punctuation(c))
+
+
+
+Token unget_toke;
+
+/*
+ * Push the current token back so that the next get_token() call returns
+ * it again.  Only one token of pushback is kept (in unget_toke).
+ *
+ * The copy is taken from token.id - 1, i.e. including the byte that
+ * get_token() stores in front of the token text, so token.id must point
+ * one byte into a buffer, as it does for ids set by get_token().
+ */
+void
+unget_token(void)
+{
+    unget_toke.id = alloc_string(token.id - 1);
+    unget_toke.type = token.type;
+    last_token = 1;
+}
+
+
+/*
+ * Read the next token from the current input into the global "token".
+ *
+ * The token text is built in a static buffer.  buffer[0] holds
+ * information about the white space around the token (see below) and
+ * token.id is pointed at buffer + 1.  Tokens without text of their own
+ * (Par, Dollar, Lbrace, Rbrace and end of input) leave token.id as it
+ * was.
+ *
+ * Returns EOF at end of input, the result of keyword_type() for a
+ * backslash keyword, and 0 for everything else.
+ *
+ * The text of a token is limited to what fits in the buffer.  A longer
+ * run of characters is split into several tokens instead of being
+ * written past the end of the buffer.
+ */
+int
+get_token(void)
+{
+    int c, ws;
+    int nls = 0;
+    static int seen_white = 0;
+    static char buffer[1024];
+    /* characters may be stored below limit; *limit is kept for the NUL */
+    char *const limit = buffer + sizeof(buffer) - 1;
+    char *buf = buffer;
+
+    if (last_token) {
+        /* hand back the token saved by unget_token(); the saved copy
+         * starts at the flag byte, so it goes back to buffer[0] */
+        last_token = 0;
+        token.type = unget_toke.type;
+        strcpy(buffer, unget_toke.id);
+        free(unget_toke.id);
+        token.id = buffer + 1;
+        if (token.type == EOF)
+            return EOF;
+        else
+            return 0;
+    }
+
+    /* skip white space; a second newline inside it is a paragraph break */
+    seen_white = nls = 0;
+    do {
+        c = get_char();
+        ws = whitespace(c);
+        if (ws)
+            seen_white++;
+        if (c == '\n') {
+            if (nls) {
+                token.type = Par;
+                return 0;
+            }
+            else
+                nls++;
+        }
+    } while (ws);
+
+    /* first character of string indicates number of spaces before token */
+
+    if (!keyword)
+        *buf++ = seen_white;
+    else
+        *buf++ = 0;
+
+    keyword = 0;
+    if (input_type != FromSpadSocket && c == '%') {
+        /* comment: discard the rest of the line */
+        while ((c = get_char()) != '\n' && c != EOF);
+        /* a comment line used to force the words on either side of it
+         * together; returning the end of line avoids that */
+        unget_char(c);
+        return get_token();
+    }
+    if (input_type == FromFile && c == '$') {
+        token.type = Dollar;
+        return 0;
+    }
+    switch (c) {
+    case EOF:
+        token.type = -1;
+        return EOF;
+    case '\\':
+        keyword_fpos = fpos - 1;
+        c = get_char();
+        if (!isalpha(c)) {
+            /* backslash followed by a non-letter: a one character Word */
+            *buf++ = c;
+            token.type = Word;
+            *buf = '\0';
+            seen_white = 0;
+            /* set explicitly rather than relying on an earlier call */
+            token.id = buffer + 1;
+        }
+        else {
+            /* backslash followed by letters: a keyword */
+            do {
+                *buf++ = c;
+            } while ((c = get_char()) != EOF && isalpha(c) && buf < limit);
+
+            unget_char(c);
+            *buf = '\0';
+            keyword = 1;
+            token.id = buffer + 1;
+            return (keyword_type());
+        }
+        break;
+    case '{':
+        token.type = Lbrace;
+        break;
+    case '}':
+        token.type = Rbrace;
+        break;
+    case '[':
+        token.type = Lsquarebrace;
+        *buf++ = c;
+        *buf = '\0';
+        token.id = buffer + 1;
+        break;
+    case ']':
+        token.type = Rsquarebrace;
+        *buf++ = c;
+        *buf = '\0';
+        token.id = buffer + 1;
+        break;
+    case '#':
+        token.type = Pound;
+
+        /*
+         * after a pound sign, collect characters until something that
+         * is not a digit turns up
+         */
+        c = get_char();
+        while (isdigit(c) && (c != EOF) && buf < limit) {
+            *buf++ = c;
+            c = get_char();
+        }
+        unget_char(c);
+        *buf = '\0';
+        token.id = buffer + 1;
+        break;
+    case '`':
+    case '\'':
+    case ',':
+    case '.':
+    case '!':
+    case '?':
+    case '"':
+    case ':':
+    case ';':
+        token.type = Punctuation;
+        *buf++ = c;
+        *buf = '\0';
+        /*
+         * for punctuation buffer[0] is a set of flags: FRONTSPACE if
+         * white space came before the mark, BACKSPACE if white space
+         * follows it
+         */
+        if (buffer[0])
+            buffer[0] = FRONTSPACE;
+        c = get_char();
+        if (whitespace(c))
+            buffer[0] |= BACKSPACE;
+        unget_char(c);
+        token.id = buffer + 1;
+        break;
+    case '-':
+        /* a run of dashes forms one token */
+        do {
+            *buf++ = c;
+        } while (((c = get_char()) != EOF) && (c == '-') && buf < limit);
+        unget_char(c);
+        *buf = '\0';
+        token.type = Dash;
+        token.id = buffer + 1;
+        break;
+    default:
+        /* anything else is a Word that runs up to the next delimiter */
+        do {
+            *buf++ = c;
+        } while ((c = get_char()) != EOF && !delim(c) && buf < limit);
+        unget_char(c);
+        *buf = '\0';
+        token.type = Word;
+        token.id = buffer + 1;
+        break;
+    }
+/* dumpToken("get_token",token);*/
+    return 0;
+}
+
+
+/*
+ * Here are the structures and stuff needed for the begin and end routines.
+ * The stack stores the begin types that have been seen, and the end
+ * routine pops them off and checks to ensure that they are reversed
+ * properly.
+ */
+
+/* one entry of the begin/end stack */
+typedef struct be_struct {
+ int type; /* token type recorded by push_be_stack() */
+ char *id; /* copy of the name made with alloc_string() */
+ struct be_struct *next; /* entry pushed before this one */
+} BeStruct;
+
+/* most recently pushed entry, NULL when the stack is empty */
+BeStruct *top_be_stack;
+
+
+/*
+ * Record an opened environment: push its token type and a copy of its
+ * name onto the begin/end stack.  Nothing is recorded when gWindow is
+ * NULL.
+ *
+ * The entry is allocated only after the gWindow test, so that no memory
+ * is allocated and then dropped when nothing is pushed.
+ */
+void
+push_be_stack(int type,char * id)
+{
+    BeStruct *be;
+
+    if (gWindow == NULL)
+        return;
+
+    be = (BeStruct *) halloc(sizeof(BeStruct), "BeginENd stack");
+    be->type = type;
+    be->next = top_be_stack;
+    be->id = alloc_string(id);
+    top_be_stack = be;
+}
+/*
+ * Check that an \end of the given type closes the most recently opened
+ * environment and, if it does, pop that entry from the begin/end stack.
+ * Does nothing when gWindow is NULL.
+ *
+ * An \end with nothing open, or one whose type differs from the top of
+ * the stack, is reported on stderr together with the page, the file name
+ * and the following tokens, and then jump() is called.  The function
+ * returns no value; the stack is left unchanged in both error cases.
+ */
+void
+check_and_pop_be_stack(int type,char * id)
+{
+    BeStruct *x;
+
+    if (gWindow == NULL)
+        return;
+    if (top_be_stack == NULL) { /* tried to pop when I shouldn't have */
+        fprintf(stderr, "Unexpected \\end{%s} \n", token.id);
+        print_page_and_filename();
+        print_next_ten_tokens();
+        jump();
+        /* should jump() ever return, there is still nothing to pop */
+        return;
+    }
+    x = top_be_stack;
+    if (x->type == type) {
+        top_be_stack = top_be_stack->next;
+        free(x->id);
+        free(x);
+        return;
+    }
+    /* no match: say which \begin is being closed by which \end */
+    fprintf(stderr, "\\begin{%s} ended with \\end{%s} \n", x->id, id);
+    print_page_and_filename();
+    print_next_ten_tokens();
+    jump();
+}
+
+/*
+ * Throw away every entry of the begin/end stack.  Each entry owns the
+ * copy of the name made by push_be_stack(), so the name is released
+ * together with the entry.  Always returns 1.
+ */
+int
+clear_be_stack(void)
+{
+    BeStruct *x = top_be_stack, *y;
+
+    top_be_stack = NULL;
+    while (x != NULL) {
+        y = x->next;
+        free(x->id);
+        free(x);
+        x = y;
+    }
+    return 1;
+}
+
+/*
+ * Parse the "{name}" that follows \begin or \end.
+ *
+ * Reads a left brace and a Word.  If the word is one of the known
+ * environment names, the right brace is read as well, token.type is set
+ * to the matching type and 0 is returned (token.id is not reassigned
+ * here).  For an unknown name -1 is returned at once, before the right
+ * brace is read, with the Word still in token.
+ *
+ * The argument "which" is not used.
+ */
+int
+be_type(char *which)
+{
+    const struct {
+        const char *name;
+        int type;
+    } environments[] = {
+        { "titems",   Begintitems },
+        { "page",     Page },
+        { "paste",    Paste },
+        { "patch",    Patch },
+        { "verbatim", Verbatim },
+        { "scroll",   Beginscroll },
+        { "spadsrc",  Spadsrc },
+        { "items",    Beginitems }
+    };
+    const int count = (int) (sizeof(environments) / sizeof(environments[0]));
+    int i;
+    int env_type;
+
+    get_expected_token(Lbrace);
+    get_expected_token(Word);
+
+    for (i = 0; i < count; i++) {
+        if (strcmp(token.id, environments[i].name) == 0)
+            break;
+    }
+    if (i == count)
+        return -1;
+
+    env_type = environments[i].type;
+    token.type = env_type;
+    /* reading the brace overwrites token.type, so put the type back */
+    get_expected_token(Rbrace);
+    token.type = env_type;
+    return 0;
+}
+/*
+ * Handle "\begin{name}": work out the type of the environment with
+ * be_type() and record it on the begin/end stack.
+ *
+ * An unknown name is an error (reported, followed by jump()) unless
+ * gWindow is NULL or gInVerbatim is set.  A known environment is pushed
+ * only when gWindow is not NULL, gInVerbatim is clear and the type is
+ * neither Verbatim nor Spadsrc.  Always returns 1.
+ */
+int
+begin_type(void)
+{
+    if (be_type("begin") == -1) {
+        if (gWindow != NULL && !gInVerbatim) {
+            fprintf(stderr, "Unknown begin type \\begin{%s} \n", token.id);
+            print_page_and_filename();
+            print_next_ten_tokens();
+            jump();
+        }
+        return 1;
+    }
+
+    if (gWindow == NULL || gInVerbatim)
+        return 1;
+    if (token.type == Verbatim || token.type == Spadsrc)
+        return 1;
+
+    push_be_stack(token.type, token.id);
+    return 1;
+}
+
+
+/*
+ * Handle "\end{name}": work out the type of the environment with
+ * be_type(), check it against the begin/end stack, and turn token.type
+ * into the matching end type, which is the begin type plus 3000.
+ *
+ * An unknown name is an error (reported, followed by jump()) unless
+ * gWindow is NULL or gInVerbatim is set.  The stack is checked when
+ * gWindow is not NULL and either gInVerbatim is clear, or this \end
+ * closes the verbatim or spadsrc text being read.  Always returns 1.
+ */
+int
+end_type(void)
+{
+    int closes_literal;
+
+    if (be_type("end") == -1) {
+        /* unrecognized end token */
+        if (gWindow != NULL && !gInVerbatim) {
+            fprintf(stderr, "Unknown end type \\end{%s} \n", token.id);
+            print_page_and_filename();
+            print_next_ten_tokens();
+            jump();
+        }
+        return 1;
+    }
+
+    if (gWindow != NULL) {
+        closes_literal = (gInVerbatim && token.type == Verbatim) ||
+                         (gInSpadsrc && token.type == Spadsrc);
+        if (!gInVerbatim || closes_literal)
+            check_and_pop_be_stack(token.type, token.id);
+    }
+    token.type += 3000;
+    return 1;
+}
+
+
+
+/*
+ * Classify the backslash keyword whose name is in token.id and store the
+ * result in token.type.
+ *
+ * Reserved keywords come from the table built by parser_init().  Begin
+ * and End are passed on to begin_type() and end_type(), whose result is
+ * returned.  Any other name is looked up, when gWindow is not NULL, as a
+ * macro, then as a box condition of the page being parsed, then as a
+ * condition.  Whatever is left becomes Unkeyword.  Returns 0 except for
+ * Begin and End.
+ */
+static int
+keyword_type(void)
+{
+    Token *reserved = (Token *) hash_find(&tokenHashTable, token.id);
+
+    if (reserved != NULL) {
+        token.type = reserved->type;
+        /* \begin and \end need the environment name as well */
+        if (token.type == Begin)
+            return begin_type();
+        if (token.type == End)
+            return end_type();
+        return 0;
+    }
+
+    if (gWindow == NULL) {
+        /* probably running in htadd, which only cares about pages */
+        token.type = Unkeyword;
+        return 0;
+    }
+
+    if (hash_find(gWindow->fMacroHashTable, token.id) != NULL) {
+        token.type = Macro;
+    }
+    else if (gPageBeingParsed->box_hash != NULL &&
+             hash_find(gPageBeingParsed->box_hash, token.id) != NULL) {
+        token.type = Boxcond;
+    }
+    else if (hash_find(gWindow->fCondHashTable, token.id) != NULL) {
+        token.type = Cond;
+    }
+    else {
+        /* no idea what this is */
+        token.type = Unkeyword;
+    }
+    return 0;
+}
+
+/*
+ * Read the next token and insist that it has the given type.
+ *
+ * On a mismatch a syntax error naming the expected type is printed,
+ * followed either by a note about the unexpected end of file or by the
+ * type that was found and the tokens that follow.  Control then leaves
+ * through longjmp(jmpbuf, 1).  The names printed are taken from ebuffer
+ * right after token_name() has been called.
+ */
+void
+get_expected_token(int type)
+{
+    get_token();
+    if (token.type == type)
+        return;
+
+    token_name(type);
+    fprintf(stderr, "syntax error: expected a %s\n", ebuffer);
+    if (token.type != EOF) {
+        token_name(token.type);
+        fprintf(stderr, "not a %s\n", ebuffer);
+        print_page_and_filename();
+        print_next_ten_tokens();
+    }
+    else {
+        print_page_and_filename();
+        fprintf(stderr, "Unexpected EOF\n");
+    }
+
+    longjmp(jmpbuf, 1);
+    /* only reached if the jump did not take place */
+    fprintf(stderr, "Could not jump to Error Page\n");
+    exit(-1);
+}
+
+
+#ifndef HTADD
+/*
+ * Called by get_char1() when the SpadError command arrives on the
+ * socket: leave through longjmp(jmpbuf, 1).  The statements after the
+ * jump are a last resort in case it did not take place.
+ */
+static void
+spad_error_handler(void)
+{
+    longjmp(jmpbuf, 1);
+
+    fputs("(HyperDoc) Fatal Error: Could not jump to Error Page.\n", stderr);
+    exit(-1);
+}
+
+extern int still_reading, str_len;
+
+/*
+ * Drop the current connection behind spad_socket and open a new one.
+ * The socket is removed from socket_mask and purpose_table and closed,
+ * the pending socket input is discarded (input_string is emptied,
+ * read_again, str_len and still_reading are cleared), and finally
+ * connect_spad() is called.  Does nothing when spad_socket is NULL.
+ */
+void
+reset_connection(void)
+{
+    if (!spad_socket)
+        return;
+
+    /* forget the old socket */
+    FD_CLR(spad_socket->socket, &socket_mask);
+    purpose_table[spad_socket->purpose] = NULL;
+    close(spad_socket->socket);
+    spad_socket->socket = 0;
+    spad_socket = NULL;
+
+    /* discard whatever was still waiting to be scanned */
+    if (input_string)
+        input_string[0] = '\0';
+    read_again = 0;
+    str_len = 0;
+    still_reading = 0;
+
+    connect_spad();
+}
+#endif
+
+
+/*
+ * Ask the session server whether spad is currently computing: send it
+ * QuerySpad and return the integer it answers with.  Returns 1 when
+ * there is no session server.
+ */
+int
+spad_busy(void)
+{
+    if (session_server != NULL) {
+        send_int(session_server, QuerySpad);
+        return get_int(session_server);
+    }
+    return 1;
+}
+
+/*
+ * Make sure there is a connection to the AXIOM server.
+ *
+ * Returns Connected when spad_socket is already open or could be opened
+ * now.  Returns NotConnected, after a warning on stderr and a beep, when
+ * the menu server has not been opened or the connection attempt failed.
+ */
+int
+connect_spad(void)
+{
+    if (!MenuServerOpened) {
+        fprintf(stderr, "(HyperDoc) Warning: Not connected to AXIOM Server!\n");
+        LoudBeepAtTheUser();
+        return NotConnected;
+    }
+
+    if (spad_socket != NULL)
+        return Connected;
+
+    spad_socket = connect_to_local_server(SpadServer, MenuServer, Forever);
+    if (spad_socket != NULL)
+        return Connected;
+
+    fprintf(stderr, "(HyperDoc) Warning: Could not connect to AXIOM Server!\n");
+    LoudBeepAtTheUser();
+    return NotConnected;
+    /* a check of spad_busy() returning SpadBusy used to be sketched here */
+}