From ddb282d424a2f17cbfb1370988fec88a1401d45e Mon Sep 17 00:00:00 2001 From: dos-reis Date: Sun, 9 Oct 2011 01:06:31 +0000 Subject: * interp/lexing.boot: New tokenizer functions. * interp/parsing.lisp: Use them. * interp/metalex.lisp: Likewise. (GET-SPECIAL-TOKEN): Remove. * interp/bootlex.lisp: Likewise. Remove old tokenizers, --- src/ChangeLog | 8 ++++ src/interp/bootlex.lisp | 111 +++--------------------------------------------- src/interp/lexing.boot | 76 ++++++++++++++++++++++++++++++++- src/interp/metalex.lisp | 7 --- src/interp/parsing.lisp | 2 +- 5 files changed, 91 insertions(+), 113 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index c1dda3a3..9f89ffb8 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,11 @@ +2011-10-08 Gabriel Dos Reis + + * interp/lexing.boot: New tokenizer functions. + * interp/parsing.lisp: Use them. + * interp/metalex.lisp: Likewise. + (GET-SPECIAL-TOKEN): Remove. + * interp/bootlex.lisp: Likewise. Remove old tokenizers, + 2011-10-07 Gabriel Dos Reis * interp/lexing.boot: Include sys-macros. diff --git a/src/interp/bootlex.lisp b/src/interp/bootlex.lisp index 328aced3..e932ede3 100644 --- a/src/interp/bootlex.lisp +++ b/src/interp/bootlex.lisp @@ -160,44 +160,19 @@ If you have a . followed by an integer, get a floating point number. Otherwise, get a .. identifier." - (if (not (boot-skip-blanks)) + (if (not (|skipBlankChars|)) nil - (let ((token-type (boot-token-lookahead-type (|currentChar|)))) + (let ((token-type (|tokenLookaheadType| (|currentChar|)))) (case token-type (eof (|tokenInstall| nil '*eof token |$nonblank|)) (escape (|advanceChar!|) - (get-boot-identifier-token token t)) + (|getIdentifier| token t)) (argument-designator (get-argument-designator-token token)) - (id (get-boot-identifier-token token)) + (id (|getIdentifier| token nil)) (num (get-spad-integer-token token)) - (string (get-SPADSTRING-token token)) - (special-char (get-special-token token)) - (t (get-gliph-token token token-type)))))) - -(defun boot-skip-blanks () - (setq |$nonblank| t) - (loop (let ((cc (|currentChar|))) - (if (not cc) (return nil)) - (if (eq (boot-token-lookahead-type cc) 'white) - (progn (setq |$nonblank| nil) (if (not (|advanceChar!|)) (return nil))) - (return t))))) - -(defun boot-token-lookahead-type (char) - "Predicts the kind of token to follow, based on the given initial character." - (cond ((not char) 'eof) - ((char= char #\_) 'escape) - ((and (char= char #\#) (digitp (|nextChar|))) 'argument-designator) - ((digitp char) 'num) - ((and (char= char #\$) $boot - (alpha-char-p (|nextChar|))) 'id) - ((or (char= char #\%) (char= char #\?) - (char= char #\!) (alpha-char-p char)) 'id) - ((char= char #\") 'string) - ((member char - '(#\Space #\Tab #\Return) - :test #'char=) 'white) - ((get (intern (string char)) 'Gliph)) - (t 'special-char))) + (string (|getSpadString| token)) + (special-char (|getSpecial| token)) + (t (|getGliph| token token-type)))))) (defun get-argument-designator-token (token) (|advanceChar!|) @@ -206,78 +181,6 @@ Otherwise, get a .. identifier." 'argument-designator token |$nonblank|)) -(defun get-boot-identifier-token (token &optional (escaped? nil)) - "An identifier consists of an escape followed by any character, a %, ?, -or an alphabetic, followed by any number of escaped characters, digits, -or the chracters ?, !, ' or %" - (prog ((buf (make-adjustable-string 0)) - (default-package NIL)) - (suffix (|currentChar|) buf) - (|advanceChar!|) - id (let ((cur-char (|currentChar|))) - (cond ((char= cur-char #\_) - (if (not (|advanceChar!|)) (go bye)) - (suffix (|currentChar|) buf) - (setq escaped? t) - (if (not (|advanceChar!|)) (go bye)) - (go id)) - ((and (null default-package) - (char= cur-char #\')) - (setq default-package buf) - (setq buf (make-adjustable-string 0)) - (if (not (|advanceChar!|)) (go bye)) - (go id)) - ((or (alpha-char-p cur-char) - (digitp cur-char) - (member cur-char '(#\% #\' #\? #\!) :test #'char=)) - (suffix (|currentChar|) buf) - (if (not (|advanceChar!|)) (go bye)) - (go id)))) - bye (if (and (stringp default-package) - (or (not (find-package default-package)) ;; not a package name - (every #'(lambda (x) (eql x #\')) buf))) ;;token ends with '' - (setq buf (concatenate 'string default-package "'" buf) - default-package nil)) - (setq buf (intern buf (or default-package "BOOT"))) - (return (|tokenInstall| - buf - (if (and (not escaped?) - (member buf |Keywords| :test #'eq)) - 'keyword 'identifier) - token - |$nonblank|)))) - -(defun get-gliph-token (token gliph-list) - (prog ((buf (make-adjustable-string 0))) - (suffix (|currentChar|) buf) - (|advanceChar!|) - loop (setq gliph-list (assoc (intern (string (|currentChar|))) gliph-list)) - (if gliph-list - (progn (suffix (|currentChar|) buf) - (pop gliph-list) - (|advanceChar!|) - (go loop)) - (let ((new-token (intern buf))) - (return (|tokenInstall| (or (get new-token 'renametok) new-token) - 'gliph token |$nonblank|)))))) - -(defun get-SPADSTRING-token (token) - "With TOK=\" and ABC\" on IN-STREAM, extracts and stacks string ABC" - (PROG ((BUF (make-adjustable-string 0))) - (if (char/= (|currentChar|) #\") (RETURN NIL) (|advanceChar!|)) - (loop - (if (char= (|currentChar|) #\") (return nil)) - (SUFFIX (if (char= (|currentChar|) #\_) - (|advanceChar!|) - (|currentChar|)) - BUF) - (if (null (|advanceChar!|)) ;;end of line - (PROGN (|sayBrightly| "Close quote inserted") (RETURN nil))) - ) - (|advanceChar!|) - (return (|tokenInstall| (copy-seq buf) ;should make a simple string - 'spadstring token)))) - ;; -*- Parse an integer number -*- ;; The number may be written in plain format, where the radix ;; is implicitly taken to be 10. Or the spelling can explicitly diff --git a/src/interp/lexing.boot b/src/interp/lexing.boot index a9a161c0..992239e3 100644 --- a/src/interp/lexing.boot +++ b/src/interp/lexing.boot @@ -244,7 +244,60 @@ tokenStackClear!() == tokenInstall(nil,nil,$nextToken,nil) tokenInstall(nil,nil,$priorToken,nil) ---% +++ Predicts the kind of token to follow, based on the given initial character +tokenLookaheadType c == + c = nil => 'EOF + c = char "__" => 'ESCAPE + c = char "#" and digit? nextChar() => 'ARGUMENT_-DESIGNATOR + digit? c => 'NUM + c = char "%" or c = char "?" or c = char "?" or alphabetic? c => 'ID + c = char "_"" => 'STRING + c = char " " or c = charByName "Tab" or c = charByName "Return" => 'WHITE + p := property(makeSymbol charString c,'GLIPH) => p + 'SPECIAL_-CHAR + +skipBlankChars() == + $nonblank := true + repeat + c := currentChar() + c = nil => return false + tokenLookaheadType c = 'WHITE => + $nonblank := false + advanceChar!() = nil => return false + return true + +getSpadString tok == + buf := nil + currentChar() ~= char "_"" => nil + advanceChar!() + repeat + currentChar() = char "_"" => leave nil + buf := [(currentChar() = char "__" => advanceChar!(); currentChar()),:buf] + advanceChar!() = nil => + sayBrightly '"close quote inserted" + leave nil + advanceChar!() + tokenInstall(listToString reverse! buf,'SPADSTRING,tok) + +++ Take a special character off the input stream. We let the type name +++ of each special character be the atom whose print name is the +++ character itself +getSpecial tok == + c := currentChar() + advanceChar!() + tokenInstall(c,'SPECIAL_-CHAR,tok) + +getGliph(tok,gliphs) == + buf := [currentChar()] + advanceChar!() + repeat + gliphs := symbolAssoc(makeSymbol charString currentChar(),gliphs) => + buf := [currentChar(),:buf] + gliphs := rest gliphs + advanceChar!() + s := makeSymbol listToString reverse! buf + return tokenInstall(property(s,'RENAMETOK) or s,'GLIPH,tok,$nonblank) + Keywords == [ "or", "and", "isnt", "is", "when", "where", "forall", "exist", "try", "has", "with", "add", "case", "in", "by", "pretend", "mod", "finally", @@ -252,6 +305,27 @@ Keywords == [ "if", "iterate", "break", "from", "exit", "leave", "return", "not", "repeat", "until", "while", "for", "import", "inline" ] +getIdentifier(tok,esc?) == + buf := [currentChar()] + advanceChar!() + repeat + c := currentChar() + c = char "__" => + advanceChar!() = nil => leave nil + buf := [currentChar(),:buf] + esc? := true + advanceChar!() = nil => leave nil + alphabetic? c or digit? c + or scalarMember?(c,[char "%",char "'",char "?",char "!"]) => + buf := [c,:buf] + advanceChar!() = nil => leave nil + leave nil + s := makeSymbol listToString reverse! buf + tt := + not esc? and symbolMember?(s,Keywords) => 'KEYWORD + 'IDENTIFIER + tokenInstall(s,tt,tok,$nonblank) + escapeKeywords(nm,id) == symbolMember?(id,Keywords) => strconc('"__",nm) nm diff --git a/src/interp/metalex.lisp b/src/interp/metalex.lisp index 40dc84c2..ed1b588e 100644 --- a/src/interp/metalex.lisp +++ b/src/interp/metalex.lisp @@ -235,13 +235,6 @@ Symbolics read-line returns embedded newlines in a c-m-Y.") (defun make-adjustable-string (n) (make-array (list n) :element-type 'character :adjustable t)) -(defun get-special-token (token) - "Take a special character off the input stream. We let the type name of each -special character be the atom whose print name is the character itself." - (let ((symbol (|currentChar|))) - (|advanceChar!|) - (|tokenInstall| symbol 'special-char token))) - (defun get-number-token (token) "Take a number off the input stream." (prog ((buf (make-adjustable-string 0))) diff --git a/src/interp/parsing.lisp b/src/interp/parsing.lisp index 28a02239..42d5efbc 100644 --- a/src/interp/parsing.lisp +++ b/src/interp/parsing.lisp @@ -179,7 +179,7 @@ the stack, then stack a NIL. Return the value of prod." (defun Match-String (x) "Returns length of X if X matches initial segment of inputstream." (|ungetTokens|) ; So we don't get out of synch with token stream - (boot-skip-blanks) + (|skipBlankChars|) (if (and (not (|linePastEnd?| |$spadLine|)) (|currentChar|) ) (initial-substring-p x (subseq (|lineBuffer| |$spadLine|) (|lineCurrentIndex| |$spadLine|))))) -- cgit v1.2.3