aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/ChangeLog8
-rw-r--r--src/interp/bootlex.lisp111
-rw-r--r--src/interp/lexing.boot76
-rw-r--r--src/interp/metalex.lisp7
-rw-r--r--src/interp/parsing.lisp2
5 files changed, 91 insertions, 113 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index c1dda3a3..9f89ffb8 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,11 @@
+2011-10-08 Gabriel Dos Reis <gdr@cs.tamu.edu>
+
+ * interp/lexing.boot: New tokenizer functions.
+ * interp/parsing.lisp: Use them.
+ * interp/metalex.lisp: Likewise.
+ (GET-SPECIAL-TOKEN): Remove.
+ * interp/bootlex.lisp: Likewise. Remove old tokenizers,
+
2011-10-07 Gabriel Dos Reis <gdr@cs.tamu.edu>
* interp/lexing.boot: Include sys-macros.
diff --git a/src/interp/bootlex.lisp b/src/interp/bootlex.lisp
index 328aced3..e932ede3 100644
--- a/src/interp/bootlex.lisp
+++ b/src/interp/bootlex.lisp
@@ -160,44 +160,19 @@
If you have a . followed by an integer, get a floating point number.
Otherwise, get a .. identifier."
- (if (not (boot-skip-blanks))
+ (if (not (|skipBlankChars|))
nil
- (let ((token-type (boot-token-lookahead-type (|currentChar|))))
+ (let ((token-type (|tokenLookaheadType| (|currentChar|))))
(case token-type
(eof (|tokenInstall| nil '*eof token |$nonblank|))
(escape (|advanceChar!|)
- (get-boot-identifier-token token t))
+ (|getIdentifier| token t))
(argument-designator (get-argument-designator-token token))
- (id (get-boot-identifier-token token))
+ (id (|getIdentifier| token nil))
(num (get-spad-integer-token token))
- (string (get-SPADSTRING-token token))
- (special-char (get-special-token token))
- (t (get-gliph-token token token-type))))))
-
-(defun boot-skip-blanks ()
- (setq |$nonblank| t)
- (loop (let ((cc (|currentChar|)))
- (if (not cc) (return nil))
- (if (eq (boot-token-lookahead-type cc) 'white)
- (progn (setq |$nonblank| nil) (if (not (|advanceChar!|)) (return nil)))
- (return t)))))
-
-(defun boot-token-lookahead-type (char)
- "Predicts the kind of token to follow, based on the given initial character."
- (cond ((not char) 'eof)
- ((char= char #\_) 'escape)
- ((and (char= char #\#) (digitp (|nextChar|))) 'argument-designator)
- ((digitp char) 'num)
- ((and (char= char #\$) $boot
- (alpha-char-p (|nextChar|))) 'id)
- ((or (char= char #\%) (char= char #\?)
- (char= char #\!) (alpha-char-p char)) 'id)
- ((char= char #\") 'string)
- ((member char
- '(#\Space #\Tab #\Return)
- :test #'char=) 'white)
- ((get (intern (string char)) 'Gliph))
- (t 'special-char)))
+ (string (|getSpadString| token))
+ (special-char (|getSpecial| token))
+ (t (|getGliph| token token-type))))))
(defun get-argument-designator-token (token)
(|advanceChar!|)
@@ -206,78 +181,6 @@ Otherwise, get a .. identifier."
'argument-designator token |$nonblank|))
-(defun get-boot-identifier-token (token &optional (escaped? nil))
- "An identifier consists of an escape followed by any character, a %, ?,
-or an alphabetic, followed by any number of escaped characters, digits,
-or the chracters ?, !, ' or %"
- (prog ((buf (make-adjustable-string 0))
- (default-package NIL))
- (suffix (|currentChar|) buf)
- (|advanceChar!|)
- id (let ((cur-char (|currentChar|)))
- (cond ((char= cur-char #\_)
- (if (not (|advanceChar!|)) (go bye))
- (suffix (|currentChar|) buf)
- (setq escaped? t)
- (if (not (|advanceChar!|)) (go bye))
- (go id))
- ((and (null default-package)
- (char= cur-char #\'))
- (setq default-package buf)
- (setq buf (make-adjustable-string 0))
- (if (not (|advanceChar!|)) (go bye))
- (go id))
- ((or (alpha-char-p cur-char)
- (digitp cur-char)
- (member cur-char '(#\% #\' #\? #\!) :test #'char=))
- (suffix (|currentChar|) buf)
- (if (not (|advanceChar!|)) (go bye))
- (go id))))
- bye (if (and (stringp default-package)
- (or (not (find-package default-package)) ;; not a package name
- (every #'(lambda (x) (eql x #\')) buf))) ;;token ends with ''
- (setq buf (concatenate 'string default-package "'" buf)
- default-package nil))
- (setq buf (intern buf (or default-package "BOOT")))
- (return (|tokenInstall|
- buf
- (if (and (not escaped?)
- (member buf |Keywords| :test #'eq))
- 'keyword 'identifier)
- token
- |$nonblank|))))
-
-(defun get-gliph-token (token gliph-list)
- (prog ((buf (make-adjustable-string 0)))
- (suffix (|currentChar|) buf)
- (|advanceChar!|)
- loop (setq gliph-list (assoc (intern (string (|currentChar|))) gliph-list))
- (if gliph-list
- (progn (suffix (|currentChar|) buf)
- (pop gliph-list)
- (|advanceChar!|)
- (go loop))
- (let ((new-token (intern buf)))
- (return (|tokenInstall| (or (get new-token 'renametok) new-token)
- 'gliph token |$nonblank|))))))
-
-(defun get-SPADSTRING-token (token)
- "With TOK=\" and ABC\" on IN-STREAM, extracts and stacks string ABC"
- (PROG ((BUF (make-adjustable-string 0)))
- (if (char/= (|currentChar|) #\") (RETURN NIL) (|advanceChar!|))
- (loop
- (if (char= (|currentChar|) #\") (return nil))
- (SUFFIX (if (char= (|currentChar|) #\_)
- (|advanceChar!|)
- (|currentChar|))
- BUF)
- (if (null (|advanceChar!|)) ;;end of line
- (PROGN (|sayBrightly| "Close quote inserted") (RETURN nil)))
- )
- (|advanceChar!|)
- (return (|tokenInstall| (copy-seq buf) ;should make a simple string
- 'spadstring token))))
-
;; -*- Parse an integer number -*-
;; The number may be written in plain format, where the radix
;; is implicitly taken to be 10. Or the spelling can explicitly
diff --git a/src/interp/lexing.boot b/src/interp/lexing.boot
index a9a161c0..992239e3 100644
--- a/src/interp/lexing.boot
+++ b/src/interp/lexing.boot
@@ -244,7 +244,60 @@ tokenStackClear!() ==
tokenInstall(nil,nil,$nextToken,nil)
tokenInstall(nil,nil,$priorToken,nil)
---%
+++ Predicts the kind of token to follow, based on the given initial character
+tokenLookaheadType c ==
+ c = nil => 'EOF
+ c = char "__" => 'ESCAPE
+ c = char "#" and digit? nextChar() => 'ARGUMENT_-DESIGNATOR
+ digit? c => 'NUM
+ c = char "%" or c = char "?" or c = char "?" or alphabetic? c => 'ID
+ c = char "_"" => 'STRING
+ c = char " " or c = charByName "Tab" or c = charByName "Return" => 'WHITE
+ p := property(makeSymbol charString c,'GLIPH) => p
+ 'SPECIAL_-CHAR
+
+skipBlankChars() ==
+ $nonblank := true
+ repeat
+ c := currentChar()
+ c = nil => return false
+ tokenLookaheadType c = 'WHITE =>
+ $nonblank := false
+ advanceChar!() = nil => return false
+ return true
+
+getSpadString tok ==
+ buf := nil
+ currentChar() ~= char "_"" => nil
+ advanceChar!()
+ repeat
+ currentChar() = char "_"" => leave nil
+ buf := [(currentChar() = char "__" => advanceChar!(); currentChar()),:buf]
+ advanceChar!() = nil =>
+ sayBrightly '"close quote inserted"
+ leave nil
+ advanceChar!()
+ tokenInstall(listToString reverse! buf,'SPADSTRING,tok)
+
+++ Take a special character off the input stream. We let the type name
+++ of each special character be the atom whose print name is the
+++ character itself
+getSpecial tok ==
+ c := currentChar()
+ advanceChar!()
+ tokenInstall(c,'SPECIAL_-CHAR,tok)
+
+getGliph(tok,gliphs) ==
+ buf := [currentChar()]
+ advanceChar!()
+ repeat
+ gliphs := symbolAssoc(makeSymbol charString currentChar(),gliphs) =>
+ buf := [currentChar(),:buf]
+ gliphs := rest gliphs
+ advanceChar!()
+ s := makeSymbol listToString reverse! buf
+ return tokenInstall(property(s,'RENAMETOK) or s,'GLIPH,tok,$nonblank)
+
Keywords == [
"or", "and", "isnt", "is", "when", "where", "forall", "exist", "try",
"has", "with", "add", "case", "in", "by", "pretend", "mod", "finally",
@@ -252,6 +305,27 @@ Keywords == [
"if", "iterate", "break", "from", "exit", "leave", "return",
"not", "repeat", "until", "while", "for", "import", "inline" ]
+getIdentifier(tok,esc?) ==
+ buf := [currentChar()]
+ advanceChar!()
+ repeat
+ c := currentChar()
+ c = char "__" =>
+ advanceChar!() = nil => leave nil
+ buf := [currentChar(),:buf]
+ esc? := true
+ advanceChar!() = nil => leave nil
+ alphabetic? c or digit? c
+ or scalarMember?(c,[char "%",char "'",char "?",char "!"]) =>
+ buf := [c,:buf]
+ advanceChar!() = nil => leave nil
+ leave nil
+ s := makeSymbol listToString reverse! buf
+ tt :=
+ not esc? and symbolMember?(s,Keywords) => 'KEYWORD
+ 'IDENTIFIER
+ tokenInstall(s,tt,tok,$nonblank)
+
escapeKeywords(nm,id) ==
symbolMember?(id,Keywords) => strconc('"__",nm)
nm
diff --git a/src/interp/metalex.lisp b/src/interp/metalex.lisp
index 40dc84c2..ed1b588e 100644
--- a/src/interp/metalex.lisp
+++ b/src/interp/metalex.lisp
@@ -235,13 +235,6 @@ Symbolics read-line returns embedded newlines in a c-m-Y.")
(defun make-adjustable-string (n)
(make-array (list n) :element-type 'character :adjustable t))
-(defun get-special-token (token)
- "Take a special character off the input stream. We let the type name of each
-special character be the atom whose print name is the character itself."
- (let ((symbol (|currentChar|)))
- (|advanceChar!|)
- (|tokenInstall| symbol 'special-char token)))
-
(defun get-number-token (token)
"Take a number off the input stream."
(prog ((buf (make-adjustable-string 0)))
diff --git a/src/interp/parsing.lisp b/src/interp/parsing.lisp
index 28a02239..42d5efbc 100644
--- a/src/interp/parsing.lisp
+++ b/src/interp/parsing.lisp
@@ -179,7 +179,7 @@ the stack, then stack a NIL. Return the value of prod."
(defun Match-String (x)
"Returns length of X if X matches initial segment of inputstream."
(|ungetTokens|) ; So we don't get out of synch with token stream
- (boot-skip-blanks)
+ (|skipBlankChars|)
(if (and (not (|linePastEnd?| |$spadLine|)) (|currentChar|) )
(initial-substring-p x
(subseq (|lineBuffer| |$spadLine|) (|lineCurrentIndex| |$spadLine|)))))