diff options
Diffstat (limited to 'src/interp/scan.boot')
-rw-r--r-- | src/interp/scan.boot | 714 |
1 files changed, 714 insertions, 0 deletions
diff --git a/src/interp/scan.boot b/src/interp/scan.boot new file mode 100644 index 00000000..5fd0caa6 --- /dev/null +++ b/src/interp/scan.boot @@ -0,0 +1,714 @@ +-- Copyright (c) 1991-2002, The Numerical ALgorithms Group Ltd. +-- All rights reserved. +-- Copyright (C) 2007, Gabriel Dos Reis. +-- All rights reserved. +-- +-- Redistribution and use in source and binary forms, with or without +-- modification, are permitted provided that the following conditions are +-- met: +-- +-- - Redistributions of source code must retain the above copyright +-- notice, this list of conditions and the following disclaimer. +-- +-- - Redistributions in binary form must reproduce the above copyright +-- notice, this list of conditions and the following disclaimer in +-- the documentation and/or other materials provided with the +-- distribution. +-- +-- - Neither the name of The Numerical ALgorithms Group Ltd. nor the +-- names of its contributors may be used to endorse or promote products +-- derived from this software without specific prior written permission. +-- +-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +-- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +-- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +-- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +-- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +-- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +-- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-- This is a horrible hack to work around a horrible bug in GCL +-- as reported here: +-- http://lists.gnu.org/archive/html/gcl-devel/2007-08/msg00004.html +-- +)if %hasFeature KEYWORD::GCL +)package "VMLISP" +)package "AxiomCore" +)endif + +import '"bits" +import '"dq" +import '"incl" + +)package "BOOT" + +--% Separators + +$SPACE := QENUM('" ", 0) +ESCAPE := QENUM('"__ ", 0) +STRING_CHAR := QENUM('"_" ", 0) +PLUSCOMMENT := QENUM('"+ ", 0) +MINUSCOMMENT:= QENUM('"- ", 0) +RADIX_CHAR := QENUM('"r ", 0) +DOT := QENUM('". ", 0) +EXPONENT1 := QENUM('"E ", 0) +EXPONENT2 := QENUM('"e ", 0) +CLOSEPAREN := QENUM('") ", 0) +CLOSEANGLE := QENUM('"> ", 0) +QUESTION := QENUM('"? ",0) + + +--% Keywords + +scanKeyWords := [ _ + ['"add", "ADD" ],_ + ['"and", "AND" ],_ + ['"break", "BREAK" ],_ + ['"by", "BY" ],_ + ['"case", "CASE" ],_ + ['"default", "DEFAULT" ],_ + ['"define", "DEFN" ],_ + ['"do", "DO"],_ + ['"else", "ELSE" ],_ + ['"exit", "EXIT" ],_ + ['"export","EXPORT" ],_ + ['"for", "FOR" ],_ + ['"free", "FREE" ],_ + ['"from", "FROM" ],_ + ['"has", "HAS" ],_ + ['"if", "IF" ],_ + ['"import", "IMPORT" ],_ + ['"in", "IN" ],_ + ['"inline", "INLINE" ],_ + ['"is", "IS" ],_ + ['"isnt", "ISNT" ],_ + ['"iterate", "ITERATE"],_ + ['"local", "local" ],_ + ['"macro", "MACRO" ],_ + ['"mod", "MOD" ],_ + ['"or", "OR" ],_ + ['"pretend","PRETEND" ],_ + ['"quo","QUO" ],_ + ['"rem","REM" ],_ + ['"repeat","REPEAT" ],_ + ['"return","RETURN" ],_ + ['"rule","RULE" ],_ + ['"then","THEN" ],_ + ['"where","WHERE" ],_ + ['"while","WHILE" ],_ + ['"with","WITH" ],_ + ['"|","BAR"],_ + ['".","DOT" ],_ + ['"::","COERCE" ],_ + ['":","COLON" ],_ + ['":-","COLONDASH" ],_ + ['"@","AT" ],_ + ['"@@","ATAT" ],_ + ['",","COMMA" ],_ + ['";","SEMICOLON" ],_ + ['"**","POWER" ],_ + ['"*","TIMES" ],_ + ['"+","PLUS" ],_ + ['"-","MINUS" ],_ + ['"<","LT" ],_ + ['">","GT" ],_ + ['"<=","LE" ],_ + ['">=","GE" ],_ + ['"=", "EQUAL"],_ + ['"~=","NOTEQUAL" ],_ + ['"~","~" ],_ + ['"^","CARAT" ],_ + ['"..","SEG" ],_ + ['"#","#" ],_ + ['"&","AMPERSAND" ],_ + ['"$","$" ],_ + ['"/","SLASH" ],_ + ['"\","BACKSLASH" ],_ + ['"//","SLASHSLASH" ],_ + ['"\\","BACKSLASHBACKSLASH" ],_ + ['"/\","SLASHBACKSLASH" ],_ + ['"\/","BACKSLASHSLASH" ],_ + ['"=>","EXIT" ],_ + ['":=","BECOMES" ],_ + ['"==","DEF" ],_ + ['"==>","MDEF" ],_ + ['"->","ARROW" ],_ + ['"<-","LARROW" ],_ + ['"+->","GIVES" ],_ + ['"(","(" ],_ + ['")",")" ],_ + ['"(|","(|" ],_ + ['"|)","|)" ],_ + ['"[","[" ],_ + ['"]","]" ],_ + ['"[__]","[]" ],_ + ['"{","{" ],_ + ['"}","}" ],_ + ['"{__}","{}" ],_ + ['"[|","[|" ],_ + ['"|]","|]" ],_ + ['"[|__|]","[||]" ],_ + ['"{|","{|" ],_ + ['"|}","|}" ],_ + ['"{|__|}","{||}" ],_ + ['"<<","OANGLE" ],_ + ['">>","CANGLE" ],_ + ['"'", "'" ],_ + ['"`", "BACKQUOTE" ]_ + ] + + +scanKeyTableCons()== + KeyTable:=MAKE_-HASHTABLE("CVEC",true) + for st in scanKeyWords repeat + HPUT(KeyTable,CAR st,CADR st) + KeyTable + +scanKeyTable:=scanKeyTableCons() + + +scanInsert(s,d) == + l := #s + h := QENUM(s,0) + u := ELT(d,h) + n := #u + k:=0 + while l <= #(ELT(u,k)) repeat + k:=k+1 + v := MAKE_-VEC(n+1) + for i in 0..k-1 repeat VEC_-SETELT(v,i,ELT(u,i)) + VEC_-SETELT(v,k,s) + for i in k..n-1 repeat VEC_-SETELT(v,i+1,ELT(u,i)) + VEC_-SETELT(d,h,v) + s + +scanDictCons()== + l:= HKEYS scanKeyTable + d := + a:=MAKE_-VEC(256) + b:=MAKE_-VEC(1) + VEC_-SETELT(b,0,MAKE_-CVEC 0) + for i in 0..255 repeat VEC_-SETELT(a,i,b) + a + for s in l repeat scanInsert(s,d) + d + +scanDict:=scanDictCons() + + +scanPunCons()== + listing := HKEYS scanKeyTable + a:=MAKE_-BVEC 256 +-- SETSIZE(a,256) + for i in 0..255 repeat BVEC_-SETELT(a,i,0) + for k in listing repeat + if not startsId? k.0 + then BVEC_-SETELT(a,QENUM(k,0),1) + a + +scanPun:=scanPunCons() + +--for i in ["COLON","MINUS"] repeat +-- MAKEPROP(i,'PREGENERIC,'TRUE) + +for i in [ _ + ["EQUAL" ,"="], _ + ["TIMES" ,"*"], _ + ["HAS" ,"has"], _ + ["CASE" ,"case"], _ + ["REM" ,"rem"], _ + ["MOD" ,"mod"], _ + ["QUO" ,"quo"], _ + ["SLASH" ,"/"], _ + ["BACKSLASH","\"], _ + ["SLASHSLASH" ,"//"], _ + ["BACKSLASHBACKSLASH","\\"], _ + ["SLASHBACKSLASH" ,"/\"], _ + ["BACKSLASHSLASH","\/"], _ + ["POWER" ,"**"], _ + ["CARAT" ,"^"], _ + ["PLUS" ,"+"], _ + ["MINUS" ,"-"], _ + ["LT" ,"<"], _ + ["GT" ,">"], _ + ["OANGLE" ,"<<"], _ + ["CANGLE" ,">>"], _ + ["LE" ,"<="], _ + ["GE" ,">="], _ + ["NOTEQUAL" ,"~="], _ + ["BY" ,"by"], _ + ["ARROW" ,"->"], _ + ["LARROW" ,"<-"], _ + ["BAR" ,"|"], _ + ["SEG" ,".."] _ + ] repeat MAKEPROP(CAR i,'INFGENERIC,CADR i) + +-- Scanner + +-- lineoftoks bites off a token-dq from a line-stream +-- returning the token-dq and the rest of the line-stream + +scanIgnoreLine(ln,n)== + if null n + then n + else + fst:=QENUM(ln,0) + if EQ(fst,CLOSEPAREN) + then if incPrefix?('"command",1,ln) + then true + else nil + else n + +nextline(s)== + if npNull s + then false + else + $f:= CAR s + $r:= CDR s + $ln := CDR $f + $linepos:=CAAR $f + $n:=STRPOSL('" ",$ln,0,true)-- spaces at beginning + $sz :=# $ln + true + + +lineoftoks(s)== + $f: local:=nil + $r:local :=nil + $ln:local :=nil + $linepos:local:=nil + $n:local:=nil + $sz:local := nil + $floatok:local:=true + if not nextline s + then CONS(nil,nil) + else + if null scanIgnoreLine($ln,$n) -- line of spaces or starts ) or > + then cons(nil,$r) + else + toks:=[] + a:= incPrefix?('"command",1,$ln) + a => + $ln:=SUBSTRING($ln,8,nil) + b:= dqUnit constoken($ln,$linepos,["command",$ln],0) + cons([[b,s]],$r) + + while $n<$sz repeat toks:=dqAppend(toks,scanToken()) + if null toks + then cons([],$r) + else cons([[toks,s]],$r) + + +scanToken () == + ln:=$ln + c:=QENUM($ln,$n) + linepos:=$linepos + n:=$n + ch:=$ln.$n + b:= + startsComment?() => + scanComment() + [] + startsNegComment?() => + scanNegComment() + [] + c= QUESTION => + $n:=$n+1 + lfid '"?" + punctuation? c => scanPunct () + startsId? ch => scanWord (false) + c=$SPACE => + scanSpace () + [] + c = STRING_CHAR => scanString () + digit? ch => scanNumber () + c=ESCAPE => scanEscape() + scanError () + null b => nil + dqUnit constoken(ln,linepos,b,n+lnExtraBlanks linepos) + +-- to pair badge and badgee + +-- lfid x== ["id",INTERN x] +lfid x== ["id",INTERN(x, '"BOOT")] + +lfkey x==["key",keyword x] + +lfinteger x== + ["integer",x] +-- if EQUAL(x,'"0") +-- then ["id",INTERN x] +-- else if EQUAL(x,'"1") +-- then ["id",INTERN x] +-- else ["integer",x] + +lfrinteger (r,x)==["integer",CONCAT (r,CONCAT('"r",x))] +--lfrfloat(a,w,v)==["rfloat",CONCAT(a,'"r.",v)] +lffloat(a,w,e)==["float",CONCAT(a,'".",w,'"e",e)] +lfstring x==if #x=1 then ["char",x] else ["string",x] +lfcomment x== ["comment", x] +lfnegcomment x== ["negcomment", x] +lferror x==["error",x] +lfspaces x==["spaces",x] + +constoken(ln,lp,b,n)== +-- [b.0,b.1,cons(lp,n)] + a:=cons(b.0,b.1) + ncPutQ(a,"posn",cons(lp,n)) + a + +scanEscape()== + $n:=$n+1 + a:=scanEsc() + if a then scanWord true else nil + +scanEsc()== + if $n>=$sz + then if nextline($r) + then + while null $n repeat nextline($r) + scanEsc() + false + else false + else + n1:=STRPOSL('" ",$ln,$n,true) + if null n1 + then if nextline($r) + then + while null $n repeat nextline($r) + scanEsc() + false + else false + else + if $n=n1 + then true + else if QENUM($ln,n1)=ESCAPE + then + $n:=n1+1 + scanEsc() + false + else + $n:=n1 + startsNegComment?() or startsComment?() => + nextline($r) + scanEsc() + false + false + +startsComment?()== + if $n<$sz + then + if QENUM($ln,$n)=PLUSCOMMENT + then + www:=$n+1 + if www>=$sz + then false + else QENUM($ln,www) = PLUSCOMMENT + else false + else false + +startsNegComment?()== + if $n< $sz + then + if QENUM($ln,$n)=MINUSCOMMENT + then + www:=$n+1 + if www>=$sz + then false + else QENUM($ln,www) = MINUSCOMMENT + else false + else false + +scanNegComment()== + n:=$n + $n:=$sz + lfnegcomment SUBSTRING($ln,n,nil) + +scanComment()== + n:=$n + $n:=$sz + lfcomment SUBSTRING($ln,n,nil) + + +scanPunct()== + sss:=subMatch($ln,$n) + a:= # sss + if a=0 + then + scanError() + else + $n:=$n+a + scanKeyTr sss + +scanKeyTr w== + if EQ(keyword w,"DOT") + then if $floatok + then scanPossFloat(w) + else lfkey w + else + $floatok:=not scanCloser? w + lfkey w + +scanPossFloat (w)== + if $n>=$sz or not digit? $ln.$n + then lfkey w + else + w:=spleI(function digit?) + scanExponent('"0",w) + +scanCloser:=[")","}","]","|)","|}","|]"] + +scanCloser? w== MEMQ(keyword w,scanCloser) + +scanSpace()== + n:=$n + $n:=STRPOSL('" ",$ln,$n,true) + if null $n then $n:=# $ln + $floatok:=true + lfspaces ($n-n) + +scanString()== + $n:=$n+1 + $floatok:=false + lfstring scanS () + +scanS()== + if $n>=$sz + then + ncSoftError(cons($linepos,lnExtraBlanks $linepos+$n),"S2CN0001",[]) + '"" + else + n:=$n + strsym :=STRPOS ('"_"",$ln,$n,nil) or $sz + escsym:=STRPOS ('"__" + ,$ln,$n,nil) or $sz + mn:=MIN(strsym,escsym) + if mn=$sz + then + $n:=$sz + ncSoftError(cons($linepos,lnExtraBlanks $linepos+$n), + "S2CN0001",[]) + SUBSTRING($ln,n,nil) + else if mn=strsym + then + $n:=mn+1 + SUBSTRING($ln,n,mn-n) + else --escape is found first + str:=SUBSTRING($ln,n,mn-n)-- before escape + $n:=mn+1 + a:=scanEsc() -- case of end of line when false + b:=if a + then + str:=CONCAT(str,scanTransform($ln.$n)) + $n:=$n+1 + scanS() + else scanS() + CONCAT(str,b) +scanTransform x==x + +--idChar? x== scanLetter x or DIGITP x or MEMQ(x,'(_? _%)) + +--scanLetter x== +-- if not CHARP x +-- then false +-- else STRPOSL(scanTrTable,x,0,NIL) + +posend(line,n)== + while n<#line and idChar? line.n repeat n:=n+1 + n + +--numend(line,n)== +-- while n<#line and digit? line.n repeat n:=n+1 +-- n + +--startsId? x== scanLetter x or MEMQ(x,'(_? _%)) +digit? x== DIGITP x + +scanW(b)== -- starts pointing to first char + n1:=$n -- store starting character position + $n:=$n+1 -- the first character is not tested + l:=$sz + endid:=posend($ln,$n) + if endid=l or QENUM($ln,endid)^=ESCAPE + then -- not escaped + $n:=endid + [b,SUBSTRING($ln,n1,endid-n1)] -- l overflows + else -- escape and endid^=l + str:=SUBSTRING($ln,n1,endid-n1) + $n:=endid+1 + a:=scanEsc() + bb:=if a -- escape nonspace + then scanW(true) + else + if $n>=$sz + then [b,'""] + else + if idChar?($ln.$n) + then scanW(b) + else [b,'""] + [bb.0 or b,CONCAT(str,bb.1)] + +scanWord(esp) == + aaa:=scanW(false) + w:=aaa.1 + $floatok:=false + if esp or aaa.0 + then lfid w + else if keyword? w + then + $floatok:=true + lfkey w + else lfid w + + + +spleI(dig)==spleI1(dig,false) +spleI1(dig,zro) == + n:=$n + l:= $sz + while $n<l and FUNCALL(dig,($ln.$n)) repeat $n:=$n+1 + if $n=l or QENUM($ln,$n)^=ESCAPE + then if n=$n and zro + then '"0" + else SUBSTRING($ln,n,$n-n) + else -- escaped + str:=SUBSTRING($ln,n,$n-n) + $n:=$n+1 + a:=scanEsc() + bb:=spleI1(dig,zro)-- escape, anyno spaces are ignored + CONCAT(str,bb) + +scanCheckRadix(r,w)== + ns:=#w + done:=false + for i in 0..ns-1 repeat + a:=rdigit? w.i + if null a or a>=r + then ncSoftError(cons($linepos,lnExtraBlanks $linepos+$n-ns+i), + "S2CN0002", [w.i]) + +scanNumber() == + a := spleI(function digit?) + if $n>=$sz + then lfinteger a + else + if QENUM($ln,$n)^=RADIX_CHAR + then + if $floatok and QENUM($ln,$n)=DOT + then + n:=$n + $n:=$n+1 + if $n<$sz and QENUM($ln,$n)=DOT + then + $n:=n + lfinteger a + else + w:=spleI1(function digit?,true) + scanExponent(a,w) + else lfinteger a + else + $n:=$n+1 + w:=spleI1(function rdigit?,true) + scanCheckRadix(PARSE_-INTEGER a,w) + if $n>=$sz + then + lfrinteger(a,w) + else if QENUM($ln,$n)=DOT + then + n:=$n + $n:=$n+1 + if $n<$sz and QENUM($ln,$n)=DOT + then + $n:=n + lfrinteger(a,w) + else + --$n:=$n+1 + v:=spleI1(function rdigit?,true) + scanCheckRadix(PARSE_-INTEGER a,v) + scanExponent(CONCAT(a,'"r",w),v) + else lfrinteger(a,w) + +scanExponent(a,w)== + if $n>=$sz + then lffloat(a,w,'"0") + else + n:=$n + c:=QENUM($ln,$n) + if c=EXPONENT1 or c=EXPONENT2 + then + $n:=$n+1 + if $n>=$sz + then + $n:=n + lffloat(a,w,'"0") + else if digit?($ln.$n) + then + e:=spleI(function digit?) + lffloat(a,w,e) + else + c1:=QENUM($ln,$n) + if c1=PLUSCOMMENT or c1=MINUSCOMMENT + then + $n:=$n+1 + if $n>=$sz + then + $n:=n + lffloat(a,w,'"0") + else + if digit?($ln.$n) + then + e:=spleI(function digit?) + lffloat(a,w, + (if c1=MINUSCOMMENT then CONCAT('"-",e)else e)) + else + $n:=n + lffloat(a,w,'"0") + else lffloat(a,w,'"0") + +rdigit? x== + STRPOS(x,'"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",0,nil) + +scanError()== + n:=$n + $n:=$n+1 + ncSoftError(cons($linepos,lnExtraBlanks $linepos+$n), + "S2CN0003",[$ln.n]) + lferror ($ln.n) + + +keyword st == HGET(scanKeyTable,st) + +keyword? st == not null HGET(scanKeyTable,st) + +subMatch(l,i)==substringMatch(l,scanDict,i) + +substringMatch (l,d,i)== + h:= QENUM(l, i) + u:=ELT(d,h) + ll:=SIZE l + done:=false + s1:='"" + for j in 0.. SIZE u - 1 while not done repeat + s:=ELT(u,j) + ls:=SIZE s + done:=if ls+i > ll + then false + else + eql:= true + for k in 1..ls-1 while eql repeat + eql:= EQL(QENUM(s,k),QENUM(l,k+i)) + if eql + then + s1:=s + true + else false + s1 + + + +punctuation? c== scanPun.c=1 + |