-- Copyright (c) 1991-2002, The Numerical ALgorithms Group Ltd. -- All rights reserved. -- Copyright (C) 2007-2010, Gabriel Dos Reis. -- All rights reserved. -- -- Redistribution and use in source and binary forms, with or without -- modification, are permitted provided that the following conditions are -- met: -- -- - Redistributions of source code must retain the above copyright -- notice, this list of conditions and the following disclaimer. -- -- - Redistributions in binary form must reproduce the above copyright -- notice, this list of conditions and the following disclaimer in -- the documentation and/or other materials provided with the -- distribution. -- -- - Neither the name of The Numerical ALgorithms Group Ltd. nor the -- names of its contributors may be used to endorse or promote products -- derived from this software without specific prior written permission. -- -- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import bits import dq import incl import sys_-utility namespace BOOT module scan --% $RDigits == '"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" $smallLetters == '"abcdefghijklmnopqrstuvwxyz" --% Separators $SPACE == QENUM('" ", 0) ESCAPE == QENUM('"__ ", 0) STRING_CHAR == QENUM('"_" ", 0) PLUSCOMMENT == QENUM('"+ ", 0) MINUSCOMMENT == QENUM('"- ", 0) RADIX_CHAR == QENUM('"r ", 0) DOT == QENUM('". ", 0) EXPONENT1 == QENUM('"E ", 0) EXPONENT2 == QENUM('"e ", 0) CLOSEPAREN == QENUM('") ", 0) CLOSEANGLE == QENUM('"> ", 0) QUESTION == QENUM('"? ",0) --% Keywords scanKeyWords == [ _ ['"add", "ADD" ],_ ['"and", "AND" ],_ ['"assume","ASSUME" ],_ ['"break", "BREAK" ],_ ['"by", "BY" ],_ ['"case", "CASE" ],_ ['"default", "DEFAULT" ],_ ['"define", "DEFN" ],_ ['"do", "DO"],_ ['"else", "ELSE" ],_ ['"exist", "EXIST"],_ ['"exit", "EXIT" ],_ ['"export","EXPORT" ],_ ['"forall", "FORALL"],_ ['"for", "FOR" ],_ ['"free", "FREE" ],_ ['"from", "FROM" ],_ ['"has", "HAS" ],_ ['"if", "IF" ],_ ['"import", "IMPORT" ],_ ['"in", "IN" ],_ ['"inline", "INLINE" ],_ ['"is", "IS" ],_ ['"isnt", "ISNT" ],_ ['"iterate", "ITERATE"],_ ['"local", "local" ],_ ['"macro", "MACRO" ],_ ['"mod", "MOD" ],_ ['"or", "OR" ],_ ['"pretend","PRETEND" ],_ ['"quo","QUO" ],_ ['"rem","REM" ],_ ['"repeat","REPEAT" ],_ ['"return","RETURN" ],_ ['"rule","RULE" ],_ ['"then","THEN" ],_ ['"where","WHERE" ],_ ['"while","WHILE" ],_ ['"with","WITH" ],_ ['"|","BAR"],_ ['".","DOT" ],_ ['"::","COERCE" ],_ ['":","COLON" ],_ ['":-","COLONDASH" ],_ ['"@","AT" ],_ ['",","COMMA" ],_ ['";","SEMICOLON" ],_ ['"**","POWER" ],_ ['"*","TIMES" ],_ ['"+","PLUS" ],_ ['"-","MINUS" ],_ ['"<","LT" ],_ ['">","GT" ],_ ['"<=","LE" ],_ ['">=","GE" ],_ ['"=", "EQUAL"],_ ['"~=","NOTEQUAL" ],_ ['"~","~" ],_ ['"^","CARAT" ],_ ['"..","SEG" ],_ ['"#","#" ],_ ['"&","AMPERSAND" ],_ ['"$","$" ],_ ['"/","SLASH" ],_ ['"\","BACKSLASH" ],_ ['"//","SLASHSLASH" ],_ ['"\\","BACKSLASHBACKSLASH" ],_ ['"/\","SLASHBACKSLASH" ],_ ['"\/","BACKSLASHSLASH" ],_ ['"=>","EXIT" ],_ ['":=","BECOMES" ],_ ['"==","DEF" ],_ ['"==>","MDEF" ],_ ['"->","ARROW" ],_ ['"<-","LARROW" ],_ ['"+->","GIVES" ],_ ['"(","(" ],_ ['")",")" ],_ ['"(|","(|" ],_ ['"|)","|)" ],_ ['"[","[" ],_ ['"]","]" ],_ ['"[__]","[]" ],_ ['"{","{" ],_ ['"}","}" ],_ ['"{__}","{}" ],_ ['"[|","[|" ],_ ['"|]","|]" ],_ ['"[|__|]","[||]" ],_ ['"{|","{|" ],_ ['"|}","|}" ],_ ['"{|__|}","{||}" ],_ ['"<<","OANGLE" ],_ ['">>","CANGLE" ],_ ['"'", "'" ],_ ['"`", "BACKQUOTE" ]_ ] scanKeyTableCons()== KeyTable := hashTable 'EQUAL for st in scanKeyWords repeat HPUT(KeyTable,first st,second st) KeyTable scanKeyTable:=scanKeyTableCons() scanInsert(s,d) == l := #s h := QENUM(s,0) u := d.h n := #u k:=0 while l <= #(u.k) repeat k := k+1 v := MAKE_-VEC(n+1) for i in 0..k-1 repeat VEC_-SETELT(v,i,u.i) VEC_-SETELT(v,k,s) for i in k..n-1 repeat VEC_-SETELT(v,i+1,u.i) VEC_-SETELT(d,h,v) s scanDictCons()== l:= HKEYS scanKeyTable d := a := MAKE_-VEC(256) b := MAKE_-VEC(1) VEC_-SETELT(b,0,MAKE_-CVEC 0) for i in 0..255 repeat VEC_-SETELT(a,i,b) a for s in l repeat scanInsert(s,d) d scanDict:=scanDictCons() scanPunCons()== listing := HKEYS scanKeyTable a := MAKE_-BVEC 256 -- SETSIZE(a,256) for i in 0..255 repeat BVEC_-SETELT(a,i,0) for k in listing repeat if not startsId? k.0 then BVEC_-SETELT(a,QENUM(k,0),1) a scanPun:=scanPunCons() --for i in ["COLON","MINUS"] repeat -- property(i,'PREGENERIC) := true for i in [ _ ["EQUAL" ,"="], _ ["TIMES" ,"*"], _ ["HAS" ,"has"], _ ["CASE" ,"case"], _ ["REM" ,"rem"], _ ["MOD" ,"mod"], _ ["QUO" ,"quo"], _ ["SLASH" ,"/"], _ ["BACKSLASH","\"], _ ["SLASHSLASH" ,"//"], _ ["BACKSLASHBACKSLASH","\\"], _ ["SLASHBACKSLASH" ,"/\"], _ ["BACKSLASHSLASH","\/"], _ ["POWER" ,"**"], _ ["CARAT" ,"^"], _ ["PLUS" ,"+"], _ ["MINUS" ,"-"], _ ["LT" ,"<"], _ ["GT" ,">"], _ ["OANGLE" ,"<<"], _ ["CANGLE" ,">>"], _ ["LE" ,"<="], _ ["GE" ,">="], _ ["NOTEQUAL" ,"~="], _ ["BY" ,"by"], _ ["ARROW" ,"->"], _ ["LARROW" ,"<-"], _ ["BAR" ,"|"], _ ["SEG" ,".."] _ ] repeat property(first i,'INFGENERIC) := second i -- Scanner -- lineoftoks bites off a token-dq from a line-stream -- returning the token-dq and the rest of the line-stream scanIgnoreLine(ln,n)== n = nil => n fst := QENUM(ln,0) fst = CLOSEPAREN => incPrefix?('"command",1,ln) => true nil n nextline(s)== npNull s => false $f := first s $r := rest s $ln := rest $f $linepos := CAAR $f $n := STRPOSL('" ",$ln,0,true)-- spaces at beginning $sz := #$ln true lineoftoks(s)== $f: local := nil $r: local := nil $ln: local := nil $linepos: local := nil $n: local := nil $sz: local := nil $floatok: local := true not nextline s => [nil,:nil] null scanIgnoreLine($ln,$n) => [nil,:$r] -- line of spaces or starts ) or > toks := [] a := incPrefix?('"command",1,$ln) a => $ln := SUBSTRING($ln,8,nil) b := dqUnit constoken($ln,$linepos,["command",$ln],0) [[[b,s]],:$r] while $n<$sz repeat toks := dqAppend(toks,scanToken()) null toks => [[],:$r] [[[toks,s]],:$r] scanToken() == ln:=$ln c:=QENUM($ln,$n) linepos:=$linepos n:=$n ch:=$ln.$n b:= startsComment?() => scanComment() [] startsNegComment?() => scanNegComment() [] c= QUESTION => $n:=$n+1 lfid '"?" punctuation? c => scanPunct () startsId? ch => scanWord (false) c=$SPACE => scanSpace () [] c = STRING_CHAR => scanString () digit? ch => scanNumber () c=ESCAPE => scanEscape() scanError () null b => nil dqUnit constoken(ln,linepos,b,n+lnExtraBlanks linepos) -- to pair badge and badgee -- lfid x== ["id",INTERN x] lfid x== ["id",INTERN(x, '"BOOT")] lfkey x==["key",keyword x] lfinteger x== ["integer",x] -- if EQUAL(x,'"0") -- then ["id",INTERN x] -- else if EQUAL(x,'"1") -- then ["id",INTERN x] -- else ["integer",x] lfrinteger (r,x)==["integer",strconc (r,strconc('"r",x))] --lfrfloat(a,w,v)==["rfloat",strconc(a,'"r.",v)] lffloat(a,w,e)==["float",strconc(a,'".",w,'"e",e)] lfstring x==if #x=1 then ["char",x] else ["string",x] lfcomment x== ["comment", x] lfnegcomment x== ["negcomment", x] lferror x==["error",x] lfspaces x==["spaces",x] constoken(ln,lp,b,n)== -- [b.0,b.1,[lp,:n]] a:=[b.0,:b.1] ncPutQ(a,"posn",[lp,:n]) a scanEscape()== $n:=$n+1 a:=scanEsc() if a then scanWord true else nil scanEsc()== if $n>=$sz then if nextline($r) then while null $n repeat nextline($r) scanEsc() false else false else n1:=STRPOSL('" ",$ln,$n,true) if null n1 then if nextline($r) then while null $n repeat nextline($r) scanEsc() false else false else if $n=n1 then true else if QENUM($ln,n1)=ESCAPE then $n:=n1+1 scanEsc() false else $n:=n1 startsNegComment?() or startsComment?() => nextline($r) scanEsc() false false startsComment?()== if $n<$sz then if QENUM($ln,$n)=PLUSCOMMENT then www:=$n+1 if www>=$sz then false else QENUM($ln,www) = PLUSCOMMENT else false else false startsNegComment?()== if $n< $sz then if QENUM($ln,$n)=MINUSCOMMENT then www:=$n+1 if www>=$sz then false else QENUM($ln,www) = MINUSCOMMENT else false else false scanNegComment()== n:=$n $n:=$sz lfnegcomment SUBSTRING($ln,n,nil) scanComment()== n:=$n $n:=$sz lfcomment SUBSTRING($ln,n,nil) scanPunct()== sss:=subMatch($ln,$n) a:= # sss if a=0 then scanError() else $n:=$n+a scanKeyTr sss scanKeyTr w== if keyword w = "DOT" then if $floatok then scanPossFloat(w) else lfkey w else $floatok:=not scanCloser? w lfkey w scanPossFloat (w)== if $n>=$sz or not digit? $ln.$n then lfkey w else w:=spleI(function digit?) scanExponent('"0",w) scanCloser == [")","}","]","|)","|}","|]"] scanCloser? w== MEMQ(keyword w,scanCloser) scanSpace()== n:=$n $n:=STRPOSL('" ",$ln,$n,true) if null $n then $n:=# $ln $floatok:=true lfspaces ($n-n) scanString()== $n:=$n+1 $floatok:=false lfstring scanS () scanS()== if $n>=$sz then ncSoftError([$linepos,:lnExtraBlanks $linepos+$n],"S2CN0001",[]) '"" else n:=$n strsym :=STRPOS ('"_"",$ln,$n,nil) or $sz escsym:=STRPOS ('"__" ,$ln,$n,nil) or $sz mn:=MIN(strsym,escsym) if mn=$sz then $n:=$sz ncSoftError([$linepos,:lnExtraBlanks $linepos+$n], "S2CN0001",[]) SUBSTRING($ln,n,nil) else if mn=strsym then $n:=mn+1 SUBSTRING($ln,n,mn-n) else --escape is found first str:=SUBSTRING($ln,n,mn-n)-- before escape $n:=mn+1 a:=scanEsc() -- case of end of line when false b:=if a then str:=strconc(str,scanTransform($ln.$n)) $n:=$n+1 scanS() else scanS() strconc(str,b) scanTransform x==x --idChar? x== scanLetter x or digit? x or x in '(_? _%) --scanLetter x== -- if not char? x -- then false -- else STRPOSL(scanTrTable,x,0,NIL) posend(line,n)== while n<#line and idChar? line.n repeat n:=n+1 n --numend(line,n)== -- while n<#line and digit? line.n repeat n:=n+1 -- n --startsId? x== scanLetter x or x in '(_? _%) scanW(b)== -- starts pointing to first char n1:=$n -- store starting character position $n:=$n+1 -- the first character is not tested l:=$sz endid:=posend($ln,$n) if endid=l or QENUM($ln,endid) ~= ESCAPE then -- not escaped $n:=endid [b,SUBSTRING($ln,n1,endid-n1)] -- l overflows else -- escape and endid ~= l str:=SUBSTRING($ln,n1,endid-n1) $n:=endid+1 a:=scanEsc() bb:=if a -- escape nonspace then scanW(true) else if $n>=$sz then [b,'""] else if idChar?($ln.$n) then scanW(b) else [b,'""] [bb.0 or b,strconc(str,bb.1)] scanWord(esp) == aaa:=scanW(false) w:=aaa.1 $floatok:=false if esp or aaa.0 then lfid w else if keyword? w then $floatok:=true lfkey w else lfid w spleI(dig)==spleI1(dig,false) spleI1(dig,zro) == n:=$n l:= $sz while $n ncSoftError([$linepos,:lnExtraBlanks $linepos+$n],"S2CN0004",[a]) done:=false for i in 0..ns-1 repeat a:=rdigit? w.i if null a or a>=r then ncSoftError([$linepos,:lnExtraBlanks $linepos+$n-ns+i], "S2CN0002", [w.i]) scanNumber() == a := spleI(function digit?) if $n>=$sz then lfinteger a else if QENUM($ln,$n) ~= RADIX_CHAR then if $floatok and QENUM($ln,$n)=DOT then n:=$n $n:=$n+1 if $n<$sz and QENUM($ln,$n)=DOT then $n:=n lfinteger a else w:=spleI1(function digit?,true) scanExponent(a,w) else lfinteger a else $n:=$n+1 w:=spleI1(function rdigit?,false) scanCheckRadix(a,w) if $n>=$sz then lfrinteger(a,w) else if QENUM($ln,$n)=DOT then n:=$n $n:=$n+1 if $n<$sz and QENUM($ln,$n)=DOT then $n:=n lfrinteger(a,w) else --$n:=$n+1 v:=spleI1(function rdigit?,true) scanCheckRadix(a,v) scanExponent(strconc(a,'"r",w),v) else lfrinteger(a,w) scanExponent(a,w)== if $n>=$sz then lffloat(a,w,'"0") else n:=$n c:=QENUM($ln,$n) if c=EXPONENT1 or c=EXPONENT2 then $n:=$n+1 if $n>=$sz then $n:=n lffloat(a,w,'"0") else if digit?($ln.$n) then e:=spleI(function digit?) lffloat(a,w,e) else c1:=QENUM($ln,$n) if c1=PLUSCOMMENT or c1=MINUSCOMMENT then $n:=$n+1 if $n>=$sz then $n:=n lffloat(a,w,'"0") else if digit?($ln.$n) then e:=spleI(function digit?) lffloat(a,w, (if c1=MINUSCOMMENT then strconc('"-",e)else e)) else $n:=n lffloat(a,w,'"0") else lffloat(a,w,'"0") rdigit? x== d := STRPOS(x,$RDigits,0,nil) => d d := STRPOS(x,$smallLetters,0,nil) => 10 + d nil scanError()== n:=$n $n:=$n+1 ncSoftError([$linepos,:lnExtraBlanks $linepos+$n], "S2CN0003",[$ln.n]) lferror ($ln.n) keyword st == HGET(scanKeyTable,st) keyword? st == not null HGET(scanKeyTable,st) subMatch(l,i)==substringMatch(l,scanDict,i) substringMatch (l,d,i)== h:= QENUM(l, i) u:=d.h ll:=SIZE l done:=false s1:='"" for j in 0.. SIZE u - 1 while not done repeat s:=u.j ls:=SIZE s done:=if ls+i > ll then false else eql:= true for k in 1..ls-1 while eql repeat eql:= EQL(QENUM(s,k),QENUM(l,k+i)) if eql then s1:=s true else false s1 punctuation? c== scanPun.c=1