-- Copyright (c) 1991-2002, The Numerical ALgorithms Group Ltd. -- All rights reserved. -- Copyright (C) 2007-2011, Gabriel Dos Reis. -- All rights reserved. -- -- Redistribution and use in source and binary forms, with or without -- modification, are permitted provided that the following conditions are -- met: -- -- - Redistributions of source code must retain the above copyright -- notice, this list of conditions and the following disclaimer. -- -- - Redistributions in binary form must reproduce the above copyright -- notice, this list of conditions and the following disclaimer in -- the documentation and/or other materials provided with the -- distribution. -- -- - Neither the name of The Numerical ALgorithms Group Ltd. nor the -- names of its contributors may be used to endorse or promote products -- derived from this software without specific prior written permission. -- -- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import bits import dq import incl import sys_-utility namespace BOOT module scan --% $RDigits == '"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" $smallLetters == '"abcdefghijklmnopqrstuvwxyz" --% Keywords scanKeyWords == [ _ ['"add", "ADD" ],_ ['"and", "AND" ],_ ['"assume","ASSUME" ],_ ['"break", "BREAK" ],_ ['"by", "BY" ],_ ['"case", "CASE" ],_ ['"default", "DEFAULT" ],_ ['"define", "DEFN" ],_ ['"do", "DO"],_ ['"else", "ELSE" ],_ ['"exist", "EXIST"],_ ['"exit", "EXIT" ],_ ['"export","EXPORT" ],_ ['"forall", "FORALL"],_ ['"for", "FOR" ],_ ['"free", "FREE" ],_ ['"from", "FROM" ],_ ['"has", "HAS" ],_ ['"if", "IF" ],_ ['"import", "IMPORT" ],_ ['"in", "IN" ],_ ['"inline", "INLINE" ],_ ['"is", "IS" ],_ ['"isnt", "ISNT" ],_ ['"iterate", "ITERATE"],_ ['"local", "local" ],_ ['"macro", "MACRO" ],_ ['"mod", "MOD" ],_ ['"or", "OR" ],_ ['"pretend","PRETEND" ],_ ['"quo","QUO" ],_ ['"rem","REM" ],_ ['"repeat","REPEAT" ],_ ['"return","RETURN" ],_ ['"rule","RULE" ],_ ['"then","THEN" ],_ ['"where","WHERE" ],_ ['"while","WHILE" ],_ ['"with","WITH" ],_ ['"|","BAR"],_ ['".","DOT" ],_ ['"::","COERCE" ],_ ['":","COLON" ],_ ['":-","COLONDASH" ],_ ['"@","AT" ],_ ['",","COMMA" ],_ ['";","SEMICOLON" ],_ ['"**","POWER" ],_ ['"*","TIMES" ],_ ['"+","PLUS" ],_ ['"-","MINUS" ],_ ['"<","LT" ],_ ['">","GT" ],_ ['"<=","LE" ],_ ['">=","GE" ],_ ['"=", "EQUAL"],_ ['"~=","NOTEQUAL" ],_ ['"~","~" ],_ ['"^","CARAT" ],_ ['"..","SEG" ],_ ['"#","#" ],_ ['"&","AMPERSAND" ],_ ['"$","$" ],_ ['"/","SLASH" ],_ ['"\","BACKSLASH" ],_ ['"//","SLASHSLASH" ],_ ['"\\","BACKSLASHBACKSLASH" ],_ ['"/\","SLASHBACKSLASH" ],_ ['"\/","BACKSLASHSLASH" ],_ ['"=>","EXIT" ],_ ['":=","BECOMES" ],_ ['"==","DEF" ],_ ['"==>","MDEF" ],_ ['"->","ARROW" ],_ ['"<-","LARROW" ],_ ['"+->","GIVES" ],_ ['"(","(" ],_ ['")",")" ],_ ['"(|","(|" ],_ ['"|)","|)" ],_ ['"[","[" ],_ ['"]","]" ],_ ['"[__]","[]" ],_ ['"{","{" ],_ ['"}","}" ],_ ['"{__}","{}" ],_ ['"[|","[|" ],_ ['"|]","|]" ],_ ['"[|__|]","[||]" ],_ ['"{|","{|" ],_ ['"|}","|}" ],_ ['"{|__|}","{||}" ],_ ['"<<","OANGLE" ],_ ['">>","CANGLE" ],_ ['"'", "'" ],_ ['"`", "BACKQUOTE" ]_ ] scanKeyTableCons()== KeyTable := hashTable 'EQUAL for st in scanKeyWords repeat tableValue(KeyTable,first st) := second st KeyTable scanKeyTable:=scanKeyTableCons() scanInsert(s,d) == l := #s h := codePoint stringChar(s,0) u := vectorRef(d,h) n := #u k:=0 while l <= #vectorRef(u,k) repeat k := k+1 v := newVector(n+1) for i in 0..k-1 repeat vectorRef(v,i) := vectorRef(u,i) vectorRef(v,k) := s for i in k..n-1 repeat vectorRef(v,i+1) := vectorRef(u,i) vectorRef(d,h) := v s scanDictCons()== d := a := newVector 256 b := newVector 1 vectorRef(b,0) := '"" for i in 0..255 repeat vectorRef(a,i) := b a for [s,:.] in entries scanKeyTable repeat scanInsert(s,d) d scanDict:=scanDictCons() scanPunCons()== a := makeBitVector 256 for i in 0..255 repeat bitref(a,i) := 0 for [k,:.] in entries scanKeyTable repeat if not startsId? stringChar(k,0) then bitref(a,codePoint stringChar(k,0)) := 1 a scanPun:=scanPunCons() --for i in ["COLON","MINUS"] repeat -- property(i,'PREGENERIC) := true for i in [ _ ["EQUAL" ,"="], _ ["TIMES" ,"*"], _ ["HAS" ,"has"], _ ["CASE" ,"case"], _ ["REM" ,"rem"], _ ["MOD" ,"mod"], _ ["QUO" ,"quo"], _ ["SLASH" ,"/"], _ ["BACKSLASH","\"], _ ["SLASHSLASH" ,"//"], _ ["BACKSLASHBACKSLASH","\\"], _ ["SLASHBACKSLASH" ,"/\"], _ ["BACKSLASHSLASH","\/"], _ ["POWER" ,"**"], _ ["CARAT" ,"^"], _ ["PLUS" ,"+"], _ ["MINUS" ,"-"], _ ["LT" ,"<"], _ ["GT" ,">"], _ ["OANGLE" ,"<<"], _ ["CANGLE" ,">>"], _ ["LE" ,"<="], _ ["GE" ,">="], _ ["NOTEQUAL" ,"~="], _ ["BY" ,"by"], _ ["ARROW" ,"->"], _ ["LARROW" ,"<-"], _ ["BAR" ,"|"], _ ["SEG" ,".."] _ ] repeat property(first i,'INFGENERIC) := second i -- Scanner -- lineoftoks bites off a token-dq from a line-stream -- returning the token-dq and the rest of the line-stream scanIgnoreLine(ln,n)== n = nil => n stringChar(ln,0) = char ")" => incPrefix?('"command",1,ln) => true nil n nextline(s)== npNull s => false $f := first s $r := rest s $ln := rest $f $linepos := CAAR $f $n := STRPOSL('" ",$ln,0,true)-- spaces at beginning $sz := #$ln true lineoftoks(s)== $f: local := nil $r: local := nil $ln: local := nil $linepos: local := nil $n: local := nil $sz: local := nil $floatok: local := true not nextline s => [nil,:nil] null scanIgnoreLine($ln,$n) => [nil,:$r] -- line of spaces or starts ) or > toks := [] a := incPrefix?('"command",1,$ln) a => $ln := subString($ln,8) b := dqUnit constoken($linepos,["command",$ln],0) [[[b,s]],:$r] while $n<$sz repeat toks := dqAppend(toks,scanToken()) null toks => [nil,:$r] [[[toks,s]],:$r] scanToken() == linepos := $linepos n := $n ch := stringChar($ln,$n) b := startsComment?() => scanComment() [] startsNegComment?() => scanNegComment() [] ch = char "?" => $n := $n+1 lfid '"?" punctuation? codePoint ch => scanPunct() startsId? ch => scanWord(false) ch = char " " => scanSpace() [] ch = char "_"" => scanString() digit? ch => scanNumber() ch = char "__" => scanEscape() scanError() null b => nil dqUnit constoken(linepos,b,n+lnExtraBlanks linepos) -- to pair badge and badgee lfid x == ["id",makeSymbol(x, '"BOOT")] lfkey x == ["key",keyword x] lfinteger x== ["integer",x] lfrinteger (r,x)== ["integer",strconc (r,strconc('"r",x))] --lfrfloat(a,w,v)==["rfloat",strconc(a,'"r.",v)] lffloat(a,w,e)== ["float",strconc(a,'".",w,'"e",e)] lfstring x== #x = 1 => ["char",x] ["string",x] lfcomment x== ["comment", x] lfnegcomment x == ["negcomment", x] lferror x == ["error",x] lfspaces x == ["spaces",x] constoken(lp,b,n)== a := [b.0,:b.1] ncPutQ(a,"posn",[lp,:n]) a scanEscape()== $n := $n+1 scanEsc() => scanWord true nil scanEsc()== $n >= $sz => nextline($r) => while null $n repeat nextline($r) scanEsc() false false n1 := STRPOSL('" ",$ln,$n,true) n1 = nil => nextline($r) => while null $n repeat nextline($r) scanEsc() false false $n = n1 => true stringChar($ln,n1) = char "__" => $n := n1+1 scanEsc() false $n := n1 startsNegComment?() or startsComment?() => nextline($r) scanEsc() false false startsComment?()== $n < $sz => stringChar($ln,$n) = char "+" => www := $n + 1 www >= $sz => false stringChar($ln,www) = char "+" false false startsNegComment?()== $n < $sz => stringChar($ln,$n) = char "-" => www := $n+1 www >= $sz => false stringChar($ln,www) = char "-" false false scanNegComment()== n := $n $n := $sz lfnegcomment subString($ln,n) scanComment()== n := $n $n := $sz lfcomment subString($ln,n) scanPunct()== sss := subMatch($ln,$n) a := #sss a = 0 => scanError() $n := $n+a scanKeyTr sss scanKeyTr w== keyword w = "DOT" => $floatok => scanPossFloat(w) lfkey w $floatok := not scanCloser? w lfkey w scanPossFloat (w)== $n >= $sz or not digit? $ln.$n => lfkey w w := spleI(function digit?) scanExponent('"0",w) scanCloser == [")","}","]","|)","|}","|]"] scanCloser? w== symbolMember?(keyword w,scanCloser) scanSpace()== n := $n $n := STRPOSL('" ",$ln,$n,true) if $n = nil then $n := #$ln $floatok := true lfspaces($n-n) scanString()== $n := $n+1 $floatok := false lfstring scanS() scanS()== $n >= $sz => ncSoftError([$linepos,:lnExtraBlanks $linepos+$n],"S2CN0001",[]) '"" n := $n strsym := STRPOS ('"_"",$ln,$n,nil) or $sz escsym := STRPOS ('"__",$ln,$n,nil) or $sz mn := MIN(strsym,escsym) mn = $sz => $n:=$sz ncSoftError([$linepos,:lnExtraBlanks $linepos+$n], "S2CN0001",[]) subString($ln,n) mn = strsym => $n:=mn+1 subString($ln,n,mn-n) --escape is found first str := subString($ln,n,mn-n)-- before escape $n := mn+1 a := scanEsc() -- case of end of line when false b := a => str := strconc(str,scanTransform($ln.$n)) $n := $n+1 scanS() scanS() strconc(str,b) scanTransform x == x --idChar? x== scanLetter x or digit? x or x in '(_? _%) --scanLetter x== -- if not char? x -- then false -- else STRPOSL(scanTrTable,x,0,nil) posend(line,n)== while n<#line and idChar? line.n repeat n := n+1 n --numend(line,n)== -- while n<#line and digit? line.n repeat n:=n+1 -- n --startsId? x== scanLetter x or x in '(_? _%) scanW(b)== -- starts pointing to first char n1 := $n -- store starting character position $n := $n+1 -- the first character is not tested l := $sz endid := posend($ln,$n) endid=l or stringChar($ln,endid) ~= char "__" => -- not escaped $n:=endid [b,subString($ln,n1,endid-n1)] -- l overflows -- escape and endid ~= l str := subString($ln,n1,endid-n1) $n := endid+1 a := scanEsc() bb := a => scanW(true) -- escape nonspace $n >= $sz => [b,'""] idChar?($ln.$n) => scanW(b) [b,'""] [bb.0 or b,strconc(str,bb.1)] scanWord(esp) == aaa := scanW(false) w := aaa.1 $floatok := false esp or aaa.0 => lfid w keyword? w => $floatok:=true lfkey w lfid w spleI(dig) == spleI1(dig,false) spleI1(dig,zro) == n := $n l := $sz while $n<l and FUNCALL(dig,($ln.$n)) repeat $n := $n+1 $n = l or stringChar($ln,$n) ~= char "__" => n = $n and zro => '"0" subString($ln,n,$n-n) -- escaped str:=subString($ln,n,$n-n) $n:=$n+1 a:=scanEsc() bb:=spleI1(dig,zro)-- escape, anyno spaces are ignored strconc(str,bb) scanCheckRadix(a,w)== r := readInteger a ns := #w ns = 0 => ncSoftError([$linepos,:lnExtraBlanks $linepos+$n],"S2CN0004",[a]) done := false for i in 0..ns-1 repeat a := rdigit? w.i a = nil or a>=r => ncSoftError([$linepos,:lnExtraBlanks $linepos+$n-ns+i],"S2CN0002", [w.i]) scanNumber() == a := spleI(function digit?) $n >= $sz => lfinteger a stringChar($ln,$n) ~= char "r" => if $floatok and stringChar($ln,$n) = char "." then n:=$n $n:=$n+1 if $n<$sz and stringChar($ln,$n) = char "." then $n:=n lfinteger a else w:=spleI1(function digit?,true) scanExponent(a,w) else lfinteger a $n := $n+1 w := spleI1(function rdigit?,false) scanCheckRadix(a,w) $n >= $sz => lfrinteger(a,w) stringChar($ln,$n) = char "." => n := $n $n := $n+1 $n < $sz and stringChar($ln,$n) = char "." => $n :=n lfrinteger(a,w) v := spleI1(function rdigit?,true) scanCheckRadix(a,v) scanExponent(strconc(a,'"r",w),v) lfrinteger(a,w) scanExponent(a,w)== $n >= $sz => lffloat(a,w,'"0") n := $n c := stringChar($ln,$n) c = char "E" or c = char "e" => $n := $n + 1 $n >= $sz => $n:=n lffloat(a,w,'"0") digit?($ln.$n) => e := spleI(function digit?) lffloat(a,w,e) c := stringChar($ln,$n) c = char "+" or c = char "-" => $n := $n + 1 $n >= $sz => $n := n lffloat(a,w,'"0") digit? stringChar($ln,$n) => e := spleI(function digit?) lffloat(a,w,(c = char "-" => strconc('"-",e); e)) $n := n lffloat(a,w,'"0") lffloat(a,w,'"0") rdigit? x== d := STRPOS(x,$RDigits,0,nil) => d d := STRPOS(x,$smallLetters,0,nil) => 10 + d nil scanError()== n := $n $n := $n+1 ncSoftError([$linepos,:lnExtraBlanks $linepos+$n],"S2CN0003",[$ln.n]) lferror($ln.n) keyword st == tableValue(scanKeyTable,st) keyword? st == not null tableValue(scanKeyTable,st) subMatch(l,i) == substringMatch(l,scanDict,i) substringMatch (l,d,i)== h := codePoint stringChar(l, i) u := d.h ll := #l done := false s1 := '"" for j in 0..#u - 1 while not done repeat s := u.j ls := #s done := ls+i > ll => false eql := true for k in 1..ls-1 while eql repeat eql := stringChar(s,k) = stringChar(l,k+i) eql => s1 := s true false s1 punctuation? c == scanPun.c=1