aboutsummaryrefslogtreecommitdiff
path: root/src/interp/guess.boot
diff options
context:
space:
mode:
authordos-reis <gdr@axiomatics.org>2007-09-20 14:50:49 +0000
committerdos-reis <gdr@axiomatics.org>2007-09-20 14:50:49 +0000
commit0850ca5458cb09b2d04cec162558500e9a05cf4a (patch)
treeaa76b50f08c662dab9a49b6ee9f0dc7318139ea1 /src/interp/guess.boot
parent6f8caa148526efc14239febdc12f91165389a8ea (diff)
downloadopen-axiom-0850ca5458cb09b2d04cec162558500e9a05cf4a.tar.gz
Revert commits to the wrong tree.
Diffstat (limited to 'src/interp/guess.boot')
-rw-r--r--src/interp/guess.boot347
1 files changed, 0 insertions, 347 deletions
diff --git a/src/interp/guess.boot b/src/interp/guess.boot
deleted file mode 100644
index 8dde919c..00000000
--- a/src/interp/guess.boot
+++ /dev/null
@@ -1,347 +0,0 @@
--- Copyright (c) 1991-2002, The Numerical ALgorithms Group Ltd.
--- All rights reserved.
---
--- Redistribution and use in source and binary forms, with or without
--- modification, are permitted provided that the following conditions are
--- met:
---
--- - Redistributions of source code must retain the above copyright
--- notice, this list of conditions and the following disclaimer.
---
--- - Redistributions in binary form must reproduce the above copyright
--- notice, this list of conditions and the following disclaimer in
--- the documentation and/or other materials provided with the
--- distribution.
---
--- - Neither the name of The Numerical ALgorithms Group Ltd. nor the
--- names of its contributors may be used to endorse or promote products
--- derived from this software without specific prior written permission.
---
--- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
--- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
--- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
--- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
--- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
--- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
--- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
--- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
--- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
--- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
--- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-$minThreshold := 3
-$maxThreshold := 7
-
---=======================================================================
--- Build Directories
---=======================================================================
-buildOperationWordTable() ==
- $opWordTable := buildWordTable [PNAME x for x in allOperations()]
-
-buildWordTable u ==
- table:= MAKE_-HASHTABLE 'ID
- for s in u repeat
- words := wordsOfString s
- key := UPCASE s.0
- HPUT(table,key,[[s,:words],:HGET(table,key)])
- for key in HKEYS table repeat
- HPUT(table,key,
- listSort(function GLESSEQP,removeDupOrderedAlist
- listSort(function GLESSEQP, HGET(table,key),function CAR),
- function CADR))
- table
-
-measureWordTable u ==
- +/[+/[#entry for entry in HGET(u,key)] for key in HKEYS u]
-
-removeDupOrderedAlist u ==
- -- removes duplicate entries in ordered alist
- -- (where duplicates are adjacent)
- for x in tails u repeat
- (y := rest x) and first first x = first first y => RPLACD(x,rest y)
- u
-
-wordsOfString(s) == [UPCASE x for x in wordsOfStringKeepCase s]
-
-wordsOfStringKeepCase s == wordsOfString1(s,0) or [COPY s]
-
-wordsOfString1(s,j) ==
- k := or/[i for i in j..SUB1(MAXINDEX(s)) | UPPER_-CASE_-P s.i] =>
- tailWords:=
- UPPER_-CASE_-P s.(k+1) =>
- n:= or/[i for i in (k+2)..SUB1(MAXINDEX(s))|not UPPER_-CASE_-P s.i]
- null n => [SUBSTRING(s,k,nil)]
- n > k+1 => [SUBSTRING(s,k,n-k-1),:wordsOfString1(s,n-1)]
- m := or/[i for i in (k+2)..SUB1(MAXINDEX(s)) | UPPER_-CASE_-P s.i] =>
- [SUBSTRING(s,k,m-k),:wordsOfString1(s,m)]
- [SUBSTRING(s,k,nil)]
- k > j+1 => [SUBSTRING(s,j,k-j),:tailWords]
- tailWords
- nil
-
-wordKeys s ==
- REMDUP [UPCASE s.0,:fn(s,1,-1,MAXINDEX s,nil)] where fn(s,i,lastKeyIndex,n,acc) ==
- i > n => acc
- UPPER_-CASE_-P s.i =>
--- i = lastKeyIndex + 1 => fn(s,i + 1,i,n,[s.i,:rest acc])
- fn(s,i + 1,i,n,[s.i,:acc])
- fn(s,i + 1,lastKeyIndex,n,acc)
-
---=======================================================================
--- Augment Function Directories
---=======================================================================
-add2WordFunctionTable fn ==
---called from DEF
- $functionTable and
- null LASSOC(s := PNAME fn,HGET($functionTable,(key := UPCASE s.0))) =>
- HPUT($functionTable,key,[[s,:wordsOfString s],:HGET($functionTable,key)])
-
---=======================================================================
--- Guess Function Name
---=======================================================================
-findWords(word,table) ==
- $lastWord := word
- $lastTable:= table
- $totalWords:= nil
- $countThreshold := $minThreshold
- $lastMinimum := -1
- res := findApproximateWords(word,table)
- if null res then
- $countThreshold := $countThreshold + 2
- res := findApproximateWords(word,table)
- $lastAlist := mySort res =>
--- $lastMinimum := CAR LAST $lastAlist
--- $lastWords := wordSort CDAR $lastAlist
--- $totalWords:= $lastWords
--- $lastAlist := CDR $lastAlist
--- $totalWords
- $lastMinimum := CAAR $lastAlist
- $lastWords := wordSort CDAR $lastAlist
- $totalWords:= $lastWords
- $lastAlist := CDR $lastAlist
- $totalWords
- $lastWords := nil
-
-wordSort u == REMDUP listSort(function GLESSEQP,u)
-
-more() == moreWords($lastWord,$lastTable)
-
-moreWords(word,table) ==
- $lastAlist =>
- $lastMinimum := CAR LAST pp $lastAlist
- numberOfLastWords := #$lastWords
- $lastWords := "append"/(ASSOCRIGHT $lastAlist)
- if #$lastWords > numberOfLastWords then
- trialLastAlist :=
- [p for p in $lastAlist | p.0 < $maxThreshold]
- trialLastWords := "append"/(ASSOCRIGHT trialLastAlist)
- if #trialLastWords > numberOfLastWords then
- $lastWords := trialLastWords
- $totalWords:= wordSort [:$lastWords,:$totalWords]
- $lastAlist := nil
- $totalWords
- $countThreshold := $countThreshold + 2
- $lastAlist := findApproximateWords(word,table)
- moreWords(word,table)
-
-findApproximateWords(word,table) ==
- count := $countThreshold
- words:= wordsOfString word
- upperWord:= UPCASE COPY word
- n := #words
- threshold:=
- n = 1 => count
- count+1
-
- --first try to break up as list of words
- alist:= nil
- for i in 1..#words repeat
- $penalty :local := (i = 1 => 0; 1)
- wordAlist:= HGET(table,UPCASE (first words).0)
- for [x,:wordList] in wordAlist repeat
- k := findApproxWordList(words,wordList,n,threshold,#wordList)
- k =>
- k := k + $penalty
- k <= $lastMinimum => 'skip
- alist := consAlist(k,x,alist)
-
- if i = 1 and null alist then
- --no winners, so try flattening to upper case and checking again
- wordSize := SIZE word
- lastThreshold := MAX(threshold - 1,wordSize/2)
- for [x,:.] in wordAlist repeat
- k := deltaWordEntry(upperWord,UPCASE x)
- k < lastThreshold => alist := consAlist(k,x,alist)
-
- rotateWordList words
-
- alist
-
-consAlist(x,y,alist) ==
- u := ASSOC(x,alist) =>
- RPLACD(u,[y,:CDR u])
- alist
- [[x,y],:alist]
-
-findApproxWordList(words,wordList,n,threshold,w) ==
- val := findApproxWordList1(words,wordList,n,threshold,w)
- null val => val
---pp [val,:wordList]
- val
-
-findApproxWordList1(words,wordList,n,threshold,w) ==
- two := threshold - 2
- n = w =>
- k := findApproxSimple(words,wordList,threshold) => k
-
- n < 3 => false
- threshold := threshold - 1
- sum := 0 --next, throw out one bad word
-
- badWord := false
- for entry in wordList for part in words while sum < threshold repeat
- k:= deltaWordEntry(part,entry)
- k < two => sum:= sum + k
- null badWord => badWord := true
- sum := 1000
- sum < threshold =>
--- pp [2,sum,wordList]
- sum + 2
-
- n+1 = w => --assume one word is missing
- sum := 0
- badWord := false
- for entries in tails wordList for part in words
- while sum < threshold repeat
- entry := first entries
- k:= deltaWordEntry(part,entry)
- k < two => sum:= sum + k
- null badWord =>
- badWord := true
- entries := rest entries --skip this bad word
- entry := first entries
- k := deltaWordEntry(part,entry)
- k < two => sum := sum + k
- sum := 1000
- sum := 1000
- sum < threshold =>
--- pp [3,sum,wordList]
- sum + 2
- false
- n-1 = w => --assume one word too many
- sum := 0 --here: KEEP it hard to satisfy
- badWord := false
- for entry in wordList for parts in tails words
- while sum < threshold repeat
- part := first parts
- k:= deltaWordEntry(part,entry)
- k < 2 => sum:= sum + k
- null badWord =>
- badWord := true
- parts := rest parts --skip this bad word
- part := first parts
- k := deltaWordEntry(part,entry)
- k < 2 => sum := sum + k
- return (sum := 1000)
- return (sum := 1000)
- sum < threshold =>
--- pp [4,sum,wordList]
- $penalty = 1 => sum
- sum + 1
- false
- false
-
-findApproxSimple(words,wordList,threshold) ==
- sum := 0
- --first try matching words in order
- for entry in wordList for part in words while sum < threshold repeat
- sum:= sum + deltaWordEntry(part,entry)
- sum < threshold =>
--- pp ['"--->",sum,:wordList]
- sum
- nil
-
-rotateWordList u ==
- v := u
- p := CAR v
- while QCDR v repeat
- RPLACA(v,CADR v)
- v := QCDR v
- RPLACA(v,p)
- u
-
-deltaWordEntry(word,entry) ==
- word = entry => 0
- word.0 ^= entry.0 => 1000
- #word > 2 and stringPrefix?(word,entry) => 1
- ABS(diff := SIZE word - SIZE entry) > 4 => 1000
- canForgeWord(word,entry)
-
---+ Note these are optimized definitions below-- see commented out versions
---+ to understand the algorithm
-canForgeWord(word,entry) ==
- forge(word,0,MAXINDEX word,entry,0,MAXINDEX entry,0)
-
-forge(word,w,W,entry,e,E,n) ==
- w > W =>
- e > E => n
- QSADD1 QSPLUS(E-e,n)
- e > E => QSADD1 QSPLUS(W-w,n)
- word.w = entry.e => forge(word,w+1,W,entry,e+1,E,n)
- w=W or e=E => forge(word,w+1,W,entry,e+1,E,QSADD1 n)
- word.w=entry.(e+1) =>
- word.(w+1) = entry.e => forge(word,w+2,W,entry,e+2,E,QSADD1 n)
- forge(word,w+1,W,entry,e+2,E,QSADD1 n)
- word.(w+1)=entry.e => forge(word,w+2,W,entry,e+1,E,QSADD1 n)
-
- (deltaW := W-w) > 1 and (deltaE := E-e) > 1 =>
- --if word is long, can we delete chars to match 2 consective chars
- deltaW >= deltaE and
- (k := or/[j for j in (w+2)..(W-1) | word.j = entry.e])
- and word.(k+1) = entry.(e+1) =>
- forge(word,k+2,W,entry,e+2,E,QSPLUS(k-w,n))
- deltaW <= deltaE and
- --if word is short, can we insert chars so as to match 2 consecutive chars
- (k := or/[j for j in (e+2)..(E-1) | word.w = entry.j])
- and word.(w+1) = entry.(k+1) =>
- forge(word,w+2,W,entry,k+2,E,QSPLUS(n,k-e))
- forge(word,w+1,W,entry,e+1,E,QSADD1 n)
- --check for two consecutive matches down the line
- forge(word,w+1,W,entry,e+1,E,QSADD1 n)
-
---+ DO NOT REMOVE DEFINITIONS BELOW which explain the algorithm
---+ canForgeWord(word,entry) ==--
---+ [d,i,s,t] := forge(word,0,MAXINDEX word,entry,0,MAXINDEX entry,0,0,0,0)
---+ --d=deletions, i=insertions, s=substitutions, t=transpositions
---+ --list is formed only for tuning purposes-- remove later on
---+ d + i + s + t
-
---+forge(word,w,W,entry,e,E,d,i,s,t) ==
---+ w > W =>
---+ e > E => [d,i,s,t]
---+ [d,E-e+i+1,s,t]
---+ e > E => [W-w+d+1,i,s,t]
---+ word.w = entry.e => forge(word,w+1,W,entry,e+1,E,d,i,s,t)
---+ w=W or e=E => forge(word,w+1,W,entry,e+1,E,d,i,s+1,t)
---+ word.w=entry.(e+1) =>
---+ word.(w+1) = entry.e => forge(word,w+2,W,entry,e+2,E,d,i,s,t+1)
---+ forge(word,w+1,W,entry,e+2,E,d,i+1,s,t)
---+ word.(w+1)=entry.e => forge(word,w+2,W,entry,e+1,E,d+1,i,s,t)
---+
---+ (deltaW := W-w) > 1 and (deltaE := E-e) > 1 =>
---+ --if word is long, can we delete chars to match 2 consective chars
---+ deltaW >= deltaE and
---+ (k := or/[j for j in (w+2)..(W-1) | word.j = entry.e])
---+ and word.(k+1) = entry.(e+1) =>
---+ forge(word,k+2,W,entry,e+2,E,d+k-w,i,s,t)
---+ deltaW <= deltaE and
---+ --if word is short, can we insert chars so as to match 2 consecutive chars
---+ (k := or/[j for j in (e+2)..(E-1) | word.w = entry.j])
---+ and word.(w+1) = entry.(k+1) =>
---+ forge(word,w+2,W,entry,k+2,E,d,i+k-e,s,t)
---+ forge(word,w+1,W,entry,e+1,E,d,i,s+1,t)
---+ --check for two consecutive matches down the line
---+ forge(word,w+1,W,entry,e+1,E,d,i,s+1,t)
-
-mySort u == listSort(function GLESSEQP,u)