diff options
author | dos-reis <gdr@axiomatics.org> | 2007-11-01 17:56:02 +0000 |
---|---|---|
committer | dos-reis <gdr@axiomatics.org> | 2007-11-01 17:56:02 +0000 |
commit | 69afed478ed20a61bcd6c046158006b3b67600ad (patch) | |
tree | 54d94ebf17c2ff1d7b2eb6e7a5c8ba3fcf7587db /src/interp/guess.boot.pamphlet | |
parent | 0a0bd4aacab28c08c92cf652812ba9336a232871 (diff) | |
download | open-axiom-69afed478ed20a61bcd6c046158006b3b67600ad.tar.gz |
remove more pamphlet
Diffstat (limited to 'src/interp/guess.boot.pamphlet')
-rw-r--r-- | src/interp/guess.boot.pamphlet | 369 |
1 files changed, 0 insertions, 369 deletions
diff --git a/src/interp/guess.boot.pamphlet b/src/interp/guess.boot.pamphlet deleted file mode 100644 index 4f4d2544..00000000 --- a/src/interp/guess.boot.pamphlet +++ /dev/null @@ -1,369 +0,0 @@ -\documentclass{article} -\usepackage{axiom} -\begin{document} -\title{\$SPAD/src/interp guess.boot} -\author{The Axiom Team} -\maketitle -\begin{abstract} -\end{abstract} -\eject -\tableofcontents -\eject -\section{License} -<<license>>= --- Copyright (c) 1991-2002, The Numerical ALgorithms Group Ltd. --- All rights reserved. --- --- Redistribution and use in source and binary forms, with or without --- modification, are permitted provided that the following conditions are --- met: --- --- - Redistributions of source code must retain the above copyright --- notice, this list of conditions and the following disclaimer. --- --- - Redistributions in binary form must reproduce the above copyright --- notice, this list of conditions and the following disclaimer in --- the documentation and/or other materials provided with the --- distribution. --- --- - Neither the name of The Numerical ALgorithms Group Ltd. nor the --- names of its contributors may be used to endorse or promote products --- derived from this software without specific prior written permission. --- --- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS --- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED --- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A --- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER --- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, --- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, --- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR --- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF --- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING --- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS --- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -@ -<<*>>= -<<license>> - -$minThreshold := 3 -$maxThreshold := 7 - ---======================================================================= --- Build Directories ---======================================================================= -buildOperationWordTable() == - $opWordTable := buildWordTable [PNAME x for x in allOperations()] - -buildWordTable u == - table:= MAKE_-HASHTABLE 'ID - for s in u repeat - words := wordsOfString s - key := UPCASE s.0 - HPUT(table,key,[[s,:words],:HGET(table,key)]) - for key in HKEYS table repeat - HPUT(table,key, - listSort(function GLESSEQP,removeDupOrderedAlist - listSort(function GLESSEQP, HGET(table,key),function CAR), - function CADR)) - table - -measureWordTable u == - +/[+/[#entry for entry in HGET(u,key)] for key in HKEYS u] - -removeDupOrderedAlist u == - -- removes duplicate entries in ordered alist - -- (where duplicates are adjacent) - for x in tails u repeat - (y := rest x) and first first x = first first y => RPLACD(x,rest y) - u - -wordsOfString(s) == [UPCASE x for x in wordsOfStringKeepCase s] - -wordsOfStringKeepCase s == wordsOfString1(s,0) or [COPY s] - -wordsOfString1(s,j) == - k := or/[i for i in j..SUB1(MAXINDEX(s)) | UPPER_-CASE_-P s.i] => - tailWords:= - UPPER_-CASE_-P s.(k+1) => - n:= or/[i for i in (k+2)..SUB1(MAXINDEX(s))|not UPPER_-CASE_-P s.i] - null n => [SUBSTRING(s,k,nil)] - n > k+1 => [SUBSTRING(s,k,n-k-1),:wordsOfString1(s,n-1)] - m := or/[i for i in (k+2)..SUB1(MAXINDEX(s)) | UPPER_-CASE_-P s.i] => - [SUBSTRING(s,k,m-k),:wordsOfString1(s,m)] - [SUBSTRING(s,k,nil)] - k > j+1 => [SUBSTRING(s,j,k-j),:tailWords] - tailWords - nil - -wordKeys s == - REMDUP [UPCASE s.0,:fn(s,1,-1,MAXINDEX s,nil)] where fn(s,i,lastKeyIndex,n,acc) == - i > n => acc - UPPER_-CASE_-P s.i => --- i = lastKeyIndex + 1 => fn(s,i + 1,i,n,[s.i,:rest acc]) - fn(s,i + 1,i,n,[s.i,:acc]) - fn(s,i + 1,lastKeyIndex,n,acc) - ---======================================================================= --- Augment Function Directories ---======================================================================= -add2WordFunctionTable fn == ---called from DEF - $functionTable and - null LASSOC(s := PNAME fn,HGET($functionTable,(key := UPCASE s.0))) => - HPUT($functionTable,key,[[s,:wordsOfString s],:HGET($functionTable,key)]) - ---======================================================================= --- Guess Function Name ---======================================================================= -findWords(word,table) == - $lastWord := word - $lastTable:= table - $totalWords:= nil - $countThreshold := $minThreshold - $lastMinimum := -1 - res := findApproximateWords(word,table) - if null res then - $countThreshold := $countThreshold + 2 - res := findApproximateWords(word,table) - $lastAlist := mySort res => --- $lastMinimum := CAR LAST $lastAlist --- $lastWords := wordSort CDAR $lastAlist --- $totalWords:= $lastWords --- $lastAlist := CDR $lastAlist --- $totalWords - $lastMinimum := CAAR $lastAlist - $lastWords := wordSort CDAR $lastAlist - $totalWords:= $lastWords - $lastAlist := CDR $lastAlist - $totalWords - $lastWords := nil - -wordSort u == REMDUP listSort(function GLESSEQP,u) - -more() == moreWords($lastWord,$lastTable) - -moreWords(word,table) == - $lastAlist => - $lastMinimum := CAR LAST pp $lastAlist - numberOfLastWords := #$lastWords - $lastWords := "append"/(ASSOCRIGHT $lastAlist) - if #$lastWords > numberOfLastWords then - trialLastAlist := - [p for p in $lastAlist | p.0 < $maxThreshold] - trialLastWords := "append"/(ASSOCRIGHT trialLastAlist) - if #trialLastWords > numberOfLastWords then - $lastWords := trialLastWords - $totalWords:= wordSort [:$lastWords,:$totalWords] - $lastAlist := nil - $totalWords - $countThreshold := $countThreshold + 2 - $lastAlist := findApproximateWords(word,table) - moreWords(word,table) - -findApproximateWords(word,table) == - count := $countThreshold - words:= wordsOfString word - upperWord:= UPCASE COPY word - n := #words - threshold:= - n = 1 => count - count+1 - - --first try to break up as list of words - alist:= nil - for i in 1..#words repeat - $penalty :local := (i = 1 => 0; 1) - wordAlist:= HGET(table,UPCASE (first words).0) - for [x,:wordList] in wordAlist repeat - k := findApproxWordList(words,wordList,n,threshold,#wordList) - k => - k := k + $penalty - k <= $lastMinimum => 'skip - alist := consAlist(k,x,alist) - - if i = 1 and null alist then - --no winners, so try flattening to upper case and checking again - wordSize := SIZE word - lastThreshold := MAX(threshold - 1,wordSize/2) - for [x,:.] in wordAlist repeat - k := deltaWordEntry(upperWord,UPCASE x) - k < lastThreshold => alist := consAlist(k,x,alist) - - rotateWordList words - - alist - -consAlist(x,y,alist) == - u := ASSOC(x,alist) => - RPLACD(u,[y,:CDR u]) - alist - [[x,y],:alist] - -findApproxWordList(words,wordList,n,threshold,w) == - val := findApproxWordList1(words,wordList,n,threshold,w) - null val => val ---pp [val,:wordList] - val - -findApproxWordList1(words,wordList,n,threshold,w) == - two := threshold - 2 - n = w => - k := findApproxSimple(words,wordList,threshold) => k - - n < 3 => false - threshold := threshold - 1 - sum := 0 --next, throw out one bad word - - badWord := false - for entry in wordList for part in words while sum < threshold repeat - k:= deltaWordEntry(part,entry) - k < two => sum:= sum + k - null badWord => badWord := true - sum := 1000 - sum < threshold => --- pp [2,sum,wordList] - sum + 2 - - n+1 = w => --assume one word is missing - sum := 0 - badWord := false - for entries in tails wordList for part in words - while sum < threshold repeat - entry := first entries - k:= deltaWordEntry(part,entry) - k < two => sum:= sum + k - null badWord => - badWord := true - entries := rest entries --skip this bad word - entry := first entries - k := deltaWordEntry(part,entry) - k < two => sum := sum + k - sum := 1000 - sum := 1000 - sum < threshold => --- pp [3,sum,wordList] - sum + 2 - false - n-1 = w => --assume one word too many - sum := 0 --here: KEEP it hard to satisfy - badWord := false - for entry in wordList for parts in tails words - while sum < threshold repeat - part := first parts - k:= deltaWordEntry(part,entry) - k < 2 => sum:= sum + k - null badWord => - badWord := true - parts := rest parts --skip this bad word - part := first parts - k := deltaWordEntry(part,entry) - k < 2 => sum := sum + k - return (sum := 1000) - return (sum := 1000) - sum < threshold => --- pp [4,sum,wordList] - $penalty = 1 => sum - sum + 1 - false - false - -findApproxSimple(words,wordList,threshold) == - sum := 0 - --first try matching words in order - for entry in wordList for part in words while sum < threshold repeat - sum:= sum + deltaWordEntry(part,entry) - sum < threshold => --- pp ['"--->",sum,:wordList] - sum - nil - -rotateWordList u == - v := u - p := CAR v - while QCDR v repeat - RPLACA(v,CADR v) - v := QCDR v - RPLACA(v,p) - u - -deltaWordEntry(word,entry) == - word = entry => 0 - word.0 ^= entry.0 => 1000 - #word > 2 and stringPrefix?(word,entry) => 1 - ABS(diff := SIZE word - SIZE entry) > 4 => 1000 - canForgeWord(word,entry) - ---+ Note these are optimized definitions below-- see commented out versions ---+ to understand the algorithm -canForgeWord(word,entry) == - forge(word,0,MAXINDEX word,entry,0,MAXINDEX entry,0) - -forge(word,w,W,entry,e,E,n) == - w > W => - e > E => n - QSADD1 QSPLUS(E-e,n) - e > E => QSADD1 QSPLUS(W-w,n) - word.w = entry.e => forge(word,w+1,W,entry,e+1,E,n) - w=W or e=E => forge(word,w+1,W,entry,e+1,E,QSADD1 n) - word.w=entry.(e+1) => - word.(w+1) = entry.e => forge(word,w+2,W,entry,e+2,E,QSADD1 n) - forge(word,w+1,W,entry,e+2,E,QSADD1 n) - word.(w+1)=entry.e => forge(word,w+2,W,entry,e+1,E,QSADD1 n) - - (deltaW := W-w) > 1 and (deltaE := E-e) > 1 => - --if word is long, can we delete chars to match 2 consective chars - deltaW >= deltaE and - (k := or/[j for j in (w+2)..(W-1) | word.j = entry.e]) - and word.(k+1) = entry.(e+1) => - forge(word,k+2,W,entry,e+2,E,QSPLUS(k-w,n)) - deltaW <= deltaE and - --if word is short, can we insert chars so as to match 2 consecutive chars - (k := or/[j for j in (e+2)..(E-1) | word.w = entry.j]) - and word.(w+1) = entry.(k+1) => - forge(word,w+2,W,entry,k+2,E,QSPLUS(n,k-e)) - forge(word,w+1,W,entry,e+1,E,QSADD1 n) - --check for two consecutive matches down the line - forge(word,w+1,W,entry,e+1,E,QSADD1 n) - ---+ DO NOT REMOVE DEFINITIONS BELOW which explain the algorithm ---+ canForgeWord(word,entry) ==-- ---+ [d,i,s,t] := forge(word,0,MAXINDEX word,entry,0,MAXINDEX entry,0,0,0,0) ---+ --d=deletions, i=insertions, s=substitutions, t=transpositions ---+ --list is formed only for tuning purposes-- remove later on ---+ d + i + s + t - ---+forge(word,w,W,entry,e,E,d,i,s,t) == ---+ w > W => ---+ e > E => [d,i,s,t] ---+ [d,E-e+i+1,s,t] ---+ e > E => [W-w+d+1,i,s,t] ---+ word.w = entry.e => forge(word,w+1,W,entry,e+1,E,d,i,s,t) ---+ w=W or e=E => forge(word,w+1,W,entry,e+1,E,d,i,s+1,t) ---+ word.w=entry.(e+1) => ---+ word.(w+1) = entry.e => forge(word,w+2,W,entry,e+2,E,d,i,s,t+1) ---+ forge(word,w+1,W,entry,e+2,E,d,i+1,s,t) ---+ word.(w+1)=entry.e => forge(word,w+2,W,entry,e+1,E,d+1,i,s,t) ---+ ---+ (deltaW := W-w) > 1 and (deltaE := E-e) > 1 => ---+ --if word is long, can we delete chars to match 2 consective chars ---+ deltaW >= deltaE and ---+ (k := or/[j for j in (w+2)..(W-1) | word.j = entry.e]) ---+ and word.(k+1) = entry.(e+1) => ---+ forge(word,k+2,W,entry,e+2,E,d+k-w,i,s,t) ---+ deltaW <= deltaE and ---+ --if word is short, can we insert chars so as to match 2 consecutive chars ---+ (k := or/[j for j in (e+2)..(E-1) | word.w = entry.j]) ---+ and word.(w+1) = entry.(k+1) => ---+ forge(word,w+2,W,entry,k+2,E,d,i+k-e,s,t) ---+ forge(word,w+1,W,entry,e+1,E,d,i,s+1,t) ---+ --check for two consecutive matches down the line ---+ forge(word,w+1,W,entry,e+1,E,d,i,s+1,t) - -mySort u == listSort(function GLESSEQP,u) -@ -\eject -\begin{thebibliography}{99} -\bibitem{1} nothing -\end{thebibliography} -\end{document} |