Initial population.

author: dos-reis <gdr@axiomatics.org> 2007-08-14 05:14:52 +0000
committer: dos-reis <gdr@axiomatics.org> 2007-08-14 05:14:52 +0000
commit: ab8cc85adde879fb963c94d15675783f2cf4b183 (patch)
tree: c202482327f474583b750b2c45dedfc4e4312b1d /src/interp/guess.boot.pamphlet
download: open-axiom-ab8cc85adde879fb963c94d15675783f2cf4b183.tar.gz
1 files changed, 369 insertions, 0 deletions
diff --git a/src/interp/guess.boot.pamphlet b/src/interp/guess.boot.pamphlet
new file mode 100644
index 00000000..4f4d2544
--- /dev/null
+++ b/src/interp/guess.boot.pamphlet
@@ -0,0 +1,369 @@
+\documentclass{article}
+\usepackage{axiom}
+\begin{document}
+\title{\$SPAD/src/interp guess.boot}
+\author{The Axiom Team}
+\maketitle
+\begin{abstract}
+\end{abstract}
+\eject
+\tableofcontents
+\eject
+\section{License}
+<<license>>=
+-- Copyright (c) 1991-2002, The Numerical ALgorithms Group Ltd.
+-- All rights reserved.
+--
+-- Redistribution and use in source and binary forms, with or without
+-- modification, are permitted provided that the following conditions are
+-- met:
+--
+--     - Redistributions of source code must retain the above copyright
+--       notice, this list of conditions and the following disclaimer.
+--
+--     - Redistributions in binary form must reproduce the above copyright
+--       notice, this list of conditions and the following disclaimer in
+--       the documentation and/or other materials provided with the
+--       distribution.
+--
+--     - Neither the name of The Numerical ALgorithms Group Ltd. nor the
+--       names of its contributors may be used to endorse or promote products
+--       derived from this software without specific prior written permission.
+--
+-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+-- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+-- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+-- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+-- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+-- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+-- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+@
+<<*>>=
+<<license>>
+
+$minThreshold := 3
+$maxThreshold := 7
+ 
+--=======================================================================
+--                        Build Directories
+--=======================================================================
+buildOperationWordTable() ==
+  $opWordTable := buildWordTable [PNAME x for x in allOperations()]
+ 
+buildWordTable u ==
+  table:= MAKE_-HASHTABLE 'ID
+  for s in u repeat
+    words := wordsOfString s
+    key := UPCASE s.0
+    HPUT(table,key,[[s,:words],:HGET(table,key)])
+  for key in HKEYS table repeat
+    HPUT(table,key,
+      listSort(function GLESSEQP,removeDupOrderedAlist
+        listSort(function GLESSEQP, HGET(table,key),function CAR),
+          function CADR))
+  table
+
+measureWordTable u ==
+  +/[+/[#entry for entry in HGET(u,key)] for key in HKEYS u]
+
+removeDupOrderedAlist u ==
+  -- removes duplicate entries in ordered alist
+  -- (where duplicates are adjacent)
+  for x in tails u repeat
+    (y := rest x) and first first x = first first y => RPLACD(x,rest y)
+  u
+ 
+wordsOfString(s) == [UPCASE x for x in wordsOfStringKeepCase s]
+ 
+wordsOfStringKeepCase s == wordsOfString1(s,0) or [COPY s]
+ 
+wordsOfString1(s,j) ==
+  k := or/[i for i in j..SUB1(MAXINDEX(s)) | UPPER_-CASE_-P s.i] =>
+    tailWords:=
+      UPPER_-CASE_-P s.(k+1) =>
+        n:= or/[i for i in (k+2)..SUB1(MAXINDEX(s))|not UPPER_-CASE_-P s.i]
+        null n => [SUBSTRING(s,k,nil)]
+        n > k+1 => [SUBSTRING(s,k,n-k-1),:wordsOfString1(s,n-1)]
+      m := or/[i for i in (k+2)..SUB1(MAXINDEX(s)) | UPPER_-CASE_-P s.i] =>
+        [SUBSTRING(s,k,m-k),:wordsOfString1(s,m)]
+      [SUBSTRING(s,k,nil)]
+    k > j+1 => [SUBSTRING(s,j,k-j),:tailWords]
+    tailWords
+  nil
+
+wordKeys s == 
+  REMDUP [UPCASE s.0,:fn(s,1,-1,MAXINDEX s,nil)] where fn(s,i,lastKeyIndex,n,acc) ==
+    i > n => acc
+    UPPER_-CASE_-P s.i =>
+--    i = lastKeyIndex + 1 => fn(s,i + 1,i,n,[s.i,:rest acc])
+      fn(s,i + 1,i,n,[s.i,:acc])
+    fn(s,i + 1,lastKeyIndex,n,acc)
+
+--=======================================================================
+--               Augment Function Directories
+--=======================================================================
+add2WordFunctionTable fn ==
+--called from DEF
+  $functionTable and
+    null LASSOC(s := PNAME fn,HGET($functionTable,(key := UPCASE s.0))) =>
+      HPUT($functionTable,key,[[s,:wordsOfString s],:HGET($functionTable,key)])
+ 
+--=======================================================================
+--                       Guess Function Name
+--=======================================================================
+findWords(word,table) == 
+  $lastWord := word
+  $lastTable:= table
+  $totalWords:= nil
+  $countThreshold := $minThreshold
+  $lastMinimum := -1
+  res := findApproximateWords(word,table) 
+  if null res then
+    $countThreshold := $countThreshold + 2
+    res := findApproximateWords(word,table) 
+  $lastAlist := mySort res =>
+--    $lastMinimum := CAR LAST $lastAlist
+--    $lastWords := wordSort CDAR $lastAlist
+--    $totalWords:= $lastWords
+--    $lastAlist := CDR  $lastAlist
+--    $totalWords
+      $lastMinimum := CAAR $lastAlist
+      $lastWords := wordSort CDAR $lastAlist
+      $totalWords:= $lastWords
+      $lastAlist := CDR  $lastAlist
+      $totalWords
+  $lastWords := nil
+
+wordSort u == REMDUP listSort(function GLESSEQP,u)
+
+more() == moreWords($lastWord,$lastTable)
+
+moreWords(word,table) ==
+  $lastAlist =>
+     $lastMinimum := CAR LAST pp $lastAlist
+     numberOfLastWords := #$lastWords
+     $lastWords := "append"/(ASSOCRIGHT $lastAlist)
+     if #$lastWords > numberOfLastWords then 
+       trialLastAlist := 
+         [p for p in $lastAlist | p.0 < $maxThreshold]
+       trialLastWords := "append"/(ASSOCRIGHT trialLastAlist)
+       if #trialLastWords > numberOfLastWords then
+         $lastWords := trialLastWords         
+     $totalWords:= wordSort [:$lastWords,:$totalWords]
+     $lastAlist := nil
+     $totalWords
+  $countThreshold := $countThreshold + 2
+  $lastAlist := findApproximateWords(word,table)
+  moreWords(word,table)
+
+findApproximateWords(word,table) ==
+  count := $countThreshold
+  words:= wordsOfString word
+  upperWord:= UPCASE COPY word
+  n := #words
+  threshold:=
+    n = 1 => count
+    count+1
+ 
+  --first try to break up as list of words
+  alist:= nil
+  for i in 1..#words repeat
+    $penalty :local := (i = 1 => 0; 1)
+    wordAlist:= HGET(table,UPCASE (first words).0)
+    for [x,:wordList] in wordAlist repeat
+      k := findApproxWordList(words,wordList,n,threshold,#wordList) 
+      k =>
+        k := k + $penalty
+        k <= $lastMinimum => 'skip
+        alist := consAlist(k,x,alist)
+      
+    if i = 1 and null alist then
+        --no winners, so try flattening to upper case and checking again
+        wordSize := SIZE word
+        lastThreshold := MAX(threshold - 1,wordSize/2)
+        for [x,:.] in wordAlist repeat
+          k := deltaWordEntry(upperWord,UPCASE x)
+          k < lastThreshold => alist := consAlist(k,x,alist)
+
+    rotateWordList words
+
+  alist
+
+consAlist(x,y,alist) ==
+  u := ASSOC(x,alist) => 
+    RPLACD(u,[y,:CDR u])
+    alist
+  [[x,y],:alist]
+
+findApproxWordList(words,wordList,n,threshold,w) ==
+  val := findApproxWordList1(words,wordList,n,threshold,w)
+  null val => val
+--pp [val,:wordList]
+  val
+
+findApproxWordList1(words,wordList,n,threshold,w) ==
+  two := threshold - 2
+  n = w => 
+      k := findApproxSimple(words,wordList,threshold) => k 
+
+      n < 3 => false
+      threshold := threshold - 1
+      sum := 0      --next, throw out one bad word
+
+      badWord := false
+      for entry in wordList for part in words while sum < threshold repeat
+        k:= deltaWordEntry(part,entry)
+        k < two => sum:= sum + k
+        null badWord => badWord := true
+        sum := 1000
+      sum < threshold => 
+--      pp [2,sum,wordList]
+        sum + 2
+
+  n+1 = w =>    --assume one word is missing
+      sum := 0
+      badWord := false
+      for entries in tails wordList for part in words
+           while sum < threshold repeat
+        entry := first entries
+        k:= deltaWordEntry(part,entry)
+        k < two => sum:= sum + k
+        null badWord =>
+          badWord := true
+          entries := rest entries      --skip this bad word
+          entry := first entries
+          k := deltaWordEntry(part,entry)
+          k < two => sum := sum + k
+          sum := 1000
+        sum := 1000
+      sum < threshold => 
+--      pp [3,sum,wordList]
+        sum + 2
+      false
+  n-1 = w =>    --assume one word too many
+      sum := 0  --here: KEEP it hard to satisfy
+      badWord := false
+      for entry in wordList for parts in tails words
+           while sum < threshold repeat
+        part := first parts
+        k:= deltaWordEntry(part,entry)
+        k < 2 => sum:= sum + k
+        null badWord =>
+          badWord := true
+          parts := rest parts      --skip this bad word
+          part := first parts
+          k := deltaWordEntry(part,entry)
+          k < 2 => sum := sum + k
+          return (sum := 1000)
+        return (sum := 1000)
+      sum < threshold =>
+--      pp [4,sum,wordList]
+        $penalty = 1 => sum
+        sum + 1
+      false
+  false
+
+findApproxSimple(words,wordList,threshold) ==
+  sum := 0
+  --first try matching words in order
+  for entry in wordList for part in words while sum < threshold repeat
+    sum:= sum + deltaWordEntry(part,entry)
+  sum < threshold => 
+--  pp ['"--->",sum,:wordList]
+    sum
+  nil
+      
+rotateWordList u ==
+  v := u
+  p := CAR v
+  while QCDR v repeat
+    RPLACA(v,CADR v) 
+    v := QCDR v
+  RPLACA(v,p)
+  u
+
+deltaWordEntry(word,entry) ==
+  word = entry => 0
+  word.0 ^= entry.0 => 1000
+  #word > 2 and stringPrefix?(word,entry) => 1
+  ABS(diff := SIZE word - SIZE entry) > 4 => 1000
+  canForgeWord(word,entry)
+ 
+--+ Note these are optimized definitions below-- see commented out versions
+--+   to understand the algorithm
+canForgeWord(word,entry) ==
+  forge(word,0,MAXINDEX word,entry,0,MAXINDEX entry,0)
+ 
+forge(word,w,W,entry,e,E,n) ==
+  w > W =>
+    e > E => n
+    QSADD1 QSPLUS(E-e,n)
+  e > E => QSADD1 QSPLUS(W-w,n)
+  word.w = entry.e => forge(word,w+1,W,entry,e+1,E,n)
+  w=W or e=E => forge(word,w+1,W,entry,e+1,E,QSADD1 n)
+  word.w=entry.(e+1) =>
+    word.(w+1) = entry.e => forge(word,w+2,W,entry,e+2,E,QSADD1 n)
+    forge(word,w+1,W,entry,e+2,E,QSADD1 n)
+  word.(w+1)=entry.e => forge(word,w+2,W,entry,e+1,E,QSADD1 n)
+ 
+  (deltaW := W-w) > 1 and (deltaE := E-e) > 1 =>
+    --if word is long, can we delete chars to match 2 consective chars
+    deltaW >= deltaE and
+      (k := or/[j for j in (w+2)..(W-1) | word.j = entry.e])
+        and word.(k+1) = entry.(e+1) =>
+          forge(word,k+2,W,entry,e+2,E,QSPLUS(k-w,n))
+    deltaW <= deltaE and
+    --if word is short, can we insert chars so as to match 2 consecutive chars
+      (k := or/[j for j in (e+2)..(E-1) | word.w = entry.j])
+        and word.(w+1) = entry.(k+1) =>
+          forge(word,w+2,W,entry,k+2,E,QSPLUS(n,k-e))
+    forge(word,w+1,W,entry,e+1,E,QSADD1 n)
+  --check for two consecutive matches down the line
+  forge(word,w+1,W,entry,e+1,E,QSADD1 n)
+ 
+--+ DO NOT REMOVE DEFINITIONS BELOW which explain the algorithm
+--+ canForgeWord(word,entry) ==--
+--+ [d,i,s,t] := forge(word,0,MAXINDEX word,entry,0,MAXINDEX entry,0,0,0,0)
+--+ --d=deletions, i=insertions, s=substitutions, t=transpositions
+--+ --list is formed only for tuning purposes-- remove later on
+--+ d + i + s + t
+ 
+--+forge(word,w,W,entry,e,E,d,i,s,t) ==
+--+  w > W =>
+--+    e > E => [d,i,s,t]
+--+    [d,E-e+i+1,s,t]
+--+  e > E => [W-w+d+1,i,s,t]
+--+  word.w = entry.e => forge(word,w+1,W,entry,e+1,E,d,i,s,t)
+--+  w=W or e=E => forge(word,w+1,W,entry,e+1,E,d,i,s+1,t)
+--+  word.w=entry.(e+1) =>
+--+    word.(w+1) = entry.e => forge(word,w+2,W,entry,e+2,E,d,i,s,t+1)
+--+    forge(word,w+1,W,entry,e+2,E,d,i+1,s,t)
+--+  word.(w+1)=entry.e => forge(word,w+2,W,entry,e+1,E,d+1,i,s,t)
+--+
+--+  (deltaW := W-w) > 1 and (deltaE := E-e) > 1 =>
+--+    --if word is long, can we delete chars to match 2 consective chars
+--+    deltaW >= deltaE and
+--+      (k := or/[j for j in (w+2)..(W-1) | word.j = entry.e])
+--+        and word.(k+1) = entry.(e+1) =>
+--+          forge(word,k+2,W,entry,e+2,E,d+k-w,i,s,t)
+--+    deltaW <= deltaE and
+--+    --if word is short, can we insert chars so as to match 2 consecutive chars
+--+      (k := or/[j for j in (e+2)..(E-1) | word.w = entry.j])
+--+        and word.(w+1) = entry.(k+1) =>
+--+          forge(word,w+2,W,entry,k+2,E,d,i+k-e,s,t)
+--+    forge(word,w+1,W,entry,e+1,E,d,i,s+1,t)
+--+  --check for two consecutive matches down the line
+--+  forge(word,w+1,W,entry,e+1,E,d,i,s+1,t)
+ 
+mySort u == listSort(function GLESSEQP,u)
+@
+\eject
+\begin{thebibliography}{99}
+\bibitem{1} nothing
+\end{thebibliography}
+\end{document}
author	dos-reis <gdr@axiomatics.org>	2007-08-14 05:14:52 +0000
committer	dos-reis <gdr@axiomatics.org>	2007-08-14 05:14:52 +0000
commit	ab8cc85adde879fb963c94d15675783f2cf4b183 (patch)
tree	c202482327f474583b750b2c45dedfc4e4312b1d /src/interp/guess.boot.pamphlet
download	open-axiom-ab8cc85adde879fb963c94d15675783f2cf4b183.tar.gz