\documentclass{article} \usepackage{axiom} \begin{document} \title{\$SPAD/src/algebra string.spad} \author{Stephen M. Watt, Michael Monagan, Manuel Bronstein} \maketitle \begin{abstract} \end{abstract} \eject \tableofcontents \eject \section{domain CHAR Character} <>= )abbrev domain CHAR Character ++ Author: Stephen M. Watt ++ Date Created: July 1986 ++ Date Last Updated: June 20, 1991 ++ Basic Operations: char ++ Related Domains: ++ Also See: ++ AMS Classifications: ++ Keywords: character, string ++ Examples: ++ References: ++ Description: ++ This domain provides the basic character data type. Character: OrderedFinite() with ord: % -> NonNegativeInteger ++ ord(c) provides an integral code corresponding to the ++ character c. It is always true that \spad{char ord c = c}. char: NonNegativeInteger -> % ++ char(i) provides a character corresponding to the integer ++ code i. It is always true that \spad{ord char i = i}. char: String -> % ++ char(s) provides a character from a string s of length one. space: % ++ \spad{space} provides the blank character. quote: % ++ \spad{quote} provides the string quote character, \spad{"}. escape: % ++ \spad{escape} provides the escape character, \spad{_}, which ++ is used to allow quotes and other characters {\em within} ++ strings. upperCase: % -> % ++ upperCase(c) converts a lower case letter to the corresponding ++ upper case letter. If c is not a lower case letter, then ++ it is returned unchanged. lowerCase: % -> % ++ lowerCase(c) converts an upper case letter to the corresponding ++ lower case letter. If c is not an upper case letter, then ++ it is returned unchanged. digit?: % -> Boolean ++ digit?(c) tests if c is a digit character, ++ i.e. one of 0..9. hexDigit?: % -> Boolean ++ hexDigit?(c) tests if c is a hexadecimal numeral, ++ i.e. one of 0..9, a..f or A..F. alphabetic?: % -> Boolean ++ alphabetic?(c) tests if c is a letter, ++ i.e. one of a..z or A..Z. upperCase?: % -> Boolean ++ upperCase?(c) tests if c is an upper case letter, ++ i.e. one of A..Z. lowerCase?: % -> Boolean ++ lowerCase?(c) tests if c is an lower case letter, ++ i.e. one of a..z. alphanumeric?: % -> Boolean ++ alphanumeric?(c) tests if c is either a letter or number, ++ i.e. one of 0..9, a..z or A..Z. == add -- We use the base Lisp's system base-char as a -- the representation for this class. CC ==> CharacterClass() NNI ==> NonNegativeInteger import CC --cl: Record(dig:CC,hex:CC,upp:CC,low:CC,alpha:CC,alnum:CC) := -- [ digit(), hexDigit(), -- upperCase(), lowerCase(), alphabetic(), alphanumeric() ] a = b == CHAR_=(a,b)$Lisp a < b == CHAR_<(a,b)$Lisp a > b == CHAR_>(a,b)$Lisp a <= b == CHAR_<_=(a,b)$Lisp a >= b == CHAR_>_=(a,b)$Lisp size() == 256 index n == char((n - 1)::NNI) lookup c == (1 + ord c)::PositiveInteger char(n: NNI) == CODE_-CHAR(n)$Lisp ord c == CHAR_-CODE(c)$Lisp random() == char(random(size())$NNI) space == CHAR(" ", 0$Lisp)$Lisp quote == CHAR("_" ", 0$Lisp)$Lisp escape == CHAR("__ ", 0$Lisp)$Lisp coerce(c:%):OutputForm == c : OutputForm digit? c == member?(c, digit()) hexDigit? c == member?(c, hexDigit()) upperCase? c == member?(c, upperCase()) lowerCase? c == member?(c, lowerCase()) alphabetic? c == member?(c, alphabetic()) alphanumeric? c == member?(c, alphanumeric()) latex c == concat("\mbox{`", concat(new(1,c)$String, "'}")$String)$String char(s:String) == one?(#s) => s(minIndex s) userError "String is not a single character" upperCase c == CHAR_-UPCASE(c)$Lisp : % lowerCase c == CHAR_-DOWNCASE(c)$Lisp : % @ \section{domain CCLASS CharacterClass} <>= import Character import String import List )abbrev domain CCLASS CharacterClass ++ Author: Stephen M. Watt ++ Date Created: July 1986 ++ Date Last Updated: June 20, 1991 ++ Basic Operations: charClass ++ Related Domains: Character, Bits ++ Also See: ++ AMS Classifications: ++ Keywords: ++ Examples: ++ References: ++ Description: ++ This domain allows classes of characters to be defined and manipulated ++ efficiently. CharacterClass: Join(SetCategory, ConvertibleTo String, FiniteSetAggregate Character, ConvertibleTo List Character) with charClass: String -> % ++ charClass(s) creates a character class which contains ++ exactly the characters given in the string s. charClass: List Character -> % ++ charClass(l) creates a character class which contains ++ exactly the characters given in the list l. digit: constant -> % ++ digit() returns the class of all characters ++ for which \spadfunFrom{digit?}{Character} is true. hexDigit: constant -> % ++ hexDigit() returns the class of all characters for which ++ \spadfunFrom{hexDigit?}{Character} is true. upperCase: constant -> % ++ upperCase() returns the class of all characters for which ++ \spadfunFrom{upperCase?}{Character} is true. lowerCase: constant -> % ++ lowerCase() returns the class of all characters for which ++ \spadfunFrom{lowerCase?}{Character} is true. alphabetic : constant -> % ++ alphabetic() returns the class of all characters for which ++ \spadfunFrom{alphabetic?}{Character} is true. alphanumeric: constant -> % ++ alphanumeric() returns the class of all characters for which ++ \spadfunFrom{alphanumeric?}{Character} is true. == add Rep := IndexedBits(0) N := size()$Character a, b: % digit() == charClass "0123456789" hexDigit() == charClass "0123456789abcdefABCDEF" upperCase() == charClass "ABCDEFGHIJKLMNOPQRSTUVWXYZ" lowerCase() == charClass "abcdefghijklmnopqrstuvwxyz" alphabetic() == union(upperCase(), lowerCase()) alphanumeric() == union(alphabetic(), digit()) a = b == a =$Rep b member?(c, a) == a(ord c) union(a,b) == Or(a, b) intersect (a,b) == And(a, b) difference(a,b) == And(a, Not b) complement a == Not a convert(cl):String == construct(convert(cl)@List(Character)) convert(cl:%):List(Character) == [char(i) for i in 0..N-1 | cl.i] charClass(s: String) == cl := new(N, false) for i in minIndex(s)..maxIndex(s) repeat cl(ord s.i) := true cl charClass(l: List Character) == cl := new(N, false) for c in l repeat cl(ord c) := true cl coerce(cl):OutputForm == (convert(cl)@String)::OutputForm -- Stuff to make a legal SetAggregate view # a == (n := 0; for i in 0..N-1 | a.i repeat n := n+1; n) empty():% == charClass [] brace():% == charClass [] insert!(c, a) == (a(ord c) := true; a) remove!(c: Character, a:%) == (a(ord c) := false; a) inspect(a) == for i in 0..N-1 | a.i repeat return char i error "Cannot take a character from an empty class." extract!(a) == for i in 0..N-1 | a.i repeat a.i := false return char i error "Cannot take a character from an empty class." map(f, a) == b := new(N, false) for i in 0..N-1 | a.i repeat b(ord f char i) := true b temp: % := new(N, false)$Rep map!(f, a) == fill!(temp, false) for i in 0..N-1 | a.i repeat temp(ord f char i) := true copyInto!(a, temp, 0) parts a == [char i for i in 0..N-1 | a.i] @ \section{domain ISTRING IndexedString} <>= )abbrev domain ISTRING IndexedString ++ Authors: Stephen Watt, Michael Monagan, Manuel Bronstein 1986 .. 1991 -- The following Lisp dependencies are divided into two groups -- Those that are required -- QENUM QESET QCSIZE MAKE-FULL-CVEC EQ QSLESSP QSGREATERP -- Those that can are included for efficiency only -- COPY STRCONC SUBSTRING STRPOS RPLACSTR DOWNCASE UPCASE CGREATERP ++ Description: ++ This domain implements low-level strings IndexedString(mn:Integer): Export == Implementation where B ==> Boolean C ==> Character I ==> Integer N ==> NonNegativeInteger U ==> UniversalSegment Integer Export == StringAggregate() Implementation == add -- These assume Character's Rep is Small I Qelt ==> CHAR$Lisp Qequal ==> EQUAL$Lisp Qsetelt ==> QESET$Lisp Qsize ==> QCSIZE$Lisp Cheq ==> CHAR_=$Lisp c: Character cc: CharacterClass -- new n == MAKE_-FULL_-CVEC(n, space$C)$Lisp new(n, c) == MAKE_-FULL_-CVEC(n, c)$Lisp empty() == MAKE_-FULL_-CVEC(0$Lisp)$Lisp empty?(s) == Qsize(s) = 0 #s == Qsize(s) s = t == Qequal(s, t) s < t == CGREATERP(t,s)$Lisp concat(s:%,t:%) == STRCONC(s,t)$Lisp copy s == COPY_-SEQ(s)$Lisp insert(s:%, t:%, i:I) == concat(concat(s(mn..i-1), t), s(i..)) coerce(s:%):OutputForm == outputForm(s pretend String) minIndex s == mn upperCase! s == map!(upperCase, s) lowerCase! s == map!(lowerCase, s) latex s == concat("\mbox{``", concat(s pretend String, "''}")) replace(s, sg, t) == l := lo(sg) - mn m := #s n := #t h:I := if hasHi sg then hi(sg) - mn else maxIndex s - mn l < 0 or h >= m or h < l-1 => error "index out of range" r := new((m-(h-l+1)+n)::N, space$C) k : NonNegativeInteger for k in 0.. for i in 0..l-1 repeat Qsetelt(r, k, Qelt(s, i)) for k in k.. for i in 0..n-1 repeat Qsetelt(r, k, Qelt(t, i)) for k in k.. for i in h+1..m-1 repeat Qsetelt(r, k, Qelt(s, i)) r setelt(s:%, i:I, c:C) == i < mn or i > maxIndex(s) => error "index out of range" Qsetelt(s, i - mn, c) c substring?(part, whole, startpos) == np:I := Qsize part nw:I := Qsize whole (startpos := startpos - mn) < 0 => error "index out of bounds" np > nw - startpos => false for ip in 0..np-1 for iw in startpos.. repeat not Cheq(Qelt(part, ip), Qelt(whole, iw)) => return false true position(s:%, t:%, startpos:I) == (startpos := startpos - mn) < 0 => error "index out of bounds" startpos >= Qsize t => mn - 1 r:I := STRPOS(s, t, startpos, NIL$Lisp)$Lisp EQ(r, NIL$Lisp)$Lisp => mn - 1 r + mn position(c: Character, t: %, startpos: I) == (startpos := startpos - mn) < 0 => error "index out of bounds" startpos >= Qsize t => mn - 1 for r in startpos..Qsize t - 1 repeat if Cheq(Qelt(t, r), c) then return r + mn mn - 1 position(cc: CharacterClass, t: %, startpos: I) == (startpos := startpos - mn) < 0 => error "index out of bounds" startpos >= Qsize t => mn - 1 for r in startpos..Qsize t - 1 repeat if member?(Qelt(t,r), cc) then return r + mn mn - 1 suffix?(s, t) == (m := maxIndex s) > (n := maxIndex t) => false substring?(s, t, mn + n - m) split(s, c) == n := maxIndex s i : NonNegativeInteger for i in mn..n while s.i = c repeat 0 l := empty()$List(%) j:Integer -- j is conditionally intialized while i <= n and (j := position(c, s, i)) >= mn repeat l := concat(s(i..j-1), l) for i in j..n while s.i = c repeat 0 if i <= n then l := concat(s(i..n), l) reverse! l split(s, cc) == n := maxIndex s i : NonNegativeInteger for i in mn..n while member?(s.i,cc) repeat 0 l := empty()$List(%) j:Integer -- j is conditionally intialized while i <= n and (j := position(cc, s, i)) >= mn repeat l := concat(s(i..j-1), l) for i in j..n while member?(s.i,cc) repeat 0 if i <= n then l := concat(s(i..n), l) reverse! l leftTrim(s, c) == n := maxIndex s i : NonNegativeInteger for i in mn .. n while s.i = c repeat 0 s(i..n) leftTrim(s, cc) == n := maxIndex s i : NonNegativeInteger for i in mn .. n while member?(s.i,cc) repeat 0 s(i..n) rightTrim(s, c) == j : NonNegativeInteger for j in maxIndex s .. mn by -1 while s.j = c repeat 0 s(minIndex(s)..j) rightTrim(s, cc) == j : NonNegativeInteger for j in maxIndex s .. mn by -1 while member?(s.j, cc) repeat 0 s(minIndex(s)..j) concat l == t := new(+/[#s for s in l], space$C) i := mn for s in l repeat copyInto!(t, s, i) i := i + #s t copyInto!(y, x, s) == m := #x n := #y s := s - mn s < 0 or s+m > n => error "index out of range" RPLACSTR(y, s, m, x, 0, m)$Lisp y elt(s:%, i:I) == i < mn or i > maxIndex(s) => error "index out of range" Qelt(s, i - mn) elt(s:%, sg:U) == l := lo(sg) - mn h := if hasHi sg then hi(sg) - mn else maxIndex s - mn l < 0 or h >= #s => error "index out of bound" SUBSTRING(s, l, max(0, h-l+1))$Lisp hash s == SXHASH(s)$Foreign(Builtin) match(pattern,target,wildcard) == stringMatch(pattern,target,CHARACTER(wildcard)$Lisp)$Lisp @ Up to [[patch--40]] this read \begin{verbatim} match(pattern,target,wildcard) == stringMatch(pattern,target,wildcard)$Lisp \end{verbatim} which did not work (Issue~\#97), since [[wildcard]] is an Axiom-[[Character]], not a Lisp-[[Character]]. The operation [[CHARACTER]] from [[Lisp]] performs the coercion. <>= match?(pattern, target, dontcare) == n := maxIndex pattern p := position(dontcare, pattern, m := minIndex pattern)::N p = m-1 => pattern = target (p ~= m) and not prefix?(pattern(m..p-1), target) => false i := p -- index into target q := position(dontcare, pattern, p + 1)::N while q ~= m-1 repeat s := pattern(p+1..q-1) i := position(s, target, i)::N i = m-1 => return false i := i + #s p := q q := position(dontcare, pattern, q + 1)::N (p ~= n) and not suffix?(pattern(p+1..n), target) => false true @ \section{domain STRING String} <>= )abbrev domain STRING String ++ Description: ++ This is the domain of character strings. MINSTRINGINDEX ==> 1 -- as of 3/14/90. String(): StringCategory == IndexedString(MINSTRINGINDEX) add string n == STRINGIMAGE(n)$Lisp OMwrite(x: %): String == s: String := "" sp := OM_-STRINGTOSTRINGPTR(s)$Lisp dev: OpenMathDevice := OMopenString(sp pretend String, OMencodingXML) OMputObject(dev) OMputString(dev, x pretend String) OMputEndObject(dev) OMclose(dev) s := OM_-STRINGPTRTOSTRING(sp)$Lisp pretend String s OMwrite(x: %, wholeObj: Boolean): String == s: String := "" sp := OM_-STRINGTOSTRINGPTR(s)$Lisp dev: OpenMathDevice := OMopenString(sp pretend String, OMencodingXML) if wholeObj then OMputObject(dev) OMputString(dev, x pretend String) if wholeObj then OMputEndObject(dev) OMclose(dev) s := OM_-STRINGPTRTOSTRING(sp)$Lisp pretend String s OMwrite(dev: OpenMathDevice, x: %): Void == OMputObject(dev) OMputString(dev, x pretend String) OMputEndObject(dev) OMwrite(dev: OpenMathDevice, x: %, wholeObj: Boolean): Void == if wholeObj then OMputObject(dev) OMputString(dev, x pretend String) if wholeObj then OMputEndObject(dev) @ \section{category STRICAT StringCategory} <>= )abbrev category STRICAT StringCategory -- Note that StringCategory is built into the old compiler -- redundant SetCategory added to help A# compiler ++ Description: ++ A category for string-like objects StringCategory():Category == Join(StringAggregate(), SetCategory, OpenMath) with string: Integer -> % ++ string(i) returns the decimal representation of i in a string @ \section{License} <>= --Copyright (c) 1991-2002, The Numerical ALgorithms Group Ltd. --All rights reserved. -- --Redistribution and use in source and binary forms, with or without --modification, are permitted provided that the following conditions are --met: -- -- - Redistributions of source code must retain the above copyright -- notice, this list of conditions and the following disclaimer. -- -- - Redistributions in binary form must reproduce the above copyright -- notice, this list of conditions and the following disclaimer in -- the documentation and/or other materials provided with the -- distribution. -- -- - Neither the name of The Numerical ALgorithms Group Ltd. nor the -- names of its contributors may be used to endorse or promote products -- derived from this software without specific prior written permission. -- --THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS --IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED --TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A --PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER --OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, --EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, --PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR --PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF --LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING --NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS --SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @ <<*>>= <> <> <> <> <> <> @ \eject \begin{thebibliography}{99} \bibitem{1} nothing \end{thebibliography} \end{document}