\documentclass{article}
\usepackage{open-axiom}
\begin{document}
\title{\$SPAD/src/algebra string.spad}
\author{Stephen M. Watt, Michael Monagan, Manuel Bronstein}
\maketitle
\begin{abstract}
\end{abstract}
\eject
\tableofcontents
\eject
\section{domain CHAR Character}
<<domain CHAR Character>>=
)abbrev domain CHAR Character
++ Author: Stephen M. Watt
++ Date Created: July 1986
++ Date Last Updated: June 20, 1991
++ Basic Operations: char
++ Related Domains:
++ Also See:
++ AMS Classifications:
++ Keywords: character, string
++ Examples:
++ References:
++ Description:
++   This domain provides the basic character data type.

Character: OrderedFinite() with
        ord: % -> NonNegativeInteger
            ++ ord(c) returns the integer code of the character c.
            ++ The identity \spad{char ord c = c} always holds.
        char: NonNegativeInteger -> %
            ++ char(i) returns the character whose integer code is i.
            ++ The identity \spad{ord char i = i} always holds.
        char: String -> %
            ++ char(s) returns the character held by a string s of length one.
        space: %
            ++ \spad{space} is the blank character.
        quote: %
            ++ \spad{quote} is the string quote character, \spad{"}.
        underscore: %
            ++ \spad{underscore} is the underbar character.
        newline: %
            ++ \spad{newline} is the new line character.
        carriageReturn: %
            ++ \spad{carriageReturn} is the carriage return character.
        linefeed: %
            ++ \spad{linefeed} is the line feed character.
        formfeed: %
            ++ \spad{formfeed} is the form feed character.
        backspace: %
            ++ \spad{backspace} is the backspace character.
        horizontalTab: %
            ++ \spad{horizontalTab} is the horizontal tab character.
        verticalTab: %
            ++ \spad{verticalTab} is the vertical tab character.
        escape: %
            ++ \spad{escape} is the escape character.
        upperCase: % -> %
            ++ upperCase(c) returns the upper case letter corresponding
            ++ to the lower case letter c.  Any other character is
            ++ returned unchanged.
        lowerCase: % -> %
            ++ lowerCase(c) returns the lower case letter corresponding
            ++ to the upper case letter c.  Any other character is
            ++ returned unchanged.
        digit?: % -> Boolean
            ++ digit?(c) tests whether c is a digit character,
            ++ i.e. one of 0..9.
        hexDigit?: % -> Boolean
            ++ hexDigit?(c) tests whether c is a hexadecimal numeral,
            ++ i.e. one of 0..9, a..f or A..F.
        alphabetic?: % -> Boolean
            ++ alphabetic?(c) tests whether c is a letter,
            ++ i.e. one of a..z or A..Z.
        upperCase?: % -> Boolean
            ++ upperCase?(c) tests whether c is an upper case letter,
            ++ i.e. one of A..Z.
        lowerCase?: % -> Boolean
            ++ lowerCase?(c) tests whether c is a lower case letter,
            ++ i.e. one of a..z.
        alphanumeric?: % -> Boolean
            ++ alphanumeric?(c) tests whether c is a letter or a digit,
            ++ i.e. one of 0..9, a..z or A..Z.

    == add
        -- The representation is the base-char type of the underlying
        -- Lisp system; most operations map directly onto a builtin.
        NNI ==> NonNegativeInteger
        import CharacterClass()
        import %ccstmax: NonNegativeInteger from Foreign Builtin
        import %ceq: (%,%) -> Boolean from Foreign Builtin
        import %clt: (%,%) -> Boolean from Foreign Builtin
        import %cle: (%,%) -> Boolean from Foreign Builtin
        import %cgt: (%,%) -> Boolean from Foreign Builtin
        import %cge: (%,%) -> Boolean from Foreign Builtin
        import %cup: % -> %           from Foreign Builtin
        import %cdown: % -> %         from Foreign Builtin
        import %c2i: % -> NNI         from Foreign Builtin
        import %i2c: NNI -> %         from Foreign Builtin
        import %iinc: NNI -> PositiveInteger from Foreign Builtin
        import %idec: PositiveInteger -> NNI from Foreign Builtin
        import %ccst: String -> %     from Foreign Builtin
        import %s2c: String -> %      from Foreign Builtin
        import %c2s: % -> String      from Foreign Builtin
        import %strconc: (String,String) -> String from Foreign Builtin

        -- Comparisons.
        x = y                  == %ceq(x, y)
        x < y                  == %clt(x, y)
        x > y                  == %cgt(x, y)
        x <= y                 == %cle(x, y)
        x >= y                 == %cge(x, y)

        -- Finite enumeration: index/lookup count from 1 while the
        -- character codes used by char/ord count from 0.
        size()                 == %ccstmax
        index k                == char %idec k
        lookup ch              == %iinc ord ch
        char(code: NNI)        == %i2c code
        ord ch                 == %c2i ch
        random()               == char(random(size())$NNI)

        -- Named character constants.
        space                  == %ccst " "
        quote                  == %ccst "_""
        underscore             == %ccst "__"
        newline                == %ccst "\n"
        carriageReturn         == %i2c 13
        linefeed               == %i2c 10
        formfeed               == %i2c 12
        backspace              == %i2c 8
        horizontalTab          == %i2c 9
        verticalTab            == %i2c 11
        escape                 == %i2c 27

        coerce(c:%):OutputForm == c : OutputForm

        -- Classification is membership in the matching CharacterClass.
        digit? ch              == member?(ch, digit())
        hexDigit? ch           == member?(ch, hexDigit())
        upperCase? ch          == member?(ch, upperCase())
        lowerCase? ch          == member?(ch, lowerCase())
        alphabetic? ch         == member?(ch, alphabetic())
        alphanumeric? ch       == member?(ch, alphanumeric())

        -- The character is wrapped as \mbox{`c'}.
        latex ch               == %strconc("\mbox{`", %strconc(%c2s ch, "'}"))

        char(str: String)      == %s2c str

        upperCase ch           == %cup ch

        lowerCase ch           == %cdown ch

@

\section{domain CCLASS CharacterClass}
<<domain CCLASS CharacterClass>>=
import Character
import String
import List
)abbrev domain CCLASS CharacterClass
++ Author: Stephen M. Watt
++ Date Created: July 1986
++ Date Last Updated: June 20, 1991
++ Basic Operations: charClass
++ Related Domains: Character, Bits
++ Also See:
++ AMS Classifications:
++ Keywords:
++ Examples:
++ References:
++ Description:
++   This domain allows classes of characters to be defined and manipulated
++   efficiently.


CharacterClass: Join(SetCategory, ConvertibleTo String,
  FiniteSetAggregate Character, ConvertibleTo List Character) with
        charClass: String -> %
            ++ charClass(s) returns the character class containing
            ++ exactly the characters that occur in the string s.
        charClass: List Character -> %
            ++ charClass(l) returns the character class containing
            ++ exactly the characters that occur in the list l.
        digit: constant -> %
            ++ digit() is the class of all characters for which
            ++ \spadfunFrom{digit?}{Character} is true.
        hexDigit: constant -> %
            ++ hexDigit() is the class of all characters for which
            ++ \spadfunFrom{hexDigit?}{Character} is true.
        upperCase: constant -> %
            ++ upperCase() is the class of all characters for which
            ++ \spadfunFrom{upperCase?}{Character} is true.
        lowerCase: constant -> %
            ++ lowerCase() is the class of all characters for which
            ++ \spadfunFrom{lowerCase?}{Character} is true.
        alphabetic: constant -> %
            ++ alphabetic() is the class of all characters for which
            ++ \spadfunFrom{alphabetic?}{Character} is true.
        alphanumeric: constant -> %
            ++ alphanumeric() is the class of all characters for which
            ++ \spadfunFrom{alphanumeric?}{Character} is true.

    == add
        import %iinc: Integer -> Integer from Foreign Builtin
        import %idec: Integer -> Integer from Foreign Builtin

        -- A class is stored as a bit vector indexed from 0 holding one
        -- bit per character code: bit ord(c) is set exactly when the
        -- character c belongs to the class.
        Rep := IndexedBits(0)
        N   := size()$Character

        a, b: %

        -- Predefined classes.
        digit()          == charClass "0123456789"
        hexDigit()       == charClass "0123456789abcdefABCDEF"
        upperCase()      == charClass "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        lowerCase()      == charClass "abcdefghijklmnopqrstuvwxyz"
        alphabetic()     == union(upperCase(), lowerCase())
        alphanumeric()   == union(alphabetic(), digit())

        -- Equality and the set algebra are delegated to the bit vector.
        a = b            == a =$Rep b

        member?(c, a)    == a(ord c)
        union(a, b)      == Or(a, b)
        intersect(a, b)  == And(a, b)
        difference(a, b) == And(a, Not b)
        complement a     == Not a

        -- Conversions list the members in increasing character code.
        convert(cl):String ==
            construct(convert(cl)@List(Character))
        convert(cl:%):List(Character) ==
            [char(k) for k in 0..%idec N | cl.k]

        charClass(s: String) ==
            cl := new(N, false)
            for k in minIndex(s)..maxIndex(s) repeat
                cl(ord s.k) := true
            cl

        charClass(l: List Character) ==
            cl := new(N, false)
            for ch in l repeat
                cl(ord ch) := true
            cl

        coerce(cl):OutputForm == (convert(cl)@String)::OutputForm

        -- Operations needed for a legal SetAggregate view.
        # a ==
            cnt := 0
            for k in 0..%idec N | a.k repeat cnt := %iinc cnt
            cnt

        empty():%        == charClass []
        brace():%        == charClass []

        -- Both of these update the class a in place and return it.
        insert!(c, a) ==
            a(ord c) := true
            a

        remove!(c: Character, a:%) ==
            a(ord c) := false
            a

        -- Returns the member with the smallest code, leaving a unchanged.
        inspect(a) ==
            for k in 0..%idec N | a.k repeat return char k
            error "Cannot take a character from an empty class."

        -- Removes and returns the member with the smallest code.
        extract!(a) ==
            for k in 0..%idec N | a.k repeat
                a.k := false
                return char k
            error "Cannot take a character from an empty class."

        map(f, a) ==
            b := new(N, false)
            for k in 0..%idec N | a.k repeat
                b(ord f char k) := true
            b

        -- Scratch bit vector shared by every call of map!; the image is
        -- built here first so that a is not modified while it is read.
        temp: % := new(N, false)$Rep
        map!(f, a) ==
            fill!(temp, false)
            for k in 0..%idec N | a.k repeat
                temp(ord f char k) := true
            copyInto!(a, temp, 0)

        parts a ==
            [char k for k in 0..%idec N | a.k]

@

\section{domain STRING String}
<<domain STRING String>>=
)abbrev domain STRING String
++ Description:
++   This is the domain of character strings.
++ Authors: Stephen Watt, Michael Monagan, Manuel Bronstein 1986 .. 1991

String(): Public == Private where
  Public == StringAggregate with
    string: Integer -> %
      ++ \spad{string i} returns the decimal representation of
      ++ the integer \spad{i} as a string.
    string: DoubleFloat -> %
      ++ \spad{string f} returns the decimal representation of
      ++ the floating point number \spad{f} as a string.
    string: Identifier -> %
      ++ \spad{string id} returns the string representation of the
      ++ identifier \spad{id}.
  Private == add
    macro B == Boolean
    macro C == Character
    macro I == Integer
    macro N == NonNegativeInteger
    macro U == UniversalSegment Integer

    import %icst0: N                  from Foreign Builtin
    import %icst1: N                  from Foreign Builtin
    import %i2s: I -> %               from Foreign Builtin
    import %iinc: I -> I              from Foreign Builtin
    import %idec: I -> I              from Foreign Builtin
    import %f2s: DoubleFloat -> %     from Foreign Builtin
    import %sname: Identifier -> %    from Foreign Builtin
    import %strlength: % -> N         from Foreign Builtin
    import %streq: (%,%) -> B         from Foreign Builtin
    import %strlt: (%,%) -> B         from Foreign Builtin
    import %ceq: (C, C) -> B          from Foreign Builtin
    import %schar: (%,I) -> C         from Foreign Builtin
    import %strconc: (%,%) -> %       from Foreign Builtin
    import %strcopy: % -> %           from Foreign Builtin
    import %strstc: (%,I,C) -> Void   from Foreign Builtin
    import %hash : % -> SingleInteger from Foreign Builtin

    -- Conversions to strings; each one is a direct builtin call.
    string(k: I)              == %i2s k
    string(x: DoubleFloat)    == %f2s x
    string(name: Identifier)  == %sname name

    -- Domain-wide declarations: they give the formal parameters named
    -- c and cc their types in the definitions that follow.
    c:  Character
    cc: CharacterClass

--  new n		   == makeString(n, space$C)$Lisp
    -- Construction and size.
    new(n, c)                == makeString(n, c)$Lisp
    empty()                  == makeString(0@I)$Lisp
    empty? str               == zero? %strlength str
    # str                    == %strlength str

    -- Comparison, concatenation and copying via builtins.
    x = y                    == %streq(x, y)
    x < y                    == %strlt(x, y)
    concat(x:%, y:%)         == %strconc(x, y)
    copy str                 == %strcopy str

    -- insert places t in front of the character at index i of s.
    insert(s:%, t:%, i:I)    == concat(concat(s(1..%idec i), t), s(i..))
    coerce(s:%):OutputForm   == outputForm(s pretend String)

    -- Strings are indexed from 1.
    minIndex str             == %icst1

    -- In-place case conversion.
    upperCase! str           == map!(upperCase, str)
    lowerCase! str           == map!(lowerCase, str)

    -- The string is wrapped as \mbox{``s''}.
    latex str == concat("\mbox{``", concat(str pretend String, "''}"))

    -- replace(s, sg, t) returns a new string made of the characters of s
    -- before the segment sg, then all of t, then the characters of s after
    -- the segment.  s and t are only read.  An open-ended segment extends
    -- to the end of s.  Signals "index out of range" when the segment does
    -- not fit in s; a segment whose upper bound is one less than its lower
    -- bound is accepted and results in a pure insertion.
    replace(s, sg, t) ==
	-- l and h are the 0-based first and last positions being replaced
	l := %idec lo(sg)
	m := #s
	n := #t
	h:I := if hasHi sg then %idec hi(sg) else %idec maxIndex s
	negative? l or h >= m or h < %idec l => error "index out of range"
	-- the result drops h-l+1 characters of s and gains the n of t
	r := new((m-%iinc(h-l)+n)::N, space$C)
	-- k is the next 0-based position of r to be written
        k: NonNegativeInteger := %icst0
	-- part of s in front of the segment
	for i in %icst0..%idec l repeat
          %strstc(r, k, %schar(s, i))
          k := %iinc(k) : N
	-- the replacement text
	for i in %icst0..%idec n repeat
          %strstc(r, k, %schar(t, i))
          k := %iinc(k) : N
	-- part of s behind the segment
	for i in %iinc h..%idec m repeat
          %strstc(r, k, %schar(s, i))
          k := %iinc(k) : N
	r

    -- setelt(s, i, c) stores c at the 1-based index i of s, in place,
    -- and returns c.
    setelt(s:%, i:I, c:C) ==
        -- only the indices 1..maxIndex(s) are valid
        i > maxIndex(s) or i < 1 => error "index out of range"
        -- the builtin takes a 0-based position
        %strstc(s, %idec i, c)
        c

    -- substring?(part, whole, startpos) tests whether part occurs in
    -- whole beginning exactly at the 1-based index startpos.
    substring?(part, whole, startpos) ==
        -- 0-based position in whole at which the comparison starts
        off:I := %idec startpos
        negative? off => error "index out of bounds"
        partLen:I  := %strlength part
        wholeLen:I := %strlength whole
        -- part cannot fit in what is left of whole
        partLen > wholeLen - off => false
        for j in %icst0..%idec partLen for k in off.. repeat
            not %ceq(%schar(part, j), %schar(whole, k)) => return false
        true

    -- position(s, t, startpos) searches for the string s inside t, starting
    -- at the 1-based index startpos.  The result is 0 when startpos lies
    -- beyond the end of t or when the Lisp search returns NIL; otherwise
    -- it is the search result plus one.
    -- NOTE(review): STRPOS presumably returns the 0-based offset of the
    -- first occurrence -- confirm against its Lisp definition.
    position(s:%, t:%, startpos:I) ==
	-- from here on startpos is 0-based
	negative?(startpos := %idec startpos) => error "index out of bounds"
	startpos >= %strlength t => %icst0
	-- r is declared as an integer although STRPOS may hand back NIL,
	-- hence the pointer-equality test before it is used as a number
	r:I := STRPOS(s, t, startpos, NIL$Lisp)$Lisp
	%peq(r, NIL$Lisp)$Foreign(Builtin) => %icst0
	%iinc r
    -- position(c, t, startpos) returns the 1-based index of the first
    -- occurrence of the character c in t at or after index startpos,
    -- or 0 when there is none.
    position(c: Character, t: %, startpos: I) ==
        -- 0-based position at which the scan starts
        start0 := %idec startpos
        negative? start0 => error "index out of bounds"
        len := %strlength t
        start0 >= len => %icst0
        for k in start0..%idec len repeat
            %ceq(%schar(t, k), c) => return %iinc k
        %icst0
    -- position(cc, t, startpos) returns the 1-based index of the first
    -- character of t at or after index startpos that belongs to the
    -- class cc, or 0 when there is none.
    position(cc: CharacterClass, t: %, startpos: I) ==
        -- 0-based position at which the scan starts
        start0 := %idec startpos
        negative? start0 => error "index out of bounds"
        len := %strlength t
        start0 >= len => %icst0
        for k in start0..%idec len repeat
            member?(%schar(t, k), cc) => return %iinc k
        %icst0

    -- suffix?(s, t) tests whether t ends with s.
    suffix?(s, t) ==
        sLast := maxIndex s
        tLast := maxIndex t
        -- a longer string can never be a suffix of a shorter one
        sLast > tLast => false
        -- s must occur in t at the index that makes both ends coincide
        substring?(s, t, %iinc(tLast - sLast))

    -- split(s, c) returns, in order of occurrence, the maximal substrings
    -- of s that contain no occurrence of the character c.  Leading,
    -- trailing and repeated occurrences of c produce no empty pieces.
    split(s, c) ==
	n := maxIndex s
        i := %icst1
	-- skip the delimiters at the front
	while i <= n and s.i = c repeat i := %iinc i
	-- pieces are collected in reverse order and flipped at the end
	l := empty()$List(%)
	j:Integer -- j is conditionally initialized
	-- j is the index of the next delimiter at or after i; 0 ends the loop
	while i <= n and (j := position(c, s, i)) >= %icst1 repeat
	    l := concat(s(i..%idec j), l)
            i := j
	    -- skip the run of delimiters that ended this piece
	    while i <= n and s.i = c repeat i := %iinc i
	-- whatever follows the last delimiter is the final piece
	if i <= n then l := concat(s(i..n), l)
	reverse! l

    -- split(s, cc) returns, in order of occurrence, the maximal substrings
    -- of s that contain no character of the class cc.  Leading, trailing
    -- and repeated delimiter characters produce no empty pieces.
    split(s, cc) ==
	n := maxIndex s
        i := %icst1
	-- skip the delimiters at the front
	while i <= n and member?(s.i,cc) repeat i := %iinc i
	-- pieces are collected in reverse order and flipped at the end
	l := empty()$List(%)
	j:Integer -- j is conditionally initialized
	-- j is the index of the next delimiter at or after i; 0 ends the loop
	while i <= n and (j := position(cc, s, i)) >= 1 repeat
	    l := concat(s(i..%idec j), l)
            i := j
	    -- skip the run of delimiters that ended this piece
	    while i <= n and member?(s.i,cc) repeat i := %iinc i
	-- whatever follows the last delimiter is the final piece
	if i <= n then l := concat(s(i..n), l)
	reverse! l

    -- leftTrim(s, c) returns s without its leading occurrences of the
    -- character c.
    leftTrim(s, c) ==
        finish := maxIndex s
        start := %icst1
        -- advance past every leading c
        while start <= finish and s.start = c repeat
            start := %iinc start
        s(start..finish)

    -- leftTrim(s, cc) returns s without its leading characters that
    -- belong to the class cc.
    leftTrim(s, cc) ==
        finish := maxIndex s
        start := %icst1
        -- advance past every leading member of cc
        while start <= finish and member?(s.start, cc) repeat
            start := %iinc start
        s(start..finish)

    -- rightTrim(s, c) returns s without its trailing occurrences of the
    -- character c.
    rightTrim(s, c) ==
        finish := maxIndex s
        -- step back over every trailing c
        while finish >= 1 and s.finish = c repeat
            finish := %idec finish
        s(minIndex(s)..finish)

    -- rightTrim(s, cc) returns s without its trailing characters that
    -- belong to the class cc.
    rightTrim(s, cc) ==
        finish := maxIndex s
        -- step back over every trailing member of cc
        while finish >= %icst1 and member?(s.finish, cc) repeat
            finish := %idec finish
        s(minIndex(s)..finish)

    -- concat(l) returns the concatenation, in order, of the strings in
    -- the list l.
    concat l ==
        -- allocate the whole result once, then copy each piece into place
        buf := new(+/[#piece for piece in l], space$C)
        pos := %icst1
        for piece in l repeat
            copyInto!(buf, piece, pos)
            pos := pos + #piece
        buf

    -- copyInto!(y, x, s) overwrites part of y, starting at the 1-based
    -- index s, with the characters of x, and returns y.  It signals an
    -- error when x does not fit into y at that position.
    copyInto!(y, x, s) ==
        srcLen := #x
        dstLen := #y
        -- 0-based position in y of the first character written
        off := %idec s
        negative? off or off + srcLen > dstLen => error "index out of range"
        RPLACSTR(y, off, srcLen, x, %icst0, srcLen)$Lisp
        y

    -- elt(s, i) returns the character at the 1-based index i of s.
    elt(s:%, i:I) ==
        -- only the indices 1..maxIndex(s) are valid
        i > maxIndex(s) or i < %icst1 => error "index out of range"
        -- the builtin takes a 0-based position
        %schar(s, %idec i)

    -- elt(s, sg) returns the substring of s selected by the segment sg;
    -- a segment without an upper bound extends to the end of s.
    elt(s:%, sg:U) ==
        -- 0-based positions of the first and last selected characters
        first0 := %idec lo(sg)
        last0 := if hasHi sg then %idec hi(sg) else %idec maxIndex s
        negative? first0 or last0 >= #s => error "index out of bound"
        -- a segment whose upper bound lies below its lower bound
        -- selects nothing
        SUBSTRING(s, first0, max(%icst0, %iinc(last0 - first0)))$Lisp

    -- hash(s) is computed directly by the %hash builtin.
    hash str == %hash str

    -- match(pattern, target, wildcard) hands the work to the Lisp
    -- function stringMatch, passing wildcard through CHARACTER.
    match(pat, tgt, wild) ==
        stringMatch(pat, tgt, CHARACTER(wild)$Lisp)$Lisp
 
    -- match?(pattern, target, dontcare) tests whether target matches
    -- pattern, where each occurrence of the character dontcare in pattern
    -- stands for an arbitrary (possibly empty) run of characters.
    --
    -- The literal text before the first wildcard must be a prefix of
    -- target, the literal pieces between wildcards must occur one after
    -- another, and the literal text after the last wildcard must be a
    -- suffix of target that does not overlap anything matched before it.
    -- Without that last condition "ab*bc" would wrongly match "abc".
    match?(pattern, target, dontcare) ==
        n := maxIndex pattern
        -- p is the index in pattern of the current wildcard; position
        -- answers m-1, i.e. 0, when there is none
        p := position(dontcare, pattern, m := minIndex pattern)::N
        -- no wildcard at all: plain equality
        p = %idec m => pattern = target
        (p ~= m) and not prefix?(pattern(m..%idec p), target) => false
        i := p  -- index into target of the first unconsumed character
        q := position(dontcare, pattern, %iinc p)::N
        while q ~= %idec m repeat
           -- s is the literal piece between the wildcards at p and q
           s := pattern(%iinc p..%idec q)
           i := position(s, target, i)::N
           i = %idec m => return false
           i := i + #s
           p := q
           q := position(dontcare, pattern, %iinc q)::N
        -- pattern ends with a wildcard: the rest of target is absorbed
        p = n => true
        sfx := pattern(%iinc p..n)
        -- the trailing literal has to start at or after position i,
        -- i.e. it must fit into target(i..#target)
        i + #sfx > (#target) + 1 => false
        suffix?(sfx, target)

@

\section{License}
<<license>>=
--Copyright (c) 1991-2002, The Numerical ALgorithms Group Ltd.
--All rights reserved.
-- Copyright (C) 2007-2011, Gabriel Dos Reis.
-- All rights reserved.
--
--Redistribution and use in source and binary forms, with or without
--modification, are permitted provided that the following conditions are
--met:
--
--    - Redistributions of source code must retain the above copyright
--      notice, this list of conditions and the following disclaimer.
--
--    - Redistributions in binary form must reproduce the above copyright
--      notice, this list of conditions and the following disclaimer in
--      the documentation and/or other materials provided with the
--      distribution.
--
--    - Neither the name of The Numerical ALgorithms Group Ltd. nor the
--      names of its contributors may be used to endorse or promote products
--      derived from this software without specific prior written permission.
--
--THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
--IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
--TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
--PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
--OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
--EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
--PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
--PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
--LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
--NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
--SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@
<<*>>=
<<license>>

<<domain CHAR Character>>
<<domain CCLASS CharacterClass>>
<<domain STRING String>>
@
\eject
\begin{thebibliography}{99}
\bibitem{1} nothing
\end{thebibliography}
\end{document}