aboutsummaryrefslogtreecommitdiff
path: root/Text
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2008-08-01 00:45:07 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2008-08-01 00:45:07 +0000
commit0c6dd105b73e95d61d3a3d3213d8f80f3d8a22d4 (patch)
treed8cad0cbbdc02fee41bd55ba9938371c8278cf18 /Text
parent8440385f45e18f43172c0cf4409cf4b8b538bbeb (diff)
downloadpandoc-0c6dd105b73e95d61d3a3d3213d8f80f3d8a22d4.tar.gz
Added code for xml library (Text.XML.Light) to source tree,
since there is currently no debian package. Removed dependency on xml library. Added license to debian/copyright. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1351 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Text')
-rw-r--r--Text/XML/Light.hs96
-rw-r--r--Text/XML/Light/Cursor.hs327
-rw-r--r--Text/XML/Light/Input.hs307
-rw-r--r--Text/XML/Light/Output.hs150
-rw-r--r--Text/XML/Light/Proc.hs103
-rw-r--r--Text/XML/Light/Types.hs91
6 files changed, 1074 insertions, 0 deletions
diff --git a/Text/XML/Light.hs b/Text/XML/Light.hs
new file mode 100644
index 000000000..f2d75290b
--- /dev/null
+++ b/Text/XML/Light.hs
@@ -0,0 +1,96 @@
+{-# LANGUAGE FlexibleInstances #-}
+--------------------------------------------------------------------
+-- |
+-- Module : Text.XML.Light
+-- Copyright : (c) Galois, Inc. 2007
+-- License : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability: portability
+--
+-- A lightweight XML parsing, filtering and generating library.
+--
+-- This module reexports functions from:
+--
+-- * "Text.XML.Light.Types"
+--
+-- * "Text.XML.Light.Proc"
+--
+-- * "Text.XML.Light.Input"
+--
+-- * "Text.XML.Light.Output"
+--
+
+module Text.XML.Light (
+
+ module Text.XML.Light,
+ module Text.XML.Light.Types,
+ module Text.XML.Light.Proc,
+ module Text.XML.Light.Input,
+ module Text.XML.Light.Output
+
+ ) where
+
+import Text.XML.Light.Types
+import Text.XML.Light.Proc
+import Text.XML.Light.Input
+import Text.XML.Light.Output
+
+-- | Add an attribute to an element.
+add_attr :: Attr -> Element -> Element
+add_attr a e = add_attrs [a] e
+
+-- | Add some attributes to an element.
+add_attrs :: [Attr] -> Element -> Element
+add_attrs as e = e { elAttribs = as ++ elAttribs e }
+
+-- | Create an unqualified name.
+unqual :: String -> QName
+unqual x = blank_name { qName = x }
+
+-- | A smart element constructor which uses the type of its argument
+-- to determine what sort of element to make.
+class Node t where
+ node :: QName -> t -> Element
+
+instance Node ([Attr],[Content]) where
+ node n (attrs,cont) = blank_element { elName = n
+ , elAttribs = attrs
+ , elContent = cont
+ }
+
+instance Node [Attr] where node n as = node n (as,[]::[Content])
+instance Node Attr where node n a = node n [a]
+instance Node () where node n () = node n ([]::[Attr])
+
+instance Node [Content] where node n cs = node n ([]::[Attr],cs)
+instance Node Content where node n c = node n [c]
+instance Node ([Attr],Content) where node n (as,c) = node n (as,[c])
+instance Node (Attr,Content) where node n (a,c) = node n ([a],[c])
+
+instance Node ([Attr],[Element]) where
+ node n (as,cs) = node n (as,map Elem cs)
+
+instance Node ([Attr],Element) where node n (as,c) = node n (as,[c])
+instance Node (Attr,Element) where node n (a,c) = node n ([a],c)
+instance Node ([Element]) where node n es = node n ([]::[Attr],es)
+instance Node (Element) where node n e = node n [e]
+
+instance Node ([Attr],[CData]) where
+ node n (as,cs) = node n (as,map Text cs)
+
+instance Node ([Attr],CData) where node n (as,c) = node n (as,[c])
+instance Node (Attr,CData) where node n (a,c) = node n ([a],c)
+instance Node [CData] where node n es = node n ([]::[Attr],es)
+instance Node CData where node n e = node n [e]
+
+instance Node ([Attr],String) where
+ node n (as,t) = node n (as,blank_cdata { cdData = t })
+
+instance Node (Attr,String) where node n (a,t) = node n ([a],t)
+instance Node [Char] where node n t = node n ([]::[Attr],t)
+
+-- | Create node with unqualified name
+unode :: Node t => String -> t -> Element
+unode = node . unqual
diff --git a/Text/XML/Light/Cursor.hs b/Text/XML/Light/Cursor.hs
new file mode 100644
index 000000000..06d15bdb6
--- /dev/null
+++ b/Text/XML/Light/Cursor.hs
@@ -0,0 +1,327 @@
+--------------------------------------------------------------------
+-- |
+-- Module : Text.XML.Light.Cursor
+-- Copyright : (c) Galois, Inc. 2008
+-- License : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability:
+--
+-- XML cursors for working XML content withing the context of
+-- an XML document. This implemntation is based on the general
+-- tree zipper written by Krasimir Angelov and Iavor S. Diatchki.
+--
+
+module Text.XML.Light.Cursor
+ ( Tag(..), getTag, setTag, fromTag
+ , Cursor(..), Path
+
+ -- * Conversions
+ , fromContent
+ , fromElement
+ , fromForest
+ , toForest
+ , toTree
+
+ -- * Moving around
+ , parent
+ , root
+ , getChild
+ , firstChild
+ , lastChild
+ , left
+ , right
+
+ -- ** Searching
+ , findChild
+ , findLeft
+ , findRight
+
+ -- * Node classification
+ , isRoot
+ , isFirst
+ , isLast
+ , isLeaf
+ , isChild
+ , hasChildren
+ , getNodeIndex
+
+ -- * Updates
+ , setContent
+ , modifyContent
+ , modifyContentM
+
+ -- ** Inserting content
+ , insertLeft
+ , insertRight
+ , insertGoLeft
+ , insertGoRight
+
+ -- ** Removing content
+ , removeLeft
+ , removeRight
+ , removeGoLeft
+ , removeGoRight
+ , removeGoUp
+
+ ) where
+
+import Text.XML.Light.Types
+import Data.Maybe(isNothing)
+
+data Tag = Tag { tagName :: QName
+ , tagAttribs :: [Attr]
+ , tagLine :: Maybe Line
+ } deriving (Show)
+
+getTag :: Element -> Tag
+getTag e = Tag { tagName = elName e
+ , tagAttribs = elAttribs e
+ , tagLine = elLine e
+ }
+
+setTag :: Tag -> Element -> Element
+setTag t e = fromTag t (elContent e)
+
+fromTag :: Tag -> [Content] -> Element
+fromTag t cs = Element { elName = tagName t
+ , elAttribs = tagAttribs t
+ , elLine = tagLine t
+ , elContent = cs
+ }
+
+type Path = [([Content],Tag,[Content])]
+
+-- | The position of a piece of content in an XML document.
+data Cursor = Cur
+ { current :: Content -- ^ The currently selected content.
+ , lefts :: [Content] -- ^ Siblings on the left, closest first.
+ , rights :: [Content] -- ^ Siblings on the right, closest first.
+ , parents :: Path -- ^ The contexts of the parent elements of this location.
+ } deriving (Show)
+
+-- Moving around ---------------------------------------------------------------
+
+-- | The parent of the given location.
+parent :: Cursor -> Maybe Cursor
+parent loc =
+ case parents loc of
+ (pls,v,prs) : ps -> Just
+ Cur { current = Elem
+ (fromTag v
+ (combChildren (lefts loc) (current loc) (rights loc)))
+ , lefts = pls, rights = prs, parents = ps
+ }
+ [] -> Nothing
+
+
+-- | The top-most parent of the given location.
+root :: Cursor -> Cursor
+root loc = maybe loc root (parent loc)
+
+-- | The left sibling of the given location.
+left :: Cursor -> Maybe Cursor
+left loc =
+ case lefts loc of
+ t : ts -> Just loc { current = t, lefts = ts
+ , rights = current loc : rights loc }
+ [] -> Nothing
+
+-- | The right sibling of the given location.
+right :: Cursor -> Maybe Cursor
+right loc =
+ case rights loc of
+ t : ts -> Just loc { current = t, lefts = current loc : lefts loc
+ , rights = ts }
+ [] -> Nothing
+
+-- | The first child of the given location.
+firstChild :: Cursor -> Maybe Cursor
+firstChild loc =
+ do (t : ts, ps) <- downParents loc
+ return Cur { current = t, lefts = [], rights = ts , parents = ps }
+
+-- | The last child of the given location.
+lastChild :: Cursor -> Maybe Cursor
+lastChild loc =
+ do (ts, ps) <- downParents loc
+ case reverse ts of
+ l : ls -> return Cur { current = l, lefts = ls, rights = []
+ , parents = ps }
+ [] -> Nothing
+
+-- | Find the next left sibling that satisfies a predicate.
+findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
+findLeft p loc = do loc1 <- left loc
+ if p loc1 then return loc1 else findLeft p loc1
+
+-- | Find the next right sibling that satisfies a predicate.
+findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
+findRight p loc = do loc1 <- right loc
+ if p loc1 then return loc1 else findRight p loc1
+
+-- | The first child that satisfies a predicate.
+findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
+findChild p loc =
+ do loc1 <- firstChild loc
+ if p loc1 then return loc1 else findRight p loc1
+
+-- | The child with the given index (starting from 0).
+getChild :: Int -> Cursor -> Maybe Cursor
+getChild n loc =
+ do (ts,ps) <- downParents loc
+ (ls,t,rs) <- splitChildren ts n
+ return Cur { current = t, lefts = ls, rights = rs, parents = ps }
+
+
+-- | private: computes the parent for "down" operations.
+downParents :: Cursor -> Maybe ([Content], Path)
+downParents loc =
+ case current loc of
+ Elem e -> Just ( elContent e
+ , (lefts loc, getTag e, rights loc) : parents loc
+ )
+ _ -> Nothing
+
+-- Conversions -----------------------------------------------------------------
+
+-- | A cursor for the guven content.
+fromContent :: Content -> Cursor
+fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] }
+
+-- | A cursor for the guven element.
+fromElement :: Element -> Cursor
+fromElement e = fromContent (Elem e)
+
+-- | The location of the first tree in a forest.
+fromForest :: [Content] -> Maybe Cursor
+fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts
+ , parents = [] }
+fromForest [] = Nothing
+
+-- | Computes the tree containing this location.
+toTree :: Cursor -> Content
+toTree loc = current (root loc)
+
+-- | Computes the forest containing this location.
+toForest :: Cursor -> [Content]
+toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r)
+
+
+-- Queries ---------------------------------------------------------------------
+
+-- | Are we at the top of the document?
+isRoot :: Cursor -> Bool
+isRoot loc = null (parents loc)
+
+-- | Are we at the left end of the the document?
+isFirst :: Cursor -> Bool
+isFirst loc = null (lefts loc)
+
+-- | Are we at the right end of the document?
+isLast :: Cursor -> Bool
+isLast loc = null (rights loc)
+
+-- | Are we at the bottom of the document?
+isLeaf :: Cursor -> Bool
+isLeaf loc = isNothing (downParents loc)
+
+-- | Do we have a parent?
+isChild :: Cursor -> Bool
+isChild loc = not (isRoot loc)
+
+-- | Get the node index inside the sequence of children
+getNodeIndex :: Cursor -> Int
+getNodeIndex loc = length (lefts loc)
+
+-- | Do we have children?
+hasChildren :: Cursor -> Bool
+hasChildren loc = not (isLeaf loc)
+
+
+
+-- Updates ---------------------------------------------------------------------
+
+-- | Change the current content.
+setContent :: Content -> Cursor -> Cursor
+setContent t loc = loc { current = t }
+
+-- | Modify the current content.
+modifyContent :: (Content -> Content) -> Cursor -> Cursor
+modifyContent f loc = setContent (f (current loc)) loc
+
+-- | Modify the current content, allowing for an effect.
+modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor
+modifyContentM f loc = do x <- f (current loc)
+ return (setContent x loc)
+
+-- | Insert content to the left of the current position.
+insertLeft :: Content -> Cursor -> Cursor
+insertLeft t loc = loc { lefts = t : lefts loc }
+
+-- | Insert content to the right of the current position.
+insertRight :: Content -> Cursor -> Cursor
+insertRight t loc = loc { rights = t : rights loc }
+
+-- | Remove the conent on the left of the current position, if any.
+removeLeft :: Cursor -> Maybe (Content,Cursor)
+removeLeft loc = case lefts loc of
+ l : ls -> return (l,loc { lefts = ls })
+ [] -> Nothing
+
+-- | Remove the conent on the right of the current position, if any.
+removeRight :: Cursor -> Maybe (Content,Cursor)
+removeRight loc = case rights loc of
+ l : ls -> return (l,loc { rights = ls })
+ [] -> Nothing
+
+
+-- | Insert content to the left of the current position.
+-- The new content becomes the current position.
+insertGoLeft :: Content -> Cursor -> Cursor
+insertGoLeft t loc = loc { current = t, rights = current loc : rights loc }
+
+-- | Insert content to the right of the current position.
+-- The new content becomes the current position.
+insertGoRight :: Content -> Cursor -> Cursor
+insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc }
+
+-- | Remove the current element.
+-- The new position is the one on the left.
+removeGoLeft :: Cursor -> Maybe Cursor
+removeGoLeft loc = case lefts loc of
+ l : ls -> Just loc { current = l, lefts = ls }
+ [] -> Nothing
+
+-- | Remove the current element.
+-- The new position is the one on the right.
+removeGoRight :: Cursor -> Maybe Cursor
+removeGoRight loc = case rights loc of
+ l : ls -> Just loc { current = l, rights = ls }
+ [] -> Nothing
+
+-- | Remove the current element.
+-- The new position is the parent of the old position.
+removeGoUp :: Cursor -> Maybe Cursor
+removeGoUp loc =
+ case parents loc of
+ (pls,v,prs) : ps -> Just
+ Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc))
+ , lefts = pls, rights = prs, parents = ps
+ }
+ [] -> Nothing
+
+
+-- | private: Gets the given element of a list.
+-- Also returns the preceeding elements (reversed) and the folloing elements.
+splitChildren :: [a] -> Int -> Maybe ([a],a,[a])
+splitChildren _ n | n < 0 = Nothing
+splitChildren cs pos = loop [] cs pos
+ where loop acc (x:xs) 0 = Just (acc,x,xs)
+ loop acc (x:xs) n = loop (x:acc) xs $! n-1
+ loop _ _ _ = Nothing
+
+-- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys
+combChildren :: [a] -> a -> [a] -> [a]
+combChildren ls t rs = foldl (flip (:)) (t:rs) ls
diff --git a/Text/XML/Light/Input.hs b/Text/XML/Light/Input.hs
new file mode 100644
index 000000000..3cf3a8cd1
--- /dev/null
+++ b/Text/XML/Light/Input.hs
@@ -0,0 +1,307 @@
+--------------------------------------------------------------------
+-- |
+-- Module : Text.XML.Light.Input
+-- Copyright : (c) Galois, Inc. 2007
+-- License : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability: portable
+--
+-- Lightweight XML parsing
+--
+
+module Text.XML.Light.Input (parseXML,parseXMLDoc) where
+
+import Text.XML.Light.Types
+import Text.XML.Light.Proc
+import Text.XML.Light.Output(tagEnd)
+
+import Data.Char(isSpace)
+import Data.List(isPrefixOf)
+import Numeric(readHex)
+
+-- | parseXMLDoc, parse a XMLl document to maybe an element
+parseXMLDoc :: String -> Maybe Element
+parseXMLDoc xs = strip (parseXML xs)
+ where strip cs = case onlyElems cs of
+ e : es
+ | "?xml" `isPrefixOf` qName (elName e)
+ -> strip (map Elem es)
+ | otherwise -> Just e
+ _ -> Nothing
+
+-- | parseXML to a list of content chunks
+parseXML :: String -> [Content]
+parseXML xs = parse $ tokens $ preprocess xs
+
+------------------------------------------------------------------------
+
+parse :: [Token] -> [Content]
+parse [] = []
+parse ts = let (es,_,ts1) = nodes ([],Nothing) [] ts
+ in es ++ parse ts1
+
+-- Information about namespaces.
+-- The first component is a map that associates prefixes to URIs,
+-- the second is the URI for the default namespace, if one was provided.
+type NSInfo = ([(String,String)],Maybe String)
+
+nodes :: NSInfo -> [QName] -> [Token] -> ([Content], [QName], [Token])
+
+nodes ns ps (TokCRef ref : ts) =
+ let (es,qs,ts1) = nodes ns ps ts
+ in (CRef ref : es, qs, ts1)
+
+nodes ns ps (TokText txt : ts) =
+ let (es,qs,ts1) = nodes ns ps ts
+ (more,es1) = case es of
+ Text cd : es1'
+ | cdVerbatim cd == cdVerbatim txt -> (cdData cd,es1')
+ _ -> ([],es)
+
+ in (Text txt { cdData = cdData txt ++ more } : es1, qs, ts1)
+
+nodes cur_info ps (TokStart p t as empty : ts) = (node : siblings, open, toks)
+ where
+ new_name = annotName new_info t
+ new_info = foldr addNS cur_info as
+ node = Elem Element { elLine = Just p
+ , elName = new_name
+ , elAttribs = map (annotAttr new_info) as
+ , elContent = children
+ }
+
+ (children,(siblings,open,toks))
+ | empty = ([], nodes cur_info ps ts)
+ | otherwise = let (es1,qs1,ts1) = nodes new_info (new_name:ps) ts
+ in (es1,
+ case qs1 of
+ [] -> nodes cur_info ps ts1
+ _ : qs3 -> ([],qs3,ts1))
+
+nodes ns ps (TokEnd p t : ts) = let t1 = annotName ns t
+ in case break (t1 ==) ps of
+ (as,_:_) -> ([],as,ts)
+ -- Unknown closing tag. Insert as text.
+ (_,[]) ->
+ let (es,qs,ts1) = nodes ns ps ts
+ in (Text CData {
+ cdLine = Just p,
+ cdVerbatim = CDataText,
+ cdData = tagEnd t ""
+ } : es,qs, ts1)
+
+nodes _ ps [] = ([],ps,[])
+
+
+annotName :: NSInfo -> QName -> QName
+annotName (namespaces,def_ns) n =
+ n { qURI = maybe def_ns (`lookup` namespaces) (qPrefix n) }
+
+annotAttr :: NSInfo -> Attr -> Attr
+annotAttr ns a@(Attr { attrKey = k}) =
+ case (qPrefix k, qName k) of
+ (Nothing,"xmlns") -> a
+ _ -> a { attrKey = annotName ns k }
+
+addNS :: Attr -> NSInfo -> NSInfo
+addNS (Attr { attrKey = key, attrVal = val }) (ns,def) =
+ case (qPrefix key, qName key) of
+ (Nothing,"xmlns") -> (ns, if null val then Nothing else Just val)
+ (Just "xmlns", k) -> ((k, val) : ns, def)
+ _ -> (ns,def)
+
+
+-- Lexer -----------------------------------------------------------------------
+
+type LChar = (Line,Char)
+type LString = [LChar]
+data Token = TokStart Line QName [Attr] Bool -- is empty?
+ | TokEnd Line QName
+ | TokCRef String
+ | TokText CData
+ deriving Show
+
+tokens :: String -> [Token]
+tokens = tokens' . linenumber 1
+
+tokens' :: LString -> [Token]
+tokens' ((_,'<') : c@(_,'!') : cs) = special c cs
+
+tokens' ((_,'<') : cs) = tag (dropSpace cs) -- we are being nice here
+tokens' [] = []
+tokens' cs@((l,_):_) = let (as,bs) = breakn ('<' ==) cs
+ in map cvt (decode_text as) ++ tokens' bs
+
+ -- XXX: Note, some of the lines might be a bit inacuarate
+ where cvt (TxtBit x) = TokText CData { cdLine = Just l
+ , cdVerbatim = CDataText
+ , cdData = x
+ }
+ cvt (CRefBit x) = case cref_to_char x of
+ Just c -> TokText CData { cdLine = Just l
+ , cdVerbatim = CDataText
+ , cdData = [c]
+ }
+ Nothing -> TokCRef x
+
+
+special :: LChar -> LString -> [Token]
+special _ ((_,'-') : (_,'-') : cs) = skip cs
+ where skip ((_,'-') : (_,'-') : (_,'>') : ds) = tokens' ds
+ skip (_ : ds) = skip ds
+ skip [] = [] -- unterminated comment
+
+special c ((_,'[') : (_,'C') : (_,'D') : (_,'A') : (_,'T') : (_,'A') : (_,'[')
+ : cs) =
+ let (xs,ts) = cdata cs
+ in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataVerbatim, cdData = xs }
+ : tokens' ts
+ where cdata ((_,']') : (_,']') : (_,'>') : ds) = ([],ds)
+ cdata ((_,d) : ds) = let (xs,ys) = cdata ds in (d:xs,ys)
+ cdata [] = ([],[])
+
+special c cs =
+ let (xs,ts) = munch "" 0 cs
+ in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataRaw, cdData = '<':'!':(reverse xs) } : tokens' ts
+ where munch acc nesting ((_,'>') : ds)
+ | nesting == (0::Int) = ('>':acc,ds)
+ | otherwise = munch ('>':acc) (nesting-1) ds
+ munch acc nesting ((_,'<') : ds)
+ = munch ('<':acc) (nesting+1) ds
+ munch acc n ((_,x) : ds) = munch (x:acc) n ds
+ munch acc _ [] = (acc,[]) -- unterminated DTD markup
+
+--special c cs = tag (c : cs) -- invalid specials are processed as tags
+
+
+qualName :: LString -> (QName,LString)
+qualName xs = let (as,bs) = breakn endName xs
+ (q,n) = case break (':'==) as of
+ (q1,_:n1) -> (Just q1, n1)
+ _ -> (Nothing, as)
+ in (QName { qURI = Nothing, qPrefix = q, qName = n }, bs)
+ where endName x = isSpace x || x == '=' || x == '>' || x == '/'
+
+
+
+
+
+tag :: LString -> [Token]
+tag ((p,'/') : cs) = let (n,ds) = qualName (dropSpace cs)
+ in TokEnd p n : case ds of
+ (_,'>') : es -> tokens' es
+ -- tag was not properly closed...
+ _ -> tokens' ds
+tag [] = []
+tag cs = let (n,ds) = qualName cs
+ (as,b,ts) = attribs (dropSpace ds)
+ in TokStart (fst (head cs)) n as b : ts
+
+attribs :: LString -> ([Attr], Bool, [Token])
+attribs cs = case cs of
+ (_,'>') : ds -> ([], False, tokens' ds)
+
+ (_,'/') : ds -> ([], True, case ds of
+ (_,'>') : es -> tokens' es
+ -- insert missing > ...
+ _ -> tokens' ds)
+
+ (_,'?') : (_,'>') : ds -> ([], True, tokens' ds)
+
+ -- doc ended within a tag..
+ [] -> ([],False,[])
+
+ _ -> let (a,cs1) = attrib cs
+ (as,b,ts) = attribs cs1
+ in (a:as,b,ts)
+
+attrib :: LString -> (Attr,LString)
+attrib cs = let (ks,cs1) = qualName cs
+ (vs,cs2) = attr_val (dropSpace cs1)
+ in ((Attr ks (decode_attr vs)),dropSpace cs2)
+
+attr_val :: LString -> (String,LString)
+attr_val ((_,'=') : cs) = string (dropSpace cs)
+attr_val cs = ("",cs)
+
+
+dropSpace :: LString -> LString
+dropSpace = dropWhile (isSpace . snd)
+
+-- | Match the value for an attribute. For malformed XML we do
+-- our best to guess the programmer's intention.
+string :: LString -> (String,LString)
+string ((_,'"') : cs) = break' ('"' ==) cs
+
+-- Allow attributes to be enclosed between ' '.
+string ((_,'\'') : cs) = break' ('\'' ==) cs
+
+-- Allow attributes that are not enclosed by anything.
+string cs = breakn eos cs
+ where eos x = isSpace x || x == '>' || x == '/'
+
+
+break' :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
+break' p xs = let (as,bs) = breakn p xs
+ in (as, case bs of
+ [] -> []
+ _ : cs -> cs)
+
+breakn :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
+breakn p l = (map snd as,bs) where (as,bs) = break (p . snd) l
+
+
+
+decode_attr :: String -> String
+decode_attr cs = concatMap cvt (decode_text cs)
+ where cvt (TxtBit x) = x
+ cvt (CRefBit x) = case cref_to_char x of
+ Just c -> [c]
+ Nothing -> '&' : x ++ ";"
+
+data Txt = TxtBit String | CRefBit String deriving Show
+
+decode_text :: [Char] -> [Txt]
+decode_text xs@('&' : cs) = case break (';' ==) cs of
+ (as,_:bs) -> CRefBit as : decode_text bs
+ _ -> [TxtBit xs]
+decode_text [] = []
+decode_text cs = let (as,bs) = break ('&' ==) cs
+ in TxtBit as : decode_text bs
+
+cref_to_char :: [Char] -> Maybe Char
+cref_to_char cs = case cs of
+ '#' : ds -> num_esc ds
+ "lt" -> Just '<'
+ "gt" -> Just '>'
+ "amp" -> Just '&'
+ "apos" -> Just '\''
+ "quot" -> Just '"'
+ _ -> Nothing
+
+num_esc :: String -> Maybe Char
+num_esc cs = case cs of
+ 'x' : ds -> check (readHex ds)
+ _ -> check (reads cs)
+
+ where check [(n,"")] = cvt_char n
+ check _ = Nothing
+
+cvt_char :: Int -> Maybe Char
+cvt_char x
+ | fromEnum (minBound :: Char) <= x && x <= fromEnum (maxBound::Char)
+ = Just (toEnum x)
+ | otherwise = Nothing
+
+preprocess :: String -> String
+preprocess ('\r' : '\n' : cs) = '\n' : preprocess cs
+preprocess ('\r' : cs) = '\n' : preprocess cs
+preprocess (c : cs) = c : preprocess cs
+preprocess [] = []
+
+linenumber :: Line -> String -> LString
+linenumber _ [] = []
+linenumber n ('\n':s) = n' `seq` ((n,'\n'):linenumber n' s) where n' = n + 1
+linenumber n (c:s) = (n,c) : linenumber n s
diff --git a/Text/XML/Light/Output.hs b/Text/XML/Light/Output.hs
new file mode 100644
index 000000000..65d1bb1af
--- /dev/null
+++ b/Text/XML/Light/Output.hs
@@ -0,0 +1,150 @@
+--------------------------------------------------------------------
+-- |
+-- Module : Text.XML.Light.Output
+-- Copyright : (c) Galois, Inc. 2007
+-- License : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability:
+--
+-- Output handling for the lightweight XML lib.
+--
+
+module Text.XML.Light.Output
+ ( showTopElement, showContent, showElement, showCData, showQName, showAttr
+ , ppTopElement, ppContent, ppElement
+ , tagEnd, xml_header
+ ) where
+
+import Text.XML.Light.Types
+import Data.Char
+import Data.List ( isPrefixOf )
+
+-- | The XML 1.0 header
+xml_header :: String
+xml_header = "<?xml version='1.0' ?>"
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppTopElement :: Element -> String
+ppTopElement e = unlines [xml_header,ppElement e]
+
+-- | Pretty printing elements
+ppElement :: Element -> String
+ppElement e = ppElementS "" e ""
+
+-- | Pretty printing content
+ppContent :: Content -> String
+ppContent x = ppContentS "" x ""
+
+-- | Pretty printing content using ShowS
+ppContentS :: String -> Content -> ShowS
+ppContentS i x xs = case x of
+ Elem e -> ppElementS i e xs
+ Text c -> ppCData i c xs
+ CRef r -> showCRefS r xs
+
+ppElementS :: String -> Element -> ShowS
+ppElementS i e xs = i ++ (tagStart (elName e) (elAttribs e) $
+ case elContent e of
+ []
+ | not ("?xml" `isPrefixOf` (qName $ elName e)) -> " />" ++ xs
+ | otherwise -> " ?>" ++ xs
+ [Text t] -> ">" ++ ppCData "" t (tagEnd (elName e) xs)
+ cs -> ">\n" ++ foldr ppSub (i ++ tagEnd (elName e) xs) cs
+ where ppSub e1 = ppContentS (" " ++ i) e1 . showChar '\n'
+ )
+
+ppCData :: String -> CData -> ShowS
+ppCData i c xs = i ++ if (cdVerbatim c /= CDataText )
+ then showCDataS c xs
+ else foldr cons xs (showCData c)
+
+ where cons :: Char -> String -> String
+ cons '\n' ys = "\n" ++ i ++ ys
+ cons y ys = y : ys
+
+
+
+--------------------------------------------------------------------------------
+-- | Adds the <?xml?> header.
+showTopElement :: Element -> String
+showTopElement c = xml_header ++ showElement c
+
+showContent :: Content -> String
+showContent c = showContentS c ""
+
+showElement :: Element -> String
+showElement c = showElementS c ""
+
+showCData :: CData -> String
+showCData c = showCDataS c ""
+
+-- Note: crefs should not contain '&', ';', etc.
+showCRefS :: String -> ShowS
+showCRefS r xs = '&' : r ++ ';' : xs
+
+-- | Good for transmition (no extra white space etc.) but less readable.
+showContentS :: Content -> ShowS
+showContentS (Elem e) = showElementS e
+showContentS (Text cs) = showCDataS cs
+showContentS (CRef cs) = showCRefS cs
+
+-- | Good for transmition (no extra white space etc.) but less readable.
+showElementS :: Element -> ShowS
+showElementS e xs =
+ tagStart (elName e) (elAttribs e)
+ $ case elContent e of
+ [] -> " />" ++ xs
+ ch -> '>' : foldr showContentS (tagEnd (elName e) xs) ch
+
+-- | Convert a text element to characters.
+showCDataS :: CData -> ShowS
+showCDataS cd =
+ case cdVerbatim cd of
+ CDataText -> escStr (cdData cd)
+ CDataVerbatim -> showString "<![CDATA[" . escCData (cdData cd) . showString "]]>"
+ CDataRaw -> \ xs -> cdData cd ++ xs
+
+--------------------------------------------------------------------------------
+escCData :: String -> ShowS
+escCData (']' : ']' : '>' : cs) = showString "]]]]><![CDATA[>" . escCData cs
+escCData (c : cs) = showChar c . escCData cs
+escCData [] = id
+
+escChar :: Char -> ShowS
+escChar c = case c of
+ '<' -> showString "&lt;"
+ '>' -> showString "&gt;"
+ '&' -> showString "&amp;"
+ '"' -> showString "&quot;"
+ '\'' -> showString "&apos;"
+ -- XXX: Is this really wortherd?
+ -- We could deal with these issues when we convert characters to bytes.
+ _ | (oc <= 0x7f && isPrint c) || c == '\n' || c == '\r' -> showChar c
+ | otherwise -> showString "&#" . shows oc . showChar ';'
+ where oc = ord c
+
+escStr :: String -> ShowS
+escStr cs rs = foldr escChar rs cs
+
+tagEnd :: QName -> ShowS
+tagEnd qn rs = '<':'/':showQName qn ++ '>':rs
+
+tagStart :: QName -> [Attr] -> ShowS
+tagStart qn as rs = '<':showQName qn ++ as_str ++ rs
+ where as_str = if null as then "" else ' ' : unwords (map showAttr as)
+
+showAttr :: Attr -> String
+showAttr (Attr qn v) = showQName qn ++ '=' : '"' : escStr v "\""
+
+showQName :: QName -> String
+showQName q = pre ++ qName q
+ where pre = case qPrefix q of
+ Nothing -> ""
+ Just p -> p ++ ":"
+
+
+
diff --git a/Text/XML/Light/Proc.hs b/Text/XML/Light/Proc.hs
new file mode 100644
index 000000000..34d844a3f
--- /dev/null
+++ b/Text/XML/Light/Proc.hs
@@ -0,0 +1,103 @@
+--------------------------------------------------------------------
+-- |
+-- Module : Text.XML.Light.Proc
+-- Copyright : (c) Galois, Inc. 2007
+-- License : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability:
+--
+--------------------------------------------------------------------
+
+
+module Text.XML.Light.Proc where
+
+import Text.XML.Light.Types
+
+import Data.Maybe(listToMaybe)
+import Data.List(find)
+
+-- | Get the text value of an XML element. This function
+-- ignores non-text elements, and concatenates all text elements.
+strContent :: Element -> String
+strContent e = concatMap cdData $ onlyText $ elContent e
+
+-- | Select only the elements from a list of XML content.
+onlyElems :: [Content] -> [Element]
+onlyElems xs = [ x | Elem x <- xs ]
+
+-- | Select only the elements from a parent.
+elChildren :: Element -> [Element]
+elChildren e = [ x | Elem x <- elContent e ]
+
+-- | Select only the text from a list of XML content.
+onlyText :: [Content] -> [CData]
+onlyText xs = [ x | Text x <- xs ]
+
+-- | Find all immediate children with the given name.
+findChildren :: QName -> Element -> [Element]
+findChildren q e = filterChildren ((q ==) . elName) e
+
+-- | Filter all immediate children wrt a given predicate.
+filterChildren :: (Element -> Bool) -> Element -> [Element]
+filterChildren p e = filter p (onlyElems (elContent e))
+
+
+-- | Filter all immediate children wrt a given predicate over their names.
+filterChildrenName :: (QName -> Bool) -> Element -> [Element]
+filterChildrenName p e = filter (p.elName) (onlyElems (elContent e))
+
+
+-- | Find an immediate child with the given name.
+findChild :: QName -> Element -> Maybe Element
+findChild q e = listToMaybe (findChildren q e)
+
+-- | Find an immediate child with the given name.
+filterChild :: (Element -> Bool) -> Element -> Maybe Element
+filterChild p e = listToMaybe (filterChildren p e)
+
+-- | Find an immediate child with name matching a predicate.
+filterChildName :: (QName -> Bool) -> Element -> Maybe Element
+filterChildName p e = listToMaybe (filterChildrenName p e)
+
+-- | Find the left-most occurrence of an element matching given name.
+findElement :: QName -> Element -> Maybe Element
+findElement q e = listToMaybe (findElements q e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElement :: (Element -> Bool) -> Element -> Maybe Element
+filterElement p e = listToMaybe (filterElements p e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElementName :: (QName -> Bool) -> Element -> Maybe Element
+filterElementName p e = listToMaybe (filterElementsName p e)
+
+-- | Find all non-nested occurances of an element.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+findElements :: QName -> Element -> [Element]
+findElements qn e = filterElementsName (qn==) e
+
+-- | Find all non-nested occurrences of an element wrt. given predicate.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElements :: (Element -> Bool) -> Element -> [Element]
+filterElements p e
+ | p e = [e]
+ | otherwise = concatMap (filterElements p) $ onlyElems $ elContent e
+
+-- | Find all non-nested occurences of an element wrt a predicate over element names.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElementsName :: (QName -> Bool) -> Element -> [Element]
+filterElementsName p e = filterElements (p.elName) e
+
+-- | Lookup the value of an attribute.
+findAttr :: QName -> Element -> Maybe String
+findAttr x e = attrVal `fmap` find ((x ==) . attrKey) (elAttribs e)
+
+-- | Lookup attribute name from list.
+lookupAttr :: QName -> [Attr] -> Maybe String
+lookupAttr x as = attrVal `fmap` find ((x ==) . attrKey) as
+
diff --git a/Text/XML/Light/Types.hs b/Text/XML/Light/Types.hs
new file mode 100644
index 000000000..6d8f04b4a
--- /dev/null
+++ b/Text/XML/Light/Types.hs
@@ -0,0 +1,91 @@
+--------------------------------------------------------------------
+-- |
+-- Module : Text.XML.Light.Types
+-- Copyright : (c) Galois, Inc. 2007
+-- License : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability:
+--
+-- Basic XML types.
+--
+
+module Text.XML.Light.Types where
+
+-- | A line is an Integer
+type Line = Integer
+
+-- | XML content
+data Content = Elem Element
+ | Text CData
+ | CRef String
+ deriving Show
+
+-- | XML elements
+data Element = Element {
+ elName :: QName,
+ elAttribs :: [Attr],
+ elContent :: [Content],
+ elLine :: Maybe Line
+ } deriving Show
+
+-- | XML attributes
+data Attr = Attr {
+ attrKey :: QName,
+ attrVal :: String
+ } deriving (Eq,Ord,Show)
+
+-- | XML CData
+data CData = CData {
+ cdVerbatim :: CDataKind,
+ cdData :: String,
+ cdLine :: Maybe Line
+ } deriving Show
+
+data CDataKind
+ = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc.
+ | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
+ | CDataRaw -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
+ deriving ( Eq, Show )
+
+-- | XML qualified names
+data QName = QName {
+ qName :: String,
+ qURI :: Maybe String,
+ qPrefix :: Maybe String
+ } deriving Show
+
+
+instance Eq QName where
+ q1 == q2 = compare q1 q2 == EQ
+
+instance Ord QName where
+ compare q1 q2 =
+ case compare (qName q1) (qName q2) of
+ EQ -> case (qURI q1, qURI q2) of
+ (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
+ (u1,u2) -> compare u1 u2
+ x -> x
+
+
+-- blank elements --------------------------------------------------------------
+
+-- | Blank names
+blank_name :: QName
+blank_name = QName { qName = "", qURI = Nothing, qPrefix = Nothing }
+
+-- | Blank cdata
+blank_cdata :: CData
+blank_cdata = CData { cdVerbatim = CDataText, cdData = "", cdLine = Nothing }
+
+-- | Blank elements
+blank_element :: Element
+blank_element = Element
+ { elName = blank_name
+ , elAttribs = []
+ , elContent = []
+ , elLine = Nothing
+ }
+
+