8 files changed, 1116 insertions, 4 deletions
diff --git a/Text/XML/Light.hs b/Text/XML/Light.hs
new file mode 100644
index 000000000..f2d75290b
--- /dev/null
+++ b/Text/XML/Light.hs
@@ -0,0 +1,96 @@
+{-# LANGUAGE FlexibleInstances #-}
+--------------------------------------------------------------------
+-- |
+-- Module    : Text.XML.Light
+-- Copyright : (c) Galois, Inc. 2007
+-- License   : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability: portability
+--
+-- A lightweight XML parsing, filtering and generating library.
+--
+-- This module reexports functions from:
+--
+-- * "Text.XML.Light.Types"
+--
+-- * "Text.XML.Light.Proc"
+--
+-- * "Text.XML.Light.Input"
+--
+-- * "Text.XML.Light.Output"
+--
+
+module Text.XML.Light (
+
+    module Text.XML.Light,
+    module Text.XML.Light.Types,
+    module Text.XML.Light.Proc,
+    module Text.XML.Light.Input,
+    module Text.XML.Light.Output
+
+  ) where
+
+import Text.XML.Light.Types
+import Text.XML.Light.Proc
+import Text.XML.Light.Input
+import Text.XML.Light.Output
+
+-- | Add an attribute to an element.
+add_attr :: Attr -> Element -> Element
+add_attr a e = add_attrs [a] e
+
+-- | Add some attributes to an element.
+add_attrs :: [Attr] -> Element -> Element
+add_attrs as e = e { elAttribs = as ++ elAttribs e }
+
+-- | Create an unqualified name.
+unqual :: String -> QName
+unqual x = blank_name { qName = x }
+
+-- | A smart element constructor which uses the type of its argument
+-- to determine what sort of element to make.
+class Node t where
+  node :: QName -> t -> Element
+
+instance Node ([Attr],[Content]) where
+  node n (attrs,cont) = blank_element { elName     = n
+                                      , elAttribs  = attrs
+                                      , elContent  = cont
+                                      }
+
+instance Node [Attr]             where node n as   = node n (as,[]::[Content])
+instance Node Attr               where node n a    = node n [a]
+instance Node ()                 where node n ()   = node n ([]::[Attr])
+
+instance Node [Content]          where node n cs     = node n ([]::[Attr],cs)
+instance Node Content            where node n c      = node n [c]
+instance Node ([Attr],Content)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Content)     where node n (a,c)  = node n ([a],[c])
+
+instance Node ([Attr],[Element]) where
+  node n (as,cs) = node n (as,map Elem cs)
+
+instance Node ([Attr],Element)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Element)     where node n (a,c)  = node n ([a],c)
+instance Node ([Element])        where node n es     = node n ([]::[Attr],es)
+instance Node (Element)          where node n e      = node n [e]
+
+instance Node ([Attr],[CData])   where
+  node n (as,cs) = node n (as,map Text cs)
+
+instance Node ([Attr],CData)     where node n (as,c) = node n (as,[c])
+instance Node (Attr,CData)       where node n (a,c)  = node n ([a],c)
+instance Node [CData]            where node n es     = node n ([]::[Attr],es)
+instance Node CData              where node n e      = node n [e]
+
+instance Node ([Attr],String)    where
+  node n (as,t) = node n (as,blank_cdata { cdData = t })
+
+instance Node (Attr,String)      where node n (a,t)  = node n ([a],t)
+instance Node [Char]             where node n t      = node n ([]::[Attr],t)
+
+-- | Create node with unqualified name
+unode :: Node t => String -> t -> Element
+unode = node . unqual
diff --git a/Text/XML/Light/Cursor.hs b/Text/XML/Light/Cursor.hs
new file mode 100644
index 000000000..06d15bdb6
--- /dev/null
+++ b/Text/XML/Light/Cursor.hs
@@ -0,0 +1,327 @@
+--------------------------------------------------------------------
+-- |
+-- Module    : Text.XML.Light.Cursor
+-- Copyright : (c) Galois, Inc. 2008
+-- License   : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability:
+--
+-- XML cursors for working XML content withing the context of
+-- an XML document.  This implemntation is based on the general
+-- tree zipper written by Krasimir Angelov and Iavor S. Diatchki.
+--
+
+module Text.XML.Light.Cursor
+  ( Tag(..), getTag, setTag, fromTag
+  , Cursor(..), Path
+
+  -- * Conversions
+  , fromContent
+  , fromElement
+  , fromForest
+  , toForest
+  , toTree
+
+  -- * Moving around
+  , parent
+  , root
+  , getChild
+  , firstChild
+  , lastChild
+  , left
+  , right
+
+  -- ** Searching
+  , findChild
+  , findLeft
+  , findRight
+
+  -- * Node classification
+  , isRoot
+  , isFirst
+  , isLast
+  , isLeaf
+  , isChild
+  , hasChildren
+  , getNodeIndex
+
+  -- * Updates
+  , setContent
+  , modifyContent
+  , modifyContentM
+
+  -- ** Inserting content
+  , insertLeft
+  , insertRight
+  , insertGoLeft
+  , insertGoRight
+
+  -- ** Removing content
+  , removeLeft
+  , removeRight
+  , removeGoLeft
+  , removeGoRight
+  , removeGoUp
+
+  ) where
+
+import Text.XML.Light.Types
+import Data.Maybe(isNothing)
+
+data Tag = Tag { tagName    :: QName
+               , tagAttribs :: [Attr]
+               , tagLine    :: Maybe Line
+               } deriving (Show)
+
+getTag :: Element -> Tag
+getTag e = Tag { tagName = elName e
+               , tagAttribs = elAttribs e
+               , tagLine = elLine e
+               }
+
+setTag :: Tag -> Element -> Element
+setTag t e = fromTag t (elContent e)
+
+fromTag :: Tag -> [Content] -> Element
+fromTag t cs = Element { elName    = tagName t
+                       , elAttribs = tagAttribs t
+                       , elLine    = tagLine t
+                       , elContent = cs
+                       }
+
+type Path = [([Content],Tag,[Content])]
+
+-- | The position of a piece of content in an XML document.
+data Cursor = Cur
+  { current :: Content      -- ^ The currently selected content.
+  , lefts   :: [Content]    -- ^ Siblings on the left, closest first.
+  , rights  :: [Content]    -- ^ Siblings on the right, closest first.
+  , parents :: Path -- ^ The contexts of the parent elements of this location.
+  } deriving (Show)
+
+-- Moving around ---------------------------------------------------------------
+
+-- | The parent of the given location.
+parent :: Cursor -> Maybe Cursor
+parent loc =
+  case parents loc of
+    (pls,v,prs) : ps -> Just
+      Cur { current = Elem
+                    (fromTag v
+                    (combChildren (lefts loc) (current loc) (rights loc)))
+          , lefts = pls, rights = prs, parents = ps
+          }
+    [] -> Nothing
+
+
+-- | The top-most parent of the given location.
+root :: Cursor -> Cursor
+root loc = maybe loc root (parent loc)
+
+-- | The left sibling of the given location.
+left :: Cursor -> Maybe Cursor
+left loc =
+  case lefts loc of
+    t : ts -> Just loc { current = t, lefts = ts
+                                    , rights = current loc : rights loc }
+    []     -> Nothing
+
+-- | The right sibling of the given location.
+right :: Cursor -> Maybe Cursor
+right loc =
+  case rights loc of
+    t : ts -> Just loc { current = t, lefts = current loc : lefts loc
+                                    , rights = ts }
+    []     -> Nothing
+
+-- | The first child of the given location.
+firstChild :: Cursor -> Maybe Cursor
+firstChild loc =
+  do (t : ts, ps) <- downParents loc
+     return Cur { current = t, lefts = [], rights = ts , parents = ps }
+
+-- | The last child of the given location.
+lastChild :: Cursor -> Maybe Cursor
+lastChild loc =
+  do (ts, ps) <- downParents loc
+     case reverse ts of
+       l : ls -> return Cur { current = l, lefts = ls, rights = []
+                                                     , parents = ps }
+       [] -> Nothing
+
+-- | Find the next left sibling that satisfies a predicate.
+findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
+findLeft p loc = do loc1 <- left loc
+                    if p loc1 then return loc1 else findLeft p loc1
+
+-- | Find the next right sibling that satisfies a predicate.
+findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
+findRight p loc = do loc1 <- right loc
+                     if p loc1 then return loc1 else findRight p loc1
+
+-- | The first child that satisfies a predicate.
+findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
+findChild p loc =
+  do loc1 <- firstChild loc
+     if p loc1 then return loc1 else findRight p loc1
+
+-- | The child with the given index (starting from 0).
+getChild :: Int -> Cursor -> Maybe Cursor
+getChild n loc =
+  do (ts,ps) <- downParents loc
+     (ls,t,rs) <- splitChildren ts n
+     return Cur { current = t, lefts = ls, rights = rs, parents = ps }
+
+
+-- | private: computes the parent for "down" operations.
+downParents :: Cursor -> Maybe ([Content], Path)
+downParents loc =
+  case current loc of
+    Elem e -> Just ( elContent e
+                   , (lefts loc, getTag e, rights loc) : parents loc
+                   )
+    _      -> Nothing
+
+-- Conversions -----------------------------------------------------------------
+
+-- | A cursor for the guven content.
+fromContent :: Content -> Cursor
+fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] }
+
+-- | A cursor for the guven element.
+fromElement :: Element -> Cursor
+fromElement e = fromContent (Elem e)
+
+-- | The location of the first tree in a forest.
+fromForest :: [Content] -> Maybe Cursor
+fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts
+                                                      , parents = [] }
+fromForest []     = Nothing
+
+-- | Computes the tree containing this location.
+toTree :: Cursor -> Content
+toTree loc = current (root loc)
+
+-- | Computes the forest containing this location.
+toForest :: Cursor -> [Content]
+toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r)
+
+
+-- Queries ---------------------------------------------------------------------
+
+-- | Are we at the top of the document?
+isRoot :: Cursor -> Bool
+isRoot loc = null (parents loc)
+
+-- | Are we at the left end of the the document?
+isFirst :: Cursor -> Bool
+isFirst loc = null (lefts loc)
+
+-- | Are we at the right end of the document?
+isLast :: Cursor -> Bool
+isLast loc = null (rights loc)
+
+-- | Are we at the bottom of the document?
+isLeaf :: Cursor -> Bool
+isLeaf loc = isNothing (downParents loc)
+
+-- | Do we have a parent?
+isChild :: Cursor -> Bool
+isChild loc = not (isRoot loc)
+
+-- | Get the node index inside the sequence of children
+getNodeIndex :: Cursor -> Int
+getNodeIndex loc = length (lefts loc)
+
+-- | Do we have children?
+hasChildren :: Cursor -> Bool
+hasChildren loc = not (isLeaf loc)
+
+
+
+-- Updates ---------------------------------------------------------------------
+
+-- | Change the current content.
+setContent :: Content -> Cursor -> Cursor
+setContent t loc = loc { current = t }
+
+-- | Modify the current content.
+modifyContent :: (Content -> Content) -> Cursor -> Cursor
+modifyContent f loc = setContent (f (current loc)) loc
+
+-- | Modify the current content, allowing for an effect.
+modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor
+modifyContentM f loc = do x <- f (current loc)
+                          return (setContent x loc)
+
+-- | Insert content to the left of the current position.
+insertLeft :: Content -> Cursor -> Cursor
+insertLeft t loc = loc { lefts = t : lefts loc }
+
+-- | Insert content to the right of the current position.
+insertRight :: Content -> Cursor -> Cursor
+insertRight t loc = loc { rights = t : rights loc }
+
+-- | Remove the conent on the left of the current position, if any.
+removeLeft :: Cursor -> Maybe (Content,Cursor)
+removeLeft loc = case lefts loc of
+                   l : ls -> return (l,loc { lefts = ls })
+                   [] -> Nothing
+
+-- | Remove the conent on the right of the current position, if any.
+removeRight :: Cursor -> Maybe (Content,Cursor)
+removeRight loc = case rights loc of
+                    l : ls -> return (l,loc { rights = ls })
+                    [] -> Nothing
+
+
+-- | Insert content to the left of the current position.
+-- The new content becomes the current position.
+insertGoLeft :: Content -> Cursor -> Cursor
+insertGoLeft t loc = loc { current = t, rights = current loc : rights loc }
+
+-- | Insert content to the right of the current position.
+-- The new content becomes the current position.
+insertGoRight :: Content -> Cursor -> Cursor
+insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc }
+
+-- | Remove the current element.
+-- The new position is the one on the left.
+removeGoLeft :: Cursor -> Maybe Cursor
+removeGoLeft loc = case lefts loc of
+                     l : ls -> Just loc { current = l, lefts = ls }
+                     []     -> Nothing
+
+-- | Remove the current element.
+-- The new position is the one on the right.
+removeGoRight :: Cursor -> Maybe Cursor
+removeGoRight loc = case rights loc of
+                     l : ls -> Just loc { current = l, rights = ls }
+                     []     -> Nothing
+
+-- | Remove the current element.
+-- The new position is the parent of the old position.
+removeGoUp :: Cursor -> Maybe Cursor
+removeGoUp loc =
+  case parents loc of
+    (pls,v,prs) : ps -> Just
+      Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc))
+          , lefts = pls, rights = prs, parents = ps
+          }
+    [] -> Nothing
+
+
+-- | private: Gets the given element of a list.
+-- Also returns the preceeding elements (reversed) and the folloing elements.
+splitChildren :: [a] -> Int -> Maybe ([a],a,[a])
+splitChildren _ n | n < 0 = Nothing
+splitChildren cs pos = loop [] cs pos
+  where loop acc (x:xs) 0 = Just (acc,x,xs)
+        loop acc (x:xs) n = loop (x:acc) xs $! n-1
+        loop _ _ _        = Nothing
+
+-- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys
+combChildren :: [a] -> a -> [a] -> [a]
+combChildren ls t rs = foldl (flip (:)) (t:rs) ls
diff --git a/Text/XML/Light/Input.hs b/Text/XML/Light/Input.hs
new file mode 100644
index 000000000..3cf3a8cd1
--- /dev/null
+++ b/Text/XML/Light/Input.hs
@@ -0,0 +1,307 @@
+--------------------------------------------------------------------
+-- |
+-- Module    : Text.XML.Light.Input
+-- Copyright : (c) Galois, Inc. 2007
+-- License   : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability: portable
+--
+-- Lightweight XML parsing
+--
+
+module Text.XML.Light.Input (parseXML,parseXMLDoc) where
+
+import Text.XML.Light.Types
+import Text.XML.Light.Proc
+import Text.XML.Light.Output(tagEnd)
+
+import Data.Char(isSpace)
+import Data.List(isPrefixOf)
+import Numeric(readHex)
+
+-- | parseXMLDoc, parse a XMLl document to maybe an element
+parseXMLDoc  :: String -> Maybe Element
+parseXMLDoc xs  = strip (parseXML xs)
+  where strip cs = case onlyElems cs of
+                    e : es
+                      | "?xml" `isPrefixOf` qName (elName e)
+                          -> strip (map Elem es)
+                      | otherwise -> Just e
+                    _ -> Nothing
+
+-- | parseXML to a list of content chunks
+parseXML   :: String -> [Content]
+parseXML xs = parse $ tokens $ preprocess xs
+
+------------------------------------------------------------------------
+
+parse      :: [Token] -> [Content]
+parse []    = []
+parse ts    = let (es,_,ts1) = nodes ([],Nothing) [] ts
+              in es ++ parse ts1
+
+-- Information about namespaces.
+-- The first component is a map that associates prefixes to URIs,
+-- the second is the URI for the default namespace, if one was provided.
+type NSInfo = ([(String,String)],Maybe String)
+
+nodes :: NSInfo -> [QName] -> [Token] -> ([Content], [QName], [Token])
+
+nodes ns ps (TokCRef ref : ts) =
+  let (es,qs,ts1) = nodes ns ps ts
+  in (CRef ref : es, qs, ts1)
+
+nodes ns ps (TokText txt : ts) =
+  let (es,qs,ts1) = nodes ns ps ts
+      (more,es1)  = case es of
+                      Text cd : es1'
+                        | cdVerbatim cd == cdVerbatim txt -> (cdData cd,es1')
+                      _                                   -> ([],es)
+
+  in (Text txt { cdData = cdData txt ++ more } : es1, qs, ts1)
+
+nodes cur_info ps (TokStart p t as empty : ts) = (node : siblings, open, toks)
+  where
+  new_name  = annotName new_info t
+  new_info  = foldr addNS cur_info as
+  node      = Elem Element { elLine    = Just p
+                           , elName    = new_name
+                           , elAttribs = map (annotAttr new_info) as
+                           , elContent = children
+                           }
+
+  (children,(siblings,open,toks))
+    | empty     = ([], nodes cur_info ps ts)
+    | otherwise = let (es1,qs1,ts1) = nodes new_info (new_name:ps) ts
+                  in (es1,
+                      case qs1 of
+                        [] -> nodes cur_info ps ts1
+                        _ : qs3 -> ([],qs3,ts1))
+
+nodes ns ps (TokEnd p t : ts)   = let t1 = annotName ns t
+                                in case break (t1 ==) ps of
+                                  (as,_:_) -> ([],as,ts)
+                                  -- Unknown closing tag. Insert as text.
+                                  (_,[]) ->
+                                    let (es,qs,ts1) = nodes ns ps ts
+                                    in (Text CData {
+                                               cdLine = Just p,
+                                               cdVerbatim = CDataText,
+                                               cdData = tagEnd t ""
+                                              } : es,qs, ts1)
+
+nodes _ ps []                 = ([],ps,[])
+
+
+annotName :: NSInfo -> QName -> QName
+annotName (namespaces,def_ns) n =
+  n { qURI = maybe def_ns (`lookup` namespaces) (qPrefix n) }
+
+annotAttr :: NSInfo -> Attr -> Attr
+annotAttr ns a@(Attr { attrKey = k}) =
+  case (qPrefix k, qName k) of
+    (Nothing,"xmlns") -> a
+    _                 -> a { attrKey = annotName ns k }
+
+addNS :: Attr -> NSInfo -> NSInfo
+addNS (Attr { attrKey = key, attrVal = val }) (ns,def) =
+  case (qPrefix key, qName key) of
+    (Nothing,"xmlns") -> (ns, if null val then Nothing else Just val)
+    (Just "xmlns", k) -> ((k, val) : ns, def)
+    _                 -> (ns,def)
+
+
+-- Lexer -----------------------------------------------------------------------
+
+type LChar              = (Line,Char)
+type LString            = [LChar]
+data Token              = TokStart Line QName [Attr] Bool  -- is empty?
+                        | TokEnd Line QName
+                        | TokCRef String
+                        | TokText CData
+                          deriving Show
+
+tokens             :: String -> [Token]
+tokens = tokens' . linenumber 1
+
+tokens' :: LString -> [Token]
+tokens' ((_,'<') : c@(_,'!') : cs) = special c cs
+
+tokens' ((_,'<') : cs)   = tag (dropSpace cs) -- we are being nice here
+tokens' [] = []
+tokens' cs@((l,_):_) = let (as,bs) = breakn ('<' ==) cs
+                       in map cvt (decode_text as) ++ tokens' bs
+
+  -- XXX: Note, some of the lines might be a bit inacuarate
+  where cvt (TxtBit x)  = TokText CData { cdLine = Just l
+                                        , cdVerbatim = CDataText
+                                        , cdData = x
+                                        }
+        cvt (CRefBit x) = case cref_to_char x of
+                            Just c -> TokText CData { cdLine = Just l
+                                                    , cdVerbatim = CDataText
+                                                    , cdData = [c]
+                                                    }
+                            Nothing -> TokCRef x
+
+
+special :: LChar -> LString -> [Token]
+special _ ((_,'-') : (_,'-') : cs) = skip cs
+  where skip ((_,'-') : (_,'-') : (_,'>') : ds) = tokens' ds
+        skip (_ : ds) = skip ds
+        skip [] = [] -- unterminated comment
+
+special c ((_,'[') : (_,'C') : (_,'D') : (_,'A') : (_,'T') : (_,'A') : (_,'[')
+         : cs) =
+  let (xs,ts) = cdata cs
+  in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataVerbatim, cdData = xs }
+                                                                  : tokens' ts
+  where cdata ((_,']') : (_,']') : (_,'>') : ds) = ([],ds)
+        cdata ((_,d) : ds)  = let (xs,ys) = cdata ds in (d:xs,ys)
+        cdata []        = ([],[])
+
+special c cs = 
+  let (xs,ts) = munch "" 0 cs
+  in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataRaw, cdData = '<':'!':(reverse xs) } : tokens' ts
+  where munch acc nesting ((_,'>') : ds) 
+         | nesting == (0::Int) = ('>':acc,ds)
+	 | otherwise           = munch ('>':acc) (nesting-1) ds
+        munch acc nesting ((_,'<') : ds)
+	 = munch ('<':acc) (nesting+1) ds
+        munch acc n ((_,x) : ds) = munch (x:acc) n ds
+        munch acc _ [] = (acc,[]) -- unterminated DTD markup
+
+--special c cs = tag (c : cs) -- invalid specials are processed as tags
+
+
+qualName           :: LString -> (QName,LString)
+qualName xs         = let (as,bs) = breakn endName xs
+                          (q,n)   = case break (':'==) as of
+                                      (q1,_:n1) -> (Just q1, n1)
+                                      _         -> (Nothing, as)
+                      in (QName { qURI = Nothing, qPrefix = q, qName = n }, bs)
+  where endName x = isSpace x || x == '=' || x == '>' || x == '/'
+
+
+
+
+
+tag              :: LString -> [Token]
+tag ((p,'/') : cs)    = let (n,ds) = qualName (dropSpace cs)
+                        in TokEnd p n : case ds of
+                                          (_,'>') : es -> tokens' es
+                                          -- tag was not properly closed...
+                                          _        -> tokens' ds
+tag []            = []
+tag cs            = let (n,ds)  = qualName cs
+                        (as,b,ts) = attribs (dropSpace ds)
+                    in TokStart (fst (head cs)) n as b : ts
+
+attribs          :: LString -> ([Attr], Bool, [Token])
+attribs cs        = case cs of
+                      (_,'>') : ds -> ([], False, tokens' ds)
+
+                      (_,'/') : ds -> ([], True, case ds of
+                                              (_,'>') : es -> tokens' es
+                                              -- insert missing >  ...
+                                              _ -> tokens' ds)
+
+                      (_,'?') : (_,'>') : ds -> ([], True, tokens' ds)
+
+                      -- doc ended within a tag..
+                      []       -> ([],False,[])
+
+                      _        -> let (a,cs1) = attrib cs
+                                      (as,b,ts) = attribs cs1
+                                  in (a:as,b,ts)
+
+attrib             :: LString -> (Attr,LString)
+attrib cs           = let (ks,cs1)  = qualName cs
+                          (vs,cs2)  = attr_val (dropSpace cs1)
+                      in ((Attr ks (decode_attr vs)),dropSpace cs2)
+
+attr_val           :: LString -> (String,LString)
+attr_val ((_,'=') : cs) = string (dropSpace cs)
+attr_val cs         = ("",cs)
+
+
+dropSpace :: LString -> LString
+dropSpace = dropWhile (isSpace . snd)
+
+-- | Match the value for an attribute.  For malformed XML we do
+-- our best to guess the programmer's intention.
+string             :: LString -> (String,LString)
+string ((_,'"') : cs)   = break' ('"' ==) cs
+
+-- Allow attributes to be enclosed between ' '.
+string ((_,'\'') : cs)  = break' ('\'' ==) cs
+
+-- Allow attributes that are not enclosed by anything.
+string cs           = breakn eos cs
+  where eos x = isSpace x || x == '>' || x == '/'
+
+
+break' :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
+break' p xs         = let (as,bs) = breakn p xs
+                      in (as, case bs of
+                                [] -> []
+                                _ : cs -> cs)
+
+breakn :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
+breakn p l = (map snd as,bs) where (as,bs) = break (p . snd) l
+
+
+
+decode_attr :: String -> String
+decode_attr cs = concatMap cvt (decode_text cs)
+  where cvt (TxtBit x) = x
+        cvt (CRefBit x) = case cref_to_char x of
+                            Just c -> [c]
+                            Nothing -> '&' : x ++ ";"
+
+data Txt = TxtBit String | CRefBit String deriving Show
+
+decode_text :: [Char] -> [Txt]
+decode_text xs@('&' : cs) = case break (';' ==) cs of
+                              (as,_:bs) -> CRefBit as : decode_text bs
+                              _ -> [TxtBit xs]
+decode_text []  = []
+decode_text cs  = let (as,bs) = break ('&' ==) cs
+                  in TxtBit as : decode_text bs
+
+cref_to_char :: [Char] -> Maybe Char
+cref_to_char cs = case cs of
+  '#' : ds  -> num_esc ds
+  "lt"      -> Just '<'
+  "gt"      -> Just '>'
+  "amp"     -> Just '&'
+  "apos"    -> Just '\''
+  "quot"    -> Just '"'
+  _         -> Nothing
+
+num_esc :: String -> Maybe Char
+num_esc cs = case cs of
+               'x' : ds -> check (readHex ds)
+               _        -> check (reads cs)
+
+  where check [(n,"")]  = cvt_char n
+        check _         = Nothing
+
+cvt_char :: Int -> Maybe Char
+cvt_char x
+  | fromEnum (minBound :: Char) <= x && x <= fromEnum (maxBound::Char)
+                = Just (toEnum x)
+  | otherwise = Nothing
+
+preprocess :: String -> String
+preprocess ('\r' : '\n' : cs) = '\n' : preprocess cs
+preprocess ('\r' : cs)        = '\n' : preprocess cs
+preprocess (c : cs)           = c : preprocess cs
+preprocess []                 = []
+
+linenumber :: Line -> String -> LString
+linenumber _ [] = []
+linenumber n ('\n':s) = n' `seq` ((n,'\n'):linenumber n' s) where n' = n + 1
+linenumber n (c:s)    = (n,c) : linenumber n s
diff --git a/Text/XML/Light/Output.hs b/Text/XML/Light/Output.hs
new file mode 100644
index 000000000..65d1bb1af
--- /dev/null
+++ b/Text/XML/Light/Output.hs
@@ -0,0 +1,150 @@
+--------------------------------------------------------------------
+-- |
+-- Module    : Text.XML.Light.Output
+-- Copyright : (c) Galois, Inc. 2007
+-- License   : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability:
+--
+-- Output handling for the lightweight XML lib.
+--
+
+module Text.XML.Light.Output
+  ( showTopElement, showContent, showElement, showCData, showQName, showAttr
+  , ppTopElement, ppContent, ppElement
+  , tagEnd, xml_header
+  ) where
+
+import Text.XML.Light.Types
+import Data.Char
+import Data.List ( isPrefixOf )
+
+-- | The XML 1.0 header
+xml_header :: String
+xml_header = "<?xml version='1.0' ?>"
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppTopElement       :: Element -> String
+ppTopElement e      = unlines [xml_header,ppElement e]
+
+-- | Pretty printing elements
+ppElement          :: Element -> String
+ppElement e         = ppElementS "" e ""
+
+-- | Pretty printing content
+ppContent          :: Content -> String
+ppContent x         = ppContentS "" x ""
+
+-- | Pretty printing content using ShowS
+ppContentS         :: String -> Content -> ShowS
+ppContentS i x xs   = case x of
+                        Elem e -> ppElementS i e xs
+                        Text c -> ppCData i c xs
+                        CRef r -> showCRefS r xs
+
+ppElementS         :: String -> Element -> ShowS
+ppElementS i e xs   = i ++ (tagStart (elName e) (elAttribs e) $
+  case elContent e of
+    [] 
+     | not ("?xml" `isPrefixOf` (qName $ elName e)) -> " />" ++ xs
+     | otherwise -> " ?>" ++ xs
+    [Text t] -> ">" ++ ppCData "" t (tagEnd (elName e) xs)
+    cs -> ">\n" ++ foldr ppSub (i ++ tagEnd (elName e) xs) cs
+      where ppSub e1 = ppContentS ("  " ++ i) e1 . showChar '\n'
+  )
+
+ppCData            :: String -> CData -> ShowS
+ppCData i c xs      = i ++ if (cdVerbatim c /= CDataText )
+                              then showCDataS c xs
+                              else foldr cons xs (showCData c)
+
+  where cons         :: Char -> String -> String
+        cons '\n' ys  = "\n" ++ i ++ ys
+        cons y ys     = y : ys
+
+
+
+--------------------------------------------------------------------------------
+-- | Adds the <?xml?> header.
+showTopElement     :: Element -> String
+showTopElement c    = xml_header ++ showElement c
+
+showContent        :: Content -> String
+showContent c       = showContentS c ""
+
+showElement        :: Element -> String
+showElement c       = showElementS c ""
+
+showCData          :: CData -> String
+showCData c         = showCDataS c ""
+
+-- Note: crefs should not contain '&', ';', etc.
+showCRefS          :: String -> ShowS
+showCRefS r xs      = '&' : r ++ ';' : xs
+
+-- | Good for transmition (no extra white space etc.) but less readable.
+showContentS           :: Content -> ShowS
+showContentS (Elem e)   = showElementS e
+showContentS (Text cs)  = showCDataS cs
+showContentS (CRef cs)  = showCRefS cs
+
+-- | Good for transmition (no extra white space etc.) but less readable.
+showElementS       :: Element -> ShowS
+showElementS e xs =
+  tagStart (elName e) (elAttribs e)
+    $ case elContent e of
+        [] -> " />" ++ xs
+        ch -> '>' : foldr showContentS (tagEnd (elName e) xs) ch
+
+-- | Convert a text element to characters.
+showCDataS         :: CData -> ShowS
+showCDataS cd = 
+ case cdVerbatim cd of
+   CDataText     -> escStr (cdData cd)
+   CDataVerbatim -> showString "<![CDATA[" . escCData (cdData cd) . showString "]]>"
+   CDataRaw      -> \ xs -> cdData cd ++ xs
+
+--------------------------------------------------------------------------------
+escCData           :: String -> ShowS
+escCData (']' : ']' : '>' : cs) = showString "]]]]><![CDATA[>" . escCData cs
+escCData (c : cs)               = showChar c . escCData cs
+escCData []                     = id
+
+escChar            :: Char -> ShowS
+escChar c = case c of
+  '<'   -> showString "&lt;"
+  '>'   -> showString "&gt;"
+  '&'   -> showString "&amp;"
+  '"'   -> showString "&quot;"
+  '\''  -> showString "&apos;"
+  -- XXX: Is this really wortherd?
+  -- We could deal with these issues when we convert characters to bytes.
+  _ | (oc <= 0x7f && isPrint c) || c == '\n' || c == '\r' -> showChar c
+    | otherwise -> showString "&#" . shows oc . showChar ';'
+      where oc = ord c
+
+escStr             :: String -> ShowS
+escStr cs rs        = foldr escChar rs cs
+
+tagEnd             :: QName -> ShowS
+tagEnd qn rs        = '<':'/':showQName qn ++ '>':rs
+
+tagStart           :: QName -> [Attr] -> ShowS
+tagStart qn as rs   = '<':showQName qn ++ as_str ++ rs
+ where as_str       = if null as then "" else ' ' : unwords (map showAttr as)
+
+showAttr           :: Attr -> String
+showAttr (Attr qn v) = showQName qn ++ '=' : '"' : escStr v "\""
+
+showQName          :: QName -> String
+showQName q         = pre ++ qName q
+  where pre = case qPrefix q of
+                Nothing -> ""
+                Just p  -> p ++ ":"
+
+
+
diff --git a/Text/XML/Light/Proc.hs b/Text/XML/Light/Proc.hs
new file mode 100644
index 000000000..34d844a3f
--- /dev/null
+++ b/Text/XML/Light/Proc.hs
@@ -0,0 +1,103 @@
+--------------------------------------------------------------------
+-- |
+-- Module    : Text.XML.Light.Proc
+-- Copyright : (c) Galois, Inc. 2007
+-- License   : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability:
+--
+--------------------------------------------------------------------
+
+
+module Text.XML.Light.Proc where
+
+import Text.XML.Light.Types
+
+import Data.Maybe(listToMaybe)
+import Data.List(find)
+
+-- | Get the text value of an XML element.  This function
+-- ignores non-text elements, and concatenates all text elements.
+strContent         :: Element -> String
+strContent e        = concatMap cdData $ onlyText $ elContent e
+
+-- | Select only the elements from a list of XML content.
+onlyElems          :: [Content] -> [Element]
+onlyElems xs        = [ x | Elem x <- xs ]
+
+-- | Select only the elements from a parent.
+elChildren         :: Element -> [Element]
+elChildren e        = [ x | Elem x <- elContent e ]
+
+-- | Select only the text from a list of XML content.
+onlyText           :: [Content] -> [CData]
+onlyText xs         = [ x | Text x <- xs ]
+
+-- | Find all immediate children with the given name.
+findChildren       :: QName -> Element -> [Element]
+findChildren q e    = filterChildren ((q ==) . elName) e
+
+-- | Filter all immediate children wrt a given predicate.
+filterChildren       :: (Element -> Bool) -> Element -> [Element]
+filterChildren p e    = filter p (onlyElems (elContent e))
+
+
+-- | Filter all immediate children wrt a given predicate over their names.
+filterChildrenName      :: (QName -> Bool) -> Element -> [Element]
+filterChildrenName p e   = filter (p.elName) (onlyElems (elContent e))
+
+
+-- | Find an immediate child with the given name.
+findChild          :: QName -> Element -> Maybe Element
+findChild q e       = listToMaybe (findChildren q e)
+
+-- | Find an immediate child with the given name.
+filterChild          :: (Element -> Bool) -> Element -> Maybe Element
+filterChild p e       = listToMaybe (filterChildren p e)
+
+-- | Find an immediate child with name matching a predicate.
+filterChildName      :: (QName -> Bool) -> Element -> Maybe Element
+filterChildName p e   = listToMaybe (filterChildrenName p e)
+
+-- | Find the left-most occurrence of an element matching given name.
+findElement        :: QName -> Element -> Maybe Element
+findElement q e     = listToMaybe (findElements q e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElement        :: (Element -> Bool) -> Element -> Maybe Element
+filterElement p e     = listToMaybe (filterElements p e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElementName     :: (QName -> Bool) -> Element -> Maybe Element
+filterElementName p e  = listToMaybe (filterElementsName p e)
+
+-- | Find all non-nested occurances of an element.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+findElements       :: QName -> Element -> [Element]
+findElements qn e = filterElementsName (qn==) e
+
+-- | Find all non-nested occurrences of an element wrt. given predicate.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElements       :: (Element -> Bool) -> Element -> [Element]
+filterElements p e
+ | p e        = [e]
+ | otherwise  = concatMap (filterElements p) $ onlyElems $ elContent e
+
+-- | Find all non-nested occurences of an element wrt a predicate over element names.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElementsName       :: (QName -> Bool) -> Element -> [Element]
+filterElementsName p e = filterElements (p.elName) e
+
+-- | Lookup the value of an attribute.
+findAttr           :: QName -> Element -> Maybe String
+findAttr x e        = attrVal `fmap` find ((x ==) . attrKey) (elAttribs e)
+
+-- | Lookup attribute name from list.
+lookupAttr           :: QName -> [Attr] -> Maybe String
+lookupAttr x as        = attrVal `fmap` find ((x ==) . attrKey) as
+
diff --git a/Text/XML/Light/Types.hs b/Text/XML/Light/Types.hs
new file mode 100644
index 000000000..6d8f04b4a
--- /dev/null
+++ b/Text/XML/Light/Types.hs
@@ -0,0 +1,91 @@
+--------------------------------------------------------------------
+-- |
+-- Module    : Text.XML.Light.Types
+-- Copyright : (c) Galois, Inc. 2007
+-- License   : BSD3
+--
+-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
+-- Stability : provisional
+-- Portability:
+--
+-- Basic XML types.
+--
+
+module Text.XML.Light.Types where
+
+-- | A line is an Integer
+type Line     = Integer
+
+-- | XML content
+data Content  = Elem Element
+              | Text CData
+              | CRef String
+                deriving Show
+
+-- | XML elements
+data Element  = Element {
+                  elName      :: QName,
+                  elAttribs   :: [Attr],
+                  elContent   :: [Content],
+                  elLine      :: Maybe Line
+                } deriving Show
+
+-- | XML attributes
+data Attr     = Attr {
+                  attrKey :: QName,
+                  attrVal :: String
+                } deriving (Eq,Ord,Show)
+
+-- | XML CData
+data CData    = CData {
+                  cdVerbatim  :: CDataKind,
+                  cdData      :: String,
+                  cdLine      :: Maybe Line
+                } deriving Show
+
+data CDataKind
+ = CDataText      -- ^ Ordinary character data; pretty printer escapes &, < etc.
+ | CDataVerbatim  -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
+ | CDataRaw       -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
+   deriving ( Eq, Show )
+
+-- | XML qualified names
+data QName    = QName {
+                  qName   :: String,
+                  qURI    :: Maybe String,
+                  qPrefix :: Maybe String
+                } deriving Show
+
+
+instance Eq QName where
+  q1 == q2  = compare q1 q2 == EQ
+
+instance Ord QName where
+  compare q1 q2 =
+    case compare (qName q1) (qName q2) of
+      EQ  -> case (qURI q1, qURI q2) of
+               (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
+               (u1,u2)           -> compare u1 u2
+      x   -> x
+
+
+-- blank elements --------------------------------------------------------------
+
+-- | Blank names
+blank_name :: QName
+blank_name = QName { qName = "", qURI = Nothing, qPrefix = Nothing }
+
+-- | Blank cdata
+blank_cdata :: CData
+blank_cdata = CData { cdVerbatim = CDataText, cdData = "", cdLine = Nothing }
+
+-- | Blank elements
+blank_element :: Element
+blank_element = Element
+                  { elName    = blank_name
+                  , elAttribs = []
+                  , elContent = []
+                  , elLine    = Nothing
+                  }
+
+
diff --git a/debian/copyright b/debian/copyright
index 94208eaf7..ebe763a4c 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -50,3 +50,36 @@ by Eric A. Meyer
 <http://meyerweb.com/eric/tools/s5
 
 Released under an explicit Public Domain License
+
+----------------------------------------------------------------------
+Text/XML/Light/*
+(c) 2007 Galois Inc.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the author nor the names of his contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/pandoc.cabal b/pandoc.cabal
index 96fcfda53..349238ce9 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -79,8 +79,7 @@ Library
   if flag(highlighting)
     Build-depends: highlighting-kate
     cpp-options:   -DHIGHLIGHTING
-  Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, xml,
-                 utf8-string
+  Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, utf8-string
   Hs-Source-Dirs:  .
   Exposed-Modules: Text.Pandoc,
                    Text.Pandoc.Blocks,
@@ -108,10 +107,16 @@ Library
                    Text.Pandoc.Writers.MediaWiki,
                    Text.Pandoc.Writers.RTF,
                    Text.Pandoc.Writers.S5
-  Other-Modules:   Text.Pandoc.XML
+  Other-Modules:   Text.Pandoc.XML,
+                   Text.XML.Light,
+                   Text.XML.Light.Types,
+                   Text.XML.Light.Output,
+                   Text.XML.Light.Input,
+                   Text.XML.Light.Cursor
   Ghc-Options:     -O2 -Wall -threaded
   Ghc-Prof-Options: -auto-all
-  Extensions:      CPP
+  Extensions:      CPP, FlexibleInstances
+  --               FlexibleInstances needed for Text.XML.Light
 
   if flag(library)
     Buildable:      True