diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2008-08-01 00:45:07 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2008-08-01 00:45:07 +0000 |
commit | 0c6dd105b73e95d61d3a3d3213d8f80f3d8a22d4 (patch) | |
tree | d8cad0cbbdc02fee41bd55ba9938371c8278cf18 /Text | |
parent | 8440385f45e18f43172c0cf4409cf4b8b538bbeb (diff) | |
download | pandoc-0c6dd105b73e95d61d3a3d3213d8f80f3d8a22d4.tar.gz |
Added code for xml library (Text.XML.Light) to source tree,
since there is currently no debian package. Removed
dependency on xml library. Added license to debian/copyright.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1351 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Text')
-rw-r--r-- | Text/XML/Light.hs | 96 | ||||
-rw-r--r-- | Text/XML/Light/Cursor.hs | 327 | ||||
-rw-r--r-- | Text/XML/Light/Input.hs | 307 | ||||
-rw-r--r-- | Text/XML/Light/Output.hs | 150 | ||||
-rw-r--r-- | Text/XML/Light/Proc.hs | 103 | ||||
-rw-r--r-- | Text/XML/Light/Types.hs | 91 |
6 files changed, 1074 insertions, 0 deletions
diff --git a/Text/XML/Light.hs b/Text/XML/Light.hs new file mode 100644 index 000000000..f2d75290b --- /dev/null +++ b/Text/XML/Light.hs @@ -0,0 +1,96 @@ +{-# LANGUAGE FlexibleInstances #-} +-------------------------------------------------------------------- +-- | +-- Module : Text.XML.Light +-- Copyright : (c) Galois, Inc. 2007 +-- License : BSD3 +-- +-- Maintainer: Iavor S. Diatchki <diatchki@galois.com> +-- Stability : provisional +-- Portability: portability +-- +-- A lightweight XML parsing, filtering and generating library. +-- +-- This module reexports functions from: +-- +-- * "Text.XML.Light.Types" +-- +-- * "Text.XML.Light.Proc" +-- +-- * "Text.XML.Light.Input" +-- +-- * "Text.XML.Light.Output" +-- + +module Text.XML.Light ( + + module Text.XML.Light, + module Text.XML.Light.Types, + module Text.XML.Light.Proc, + module Text.XML.Light.Input, + module Text.XML.Light.Output + + ) where + +import Text.XML.Light.Types +import Text.XML.Light.Proc +import Text.XML.Light.Input +import Text.XML.Light.Output + +-- | Add an attribute to an element. +add_attr :: Attr -> Element -> Element +add_attr a e = add_attrs [a] e + +-- | Add some attributes to an element. +add_attrs :: [Attr] -> Element -> Element +add_attrs as e = e { elAttribs = as ++ elAttribs e } + +-- | Create an unqualified name. +unqual :: String -> QName +unqual x = blank_name { qName = x } + +-- | A smart element constructor which uses the type of its argument +-- to determine what sort of element to make. +class Node t where + node :: QName -> t -> Element + +instance Node ([Attr],[Content]) where + node n (attrs,cont) = blank_element { elName = n + , elAttribs = attrs + , elContent = cont + } + +instance Node [Attr] where node n as = node n (as,[]::[Content]) +instance Node Attr where node n a = node n [a] +instance Node () where node n () = node n ([]::[Attr]) + +instance Node [Content] where node n cs = node n ([]::[Attr],cs) +instance Node Content where node n c = node n [c] +instance Node ([Attr],Content) where node n (as,c) = node n (as,[c]) +instance Node (Attr,Content) where node n (a,c) = node n ([a],[c]) + +instance Node ([Attr],[Element]) where + node n (as,cs) = node n (as,map Elem cs) + +instance Node ([Attr],Element) where node n (as,c) = node n (as,[c]) +instance Node (Attr,Element) where node n (a,c) = node n ([a],c) +instance Node ([Element]) where node n es = node n ([]::[Attr],es) +instance Node (Element) where node n e = node n [e] + +instance Node ([Attr],[CData]) where + node n (as,cs) = node n (as,map Text cs) + +instance Node ([Attr],CData) where node n (as,c) = node n (as,[c]) +instance Node (Attr,CData) where node n (a,c) = node n ([a],c) +instance Node [CData] where node n es = node n ([]::[Attr],es) +instance Node CData where node n e = node n [e] + +instance Node ([Attr],String) where + node n (as,t) = node n (as,blank_cdata { cdData = t }) + +instance Node (Attr,String) where node n (a,t) = node n ([a],t) +instance Node [Char] where node n t = node n ([]::[Attr],t) + +-- | Create node with unqualified name +unode :: Node t => String -> t -> Element +unode = node . unqual diff --git a/Text/XML/Light/Cursor.hs b/Text/XML/Light/Cursor.hs new file mode 100644 index 000000000..06d15bdb6 --- /dev/null +++ b/Text/XML/Light/Cursor.hs @@ -0,0 +1,327 @@ +-------------------------------------------------------------------- +-- | +-- Module : Text.XML.Light.Cursor +-- Copyright : (c) Galois, Inc. 2008 +-- License : BSD3 +-- +-- Maintainer: Iavor S. Diatchki <diatchki@galois.com> +-- Stability : provisional +-- Portability: +-- +-- XML cursors for working XML content withing the context of +-- an XML document. This implemntation is based on the general +-- tree zipper written by Krasimir Angelov and Iavor S. Diatchki. +-- + +module Text.XML.Light.Cursor + ( Tag(..), getTag, setTag, fromTag + , Cursor(..), Path + + -- * Conversions + , fromContent + , fromElement + , fromForest + , toForest + , toTree + + -- * Moving around + , parent + , root + , getChild + , firstChild + , lastChild + , left + , right + + -- ** Searching + , findChild + , findLeft + , findRight + + -- * Node classification + , isRoot + , isFirst + , isLast + , isLeaf + , isChild + , hasChildren + , getNodeIndex + + -- * Updates + , setContent + , modifyContent + , modifyContentM + + -- ** Inserting content + , insertLeft + , insertRight + , insertGoLeft + , insertGoRight + + -- ** Removing content + , removeLeft + , removeRight + , removeGoLeft + , removeGoRight + , removeGoUp + + ) where + +import Text.XML.Light.Types +import Data.Maybe(isNothing) + +data Tag = Tag { tagName :: QName + , tagAttribs :: [Attr] + , tagLine :: Maybe Line + } deriving (Show) + +getTag :: Element -> Tag +getTag e = Tag { tagName = elName e + , tagAttribs = elAttribs e + , tagLine = elLine e + } + +setTag :: Tag -> Element -> Element +setTag t e = fromTag t (elContent e) + +fromTag :: Tag -> [Content] -> Element +fromTag t cs = Element { elName = tagName t + , elAttribs = tagAttribs t + , elLine = tagLine t + , elContent = cs + } + +type Path = [([Content],Tag,[Content])] + +-- | The position of a piece of content in an XML document. +data Cursor = Cur + { current :: Content -- ^ The currently selected content. + , lefts :: [Content] -- ^ Siblings on the left, closest first. + , rights :: [Content] -- ^ Siblings on the right, closest first. + , parents :: Path -- ^ The contexts of the parent elements of this location. + } deriving (Show) + +-- Moving around --------------------------------------------------------------- + +-- | The parent of the given location. +parent :: Cursor -> Maybe Cursor +parent loc = + case parents loc of + (pls,v,prs) : ps -> Just + Cur { current = Elem + (fromTag v + (combChildren (lefts loc) (current loc) (rights loc))) + , lefts = pls, rights = prs, parents = ps + } + [] -> Nothing + + +-- | The top-most parent of the given location. +root :: Cursor -> Cursor +root loc = maybe loc root (parent loc) + +-- | The left sibling of the given location. +left :: Cursor -> Maybe Cursor +left loc = + case lefts loc of + t : ts -> Just loc { current = t, lefts = ts + , rights = current loc : rights loc } + [] -> Nothing + +-- | The right sibling of the given location. +right :: Cursor -> Maybe Cursor +right loc = + case rights loc of + t : ts -> Just loc { current = t, lefts = current loc : lefts loc + , rights = ts } + [] -> Nothing + +-- | The first child of the given location. +firstChild :: Cursor -> Maybe Cursor +firstChild loc = + do (t : ts, ps) <- downParents loc + return Cur { current = t, lefts = [], rights = ts , parents = ps } + +-- | The last child of the given location. +lastChild :: Cursor -> Maybe Cursor +lastChild loc = + do (ts, ps) <- downParents loc + case reverse ts of + l : ls -> return Cur { current = l, lefts = ls, rights = [] + , parents = ps } + [] -> Nothing + +-- | Find the next left sibling that satisfies a predicate. +findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor +findLeft p loc = do loc1 <- left loc + if p loc1 then return loc1 else findLeft p loc1 + +-- | Find the next right sibling that satisfies a predicate. +findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor +findRight p loc = do loc1 <- right loc + if p loc1 then return loc1 else findRight p loc1 + +-- | The first child that satisfies a predicate. +findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor +findChild p loc = + do loc1 <- firstChild loc + if p loc1 then return loc1 else findRight p loc1 + +-- | The child with the given index (starting from 0). +getChild :: Int -> Cursor -> Maybe Cursor +getChild n loc = + do (ts,ps) <- downParents loc + (ls,t,rs) <- splitChildren ts n + return Cur { current = t, lefts = ls, rights = rs, parents = ps } + + +-- | private: computes the parent for "down" operations. +downParents :: Cursor -> Maybe ([Content], Path) +downParents loc = + case current loc of + Elem e -> Just ( elContent e + , (lefts loc, getTag e, rights loc) : parents loc + ) + _ -> Nothing + +-- Conversions ----------------------------------------------------------------- + +-- | A cursor for the guven content. +fromContent :: Content -> Cursor +fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] } + +-- | A cursor for the guven element. +fromElement :: Element -> Cursor +fromElement e = fromContent (Elem e) + +-- | The location of the first tree in a forest. +fromForest :: [Content] -> Maybe Cursor +fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts + , parents = [] } +fromForest [] = Nothing + +-- | Computes the tree containing this location. +toTree :: Cursor -> Content +toTree loc = current (root loc) + +-- | Computes the forest containing this location. +toForest :: Cursor -> [Content] +toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r) + + +-- Queries --------------------------------------------------------------------- + +-- | Are we at the top of the document? +isRoot :: Cursor -> Bool +isRoot loc = null (parents loc) + +-- | Are we at the left end of the the document? +isFirst :: Cursor -> Bool +isFirst loc = null (lefts loc) + +-- | Are we at the right end of the document? +isLast :: Cursor -> Bool +isLast loc = null (rights loc) + +-- | Are we at the bottom of the document? +isLeaf :: Cursor -> Bool +isLeaf loc = isNothing (downParents loc) + +-- | Do we have a parent? +isChild :: Cursor -> Bool +isChild loc = not (isRoot loc) + +-- | Get the node index inside the sequence of children +getNodeIndex :: Cursor -> Int +getNodeIndex loc = length (lefts loc) + +-- | Do we have children? +hasChildren :: Cursor -> Bool +hasChildren loc = not (isLeaf loc) + + + +-- Updates --------------------------------------------------------------------- + +-- | Change the current content. +setContent :: Content -> Cursor -> Cursor +setContent t loc = loc { current = t } + +-- | Modify the current content. +modifyContent :: (Content -> Content) -> Cursor -> Cursor +modifyContent f loc = setContent (f (current loc)) loc + +-- | Modify the current content, allowing for an effect. +modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor +modifyContentM f loc = do x <- f (current loc) + return (setContent x loc) + +-- | Insert content to the left of the current position. +insertLeft :: Content -> Cursor -> Cursor +insertLeft t loc = loc { lefts = t : lefts loc } + +-- | Insert content to the right of the current position. +insertRight :: Content -> Cursor -> Cursor +insertRight t loc = loc { rights = t : rights loc } + +-- | Remove the conent on the left of the current position, if any. +removeLeft :: Cursor -> Maybe (Content,Cursor) +removeLeft loc = case lefts loc of + l : ls -> return (l,loc { lefts = ls }) + [] -> Nothing + +-- | Remove the conent on the right of the current position, if any. +removeRight :: Cursor -> Maybe (Content,Cursor) +removeRight loc = case rights loc of + l : ls -> return (l,loc { rights = ls }) + [] -> Nothing + + +-- | Insert content to the left of the current position. +-- The new content becomes the current position. +insertGoLeft :: Content -> Cursor -> Cursor +insertGoLeft t loc = loc { current = t, rights = current loc : rights loc } + +-- | Insert content to the right of the current position. +-- The new content becomes the current position. +insertGoRight :: Content -> Cursor -> Cursor +insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc } + +-- | Remove the current element. +-- The new position is the one on the left. +removeGoLeft :: Cursor -> Maybe Cursor +removeGoLeft loc = case lefts loc of + l : ls -> Just loc { current = l, lefts = ls } + [] -> Nothing + +-- | Remove the current element. +-- The new position is the one on the right. +removeGoRight :: Cursor -> Maybe Cursor +removeGoRight loc = case rights loc of + l : ls -> Just loc { current = l, rights = ls } + [] -> Nothing + +-- | Remove the current element. +-- The new position is the parent of the old position. +removeGoUp :: Cursor -> Maybe Cursor +removeGoUp loc = + case parents loc of + (pls,v,prs) : ps -> Just + Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc)) + , lefts = pls, rights = prs, parents = ps + } + [] -> Nothing + + +-- | private: Gets the given element of a list. +-- Also returns the preceeding elements (reversed) and the folloing elements. +splitChildren :: [a] -> Int -> Maybe ([a],a,[a]) +splitChildren _ n | n < 0 = Nothing +splitChildren cs pos = loop [] cs pos + where loop acc (x:xs) 0 = Just (acc,x,xs) + loop acc (x:xs) n = loop (x:acc) xs $! n-1 + loop _ _ _ = Nothing + +-- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys +combChildren :: [a] -> a -> [a] -> [a] +combChildren ls t rs = foldl (flip (:)) (t:rs) ls diff --git a/Text/XML/Light/Input.hs b/Text/XML/Light/Input.hs new file mode 100644 index 000000000..3cf3a8cd1 --- /dev/null +++ b/Text/XML/Light/Input.hs @@ -0,0 +1,307 @@ +-------------------------------------------------------------------- +-- | +-- Module : Text.XML.Light.Input +-- Copyright : (c) Galois, Inc. 2007 +-- License : BSD3 +-- +-- Maintainer: Iavor S. Diatchki <diatchki@galois.com> +-- Stability : provisional +-- Portability: portable +-- +-- Lightweight XML parsing +-- + +module Text.XML.Light.Input (parseXML,parseXMLDoc) where + +import Text.XML.Light.Types +import Text.XML.Light.Proc +import Text.XML.Light.Output(tagEnd) + +import Data.Char(isSpace) +import Data.List(isPrefixOf) +import Numeric(readHex) + +-- | parseXMLDoc, parse a XMLl document to maybe an element +parseXMLDoc :: String -> Maybe Element +parseXMLDoc xs = strip (parseXML xs) + where strip cs = case onlyElems cs of + e : es + | "?xml" `isPrefixOf` qName (elName e) + -> strip (map Elem es) + | otherwise -> Just e + _ -> Nothing + +-- | parseXML to a list of content chunks +parseXML :: String -> [Content] +parseXML xs = parse $ tokens $ preprocess xs + +------------------------------------------------------------------------ + +parse :: [Token] -> [Content] +parse [] = [] +parse ts = let (es,_,ts1) = nodes ([],Nothing) [] ts + in es ++ parse ts1 + +-- Information about namespaces. +-- The first component is a map that associates prefixes to URIs, +-- the second is the URI for the default namespace, if one was provided. +type NSInfo = ([(String,String)],Maybe String) + +nodes :: NSInfo -> [QName] -> [Token] -> ([Content], [QName], [Token]) + +nodes ns ps (TokCRef ref : ts) = + let (es,qs,ts1) = nodes ns ps ts + in (CRef ref : es, qs, ts1) + +nodes ns ps (TokText txt : ts) = + let (es,qs,ts1) = nodes ns ps ts + (more,es1) = case es of + Text cd : es1' + | cdVerbatim cd == cdVerbatim txt -> (cdData cd,es1') + _ -> ([],es) + + in (Text txt { cdData = cdData txt ++ more } : es1, qs, ts1) + +nodes cur_info ps (TokStart p t as empty : ts) = (node : siblings, open, toks) + where + new_name = annotName new_info t + new_info = foldr addNS cur_info as + node = Elem Element { elLine = Just p + , elName = new_name + , elAttribs = map (annotAttr new_info) as + , elContent = children + } + + (children,(siblings,open,toks)) + | empty = ([], nodes cur_info ps ts) + | otherwise = let (es1,qs1,ts1) = nodes new_info (new_name:ps) ts + in (es1, + case qs1 of + [] -> nodes cur_info ps ts1 + _ : qs3 -> ([],qs3,ts1)) + +nodes ns ps (TokEnd p t : ts) = let t1 = annotName ns t + in case break (t1 ==) ps of + (as,_:_) -> ([],as,ts) + -- Unknown closing tag. Insert as text. + (_,[]) -> + let (es,qs,ts1) = nodes ns ps ts + in (Text CData { + cdLine = Just p, + cdVerbatim = CDataText, + cdData = tagEnd t "" + } : es,qs, ts1) + +nodes _ ps [] = ([],ps,[]) + + +annotName :: NSInfo -> QName -> QName +annotName (namespaces,def_ns) n = + n { qURI = maybe def_ns (`lookup` namespaces) (qPrefix n) } + +annotAttr :: NSInfo -> Attr -> Attr +annotAttr ns a@(Attr { attrKey = k}) = + case (qPrefix k, qName k) of + (Nothing,"xmlns") -> a + _ -> a { attrKey = annotName ns k } + +addNS :: Attr -> NSInfo -> NSInfo +addNS (Attr { attrKey = key, attrVal = val }) (ns,def) = + case (qPrefix key, qName key) of + (Nothing,"xmlns") -> (ns, if null val then Nothing else Just val) + (Just "xmlns", k) -> ((k, val) : ns, def) + _ -> (ns,def) + + +-- Lexer ----------------------------------------------------------------------- + +type LChar = (Line,Char) +type LString = [LChar] +data Token = TokStart Line QName [Attr] Bool -- is empty? + | TokEnd Line QName + | TokCRef String + | TokText CData + deriving Show + +tokens :: String -> [Token] +tokens = tokens' . linenumber 1 + +tokens' :: LString -> [Token] +tokens' ((_,'<') : c@(_,'!') : cs) = special c cs + +tokens' ((_,'<') : cs) = tag (dropSpace cs) -- we are being nice here +tokens' [] = [] +tokens' cs@((l,_):_) = let (as,bs) = breakn ('<' ==) cs + in map cvt (decode_text as) ++ tokens' bs + + -- XXX: Note, some of the lines might be a bit inacuarate + where cvt (TxtBit x) = TokText CData { cdLine = Just l + , cdVerbatim = CDataText + , cdData = x + } + cvt (CRefBit x) = case cref_to_char x of + Just c -> TokText CData { cdLine = Just l + , cdVerbatim = CDataText + , cdData = [c] + } + Nothing -> TokCRef x + + +special :: LChar -> LString -> [Token] +special _ ((_,'-') : (_,'-') : cs) = skip cs + where skip ((_,'-') : (_,'-') : (_,'>') : ds) = tokens' ds + skip (_ : ds) = skip ds + skip [] = [] -- unterminated comment + +special c ((_,'[') : (_,'C') : (_,'D') : (_,'A') : (_,'T') : (_,'A') : (_,'[') + : cs) = + let (xs,ts) = cdata cs + in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataVerbatim, cdData = xs } + : tokens' ts + where cdata ((_,']') : (_,']') : (_,'>') : ds) = ([],ds) + cdata ((_,d) : ds) = let (xs,ys) = cdata ds in (d:xs,ys) + cdata [] = ([],[]) + +special c cs = + let (xs,ts) = munch "" 0 cs + in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataRaw, cdData = '<':'!':(reverse xs) } : tokens' ts + where munch acc nesting ((_,'>') : ds) + | nesting == (0::Int) = ('>':acc,ds) + | otherwise = munch ('>':acc) (nesting-1) ds + munch acc nesting ((_,'<') : ds) + = munch ('<':acc) (nesting+1) ds + munch acc n ((_,x) : ds) = munch (x:acc) n ds + munch acc _ [] = (acc,[]) -- unterminated DTD markup + +--special c cs = tag (c : cs) -- invalid specials are processed as tags + + +qualName :: LString -> (QName,LString) +qualName xs = let (as,bs) = breakn endName xs + (q,n) = case break (':'==) as of + (q1,_:n1) -> (Just q1, n1) + _ -> (Nothing, as) + in (QName { qURI = Nothing, qPrefix = q, qName = n }, bs) + where endName x = isSpace x || x == '=' || x == '>' || x == '/' + + + + + +tag :: LString -> [Token] +tag ((p,'/') : cs) = let (n,ds) = qualName (dropSpace cs) + in TokEnd p n : case ds of + (_,'>') : es -> tokens' es + -- tag was not properly closed... + _ -> tokens' ds +tag [] = [] +tag cs = let (n,ds) = qualName cs + (as,b,ts) = attribs (dropSpace ds) + in TokStart (fst (head cs)) n as b : ts + +attribs :: LString -> ([Attr], Bool, [Token]) +attribs cs = case cs of + (_,'>') : ds -> ([], False, tokens' ds) + + (_,'/') : ds -> ([], True, case ds of + (_,'>') : es -> tokens' es + -- insert missing > ... + _ -> tokens' ds) + + (_,'?') : (_,'>') : ds -> ([], True, tokens' ds) + + -- doc ended within a tag.. + [] -> ([],False,[]) + + _ -> let (a,cs1) = attrib cs + (as,b,ts) = attribs cs1 + in (a:as,b,ts) + +attrib :: LString -> (Attr,LString) +attrib cs = let (ks,cs1) = qualName cs + (vs,cs2) = attr_val (dropSpace cs1) + in ((Attr ks (decode_attr vs)),dropSpace cs2) + +attr_val :: LString -> (String,LString) +attr_val ((_,'=') : cs) = string (dropSpace cs) +attr_val cs = ("",cs) + + +dropSpace :: LString -> LString +dropSpace = dropWhile (isSpace . snd) + +-- | Match the value for an attribute. For malformed XML we do +-- our best to guess the programmer's intention. +string :: LString -> (String,LString) +string ((_,'"') : cs) = break' ('"' ==) cs + +-- Allow attributes to be enclosed between ' '. +string ((_,'\'') : cs) = break' ('\'' ==) cs + +-- Allow attributes that are not enclosed by anything. +string cs = breakn eos cs + where eos x = isSpace x || x == '>' || x == '/' + + +break' :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)]) +break' p xs = let (as,bs) = breakn p xs + in (as, case bs of + [] -> [] + _ : cs -> cs) + +breakn :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)]) +breakn p l = (map snd as,bs) where (as,bs) = break (p . snd) l + + + +decode_attr :: String -> String +decode_attr cs = concatMap cvt (decode_text cs) + where cvt (TxtBit x) = x + cvt (CRefBit x) = case cref_to_char x of + Just c -> [c] + Nothing -> '&' : x ++ ";" + +data Txt = TxtBit String | CRefBit String deriving Show + +decode_text :: [Char] -> [Txt] +decode_text xs@('&' : cs) = case break (';' ==) cs of + (as,_:bs) -> CRefBit as : decode_text bs + _ -> [TxtBit xs] +decode_text [] = [] +decode_text cs = let (as,bs) = break ('&' ==) cs + in TxtBit as : decode_text bs + +cref_to_char :: [Char] -> Maybe Char +cref_to_char cs = case cs of + '#' : ds -> num_esc ds + "lt" -> Just '<' + "gt" -> Just '>' + "amp" -> Just '&' + "apos" -> Just '\'' + "quot" -> Just '"' + _ -> Nothing + +num_esc :: String -> Maybe Char +num_esc cs = case cs of + 'x' : ds -> check (readHex ds) + _ -> check (reads cs) + + where check [(n,"")] = cvt_char n + check _ = Nothing + +cvt_char :: Int -> Maybe Char +cvt_char x + | fromEnum (minBound :: Char) <= x && x <= fromEnum (maxBound::Char) + = Just (toEnum x) + | otherwise = Nothing + +preprocess :: String -> String +preprocess ('\r' : '\n' : cs) = '\n' : preprocess cs +preprocess ('\r' : cs) = '\n' : preprocess cs +preprocess (c : cs) = c : preprocess cs +preprocess [] = [] + +linenumber :: Line -> String -> LString +linenumber _ [] = [] +linenumber n ('\n':s) = n' `seq` ((n,'\n'):linenumber n' s) where n' = n + 1 +linenumber n (c:s) = (n,c) : linenumber n s diff --git a/Text/XML/Light/Output.hs b/Text/XML/Light/Output.hs new file mode 100644 index 000000000..65d1bb1af --- /dev/null +++ b/Text/XML/Light/Output.hs @@ -0,0 +1,150 @@ +-------------------------------------------------------------------- +-- | +-- Module : Text.XML.Light.Output +-- Copyright : (c) Galois, Inc. 2007 +-- License : BSD3 +-- +-- Maintainer: Iavor S. Diatchki <diatchki@galois.com> +-- Stability : provisional +-- Portability: +-- +-- Output handling for the lightweight XML lib. +-- + +module Text.XML.Light.Output + ( showTopElement, showContent, showElement, showCData, showQName, showAttr + , ppTopElement, ppContent, ppElement + , tagEnd, xml_header + ) where + +import Text.XML.Light.Types +import Data.Char +import Data.List ( isPrefixOf ) + +-- | The XML 1.0 header +xml_header :: String +xml_header = "<?xml version='1.0' ?>" + +-- | Pretty printing renders XML documents faithfully, +-- with the exception that whitespace may be added\/removed +-- in non-verbatim character data. +ppTopElement :: Element -> String +ppTopElement e = unlines [xml_header,ppElement e] + +-- | Pretty printing elements +ppElement :: Element -> String +ppElement e = ppElementS "" e "" + +-- | Pretty printing content +ppContent :: Content -> String +ppContent x = ppContentS "" x "" + +-- | Pretty printing content using ShowS +ppContentS :: String -> Content -> ShowS +ppContentS i x xs = case x of + Elem e -> ppElementS i e xs + Text c -> ppCData i c xs + CRef r -> showCRefS r xs + +ppElementS :: String -> Element -> ShowS +ppElementS i e xs = i ++ (tagStart (elName e) (elAttribs e) $ + case elContent e of + [] + | not ("?xml" `isPrefixOf` (qName $ elName e)) -> " />" ++ xs + | otherwise -> " ?>" ++ xs + [Text t] -> ">" ++ ppCData "" t (tagEnd (elName e) xs) + cs -> ">\n" ++ foldr ppSub (i ++ tagEnd (elName e) xs) cs + where ppSub e1 = ppContentS (" " ++ i) e1 . showChar '\n' + ) + +ppCData :: String -> CData -> ShowS +ppCData i c xs = i ++ if (cdVerbatim c /= CDataText ) + then showCDataS c xs + else foldr cons xs (showCData c) + + where cons :: Char -> String -> String + cons '\n' ys = "\n" ++ i ++ ys + cons y ys = y : ys + + + +-------------------------------------------------------------------------------- +-- | Adds the <?xml?> header. +showTopElement :: Element -> String +showTopElement c = xml_header ++ showElement c + +showContent :: Content -> String +showContent c = showContentS c "" + +showElement :: Element -> String +showElement c = showElementS c "" + +showCData :: CData -> String +showCData c = showCDataS c "" + +-- Note: crefs should not contain '&', ';', etc. +showCRefS :: String -> ShowS +showCRefS r xs = '&' : r ++ ';' : xs + +-- | Good for transmition (no extra white space etc.) but less readable. +showContentS :: Content -> ShowS +showContentS (Elem e) = showElementS e +showContentS (Text cs) = showCDataS cs +showContentS (CRef cs) = showCRefS cs + +-- | Good for transmition (no extra white space etc.) but less readable. +showElementS :: Element -> ShowS +showElementS e xs = + tagStart (elName e) (elAttribs e) + $ case elContent e of + [] -> " />" ++ xs + ch -> '>' : foldr showContentS (tagEnd (elName e) xs) ch + +-- | Convert a text element to characters. +showCDataS :: CData -> ShowS +showCDataS cd = + case cdVerbatim cd of + CDataText -> escStr (cdData cd) + CDataVerbatim -> showString "<![CDATA[" . escCData (cdData cd) . showString "]]>" + CDataRaw -> \ xs -> cdData cd ++ xs + +-------------------------------------------------------------------------------- +escCData :: String -> ShowS +escCData (']' : ']' : '>' : cs) = showString "]]]]><![CDATA[>" . escCData cs +escCData (c : cs) = showChar c . escCData cs +escCData [] = id + +escChar :: Char -> ShowS +escChar c = case c of + '<' -> showString "<" + '>' -> showString ">" + '&' -> showString "&" + '"' -> showString """ + '\'' -> showString "'" + -- XXX: Is this really wortherd? + -- We could deal with these issues when we convert characters to bytes. + _ | (oc <= 0x7f && isPrint c) || c == '\n' || c == '\r' -> showChar c + | otherwise -> showString "&#" . shows oc . showChar ';' + where oc = ord c + +escStr :: String -> ShowS +escStr cs rs = foldr escChar rs cs + +tagEnd :: QName -> ShowS +tagEnd qn rs = '<':'/':showQName qn ++ '>':rs + +tagStart :: QName -> [Attr] -> ShowS +tagStart qn as rs = '<':showQName qn ++ as_str ++ rs + where as_str = if null as then "" else ' ' : unwords (map showAttr as) + +showAttr :: Attr -> String +showAttr (Attr qn v) = showQName qn ++ '=' : '"' : escStr v "\"" + +showQName :: QName -> String +showQName q = pre ++ qName q + where pre = case qPrefix q of + Nothing -> "" + Just p -> p ++ ":" + + + diff --git a/Text/XML/Light/Proc.hs b/Text/XML/Light/Proc.hs new file mode 100644 index 000000000..34d844a3f --- /dev/null +++ b/Text/XML/Light/Proc.hs @@ -0,0 +1,103 @@ +-------------------------------------------------------------------- +-- | +-- Module : Text.XML.Light.Proc +-- Copyright : (c) Galois, Inc. 2007 +-- License : BSD3 +-- +-- Maintainer: Iavor S. Diatchki <diatchki@galois.com> +-- Stability : provisional +-- Portability: +-- +-------------------------------------------------------------------- + + +module Text.XML.Light.Proc where + +import Text.XML.Light.Types + +import Data.Maybe(listToMaybe) +import Data.List(find) + +-- | Get the text value of an XML element. This function +-- ignores non-text elements, and concatenates all text elements. +strContent :: Element -> String +strContent e = concatMap cdData $ onlyText $ elContent e + +-- | Select only the elements from a list of XML content. +onlyElems :: [Content] -> [Element] +onlyElems xs = [ x | Elem x <- xs ] + +-- | Select only the elements from a parent. +elChildren :: Element -> [Element] +elChildren e = [ x | Elem x <- elContent e ] + +-- | Select only the text from a list of XML content. +onlyText :: [Content] -> [CData] +onlyText xs = [ x | Text x <- xs ] + +-- | Find all immediate children with the given name. +findChildren :: QName -> Element -> [Element] +findChildren q e = filterChildren ((q ==) . elName) e + +-- | Filter all immediate children wrt a given predicate. +filterChildren :: (Element -> Bool) -> Element -> [Element] +filterChildren p e = filter p (onlyElems (elContent e)) + + +-- | Filter all immediate children wrt a given predicate over their names. +filterChildrenName :: (QName -> Bool) -> Element -> [Element] +filterChildrenName p e = filter (p.elName) (onlyElems (elContent e)) + + +-- | Find an immediate child with the given name. +findChild :: QName -> Element -> Maybe Element +findChild q e = listToMaybe (findChildren q e) + +-- | Find an immediate child with the given name. +filterChild :: (Element -> Bool) -> Element -> Maybe Element +filterChild p e = listToMaybe (filterChildren p e) + +-- | Find an immediate child with name matching a predicate. +filterChildName :: (QName -> Bool) -> Element -> Maybe Element +filterChildName p e = listToMaybe (filterChildrenName p e) + +-- | Find the left-most occurrence of an element matching given name. +findElement :: QName -> Element -> Maybe Element +findElement q e = listToMaybe (findElements q e) + +-- | Filter the left-most occurrence of an element wrt. given predicate. +filterElement :: (Element -> Bool) -> Element -> Maybe Element +filterElement p e = listToMaybe (filterElements p e) + +-- | Filter the left-most occurrence of an element wrt. given predicate. +filterElementName :: (QName -> Bool) -> Element -> Maybe Element +filterElementName p e = listToMaybe (filterElementsName p e) + +-- | Find all non-nested occurances of an element. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +findElements :: QName -> Element -> [Element] +findElements qn e = filterElementsName (qn==) e + +-- | Find all non-nested occurrences of an element wrt. given predicate. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +filterElements :: (Element -> Bool) -> Element -> [Element] +filterElements p e + | p e = [e] + | otherwise = concatMap (filterElements p) $ onlyElems $ elContent e + +-- | Find all non-nested occurences of an element wrt a predicate over element names. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +filterElementsName :: (QName -> Bool) -> Element -> [Element] +filterElementsName p e = filterElements (p.elName) e + +-- | Lookup the value of an attribute. +findAttr :: QName -> Element -> Maybe String +findAttr x e = attrVal `fmap` find ((x ==) . attrKey) (elAttribs e) + +-- | Lookup attribute name from list. +lookupAttr :: QName -> [Attr] -> Maybe String +lookupAttr x as = attrVal `fmap` find ((x ==) . attrKey) as + diff --git a/Text/XML/Light/Types.hs b/Text/XML/Light/Types.hs new file mode 100644 index 000000000..6d8f04b4a --- /dev/null +++ b/Text/XML/Light/Types.hs @@ -0,0 +1,91 @@ +-------------------------------------------------------------------- +-- | +-- Module : Text.XML.Light.Types +-- Copyright : (c) Galois, Inc. 2007 +-- License : BSD3 +-- +-- Maintainer: Iavor S. Diatchki <diatchki@galois.com> +-- Stability : provisional +-- Portability: +-- +-- Basic XML types. +-- + +module Text.XML.Light.Types where + +-- | A line is an Integer +type Line = Integer + +-- | XML content +data Content = Elem Element + | Text CData + | CRef String + deriving Show + +-- | XML elements +data Element = Element { + elName :: QName, + elAttribs :: [Attr], + elContent :: [Content], + elLine :: Maybe Line + } deriving Show + +-- | XML attributes +data Attr = Attr { + attrKey :: QName, + attrVal :: String + } deriving (Eq,Ord,Show) + +-- | XML CData +data CData = CData { + cdVerbatim :: CDataKind, + cdData :: String, + cdLine :: Maybe Line + } deriving Show + +data CDataKind + = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc. + | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[.. + | CDataRaw -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up. + deriving ( Eq, Show ) + +-- | XML qualified names +data QName = QName { + qName :: String, + qURI :: Maybe String, + qPrefix :: Maybe String + } deriving Show + + +instance Eq QName where + q1 == q2 = compare q1 q2 == EQ + +instance Ord QName where + compare q1 q2 = + case compare (qName q1) (qName q2) of + EQ -> case (qURI q1, qURI q2) of + (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2) + (u1,u2) -> compare u1 u2 + x -> x + + +-- blank elements -------------------------------------------------------------- + +-- | Blank names +blank_name :: QName +blank_name = QName { qName = "", qURI = Nothing, qPrefix = Nothing } + +-- | Blank cdata +blank_cdata :: CData +blank_cdata = CData { cdVerbatim = CDataText, cdData = "", cdLine = Nothing } + +-- | Blank elements +blank_element :: Element +blank_element = Element + { elName = blank_name + , elAttribs = [] + , elContent = [] + , elLine = Nothing + } + + |