diff options
Diffstat (limited to 'Text')
-rw-r--r-- | Text/Pandoc/ODT.hs | 144 | ||||
-rw-r--r-- | Text/Pandoc/Shared.hs | 26 | ||||
-rw-r--r-- | Text/Pandoc/TH.hs | 14 | ||||
-rw-r--r-- | Text/XML/Light.hs | 96 | ||||
-rw-r--r-- | Text/XML/Light/Cursor.hs | 327 | ||||
-rw-r--r-- | Text/XML/Light/Input.hs | 307 | ||||
-rw-r--r-- | Text/XML/Light/Output.hs | 150 | ||||
-rw-r--r-- | Text/XML/Light/Proc.hs | 103 | ||||
-rw-r--r-- | Text/XML/Light/Types.hs | 91 |
9 files changed, 68 insertions, 1190 deletions
diff --git a/Text/Pandoc/ODT.hs b/Text/Pandoc/ODT.hs index 8c3b1b45f..10cf1b7e2 100644 --- a/Text/Pandoc/ODT.hs +++ b/Text/Pandoc/ODT.hs @@ -29,22 +29,16 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Functions for producing an ODT file from OpenDocument XML. -} module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where -import Text.Pandoc.TH ( binaryContentsOf ) -import Data.Maybe ( fromJust ) -import Data.List ( partition, intersperse ) -import System.Directory -import System.FilePath ( (</>), takeDirectory, takeFileName, splitDirectories ) -import System.Process ( runProcess, waitForProcess ) -import System.Exit -import Text.XML.Light -import Text.XML.Light.Cursor -import Text.Pandoc.Shared ( withTempDir ) -import Network.URI ( isURI ) -import qualified Data.ByteString as B ( writeFile, pack ) -import Data.ByteString.Internal ( c2w ) +import Text.Pandoc.TH ( makeZip ) +import Data.List ( find ) +import System.FilePath ( (</>), takeFileName ) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 ( fromString ) import Prelude hiding ( writeFile, readFile ) -import System.IO ( stderr ) -import System.IO.UTF8 +import Codec.Archive.Zip +import Control.Applicative ( (<$>) ) +import Text.ParserCombinators.Parsec +import System.Time -- | Produce an ODT file from OpenDocument XML. saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced. @@ -52,89 +46,43 @@ saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced. -> String -- ^ OpenDocument XML contents. -> IO () saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do - let zipCmd = "zip" - -- check for zip in path: - zipPathMaybe <- findExecutable zipCmd - let zipPath = case zipPathMaybe of - Nothing -> error $ "The '" ++ zipCmd ++ - "' command, which is needed to build an ODT file, was not found.\n" ++ - "It can be obtained from http://www.info-zip.org/Zip.html\n" ++ - "Debian (and Debian-based) linux: apt-get install zip\n" ++ - "Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm" - Just x -> x - withTempDir "pandoc-odt" $ \tempDir -> do - let tempODT = tempDir </> "reference.odt" - B.writeFile tempODT $ B.pack $ map c2w $(binaryContentsOf $ "odt-styles" </> "reference.odt") - xml' <- handlePictures tempODT sourceDirRelative xml - writeFile (tempDir </> "content.xml") xml' - ph <- runProcess zipPath ["-9", "-q", "-r", tempODT, "content.xml", "Pictures"] - (Just tempDir) Nothing Nothing Nothing (Just stderr) - ec <- waitForProcess ph -- requires compilation with -threaded - case ec of - ExitSuccess -> copyFile tempODT destinationODTPath - _ -> error "Error creating ODT." >> exitWith ec + let refArchive = read $(makeZip "odt-styles") + -- handle pictures + let (newContents, pics) = + case runParser pPictures [] "OpenDocument XML contents" xml of + Left err -> error $ show err + Right x -> x + picEntries <- mapM (makePictureEntry sourceDirRelative) pics + (TOD epochTime _) <- getClockTime + let contentEntry = toEntry "content.xml" epochTime $ fromString newContents + let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries) + B.writeFile destinationODTPath $ fromArchive archive --- | Find <draw:image ... /> elements and copy the file (xlink:href attribute) into Pictures/ in --- the zip file. If filename is a URL, attempt to download it. Modify xlink:href attributes --- to point to the new locations in Pictures/. Return modified XML. -handlePictures :: FilePath -- ^ Path of ODT file in temp directory - -> FilePath -- ^ Directory (relative) containing source file - -> String -- ^ OpenDocument XML string - -> IO String -- ^ Modified XML -handlePictures tempODT sourceDirRelative xml = do - let parsed = case parseXMLDoc xml of - Nothing -> error "Could not parse OpenDocument XML." - Just x -> x - let cursor = case (fromForest $ elContent parsed) of - Nothing -> error "ODT appears empty" - Just x -> x - cursor' <- scanPictures tempODT sourceDirRelative cursor - let modified = parsed { elContent = toForest $ root cursor' } - return $ showTopElement modified +makePictureEntry :: FilePath -- ^ Relative directory of source file + -> (FilePath, String) -- ^ Path and new path of picture + -> IO Entry +makePictureEntry sourceDirRelative (path, newPath) = do + entry <- readEntry [] $ sourceDirRelative </> path + return (entry { eRelativePath = newPath }) -scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor -scanPictures tempODT sourceDirRelative cursor = do - cursor' <- handleTree tempODT sourceDirRelative cursor - case right cursor' of - Just n -> scanPictures tempODT sourceDirRelative n - Nothing -> return cursor' - -handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor -handleTree tempODT sourceDirRelative cursor = do - case firstChild cursor of - Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor - Just n -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent - --- | If content is an image link, handle it appropriately. --- Otherwise, handle children if any. -handleContent :: FilePath -> FilePath -> Content -> IO Content -handleContent tempODT sourceDirRelative content@(Elem el) = do - if qName (elName el) == "image" - then do - let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el - let href = case hrefs of - [] -> error $ "No href found in " ++ show el - [x] -> x - _ -> error $ "Multiple hrefs found in " ++ show el - if isURI $ attrVal href - then return content - else do -- treat as filename - let oldLoc = sourceDirRelative </> attrVal href - fileExists <- doesFileExist oldLoc - if fileExists - then do - let pref = take 230 $ concat $ intersperse "_" $ - splitDirectories $ takeDirectory $ attrVal href - let picName = pref ++ "_" ++ (takeFileName $ attrVal href) - let tempDir = takeDirectory tempODT - createDirectoryIfMissing False $ tempDir </> "Pictures" - copyFile oldLoc $ tempDir </> "Pictures" </> picName - let newAttrs = (href { attrVal = "Pictures/" ++ picName }) : rest - return $ Elem (el { elAttribs = newAttrs }) - else do - hPutStrLn stderr $ "Warning: Unable to find image at " ++ oldLoc ++ " - ignoring." - return content - else return content - -handleContent _ _ c = return c -- not Element +pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)]) +pPictures = do + contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<") + pics <- getState + return (contents, pics) +pPicture :: GenParser Char [(FilePath, String)] [Char] +pPicture = try $ do + string "<draw:image xlink:href=\"" + path <- manyTill anyChar (char '"') + let filename = takeFileName path + pics <- getState + newPath <- case find (\(o, _) -> o == path) pics of + Just (_, new) -> return new + Nothing -> do + -- get a unique name + let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics + let new = "Pictures/" ++ replicate dups '0' ++ filename + updateState ((path, new) :) + return new + return $ "<draw:image xlink:href=\"" ++ newPath ++ "\"" diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs index 2c53ffa7a..9bb0c35f9 100644 --- a/Text/Pandoc/Shared.hs +++ b/Text/Pandoc/Shared.hs @@ -101,7 +101,7 @@ module Text.Pandoc.Shared ( WriterOptions (..), defaultWriterOptions, -- * File handling - withTempDir + inDirectory ) where import Text.Pandoc.Definition @@ -112,10 +112,7 @@ import Text.Pandoc.CharacterReferences ( characterReference ) import Data.Char ( toLower, toUpper, ord, isLower, isUpper ) import Data.List ( find, isPrefixOf ) import Control.Monad ( join ) -import Control.Exception ( bracket ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) -import System.FilePath ( (</>), (<.>) ) -import System.IO.Error ( catch, ioError, isAlreadyExistsError ) import System.Directory import Prelude hiding ( putStrLn, writeFile, readFile, getContents ) import System.IO.UTF8 @@ -920,16 +917,11 @@ defaultWriterOptions = -- File handling -- --- | Perform a function in a temporary directory and clean up. -withTempDir :: FilePath -> (FilePath -> IO a) -> IO a -withTempDir baseName = bracket (createTempDir 0 baseName) (removeDirectoryRecursive) - --- | Create a temporary directory with a unique name. -createTempDir :: Integer -> FilePath -> IO FilePath -createTempDir num baseName = do - sysTempDir <- getTemporaryDirectory - let dirName = sysTempDir </> baseName <.> show num - catch (createDirectory dirName >> return dirName) $ - \e -> if isAlreadyExistsError e - then createTempDir (num + 1) baseName - else ioError e +-- | Perform an IO action in a directory, returning to starting directory. +inDirectory :: FilePath -> IO a -> IO a +inDirectory path action = do + oldDir <- getCurrentDirectory + setCurrentDirectory path + result <- action + setCurrentDirectory oldDir + return result diff --git a/Text/Pandoc/TH.hs b/Text/Pandoc/TH.hs index dfd6be28b..0dc5a6719 100644 --- a/Text/Pandoc/TH.hs +++ b/Text/Pandoc/TH.hs @@ -30,7 +30,8 @@ Template haskell functions used by Pandoc modules. -} module Text.Pandoc.TH ( contentsOf, - binaryContentsOf + binaryContentsOf, + makeZip ) where import Language.Haskell.TH @@ -39,6 +40,8 @@ import qualified Data.ByteString as B import Data.ByteString.Internal ( w2c ) import Prelude hiding ( readFile ) import System.IO.UTF8 +import Codec.Archive.Zip +import Text.Pandoc.Shared ( inDirectory ) -- | Insert contents of text file into a template. contentsOf :: FilePath -> ExpQ @@ -51,3 +54,12 @@ binaryContentsOf p = lift =<< (runIO $ B.readFile p) instance Lift B.ByteString where lift x = return (LitE (StringL $ map w2c $ B.unpack x)) + +instance Lift Archive where + lift x = return (LitE (StringL $ show x )) + +-- | Construct zip file from files in a directory, and +-- insert into a template. +makeZip :: FilePath -> ExpQ +makeZip path = lift =<< (runIO $ inDirectory path $ addFilesToArchive [OptRecursive] emptyArchive ["."]) + diff --git a/Text/XML/Light.hs b/Text/XML/Light.hs deleted file mode 100644 index f2d75290b..000000000 --- a/Text/XML/Light.hs +++ /dev/null @@ -1,96 +0,0 @@ -{-# LANGUAGE FlexibleInstances #-} --------------------------------------------------------------------- --- | --- Module : Text.XML.Light --- Copyright : (c) Galois, Inc. 2007 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki <diatchki@galois.com> --- Stability : provisional --- Portability: portability --- --- A lightweight XML parsing, filtering and generating library. --- --- This module reexports functions from: --- --- * "Text.XML.Light.Types" --- --- * "Text.XML.Light.Proc" --- --- * "Text.XML.Light.Input" --- --- * "Text.XML.Light.Output" --- - -module Text.XML.Light ( - - module Text.XML.Light, - module Text.XML.Light.Types, - module Text.XML.Light.Proc, - module Text.XML.Light.Input, - module Text.XML.Light.Output - - ) where - -import Text.XML.Light.Types -import Text.XML.Light.Proc -import Text.XML.Light.Input -import Text.XML.Light.Output - --- | Add an attribute to an element. -add_attr :: Attr -> Element -> Element -add_attr a e = add_attrs [a] e - --- | Add some attributes to an element. -add_attrs :: [Attr] -> Element -> Element -add_attrs as e = e { elAttribs = as ++ elAttribs e } - --- | Create an unqualified name. -unqual :: String -> QName -unqual x = blank_name { qName = x } - --- | A smart element constructor which uses the type of its argument --- to determine what sort of element to make. -class Node t where - node :: QName -> t -> Element - -instance Node ([Attr],[Content]) where - node n (attrs,cont) = blank_element { elName = n - , elAttribs = attrs - , elContent = cont - } - -instance Node [Attr] where node n as = node n (as,[]::[Content]) -instance Node Attr where node n a = node n [a] -instance Node () where node n () = node n ([]::[Attr]) - -instance Node [Content] where node n cs = node n ([]::[Attr],cs) -instance Node Content where node n c = node n [c] -instance Node ([Attr],Content) where node n (as,c) = node n (as,[c]) -instance Node (Attr,Content) where node n (a,c) = node n ([a],[c]) - -instance Node ([Attr],[Element]) where - node n (as,cs) = node n (as,map Elem cs) - -instance Node ([Attr],Element) where node n (as,c) = node n (as,[c]) -instance Node (Attr,Element) where node n (a,c) = node n ([a],c) -instance Node ([Element]) where node n es = node n ([]::[Attr],es) -instance Node (Element) where node n e = node n [e] - -instance Node ([Attr],[CData]) where - node n (as,cs) = node n (as,map Text cs) - -instance Node ([Attr],CData) where node n (as,c) = node n (as,[c]) -instance Node (Attr,CData) where node n (a,c) = node n ([a],c) -instance Node [CData] where node n es = node n ([]::[Attr],es) -instance Node CData where node n e = node n [e] - -instance Node ([Attr],String) where - node n (as,t) = node n (as,blank_cdata { cdData = t }) - -instance Node (Attr,String) where node n (a,t) = node n ([a],t) -instance Node [Char] where node n t = node n ([]::[Attr],t) - --- | Create node with unqualified name -unode :: Node t => String -> t -> Element -unode = node . unqual diff --git a/Text/XML/Light/Cursor.hs b/Text/XML/Light/Cursor.hs deleted file mode 100644 index 06d15bdb6..000000000 --- a/Text/XML/Light/Cursor.hs +++ /dev/null @@ -1,327 +0,0 @@ --------------------------------------------------------------------- --- | --- Module : Text.XML.Light.Cursor --- Copyright : (c) Galois, Inc. 2008 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki <diatchki@galois.com> --- Stability : provisional --- Portability: --- --- XML cursors for working XML content withing the context of --- an XML document. This implemntation is based on the general --- tree zipper written by Krasimir Angelov and Iavor S. Diatchki. --- - -module Text.XML.Light.Cursor - ( Tag(..), getTag, setTag, fromTag - , Cursor(..), Path - - -- * Conversions - , fromContent - , fromElement - , fromForest - , toForest - , toTree - - -- * Moving around - , parent - , root - , getChild - , firstChild - , lastChild - , left - , right - - -- ** Searching - , findChild - , findLeft - , findRight - - -- * Node classification - , isRoot - , isFirst - , isLast - , isLeaf - , isChild - , hasChildren - , getNodeIndex - - -- * Updates - , setContent - , modifyContent - , modifyContentM - - -- ** Inserting content - , insertLeft - , insertRight - , insertGoLeft - , insertGoRight - - -- ** Removing content - , removeLeft - , removeRight - , removeGoLeft - , removeGoRight - , removeGoUp - - ) where - -import Text.XML.Light.Types -import Data.Maybe(isNothing) - -data Tag = Tag { tagName :: QName - , tagAttribs :: [Attr] - , tagLine :: Maybe Line - } deriving (Show) - -getTag :: Element -> Tag -getTag e = Tag { tagName = elName e - , tagAttribs = elAttribs e - , tagLine = elLine e - } - -setTag :: Tag -> Element -> Element -setTag t e = fromTag t (elContent e) - -fromTag :: Tag -> [Content] -> Element -fromTag t cs = Element { elName = tagName t - , elAttribs = tagAttribs t - , elLine = tagLine t - , elContent = cs - } - -type Path = [([Content],Tag,[Content])] - --- | The position of a piece of content in an XML document. -data Cursor = Cur - { current :: Content -- ^ The currently selected content. - , lefts :: [Content] -- ^ Siblings on the left, closest first. - , rights :: [Content] -- ^ Siblings on the right, closest first. - , parents :: Path -- ^ The contexts of the parent elements of this location. - } deriving (Show) - --- Moving around --------------------------------------------------------------- - --- | The parent of the given location. -parent :: Cursor -> Maybe Cursor -parent loc = - case parents loc of - (pls,v,prs) : ps -> Just - Cur { current = Elem - (fromTag v - (combChildren (lefts loc) (current loc) (rights loc))) - , lefts = pls, rights = prs, parents = ps - } - [] -> Nothing - - --- | The top-most parent of the given location. -root :: Cursor -> Cursor -root loc = maybe loc root (parent loc) - --- | The left sibling of the given location. -left :: Cursor -> Maybe Cursor -left loc = - case lefts loc of - t : ts -> Just loc { current = t, lefts = ts - , rights = current loc : rights loc } - [] -> Nothing - --- | The right sibling of the given location. -right :: Cursor -> Maybe Cursor -right loc = - case rights loc of - t : ts -> Just loc { current = t, lefts = current loc : lefts loc - , rights = ts } - [] -> Nothing - --- | The first child of the given location. -firstChild :: Cursor -> Maybe Cursor -firstChild loc = - do (t : ts, ps) <- downParents loc - return Cur { current = t, lefts = [], rights = ts , parents = ps } - --- | The last child of the given location. -lastChild :: Cursor -> Maybe Cursor -lastChild loc = - do (ts, ps) <- downParents loc - case reverse ts of - l : ls -> return Cur { current = l, lefts = ls, rights = [] - , parents = ps } - [] -> Nothing - --- | Find the next left sibling that satisfies a predicate. -findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -findLeft p loc = do loc1 <- left loc - if p loc1 then return loc1 else findLeft p loc1 - --- | Find the next right sibling that satisfies a predicate. -findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -findRight p loc = do loc1 <- right loc - if p loc1 then return loc1 else findRight p loc1 - --- | The first child that satisfies a predicate. -findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -findChild p loc = - do loc1 <- firstChild loc - if p loc1 then return loc1 else findRight p loc1 - --- | The child with the given index (starting from 0). -getChild :: Int -> Cursor -> Maybe Cursor -getChild n loc = - do (ts,ps) <- downParents loc - (ls,t,rs) <- splitChildren ts n - return Cur { current = t, lefts = ls, rights = rs, parents = ps } - - --- | private: computes the parent for "down" operations. -downParents :: Cursor -> Maybe ([Content], Path) -downParents loc = - case current loc of - Elem e -> Just ( elContent e - , (lefts loc, getTag e, rights loc) : parents loc - ) - _ -> Nothing - --- Conversions ----------------------------------------------------------------- - --- | A cursor for the guven content. -fromContent :: Content -> Cursor -fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] } - --- | A cursor for the guven element. -fromElement :: Element -> Cursor -fromElement e = fromContent (Elem e) - --- | The location of the first tree in a forest. -fromForest :: [Content] -> Maybe Cursor -fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts - , parents = [] } -fromForest [] = Nothing - --- | Computes the tree containing this location. -toTree :: Cursor -> Content -toTree loc = current (root loc) - --- | Computes the forest containing this location. -toForest :: Cursor -> [Content] -toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r) - - --- Queries --------------------------------------------------------------------- - --- | Are we at the top of the document? -isRoot :: Cursor -> Bool -isRoot loc = null (parents loc) - --- | Are we at the left end of the the document? -isFirst :: Cursor -> Bool -isFirst loc = null (lefts loc) - --- | Are we at the right end of the document? -isLast :: Cursor -> Bool -isLast loc = null (rights loc) - --- | Are we at the bottom of the document? -isLeaf :: Cursor -> Bool -isLeaf loc = isNothing (downParents loc) - --- | Do we have a parent? -isChild :: Cursor -> Bool -isChild loc = not (isRoot loc) - --- | Get the node index inside the sequence of children -getNodeIndex :: Cursor -> Int -getNodeIndex loc = length (lefts loc) - --- | Do we have children? -hasChildren :: Cursor -> Bool -hasChildren loc = not (isLeaf loc) - - - --- Updates --------------------------------------------------------------------- - --- | Change the current content. -setContent :: Content -> Cursor -> Cursor -setContent t loc = loc { current = t } - --- | Modify the current content. -modifyContent :: (Content -> Content) -> Cursor -> Cursor -modifyContent f loc = setContent (f (current loc)) loc - --- | Modify the current content, allowing for an effect. -modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor -modifyContentM f loc = do x <- f (current loc) - return (setContent x loc) - --- | Insert content to the left of the current position. -insertLeft :: Content -> Cursor -> Cursor -insertLeft t loc = loc { lefts = t : lefts loc } - --- | Insert content to the right of the current position. -insertRight :: Content -> Cursor -> Cursor -insertRight t loc = loc { rights = t : rights loc } - --- | Remove the conent on the left of the current position, if any. -removeLeft :: Cursor -> Maybe (Content,Cursor) -removeLeft loc = case lefts loc of - l : ls -> return (l,loc { lefts = ls }) - [] -> Nothing - --- | Remove the conent on the right of the current position, if any. -removeRight :: Cursor -> Maybe (Content,Cursor) -removeRight loc = case rights loc of - l : ls -> return (l,loc { rights = ls }) - [] -> Nothing - - --- | Insert content to the left of the current position. --- The new content becomes the current position. -insertGoLeft :: Content -> Cursor -> Cursor -insertGoLeft t loc = loc { current = t, rights = current loc : rights loc } - --- | Insert content to the right of the current position. --- The new content becomes the current position. -insertGoRight :: Content -> Cursor -> Cursor -insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc } - --- | Remove the current element. --- The new position is the one on the left. -removeGoLeft :: Cursor -> Maybe Cursor -removeGoLeft loc = case lefts loc of - l : ls -> Just loc { current = l, lefts = ls } - [] -> Nothing - --- | Remove the current element. --- The new position is the one on the right. -removeGoRight :: Cursor -> Maybe Cursor -removeGoRight loc = case rights loc of - l : ls -> Just loc { current = l, rights = ls } - [] -> Nothing - --- | Remove the current element. --- The new position is the parent of the old position. -removeGoUp :: Cursor -> Maybe Cursor -removeGoUp loc = - case parents loc of - (pls,v,prs) : ps -> Just - Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc)) - , lefts = pls, rights = prs, parents = ps - } - [] -> Nothing - - --- | private: Gets the given element of a list. --- Also returns the preceeding elements (reversed) and the folloing elements. -splitChildren :: [a] -> Int -> Maybe ([a],a,[a]) -splitChildren _ n | n < 0 = Nothing -splitChildren cs pos = loop [] cs pos - where loop acc (x:xs) 0 = Just (acc,x,xs) - loop acc (x:xs) n = loop (x:acc) xs $! n-1 - loop _ _ _ = Nothing - --- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys -combChildren :: [a] -> a -> [a] -> [a] -combChildren ls t rs = foldl (flip (:)) (t:rs) ls diff --git a/Text/XML/Light/Input.hs b/Text/XML/Light/Input.hs deleted file mode 100644 index 3cf3a8cd1..000000000 --- a/Text/XML/Light/Input.hs +++ /dev/null @@ -1,307 +0,0 @@ --------------------------------------------------------------------- --- | --- Module : Text.XML.Light.Input --- Copyright : (c) Galois, Inc. 2007 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki <diatchki@galois.com> --- Stability : provisional --- Portability: portable --- --- Lightweight XML parsing --- - -module Text.XML.Light.Input (parseXML,parseXMLDoc) where - -import Text.XML.Light.Types -import Text.XML.Light.Proc -import Text.XML.Light.Output(tagEnd) - -import Data.Char(isSpace) -import Data.List(isPrefixOf) -import Numeric(readHex) - --- | parseXMLDoc, parse a XMLl document to maybe an element -parseXMLDoc :: String -> Maybe Element -parseXMLDoc xs = strip (parseXML xs) - where strip cs = case onlyElems cs of - e : es - | "?xml" `isPrefixOf` qName (elName e) - -> strip (map Elem es) - | otherwise -> Just e - _ -> Nothing - --- | parseXML to a list of content chunks -parseXML :: String -> [Content] -parseXML xs = parse $ tokens $ preprocess xs - ------------------------------------------------------------------------- - -parse :: [Token] -> [Content] -parse [] = [] -parse ts = let (es,_,ts1) = nodes ([],Nothing) [] ts - in es ++ parse ts1 - --- Information about namespaces. --- The first component is a map that associates prefixes to URIs, --- the second is the URI for the default namespace, if one was provided. -type NSInfo = ([(String,String)],Maybe String) - -nodes :: NSInfo -> [QName] -> [Token] -> ([Content], [QName], [Token]) - -nodes ns ps (TokCRef ref : ts) = - let (es,qs,ts1) = nodes ns ps ts - in (CRef ref : es, qs, ts1) - -nodes ns ps (TokText txt : ts) = - let (es,qs,ts1) = nodes ns ps ts - (more,es1) = case es of - Text cd : es1' - | cdVerbatim cd == cdVerbatim txt -> (cdData cd,es1') - _ -> ([],es) - - in (Text txt { cdData = cdData txt ++ more } : es1, qs, ts1) - -nodes cur_info ps (TokStart p t as empty : ts) = (node : siblings, open, toks) - where - new_name = annotName new_info t - new_info = foldr addNS cur_info as - node = Elem Element { elLine = Just p - , elName = new_name - , elAttribs = map (annotAttr new_info) as - , elContent = children - } - - (children,(siblings,open,toks)) - | empty = ([], nodes cur_info ps ts) - | otherwise = let (es1,qs1,ts1) = nodes new_info (new_name:ps) ts - in (es1, - case qs1 of - [] -> nodes cur_info ps ts1 - _ : qs3 -> ([],qs3,ts1)) - -nodes ns ps (TokEnd p t : ts) = let t1 = annotName ns t - in case break (t1 ==) ps of - (as,_:_) -> ([],as,ts) - -- Unknown closing tag. Insert as text. - (_,[]) -> - let (es,qs,ts1) = nodes ns ps ts - in (Text CData { - cdLine = Just p, - cdVerbatim = CDataText, - cdData = tagEnd t "" - } : es,qs, ts1) - -nodes _ ps [] = ([],ps,[]) - - -annotName :: NSInfo -> QName -> QName -annotName (namespaces,def_ns) n = - n { qURI = maybe def_ns (`lookup` namespaces) (qPrefix n) } - -annotAttr :: NSInfo -> Attr -> Attr -annotAttr ns a@(Attr { attrKey = k}) = - case (qPrefix k, qName k) of - (Nothing,"xmlns") -> a - _ -> a { attrKey = annotName ns k } - -addNS :: Attr -> NSInfo -> NSInfo -addNS (Attr { attrKey = key, attrVal = val }) (ns,def) = - case (qPrefix key, qName key) of - (Nothing,"xmlns") -> (ns, if null val then Nothing else Just val) - (Just "xmlns", k) -> ((k, val) : ns, def) - _ -> (ns,def) - - --- Lexer ----------------------------------------------------------------------- - -type LChar = (Line,Char) -type LString = [LChar] -data Token = TokStart Line QName [Attr] Bool -- is empty? - | TokEnd Line QName - | TokCRef String - | TokText CData - deriving Show - -tokens :: String -> [Token] -tokens = tokens' . linenumber 1 - -tokens' :: LString -> [Token] -tokens' ((_,'<') : c@(_,'!') : cs) = special c cs - -tokens' ((_,'<') : cs) = tag (dropSpace cs) -- we are being nice here -tokens' [] = [] -tokens' cs@((l,_):_) = let (as,bs) = breakn ('<' ==) cs - in map cvt (decode_text as) ++ tokens' bs - - -- XXX: Note, some of the lines might be a bit inacuarate - where cvt (TxtBit x) = TokText CData { cdLine = Just l - , cdVerbatim = CDataText - , cdData = x - } - cvt (CRefBit x) = case cref_to_char x of - Just c -> TokText CData { cdLine = Just l - , cdVerbatim = CDataText - , cdData = [c] - } - Nothing -> TokCRef x - - -special :: LChar -> LString -> [Token] -special _ ((_,'-') : (_,'-') : cs) = skip cs - where skip ((_,'-') : (_,'-') : (_,'>') : ds) = tokens' ds - skip (_ : ds) = skip ds - skip [] = [] -- unterminated comment - -special c ((_,'[') : (_,'C') : (_,'D') : (_,'A') : (_,'T') : (_,'A') : (_,'[') - : cs) = - let (xs,ts) = cdata cs - in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataVerbatim, cdData = xs } - : tokens' ts - where cdata ((_,']') : (_,']') : (_,'>') : ds) = ([],ds) - cdata ((_,d) : ds) = let (xs,ys) = cdata ds in (d:xs,ys) - cdata [] = ([],[]) - -special c cs = - let (xs,ts) = munch "" 0 cs - in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataRaw, cdData = '<':'!':(reverse xs) } : tokens' ts - where munch acc nesting ((_,'>') : ds) - | nesting == (0::Int) = ('>':acc,ds) - | otherwise = munch ('>':acc) (nesting-1) ds - munch acc nesting ((_,'<') : ds) - = munch ('<':acc) (nesting+1) ds - munch acc n ((_,x) : ds) = munch (x:acc) n ds - munch acc _ [] = (acc,[]) -- unterminated DTD markup - ---special c cs = tag (c : cs) -- invalid specials are processed as tags - - -qualName :: LString -> (QName,LString) -qualName xs = let (as,bs) = breakn endName xs - (q,n) = case break (':'==) as of - (q1,_:n1) -> (Just q1, n1) - _ -> (Nothing, as) - in (QName { qURI = Nothing, qPrefix = q, qName = n }, bs) - where endName x = isSpace x || x == '=' || x == '>' || x == '/' - - - - - -tag :: LString -> [Token] -tag ((p,'/') : cs) = let (n,ds) = qualName (dropSpace cs) - in TokEnd p n : case ds of - (_,'>') : es -> tokens' es - -- tag was not properly closed... - _ -> tokens' ds -tag [] = [] -tag cs = let (n,ds) = qualName cs - (as,b,ts) = attribs (dropSpace ds) - in TokStart (fst (head cs)) n as b : ts - -attribs :: LString -> ([Attr], Bool, [Token]) -attribs cs = case cs of - (_,'>') : ds -> ([], False, tokens' ds) - - (_,'/') : ds -> ([], True, case ds of - (_,'>') : es -> tokens' es - -- insert missing > ... - _ -> tokens' ds) - - (_,'?') : (_,'>') : ds -> ([], True, tokens' ds) - - -- doc ended within a tag.. - [] -> ([],False,[]) - - _ -> let (a,cs1) = attrib cs - (as,b,ts) = attribs cs1 - in (a:as,b,ts) - -attrib :: LString -> (Attr,LString) -attrib cs = let (ks,cs1) = qualName cs - (vs,cs2) = attr_val (dropSpace cs1) - in ((Attr ks (decode_attr vs)),dropSpace cs2) - -attr_val :: LString -> (String,LString) -attr_val ((_,'=') : cs) = string (dropSpace cs) -attr_val cs = ("",cs) - - -dropSpace :: LString -> LString -dropSpace = dropWhile (isSpace . snd) - --- | Match the value for an attribute. For malformed XML we do --- our best to guess the programmer's intention. -string :: LString -> (String,LString) -string ((_,'"') : cs) = break' ('"' ==) cs - --- Allow attributes to be enclosed between ' '. -string ((_,'\'') : cs) = break' ('\'' ==) cs - --- Allow attributes that are not enclosed by anything. -string cs = breakn eos cs - where eos x = isSpace x || x == '>' || x == '/' - - -break' :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)]) -break' p xs = let (as,bs) = breakn p xs - in (as, case bs of - [] -> [] - _ : cs -> cs) - -breakn :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)]) -breakn p l = (map snd as,bs) where (as,bs) = break (p . snd) l - - - -decode_attr :: String -> String -decode_attr cs = concatMap cvt (decode_text cs) - where cvt (TxtBit x) = x - cvt (CRefBit x) = case cref_to_char x of - Just c -> [c] - Nothing -> '&' : x ++ ";" - -data Txt = TxtBit String | CRefBit String deriving Show - -decode_text :: [Char] -> [Txt] -decode_text xs@('&' : cs) = case break (';' ==) cs of - (as,_:bs) -> CRefBit as : decode_text bs - _ -> [TxtBit xs] -decode_text [] = [] -decode_text cs = let (as,bs) = break ('&' ==) cs - in TxtBit as : decode_text bs - -cref_to_char :: [Char] -> Maybe Char -cref_to_char cs = case cs of - '#' : ds -> num_esc ds - "lt" -> Just '<' - "gt" -> Just '>' - "amp" -> Just '&' - "apos" -> Just '\'' - "quot" -> Just '"' - _ -> Nothing - -num_esc :: String -> Maybe Char -num_esc cs = case cs of - 'x' : ds -> check (readHex ds) - _ -> check (reads cs) - - where check [(n,"")] = cvt_char n - check _ = Nothing - -cvt_char :: Int -> Maybe Char -cvt_char x - | fromEnum (minBound :: Char) <= x && x <= fromEnum (maxBound::Char) - = Just (toEnum x) - | otherwise = Nothing - -preprocess :: String -> String -preprocess ('\r' : '\n' : cs) = '\n' : preprocess cs -preprocess ('\r' : cs) = '\n' : preprocess cs -preprocess (c : cs) = c : preprocess cs -preprocess [] = [] - -linenumber :: Line -> String -> LString -linenumber _ [] = [] -linenumber n ('\n':s) = n' `seq` ((n,'\n'):linenumber n' s) where n' = n + 1 -linenumber n (c:s) = (n,c) : linenumber n s diff --git a/Text/XML/Light/Output.hs b/Text/XML/Light/Output.hs deleted file mode 100644 index 65d1bb1af..000000000 --- a/Text/XML/Light/Output.hs +++ /dev/null @@ -1,150 +0,0 @@ --------------------------------------------------------------------- --- | --- Module : Text.XML.Light.Output --- Copyright : (c) Galois, Inc. 2007 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki <diatchki@galois.com> --- Stability : provisional --- Portability: --- --- Output handling for the lightweight XML lib. --- - -module Text.XML.Light.Output - ( showTopElement, showContent, showElement, showCData, showQName, showAttr - , ppTopElement, ppContent, ppElement - , tagEnd, xml_header - ) where - -import Text.XML.Light.Types -import Data.Char -import Data.List ( isPrefixOf ) - --- | The XML 1.0 header -xml_header :: String -xml_header = "<?xml version='1.0' ?>" - --- | Pretty printing renders XML documents faithfully, --- with the exception that whitespace may be added\/removed --- in non-verbatim character data. -ppTopElement :: Element -> String -ppTopElement e = unlines [xml_header,ppElement e] - --- | Pretty printing elements -ppElement :: Element -> String -ppElement e = ppElementS "" e "" - --- | Pretty printing content -ppContent :: Content -> String -ppContent x = ppContentS "" x "" - --- | Pretty printing content using ShowS -ppContentS :: String -> Content -> ShowS -ppContentS i x xs = case x of - Elem e -> ppElementS i e xs - Text c -> ppCData i c xs - CRef r -> showCRefS r xs - -ppElementS :: String -> Element -> ShowS -ppElementS i e xs = i ++ (tagStart (elName e) (elAttribs e) $ - case elContent e of - [] - | not ("?xml" `isPrefixOf` (qName $ elName e)) -> " />" ++ xs - | otherwise -> " ?>" ++ xs - [Text t] -> ">" ++ ppCData "" t (tagEnd (elName e) xs) - cs -> ">\n" ++ foldr ppSub (i ++ tagEnd (elName e) xs) cs - where ppSub e1 = ppContentS (" " ++ i) e1 . showChar '\n' - ) - -ppCData :: String -> CData -> ShowS -ppCData i c xs = i ++ if (cdVerbatim c /= CDataText ) - then showCDataS c xs - else foldr cons xs (showCData c) - - where cons :: Char -> String -> String - cons '\n' ys = "\n" ++ i ++ ys - cons y ys = y : ys - - - --------------------------------------------------------------------------------- --- | Adds the <?xml?> header. -showTopElement :: Element -> String -showTopElement c = xml_header ++ showElement c - -showContent :: Content -> String -showContent c = showContentS c "" - -showElement :: Element -> String -showElement c = showElementS c "" - -showCData :: CData -> String -showCData c = showCDataS c "" - --- Note: crefs should not contain '&', ';', etc. -showCRefS :: String -> ShowS -showCRefS r xs = '&' : r ++ ';' : xs - --- | Good for transmition (no extra white space etc.) but less readable. -showContentS :: Content -> ShowS -showContentS (Elem e) = showElementS e -showContentS (Text cs) = showCDataS cs -showContentS (CRef cs) = showCRefS cs - --- | Good for transmition (no extra white space etc.) but less readable. -showElementS :: Element -> ShowS -showElementS e xs = - tagStart (elName e) (elAttribs e) - $ case elContent e of - [] -> " />" ++ xs - ch -> '>' : foldr showContentS (tagEnd (elName e) xs) ch - --- | Convert a text element to characters. -showCDataS :: CData -> ShowS -showCDataS cd = - case cdVerbatim cd of - CDataText -> escStr (cdData cd) - CDataVerbatim -> showString "<![CDATA[" . escCData (cdData cd) . showString "]]>" - CDataRaw -> \ xs -> cdData cd ++ xs - --------------------------------------------------------------------------------- -escCData :: String -> ShowS -escCData (']' : ']' : '>' : cs) = showString "]]]]><![CDATA[>" . escCData cs -escCData (c : cs) = showChar c . escCData cs -escCData [] = id - -escChar :: Char -> ShowS -escChar c = case c of - '<' -> showString "<" - '>' -> showString ">" - '&' -> showString "&" - '"' -> showString """ - '\'' -> showString "'" - -- XXX: Is this really wortherd? - -- We could deal with these issues when we convert characters to bytes. - _ | (oc <= 0x7f && isPrint c) || c == '\n' || c == '\r' -> showChar c - | otherwise -> showString "&#" . shows oc . showChar ';' - where oc = ord c - -escStr :: String -> ShowS -escStr cs rs = foldr escChar rs cs - -tagEnd :: QName -> ShowS -tagEnd qn rs = '<':'/':showQName qn ++ '>':rs - -tagStart :: QName -> [Attr] -> ShowS -tagStart qn as rs = '<':showQName qn ++ as_str ++ rs - where as_str = if null as then "" else ' ' : unwords (map showAttr as) - -showAttr :: Attr -> String -showAttr (Attr qn v) = showQName qn ++ '=' : '"' : escStr v "\"" - -showQName :: QName -> String -showQName q = pre ++ qName q - where pre = case qPrefix q of - Nothing -> "" - Just p -> p ++ ":" - - - diff --git a/Text/XML/Light/Proc.hs b/Text/XML/Light/Proc.hs deleted file mode 100644 index 34d844a3f..000000000 --- a/Text/XML/Light/Proc.hs +++ /dev/null @@ -1,103 +0,0 @@ --------------------------------------------------------------------- --- | --- Module : Text.XML.Light.Proc --- Copyright : (c) Galois, Inc. 2007 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki <diatchki@galois.com> --- Stability : provisional --- Portability: --- --------------------------------------------------------------------- - - -module Text.XML.Light.Proc where - -import Text.XML.Light.Types - -import Data.Maybe(listToMaybe) -import Data.List(find) - --- | Get the text value of an XML element. This function --- ignores non-text elements, and concatenates all text elements. -strContent :: Element -> String -strContent e = concatMap cdData $ onlyText $ elContent e - --- | Select only the elements from a list of XML content. -onlyElems :: [Content] -> [Element] -onlyElems xs = [ x | Elem x <- xs ] - --- | Select only the elements from a parent. -elChildren :: Element -> [Element] -elChildren e = [ x | Elem x <- elContent e ] - --- | Select only the text from a list of XML content. -onlyText :: [Content] -> [CData] -onlyText xs = [ x | Text x <- xs ] - --- | Find all immediate children with the given name. -findChildren :: QName -> Element -> [Element] -findChildren q e = filterChildren ((q ==) . elName) e - --- | Filter all immediate children wrt a given predicate. -filterChildren :: (Element -> Bool) -> Element -> [Element] -filterChildren p e = filter p (onlyElems (elContent e)) - - --- | Filter all immediate children wrt a given predicate over their names. -filterChildrenName :: (QName -> Bool) -> Element -> [Element] -filterChildrenName p e = filter (p.elName) (onlyElems (elContent e)) - - --- | Find an immediate child with the given name. -findChild :: QName -> Element -> Maybe Element -findChild q e = listToMaybe (findChildren q e) - --- | Find an immediate child with the given name. -filterChild :: (Element -> Bool) -> Element -> Maybe Element -filterChild p e = listToMaybe (filterChildren p e) - --- | Find an immediate child with name matching a predicate. -filterChildName :: (QName -> Bool) -> Element -> Maybe Element -filterChildName p e = listToMaybe (filterChildrenName p e) - --- | Find the left-most occurrence of an element matching given name. -findElement :: QName -> Element -> Maybe Element -findElement q e = listToMaybe (findElements q e) - --- | Filter the left-most occurrence of an element wrt. given predicate. -filterElement :: (Element -> Bool) -> Element -> Maybe Element -filterElement p e = listToMaybe (filterElements p e) - --- | Filter the left-most occurrence of an element wrt. given predicate. -filterElementName :: (QName -> Bool) -> Element -> Maybe Element -filterElementName p e = listToMaybe (filterElementsName p e) - --- | Find all non-nested occurances of an element. --- (i.e., once we have found an element, we do not search --- for more occurances among the element's children). -findElements :: QName -> Element -> [Element] -findElements qn e = filterElementsName (qn==) e - --- | Find all non-nested occurrences of an element wrt. given predicate. --- (i.e., once we have found an element, we do not search --- for more occurances among the element's children). -filterElements :: (Element -> Bool) -> Element -> [Element] -filterElements p e - | p e = [e] - | otherwise = concatMap (filterElements p) $ onlyElems $ elContent e - --- | Find all non-nested occurences of an element wrt a predicate over element names. --- (i.e., once we have found an element, we do not search --- for more occurances among the element's children). -filterElementsName :: (QName -> Bool) -> Element -> [Element] -filterElementsName p e = filterElements (p.elName) e - --- | Lookup the value of an attribute. -findAttr :: QName -> Element -> Maybe String -findAttr x e = attrVal `fmap` find ((x ==) . attrKey) (elAttribs e) - --- | Lookup attribute name from list. -lookupAttr :: QName -> [Attr] -> Maybe String -lookupAttr x as = attrVal `fmap` find ((x ==) . attrKey) as - diff --git a/Text/XML/Light/Types.hs b/Text/XML/Light/Types.hs deleted file mode 100644 index 6d8f04b4a..000000000 --- a/Text/XML/Light/Types.hs +++ /dev/null @@ -1,91 +0,0 @@ --------------------------------------------------------------------- --- | --- Module : Text.XML.Light.Types --- Copyright : (c) Galois, Inc. 2007 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki <diatchki@galois.com> --- Stability : provisional --- Portability: --- --- Basic XML types. --- - -module Text.XML.Light.Types where - --- | A line is an Integer -type Line = Integer - --- | XML content -data Content = Elem Element - | Text CData - | CRef String - deriving Show - --- | XML elements -data Element = Element { - elName :: QName, - elAttribs :: [Attr], - elContent :: [Content], - elLine :: Maybe Line - } deriving Show - --- | XML attributes -data Attr = Attr { - attrKey :: QName, - attrVal :: String - } deriving (Eq,Ord,Show) - --- | XML CData -data CData = CData { - cdVerbatim :: CDataKind, - cdData :: String, - cdLine :: Maybe Line - } deriving Show - -data CDataKind - = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc. - | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[.. - | CDataRaw -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up. - deriving ( Eq, Show ) - --- | XML qualified names -data QName = QName { - qName :: String, - qURI :: Maybe String, - qPrefix :: Maybe String - } deriving Show - - -instance Eq QName where - q1 == q2 = compare q1 q2 == EQ - -instance Ord QName where - compare q1 q2 = - case compare (qName q1) (qName q2) of - EQ -> case (qURI q1, qURI q2) of - (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2) - (u1,u2) -> compare u1 u2 - x -> x - - --- blank elements -------------------------------------------------------------- - --- | Blank names -blank_name :: QName -blank_name = QName { qName = "", qURI = Nothing, qPrefix = Nothing } - --- | Blank cdata -blank_cdata :: CData -blank_cdata = CData { cdVerbatim = CDataText, cdData = "", cdLine = Nothing } - --- | Blank elements -blank_element :: Element -blank_element = Element - { elName = blank_name - , elAttribs = [] - , elContent = [] - , elLine = Nothing - } - - |