From 4dca8f6e75948d489e8127119ce3787cb97ee1e2 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Thu, 4 Sep 2008 02:51:28 +0000 Subject: Reworked Text.Pandoc.ODT to use zip-archive instead of calling external 'zip'. + Removed utf8-string and xml-light modules, and unneeded content.xml. + Removed code for building reference.odt from Setup.hs. The ODT is now built using template haskell in Text.Pandoc.ODT. + Removed copyright statements for utf8-string and xml modules, since they are no longer included in the source. + README: Removed claim that 'zip' is needed for ODT production. + Removed dependency on 'zip' from debian/control. + Text.Pandoc.Shared: Removed withTempDir, added inDirectory. + Added makeZip to Text.Pandoc.TH. + pandoc.cabal: Added dependencies on old-time, zip-archive, and utf8-string. Added markdown2pdf files to extra-sources list. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1417 788f1e2b-df1e-0410-8736-df70ead52e1b --- Codec/Binary/UTF8/String.hs | 97 ------------- README | 5 - Setup.hs | 29 ---- System/IO/UTF8.hs | 118 ---------------- Text/Pandoc/ODT.hs | 144 +++++++------------ Text/Pandoc/Shared.hs | 26 ++-- Text/Pandoc/TH.hs | 14 +- Text/XML/Light.hs | 96 ------------- Text/XML/Light/Cursor.hs | 327 -------------------------------------------- Text/XML/Light/Input.hs | 307 ----------------------------------------- Text/XML/Light/Output.hs | 150 -------------------- Text/XML/Light/Proc.hs | 103 -------------- Text/XML/Light/Types.hs | 91 ------------ debian/control | 6 +- debian/copyright | 62 --------- odt-styles/content.xml | 17 --- pandoc.cabal | 20 +-- 17 files changed, 75 insertions(+), 1537 deletions(-) delete mode 100644 Codec/Binary/UTF8/String.hs delete mode 100644 System/IO/UTF8.hs delete mode 100644 Text/XML/Light.hs delete mode 100644 Text/XML/Light/Cursor.hs delete mode 100644 Text/XML/Light/Input.hs delete mode 100644 Text/XML/Light/Output.hs delete mode 100644 Text/XML/Light/Proc.hs delete mode 100644 Text/XML/Light/Types.hs delete mode 100644 odt-styles/content.xml diff --git a/Codec/Binary/UTF8/String.hs b/Codec/Binary/UTF8/String.hs deleted file mode 100644 index 27c003f00..000000000 --- a/Codec/Binary/UTF8/String.hs +++ /dev/null @@ -1,97 +0,0 @@ --- --- | --- Module : Codec.Binary.UTF8.String --- Copyright : (c) Eric Mertens 2007 --- License : BSD3-style (see LICENSE) --- --- Maintainer: emertens@galois.com --- Stability : experimental --- Portability : portable --- --- Support for encoding UTF8 Strings to and from @[Word8]@ --- - -module Codec.Binary.UTF8.String ( - encode - , decode - , encodeString - , decodeString - ) where - -import Data.Word (Word8) -import Data.Bits ((.|.),(.&.),shiftL,shiftR) -import Data.Char (chr,ord) - -default(Int) - --- | Encode a string using 'encode' and store the result in a 'String'. -encodeString :: String -> String -encodeString xs = map (toEnum . fromEnum) (encode xs) - --- | Decode a string using 'decode' using a 'String' as input. --- | This is not safe but it is necessary if UTF-8 encoded text --- | has been loaded into a 'String' prior to being decoded. -decodeString :: String -> String -decodeString xs = decode (map (toEnum . fromEnum) xs) - -replacement_character :: Char -replacement_character = '\xfffd' - --- | Encode a Haskell String to a list of Word8 values, in UTF8 format. -encode :: String -> [Word8] -encode = concatMap (map fromIntegral . go . ord) - where - go oc - | oc <= 0x7f = [oc] - - | oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6) - , 0x80 + oc .&. 0x3f - ] - - | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12) - , 0x80 + ((oc `shiftR` 6) .&. 0x3f) - , 0x80 + oc .&. 0x3f - ] - | otherwise = [ 0xf0 + (oc `shiftR` 18) - , 0x80 + ((oc `shiftR` 12) .&. 0x3f) - , 0x80 + ((oc `shiftR` 6) .&. 0x3f) - , 0x80 + oc .&. 0x3f - ] - --- --- | Decode a UTF8 string packed into a list of Word8 values, directly to String --- -decode :: [Word8] -> String -decode [ ] = "" -decode (c:cs) - | c < 0x80 = chr (fromEnum c) : decode cs - | c < 0xc0 = replacement_character : decode cs - | c < 0xe0 = multi1 - | c < 0xf0 = multi_byte 2 0xf 0x800 - | c < 0xf8 = multi_byte 3 0x7 0x10000 - | c < 0xfc = multi_byte 4 0x3 0x200000 - | c < 0xfe = multi_byte 5 0x1 0x4000000 - | otherwise = replacement_character : decode cs - where - multi1 = case cs of - c1 : ds | c1 .&. 0xc0 == 0x80 -> - let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f) - in if d >= 0x000080 then toEnum d : decode ds - else replacement_character : decode ds - _ -> replacement_character : decode cs - - multi_byte :: Int -> Word8 -> Int -> [Char] - multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask)) - where - aux 0 rs acc - | overlong <= acc && acc <= 0x10ffff && - (acc < 0xd800 || 0xdfff < acc) && - (acc < 0xfffe || 0xffff < acc) = chr acc : decode rs - | otherwise = replacement_character : decode rs - - aux n (r:rs) acc - | r .&. 0xc0 == 0x80 = aux (n-1) rs - $ shiftL acc 6 .|. fromEnum (r .&. 0x3f) - - aux _ rs _ = replacement_character : decode rs - diff --git a/README b/README index 89314fbb2..f05331a20 100644 --- a/README +++ b/README @@ -87,11 +87,6 @@ Supported output formats include `markdown`, `latex`, `context` Texinfo), `mediawiki` (MediaWiki markup), `man` (groff man), and `s5` (which produces an HTML file that acts like powerpoint). -For `odt` output, you must have `zip` in the path. If you -don't have it installed, you can get the free [Info-ZIP]. - -[Info-ZIP]: http://www.info-zip.org/Zip.html - Supported input formats include `markdown`, `html`, `latex`, and `rst`. Note that the `rst` reader only parses a subset of reStructuredText syntax. For example, it doesn't handle tables, option lists, or diff --git a/Setup.hs b/Setup.hs index c51c53bb1..9be68f8ce 100644 --- a/Setup.hs +++ b/Setup.hs @@ -12,7 +12,6 @@ import Data.Maybe ( fromJust, isNothing, catMaybes ) main = defaultMainWithHooks $ simpleUserHooks { runTests = runTestSuite - , preConf = checkReferenceODT , postBuild = makeManPages } -- | Run test suite. @@ -20,34 +19,6 @@ runTestSuite _ _ _ _ = do inDirectory "tests" $ runCommand "runhaskell -i.. RunTests.hs" >>= waitForProcess return () --- | If reference.odt needs rebuilding, build it. -checkReferenceODT _ _ = inDirectory "odt-styles" $ do - let refodt = "reference.odt" - let deps = [ "meta.xml", "content.xml", "settings.xml", "META-INF/manifest.xml", - "Thumbnails/thumbnail.png", "styles.xml", "mimetype" ] - modifiedDeps <- modifiedDependencies refodt deps - if null modifiedDeps - then return () - else makeReferenceODT modifiedDeps - return emptyHookedBuildInfo - --- | Create reference.odt by zipping up sources in odt-styles directory. -makeReferenceODT :: [FilePath] -> IO () -makeReferenceODT sources = do - zipPathMaybe <- findExecutable "zip" - if isNothing zipPathMaybe - then error $ "The 'zip' command, which is needed to build reference.odt\n" ++ - "from sources in the odt-styles directory, was not found.\n" ++ - "Try again after installing zip (http://www.info-zip.org/Zip.html).\n" ++ - "Or use the pandoc source tarball, which contains a prebuilt reference.odt." - else do - putStrLn "Creating reference.odt:" - ec <- runProcess (fromJust zipPathMaybe) (["-9", "-r", "reference.odt"] ++ sources) - Nothing Nothing Nothing Nothing (Just stderr) >>= waitForProcess - case ec of - ExitSuccess -> return () - _ -> error "Error creating ODT." - -- | Build man pages from markdown sources in man/man1/. makeManPages _ _ _ _ = do mapM makeManPage ["pandoc.1", "hsmarkdown.1", "html2markdown.1", "markdown2pdf.1"] diff --git a/System/IO/UTF8.hs b/System/IO/UTF8.hs deleted file mode 100644 index d0af4c38e..000000000 --- a/System/IO/UTF8.hs +++ /dev/null @@ -1,118 +0,0 @@ ------------------------------------------------------------------------------ --- | --- Module : System.IO.UTF8 --- Copyright : (c) Eric Mertens 2007 --- License : BSD3-style (see LICENSE) --- --- Maintainer: emertens@galois.com --- Stability : experimental --- Portability : portable --- --- String IO preserving UTF8 encoding. --- - -module System.IO.UTF8 ( - print - , putStr - , putStrLn - , getLine - , readLn - , readFile - , writeFile - , appendFile - , getContents - , hGetLine - , hGetContents - , hPutStr - , hPutStrLn - ) where - -import Control.Monad (liftM) -import Data.Char (ord, chr) -import Data.Word (Word8) -import Prelude (String, ($), (=<<), (>>=), (.), map, toEnum, fromEnum, Read, - Show(..)) -import System.IO (Handle, IO, FilePath) -import qualified System.IO as IO - -import Codec.Binary.UTF8.String (encode, decode) - - --- | Encode a string in UTF8 form. -encodeString :: String -> String -encodeString xs = bytesToString (encode xs) - --- | Decode a string from UTF8 -decodeString :: String -> String -decodeString xs = decode (stringToBytes xs) - --- | Convert a list of bytes to a String -bytesToString :: [Word8] -> String -bytesToString xs = map (chr . fromEnum) xs - --- | String to list of bytes. -stringToBytes :: String -> [Word8] -stringToBytes xs = map (toEnum . ord) xs - --- | The 'print' function outputs a value of any printable type to the --- standard output device. This function differs from the --- System.IO.print in that it preserves any UTF8 encoding of the shown value. --- -print :: Show a => a -> IO () -print x = putStrLn (show x) - --- | Write a UTF8 string to the standard output device -putStr :: String -> IO () -putStr x = IO.putStr (encodeString x) - --- | The same as 'putStr', but adds a newline character. -putStrLn :: String -> IO () -putStrLn x = IO.putStrLn (encodeString x) - --- | Read a UTF8 line from the standard input device -getLine :: IO String -getLine = liftM decodeString IO.getLine - --- | The 'readLn' function combines 'getLine' and 'readIO', preserving UTF8 -readLn :: Read a => IO a -readLn = IO.readIO =<< getLine - --- | The 'readFile' function reads a file and --- returns the contents of the file as a UTF8 string. --- The file is read lazily, on demand, as with 'getContents'. -readFile :: FilePath -> IO String -readFile n = liftM decodeString (IO.openBinaryFile n IO.ReadMode >>= - IO.hGetContents) - --- | The computation 'writeFile' @file str@ function writes the UTF8 string @str@, --- to the file @file@. -writeFile :: FilePath -> String -> IO () -writeFile n c = IO.withBinaryFile n IO.WriteMode $ \ h -> - IO.hPutStr h $ encodeString c - --- | The computation 'appendFile' @file str@ function appends the UTF8 string @str@, --- to the file @file@. -appendFile :: FilePath -> String -> IO () -appendFile n c = IO.withBinaryFile n IO.AppendMode $ \h -> - IO.hPutStr h $ encodeString c - --- | Read a UTF8 line from a Handle -hGetLine :: Handle -> IO String -hGetLine h = liftM decodeString $ IO.hGetLine h - --- | Lazily read a UTF8 string from a Handle -hGetContents :: Handle -> IO String -hGetContents h = liftM decodeString (IO.hGetContents h) - --- | Write a UTF8 string to a Handle. -hPutStr :: Handle -> String -> IO () -hPutStr h s = IO.hPutStr h (encodeString s) - --- | Write a UTF8 string to a Handle, appending a newline. -hPutStrLn :: Handle -> String -> IO () -hPutStrLn h s = IO.hPutStrLn h (encodeString s) - --- | Lazily read stdin as a UTF8 string. -getContents :: IO String -getContents = liftM decodeString IO.getContents - diff --git a/Text/Pandoc/ODT.hs b/Text/Pandoc/ODT.hs index 8c3b1b45f..10cf1b7e2 100644 --- a/Text/Pandoc/ODT.hs +++ b/Text/Pandoc/ODT.hs @@ -29,22 +29,16 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Functions for producing an ODT file from OpenDocument XML. -} module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where -import Text.Pandoc.TH ( binaryContentsOf ) -import Data.Maybe ( fromJust ) -import Data.List ( partition, intersperse ) -import System.Directory -import System.FilePath ( (), takeDirectory, takeFileName, splitDirectories ) -import System.Process ( runProcess, waitForProcess ) -import System.Exit -import Text.XML.Light -import Text.XML.Light.Cursor -import Text.Pandoc.Shared ( withTempDir ) -import Network.URI ( isURI ) -import qualified Data.ByteString as B ( writeFile, pack ) -import Data.ByteString.Internal ( c2w ) +import Text.Pandoc.TH ( makeZip ) +import Data.List ( find ) +import System.FilePath ( (), takeFileName ) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 ( fromString ) import Prelude hiding ( writeFile, readFile ) -import System.IO ( stderr ) -import System.IO.UTF8 +import Codec.Archive.Zip +import Control.Applicative ( (<$>) ) +import Text.ParserCombinators.Parsec +import System.Time -- | Produce an ODT file from OpenDocument XML. saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced. @@ -52,89 +46,43 @@ saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced. -> String -- ^ OpenDocument XML contents. -> IO () saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do - let zipCmd = "zip" - -- check for zip in path: - zipPathMaybe <- findExecutable zipCmd - let zipPath = case zipPathMaybe of - Nothing -> error $ "The '" ++ zipCmd ++ - "' command, which is needed to build an ODT file, was not found.\n" ++ - "It can be obtained from http://www.info-zip.org/Zip.html\n" ++ - "Debian (and Debian-based) linux: apt-get install zip\n" ++ - "Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm" - Just x -> x - withTempDir "pandoc-odt" $ \tempDir -> do - let tempODT = tempDir "reference.odt" - B.writeFile tempODT $ B.pack $ map c2w $(binaryContentsOf $ "odt-styles" "reference.odt") - xml' <- handlePictures tempODT sourceDirRelative xml - writeFile (tempDir "content.xml") xml' - ph <- runProcess zipPath ["-9", "-q", "-r", tempODT, "content.xml", "Pictures"] - (Just tempDir) Nothing Nothing Nothing (Just stderr) - ec <- waitForProcess ph -- requires compilation with -threaded - case ec of - ExitSuccess -> copyFile tempODT destinationODTPath - _ -> error "Error creating ODT." >> exitWith ec + let refArchive = read $(makeZip "odt-styles") + -- handle pictures + let (newContents, pics) = + case runParser pPictures [] "OpenDocument XML contents" xml of + Left err -> error $ show err + Right x -> x + picEntries <- mapM (makePictureEntry sourceDirRelative) pics + (TOD epochTime _) <- getClockTime + let contentEntry = toEntry "content.xml" epochTime $ fromString newContents + let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries) + B.writeFile destinationODTPath $ fromArchive archive --- | Find elements and copy the file (xlink:href attribute) into Pictures/ in --- the zip file. If filename is a URL, attempt to download it. Modify xlink:href attributes --- to point to the new locations in Pictures/. Return modified XML. -handlePictures :: FilePath -- ^ Path of ODT file in temp directory - -> FilePath -- ^ Directory (relative) containing source file - -> String -- ^ OpenDocument XML string - -> IO String -- ^ Modified XML -handlePictures tempODT sourceDirRelative xml = do - let parsed = case parseXMLDoc xml of - Nothing -> error "Could not parse OpenDocument XML." - Just x -> x - let cursor = case (fromForest $ elContent parsed) of - Nothing -> error "ODT appears empty" - Just x -> x - cursor' <- scanPictures tempODT sourceDirRelative cursor - let modified = parsed { elContent = toForest $ root cursor' } - return $ showTopElement modified +makePictureEntry :: FilePath -- ^ Relative directory of source file + -> (FilePath, String) -- ^ Path and new path of picture + -> IO Entry +makePictureEntry sourceDirRelative (path, newPath) = do + entry <- readEntry [] $ sourceDirRelative path + return (entry { eRelativePath = newPath }) -scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor -scanPictures tempODT sourceDirRelative cursor = do - cursor' <- handleTree tempODT sourceDirRelative cursor - case right cursor' of - Just n -> scanPictures tempODT sourceDirRelative n - Nothing -> return cursor' - -handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor -handleTree tempODT sourceDirRelative cursor = do - case firstChild cursor of - Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor - Just n -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent - --- | If content is an image link, handle it appropriately. --- Otherwise, handle children if any. -handleContent :: FilePath -> FilePath -> Content -> IO Content -handleContent tempODT sourceDirRelative content@(Elem el) = do - if qName (elName el) == "image" - then do - let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el - let href = case hrefs of - [] -> error $ "No href found in " ++ show el - [x] -> x - _ -> error $ "Multiple hrefs found in " ++ show el - if isURI $ attrVal href - then return content - else do -- treat as filename - let oldLoc = sourceDirRelative attrVal href - fileExists <- doesFileExist oldLoc - if fileExists - then do - let pref = take 230 $ concat $ intersperse "_" $ - splitDirectories $ takeDirectory $ attrVal href - let picName = pref ++ "_" ++ (takeFileName $ attrVal href) - let tempDir = takeDirectory tempODT - createDirectoryIfMissing False $ tempDir "Pictures" - copyFile oldLoc $ tempDir "Pictures" picName - let newAttrs = (href { attrVal = "Pictures/" ++ picName }) : rest - return $ Elem (el { elAttribs = newAttrs }) - else do - hPutStrLn stderr $ "Warning: Unable to find image at " ++ oldLoc ++ " - ignoring." - return content - else return content - -handleContent _ _ c = return c -- not Element +pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)]) +pPictures = do + contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<") + pics <- getState + return (contents, pics) +pPicture :: GenParser Char [(FilePath, String)] [Char] +pPicture = try $ do + string " o == path) pics of + Just (_, new) -> return new + Nothing -> do + -- get a unique name + let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics + let new = "Pictures/" ++ replicate dups '0' ++ filename + updateState ((path, new) :) + return new + return $ "), (<.>) ) -import System.IO.Error ( catch, ioError, isAlreadyExistsError ) import System.Directory import Prelude hiding ( putStrLn, writeFile, readFile, getContents ) import System.IO.UTF8 @@ -920,16 +917,11 @@ defaultWriterOptions = -- File handling -- --- | Perform a function in a temporary directory and clean up. -withTempDir :: FilePath -> (FilePath -> IO a) -> IO a -withTempDir baseName = bracket (createTempDir 0 baseName) (removeDirectoryRecursive) - --- | Create a temporary directory with a unique name. -createTempDir :: Integer -> FilePath -> IO FilePath -createTempDir num baseName = do - sysTempDir <- getTemporaryDirectory - let dirName = sysTempDir baseName <.> show num - catch (createDirectory dirName >> return dirName) $ - \e -> if isAlreadyExistsError e - then createTempDir (num + 1) baseName - else ioError e +-- | Perform an IO action in a directory, returning to starting directory. +inDirectory :: FilePath -> IO a -> IO a +inDirectory path action = do + oldDir <- getCurrentDirectory + setCurrentDirectory path + result <- action + setCurrentDirectory oldDir + return result diff --git a/Text/Pandoc/TH.hs b/Text/Pandoc/TH.hs index dfd6be28b..0dc5a6719 100644 --- a/Text/Pandoc/TH.hs +++ b/Text/Pandoc/TH.hs @@ -30,7 +30,8 @@ Template haskell functions used by Pandoc modules. -} module Text.Pandoc.TH ( contentsOf, - binaryContentsOf + binaryContentsOf, + makeZip ) where import Language.Haskell.TH @@ -39,6 +40,8 @@ import qualified Data.ByteString as B import Data.ByteString.Internal ( w2c ) import Prelude hiding ( readFile ) import System.IO.UTF8 +import Codec.Archive.Zip +import Text.Pandoc.Shared ( inDirectory ) -- | Insert contents of text file into a template. contentsOf :: FilePath -> ExpQ @@ -51,3 +54,12 @@ binaryContentsOf p = lift =<< (runIO $ B.readFile p) instance Lift B.ByteString where lift x = return (LitE (StringL $ map w2c $ B.unpack x)) + +instance Lift Archive where + lift x = return (LitE (StringL $ show x )) + +-- | Construct zip file from files in a directory, and +-- insert into a template. +makeZip :: FilePath -> ExpQ +makeZip path = lift =<< (runIO $ inDirectory path $ addFilesToArchive [OptRecursive] emptyArchive ["."]) + diff --git a/Text/XML/Light.hs b/Text/XML/Light.hs deleted file mode 100644 index f2d75290b..000000000 --- a/Text/XML/Light.hs +++ /dev/null @@ -1,96 +0,0 @@ -{-# LANGUAGE FlexibleInstances #-} --------------------------------------------------------------------- --- | --- Module : Text.XML.Light --- Copyright : (c) Galois, Inc. 2007 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki --- Stability : provisional --- Portability: portability --- --- A lightweight XML parsing, filtering and generating library. --- --- This module reexports functions from: --- --- * "Text.XML.Light.Types" --- --- * "Text.XML.Light.Proc" --- --- * "Text.XML.Light.Input" --- --- * "Text.XML.Light.Output" --- - -module Text.XML.Light ( - - module Text.XML.Light, - module Text.XML.Light.Types, - module Text.XML.Light.Proc, - module Text.XML.Light.Input, - module Text.XML.Light.Output - - ) where - -import Text.XML.Light.Types -import Text.XML.Light.Proc -import Text.XML.Light.Input -import Text.XML.Light.Output - --- | Add an attribute to an element. -add_attr :: Attr -> Element -> Element -add_attr a e = add_attrs [a] e - --- | Add some attributes to an element. -add_attrs :: [Attr] -> Element -> Element -add_attrs as e = e { elAttribs = as ++ elAttribs e } - --- | Create an unqualified name. -unqual :: String -> QName -unqual x = blank_name { qName = x } - --- | A smart element constructor which uses the type of its argument --- to determine what sort of element to make. -class Node t where - node :: QName -> t -> Element - -instance Node ([Attr],[Content]) where - node n (attrs,cont) = blank_element { elName = n - , elAttribs = attrs - , elContent = cont - } - -instance Node [Attr] where node n as = node n (as,[]::[Content]) -instance Node Attr where node n a = node n [a] -instance Node () where node n () = node n ([]::[Attr]) - -instance Node [Content] where node n cs = node n ([]::[Attr],cs) -instance Node Content where node n c = node n [c] -instance Node ([Attr],Content) where node n (as,c) = node n (as,[c]) -instance Node (Attr,Content) where node n (a,c) = node n ([a],[c]) - -instance Node ([Attr],[Element]) where - node n (as,cs) = node n (as,map Elem cs) - -instance Node ([Attr],Element) where node n (as,c) = node n (as,[c]) -instance Node (Attr,Element) where node n (a,c) = node n ([a],c) -instance Node ([Element]) where node n es = node n ([]::[Attr],es) -instance Node (Element) where node n e = node n [e] - -instance Node ([Attr],[CData]) where - node n (as,cs) = node n (as,map Text cs) - -instance Node ([Attr],CData) where node n (as,c) = node n (as,[c]) -instance Node (Attr,CData) where node n (a,c) = node n ([a],c) -instance Node [CData] where node n es = node n ([]::[Attr],es) -instance Node CData where node n e = node n [e] - -instance Node ([Attr],String) where - node n (as,t) = node n (as,blank_cdata { cdData = t }) - -instance Node (Attr,String) where node n (a,t) = node n ([a],t) -instance Node [Char] where node n t = node n ([]::[Attr],t) - --- | Create node with unqualified name -unode :: Node t => String -> t -> Element -unode = node . unqual diff --git a/Text/XML/Light/Cursor.hs b/Text/XML/Light/Cursor.hs deleted file mode 100644 index 06d15bdb6..000000000 --- a/Text/XML/Light/Cursor.hs +++ /dev/null @@ -1,327 +0,0 @@ --------------------------------------------------------------------- --- | --- Module : Text.XML.Light.Cursor --- Copyright : (c) Galois, Inc. 2008 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki --- Stability : provisional --- Portability: --- --- XML cursors for working XML content withing the context of --- an XML document. This implemntation is based on the general --- tree zipper written by Krasimir Angelov and Iavor S. Diatchki. --- - -module Text.XML.Light.Cursor - ( Tag(..), getTag, setTag, fromTag - , Cursor(..), Path - - -- * Conversions - , fromContent - , fromElement - , fromForest - , toForest - , toTree - - -- * Moving around - , parent - , root - , getChild - , firstChild - , lastChild - , left - , right - - -- ** Searching - , findChild - , findLeft - , findRight - - -- * Node classification - , isRoot - , isFirst - , isLast - , isLeaf - , isChild - , hasChildren - , getNodeIndex - - -- * Updates - , setContent - , modifyContent - , modifyContentM - - -- ** Inserting content - , insertLeft - , insertRight - , insertGoLeft - , insertGoRight - - -- ** Removing content - , removeLeft - , removeRight - , removeGoLeft - , removeGoRight - , removeGoUp - - ) where - -import Text.XML.Light.Types -import Data.Maybe(isNothing) - -data Tag = Tag { tagName :: QName - , tagAttribs :: [Attr] - , tagLine :: Maybe Line - } deriving (Show) - -getTag :: Element -> Tag -getTag e = Tag { tagName = elName e - , tagAttribs = elAttribs e - , tagLine = elLine e - } - -setTag :: Tag -> Element -> Element -setTag t e = fromTag t (elContent e) - -fromTag :: Tag -> [Content] -> Element -fromTag t cs = Element { elName = tagName t - , elAttribs = tagAttribs t - , elLine = tagLine t - , elContent = cs - } - -type Path = [([Content],Tag,[Content])] - --- | The position of a piece of content in an XML document. -data Cursor = Cur - { current :: Content -- ^ The currently selected content. - , lefts :: [Content] -- ^ Siblings on the left, closest first. - , rights :: [Content] -- ^ Siblings on the right, closest first. - , parents :: Path -- ^ The contexts of the parent elements of this location. - } deriving (Show) - --- Moving around --------------------------------------------------------------- - --- | The parent of the given location. -parent :: Cursor -> Maybe Cursor -parent loc = - case parents loc of - (pls,v,prs) : ps -> Just - Cur { current = Elem - (fromTag v - (combChildren (lefts loc) (current loc) (rights loc))) - , lefts = pls, rights = prs, parents = ps - } - [] -> Nothing - - --- | The top-most parent of the given location. -root :: Cursor -> Cursor -root loc = maybe loc root (parent loc) - --- | The left sibling of the given location. -left :: Cursor -> Maybe Cursor -left loc = - case lefts loc of - t : ts -> Just loc { current = t, lefts = ts - , rights = current loc : rights loc } - [] -> Nothing - --- | The right sibling of the given location. -right :: Cursor -> Maybe Cursor -right loc = - case rights loc of - t : ts -> Just loc { current = t, lefts = current loc : lefts loc - , rights = ts } - [] -> Nothing - --- | The first child of the given location. -firstChild :: Cursor -> Maybe Cursor -firstChild loc = - do (t : ts, ps) <- downParents loc - return Cur { current = t, lefts = [], rights = ts , parents = ps } - --- | The last child of the given location. -lastChild :: Cursor -> Maybe Cursor -lastChild loc = - do (ts, ps) <- downParents loc - case reverse ts of - l : ls -> return Cur { current = l, lefts = ls, rights = [] - , parents = ps } - [] -> Nothing - --- | Find the next left sibling that satisfies a predicate. -findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -findLeft p loc = do loc1 <- left loc - if p loc1 then return loc1 else findLeft p loc1 - --- | Find the next right sibling that satisfies a predicate. -findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -findRight p loc = do loc1 <- right loc - if p loc1 then return loc1 else findRight p loc1 - --- | The first child that satisfies a predicate. -findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -findChild p loc = - do loc1 <- firstChild loc - if p loc1 then return loc1 else findRight p loc1 - --- | The child with the given index (starting from 0). -getChild :: Int -> Cursor -> Maybe Cursor -getChild n loc = - do (ts,ps) <- downParents loc - (ls,t,rs) <- splitChildren ts n - return Cur { current = t, lefts = ls, rights = rs, parents = ps } - - --- | private: computes the parent for "down" operations. -downParents :: Cursor -> Maybe ([Content], Path) -downParents loc = - case current loc of - Elem e -> Just ( elContent e - , (lefts loc, getTag e, rights loc) : parents loc - ) - _ -> Nothing - --- Conversions ----------------------------------------------------------------- - --- | A cursor for the guven content. -fromContent :: Content -> Cursor -fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] } - --- | A cursor for the guven element. -fromElement :: Element -> Cursor -fromElement e = fromContent (Elem e) - --- | The location of the first tree in a forest. -fromForest :: [Content] -> Maybe Cursor -fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts - , parents = [] } -fromForest [] = Nothing - --- | Computes the tree containing this location. -toTree :: Cursor -> Content -toTree loc = current (root loc) - --- | Computes the forest containing this location. -toForest :: Cursor -> [Content] -toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r) - - --- Queries --------------------------------------------------------------------- - --- | Are we at the top of the document? -isRoot :: Cursor -> Bool -isRoot loc = null (parents loc) - --- | Are we at the left end of the the document? -isFirst :: Cursor -> Bool -isFirst loc = null (lefts loc) - --- | Are we at the right end of the document? -isLast :: Cursor -> Bool -isLast loc = null (rights loc) - --- | Are we at the bottom of the document? -isLeaf :: Cursor -> Bool -isLeaf loc = isNothing (downParents loc) - --- | Do we have a parent? -isChild :: Cursor -> Bool -isChild loc = not (isRoot loc) - --- | Get the node index inside the sequence of children -getNodeIndex :: Cursor -> Int -getNodeIndex loc = length (lefts loc) - --- | Do we have children? -hasChildren :: Cursor -> Bool -hasChildren loc = not (isLeaf loc) - - - --- Updates --------------------------------------------------------------------- - --- | Change the current content. -setContent :: Content -> Cursor -> Cursor -setContent t loc = loc { current = t } - --- | Modify the current content. -modifyContent :: (Content -> Content) -> Cursor -> Cursor -modifyContent f loc = setContent (f (current loc)) loc - --- | Modify the current content, allowing for an effect. -modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor -modifyContentM f loc = do x <- f (current loc) - return (setContent x loc) - --- | Insert content to the left of the current position. -insertLeft :: Content -> Cursor -> Cursor -insertLeft t loc = loc { lefts = t : lefts loc } - --- | Insert content to the right of the current position. -insertRight :: Content -> Cursor -> Cursor -insertRight t loc = loc { rights = t : rights loc } - --- | Remove the conent on the left of the current position, if any. -removeLeft :: Cursor -> Maybe (Content,Cursor) -removeLeft loc = case lefts loc of - l : ls -> return (l,loc { lefts = ls }) - [] -> Nothing - --- | Remove the conent on the right of the current position, if any. -removeRight :: Cursor -> Maybe (Content,Cursor) -removeRight loc = case rights loc of - l : ls -> return (l,loc { rights = ls }) - [] -> Nothing - - --- | Insert content to the left of the current position. --- The new content becomes the current position. -insertGoLeft :: Content -> Cursor -> Cursor -insertGoLeft t loc = loc { current = t, rights = current loc : rights loc } - --- | Insert content to the right of the current position. --- The new content becomes the current position. -insertGoRight :: Content -> Cursor -> Cursor -insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc } - --- | Remove the current element. --- The new position is the one on the left. -removeGoLeft :: Cursor -> Maybe Cursor -removeGoLeft loc = case lefts loc of - l : ls -> Just loc { current = l, lefts = ls } - [] -> Nothing - --- | Remove the current element. --- The new position is the one on the right. -removeGoRight :: Cursor -> Maybe Cursor -removeGoRight loc = case rights loc of - l : ls -> Just loc { current = l, rights = ls } - [] -> Nothing - --- | Remove the current element. --- The new position is the parent of the old position. -removeGoUp :: Cursor -> Maybe Cursor -removeGoUp loc = - case parents loc of - (pls,v,prs) : ps -> Just - Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc)) - , lefts = pls, rights = prs, parents = ps - } - [] -> Nothing - - --- | private: Gets the given element of a list. --- Also returns the preceeding elements (reversed) and the folloing elements. -splitChildren :: [a] -> Int -> Maybe ([a],a,[a]) -splitChildren _ n | n < 0 = Nothing -splitChildren cs pos = loop [] cs pos - where loop acc (x:xs) 0 = Just (acc,x,xs) - loop acc (x:xs) n = loop (x:acc) xs $! n-1 - loop _ _ _ = Nothing - --- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys -combChildren :: [a] -> a -> [a] -> [a] -combChildren ls t rs = foldl (flip (:)) (t:rs) ls diff --git a/Text/XML/Light/Input.hs b/Text/XML/Light/Input.hs deleted file mode 100644 index 3cf3a8cd1..000000000 --- a/Text/XML/Light/Input.hs +++ /dev/null @@ -1,307 +0,0 @@ --------------------------------------------------------------------- --- | --- Module : Text.XML.Light.Input --- Copyright : (c) Galois, Inc. 2007 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki --- Stability : provisional --- Portability: portable --- --- Lightweight XML parsing --- - -module Text.XML.Light.Input (parseXML,parseXMLDoc) where - -import Text.XML.Light.Types -import Text.XML.Light.Proc -import Text.XML.Light.Output(tagEnd) - -import Data.Char(isSpace) -import Data.List(isPrefixOf) -import Numeric(readHex) - --- | parseXMLDoc, parse a XMLl document to maybe an element -parseXMLDoc :: String -> Maybe Element -parseXMLDoc xs = strip (parseXML xs) - where strip cs = case onlyElems cs of - e : es - | "?xml" `isPrefixOf` qName (elName e) - -> strip (map Elem es) - | otherwise -> Just e - _ -> Nothing - --- | parseXML to a list of content chunks -parseXML :: String -> [Content] -parseXML xs = parse $ tokens $ preprocess xs - ------------------------------------------------------------------------- - -parse :: [Token] -> [Content] -parse [] = [] -parse ts = let (es,_,ts1) = nodes ([],Nothing) [] ts - in es ++ parse ts1 - --- Information about namespaces. --- The first component is a map that associates prefixes to URIs, --- the second is the URI for the default namespace, if one was provided. -type NSInfo = ([(String,String)],Maybe String) - -nodes :: NSInfo -> [QName] -> [Token] -> ([Content], [QName], [Token]) - -nodes ns ps (TokCRef ref : ts) = - let (es,qs,ts1) = nodes ns ps ts - in (CRef ref : es, qs, ts1) - -nodes ns ps (TokText txt : ts) = - let (es,qs,ts1) = nodes ns ps ts - (more,es1) = case es of - Text cd : es1' - | cdVerbatim cd == cdVerbatim txt -> (cdData cd,es1') - _ -> ([],es) - - in (Text txt { cdData = cdData txt ++ more } : es1, qs, ts1) - -nodes cur_info ps (TokStart p t as empty : ts) = (node : siblings, open, toks) - where - new_name = annotName new_info t - new_info = foldr addNS cur_info as - node = Elem Element { elLine = Just p - , elName = new_name - , elAttribs = map (annotAttr new_info) as - , elContent = children - } - - (children,(siblings,open,toks)) - | empty = ([], nodes cur_info ps ts) - | otherwise = let (es1,qs1,ts1) = nodes new_info (new_name:ps) ts - in (es1, - case qs1 of - [] -> nodes cur_info ps ts1 - _ : qs3 -> ([],qs3,ts1)) - -nodes ns ps (TokEnd p t : ts) = let t1 = annotName ns t - in case break (t1 ==) ps of - (as,_:_) -> ([],as,ts) - -- Unknown closing tag. Insert as text. - (_,[]) -> - let (es,qs,ts1) = nodes ns ps ts - in (Text CData { - cdLine = Just p, - cdVerbatim = CDataText, - cdData = tagEnd t "" - } : es,qs, ts1) - -nodes _ ps [] = ([],ps,[]) - - -annotName :: NSInfo -> QName -> QName -annotName (namespaces,def_ns) n = - n { qURI = maybe def_ns (`lookup` namespaces) (qPrefix n) } - -annotAttr :: NSInfo -> Attr -> Attr -annotAttr ns a@(Attr { attrKey = k}) = - case (qPrefix k, qName k) of - (Nothing,"xmlns") -> a - _ -> a { attrKey = annotName ns k } - -addNS :: Attr -> NSInfo -> NSInfo -addNS (Attr { attrKey = key, attrVal = val }) (ns,def) = - case (qPrefix key, qName key) of - (Nothing,"xmlns") -> (ns, if null val then Nothing else Just val) - (Just "xmlns", k) -> ((k, val) : ns, def) - _ -> (ns,def) - - --- Lexer ----------------------------------------------------------------------- - -type LChar = (Line,Char) -type LString = [LChar] -data Token = TokStart Line QName [Attr] Bool -- is empty? - | TokEnd Line QName - | TokCRef String - | TokText CData - deriving Show - -tokens :: String -> [Token] -tokens = tokens' . linenumber 1 - -tokens' :: LString -> [Token] -tokens' ((_,'<') : c@(_,'!') : cs) = special c cs - -tokens' ((_,'<') : cs) = tag (dropSpace cs) -- we are being nice here -tokens' [] = [] -tokens' cs@((l,_):_) = let (as,bs) = breakn ('<' ==) cs - in map cvt (decode_text as) ++ tokens' bs - - -- XXX: Note, some of the lines might be a bit inacuarate - where cvt (TxtBit x) = TokText CData { cdLine = Just l - , cdVerbatim = CDataText - , cdData = x - } - cvt (CRefBit x) = case cref_to_char x of - Just c -> TokText CData { cdLine = Just l - , cdVerbatim = CDataText - , cdData = [c] - } - Nothing -> TokCRef x - - -special :: LChar -> LString -> [Token] -special _ ((_,'-') : (_,'-') : cs) = skip cs - where skip ((_,'-') : (_,'-') : (_,'>') : ds) = tokens' ds - skip (_ : ds) = skip ds - skip [] = [] -- unterminated comment - -special c ((_,'[') : (_,'C') : (_,'D') : (_,'A') : (_,'T') : (_,'A') : (_,'[') - : cs) = - let (xs,ts) = cdata cs - in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataVerbatim, cdData = xs } - : tokens' ts - where cdata ((_,']') : (_,']') : (_,'>') : ds) = ([],ds) - cdata ((_,d) : ds) = let (xs,ys) = cdata ds in (d:xs,ys) - cdata [] = ([],[]) - -special c cs = - let (xs,ts) = munch "" 0 cs - in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataRaw, cdData = '<':'!':(reverse xs) } : tokens' ts - where munch acc nesting ((_,'>') : ds) - | nesting == (0::Int) = ('>':acc,ds) - | otherwise = munch ('>':acc) (nesting-1) ds - munch acc nesting ((_,'<') : ds) - = munch ('<':acc) (nesting+1) ds - munch acc n ((_,x) : ds) = munch (x:acc) n ds - munch acc _ [] = (acc,[]) -- unterminated DTD markup - ---special c cs = tag (c : cs) -- invalid specials are processed as tags - - -qualName :: LString -> (QName,LString) -qualName xs = let (as,bs) = breakn endName xs - (q,n) = case break (':'==) as of - (q1,_:n1) -> (Just q1, n1) - _ -> (Nothing, as) - in (QName { qURI = Nothing, qPrefix = q, qName = n }, bs) - where endName x = isSpace x || x == '=' || x == '>' || x == '/' - - - - - -tag :: LString -> [Token] -tag ((p,'/') : cs) = let (n,ds) = qualName (dropSpace cs) - in TokEnd p n : case ds of - (_,'>') : es -> tokens' es - -- tag was not properly closed... - _ -> tokens' ds -tag [] = [] -tag cs = let (n,ds) = qualName cs - (as,b,ts) = attribs (dropSpace ds) - in TokStart (fst (head cs)) n as b : ts - -attribs :: LString -> ([Attr], Bool, [Token]) -attribs cs = case cs of - (_,'>') : ds -> ([], False, tokens' ds) - - (_,'/') : ds -> ([], True, case ds of - (_,'>') : es -> tokens' es - -- insert missing > ... - _ -> tokens' ds) - - (_,'?') : (_,'>') : ds -> ([], True, tokens' ds) - - -- doc ended within a tag.. - [] -> ([],False,[]) - - _ -> let (a,cs1) = attrib cs - (as,b,ts) = attribs cs1 - in (a:as,b,ts) - -attrib :: LString -> (Attr,LString) -attrib cs = let (ks,cs1) = qualName cs - (vs,cs2) = attr_val (dropSpace cs1) - in ((Attr ks (decode_attr vs)),dropSpace cs2) - -attr_val :: LString -> (String,LString) -attr_val ((_,'=') : cs) = string (dropSpace cs) -attr_val cs = ("",cs) - - -dropSpace :: LString -> LString -dropSpace = dropWhile (isSpace . snd) - --- | Match the value for an attribute. For malformed XML we do --- our best to guess the programmer's intention. -string :: LString -> (String,LString) -string ((_,'"') : cs) = break' ('"' ==) cs - --- Allow attributes to be enclosed between ' '. -string ((_,'\'') : cs) = break' ('\'' ==) cs - --- Allow attributes that are not enclosed by anything. -string cs = breakn eos cs - where eos x = isSpace x || x == '>' || x == '/' - - -break' :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)]) -break' p xs = let (as,bs) = breakn p xs - in (as, case bs of - [] -> [] - _ : cs -> cs) - -breakn :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)]) -breakn p l = (map snd as,bs) where (as,bs) = break (p . snd) l - - - -decode_attr :: String -> String -decode_attr cs = concatMap cvt (decode_text cs) - where cvt (TxtBit x) = x - cvt (CRefBit x) = case cref_to_char x of - Just c -> [c] - Nothing -> '&' : x ++ ";" - -data Txt = TxtBit String | CRefBit String deriving Show - -decode_text :: [Char] -> [Txt] -decode_text xs@('&' : cs) = case break (';' ==) cs of - (as,_:bs) -> CRefBit as : decode_text bs - _ -> [TxtBit xs] -decode_text [] = [] -decode_text cs = let (as,bs) = break ('&' ==) cs - in TxtBit as : decode_text bs - -cref_to_char :: [Char] -> Maybe Char -cref_to_char cs = case cs of - '#' : ds -> num_esc ds - "lt" -> Just '<' - "gt" -> Just '>' - "amp" -> Just '&' - "apos" -> Just '\'' - "quot" -> Just '"' - _ -> Nothing - -num_esc :: String -> Maybe Char -num_esc cs = case cs of - 'x' : ds -> check (readHex ds) - _ -> check (reads cs) - - where check [(n,"")] = cvt_char n - check _ = Nothing - -cvt_char :: Int -> Maybe Char -cvt_char x - | fromEnum (minBound :: Char) <= x && x <= fromEnum (maxBound::Char) - = Just (toEnum x) - | otherwise = Nothing - -preprocess :: String -> String -preprocess ('\r' : '\n' : cs) = '\n' : preprocess cs -preprocess ('\r' : cs) = '\n' : preprocess cs -preprocess (c : cs) = c : preprocess cs -preprocess [] = [] - -linenumber :: Line -> String -> LString -linenumber _ [] = [] -linenumber n ('\n':s) = n' `seq` ((n,'\n'):linenumber n' s) where n' = n + 1 -linenumber n (c:s) = (n,c) : linenumber n s diff --git a/Text/XML/Light/Output.hs b/Text/XML/Light/Output.hs deleted file mode 100644 index 65d1bb1af..000000000 --- a/Text/XML/Light/Output.hs +++ /dev/null @@ -1,150 +0,0 @@ --------------------------------------------------------------------- --- | --- Module : Text.XML.Light.Output --- Copyright : (c) Galois, Inc. 2007 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki --- Stability : provisional --- Portability: --- --- Output handling for the lightweight XML lib. --- - -module Text.XML.Light.Output - ( showTopElement, showContent, showElement, showCData, showQName, showAttr - , ppTopElement, ppContent, ppElement - , tagEnd, xml_header - ) where - -import Text.XML.Light.Types -import Data.Char -import Data.List ( isPrefixOf ) - --- | The XML 1.0 header -xml_header :: String -xml_header = "" - --- | Pretty printing renders XML documents faithfully, --- with the exception that whitespace may be added\/removed --- in non-verbatim character data. -ppTopElement :: Element -> String -ppTopElement e = unlines [xml_header,ppElement e] - --- | Pretty printing elements -ppElement :: Element -> String -ppElement e = ppElementS "" e "" - --- | Pretty printing content -ppContent :: Content -> String -ppContent x = ppContentS "" x "" - --- | Pretty printing content using ShowS -ppContentS :: String -> Content -> ShowS -ppContentS i x xs = case x of - Elem e -> ppElementS i e xs - Text c -> ppCData i c xs - CRef r -> showCRefS r xs - -ppElementS :: String -> Element -> ShowS -ppElementS i e xs = i ++ (tagStart (elName e) (elAttribs e) $ - case elContent e of - [] - | not ("?xml" `isPrefixOf` (qName $ elName e)) -> " />" ++ xs - | otherwise -> " ?>" ++ xs - [Text t] -> ">" ++ ppCData "" t (tagEnd (elName e) xs) - cs -> ">\n" ++ foldr ppSub (i ++ tagEnd (elName e) xs) cs - where ppSub e1 = ppContentS (" " ++ i) e1 . showChar '\n' - ) - -ppCData :: String -> CData -> ShowS -ppCData i c xs = i ++ if (cdVerbatim c /= CDataText ) - then showCDataS c xs - else foldr cons xs (showCData c) - - where cons :: Char -> String -> String - cons '\n' ys = "\n" ++ i ++ ys - cons y ys = y : ys - - - --------------------------------------------------------------------------------- --- | Adds the header. -showTopElement :: Element -> String -showTopElement c = xml_header ++ showElement c - -showContent :: Content -> String -showContent c = showContentS c "" - -showElement :: Element -> String -showElement c = showElementS c "" - -showCData :: CData -> String -showCData c = showCDataS c "" - --- Note: crefs should not contain '&', ';', etc. -showCRefS :: String -> ShowS -showCRefS r xs = '&' : r ++ ';' : xs - --- | Good for transmition (no extra white space etc.) but less readable. -showContentS :: Content -> ShowS -showContentS (Elem e) = showElementS e -showContentS (Text cs) = showCDataS cs -showContentS (CRef cs) = showCRefS cs - --- | Good for transmition (no extra white space etc.) but less readable. -showElementS :: Element -> ShowS -showElementS e xs = - tagStart (elName e) (elAttribs e) - $ case elContent e of - [] -> " />" ++ xs - ch -> '>' : foldr showContentS (tagEnd (elName e) xs) ch - --- | Convert a text element to characters. -showCDataS :: CData -> ShowS -showCDataS cd = - case cdVerbatim cd of - CDataText -> escStr (cdData cd) - CDataVerbatim -> showString "" - CDataRaw -> \ xs -> cdData cd ++ xs - --------------------------------------------------------------------------------- -escCData :: String -> ShowS -escCData (']' : ']' : '>' : cs) = showString "]]]]>" . escCData cs -escCData (c : cs) = showChar c . escCData cs -escCData [] = id - -escChar :: Char -> ShowS -escChar c = case c of - '<' -> showString "<" - '>' -> showString ">" - '&' -> showString "&" - '"' -> showString """ - '\'' -> showString "'" - -- XXX: Is this really wortherd? - -- We could deal with these issues when we convert characters to bytes. - _ | (oc <= 0x7f && isPrint c) || c == '\n' || c == '\r' -> showChar c - | otherwise -> showString "&#" . shows oc . showChar ';' - where oc = ord c - -escStr :: String -> ShowS -escStr cs rs = foldr escChar rs cs - -tagEnd :: QName -> ShowS -tagEnd qn rs = '<':'/':showQName qn ++ '>':rs - -tagStart :: QName -> [Attr] -> ShowS -tagStart qn as rs = '<':showQName qn ++ as_str ++ rs - where as_str = if null as then "" else ' ' : unwords (map showAttr as) - -showAttr :: Attr -> String -showAttr (Attr qn v) = showQName qn ++ '=' : '"' : escStr v "\"" - -showQName :: QName -> String -showQName q = pre ++ qName q - where pre = case qPrefix q of - Nothing -> "" - Just p -> p ++ ":" - - - diff --git a/Text/XML/Light/Proc.hs b/Text/XML/Light/Proc.hs deleted file mode 100644 index 34d844a3f..000000000 --- a/Text/XML/Light/Proc.hs +++ /dev/null @@ -1,103 +0,0 @@ --------------------------------------------------------------------- --- | --- Module : Text.XML.Light.Proc --- Copyright : (c) Galois, Inc. 2007 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki --- Stability : provisional --- Portability: --- --------------------------------------------------------------------- - - -module Text.XML.Light.Proc where - -import Text.XML.Light.Types - -import Data.Maybe(listToMaybe) -import Data.List(find) - --- | Get the text value of an XML element. This function --- ignores non-text elements, and concatenates all text elements. -strContent :: Element -> String -strContent e = concatMap cdData $ onlyText $ elContent e - --- | Select only the elements from a list of XML content. -onlyElems :: [Content] -> [Element] -onlyElems xs = [ x | Elem x <- xs ] - --- | Select only the elements from a parent. -elChildren :: Element -> [Element] -elChildren e = [ x | Elem x <- elContent e ] - --- | Select only the text from a list of XML content. -onlyText :: [Content] -> [CData] -onlyText xs = [ x | Text x <- xs ] - --- | Find all immediate children with the given name. -findChildren :: QName -> Element -> [Element] -findChildren q e = filterChildren ((q ==) . elName) e - --- | Filter all immediate children wrt a given predicate. -filterChildren :: (Element -> Bool) -> Element -> [Element] -filterChildren p e = filter p (onlyElems (elContent e)) - - --- | Filter all immediate children wrt a given predicate over their names. -filterChildrenName :: (QName -> Bool) -> Element -> [Element] -filterChildrenName p e = filter (p.elName) (onlyElems (elContent e)) - - --- | Find an immediate child with the given name. -findChild :: QName -> Element -> Maybe Element -findChild q e = listToMaybe (findChildren q e) - --- | Find an immediate child with the given name. -filterChild :: (Element -> Bool) -> Element -> Maybe Element -filterChild p e = listToMaybe (filterChildren p e) - --- | Find an immediate child with name matching a predicate. -filterChildName :: (QName -> Bool) -> Element -> Maybe Element -filterChildName p e = listToMaybe (filterChildrenName p e) - --- | Find the left-most occurrence of an element matching given name. -findElement :: QName -> Element -> Maybe Element -findElement q e = listToMaybe (findElements q e) - --- | Filter the left-most occurrence of an element wrt. given predicate. -filterElement :: (Element -> Bool) -> Element -> Maybe Element -filterElement p e = listToMaybe (filterElements p e) - --- | Filter the left-most occurrence of an element wrt. given predicate. -filterElementName :: (QName -> Bool) -> Element -> Maybe Element -filterElementName p e = listToMaybe (filterElementsName p e) - --- | Find all non-nested occurances of an element. --- (i.e., once we have found an element, we do not search --- for more occurances among the element's children). -findElements :: QName -> Element -> [Element] -findElements qn e = filterElementsName (qn==) e - --- | Find all non-nested occurrences of an element wrt. given predicate. --- (i.e., once we have found an element, we do not search --- for more occurances among the element's children). -filterElements :: (Element -> Bool) -> Element -> [Element] -filterElements p e - | p e = [e] - | otherwise = concatMap (filterElements p) $ onlyElems $ elContent e - --- | Find all non-nested occurences of an element wrt a predicate over element names. --- (i.e., once we have found an element, we do not search --- for more occurances among the element's children). -filterElementsName :: (QName -> Bool) -> Element -> [Element] -filterElementsName p e = filterElements (p.elName) e - --- | Lookup the value of an attribute. -findAttr :: QName -> Element -> Maybe String -findAttr x e = attrVal `fmap` find ((x ==) . attrKey) (elAttribs e) - --- | Lookup attribute name from list. -lookupAttr :: QName -> [Attr] -> Maybe String -lookupAttr x as = attrVal `fmap` find ((x ==) . attrKey) as - diff --git a/Text/XML/Light/Types.hs b/Text/XML/Light/Types.hs deleted file mode 100644 index 6d8f04b4a..000000000 --- a/Text/XML/Light/Types.hs +++ /dev/null @@ -1,91 +0,0 @@ --------------------------------------------------------------------- --- | --- Module : Text.XML.Light.Types --- Copyright : (c) Galois, Inc. 2007 --- License : BSD3 --- --- Maintainer: Iavor S. Diatchki --- Stability : provisional --- Portability: --- --- Basic XML types. --- - -module Text.XML.Light.Types where - --- | A line is an Integer -type Line = Integer - --- | XML content -data Content = Elem Element - | Text CData - | CRef String - deriving Show - --- | XML elements -data Element = Element { - elName :: QName, - elAttribs :: [Attr], - elContent :: [Content], - elLine :: Maybe Line - } deriving Show - --- | XML attributes -data Attr = Attr { - attrKey :: QName, - attrVal :: String - } deriving (Eq,Ord,Show) - --- | XML CData -data CData = CData { - cdVerbatim :: CDataKind, - cdData :: String, - cdLine :: Maybe Line - } deriving Show - -data CDataKind - = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc. - | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in case (qURI q1, qURI q2) of - (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2) - (u1,u2) -> compare u1 u2 - x -> x - - --- blank elements -------------------------------------------------------------- - --- | Blank names -blank_name :: QName -blank_name = QName { qName = "", qURI = Nothing, qPrefix = Nothing } - --- | Blank cdata -blank_cdata :: CData -blank_cdata = CData { cdVerbatim = CDataText, cdData = "", cdLine = Nothing } - --- | Blank elements -blank_element :: Element -blank_element = Element - { elName = blank_name - , elAttribs = [] - , elContent = [] - , elLine = Nothing - } - - diff --git a/debian/control b/debian/control index 8443a1cea..4219d0dd4 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Source: pandoc Section: text Priority: optional Maintainer: Recai Oktaş -Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.12), ghc6 (>= 6.8.2-1), libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, zip +Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.12), ghc6 (>= 6.8.2-1), libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev Build-Depends-Indep: haddock Standards-Version: 3.7.3 Homepage: http://johnmacfarlane.net/pandoc/ @@ -12,7 +12,7 @@ XS-Vcs-Browser: http://pandoc.googlecode.com/svn/trunk Package: pandoc Section: text Architecture: any -Depends: ${shlibs:Depends}, zip, texlive-latex-recommended | tetex-extra +Depends: ${shlibs:Depends}, texlive-latex-recommended | tetex-extra Suggests: tidy, wget | w3m Description: general markup converter Pandoc is a Haskell library for converting from one markup format to @@ -38,7 +38,7 @@ Description: general markup converter Package: libghc6-pandoc-dev Section: libdevel Architecture: any -Depends: ${haskell:Depends}, libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, zip, texlive-latex-recommended | tetex-extra +Depends: ${haskell:Depends}, libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, texlive-latex-recommended | tetex-extra Suggests: pandoc-doc Description: general markup converter Pandoc is a Haskell library for converting from one markup format to diff --git a/debian/copyright b/debian/copyright index 444f73796..b87bcaa64 100644 --- a/debian/copyright +++ b/debian/copyright @@ -57,68 +57,6 @@ by Eric A. Meyer Released under an explicit Public Domain License ----------------------------------------------------------------------- -System/IO/UTF8.hs and Codec/Binary/UTF8/String.hs -from the utf8-string package on HackageDB -Copyright (c) 2007, Galois Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of Galois Inc. nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY Galois Inc. ``AS IS'' AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL Galois Inc. BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ----------------------------------------------------------------------- -Text/XML/Light/* -from the xml package on HackageDB -(c) 2007 Galois Inc. - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the author nor the names of his contributors -may be used to endorse or promote products derived from this software -without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - ------------------------------------------------------------------------ Diff.hs in tests/ from the Diff package v 0.1.2 (Data.Algorithm.Diff) diff --git a/odt-styles/content.xml b/odt-styles/content.xml deleted file mode 100644 index 801af034d..000000000 --- a/odt-styles/content.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - - - - - - - diff --git a/pandoc.cabal b/pandoc.cabal index a68a21278..94a2623af 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -38,12 +38,12 @@ Extra-Source-Files: -- documentation README, INSTALL, COPYRIGHT, COPYING, changelog, -- sources for man pages - man/man1/pandoc.1.md, + man/man1/pandoc.1.md, man/man1/markdown2pdf.1.md, man/man1/html2markdown.1.md, man/man1/hsmarkdown.1.md, -- Makefile Makefile, -- wrappers - html2markdown, hsmarkdown, + markdown2pdf, html2markdown, hsmarkdown, -- data for DefaultHeaders.hs data/headers/ConTeXt.header, data/headers/Docbook.header, @@ -65,13 +65,11 @@ Extra-Source-Files: data/ui/default/print.css, -- data for ODT writer odt-styles/meta.xml, - odt-styles/content.xml, odt-styles/settings.xml, odt-styles/META-INF/manifest.xml, odt-styles/Thumbnails/thumbnail.png, odt-styles/styles.xml, odt-styles/mimetype, - odt-styles/reference.odt, -- tests tests/bodybg.gif, tests/writer.latex, @@ -151,7 +149,7 @@ Library Exposed-Modules: Text.Pandoc.Biblio cpp-options: -D_CITEPROC Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, - template-haskell, bytestring + template-haskell, bytestring, zip-archive, utf8-string, old-time Hs-Source-Dirs: . Exposed-Modules: Text.Pandoc, Text.Pandoc.Blocks, @@ -180,17 +178,7 @@ Library Text.Pandoc.Writers.RTF, Text.Pandoc.Writers.S5 Other-Modules: Text.Pandoc.XML, - Text.Pandoc.TH, - -- from xml package - Text.XML.Light, - Text.XML.Light.Types, - Text.XML.Light.Output, - Text.XML.Light.Input, - Text.XML.Light.Proc, - Text.XML.Light.Cursor, - -- from utf8-string package - System.IO.UTF8, - Codec.Binary.UTF8.String + Text.Pandoc.TH Extensions: CPP, TemplateHaskell, FlexibleInstances Ghc-Options: -O2 -Wall -threaded Ghc-Prof-Options: -auto-all -- cgit v1.2.3