aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Codec/Binary/UTF8/String.hs97
-rw-r--r--README5
-rw-r--r--Setup.hs29
-rw-r--r--System/IO/UTF8.hs118
-rw-r--r--Text/Pandoc/ODT.hs144
-rw-r--r--Text/Pandoc/Shared.hs26
-rw-r--r--Text/Pandoc/TH.hs14
-rw-r--r--Text/XML/Light.hs96
-rw-r--r--Text/XML/Light/Cursor.hs327
-rw-r--r--Text/XML/Light/Input.hs307
-rw-r--r--Text/XML/Light/Output.hs150
-rw-r--r--Text/XML/Light/Proc.hs103
-rw-r--r--Text/XML/Light/Types.hs91
-rw-r--r--debian/control6
-rw-r--r--debian/copyright62
-rw-r--r--odt-styles/content.xml17
-rw-r--r--pandoc.cabal20
17 files changed, 75 insertions, 1537 deletions
diff --git a/Codec/Binary/UTF8/String.hs b/Codec/Binary/UTF8/String.hs
deleted file mode 100644
index 27c003f00..000000000
--- a/Codec/Binary/UTF8/String.hs
+++ /dev/null
@@ -1,97 +0,0 @@
---
--- |
--- Module : Codec.Binary.UTF8.String
--- Copyright : (c) Eric Mertens 2007
--- License : BSD3-style (see LICENSE)
---
--- Maintainer: emertens@galois.com
--- Stability : experimental
--- Portability : portable
---
--- Support for encoding UTF8 Strings to and from @[Word8]@
---
-
-module Codec.Binary.UTF8.String (
- encode
- , decode
- , encodeString
- , decodeString
- ) where
-
-import Data.Word (Word8)
-import Data.Bits ((.|.),(.&.),shiftL,shiftR)
-import Data.Char (chr,ord)
-
-default(Int)
-
--- | Encode a string using 'encode' and store the result in a 'String'.
-encodeString :: String -> String
-encodeString xs = map (toEnum . fromEnum) (encode xs)
-
--- | Decode a string using 'decode' using a 'String' as input.
--- | This is not safe but it is necessary if UTF-8 encoded text
--- | has been loaded into a 'String' prior to being decoded.
-decodeString :: String -> String
-decodeString xs = decode (map (toEnum . fromEnum) xs)
-
-replacement_character :: Char
-replacement_character = '\xfffd'
-
--- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
-encode :: String -> [Word8]
-encode = concatMap (map fromIntegral . go . ord)
- where
- go oc
- | oc <= 0x7f = [oc]
-
- | oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6)
- , 0x80 + oc .&. 0x3f
- ]
-
- | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12)
- , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
- , 0x80 + oc .&. 0x3f
- ]
- | otherwise = [ 0xf0 + (oc `shiftR` 18)
- , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
- , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
- , 0x80 + oc .&. 0x3f
- ]
-
---
--- | Decode a UTF8 string packed into a list of Word8 values, directly to String
---
-decode :: [Word8] -> String
-decode [ ] = ""
-decode (c:cs)
- | c < 0x80 = chr (fromEnum c) : decode cs
- | c < 0xc0 = replacement_character : decode cs
- | c < 0xe0 = multi1
- | c < 0xf0 = multi_byte 2 0xf 0x800
- | c < 0xf8 = multi_byte 3 0x7 0x10000
- | c < 0xfc = multi_byte 4 0x3 0x200000
- | c < 0xfe = multi_byte 5 0x1 0x4000000
- | otherwise = replacement_character : decode cs
- where
- multi1 = case cs of
- c1 : ds | c1 .&. 0xc0 == 0x80 ->
- let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f)
- in if d >= 0x000080 then toEnum d : decode ds
- else replacement_character : decode ds
- _ -> replacement_character : decode cs
-
- multi_byte :: Int -> Word8 -> Int -> [Char]
- multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
- where
- aux 0 rs acc
- | overlong <= acc && acc <= 0x10ffff &&
- (acc < 0xd800 || 0xdfff < acc) &&
- (acc < 0xfffe || 0xffff < acc) = chr acc : decode rs
- | otherwise = replacement_character : decode rs
-
- aux n (r:rs) acc
- | r .&. 0xc0 == 0x80 = aux (n-1) rs
- $ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
-
- aux _ rs _ = replacement_character : decode rs
-
diff --git a/README b/README
index 89314fbb2..f05331a20 100644
--- a/README
+++ b/README
@@ -87,11 +87,6 @@ Supported output formats include `markdown`, `latex`, `context`
Texinfo), `mediawiki` (MediaWiki markup), `man` (groff man), and `s5`
(which produces an HTML file that acts like powerpoint).
-For `odt` output, you must have `zip` in the path. If you
-don't have it installed, you can get the free [Info-ZIP].
-
-[Info-ZIP]: http://www.info-zip.org/Zip.html
-
Supported input formats include `markdown`, `html`, `latex`, and `rst`.
Note that the `rst` reader only parses a subset of reStructuredText
syntax. For example, it doesn't handle tables, option lists, or
diff --git a/Setup.hs b/Setup.hs
index c51c53bb1..9be68f8ce 100644
--- a/Setup.hs
+++ b/Setup.hs
@@ -12,7 +12,6 @@ import Data.Maybe ( fromJust, isNothing, catMaybes )
main = defaultMainWithHooks $
simpleUserHooks { runTests = runTestSuite
- , preConf = checkReferenceODT
, postBuild = makeManPages }
-- | Run test suite.
@@ -20,34 +19,6 @@ runTestSuite _ _ _ _ = do
inDirectory "tests" $ runCommand "runhaskell -i.. RunTests.hs" >>= waitForProcess
return ()
--- | If reference.odt needs rebuilding, build it.
-checkReferenceODT _ _ = inDirectory "odt-styles" $ do
- let refodt = "reference.odt"
- let deps = [ "meta.xml", "content.xml", "settings.xml", "META-INF/manifest.xml",
- "Thumbnails/thumbnail.png", "styles.xml", "mimetype" ]
- modifiedDeps <- modifiedDependencies refodt deps
- if null modifiedDeps
- then return ()
- else makeReferenceODT modifiedDeps
- return emptyHookedBuildInfo
-
--- | Create reference.odt by zipping up sources in odt-styles directory.
-makeReferenceODT :: [FilePath] -> IO ()
-makeReferenceODT sources = do
- zipPathMaybe <- findExecutable "zip"
- if isNothing zipPathMaybe
- then error $ "The 'zip' command, which is needed to build reference.odt\n" ++
- "from sources in the odt-styles directory, was not found.\n" ++
- "Try again after installing zip (http://www.info-zip.org/Zip.html).\n" ++
- "Or use the pandoc source tarball, which contains a prebuilt reference.odt."
- else do
- putStrLn "Creating reference.odt:"
- ec <- runProcess (fromJust zipPathMaybe) (["-9", "-r", "reference.odt"] ++ sources)
- Nothing Nothing Nothing Nothing (Just stderr) >>= waitForProcess
- case ec of
- ExitSuccess -> return ()
- _ -> error "Error creating ODT."
-
-- | Build man pages from markdown sources in man/man1/.
makeManPages _ _ _ _ = do
mapM makeManPage ["pandoc.1", "hsmarkdown.1", "html2markdown.1", "markdown2pdf.1"]
diff --git a/System/IO/UTF8.hs b/System/IO/UTF8.hs
deleted file mode 100644
index d0af4c38e..000000000
--- a/System/IO/UTF8.hs
+++ /dev/null
@@ -1,118 +0,0 @@
------------------------------------------------------------------------------
--- |
--- Module : System.IO.UTF8
--- Copyright : (c) Eric Mertens 2007
--- License : BSD3-style (see LICENSE)
---
--- Maintainer: emertens@galois.com
--- Stability : experimental
--- Portability : portable
---
--- String IO preserving UTF8 encoding.
---
-
-module System.IO.UTF8 (
- print
- , putStr
- , putStrLn
- , getLine
- , readLn
- , readFile
- , writeFile
- , appendFile
- , getContents
- , hGetLine
- , hGetContents
- , hPutStr
- , hPutStrLn
- ) where
-
-import Control.Monad (liftM)
-import Data.Char (ord, chr)
-import Data.Word (Word8)
-import Prelude (String, ($), (=<<), (>>=), (.), map, toEnum, fromEnum, Read,
- Show(..))
-import System.IO (Handle, IO, FilePath)
-import qualified System.IO as IO
-
-import Codec.Binary.UTF8.String (encode, decode)
-
-
--- | Encode a string in UTF8 form.
-encodeString :: String -> String
-encodeString xs = bytesToString (encode xs)
-
--- | Decode a string from UTF8
-decodeString :: String -> String
-decodeString xs = decode (stringToBytes xs)
-
--- | Convert a list of bytes to a String
-bytesToString :: [Word8] -> String
-bytesToString xs = map (chr . fromEnum) xs
-
--- | String to list of bytes.
-stringToBytes :: String -> [Word8]
-stringToBytes xs = map (toEnum . ord) xs
-
--- | The 'print' function outputs a value of any printable type to the
--- standard output device. This function differs from the
--- System.IO.print in that it preserves any UTF8 encoding of the shown value.
---
-print :: Show a => a -> IO ()
-print x = putStrLn (show x)
-
--- | Write a UTF8 string to the standard output device
-putStr :: String -> IO ()
-putStr x = IO.putStr (encodeString x)
-
--- | The same as 'putStr', but adds a newline character.
-putStrLn :: String -> IO ()
-putStrLn x = IO.putStrLn (encodeString x)
-
--- | Read a UTF8 line from the standard input device
-getLine :: IO String
-getLine = liftM decodeString IO.getLine
-
--- | The 'readLn' function combines 'getLine' and 'readIO', preserving UTF8
-readLn :: Read a => IO a
-readLn = IO.readIO =<< getLine
-
--- | The 'readFile' function reads a file and
--- returns the contents of the file as a UTF8 string.
--- The file is read lazily, on demand, as with 'getContents'.
-readFile :: FilePath -> IO String
-readFile n = liftM decodeString (IO.openBinaryFile n IO.ReadMode >>=
- IO.hGetContents)
-
--- | The computation 'writeFile' @file str@ function writes the UTF8 string @str@,
--- to the file @file@.
-writeFile :: FilePath -> String -> IO ()
-writeFile n c = IO.withBinaryFile n IO.WriteMode $ \ h ->
- IO.hPutStr h $ encodeString c
-
--- | The computation 'appendFile' @file str@ function appends the UTF8 string @str@,
--- to the file @file@.
-appendFile :: FilePath -> String -> IO ()
-appendFile n c = IO.withBinaryFile n IO.AppendMode $ \h ->
- IO.hPutStr h $ encodeString c
-
--- | Read a UTF8 line from a Handle
-hGetLine :: Handle -> IO String
-hGetLine h = liftM decodeString $ IO.hGetLine h
-
--- | Lazily read a UTF8 string from a Handle
-hGetContents :: Handle -> IO String
-hGetContents h = liftM decodeString (IO.hGetContents h)
-
--- | Write a UTF8 string to a Handle.
-hPutStr :: Handle -> String -> IO ()
-hPutStr h s = IO.hPutStr h (encodeString s)
-
--- | Write a UTF8 string to a Handle, appending a newline.
-hPutStrLn :: Handle -> String -> IO ()
-hPutStrLn h s = IO.hPutStrLn h (encodeString s)
-
--- | Lazily read stdin as a UTF8 string.
-getContents :: IO String
-getContents = liftM decodeString IO.getContents
-
diff --git a/Text/Pandoc/ODT.hs b/Text/Pandoc/ODT.hs
index 8c3b1b45f..10cf1b7e2 100644
--- a/Text/Pandoc/ODT.hs
+++ b/Text/Pandoc/ODT.hs
@@ -29,22 +29,16 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Functions for producing an ODT file from OpenDocument XML.
-}
module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
-import Text.Pandoc.TH ( binaryContentsOf )
-import Data.Maybe ( fromJust )
-import Data.List ( partition, intersperse )
-import System.Directory
-import System.FilePath ( (</>), takeDirectory, takeFileName, splitDirectories )
-import System.Process ( runProcess, waitForProcess )
-import System.Exit
-import Text.XML.Light
-import Text.XML.Light.Cursor
-import Text.Pandoc.Shared ( withTempDir )
-import Network.URI ( isURI )
-import qualified Data.ByteString as B ( writeFile, pack )
-import Data.ByteString.Internal ( c2w )
+import Text.Pandoc.TH ( makeZip )
+import Data.List ( find )
+import System.FilePath ( (</>), takeFileName )
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 ( fromString )
import Prelude hiding ( writeFile, readFile )
-import System.IO ( stderr )
-import System.IO.UTF8
+import Codec.Archive.Zip
+import Control.Applicative ( (<$>) )
+import Text.ParserCombinators.Parsec
+import System.Time
-- | Produce an ODT file from OpenDocument XML.
saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced.
@@ -52,89 +46,43 @@ saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced.
-> String -- ^ OpenDocument XML contents.
-> IO ()
saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do
- let zipCmd = "zip"
- -- check for zip in path:
- zipPathMaybe <- findExecutable zipCmd
- let zipPath = case zipPathMaybe of
- Nothing -> error $ "The '" ++ zipCmd ++
- "' command, which is needed to build an ODT file, was not found.\n" ++
- "It can be obtained from http://www.info-zip.org/Zip.html\n" ++
- "Debian (and Debian-based) linux: apt-get install zip\n" ++
- "Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm"
- Just x -> x
- withTempDir "pandoc-odt" $ \tempDir -> do
- let tempODT = tempDir </> "reference.odt"
- B.writeFile tempODT $ B.pack $ map c2w $(binaryContentsOf $ "odt-styles" </> "reference.odt")
- xml' <- handlePictures tempODT sourceDirRelative xml
- writeFile (tempDir </> "content.xml") xml'
- ph <- runProcess zipPath ["-9", "-q", "-r", tempODT, "content.xml", "Pictures"]
- (Just tempDir) Nothing Nothing Nothing (Just stderr)
- ec <- waitForProcess ph -- requires compilation with -threaded
- case ec of
- ExitSuccess -> copyFile tempODT destinationODTPath
- _ -> error "Error creating ODT." >> exitWith ec
+ let refArchive = read $(makeZip "odt-styles")
+ -- handle pictures
+ let (newContents, pics) =
+ case runParser pPictures [] "OpenDocument XML contents" xml of
+ Left err -> error $ show err
+ Right x -> x
+ picEntries <- mapM (makePictureEntry sourceDirRelative) pics
+ (TOD epochTime _) <- getClockTime
+ let contentEntry = toEntry "content.xml" epochTime $ fromString newContents
+ let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
+ B.writeFile destinationODTPath $ fromArchive archive
--- | Find <draw:image ... /> elements and copy the file (xlink:href attribute) into Pictures/ in
--- the zip file. If filename is a URL, attempt to download it. Modify xlink:href attributes
--- to point to the new locations in Pictures/. Return modified XML.
-handlePictures :: FilePath -- ^ Path of ODT file in temp directory
- -> FilePath -- ^ Directory (relative) containing source file
- -> String -- ^ OpenDocument XML string
- -> IO String -- ^ Modified XML
-handlePictures tempODT sourceDirRelative xml = do
- let parsed = case parseXMLDoc xml of
- Nothing -> error "Could not parse OpenDocument XML."
- Just x -> x
- let cursor = case (fromForest $ elContent parsed) of
- Nothing -> error "ODT appears empty"
- Just x -> x
- cursor' <- scanPictures tempODT sourceDirRelative cursor
- let modified = parsed { elContent = toForest $ root cursor' }
- return $ showTopElement modified
+makePictureEntry :: FilePath -- ^ Relative directory of source file
+ -> (FilePath, String) -- ^ Path and new path of picture
+ -> IO Entry
+makePictureEntry sourceDirRelative (path, newPath) = do
+ entry <- readEntry [] $ sourceDirRelative </> path
+ return (entry { eRelativePath = newPath })
-scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor
-scanPictures tempODT sourceDirRelative cursor = do
- cursor' <- handleTree tempODT sourceDirRelative cursor
- case right cursor' of
- Just n -> scanPictures tempODT sourceDirRelative n
- Nothing -> return cursor'
-
-handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor
-handleTree tempODT sourceDirRelative cursor = do
- case firstChild cursor of
- Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor
- Just n -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent
-
--- | If content is an image link, handle it appropriately.
--- Otherwise, handle children if any.
-handleContent :: FilePath -> FilePath -> Content -> IO Content
-handleContent tempODT sourceDirRelative content@(Elem el) = do
- if qName (elName el) == "image"
- then do
- let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el
- let href = case hrefs of
- [] -> error $ "No href found in " ++ show el
- [x] -> x
- _ -> error $ "Multiple hrefs found in " ++ show el
- if isURI $ attrVal href
- then return content
- else do -- treat as filename
- let oldLoc = sourceDirRelative </> attrVal href
- fileExists <- doesFileExist oldLoc
- if fileExists
- then do
- let pref = take 230 $ concat $ intersperse "_" $
- splitDirectories $ takeDirectory $ attrVal href
- let picName = pref ++ "_" ++ (takeFileName $ attrVal href)
- let tempDir = takeDirectory tempODT
- createDirectoryIfMissing False $ tempDir </> "Pictures"
- copyFile oldLoc $ tempDir </> "Pictures" </> picName
- let newAttrs = (href { attrVal = "Pictures/" ++ picName }) : rest
- return $ Elem (el { elAttribs = newAttrs })
- else do
- hPutStrLn stderr $ "Warning: Unable to find image at " ++ oldLoc ++ " - ignoring."
- return content
- else return content
-
-handleContent _ _ c = return c -- not Element
+pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)])
+pPictures = do
+ contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<")
+ pics <- getState
+ return (contents, pics)
+pPicture :: GenParser Char [(FilePath, String)] [Char]
+pPicture = try $ do
+ string "<draw:image xlink:href=\""
+ path <- manyTill anyChar (char '"')
+ let filename = takeFileName path
+ pics <- getState
+ newPath <- case find (\(o, _) -> o == path) pics of
+ Just (_, new) -> return new
+ Nothing -> do
+ -- get a unique name
+ let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics
+ let new = "Pictures/" ++ replicate dups '0' ++ filename
+ updateState ((path, new) :)
+ return new
+ return $ "<draw:image xlink:href=\"" ++ newPath ++ "\""
diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs
index 2c53ffa7a..9bb0c35f9 100644
--- a/Text/Pandoc/Shared.hs
+++ b/Text/Pandoc/Shared.hs
@@ -101,7 +101,7 @@ module Text.Pandoc.Shared (
WriterOptions (..),
defaultWriterOptions,
-- * File handling
- withTempDir
+ inDirectory
) where
import Text.Pandoc.Definition
@@ -112,10 +112,7 @@ import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
import Data.List ( find, isPrefixOf )
import Control.Monad ( join )
-import Control.Exception ( bracket )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
-import System.FilePath ( (</>), (<.>) )
-import System.IO.Error ( catch, ioError, isAlreadyExistsError )
import System.Directory
import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
import System.IO.UTF8
@@ -920,16 +917,11 @@ defaultWriterOptions =
-- File handling
--
--- | Perform a function in a temporary directory and clean up.
-withTempDir :: FilePath -> (FilePath -> IO a) -> IO a
-withTempDir baseName = bracket (createTempDir 0 baseName) (removeDirectoryRecursive)
-
--- | Create a temporary directory with a unique name.
-createTempDir :: Integer -> FilePath -> IO FilePath
-createTempDir num baseName = do
- sysTempDir <- getTemporaryDirectory
- let dirName = sysTempDir </> baseName <.> show num
- catch (createDirectory dirName >> return dirName) $
- \e -> if isAlreadyExistsError e
- then createTempDir (num + 1) baseName
- else ioError e
+-- | Perform an IO action in a directory, returning to starting directory.
+inDirectory :: FilePath -> IO a -> IO a
+inDirectory path action = do
+ oldDir <- getCurrentDirectory
+ setCurrentDirectory path
+ result <- action
+ setCurrentDirectory oldDir
+ return result
diff --git a/Text/Pandoc/TH.hs b/Text/Pandoc/TH.hs
index dfd6be28b..0dc5a6719 100644
--- a/Text/Pandoc/TH.hs
+++ b/Text/Pandoc/TH.hs
@@ -30,7 +30,8 @@ Template haskell functions used by Pandoc modules.
-}
module Text.Pandoc.TH (
contentsOf,
- binaryContentsOf
+ binaryContentsOf,
+ makeZip
) where
import Language.Haskell.TH
@@ -39,6 +40,8 @@ import qualified Data.ByteString as B
import Data.ByteString.Internal ( w2c )
import Prelude hiding ( readFile )
import System.IO.UTF8
+import Codec.Archive.Zip
+import Text.Pandoc.Shared ( inDirectory )
-- | Insert contents of text file into a template.
contentsOf :: FilePath -> ExpQ
@@ -51,3 +54,12 @@ binaryContentsOf p = lift =<< (runIO $ B.readFile p)
instance Lift B.ByteString where
lift x = return (LitE (StringL $ map w2c $ B.unpack x))
+
+instance Lift Archive where
+ lift x = return (LitE (StringL $ show x ))
+
+-- | Construct zip file from files in a directory, and
+-- insert into a template.
+makeZip :: FilePath -> ExpQ
+makeZip path = lift =<< (runIO $ inDirectory path $ addFilesToArchive [OptRecursive] emptyArchive ["."])
+
diff --git a/Text/XML/Light.hs b/Text/XML/Light.hs
deleted file mode 100644
index f2d75290b..000000000
--- a/Text/XML/Light.hs
+++ /dev/null
@@ -1,96 +0,0 @@
-{-# LANGUAGE FlexibleInstances #-}
---------------------------------------------------------------------
--- |
--- Module : Text.XML.Light
--- Copyright : (c) Galois, Inc. 2007
--- License : BSD3
---
--- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
--- Stability : provisional
--- Portability: portability
---
--- A lightweight XML parsing, filtering and generating library.
---
--- This module reexports functions from:
---
--- * "Text.XML.Light.Types"
---
--- * "Text.XML.Light.Proc"
---
--- * "Text.XML.Light.Input"
---
--- * "Text.XML.Light.Output"
---
-
-module Text.XML.Light (
-
- module Text.XML.Light,
- module Text.XML.Light.Types,
- module Text.XML.Light.Proc,
- module Text.XML.Light.Input,
- module Text.XML.Light.Output
-
- ) where
-
-import Text.XML.Light.Types
-import Text.XML.Light.Proc
-import Text.XML.Light.Input
-import Text.XML.Light.Output
-
--- | Add an attribute to an element.
-add_attr :: Attr -> Element -> Element
-add_attr a e = add_attrs [a] e
-
--- | Add some attributes to an element.
-add_attrs :: [Attr] -> Element -> Element
-add_attrs as e = e { elAttribs = as ++ elAttribs e }
-
--- | Create an unqualified name.
-unqual :: String -> QName
-unqual x = blank_name { qName = x }
-
--- | A smart element constructor which uses the type of its argument
--- to determine what sort of element to make.
-class Node t where
- node :: QName -> t -> Element
-
-instance Node ([Attr],[Content]) where
- node n (attrs,cont) = blank_element { elName = n
- , elAttribs = attrs
- , elContent = cont
- }
-
-instance Node [Attr] where node n as = node n (as,[]::[Content])
-instance Node Attr where node n a = node n [a]
-instance Node () where node n () = node n ([]::[Attr])
-
-instance Node [Content] where node n cs = node n ([]::[Attr],cs)
-instance Node Content where node n c = node n [c]
-instance Node ([Attr],Content) where node n (as,c) = node n (as,[c])
-instance Node (Attr,Content) where node n (a,c) = node n ([a],[c])
-
-instance Node ([Attr],[Element]) where
- node n (as,cs) = node n (as,map Elem cs)
-
-instance Node ([Attr],Element) where node n (as,c) = node n (as,[c])
-instance Node (Attr,Element) where node n (a,c) = node n ([a],c)
-instance Node ([Element]) where node n es = node n ([]::[Attr],es)
-instance Node (Element) where node n e = node n [e]
-
-instance Node ([Attr],[CData]) where
- node n (as,cs) = node n (as,map Text cs)
-
-instance Node ([Attr],CData) where node n (as,c) = node n (as,[c])
-instance Node (Attr,CData) where node n (a,c) = node n ([a],c)
-instance Node [CData] where node n es = node n ([]::[Attr],es)
-instance Node CData where node n e = node n [e]
-
-instance Node ([Attr],String) where
- node n (as,t) = node n (as,blank_cdata { cdData = t })
-
-instance Node (Attr,String) where node n (a,t) = node n ([a],t)
-instance Node [Char] where node n t = node n ([]::[Attr],t)
-
--- | Create node with unqualified name
-unode :: Node t => String -> t -> Element
-unode = node . unqual
diff --git a/Text/XML/Light/Cursor.hs b/Text/XML/Light/Cursor.hs
deleted file mode 100644
index 06d15bdb6..000000000
--- a/Text/XML/Light/Cursor.hs
+++ /dev/null
@@ -1,327 +0,0 @@
---------------------------------------------------------------------
--- |
--- Module : Text.XML.Light.Cursor
--- Copyright : (c) Galois, Inc. 2008
--- License : BSD3
---
--- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
--- Stability : provisional
--- Portability:
---
--- XML cursors for working XML content withing the context of
--- an XML document. This implemntation is based on the general
--- tree zipper written by Krasimir Angelov and Iavor S. Diatchki.
---
-
-module Text.XML.Light.Cursor
- ( Tag(..), getTag, setTag, fromTag
- , Cursor(..), Path
-
- -- * Conversions
- , fromContent
- , fromElement
- , fromForest
- , toForest
- , toTree
-
- -- * Moving around
- , parent
- , root
- , getChild
- , firstChild
- , lastChild
- , left
- , right
-
- -- ** Searching
- , findChild
- , findLeft
- , findRight
-
- -- * Node classification
- , isRoot
- , isFirst
- , isLast
- , isLeaf
- , isChild
- , hasChildren
- , getNodeIndex
-
- -- * Updates
- , setContent
- , modifyContent
- , modifyContentM
-
- -- ** Inserting content
- , insertLeft
- , insertRight
- , insertGoLeft
- , insertGoRight
-
- -- ** Removing content
- , removeLeft
- , removeRight
- , removeGoLeft
- , removeGoRight
- , removeGoUp
-
- ) where
-
-import Text.XML.Light.Types
-import Data.Maybe(isNothing)
-
-data Tag = Tag { tagName :: QName
- , tagAttribs :: [Attr]
- , tagLine :: Maybe Line
- } deriving (Show)
-
-getTag :: Element -> Tag
-getTag e = Tag { tagName = elName e
- , tagAttribs = elAttribs e
- , tagLine = elLine e
- }
-
-setTag :: Tag -> Element -> Element
-setTag t e = fromTag t (elContent e)
-
-fromTag :: Tag -> [Content] -> Element
-fromTag t cs = Element { elName = tagName t
- , elAttribs = tagAttribs t
- , elLine = tagLine t
- , elContent = cs
- }
-
-type Path = [([Content],Tag,[Content])]
-
--- | The position of a piece of content in an XML document.
-data Cursor = Cur
- { current :: Content -- ^ The currently selected content.
- , lefts :: [Content] -- ^ Siblings on the left, closest first.
- , rights :: [Content] -- ^ Siblings on the right, closest first.
- , parents :: Path -- ^ The contexts of the parent elements of this location.
- } deriving (Show)
-
--- Moving around ---------------------------------------------------------------
-
--- | The parent of the given location.
-parent :: Cursor -> Maybe Cursor
-parent loc =
- case parents loc of
- (pls,v,prs) : ps -> Just
- Cur { current = Elem
- (fromTag v
- (combChildren (lefts loc) (current loc) (rights loc)))
- , lefts = pls, rights = prs, parents = ps
- }
- [] -> Nothing
-
-
--- | The top-most parent of the given location.
-root :: Cursor -> Cursor
-root loc = maybe loc root (parent loc)
-
--- | The left sibling of the given location.
-left :: Cursor -> Maybe Cursor
-left loc =
- case lefts loc of
- t : ts -> Just loc { current = t, lefts = ts
- , rights = current loc : rights loc }
- [] -> Nothing
-
--- | The right sibling of the given location.
-right :: Cursor -> Maybe Cursor
-right loc =
- case rights loc of
- t : ts -> Just loc { current = t, lefts = current loc : lefts loc
- , rights = ts }
- [] -> Nothing
-
--- | The first child of the given location.
-firstChild :: Cursor -> Maybe Cursor
-firstChild loc =
- do (t : ts, ps) <- downParents loc
- return Cur { current = t, lefts = [], rights = ts , parents = ps }
-
--- | The last child of the given location.
-lastChild :: Cursor -> Maybe Cursor
-lastChild loc =
- do (ts, ps) <- downParents loc
- case reverse ts of
- l : ls -> return Cur { current = l, lefts = ls, rights = []
- , parents = ps }
- [] -> Nothing
-
--- | Find the next left sibling that satisfies a predicate.
-findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-findLeft p loc = do loc1 <- left loc
- if p loc1 then return loc1 else findLeft p loc1
-
--- | Find the next right sibling that satisfies a predicate.
-findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-findRight p loc = do loc1 <- right loc
- if p loc1 then return loc1 else findRight p loc1
-
--- | The first child that satisfies a predicate.
-findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-findChild p loc =
- do loc1 <- firstChild loc
- if p loc1 then return loc1 else findRight p loc1
-
--- | The child with the given index (starting from 0).
-getChild :: Int -> Cursor -> Maybe Cursor
-getChild n loc =
- do (ts,ps) <- downParents loc
- (ls,t,rs) <- splitChildren ts n
- return Cur { current = t, lefts = ls, rights = rs, parents = ps }
-
-
--- | private: computes the parent for "down" operations.
-downParents :: Cursor -> Maybe ([Content], Path)
-downParents loc =
- case current loc of
- Elem e -> Just ( elContent e
- , (lefts loc, getTag e, rights loc) : parents loc
- )
- _ -> Nothing
-
--- Conversions -----------------------------------------------------------------
-
--- | A cursor for the guven content.
-fromContent :: Content -> Cursor
-fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] }
-
--- | A cursor for the guven element.
-fromElement :: Element -> Cursor
-fromElement e = fromContent (Elem e)
-
--- | The location of the first tree in a forest.
-fromForest :: [Content] -> Maybe Cursor
-fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts
- , parents = [] }
-fromForest [] = Nothing
-
--- | Computes the tree containing this location.
-toTree :: Cursor -> Content
-toTree loc = current (root loc)
-
--- | Computes the forest containing this location.
-toForest :: Cursor -> [Content]
-toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r)
-
-
--- Queries ---------------------------------------------------------------------
-
--- | Are we at the top of the document?
-isRoot :: Cursor -> Bool
-isRoot loc = null (parents loc)
-
--- | Are we at the left end of the the document?
-isFirst :: Cursor -> Bool
-isFirst loc = null (lefts loc)
-
--- | Are we at the right end of the document?
-isLast :: Cursor -> Bool
-isLast loc = null (rights loc)
-
--- | Are we at the bottom of the document?
-isLeaf :: Cursor -> Bool
-isLeaf loc = isNothing (downParents loc)
-
--- | Do we have a parent?
-isChild :: Cursor -> Bool
-isChild loc = not (isRoot loc)
-
--- | Get the node index inside the sequence of children
-getNodeIndex :: Cursor -> Int
-getNodeIndex loc = length (lefts loc)
-
--- | Do we have children?
-hasChildren :: Cursor -> Bool
-hasChildren loc = not (isLeaf loc)
-
-
-
--- Updates ---------------------------------------------------------------------
-
--- | Change the current content.
-setContent :: Content -> Cursor -> Cursor
-setContent t loc = loc { current = t }
-
--- | Modify the current content.
-modifyContent :: (Content -> Content) -> Cursor -> Cursor
-modifyContent f loc = setContent (f (current loc)) loc
-
--- | Modify the current content, allowing for an effect.
-modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor
-modifyContentM f loc = do x <- f (current loc)
- return (setContent x loc)
-
--- | Insert content to the left of the current position.
-insertLeft :: Content -> Cursor -> Cursor
-insertLeft t loc = loc { lefts = t : lefts loc }
-
--- | Insert content to the right of the current position.
-insertRight :: Content -> Cursor -> Cursor
-insertRight t loc = loc { rights = t : rights loc }
-
--- | Remove the conent on the left of the current position, if any.
-removeLeft :: Cursor -> Maybe (Content,Cursor)
-removeLeft loc = case lefts loc of
- l : ls -> return (l,loc { lefts = ls })
- [] -> Nothing
-
--- | Remove the conent on the right of the current position, if any.
-removeRight :: Cursor -> Maybe (Content,Cursor)
-removeRight loc = case rights loc of
- l : ls -> return (l,loc { rights = ls })
- [] -> Nothing
-
-
--- | Insert content to the left of the current position.
--- The new content becomes the current position.
-insertGoLeft :: Content -> Cursor -> Cursor
-insertGoLeft t loc = loc { current = t, rights = current loc : rights loc }
-
--- | Insert content to the right of the current position.
--- The new content becomes the current position.
-insertGoRight :: Content -> Cursor -> Cursor
-insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc }
-
--- | Remove the current element.
--- The new position is the one on the left.
-removeGoLeft :: Cursor -> Maybe Cursor
-removeGoLeft loc = case lefts loc of
- l : ls -> Just loc { current = l, lefts = ls }
- [] -> Nothing
-
--- | Remove the current element.
--- The new position is the one on the right.
-removeGoRight :: Cursor -> Maybe Cursor
-removeGoRight loc = case rights loc of
- l : ls -> Just loc { current = l, rights = ls }
- [] -> Nothing
-
--- | Remove the current element.
--- The new position is the parent of the old position.
-removeGoUp :: Cursor -> Maybe Cursor
-removeGoUp loc =
- case parents loc of
- (pls,v,prs) : ps -> Just
- Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc))
- , lefts = pls, rights = prs, parents = ps
- }
- [] -> Nothing
-
-
--- | private: Gets the given element of a list.
--- Also returns the preceeding elements (reversed) and the folloing elements.
-splitChildren :: [a] -> Int -> Maybe ([a],a,[a])
-splitChildren _ n | n < 0 = Nothing
-splitChildren cs pos = loop [] cs pos
- where loop acc (x:xs) 0 = Just (acc,x,xs)
- loop acc (x:xs) n = loop (x:acc) xs $! n-1
- loop _ _ _ = Nothing
-
--- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys
-combChildren :: [a] -> a -> [a] -> [a]
-combChildren ls t rs = foldl (flip (:)) (t:rs) ls
diff --git a/Text/XML/Light/Input.hs b/Text/XML/Light/Input.hs
deleted file mode 100644
index 3cf3a8cd1..000000000
--- a/Text/XML/Light/Input.hs
+++ /dev/null
@@ -1,307 +0,0 @@
---------------------------------------------------------------------
--- |
--- Module : Text.XML.Light.Input
--- Copyright : (c) Galois, Inc. 2007
--- License : BSD3
---
--- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
--- Stability : provisional
--- Portability: portable
---
--- Lightweight XML parsing
---
-
-module Text.XML.Light.Input (parseXML,parseXMLDoc) where
-
-import Text.XML.Light.Types
-import Text.XML.Light.Proc
-import Text.XML.Light.Output(tagEnd)
-
-import Data.Char(isSpace)
-import Data.List(isPrefixOf)
-import Numeric(readHex)
-
--- | parseXMLDoc, parse a XMLl document to maybe an element
-parseXMLDoc :: String -> Maybe Element
-parseXMLDoc xs = strip (parseXML xs)
- where strip cs = case onlyElems cs of
- e : es
- | "?xml" `isPrefixOf` qName (elName e)
- -> strip (map Elem es)
- | otherwise -> Just e
- _ -> Nothing
-
--- | parseXML to a list of content chunks
-parseXML :: String -> [Content]
-parseXML xs = parse $ tokens $ preprocess xs
-
-------------------------------------------------------------------------
-
-parse :: [Token] -> [Content]
-parse [] = []
-parse ts = let (es,_,ts1) = nodes ([],Nothing) [] ts
- in es ++ parse ts1
-
--- Information about namespaces.
--- The first component is a map that associates prefixes to URIs,
--- the second is the URI for the default namespace, if one was provided.
-type NSInfo = ([(String,String)],Maybe String)
-
-nodes :: NSInfo -> [QName] -> [Token] -> ([Content], [QName], [Token])
-
-nodes ns ps (TokCRef ref : ts) =
- let (es,qs,ts1) = nodes ns ps ts
- in (CRef ref : es, qs, ts1)
-
-nodes ns ps (TokText txt : ts) =
- let (es,qs,ts1) = nodes ns ps ts
- (more,es1) = case es of
- Text cd : es1'
- | cdVerbatim cd == cdVerbatim txt -> (cdData cd,es1')
- _ -> ([],es)
-
- in (Text txt { cdData = cdData txt ++ more } : es1, qs, ts1)
-
-nodes cur_info ps (TokStart p t as empty : ts) = (node : siblings, open, toks)
- where
- new_name = annotName new_info t
- new_info = foldr addNS cur_info as
- node = Elem Element { elLine = Just p
- , elName = new_name
- , elAttribs = map (annotAttr new_info) as
- , elContent = children
- }
-
- (children,(siblings,open,toks))
- | empty = ([], nodes cur_info ps ts)
- | otherwise = let (es1,qs1,ts1) = nodes new_info (new_name:ps) ts
- in (es1,
- case qs1 of
- [] -> nodes cur_info ps ts1
- _ : qs3 -> ([],qs3,ts1))
-
-nodes ns ps (TokEnd p t : ts) = let t1 = annotName ns t
- in case break (t1 ==) ps of
- (as,_:_) -> ([],as,ts)
- -- Unknown closing tag. Insert as text.
- (_,[]) ->
- let (es,qs,ts1) = nodes ns ps ts
- in (Text CData {
- cdLine = Just p,
- cdVerbatim = CDataText,
- cdData = tagEnd t ""
- } : es,qs, ts1)
-
-nodes _ ps [] = ([],ps,[])
-
-
-annotName :: NSInfo -> QName -> QName
-annotName (namespaces,def_ns) n =
- n { qURI = maybe def_ns (`lookup` namespaces) (qPrefix n) }
-
-annotAttr :: NSInfo -> Attr -> Attr
-annotAttr ns a@(Attr { attrKey = k}) =
- case (qPrefix k, qName k) of
- (Nothing,"xmlns") -> a
- _ -> a { attrKey = annotName ns k }
-
-addNS :: Attr -> NSInfo -> NSInfo
-addNS (Attr { attrKey = key, attrVal = val }) (ns,def) =
- case (qPrefix key, qName key) of
- (Nothing,"xmlns") -> (ns, if null val then Nothing else Just val)
- (Just "xmlns", k) -> ((k, val) : ns, def)
- _ -> (ns,def)
-
-
--- Lexer -----------------------------------------------------------------------
-
-type LChar = (Line,Char)
-type LString = [LChar]
-data Token = TokStart Line QName [Attr] Bool -- is empty?
- | TokEnd Line QName
- | TokCRef String
- | TokText CData
- deriving Show
-
-tokens :: String -> [Token]
-tokens = tokens' . linenumber 1
-
-tokens' :: LString -> [Token]
-tokens' ((_,'<') : c@(_,'!') : cs) = special c cs
-
-tokens' ((_,'<') : cs) = tag (dropSpace cs) -- we are being nice here
-tokens' [] = []
-tokens' cs@((l,_):_) = let (as,bs) = breakn ('<' ==) cs
- in map cvt (decode_text as) ++ tokens' bs
-
- -- XXX: Note, some of the lines might be a bit inacuarate
- where cvt (TxtBit x) = TokText CData { cdLine = Just l
- , cdVerbatim = CDataText
- , cdData = x
- }
- cvt (CRefBit x) = case cref_to_char x of
- Just c -> TokText CData { cdLine = Just l
- , cdVerbatim = CDataText
- , cdData = [c]
- }
- Nothing -> TokCRef x
-
-
-special :: LChar -> LString -> [Token]
-special _ ((_,'-') : (_,'-') : cs) = skip cs
- where skip ((_,'-') : (_,'-') : (_,'>') : ds) = tokens' ds
- skip (_ : ds) = skip ds
- skip [] = [] -- unterminated comment
-
-special c ((_,'[') : (_,'C') : (_,'D') : (_,'A') : (_,'T') : (_,'A') : (_,'[')
- : cs) =
- let (xs,ts) = cdata cs
- in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataVerbatim, cdData = xs }
- : tokens' ts
- where cdata ((_,']') : (_,']') : (_,'>') : ds) = ([],ds)
- cdata ((_,d) : ds) = let (xs,ys) = cdata ds in (d:xs,ys)
- cdata [] = ([],[])
-
-special c cs =
- let (xs,ts) = munch "" 0 cs
- in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataRaw, cdData = '<':'!':(reverse xs) } : tokens' ts
- where munch acc nesting ((_,'>') : ds)
- | nesting == (0::Int) = ('>':acc,ds)
- | otherwise = munch ('>':acc) (nesting-1) ds
- munch acc nesting ((_,'<') : ds)
- = munch ('<':acc) (nesting+1) ds
- munch acc n ((_,x) : ds) = munch (x:acc) n ds
- munch acc _ [] = (acc,[]) -- unterminated DTD markup
-
---special c cs = tag (c : cs) -- invalid specials are processed as tags
-
-
-qualName :: LString -> (QName,LString)
-qualName xs = let (as,bs) = breakn endName xs
- (q,n) = case break (':'==) as of
- (q1,_:n1) -> (Just q1, n1)
- _ -> (Nothing, as)
- in (QName { qURI = Nothing, qPrefix = q, qName = n }, bs)
- where endName x = isSpace x || x == '=' || x == '>' || x == '/'
-
-
-
-
-
-tag :: LString -> [Token]
-tag ((p,'/') : cs) = let (n,ds) = qualName (dropSpace cs)
- in TokEnd p n : case ds of
- (_,'>') : es -> tokens' es
- -- tag was not properly closed...
- _ -> tokens' ds
-tag [] = []
-tag cs = let (n,ds) = qualName cs
- (as,b,ts) = attribs (dropSpace ds)
- in TokStart (fst (head cs)) n as b : ts
-
-attribs :: LString -> ([Attr], Bool, [Token])
-attribs cs = case cs of
- (_,'>') : ds -> ([], False, tokens' ds)
-
- (_,'/') : ds -> ([], True, case ds of
- (_,'>') : es -> tokens' es
- -- insert missing > ...
- _ -> tokens' ds)
-
- (_,'?') : (_,'>') : ds -> ([], True, tokens' ds)
-
- -- doc ended within a tag..
- [] -> ([],False,[])
-
- _ -> let (a,cs1) = attrib cs
- (as,b,ts) = attribs cs1
- in (a:as,b,ts)
-
-attrib :: LString -> (Attr,LString)
-attrib cs = let (ks,cs1) = qualName cs
- (vs,cs2) = attr_val (dropSpace cs1)
- in ((Attr ks (decode_attr vs)),dropSpace cs2)
-
-attr_val :: LString -> (String,LString)
-attr_val ((_,'=') : cs) = string (dropSpace cs)
-attr_val cs = ("",cs)
-
-
-dropSpace :: LString -> LString
-dropSpace = dropWhile (isSpace . snd)
-
--- | Match the value for an attribute. For malformed XML we do
--- our best to guess the programmer's intention.
-string :: LString -> (String,LString)
-string ((_,'"') : cs) = break' ('"' ==) cs
-
--- Allow attributes to be enclosed between ' '.
-string ((_,'\'') : cs) = break' ('\'' ==) cs
-
--- Allow attributes that are not enclosed by anything.
-string cs = breakn eos cs
- where eos x = isSpace x || x == '>' || x == '/'
-
-
-break' :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
-break' p xs = let (as,bs) = breakn p xs
- in (as, case bs of
- [] -> []
- _ : cs -> cs)
-
-breakn :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
-breakn p l = (map snd as,bs) where (as,bs) = break (p . snd) l
-
-
-
-decode_attr :: String -> String
-decode_attr cs = concatMap cvt (decode_text cs)
- where cvt (TxtBit x) = x
- cvt (CRefBit x) = case cref_to_char x of
- Just c -> [c]
- Nothing -> '&' : x ++ ";"
-
-data Txt = TxtBit String | CRefBit String deriving Show
-
-decode_text :: [Char] -> [Txt]
-decode_text xs@('&' : cs) = case break (';' ==) cs of
- (as,_:bs) -> CRefBit as : decode_text bs
- _ -> [TxtBit xs]
-decode_text [] = []
-decode_text cs = let (as,bs) = break ('&' ==) cs
- in TxtBit as : decode_text bs
-
-cref_to_char :: [Char] -> Maybe Char
-cref_to_char cs = case cs of
- '#' : ds -> num_esc ds
- "lt" -> Just '<'
- "gt" -> Just '>'
- "amp" -> Just '&'
- "apos" -> Just '\''
- "quot" -> Just '"'
- _ -> Nothing
-
-num_esc :: String -> Maybe Char
-num_esc cs = case cs of
- 'x' : ds -> check (readHex ds)
- _ -> check (reads cs)
-
- where check [(n,"")] = cvt_char n
- check _ = Nothing
-
-cvt_char :: Int -> Maybe Char
-cvt_char x
- | fromEnum (minBound :: Char) <= x && x <= fromEnum (maxBound::Char)
- = Just (toEnum x)
- | otherwise = Nothing
-
-preprocess :: String -> String
-preprocess ('\r' : '\n' : cs) = '\n' : preprocess cs
-preprocess ('\r' : cs) = '\n' : preprocess cs
-preprocess (c : cs) = c : preprocess cs
-preprocess [] = []
-
-linenumber :: Line -> String -> LString
-linenumber _ [] = []
-linenumber n ('\n':s) = n' `seq` ((n,'\n'):linenumber n' s) where n' = n + 1
-linenumber n (c:s) = (n,c) : linenumber n s
diff --git a/Text/XML/Light/Output.hs b/Text/XML/Light/Output.hs
deleted file mode 100644
index 65d1bb1af..000000000
--- a/Text/XML/Light/Output.hs
+++ /dev/null
@@ -1,150 +0,0 @@
---------------------------------------------------------------------
--- |
--- Module : Text.XML.Light.Output
--- Copyright : (c) Galois, Inc. 2007
--- License : BSD3
---
--- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
--- Stability : provisional
--- Portability:
---
--- Output handling for the lightweight XML lib.
---
-
-module Text.XML.Light.Output
- ( showTopElement, showContent, showElement, showCData, showQName, showAttr
- , ppTopElement, ppContent, ppElement
- , tagEnd, xml_header
- ) where
-
-import Text.XML.Light.Types
-import Data.Char
-import Data.List ( isPrefixOf )
-
--- | The XML 1.0 header
-xml_header :: String
-xml_header = "<?xml version='1.0' ?>"
-
--- | Pretty printing renders XML documents faithfully,
--- with the exception that whitespace may be added\/removed
--- in non-verbatim character data.
-ppTopElement :: Element -> String
-ppTopElement e = unlines [xml_header,ppElement e]
-
--- | Pretty printing elements
-ppElement :: Element -> String
-ppElement e = ppElementS "" e ""
-
--- | Pretty printing content
-ppContent :: Content -> String
-ppContent x = ppContentS "" x ""
-
--- | Pretty printing content using ShowS
-ppContentS :: String -> Content -> ShowS
-ppContentS i x xs = case x of
- Elem e -> ppElementS i e xs
- Text c -> ppCData i c xs
- CRef r -> showCRefS r xs
-
-ppElementS :: String -> Element -> ShowS
-ppElementS i e xs = i ++ (tagStart (elName e) (elAttribs e) $
- case elContent e of
- []
- | not ("?xml" `isPrefixOf` (qName $ elName e)) -> " />" ++ xs
- | otherwise -> " ?>" ++ xs
- [Text t] -> ">" ++ ppCData "" t (tagEnd (elName e) xs)
- cs -> ">\n" ++ foldr ppSub (i ++ tagEnd (elName e) xs) cs
- where ppSub e1 = ppContentS (" " ++ i) e1 . showChar '\n'
- )
-
-ppCData :: String -> CData -> ShowS
-ppCData i c xs = i ++ if (cdVerbatim c /= CDataText )
- then showCDataS c xs
- else foldr cons xs (showCData c)
-
- where cons :: Char -> String -> String
- cons '\n' ys = "\n" ++ i ++ ys
- cons y ys = y : ys
-
-
-
---------------------------------------------------------------------------------
--- | Adds the <?xml?> header.
-showTopElement :: Element -> String
-showTopElement c = xml_header ++ showElement c
-
-showContent :: Content -> String
-showContent c = showContentS c ""
-
-showElement :: Element -> String
-showElement c = showElementS c ""
-
-showCData :: CData -> String
-showCData c = showCDataS c ""
-
--- Note: crefs should not contain '&', ';', etc.
-showCRefS :: String -> ShowS
-showCRefS r xs = '&' : r ++ ';' : xs
-
--- | Good for transmition (no extra white space etc.) but less readable.
-showContentS :: Content -> ShowS
-showContentS (Elem e) = showElementS e
-showContentS (Text cs) = showCDataS cs
-showContentS (CRef cs) = showCRefS cs
-
--- | Good for transmition (no extra white space etc.) but less readable.
-showElementS :: Element -> ShowS
-showElementS e xs =
- tagStart (elName e) (elAttribs e)
- $ case elContent e of
- [] -> " />" ++ xs
- ch -> '>' : foldr showContentS (tagEnd (elName e) xs) ch
-
--- | Convert a text element to characters.
-showCDataS :: CData -> ShowS
-showCDataS cd =
- case cdVerbatim cd of
- CDataText -> escStr (cdData cd)
- CDataVerbatim -> showString "<![CDATA[" . escCData (cdData cd) . showString "]]>"
- CDataRaw -> \ xs -> cdData cd ++ xs
-
---------------------------------------------------------------------------------
-escCData :: String -> ShowS
-escCData (']' : ']' : '>' : cs) = showString "]]]]><![CDATA[>" . escCData cs
-escCData (c : cs) = showChar c . escCData cs
-escCData [] = id
-
-escChar :: Char -> ShowS
-escChar c = case c of
- '<' -> showString "&lt;"
- '>' -> showString "&gt;"
- '&' -> showString "&amp;"
- '"' -> showString "&quot;"
- '\'' -> showString "&apos;"
- -- XXX: Is this really wortherd?
- -- We could deal with these issues when we convert characters to bytes.
- _ | (oc <= 0x7f && isPrint c) || c == '\n' || c == '\r' -> showChar c
- | otherwise -> showString "&#" . shows oc . showChar ';'
- where oc = ord c
-
-escStr :: String -> ShowS
-escStr cs rs = foldr escChar rs cs
-
-tagEnd :: QName -> ShowS
-tagEnd qn rs = '<':'/':showQName qn ++ '>':rs
-
-tagStart :: QName -> [Attr] -> ShowS
-tagStart qn as rs = '<':showQName qn ++ as_str ++ rs
- where as_str = if null as then "" else ' ' : unwords (map showAttr as)
-
-showAttr :: Attr -> String
-showAttr (Attr qn v) = showQName qn ++ '=' : '"' : escStr v "\""
-
-showQName :: QName -> String
-showQName q = pre ++ qName q
- where pre = case qPrefix q of
- Nothing -> ""
- Just p -> p ++ ":"
-
-
-
diff --git a/Text/XML/Light/Proc.hs b/Text/XML/Light/Proc.hs
deleted file mode 100644
index 34d844a3f..000000000
--- a/Text/XML/Light/Proc.hs
+++ /dev/null
@@ -1,103 +0,0 @@
---------------------------------------------------------------------
--- |
--- Module : Text.XML.Light.Proc
--- Copyright : (c) Galois, Inc. 2007
--- License : BSD3
---
--- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
--- Stability : provisional
--- Portability:
---
---------------------------------------------------------------------
-
-
-module Text.XML.Light.Proc where
-
-import Text.XML.Light.Types
-
-import Data.Maybe(listToMaybe)
-import Data.List(find)
-
--- | Get the text value of an XML element. This function
--- ignores non-text elements, and concatenates all text elements.
-strContent :: Element -> String
-strContent e = concatMap cdData $ onlyText $ elContent e
-
--- | Select only the elements from a list of XML content.
-onlyElems :: [Content] -> [Element]
-onlyElems xs = [ x | Elem x <- xs ]
-
--- | Select only the elements from a parent.
-elChildren :: Element -> [Element]
-elChildren e = [ x | Elem x <- elContent e ]
-
--- | Select only the text from a list of XML content.
-onlyText :: [Content] -> [CData]
-onlyText xs = [ x | Text x <- xs ]
-
--- | Find all immediate children with the given name.
-findChildren :: QName -> Element -> [Element]
-findChildren q e = filterChildren ((q ==) . elName) e
-
--- | Filter all immediate children wrt a given predicate.
-filterChildren :: (Element -> Bool) -> Element -> [Element]
-filterChildren p e = filter p (onlyElems (elContent e))
-
-
--- | Filter all immediate children wrt a given predicate over their names.
-filterChildrenName :: (QName -> Bool) -> Element -> [Element]
-filterChildrenName p e = filter (p.elName) (onlyElems (elContent e))
-
-
--- | Find an immediate child with the given name.
-findChild :: QName -> Element -> Maybe Element
-findChild q e = listToMaybe (findChildren q e)
-
--- | Find an immediate child with the given name.
-filterChild :: (Element -> Bool) -> Element -> Maybe Element
-filterChild p e = listToMaybe (filterChildren p e)
-
--- | Find an immediate child with name matching a predicate.
-filterChildName :: (QName -> Bool) -> Element -> Maybe Element
-filterChildName p e = listToMaybe (filterChildrenName p e)
-
--- | Find the left-most occurrence of an element matching given name.
-findElement :: QName -> Element -> Maybe Element
-findElement q e = listToMaybe (findElements q e)
-
--- | Filter the left-most occurrence of an element wrt. given predicate.
-filterElement :: (Element -> Bool) -> Element -> Maybe Element
-filterElement p e = listToMaybe (filterElements p e)
-
--- | Filter the left-most occurrence of an element wrt. given predicate.
-filterElementName :: (QName -> Bool) -> Element -> Maybe Element
-filterElementName p e = listToMaybe (filterElementsName p e)
-
--- | Find all non-nested occurances of an element.
--- (i.e., once we have found an element, we do not search
--- for more occurances among the element's children).
-findElements :: QName -> Element -> [Element]
-findElements qn e = filterElementsName (qn==) e
-
--- | Find all non-nested occurrences of an element wrt. given predicate.
--- (i.e., once we have found an element, we do not search
--- for more occurances among the element's children).
-filterElements :: (Element -> Bool) -> Element -> [Element]
-filterElements p e
- | p e = [e]
- | otherwise = concatMap (filterElements p) $ onlyElems $ elContent e
-
--- | Find all non-nested occurences of an element wrt a predicate over element names.
--- (i.e., once we have found an element, we do not search
--- for more occurances among the element's children).
-filterElementsName :: (QName -> Bool) -> Element -> [Element]
-filterElementsName p e = filterElements (p.elName) e
-
--- | Lookup the value of an attribute.
-findAttr :: QName -> Element -> Maybe String
-findAttr x e = attrVal `fmap` find ((x ==) . attrKey) (elAttribs e)
-
--- | Lookup attribute name from list.
-lookupAttr :: QName -> [Attr] -> Maybe String
-lookupAttr x as = attrVal `fmap` find ((x ==) . attrKey) as
-
diff --git a/Text/XML/Light/Types.hs b/Text/XML/Light/Types.hs
deleted file mode 100644
index 6d8f04b4a..000000000
--- a/Text/XML/Light/Types.hs
+++ /dev/null
@@ -1,91 +0,0 @@
---------------------------------------------------------------------
--- |
--- Module : Text.XML.Light.Types
--- Copyright : (c) Galois, Inc. 2007
--- License : BSD3
---
--- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
--- Stability : provisional
--- Portability:
---
--- Basic XML types.
---
-
-module Text.XML.Light.Types where
-
--- | A line is an Integer
-type Line = Integer
-
--- | XML content
-data Content = Elem Element
- | Text CData
- | CRef String
- deriving Show
-
--- | XML elements
-data Element = Element {
- elName :: QName,
- elAttribs :: [Attr],
- elContent :: [Content],
- elLine :: Maybe Line
- } deriving Show
-
--- | XML attributes
-data Attr = Attr {
- attrKey :: QName,
- attrVal :: String
- } deriving (Eq,Ord,Show)
-
--- | XML CData
-data CData = CData {
- cdVerbatim :: CDataKind,
- cdData :: String,
- cdLine :: Maybe Line
- } deriving Show
-
-data CDataKind
- = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc.
- | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
- | CDataRaw -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
- deriving ( Eq, Show )
-
--- | XML qualified names
-data QName = QName {
- qName :: String,
- qURI :: Maybe String,
- qPrefix :: Maybe String
- } deriving Show
-
-
-instance Eq QName where
- q1 == q2 = compare q1 q2 == EQ
-
-instance Ord QName where
- compare q1 q2 =
- case compare (qName q1) (qName q2) of
- EQ -> case (qURI q1, qURI q2) of
- (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
- (u1,u2) -> compare u1 u2
- x -> x
-
-
--- blank elements --------------------------------------------------------------
-
--- | Blank names
-blank_name :: QName
-blank_name = QName { qName = "", qURI = Nothing, qPrefix = Nothing }
-
--- | Blank cdata
-blank_cdata :: CData
-blank_cdata = CData { cdVerbatim = CDataText, cdData = "", cdLine = Nothing }
-
--- | Blank elements
-blank_element :: Element
-blank_element = Element
- { elName = blank_name
- , elAttribs = []
- , elContent = []
- , elLine = Nothing
- }
-
-
diff --git a/debian/control b/debian/control
index 8443a1cea..4219d0dd4 100644
--- a/debian/control
+++ b/debian/control
@@ -2,7 +2,7 @@ Source: pandoc
Section: text
Priority: optional
Maintainer: Recai Oktaş <roktas@debian.org>
-Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.12), ghc6 (>= 6.8.2-1), libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, zip
+Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.12), ghc6 (>= 6.8.2-1), libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev
Build-Depends-Indep: haddock
Standards-Version: 3.7.3
Homepage: http://johnmacfarlane.net/pandoc/
@@ -12,7 +12,7 @@ XS-Vcs-Browser: http://pandoc.googlecode.com/svn/trunk
Package: pandoc
Section: text
Architecture: any
-Depends: ${shlibs:Depends}, zip, texlive-latex-recommended | tetex-extra
+Depends: ${shlibs:Depends}, texlive-latex-recommended | tetex-extra
Suggests: tidy, wget | w3m
Description: general markup converter
Pandoc is a Haskell library for converting from one markup format to
@@ -38,7 +38,7 @@ Description: general markup converter
Package: libghc6-pandoc-dev
Section: libdevel
Architecture: any
-Depends: ${haskell:Depends}, libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, zip, texlive-latex-recommended | tetex-extra
+Depends: ${haskell:Depends}, libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, texlive-latex-recommended | tetex-extra
Suggests: pandoc-doc
Description: general markup converter
Pandoc is a Haskell library for converting from one markup format to
diff --git a/debian/copyright b/debian/copyright
index 444f73796..b87bcaa64 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -57,68 +57,6 @@ by Eric A. Meyer
Released under an explicit Public Domain License
-----------------------------------------------------------------------
-System/IO/UTF8.hs and Codec/Binary/UTF8/String.hs
-from the utf8-string package on HackageDB
-Copyright (c) 2007, Galois Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Galois Inc. nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY Galois Inc. ``AS IS'' AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL Galois Inc. BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-----------------------------------------------------------------------
-Text/XML/Light/*
-from the xml package on HackageDB
-(c) 2007 Galois Inc.
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-1. Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-notice, this list of conditions and the following disclaimer in the
-documentation and/or other materials provided with the distribution.
-
-3. Neither the name of the author nor the names of his contributors
-may be used to endorse or promote products derived from this software
-without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
-OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
------------------------------------------------------------------------
Diff.hs in tests/
from the Diff package v 0.1.2 (Data.Algorithm.Diff)
diff --git a/odt-styles/content.xml b/odt-styles/content.xml
deleted file mode 100644
index 801af034d..000000000
--- a/odt-styles/content.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<?xml version="1.0" encoding="utf-8" ?>
-
-<office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" office:version="1.0">
- <office:scripts />
- <office:font-face-decls>
- <style:font-face style:name="&amp;apos;Lucida Sans Unicode&amp;apos;" svg:font-family="Lucida Sans Unicode" />
- <style:font-face style:name="&amp;apos;Tahoma&amp;apos;" svg:font-family="Tahoma" />
- <style:font-face style:name="&amp;apos;Times New Roman&amp;apos;" svg:font-family="Times New Roman" />
- </office:font-face-decls>
- <office:automatic-styles>
- </office:automatic-styles>
- <office:body>
- <office:text>
- </office:text>
- </office:body>
-
-</office:document-content>
diff --git a/pandoc.cabal b/pandoc.cabal
index a68a21278..94a2623af 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -38,12 +38,12 @@ Extra-Source-Files:
-- documentation
README, INSTALL, COPYRIGHT, COPYING, changelog,
-- sources for man pages
- man/man1/pandoc.1.md,
+ man/man1/pandoc.1.md, man/man1/markdown2pdf.1.md,
man/man1/html2markdown.1.md, man/man1/hsmarkdown.1.md,
-- Makefile
Makefile,
-- wrappers
- html2markdown, hsmarkdown,
+ markdown2pdf, html2markdown, hsmarkdown,
-- data for DefaultHeaders.hs
data/headers/ConTeXt.header,
data/headers/Docbook.header,
@@ -65,13 +65,11 @@ Extra-Source-Files:
data/ui/default/print.css,
-- data for ODT writer
odt-styles/meta.xml,
- odt-styles/content.xml,
odt-styles/settings.xml,
odt-styles/META-INF/manifest.xml,
odt-styles/Thumbnails/thumbnail.png,
odt-styles/styles.xml,
odt-styles/mimetype,
- odt-styles/reference.odt,
-- tests
tests/bodybg.gif,
tests/writer.latex,
@@ -151,7 +149,7 @@ Library
Exposed-Modules: Text.Pandoc.Biblio
cpp-options: -D_CITEPROC
Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory,
- template-haskell, bytestring
+ template-haskell, bytestring, zip-archive, utf8-string, old-time
Hs-Source-Dirs: .
Exposed-Modules: Text.Pandoc,
Text.Pandoc.Blocks,
@@ -180,17 +178,7 @@ Library
Text.Pandoc.Writers.RTF,
Text.Pandoc.Writers.S5
Other-Modules: Text.Pandoc.XML,
- Text.Pandoc.TH,
- -- from xml package
- Text.XML.Light,
- Text.XML.Light.Types,
- Text.XML.Light.Output,
- Text.XML.Light.Input,
- Text.XML.Light.Proc,
- Text.XML.Light.Cursor,
- -- from utf8-string package
- System.IO.UTF8,
- Codec.Binary.UTF8.String
+ Text.Pandoc.TH
Extensions: CPP, TemplateHaskell, FlexibleInstances
Ghc-Options: -O2 -Wall -threaded
Ghc-Prof-Options: -auto-all