aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/SelfContained.hs2
-rw-r--r--src/Text/Pandoc/Templates.hs5
-rw-r--r--src/Text/Pandoc/UTF8.hs30
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs21
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs20
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs8
-rw-r--r--src/pandoc.hs5
7 files changed, 57 insertions, 34 deletions
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 7a21f6f3a..e468d504d 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -37,7 +37,6 @@ import Network.HTTP
import Data.ByteString.Base64
import qualified Data.ByteString.Char8 as B
import Data.ByteString (ByteString)
-import Data.ByteString.UTF8 (toString, fromString)
import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
@@ -45,6 +44,7 @@ import qualified Data.ByteString.Lazy as L
import Text.Pandoc.Shared (findDataFile, renderTags')
import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist)
+import Text.Pandoc.UTF8 (toString, fromString)
getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
getItem userdata f =
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 4f5ad54bd..e81fd9d14 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -78,7 +78,8 @@ import Text.Blaze.Internal (preEscapedString)
#else
import Text.Blaze (preEscapedString, Html)
#endif
-import Data.ByteString.Lazy.UTF8 (ByteString, fromString)
+import Text.Pandoc.UTF8 (fromStringLazy)
+import Data.ByteString.Lazy (ByteString)
import Text.Pandoc.Shared (readDataFile)
import qualified Control.Exception.Extensible as E (try, IOException)
@@ -118,7 +119,7 @@ instance TemplateTarget String where
toTarget = id
instance TemplateTarget ByteString where
- toTarget = fromString
+ toTarget = fromStringLazy
instance TemplateTarget Html where
toTarget = preEscapedString
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 5be7d3c8e..1f2b4b695 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -35,6 +35,10 @@ module Text.Pandoc.UTF8 ( readFile
, hPutStr
, hPutStrLn
, hGetContents
+ , toString
+ , fromString
+ , toStringLazy
+ , fromStringLazy
, encodePath
, decodeArg
)
@@ -50,6 +54,13 @@ import System.IO hiding (readFile, writeFile, getContents,
putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
import qualified System.IO as IO
+import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Text.Encoding as T
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Encoding as TL
+import Data.Text.Encoding.Error
readFile :: FilePath -> IO String
readFile f = do
@@ -75,15 +86,28 @@ hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
hGetContents :: Handle -> IO String
-hGetContents h = hSetEncoding h utf8_bom >> hSetNewlineMode h universalNewlineMode
+hGetContents h = hSetEncoding h utf8_bom
+ >> hSetNewlineMode h universalNewlineMode
>> IO.hGetContents h
+toString :: B.ByteString -> String
+toString = T.unpack . T.decodeUtf8With lenientDecode
+
+fromString :: String -> B.ByteString
+fromString = T.encodeUtf8 . T.pack
+
+toStringLazy :: BL.ByteString -> String
+toStringLazy = TL.unpack . TL.decodeUtf8With lenientDecode
+
+fromStringLazy :: String -> BL.ByteString
+fromStringLazy = TL.encodeUtf8 . TL.pack
+
encodePath :: FilePath -> FilePath
decodeArg :: String -> String
#if MIN_VERSION_base(4,4,0)
encodePath = id
decodeArg = id
#else
-encodePath = encodeString
-decodeArg = decodeString
+encodePath = B.unpack . fromString
+decodeArg = toString . B.pack
#endif
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 84bf95dfb..f8e3370e4 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -32,8 +32,7 @@ import Data.List ( intercalate )
import System.FilePath ( (</>) )
import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
-import Data.ByteString.Lazy.UTF8 ( fromString, toString )
-import Text.Pandoc.UTF8 as UTF8
+import qualified Text.Pandoc.UTF8 as UTF8
import System.IO ( stderr )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
@@ -126,7 +125,7 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do
let newrels = map toImgRel imgs
let relpath = "word/_rels/document.xml.rels"
let reldoc = case findEntryByPath relpath refArchive >>=
- parseXMLDoc . toString . fromEntry of
+ parseXMLDoc . UTF8.toStringLazy . fromEntry of
Just d -> d
Nothing -> error $ relpath ++ "missing in reference docx"
let reldoc' = reldoc{ elContent = elContent reldoc ++ map Elem newrels }
@@ -138,21 +137,21 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do
let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
let newrels' = map toLinkRel $ M.toList $ stExternalLinks st
let reldoc'' = reldoc' { elContent = elContent reldoc' ++ map Elem newrels' }
- let relEntry = toEntry relpath epochtime $ fromString $ showTopElement' reldoc''
- let contentEntry = toEntry "word/document.xml" epochtime $ fromString $ showTopElement' newContents
+ let relEntry = toEntry relpath epochtime $ UTF8.fromStringLazy $ showTopElement' reldoc''
+ let contentEntry = toEntry "word/document.xml" epochtime $ UTF8.fromStringLazy $ showTopElement' newContents
-- styles
let newstyles = styleToOpenXml $ writerHighlightStyle opts
let stylepath = "word/styles.xml"
let styledoc = case findEntryByPath stylepath refArchive >>=
- parseXMLDoc . toString . fromEntry of
+ parseXMLDoc . UTF8.toStringLazy . fromEntry of
Just d -> d
Nothing -> error $ "Unable to parse " ++ stylepath ++
" from reference.docx"
let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles }
- let styleEntry = toEntry stylepath epochtime $ fromString $ showTopElement' styledoc'
+ let styleEntry = toEntry stylepath epochtime $ UTF8.fromStringLazy $ showTopElement' styledoc'
-- construct word/numbering.xml
let numpath = "word/numbering.xml"
- let numEntry = toEntry numpath epochtime $ fromString $ showTopElement'
+ let numEntry = toEntry numpath epochtime $ UTF8.fromStringLazy $ showTopElement'
$ mkNumbering (stNumStyles st) (stLists st)
let docPropsPath = "docProps/core.xml"
let docProps = mknode "cp:coreProperties"
@@ -166,16 +165,16 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do
(maybe "" id $ normalizeDate $ stringify date)
: mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here
: map (mknode "dc:creator" [] . stringify) auths
- let docPropsEntry = toEntry docPropsPath epochtime $ fromString $ showTopElement' docProps
+ let docPropsEntry = toEntry docPropsPath epochtime $ UTF8.fromStringLazy $ showTopElement' docProps
let relsPath = "_rels/.rels"
rels <- case findEntryByPath relsPath refArchive of
- Just e -> return $ toString $ fromEntry e
+ Just e -> return $ UTF8.toStringLazy $ fromEntry e
Nothing -> err 57 "could not find .rels/_rels in reference docx"
-- fix .rels/_rels, which can get screwed up when reference.docx is edited by Word
let rels' = substitute "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
"http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties"
rels
- let relsEntry = toEntry relsPath epochtime $ fromString rels'
+ let relsEntry = toEntry relsPath epochtime $ UTF8.fromStringLazy rels'
let archive = foldr addEntryToArchive refArchive $
relsEntry : contentEntry : relEntry : numEntry : styleEntry : docPropsEntry : imageEntries
return $ fromArchive archive
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 6f8931caa..3fac93c05 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -35,7 +35,7 @@ import System.Environment ( getEnv )
import Text.Printf (printf)
import System.FilePath ( (</>), (<.>), takeBaseName, takeExtension, takeFileName )
import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Lazy.UTF8 ( fromString )
+import Text.Pandoc.UTF8 ( fromStringLazy )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Text.Pandoc.Shared hiding ( Element )
@@ -82,7 +82,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
Nothing -> return ([],[])
Just img -> do
let coverImage = "cover-image" ++ takeExtension img
- let cpContent = fromString $ writeHtmlString
+ let cpContent = fromStringLazy $ writeHtmlString
opts'{writerTemplate = coverImageTemplate,
writerVariables = ("coverimage",coverImage):vars}
(Pandoc meta [])
@@ -91,7 +91,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
, [mkEntry coverImage imgContent] )
-- title page
- let tpContent = fromString $ writeHtmlString
+ let tpContent = fromStringLazy $ writeHtmlString
opts'{writerTemplate = titlePageTemplate}
(Pandoc meta [])
let tpEntry = mkEntry "title_page.xhtml" tpContent
@@ -125,7 +125,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
let chapterToEntry :: Int -> Pandoc -> Entry
chapterToEntry num chap = mkEntry
(showChapter num) $
- fromString $ chapToHtml chap
+ fromStringLazy $ chapToHtml chap
let chapterEntries = zipWith chapterToEntry [1..] chapters
-- contents.opf
@@ -157,7 +157,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
let plainTitle = plainify $ docTitle meta
let plainAuthors = map plainify $ docAuthors meta
let plainDate = maybe "" id $ normalizeDate $ stringify $ docDate meta
- let contentsData = fromString $ ppTopElement $
+ let contentsData = fromStringLazy $ ppTopElement $
unode "package" ! [("version","2.0")
,("xmlns","http://www.idpf.org/2007/opf")
,("unique-identifier","BookId")] $
@@ -189,7 +189,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
, unode "content" ! [("src",
eRelativePath ent)] $ ()
]
- let tocData = fromString $ ppTopElement $
+ let tocData = fromStringLazy $ ppTopElement $
unode "ncx" ! [("version","2005-1")
,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
[ unode "head" $
@@ -214,10 +214,10 @@ writeEPUB opts doc@(Pandoc meta _) = do
let tocEntry = mkEntry "toc.ncx" tocData
-- mimetype
- let mimetypeEntry = mkEntry "mimetype" $ fromString "application/epub+zip"
+ let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip"
-- container.xml
- let containerData = fromString $ ppTopElement $
+ let containerData = fromStringLazy $ ppTopElement $
unode "container" ! [("version","1.0")
,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
unode "rootfiles" $
@@ -226,7 +226,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
let containerEntry = mkEntry "META-INF/container.xml" containerData
-- com.apple.ibooks.display-options.xml
- let apple = fromString $ ppTopElement $
+ let apple = fromStringLazy $ ppTopElement $
unode "display_options" $
unode "platform" ! [("name","*")] $
unode "option" ! [("name","specified-fonts")] $ "true"
@@ -236,7 +236,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
stylesheet <- case writerEpubStylesheet opts of
Just s -> return s
Nothing -> readDataFile (writerUserDataDir opts) "epub.css"
- let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet
+ let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet
-- construct archive
let archive = foldr addEntryToArchive emptyArchive
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index f43d0a087..cbff88be5 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -32,7 +32,7 @@ import Data.IORef
import Data.List ( isPrefixOf )
import System.FilePath ( (</>), takeExtension )
import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Lazy.UTF8 ( fromString )
+import Text.Pandoc.UTF8 ( fromStringLazy )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Paths_pandoc ( getDataFileName )
@@ -74,7 +74,7 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do
doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc
let newContents = writeOpenDocument opts{writerWrapText = False} doc'
epochtime <- floor `fmap` getPOSIXTime
- let contentEntry = toEntry "content.xml" epochtime $ fromString newContents
+ let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents
picEntries <- readIORef picEntriesRef
let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries
-- construct META-INF/manifest.xml based on archive
@@ -86,7 +86,7 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do
]
let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ]
let manifestEntry = toEntry "META-INF/manifest.xml" epochtime
- $ fromString $ show
+ $ fromStringLazy $ show
$ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
$$
( inTags True "manifest:manifest"
@@ -100,7 +100,7 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do
)
let archive' = addEntryToArchive manifestEntry archive
let metaEntry = toEntry "meta.xml" epochtime
- $ fromString $ show
+ $ fromStringLazy $ show
$ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
$$
( inTags True "office:document-meta"
diff --git a/src/pandoc.hs b/src/pandoc.hs
index 305557f6a..484f7ca44 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -57,7 +57,6 @@ import Control.Monad (when, unless, liftM)
import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
import Network.URI (parseURI, isURI, URI(..))
import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Lazy.UTF8 (toString)
import Text.CSL.Reference (Reference(..))
copyrightMessage :: String
@@ -988,7 +987,7 @@ main = do
readURI u
_ -> UTF8.readFile src
readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>=
- return . toString -- treat all as UTF8
+ return . UTF8.toStringLazy -- treat all as UTF8
let convertTabs = tabFilter (if preserveTabs then 0 else tabStop)
@@ -1038,7 +1037,7 @@ main = do
res <- tex2pdf latexEngine $ f writerOptions doc2
case res of
Right pdf -> writeBinary pdf
- Left err' -> err 43 $ toString err'
+ Left err' -> err 43 $ UTF8.toStringLazy err'
| otherwise -> selfcontain (f writerOptions doc2 ++
['\n' | not standalone'])
>>= writerFn outputFile . handleEntities