diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2012-12-29 17:44:02 -0800 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2012-12-29 17:54:07 -0800 |
commit | 1864bb0994cc70b99ea8207b3a22438c343348c0 (patch) | |
tree | 79c773cad15876d1f37f3907c8eec45ac278b6b5 | |
parent | 32c5a8e2dcc1262a8181d0f9b6ffe208e7499481 (diff) | |
download | pandoc-1864bb0994cc70b99ea8207b3a22438c343348c0.tar.gz |
Data files changes.
* Added `embed_data_files` flag. (not yet used)
* Shared no longer exports `findDataFile`.
* `readDataFile` now returns a strict bytestring.
* Shared now exports `readDataFileUTF8` which returns a string like
the old `readDataFile`.
* Rewrote modules to use new data file functions and to avoid
using functions from Paths_pandoc directly.
-rw-r--r-- | benchmark/benchmark-pandoc.hs | 6 | ||||
-rw-r--r-- | pandoc.cabal | 7 | ||||
-rw-r--r-- | pandoc.hs | 22 | ||||
-rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 41 | ||||
-rw-r--r-- | src/Text/Pandoc/Templates.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 13 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ODT.hs | 15 |
9 files changed, 64 insertions, 67 deletions
diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 728e45b56..015d0135a 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -1,5 +1,5 @@ import Text.Pandoc -import Text.Pandoc.Shared (readDataFile, normalize) +import Text.Pandoc.Shared (readDataFileUTF8, normalize) import Criterion.Main import Criterion.Config import Text.JSON.Generic @@ -35,8 +35,8 @@ main :: IO () main = do args <- getArgs (conf,_) <- parseArgs defaultConfig{ cfgSamples = Last $ Just 20 } defaultOptions args - inp <- readDataFile (Just ".") "README" - inp2 <- readDataFile (Just ".") "tests/testsuite.txt" + inp <- readDataFileUTF8 (Just ".") "README" + inp2 <- readDataFileUTF8 (Just ".") "tests/testsuite.txt" let opts = def{ readerSmart = True } let doc = readMarkdown opts $ inp ++ unlines (drop 3 $ lines inp2) let readerBs = map (readerBench doc) readers diff --git a/pandoc.cabal b/pandoc.cabal index 40d76be3f..8b40344cf 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -187,6 +187,9 @@ Source-repository head Flag blaze_html_0_5 Description: Use blaze-html 0.5 and blaze-markup 0.5 Default: True +Flag embed_data_files + Description: Embed data files in binary for relocatable executable. + Default: False Library Build-Depends: base >= 4.2 && <5, @@ -224,6 +227,10 @@ Library else build-depends: blaze-html >= 0.4.3.0 && < 0.5 + if flag(embed_data_files) + build-depends: file-embed >= 0.0.4 && < 0.1, + template-haskell >= 2.4 && < 2.9 + cpp-options: -DEMBED_DATA_FILES if impl(ghc >= 7.0.1) Ghc-Options: -O2 -rtsopts -Wall -fno-warn-unused-do-bind -dno-debug-output else @@ -33,7 +33,7 @@ module Main where import Text.Pandoc import Text.Pandoc.PDF (tex2pdf) import Text.Pandoc.Readers.LaTeX (handleIncludes) -import Text.Pandoc.Shared ( tabFilter, readDataFile, safeRead, +import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, safeRead, headerShift, normalize, err, warn ) import Text.Pandoc.XML ( toEntities, fromEntities ) import Text.Pandoc.SelfContained ( makeSelfContained ) @@ -889,24 +889,27 @@ main = do E.catch (UTF8.readFile tp') (\e -> if isDoesNotExistError e then E.catch - (readDataFile datadir $ - "templates" </> tp') + (readDataFileUTF8 datadir + ("templates" </> tp')) (\e' -> let _ = (e' :: E.SomeException) in throwIO e') else throwIO e) variables' <- case mathMethod of LaTeXMathML Nothing -> do - s <- readDataFile datadir $ "data" </> "LaTeXMathML.js" + s <- readDataFileUTF8 datadir + ("data" </> "LaTeXMathML.js") return $ ("mathml-script", s) : variables MathML Nothing -> do - s <- readDataFile datadir $ "data"</>"MathMLinHTML.js" + s <- readDataFileUTF8 datadir + ("data"</>"MathMLinHTML.js") return $ ("mathml-script", s) : variables _ -> return variables variables'' <- if "dzslides" `isPrefixOf` writerName' then do - dztempl <- readDataFile datadir $ "dzslides" </> "template.html" + dztempl <- readDataFileUTF8 datadir + ("dzslides" </> "template.html") let dzcore = unlines $ dropWhile (not . isPrefixOf "<!-- {{{{ dzslides core") $ lines dztempl return $ ("dzslides-core", dzcore) : variables' @@ -927,15 +930,16 @@ main = do then do csl <- CSL.parseCSL =<< case mbCsl of - Nothing -> readDataFile datadir "default.csl" + Nothing -> readDataFileUTF8 datadir + "default.csl" Just cslfile -> do exists <- doesFileExist cslfile if exists then UTF8.readFile cslfile else do csldir <- getAppUserDataDirectory "csl" - readDataFile (Just csldir) - (replaceExtension cslfile "csl") + readDataFileUTF8 (Just csldir) + (replaceExtension cslfile "csl") abbrevs <- maybe (return []) CSL.readJsonAbbrevFile cslabbrevs return $ Just csl { CSL.styleAbbrevs = abbrevs } else return Nothing diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index e468d504d..4b52a6f13 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -41,7 +41,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (findDataFile, renderTags') +import Text.Pandoc.Shared (readDataFile, renderTags') import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) import Text.Pandoc.UTF8 (toString, fromString) @@ -55,18 +55,8 @@ getItem userdata f = ".gz" -> getMimeType $ dropExtension f x -> getMimeType x exists <- doesFileExist f - if exists - then do - cont <- B.readFile f - return (cont, mime) - else do - res <- findDataFile userdata f - exists' <- doesFileExist res - if exists' - then do - cont <- B.readFile res - return (cont, mime) - else error $ "Could not find `" ++ f ++ "'" + cont <- if exists then B.readFile f else readDataFile userdata f + return (cont, mime) -- TODO - have this return mime type too - then it can work for google -- chart API, e.g. diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index dee10cf9b..71bdca9dd 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveDataTypeable, CPP #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -64,8 +64,8 @@ module Text.Pandoc.Shared ( renderTags', -- * File handling inDirectory, - findDataFile, readDataFile, + readDataFileUTF8, -- * Error handling err, warn, @@ -89,13 +89,18 @@ import System.FilePath ( (</>) ) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import Control.Monad (msum) -import Paths_pandoc (getDataFileName) import Text.Pandoc.Pretty (charWidth) import System.Locale (defaultTimeLocale) import Data.Time import System.IO (stderr) import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), renderOptions) +import qualified Data.ByteString as B +#ifdef EMBED_DATA_FILES +import Data.FileEmbed +#else +import Paths_pandoc (getDataFileName) +#endif -- -- List processing @@ -499,20 +504,28 @@ inDirectory path action = do setCurrentDirectory oldDir return result --- | Get file path for data file, either from specified user data directory, --- or, if not found there, from Cabal data directory. -findDataFile :: Maybe FilePath -> FilePath -> IO FilePath -findDataFile Nothing f = getDataFileName f -findDataFile (Just u) f = do - ex <- doesFileExist (u </> f) - if ex - then return (u </> f) - else getDataFileName f +readDefaultDataFile :: FilePath -> IO B.ByteString +readDefaultDataFile fname = +#ifdef EMBED_DATA_FILES + TODO +#else + getDataFileName fname >>= B.readFile +#endif -- | Read file from specified user data directory or, if not found there, from -- Cabal data directory. -readDataFile :: Maybe FilePath -> FilePath -> IO String -readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile +readDataFile :: Maybe FilePath -> FilePath -> IO B.ByteString +readDataFile Nothing fname = readDefaultDataFile fname +readDataFile (Just userDir) fname = do + exists <- doesFileExist (userDir </> fname) + if exists + then B.readFile (userDir </> fname) + else readDefaultDataFile fname + +-- | Same as 'readDataFile' but returns a String instead of a ByteString. +readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String +readDataFileUTF8 userDir fname = + UTF8.toString `fmap` readDataFile userDir fname -- -- Error reporting diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 75c133101..bbdb4adc4 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -80,7 +80,7 @@ import Text.Blaze (preEscapedString, Html) #endif import Text.Pandoc.UTF8 (fromStringLazy) import Data.ByteString.Lazy (ByteString) -import Text.Pandoc.Shared (readDataFile) +import Text.Pandoc.Shared (readDataFileUTF8) import qualified Control.Exception.Extensible as E (try, IOException) -- | Get default template for the specified writer. @@ -98,7 +98,7 @@ getDefaultTemplate user writer = do "multimarkdown" -> getDefaultTemplate user "markdown" "markdown_github" -> getDefaultTemplate user "markdown" _ -> let fname = "templates" </> "default" <.> format - in E.try $ readDataFile user fname + in E.try $ readDataFileUTF8 user fname data TemplateState = TemplateState Int [(String,String)] diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index b0f31ac45..706ced967 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -29,14 +29,12 @@ Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where import Data.List ( intercalate ) -import System.FilePath ( (</>) ) import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import qualified Text.Pandoc.UTF8 as UTF8 import System.IO ( stderr ) import Codec.Archive.Zip import Data.Time.Clock.POSIX -import Paths_pandoc ( getDataFileName ) import Text.Pandoc.Definition import Text.Pandoc.Generic import System.Directory @@ -104,15 +102,8 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do refArchive <- liftM toArchive $ case writerReferenceDocx opts of Just f -> B.readFile f - Nothing -> do - let defaultDocx = getDataFileName "reference.docx" >>= B.readFile - case datadir of - Nothing -> defaultDocx - Just d -> do - exists <- doesFileExist (d </> "reference.docx") - if exists - then B.readFile (d </> "reference.docx") - else defaultDocx + Nothing -> (B.fromChunks . (:[])) `fmap` + readDataFile datadir "reference.docx" (newContents, st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc) defaultWriterState diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index e7feace7a..4ffff0054 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -30,13 +30,13 @@ Conversion of 'Pandoc' documents to EPUB. module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where import Data.IORef import Data.Maybe ( fromMaybe, isNothing ) -import Data.List ( isPrefixOf, isInfixOf, intercalate ) +import Data.List ( isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import Text.Pandoc.UTF8 ( fromStringLazy ) +import Text.Pandoc.UTF8 ( fromStringLazy, toString ) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Data.Time @@ -321,7 +321,8 @@ writeEPUB version opts doc@(Pandoc meta _) = do -- stylesheet stylesheet <- case writerEpubStylesheet opts of Just s -> return s - Nothing -> readDataFile (writerUserDataDir opts) "epub.css" + Nothing -> toString `fmap` + readDataFile (writerUserDataDir opts) "epub.css" let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet -- construct archive diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index cbff88be5..5e92cfcec 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -35,15 +35,13 @@ import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip import Data.Time.Clock.POSIX -import Paths_pandoc ( getDataFileName ) import Text.Pandoc.Options ( WriterOptions(..) ) -import Text.Pandoc.Shared ( stringify ) +import Text.Pandoc.Shared ( stringify, readDataFile ) import Text.Pandoc.ImageSize ( readImageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) -import System.Directory import Control.Monad (liftM) import Network.URI ( unEscapeString ) import Text.Pandoc.XML @@ -59,15 +57,8 @@ writeODT opts doc@(Pandoc (Meta title _ _) _) = do refArchive <- liftM toArchive $ case writerReferenceODT opts of Just f -> B.readFile f - Nothing -> do - let defaultODT = getDataFileName "reference.odt" >>= B.readFile - case datadir of - Nothing -> defaultODT - Just d -> do - exists <- doesFileExist (d </> "reference.odt") - if exists - then B.readFile (d </> "reference.odt") - else defaultODT + Nothing -> (B.fromChunks . (:[])) `fmap` + readDataFile datadir "reference.odt" -- handle pictures picEntriesRef <- newIORef ([] :: [Entry]) let sourceDir = writerSourceDirectory opts |