aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Class
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2020-03-15 04:49:36 +0100
committerGitHub <noreply@github.com>2020-03-14 20:49:36 -0700
commitec49643d647080218bedfaf61b3eb9b9383e42c0 (patch)
treedb0093d2ad97010e6c4fd92c3295b2873733a696 /src/Text/Pandoc/Class
parent11b5f1e40b4c1e4ddbfb93a8d75bfd55ef52a6df (diff)
downloadpandoc-ec49643d647080218bedfaf61b3eb9b9383e42c0.tar.gz
Subdivide Text.Pandoc.Class into small modules (#6106)
* Extract CommonState into submodule * Extract PandocMonad into submodule * PandocMonad: ensure all functions have Haddock documentation
Diffstat (limited to 'src/Text/Pandoc/Class')
-rw-r--r--src/Text/Pandoc/Class/CommonState.hs80
-rw-r--r--src/Text/Pandoc/Class/PandocMonad.hs706
2 files changed, 786 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
new file mode 100644
index 000000000..4a0f66859
--- /dev/null
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -0,0 +1,80 @@
+{- |
+Module : Text.Pandoc.Class.CommonState
+Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability : alpha
+Portability : portable
+
+Common state shared by all pandoc-specific operations, including
+those in readers, writers, and Lua filters.
+-}
+
+module Text.Pandoc.Class.CommonState
+ ( CommonState(..)
+ , defaultCommonState
+ )
+where
+
+import Data.Default (Default (def))
+import Data.Text (Text)
+import Text.Pandoc.BCP47 (Lang)
+import Text.Pandoc.MediaBag (MediaBag)
+import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
+import Text.Pandoc.Translations (Translations)
+
+-- | 'CommonState' represents state that is used by all
+-- instances of 'PandocMonad'. Normally users should not
+-- need to interact with it directly; instead, auxiliary
+-- functions like 'setVerbosity' and 'withMediaBag' should be used.
+data CommonState = CommonState
+ { stLog :: [LogMessage]
+ -- ^ A list of log messages in reverse order
+ , stUserDataDir :: Maybe FilePath
+ -- ^ Directory to search for data files
+ , stSourceURL :: Maybe Text
+ -- ^ Absolute URL + dir of 1st source file
+ , stRequestHeaders :: [(Text, Text)]
+ -- ^ Headers to add for HTTP requests
+ , stMediaBag :: MediaBag
+ -- ^ Media parsed from binary containers
+ , stTranslations :: Maybe (Lang, Maybe Translations)
+ -- ^ Translations for localization
+ , stInputFiles :: [FilePath]
+ -- ^ List of input files from command line
+ , stOutputFile :: Maybe FilePath
+ -- ^ Output file from command line
+ , stResourcePath :: [FilePath]
+ -- ^ Path to search for resources like
+ -- included images
+ , stVerbosity :: Verbosity
+ -- ^ Verbosity level
+ , stTrace :: Bool
+ -- ^ Controls whether tracing messages are
+ -- issued.
+ }
+
+-- | The default @'CommonState'@. All fields are initialized as the
+-- monoid identity of their resprective type, except for:
+--
+-- * @'stResourcePath'@, which is set to @["."]@,
+-- * @'stTrace'@, which is set to @'False'@, and
+-- * @'stVerbosity'@, which is set to @WARNING@.
+defaultCommonState :: CommonState
+defaultCommonState = CommonState
+ { stLog = []
+ , stUserDataDir = Nothing
+ , stSourceURL = Nothing
+ , stRequestHeaders = []
+ , stMediaBag = mempty
+ , stTranslations = Nothing
+ , stInputFiles = []
+ , stOutputFile = Nothing
+ , stResourcePath = ["."]
+ , stVerbosity = WARNING
+ , stTrace = False
+ }
+
+instance Default CommonState where
+ def = defaultCommonState
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
new file mode 100644
index 000000000..7f31eec96
--- /dev/null
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -0,0 +1,706 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE ViewPatterns #-}
+#if MIN_VERSION_base(4,8,0)
+#else
+{-# LANGUAGE OverlappingInstances #-}
+#endif
+{- |
+Module : Text.Pandoc.Class.PandocMonad
+Copyright : Copyright (C) 2016-20 Jesse Rosenthal, John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability : alpha
+Portability : portable
+
+This module defines a type class, 'PandocMonad', for pandoc readers
+and writers.
+-}
+
+module Text.Pandoc.Class.PandocMonad
+ ( PandocMonad(..)
+ , getPOSIXTime
+ , getZonedTime
+ , readFileFromDirs
+ , report
+ , setTrace
+ , setRequestHeader
+ , getLog
+ , setVerbosity
+ , getVerbosity
+ , getMediaBag
+ , setMediaBag
+ , insertMedia
+ , setUserDataDir
+ , getUserDataDir
+ , fetchItem
+ , fetchMediaResource
+ , getInputFiles
+ , setInputFiles
+ , getOutputFile
+ , setOutputFile
+ , setResourcePath
+ , getResourcePath
+ , readDefaultDataFile
+ , readDataFile
+ , fillMediaBag
+ , toLang
+ , setTranslations
+ , translateTerm
+ , makeCanonical
+ ) where
+
+import Prelude
+import Codec.Archive.Zip
+import Control.Monad.Except (MonadError (catchError, throwError),
+ MonadTrans, lift, when)
+import Data.Digest.Pure.SHA (sha1, showDigest)
+import Data.Maybe (fromMaybe)
+import Data.Time (UTCTime)
+import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds)
+import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime)
+import Network.URI ( escapeURIString, nonStrictRelativeTo,
+ unEscapeString, parseURIReference, isAllowedInURI,
+ parseURI, URI(..) )
+import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
+ isRelative, splitDirectories)
+import System.Random (StdGen)
+import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import Text.Pandoc.Class.CommonState (CommonState (..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
+import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
+import Text.Pandoc.Shared (uriPathToPath)
+import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
+ readTranslations)
+import Text.Pandoc.Walk (walkM)
+import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Text as T
+import qualified Debug.Trace
+import qualified System.FilePath.Posix as Posix
+import qualified Text.Pandoc.MediaBag as MB
+import qualified Text.Pandoc.UTF8 as UTF8
+#ifdef EMBED_DATA_FILES
+import Text.Pandoc.Data (dataFiles)
+#endif
+
+-- | The PandocMonad typeclass contains all the potentially
+-- IO-related functions used in pandoc's readers and writers.
+-- Instances of this typeclass may implement these functions
+-- in IO (as in 'PandocIO') or using an internal state that
+-- represents a file system, time, and so on (as in 'PandocPure').
+class (Functor m, Applicative m, Monad m, MonadError PandocError m)
+ => PandocMonad m where
+ -- | Lookup an environment variable.
+ lookupEnv :: T.Text -> m (Maybe T.Text)
+ -- | Get the current (UTC) time.
+ getCurrentTime :: m UTCTime
+ -- | Get the locale's time zone.
+ getCurrentTimeZone :: m TimeZone
+ -- | Return a new generator for random numbers.
+ newStdGen :: m StdGen
+ -- | Return a new unique integer.
+ newUniqueHash :: m Int
+ -- | Retrieve contents and mime type from a URL, raising
+ -- an error on failure.
+ openURL :: T.Text -> m (B.ByteString, Maybe MimeType)
+ -- | Read the lazy ByteString contents from a file path,
+ -- raising an error on failure.
+ readFileLazy :: FilePath -> m BL.ByteString
+ -- | Read the strict ByteString contents from a file path,
+ -- raising an error on failure.
+ readFileStrict :: FilePath -> m B.ByteString
+ -- | Return a list of paths that match a glob, relative to
+ -- the working directory. See 'System.FilePath.Glob' for
+ -- the glob syntax.
+ glob :: String -> m [FilePath]
+ -- | Returns True if file exists.
+ fileExists :: FilePath -> m Bool
+ -- | Returns the path of data file.
+ getDataFileName :: FilePath -> m FilePath
+ -- | Return the modification time of a file.
+ getModificationTime :: FilePath -> m UTCTime
+ -- | Get the value of the 'CommonState' used by all instances
+ -- of 'PandocMonad'.
+ getCommonState :: m CommonState
+ -- | Set the value of the 'CommonState' used by all instances
+ -- of 'PandocMonad'.
+ -- | Get the value of a specific field of 'CommonState'.
+ putCommonState :: CommonState -> m ()
+ -- | Get the value of a specific field of 'CommonState'.
+ getsCommonState :: (CommonState -> a) -> m a
+ getsCommonState f = f <$> getCommonState
+ -- | Modify the 'CommonState'.
+ modifyCommonState :: (CommonState -> CommonState) -> m ()
+ modifyCommonState f = getCommonState >>= putCommonState . f
+ -- | Output a log message.
+ logOutput :: LogMessage -> m ()
+ -- | Output a debug message to sterr, using 'Debug.Trace.trace',
+ -- if tracing is enabled. Note: this writes to stderr even in
+ -- pure instances.
+ trace :: T.Text -> m ()
+ trace msg = do
+ tracing <- getsCommonState stTrace
+ when tracing $ Debug.Trace.trace ("[trace] " ++ T.unpack msg) (return ())
+
+-- * Functions defined for all PandocMonad instances
+
+-- | Set the verbosity level.
+setVerbosity :: PandocMonad m => Verbosity -> m ()
+setVerbosity verbosity =
+ modifyCommonState $ \st -> st{ stVerbosity = verbosity }
+
+-- | Get the verbosity level.
+getVerbosity :: PandocMonad m => m Verbosity
+getVerbosity = getsCommonState stVerbosity
+
+-- | Get the accomulated log messages (in temporal order).
+getLog :: PandocMonad m => m [LogMessage]
+getLog = reverse <$> getsCommonState stLog
+
+-- | Log a message using 'logOutput'. Note that 'logOutput' is
+-- called only if the verbosity level exceeds the level of the
+-- message, but the message is added to the list of log messages
+-- that will be retrieved by 'getLog' regardless of its verbosity level.
+report :: PandocMonad m => LogMessage -> m ()
+report msg = do
+ verbosity <- getsCommonState stVerbosity
+ let level = messageVerbosity msg
+ when (level <= verbosity) $ logOutput msg
+ modifyCommonState $ \st -> st{ stLog = msg : stLog st }
+
+-- | Determine whether tracing is enabled. This affects
+-- the behavior of 'trace'. If tracing is not enabled,
+-- 'trace' does nothing.
+setTrace :: PandocMonad m => Bool -> m ()
+setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing}
+
+-- | Set request header to use in HTTP requests.
+setRequestHeader :: PandocMonad m
+ => T.Text -- ^ Header name
+ -> T.Text -- ^ Value
+ -> m ()
+setRequestHeader name val = modifyCommonState $ \st ->
+ st{ stRequestHeaders =
+ (name, val) : filter (\(n,_) -> n /= name) (stRequestHeaders st) }
+
+-- | Initialize the media bag.
+setMediaBag :: PandocMonad m => MediaBag -> m ()
+setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
+
+-- | Retrieve the media bag.
+getMediaBag :: PandocMonad m => m MediaBag
+getMediaBag = getsCommonState stMediaBag
+
+-- | Insert an item into the media bag.
+insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
+insertMedia fp mime bs = do
+ mb <- getMediaBag
+ let mb' = MB.insertMedia fp mime bs mb
+ setMediaBag mb'
+
+-- | Retrieve the input filenames.
+getInputFiles :: PandocMonad m => m [FilePath]
+getInputFiles = getsCommonState stInputFiles
+
+-- | Set the input filenames.
+setInputFiles :: PandocMonad m => [FilePath] -> m ()
+setInputFiles fs = do
+ let sourceURL = case fs of
+ [] -> Nothing
+ (x:_) -> case parseURI x of
+ Just u
+ | uriScheme u `elem` ["http:","https:"] ->
+ Just $ show u{ uriQuery = "",
+ uriFragment = "" }
+ _ -> Nothing
+
+ modifyCommonState $ \st -> st{ stInputFiles = fs
+ , stSourceURL = T.pack <$> sourceURL }
+
+-- | Retrieve the output filename.
+getOutputFile :: PandocMonad m => m (Maybe FilePath)
+getOutputFile = getsCommonState stOutputFile
+
+-- | Set the output filename.
+setOutputFile :: PandocMonad m => Maybe FilePath -> m ()
+setOutputFile mbf = modifyCommonState $ \st -> st{ stOutputFile = mbf }
+
+-- | Retrieve the resource path searched by 'fetchItem'.
+getResourcePath :: PandocMonad m => m [FilePath]
+getResourcePath = getsCommonState stResourcePath
+
+-- | Set the resource path searched by 'fetchItem'.
+setResourcePath :: PandocMonad m => [FilePath] -> m ()
+setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps}
+
+-- | Get the POSIX time.
+getPOSIXTime :: PandocMonad m => m POSIXTime
+getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
+
+-- | Get the zoned time.
+getZonedTime :: PandocMonad m => m ZonedTime
+getZonedTime = do
+ t <- getCurrentTime
+ tz <- getCurrentTimeZone
+ return $ utcToZonedTime tz t
+
+-- | Read file, checking in any number of directories.
+readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe T.Text)
+readFileFromDirs [] _ = return Nothing
+readFileFromDirs (d:ds) f = catchError
+ (Just . T.pack . UTF8.toStringLazy <$> readFileLazy (d </> f))
+ (\_ -> readFileFromDirs ds f)
+
+-- | Convert BCP47 string to a Lang, issuing warning
+-- if there are problems.
+toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang)
+toLang Nothing = return Nothing
+toLang (Just s) =
+ case parseBCP47 s of
+ Left _ -> do
+ report $ InvalidLang s
+ return Nothing
+ Right l -> return (Just l)
+
+-- | Select the language to use with 'translateTerm'.
+-- Note that this does not read a translation file;
+-- that is only done the first time 'translateTerm' is
+-- used.
+setTranslations :: PandocMonad m => Lang -> m ()
+setTranslations lang =
+ modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) }
+
+-- | Load term map.
+getTranslations :: PandocMonad m => m Translations
+getTranslations = do
+ mbtrans <- getsCommonState stTranslations
+ case mbtrans of
+ Nothing -> return mempty -- no language defined
+ Just (_, Just t) -> return t
+ Just (lang, Nothing) -> do -- read from file
+ let translationFile = "translations/" <> renderLang lang <> ".yaml"
+ let fallbackFile = "translations/" <> langLanguage lang <> ".yaml"
+ let getTrans fp = do
+ bs <- readDataFile fp
+ case readTranslations (UTF8.toText bs) of
+ Left e -> do
+ report $ CouldNotLoadTranslations (renderLang lang)
+ (T.pack fp <> ": " <> e)
+ -- make sure we don't try again...
+ modifyCommonState $ \st ->
+ st{ stTranslations = Nothing }
+ return mempty
+ Right t -> do
+ modifyCommonState $ \st ->
+ st{ stTranslations = Just (lang, Just t) }
+ return t
+ catchError (getTrans $ T.unpack translationFile)
+ (\_ ->
+ catchError (getTrans $ T.unpack fallbackFile)
+ (\e -> do
+ report $ CouldNotLoadTranslations (renderLang lang)
+ $ case e of
+ PandocCouldNotFindDataFileError _ ->
+ "data file " <> fallbackFile <> " not found"
+ _ -> ""
+ -- make sure we don't try again...
+ modifyCommonState $ \st -> st{ stTranslations = Nothing }
+ return mempty))
+
+-- | Get a translation from the current term map.
+-- Issue a warning if the term is not defined.
+translateTerm :: PandocMonad m => Term -> m T.Text
+translateTerm term = do
+ translations <- getTranslations
+ case lookupTerm term translations of
+ Just s -> return s
+ Nothing -> do
+ report $ NoTranslation $ T.pack $ show term
+ return ""
+
+-- | Specialized version of parseURIReference that disallows
+-- single-letter schemes. Reason: these are usually windows absolute
+-- paths.
+parseURIReference' :: T.Text -> Maybe URI
+parseURIReference' s = do
+ u <- parseURIReference (T.unpack s)
+ case uriScheme u of
+ [_] -> Nothing
+ _ -> Just u
+
+-- | Set the user data directory in common state.
+setUserDataDir :: PandocMonad m
+ => Maybe FilePath
+ -> m ()
+setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp }
+
+-- | Get the user data directory from common state.
+getUserDataDir :: PandocMonad m
+ => m (Maybe FilePath)
+getUserDataDir = getsCommonState stUserDataDir
+
+-- | Fetch an image or other item from the local filesystem or the net.
+-- Returns raw content and maybe mime type.
+fetchItem :: PandocMonad m
+ => T.Text
+ -> m (B.ByteString, Maybe MimeType)
+fetchItem s = do
+ mediabag <- getMediaBag
+ case lookupMedia (T.unpack s) mediabag of
+ Just (mime, bs) -> return (BL.toStrict bs, Just mime)
+ Nothing -> downloadOrRead s
+
+-- | Returns the content and, if available, the MIME type of a resource.
+-- If the given resource location is a valid URI, then download the
+-- resource from that URI. Otherwise, treat the resource identifier as a
+-- local file name.
+--
+-- Note that resources are treated relative to the URL of the first
+-- input source, if any.
+downloadOrRead :: PandocMonad m
+ => T.Text
+ -> m (B.ByteString, Maybe MimeType)
+downloadOrRead s = do
+ sourceURL <- getsCommonState stSourceURL
+ case (sourceURL >>= parseURIReference' .
+ ensureEscaped, ensureEscaped s) of
+ (Just u, s') -> -- try fetching from relative path at source
+ case parseURIReference' s' of
+ Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` u
+ Nothing -> openURL s' -- will throw error
+ (Nothing, s'@(T.unpack -> ('/':'/':c:_))) | c /= '?' -> -- protocol-relative URI
+ -- we exclude //? because of //?UNC/ on Windows
+ case parseURIReference' s' of
+ Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` httpcolon
+ Nothing -> openURL s' -- will throw error
+ (Nothing, s') ->
+ case parseURI (T.unpack s') of -- requires absolute URI
+ Just u' | uriScheme u' == "file:" ->
+ readLocalFile $ uriPathToPath (T.pack $ uriPath u')
+ -- We don't want to treat C:/ as a scheme:
+ Just u' | length (uriScheme u') > 2 -> openURL (T.pack $ show u')
+ _ -> readLocalFile fp -- get from local file system
+ where readLocalFile f = do
+ resourcePath <- getResourcePath
+ cont <- if isRelative f
+ then withPaths resourcePath readFileStrict f
+ else readFileStrict f
+ return (cont, mime)
+ httpcolon = URI{ uriScheme = "http:",
+ uriAuthority = Nothing,
+ uriPath = "",
+ uriQuery = "",
+ uriFragment = "" }
+ dropFragmentAndQuery = T.takeWhile (\c -> c /= '?' && c /= '#')
+ fp = unEscapeString $ T.unpack $ dropFragmentAndQuery s
+ mime = getMimeType $ case takeExtension fp of
+ ".gz" -> dropExtension fp
+ ".svgz" -> dropExtension fp ++ ".svg"
+ x -> x
+ ensureEscaped = T.pack . escapeURIString isAllowedInURI . T.unpack . T.map convertSlash
+ convertSlash '\\' = '/'
+ convertSlash x = x
+
+-- | Retrieve default reference.docx.
+getDefaultReferenceDocx :: PandocMonad m => m Archive
+getDefaultReferenceDocx = do
+ let paths = ["[Content_Types].xml",
+ "_rels/.rels",
+ "docProps/app.xml",
+ "docProps/core.xml",
+ "docProps/custom.xml",
+ "word/document.xml",
+ "word/fontTable.xml",
+ "word/footnotes.xml",
+ "word/comments.xml",
+ "word/numbering.xml",
+ "word/settings.xml",
+ "word/webSettings.xml",
+ "word/styles.xml",
+ "word/_rels/document.xml.rels",
+ "word/_rels/footnotes.xml.rels",
+ "word/theme/theme1.xml"]
+ let toLazy = BL.fromChunks . (:[])
+ let pathToEntry path = do
+ epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
+ contents <- toLazy <$> readDataFile ("docx/" ++ path)
+ return $ toEntry path epochtime contents
+ datadir <- getUserDataDir
+ mbArchive <- case datadir of
+ Nothing -> return Nothing
+ Just d -> do
+ exists <- fileExists (d </> "reference.docx")
+ if exists
+ then return (Just (d </> "reference.docx"))
+ else return Nothing
+ case mbArchive of
+ Just arch -> toArchive <$> readFileLazy arch
+ Nothing -> foldr addEntryToArchive emptyArchive <$>
+ mapM pathToEntry paths
+
+-- | Retrieve default reference.odt.
+getDefaultReferenceODT :: PandocMonad m => m Archive
+getDefaultReferenceODT = do
+ let paths = ["mimetype",
+ "manifest.rdf",
+ "styles.xml",
+ "content.xml",
+ "meta.xml",
+ "settings.xml",
+ "Configurations2/accelerator/current.xml",
+ "Thumbnails/thumbnail.png",
+ "META-INF/manifest.xml"]
+ let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
+ contents <- (BL.fromChunks . (:[])) `fmap`
+ readDataFile ("odt/" ++ path)
+ return $ toEntry path epochtime contents
+ datadir <- getUserDataDir
+ mbArchive <- case datadir of
+ Nothing -> return Nothing
+ Just d -> do
+ exists <- fileExists (d </> "reference.odt")
+ if exists
+ then return (Just (d </> "reference.odt"))
+ else return Nothing
+ case mbArchive of
+ Just arch -> toArchive <$> readFileLazy arch
+ Nothing -> foldr addEntryToArchive emptyArchive <$>
+ mapM pathToEntry paths
+
+-- | Retrieve default reference.pptx.
+getDefaultReferencePptx :: PandocMonad m => m Archive
+getDefaultReferencePptx = do
+ -- We're going to narrow this down substantially once we get it
+ -- working.
+ let paths = [ "[Content_Types].xml"
+ , "_rels/.rels"
+ , "docProps/app.xml"
+ , "docProps/core.xml"
+ , "ppt/_rels/presentation.xml.rels"
+ , "ppt/presProps.xml"
+ , "ppt/presentation.xml"
+ , "ppt/slideLayouts/_rels/slideLayout1.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout2.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout3.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout4.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout5.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout6.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout7.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout8.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout9.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout10.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout11.xml.rels"
+ , "ppt/slideLayouts/slideLayout1.xml"
+ , "ppt/slideLayouts/slideLayout10.xml"
+ , "ppt/slideLayouts/slideLayout11.xml"
+ , "ppt/slideLayouts/slideLayout2.xml"
+ , "ppt/slideLayouts/slideLayout3.xml"
+ , "ppt/slideLayouts/slideLayout4.xml"
+ , "ppt/slideLayouts/slideLayout5.xml"
+ , "ppt/slideLayouts/slideLayout6.xml"
+ , "ppt/slideLayouts/slideLayout7.xml"
+ , "ppt/slideLayouts/slideLayout8.xml"
+ , "ppt/slideLayouts/slideLayout9.xml"
+ , "ppt/slideMasters/_rels/slideMaster1.xml.rels"
+ , "ppt/slideMasters/slideMaster1.xml"
+ , "ppt/slides/_rels/slide1.xml.rels"
+ , "ppt/slides/slide1.xml"
+ , "ppt/slides/_rels/slide2.xml.rels"
+ , "ppt/slides/slide2.xml"
+ , "ppt/slides/_rels/slide3.xml.rels"
+ , "ppt/slides/slide3.xml"
+ , "ppt/slides/_rels/slide4.xml.rels"
+ , "ppt/slides/slide4.xml"
+ , "ppt/tableStyles.xml"
+ , "ppt/theme/theme1.xml"
+ , "ppt/viewProps.xml"
+ -- These relate to notes slides.
+ , "ppt/notesMasters/notesMaster1.xml"
+ , "ppt/notesMasters/_rels/notesMaster1.xml.rels"
+ , "ppt/notesSlides/notesSlide1.xml"
+ , "ppt/notesSlides/_rels/notesSlide1.xml.rels"
+ , "ppt/notesSlides/notesSlide2.xml"
+ , "ppt/notesSlides/_rels/notesSlide2.xml.rels"
+ , "ppt/theme/theme2.xml"
+ ]
+ let toLazy = BL.fromChunks . (:[])
+ let pathToEntry path = do
+ epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
+ contents <- toLazy <$> readDataFile ("pptx/" ++ path)
+ return $ toEntry path epochtime contents
+ datadir <- getUserDataDir
+ mbArchive <- case datadir of
+ Nothing -> return Nothing
+ Just d -> do
+ exists <- fileExists (d </> "reference.pptx")
+ if exists
+ then return (Just (d </> "reference.pptx"))
+ else return Nothing
+ case mbArchive of
+ Just arch -> toArchive <$> readFileLazy arch
+ Nothing -> foldr addEntryToArchive emptyArchive <$>
+ mapM pathToEntry paths
+
+-- | Read file from user data directory or,
+-- if not found there, from Cabal data directory.
+readDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDataFile fname = do
+ datadir <- getUserDataDir
+ case datadir of
+ Nothing -> readDefaultDataFile fname
+ Just userDir -> do
+ exists <- fileExists (userDir </> fname)
+ if exists
+ then readFileStrict (userDir </> fname)
+ else readDefaultDataFile fname
+
+-- | Read file from from Cabal data directory.
+readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDefaultDataFile "reference.docx" =
+ (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx
+readDefaultDataFile "reference.pptx" =
+ (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferencePptx
+readDefaultDataFile "reference.odt" =
+ (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT
+readDefaultDataFile fname =
+#ifdef EMBED_DATA_FILES
+ case lookup (makeCanonical fname) dataFiles of
+ Nothing -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname
+ Just contents -> return contents
+#else
+ getDataFileName fname' >>= checkExistence >>= readFileStrict
+ where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
+
+-- | Returns the input filename unchanged if the file exits, and throws
+-- a `PandocCouldNotFindDataFileError` if it doesn't.
+checkExistence :: PandocMonad m => FilePath -> m FilePath
+checkExistence fn = do
+ exists <- fileExists fn
+ if exists
+ then return fn
+ else throwError $ PandocCouldNotFindDataFileError $ T.pack fn
+#endif
+
+-- | Canonicalizes a file path by removing redundant @.@ and @..@.
+makeCanonical :: FilePath -> FilePath
+makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
+ where transformPathParts = reverse . foldl go []
+ go as "." = as
+ go (_:as) ".." = as
+ go as x = x : as
+
+-- | Trys to run an action on a file: for each directory given, a
+-- filepath is created from the given filename, and the action is run on
+-- that filepath. Returns the result of the first successful execution
+-- of the action, or throws a @PandocResourceNotFound@ exception if the
+-- action errors for all filepaths.
+withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
+withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp
+withPaths (p:ps) action fp =
+ catchError (action (p </> fp))
+ (\_ -> withPaths ps action fp)
+
+-- | Fetch local or remote resource (like an image) and provide data suitable
+-- for adding it to the MediaBag.
+fetchMediaResource :: PandocMonad m
+ => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString)
+fetchMediaResource src = do
+ (bs, mt) <- downloadOrRead src
+ let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src)
+ (mt >>= extensionFromMimeType)
+ let bs' = BL.fromChunks [bs]
+ let basename = showDigest $ sha1 bs'
+ let fname = basename <.> T.unpack ext
+ return (fname, mt, bs')
+
+-- | Traverse tree, filling media bag for any images that
+-- aren't already in the media bag.
+fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc
+fillMediaBag d = walkM handleImage d
+ where handleImage :: PandocMonad m => Inline -> m Inline
+ handleImage (Image attr lab (src, tit)) = catchError
+ (do mediabag <- getMediaBag
+ case lookupMedia (T.unpack src) mediabag of
+ Just (_, _) -> return $ Image attr lab (src, tit)
+ Nothing -> do
+ (fname, mt, bs) <- fetchMediaResource src
+ insertMedia fname mt bs
+ return $ Image attr lab (T.pack fname, tit))
+ (\e ->
+ case e of
+ PandocResourceNotFound _ -> do
+ report $ CouldNotFetchResource src
+ "replacing image with description"
+ -- emit alt text
+ return $ Span ("",["image"],[]) lab
+ PandocHttpError u er -> do
+ report $ CouldNotFetchResource u
+ (T.pack $ show er ++ "\rReplacing image with description.")
+ -- emit alt text
+ return $ Span ("",["image"],[]) lab
+ _ -> throwError e)
+ handleImage x = return x
+
+-- This requires UndecidableInstances. We could avoid that
+-- by repeating the definitions below for every monad transformer
+-- we use: ReaderT, WriterT, StateT, RWST. But this seems to
+-- be harmless.
+instance (MonadTrans t, PandocMonad m, Functor (t m),
+ MonadError PandocError (t m), Monad (t m),
+ Applicative (t m)) => PandocMonad (t m) where
+ lookupEnv = lift . lookupEnv
+ getCurrentTime = lift getCurrentTime
+ getCurrentTimeZone = lift getCurrentTimeZone
+ newStdGen = lift newStdGen
+ newUniqueHash = lift newUniqueHash
+ openURL = lift . openURL
+ readFileLazy = lift . readFileLazy
+ readFileStrict = lift . readFileStrict
+ glob = lift . glob
+ fileExists = lift . fileExists
+ getDataFileName = lift . getDataFileName
+ getModificationTime = lift . getModificationTime
+ getCommonState = lift getCommonState
+ putCommonState = lift . putCommonState
+ logOutput = lift . logOutput
+
+instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where
+ lookupEnv = lift . lookupEnv
+ getCurrentTime = lift getCurrentTime
+ getCurrentTimeZone = lift getCurrentTimeZone
+ newStdGen = lift newStdGen
+ newUniqueHash = lift newUniqueHash
+ openURL = lift . openURL
+ readFileLazy = lift . readFileLazy
+ readFileStrict = lift . readFileStrict
+ glob = lift . glob
+ fileExists = lift . fileExists
+ getDataFileName = lift . getDataFileName
+ getModificationTime = lift . getModificationTime
+ getCommonState = lift getCommonState
+ putCommonState = lift . putCommonState
+ trace msg = do
+ tracing <- getsCommonState stTrace
+ when tracing $ do
+ pos <- getPosition
+ Debug.Trace.trace
+ ("[trace] Parsed " ++ T.unpack msg ++ " at line " ++
+ show (sourceLine pos) ++
+ if sourceName pos == "chunk"
+ then " of chunk"
+ else "")
+ (return ())
+ logOutput = lift . logOutput