diff options
author | Albert Krewinkel <albert@zeitkraut.de> | 2020-03-15 04:49:36 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-14 20:49:36 -0700 |
commit | ec49643d647080218bedfaf61b3eb9b9383e42c0 (patch) | |
tree | db0093d2ad97010e6c4fd92c3295b2873733a696 /src/Text/Pandoc/Class | |
parent | 11b5f1e40b4c1e4ddbfb93a8d75bfd55ef52a6df (diff) | |
download | pandoc-ec49643d647080218bedfaf61b3eb9b9383e42c0.tar.gz |
Subdivide Text.Pandoc.Class into small modules (#6106)
* Extract CommonState into submodule
* Extract PandocMonad into submodule
* PandocMonad: ensure all functions have Haddock documentation
Diffstat (limited to 'src/Text/Pandoc/Class')
-rw-r--r-- | src/Text/Pandoc/Class/CommonState.hs | 80 | ||||
-rw-r--r-- | src/Text/Pandoc/Class/PandocMonad.hs | 706 |
2 files changed, 786 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs new file mode 100644 index 000000000..4a0f66859 --- /dev/null +++ b/src/Text/Pandoc/Class/CommonState.hs @@ -0,0 +1,80 @@ +{- | +Module : Text.Pandoc.Class.CommonState +Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> +Stability : alpha +Portability : portable + +Common state shared by all pandoc-specific operations, including +those in readers, writers, and Lua filters. +-} + +module Text.Pandoc.Class.CommonState + ( CommonState(..) + , defaultCommonState + ) +where + +import Data.Default (Default (def)) +import Data.Text (Text) +import Text.Pandoc.BCP47 (Lang) +import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING)) +import Text.Pandoc.Translations (Translations) + +-- | 'CommonState' represents state that is used by all +-- instances of 'PandocMonad'. Normally users should not +-- need to interact with it directly; instead, auxiliary +-- functions like 'setVerbosity' and 'withMediaBag' should be used. +data CommonState = CommonState + { stLog :: [LogMessage] + -- ^ A list of log messages in reverse order + , stUserDataDir :: Maybe FilePath + -- ^ Directory to search for data files + , stSourceURL :: Maybe Text + -- ^ Absolute URL + dir of 1st source file + , stRequestHeaders :: [(Text, Text)] + -- ^ Headers to add for HTTP requests + , stMediaBag :: MediaBag + -- ^ Media parsed from binary containers + , stTranslations :: Maybe (Lang, Maybe Translations) + -- ^ Translations for localization + , stInputFiles :: [FilePath] + -- ^ List of input files from command line + , stOutputFile :: Maybe FilePath + -- ^ Output file from command line + , stResourcePath :: [FilePath] + -- ^ Path to search for resources like + -- included images + , stVerbosity :: Verbosity + -- ^ Verbosity level + , stTrace :: Bool + -- ^ Controls whether tracing messages are + -- issued. + } + +-- | The default @'CommonState'@. All fields are initialized as the +-- monoid identity of their resprective type, except for: +-- +-- * @'stResourcePath'@, which is set to @["."]@, +-- * @'stTrace'@, which is set to @'False'@, and +-- * @'stVerbosity'@, which is set to @WARNING@. +defaultCommonState :: CommonState +defaultCommonState = CommonState + { stLog = [] + , stUserDataDir = Nothing + , stSourceURL = Nothing + , stRequestHeaders = [] + , stMediaBag = mempty + , stTranslations = Nothing + , stInputFiles = [] + , stOutputFile = Nothing + , stResourcePath = ["."] + , stVerbosity = WARNING + , stTrace = False + } + +instance Default CommonState where + def = defaultCommonState diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs new file mode 100644 index 000000000..7f31eec96 --- /dev/null +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -0,0 +1,706 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE ViewPatterns #-} +#if MIN_VERSION_base(4,8,0) +#else +{-# LANGUAGE OverlappingInstances #-} +#endif +{- | +Module : Text.Pandoc.Class.PandocMonad +Copyright : Copyright (C) 2016-20 Jesse Rosenthal, John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> +Stability : alpha +Portability : portable + +This module defines a type class, 'PandocMonad', for pandoc readers +and writers. +-} + +module Text.Pandoc.Class.PandocMonad + ( PandocMonad(..) + , getPOSIXTime + , getZonedTime + , readFileFromDirs + , report + , setTrace + , setRequestHeader + , getLog + , setVerbosity + , getVerbosity + , getMediaBag + , setMediaBag + , insertMedia + , setUserDataDir + , getUserDataDir + , fetchItem + , fetchMediaResource + , getInputFiles + , setInputFiles + , getOutputFile + , setOutputFile + , setResourcePath + , getResourcePath + , readDefaultDataFile + , readDataFile + , fillMediaBag + , toLang + , setTranslations + , translateTerm + , makeCanonical + ) where + +import Prelude +import Codec.Archive.Zip +import Control.Monad.Except (MonadError (catchError, throwError), + MonadTrans, lift, when) +import Data.Digest.Pure.SHA (sha1, showDigest) +import Data.Maybe (fromMaybe) +import Data.Time (UTCTime) +import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds) +import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime) +import Network.URI ( escapeURIString, nonStrictRelativeTo, + unEscapeString, parseURIReference, isAllowedInURI, + parseURI, URI(..) ) +import System.FilePath ((</>), (<.>), takeExtension, dropExtension, + isRelative, splitDirectories) +import System.Random (StdGen) +import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang) +import Text.Pandoc.Class.CommonState (CommonState (..)) +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Logging +import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia) +import Text.Pandoc.Shared (uriPathToPath) +import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, + readTranslations) +import Text.Pandoc.Walk (walkM) +import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName) +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as T +import qualified Debug.Trace +import qualified System.FilePath.Posix as Posix +import qualified Text.Pandoc.MediaBag as MB +import qualified Text.Pandoc.UTF8 as UTF8 +#ifdef EMBED_DATA_FILES +import Text.Pandoc.Data (dataFiles) +#endif + +-- | The PandocMonad typeclass contains all the potentially +-- IO-related functions used in pandoc's readers and writers. +-- Instances of this typeclass may implement these functions +-- in IO (as in 'PandocIO') or using an internal state that +-- represents a file system, time, and so on (as in 'PandocPure'). +class (Functor m, Applicative m, Monad m, MonadError PandocError m) + => PandocMonad m where + -- | Lookup an environment variable. + lookupEnv :: T.Text -> m (Maybe T.Text) + -- | Get the current (UTC) time. + getCurrentTime :: m UTCTime + -- | Get the locale's time zone. + getCurrentTimeZone :: m TimeZone + -- | Return a new generator for random numbers. + newStdGen :: m StdGen + -- | Return a new unique integer. + newUniqueHash :: m Int + -- | Retrieve contents and mime type from a URL, raising + -- an error on failure. + openURL :: T.Text -> m (B.ByteString, Maybe MimeType) + -- | Read the lazy ByteString contents from a file path, + -- raising an error on failure. + readFileLazy :: FilePath -> m BL.ByteString + -- | Read the strict ByteString contents from a file path, + -- raising an error on failure. + readFileStrict :: FilePath -> m B.ByteString + -- | Return a list of paths that match a glob, relative to + -- the working directory. See 'System.FilePath.Glob' for + -- the glob syntax. + glob :: String -> m [FilePath] + -- | Returns True if file exists. + fileExists :: FilePath -> m Bool + -- | Returns the path of data file. + getDataFileName :: FilePath -> m FilePath + -- | Return the modification time of a file. + getModificationTime :: FilePath -> m UTCTime + -- | Get the value of the 'CommonState' used by all instances + -- of 'PandocMonad'. + getCommonState :: m CommonState + -- | Set the value of the 'CommonState' used by all instances + -- of 'PandocMonad'. + -- | Get the value of a specific field of 'CommonState'. + putCommonState :: CommonState -> m () + -- | Get the value of a specific field of 'CommonState'. + getsCommonState :: (CommonState -> a) -> m a + getsCommonState f = f <$> getCommonState + -- | Modify the 'CommonState'. + modifyCommonState :: (CommonState -> CommonState) -> m () + modifyCommonState f = getCommonState >>= putCommonState . f + -- | Output a log message. + logOutput :: LogMessage -> m () + -- | Output a debug message to sterr, using 'Debug.Trace.trace', + -- if tracing is enabled. Note: this writes to stderr even in + -- pure instances. + trace :: T.Text -> m () + trace msg = do + tracing <- getsCommonState stTrace + when tracing $ Debug.Trace.trace ("[trace] " ++ T.unpack msg) (return ()) + +-- * Functions defined for all PandocMonad instances + +-- | Set the verbosity level. +setVerbosity :: PandocMonad m => Verbosity -> m () +setVerbosity verbosity = + modifyCommonState $ \st -> st{ stVerbosity = verbosity } + +-- | Get the verbosity level. +getVerbosity :: PandocMonad m => m Verbosity +getVerbosity = getsCommonState stVerbosity + +-- | Get the accomulated log messages (in temporal order). +getLog :: PandocMonad m => m [LogMessage] +getLog = reverse <$> getsCommonState stLog + +-- | Log a message using 'logOutput'. Note that 'logOutput' is +-- called only if the verbosity level exceeds the level of the +-- message, but the message is added to the list of log messages +-- that will be retrieved by 'getLog' regardless of its verbosity level. +report :: PandocMonad m => LogMessage -> m () +report msg = do + verbosity <- getsCommonState stVerbosity + let level = messageVerbosity msg + when (level <= verbosity) $ logOutput msg + modifyCommonState $ \st -> st{ stLog = msg : stLog st } + +-- | Determine whether tracing is enabled. This affects +-- the behavior of 'trace'. If tracing is not enabled, +-- 'trace' does nothing. +setTrace :: PandocMonad m => Bool -> m () +setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing} + +-- | Set request header to use in HTTP requests. +setRequestHeader :: PandocMonad m + => T.Text -- ^ Header name + -> T.Text -- ^ Value + -> m () +setRequestHeader name val = modifyCommonState $ \st -> + st{ stRequestHeaders = + (name, val) : filter (\(n,_) -> n /= name) (stRequestHeaders st) } + +-- | Initialize the media bag. +setMediaBag :: PandocMonad m => MediaBag -> m () +setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb} + +-- | Retrieve the media bag. +getMediaBag :: PandocMonad m => m MediaBag +getMediaBag = getsCommonState stMediaBag + +-- | Insert an item into the media bag. +insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m () +insertMedia fp mime bs = do + mb <- getMediaBag + let mb' = MB.insertMedia fp mime bs mb + setMediaBag mb' + +-- | Retrieve the input filenames. +getInputFiles :: PandocMonad m => m [FilePath] +getInputFiles = getsCommonState stInputFiles + +-- | Set the input filenames. +setInputFiles :: PandocMonad m => [FilePath] -> m () +setInputFiles fs = do + let sourceURL = case fs of + [] -> Nothing + (x:_) -> case parseURI x of + Just u + | uriScheme u `elem` ["http:","https:"] -> + Just $ show u{ uriQuery = "", + uriFragment = "" } + _ -> Nothing + + modifyCommonState $ \st -> st{ stInputFiles = fs + , stSourceURL = T.pack <$> sourceURL } + +-- | Retrieve the output filename. +getOutputFile :: PandocMonad m => m (Maybe FilePath) +getOutputFile = getsCommonState stOutputFile + +-- | Set the output filename. +setOutputFile :: PandocMonad m => Maybe FilePath -> m () +setOutputFile mbf = modifyCommonState $ \st -> st{ stOutputFile = mbf } + +-- | Retrieve the resource path searched by 'fetchItem'. +getResourcePath :: PandocMonad m => m [FilePath] +getResourcePath = getsCommonState stResourcePath + +-- | Set the resource path searched by 'fetchItem'. +setResourcePath :: PandocMonad m => [FilePath] -> m () +setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps} + +-- | Get the POSIX time. +getPOSIXTime :: PandocMonad m => m POSIXTime +getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime + +-- | Get the zoned time. +getZonedTime :: PandocMonad m => m ZonedTime +getZonedTime = do + t <- getCurrentTime + tz <- getCurrentTimeZone + return $ utcToZonedTime tz t + +-- | Read file, checking in any number of directories. +readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe T.Text) +readFileFromDirs [] _ = return Nothing +readFileFromDirs (d:ds) f = catchError + (Just . T.pack . UTF8.toStringLazy <$> readFileLazy (d </> f)) + (\_ -> readFileFromDirs ds f) + +-- | Convert BCP47 string to a Lang, issuing warning +-- if there are problems. +toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang) +toLang Nothing = return Nothing +toLang (Just s) = + case parseBCP47 s of + Left _ -> do + report $ InvalidLang s + return Nothing + Right l -> return (Just l) + +-- | Select the language to use with 'translateTerm'. +-- Note that this does not read a translation file; +-- that is only done the first time 'translateTerm' is +-- used. +setTranslations :: PandocMonad m => Lang -> m () +setTranslations lang = + modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) } + +-- | Load term map. +getTranslations :: PandocMonad m => m Translations +getTranslations = do + mbtrans <- getsCommonState stTranslations + case mbtrans of + Nothing -> return mempty -- no language defined + Just (_, Just t) -> return t + Just (lang, Nothing) -> do -- read from file + let translationFile = "translations/" <> renderLang lang <> ".yaml" + let fallbackFile = "translations/" <> langLanguage lang <> ".yaml" + let getTrans fp = do + bs <- readDataFile fp + case readTranslations (UTF8.toText bs) of + Left e -> do + report $ CouldNotLoadTranslations (renderLang lang) + (T.pack fp <> ": " <> e) + -- make sure we don't try again... + modifyCommonState $ \st -> + st{ stTranslations = Nothing } + return mempty + Right t -> do + modifyCommonState $ \st -> + st{ stTranslations = Just (lang, Just t) } + return t + catchError (getTrans $ T.unpack translationFile) + (\_ -> + catchError (getTrans $ T.unpack fallbackFile) + (\e -> do + report $ CouldNotLoadTranslations (renderLang lang) + $ case e of + PandocCouldNotFindDataFileError _ -> + "data file " <> fallbackFile <> " not found" + _ -> "" + -- make sure we don't try again... + modifyCommonState $ \st -> st{ stTranslations = Nothing } + return mempty)) + +-- | Get a translation from the current term map. +-- Issue a warning if the term is not defined. +translateTerm :: PandocMonad m => Term -> m T.Text +translateTerm term = do + translations <- getTranslations + case lookupTerm term translations of + Just s -> return s + Nothing -> do + report $ NoTranslation $ T.pack $ show term + return "" + +-- | Specialized version of parseURIReference that disallows +-- single-letter schemes. Reason: these are usually windows absolute +-- paths. +parseURIReference' :: T.Text -> Maybe URI +parseURIReference' s = do + u <- parseURIReference (T.unpack s) + case uriScheme u of + [_] -> Nothing + _ -> Just u + +-- | Set the user data directory in common state. +setUserDataDir :: PandocMonad m + => Maybe FilePath + -> m () +setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp } + +-- | Get the user data directory from common state. +getUserDataDir :: PandocMonad m + => m (Maybe FilePath) +getUserDataDir = getsCommonState stUserDataDir + +-- | Fetch an image or other item from the local filesystem or the net. +-- Returns raw content and maybe mime type. +fetchItem :: PandocMonad m + => T.Text + -> m (B.ByteString, Maybe MimeType) +fetchItem s = do + mediabag <- getMediaBag + case lookupMedia (T.unpack s) mediabag of + Just (mime, bs) -> return (BL.toStrict bs, Just mime) + Nothing -> downloadOrRead s + +-- | Returns the content and, if available, the MIME type of a resource. +-- If the given resource location is a valid URI, then download the +-- resource from that URI. Otherwise, treat the resource identifier as a +-- local file name. +-- +-- Note that resources are treated relative to the URL of the first +-- input source, if any. +downloadOrRead :: PandocMonad m + => T.Text + -> m (B.ByteString, Maybe MimeType) +downloadOrRead s = do + sourceURL <- getsCommonState stSourceURL + case (sourceURL >>= parseURIReference' . + ensureEscaped, ensureEscaped s) of + (Just u, s') -> -- try fetching from relative path at source + case parseURIReference' s' of + Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` u + Nothing -> openURL s' -- will throw error + (Nothing, s'@(T.unpack -> ('/':'/':c:_))) | c /= '?' -> -- protocol-relative URI + -- we exclude //? because of //?UNC/ on Windows + case parseURIReference' s' of + Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` httpcolon + Nothing -> openURL s' -- will throw error + (Nothing, s') -> + case parseURI (T.unpack s') of -- requires absolute URI + Just u' | uriScheme u' == "file:" -> + readLocalFile $ uriPathToPath (T.pack $ uriPath u') + -- We don't want to treat C:/ as a scheme: + Just u' | length (uriScheme u') > 2 -> openURL (T.pack $ show u') + _ -> readLocalFile fp -- get from local file system + where readLocalFile f = do + resourcePath <- getResourcePath + cont <- if isRelative f + then withPaths resourcePath readFileStrict f + else readFileStrict f + return (cont, mime) + httpcolon = URI{ uriScheme = "http:", + uriAuthority = Nothing, + uriPath = "", + uriQuery = "", + uriFragment = "" } + dropFragmentAndQuery = T.takeWhile (\c -> c /= '?' && c /= '#') + fp = unEscapeString $ T.unpack $ dropFragmentAndQuery s + mime = getMimeType $ case takeExtension fp of + ".gz" -> dropExtension fp + ".svgz" -> dropExtension fp ++ ".svg" + x -> x + ensureEscaped = T.pack . escapeURIString isAllowedInURI . T.unpack . T.map convertSlash + convertSlash '\\' = '/' + convertSlash x = x + +-- | Retrieve default reference.docx. +getDefaultReferenceDocx :: PandocMonad m => m Archive +getDefaultReferenceDocx = do + let paths = ["[Content_Types].xml", + "_rels/.rels", + "docProps/app.xml", + "docProps/core.xml", + "docProps/custom.xml", + "word/document.xml", + "word/fontTable.xml", + "word/footnotes.xml", + "word/comments.xml", + "word/numbering.xml", + "word/settings.xml", + "word/webSettings.xml", + "word/styles.xml", + "word/_rels/document.xml.rels", + "word/_rels/footnotes.xml.rels", + "word/theme/theme1.xml"] + let toLazy = BL.fromChunks . (:[]) + let pathToEntry path = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime + contents <- toLazy <$> readDataFile ("docx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.docx") + if exists + then return (Just (d </> "reference.docx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Retrieve default reference.odt. +getDefaultReferenceODT :: PandocMonad m => m Archive +getDefaultReferenceODT = do + let paths = ["mimetype", + "manifest.rdf", + "styles.xml", + "content.xml", + "meta.xml", + "settings.xml", + "Configurations2/accelerator/current.xml", + "Thumbnails/thumbnail.png", + "META-INF/manifest.xml"] + let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime + contents <- (BL.fromChunks . (:[])) `fmap` + readDataFile ("odt/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.odt") + if exists + then return (Just (d </> "reference.odt")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Retrieve default reference.pptx. +getDefaultReferencePptx :: PandocMonad m => m Archive +getDefaultReferencePptx = do + -- We're going to narrow this down substantially once we get it + -- working. + let paths = [ "[Content_Types].xml" + , "_rels/.rels" + , "docProps/app.xml" + , "docProps/core.xml" + , "ppt/_rels/presentation.xml.rels" + , "ppt/presProps.xml" + , "ppt/presentation.xml" + , "ppt/slideLayouts/_rels/slideLayout1.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout2.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout3.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout4.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout5.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout6.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout7.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout8.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout9.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout10.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout11.xml.rels" + , "ppt/slideLayouts/slideLayout1.xml" + , "ppt/slideLayouts/slideLayout10.xml" + , "ppt/slideLayouts/slideLayout11.xml" + , "ppt/slideLayouts/slideLayout2.xml" + , "ppt/slideLayouts/slideLayout3.xml" + , "ppt/slideLayouts/slideLayout4.xml" + , "ppt/slideLayouts/slideLayout5.xml" + , "ppt/slideLayouts/slideLayout6.xml" + , "ppt/slideLayouts/slideLayout7.xml" + , "ppt/slideLayouts/slideLayout8.xml" + , "ppt/slideLayouts/slideLayout9.xml" + , "ppt/slideMasters/_rels/slideMaster1.xml.rels" + , "ppt/slideMasters/slideMaster1.xml" + , "ppt/slides/_rels/slide1.xml.rels" + , "ppt/slides/slide1.xml" + , "ppt/slides/_rels/slide2.xml.rels" + , "ppt/slides/slide2.xml" + , "ppt/slides/_rels/slide3.xml.rels" + , "ppt/slides/slide3.xml" + , "ppt/slides/_rels/slide4.xml.rels" + , "ppt/slides/slide4.xml" + , "ppt/tableStyles.xml" + , "ppt/theme/theme1.xml" + , "ppt/viewProps.xml" + -- These relate to notes slides. + , "ppt/notesMasters/notesMaster1.xml" + , "ppt/notesMasters/_rels/notesMaster1.xml.rels" + , "ppt/notesSlides/notesSlide1.xml" + , "ppt/notesSlides/_rels/notesSlide1.xml.rels" + , "ppt/notesSlides/notesSlide2.xml" + , "ppt/notesSlides/_rels/notesSlide2.xml.rels" + , "ppt/theme/theme2.xml" + ] + let toLazy = BL.fromChunks . (:[]) + let pathToEntry path = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime + contents <- toLazy <$> readDataFile ("pptx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.pptx") + if exists + then return (Just (d </> "reference.pptx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Read file from user data directory or, +-- if not found there, from Cabal data directory. +readDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDataFile fname = do + datadir <- getUserDataDir + case datadir of + Nothing -> readDefaultDataFile fname + Just userDir -> do + exists <- fileExists (userDir </> fname) + if exists + then readFileStrict (userDir </> fname) + else readDefaultDataFile fname + +-- | Read file from from Cabal data directory. +readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDefaultDataFile "reference.docx" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx +readDefaultDataFile "reference.pptx" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferencePptx +readDefaultDataFile "reference.odt" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT +readDefaultDataFile fname = +#ifdef EMBED_DATA_FILES + case lookup (makeCanonical fname) dataFiles of + Nothing -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname + Just contents -> return contents +#else + getDataFileName fname' >>= checkExistence >>= readFileStrict + where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname + +-- | Returns the input filename unchanged if the file exits, and throws +-- a `PandocCouldNotFindDataFileError` if it doesn't. +checkExistence :: PandocMonad m => FilePath -> m FilePath +checkExistence fn = do + exists <- fileExists fn + if exists + then return fn + else throwError $ PandocCouldNotFindDataFileError $ T.pack fn +#endif + +-- | Canonicalizes a file path by removing redundant @.@ and @..@. +makeCanonical :: FilePath -> FilePath +makeCanonical = Posix.joinPath . transformPathParts . splitDirectories + where transformPathParts = reverse . foldl go [] + go as "." = as + go (_:as) ".." = as + go as x = x : as + +-- | Trys to run an action on a file: for each directory given, a +-- filepath is created from the given filename, and the action is run on +-- that filepath. Returns the result of the first successful execution +-- of the action, or throws a @PandocResourceNotFound@ exception if the +-- action errors for all filepaths. +withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a +withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp +withPaths (p:ps) action fp = + catchError (action (p </> fp)) + (\_ -> withPaths ps action fp) + +-- | Fetch local or remote resource (like an image) and provide data suitable +-- for adding it to the MediaBag. +fetchMediaResource :: PandocMonad m + => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) +fetchMediaResource src = do + (bs, mt) <- downloadOrRead src + let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) + (mt >>= extensionFromMimeType) + let bs' = BL.fromChunks [bs] + let basename = showDigest $ sha1 bs' + let fname = basename <.> T.unpack ext + return (fname, mt, bs') + +-- | Traverse tree, filling media bag for any images that +-- aren't already in the media bag. +fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc +fillMediaBag d = walkM handleImage d + where handleImage :: PandocMonad m => Inline -> m Inline + handleImage (Image attr lab (src, tit)) = catchError + (do mediabag <- getMediaBag + case lookupMedia (T.unpack src) mediabag of + Just (_, _) -> return $ Image attr lab (src, tit) + Nothing -> do + (fname, mt, bs) <- fetchMediaResource src + insertMedia fname mt bs + return $ Image attr lab (T.pack fname, tit)) + (\e -> + case e of + PandocResourceNotFound _ -> do + report $ CouldNotFetchResource src + "replacing image with description" + -- emit alt text + return $ Span ("",["image"],[]) lab + PandocHttpError u er -> do + report $ CouldNotFetchResource u + (T.pack $ show er ++ "\rReplacing image with description.") + -- emit alt text + return $ Span ("",["image"],[]) lab + _ -> throwError e) + handleImage x = return x + +-- This requires UndecidableInstances. We could avoid that +-- by repeating the definitions below for every monad transformer +-- we use: ReaderT, WriterT, StateT, RWST. But this seems to +-- be harmless. +instance (MonadTrans t, PandocMonad m, Functor (t m), + MonadError PandocError (t m), Monad (t m), + Applicative (t m)) => PandocMonad (t m) where + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + glob = lift . glob + fileExists = lift . fileExists + getDataFileName = lift . getDataFileName + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + logOutput = lift . logOutput + +instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + glob = lift . glob + fileExists = lift . fileExists + getDataFileName = lift . getDataFileName + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + trace msg = do + tracing <- getsCommonState stTrace + when tracing $ do + pos <- getPosition + Debug.Trace.trace + ("[trace] Parsed " ++ T.unpack msg ++ " at line " ++ + show (sourceLine pos) ++ + if sourceName pos == "chunk" + then " of chunk" + else "") + (return ()) + logOutput = lift . logOutput |