From ec49643d647080218bedfaf61b3eb9b9383e42c0 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 15 Mar 2020 04:49:36 +0100
Subject: Subdivide Text.Pandoc.Class into small modules (#6106)

* Extract CommonState into submodule

* Extract PandocMonad into submodule

* PandocMonad: ensure all functions have Haddock documentation
---
 src/Text/Pandoc/Class/CommonState.hs |  80 ++++
 src/Text/Pandoc/Class/PandocMonad.hs | 706 +++++++++++++++++++++++++++++++++++
 2 files changed, 786 insertions(+)
 create mode 100644 src/Text/Pandoc/Class/CommonState.hs
 create mode 100644 src/Text/Pandoc/Class/PandocMonad.hs

(limited to 'src/Text/Pandoc/Class')

diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
new file mode 100644
index 000000000..4a0f66859
--- /dev/null
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -0,0 +1,80 @@
+{- |
+Module      : Text.Pandoc.Class.CommonState
+Copyright   : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
+License     : GNU GPL, version 2 or above
+
+Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability   : alpha
+Portability : portable
+
+Common state shared by all pandoc-specific operations, including
+those in readers, writers, and Lua filters.
+-}
+
+module Text.Pandoc.Class.CommonState
+  ( CommonState(..)
+  , defaultCommonState
+  )
+where
+
+import Data.Default (Default (def))
+import Data.Text (Text)
+import Text.Pandoc.BCP47 (Lang)
+import Text.Pandoc.MediaBag (MediaBag)
+import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
+import Text.Pandoc.Translations (Translations)
+
+-- | 'CommonState' represents state that is used by all
+-- instances of 'PandocMonad'.  Normally users should not
+-- need to interact with it directly; instead, auxiliary
+-- functions like 'setVerbosity' and 'withMediaBag' should be used.
+data CommonState = CommonState
+  { stLog          :: [LogMessage]
+    -- ^ A list of log messages in reverse order
+  , stUserDataDir  :: Maybe FilePath
+    -- ^ Directory to search for data files
+  , stSourceURL    :: Maybe Text
+    -- ^ Absolute URL + dir of 1st source file
+  , stRequestHeaders :: [(Text, Text)]
+    -- ^ Headers to add for HTTP requests
+  , stMediaBag     :: MediaBag
+    -- ^ Media parsed from binary containers
+  , stTranslations :: Maybe (Lang, Maybe Translations)
+    -- ^ Translations for localization
+  , stInputFiles   :: [FilePath]
+    -- ^ List of input files from command line
+  , stOutputFile   :: Maybe FilePath
+    -- ^ Output file from command line
+  , stResourcePath :: [FilePath]
+    -- ^ Path to search for resources like
+    -- included images
+  , stVerbosity    :: Verbosity
+    -- ^ Verbosity level
+  , stTrace        :: Bool
+    -- ^ Controls whether tracing messages are
+    -- issued.
+  }
+
+-- | The default @'CommonState'@. All fields are initialized as the
+-- monoid identity of their resprective type, except for:
+--
+--   * @'stResourcePath'@, which is set to @["."]@,
+--   * @'stTrace'@, which is set to @'False'@, and
+--   * @'stVerbosity'@, which is set to @WARNING@.
+defaultCommonState :: CommonState
+defaultCommonState = CommonState
+  { stLog = []
+  , stUserDataDir = Nothing
+  , stSourceURL = Nothing
+  , stRequestHeaders = []
+  , stMediaBag = mempty
+  , stTranslations = Nothing
+  , stInputFiles = []
+  , stOutputFile = Nothing
+  , stResourcePath = ["."]
+  , stVerbosity = WARNING
+  , stTrace = False
+  }
+
+instance Default CommonState where
+  def = defaultCommonState
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
new file mode 100644
index 000000000..7f31eec96
--- /dev/null
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -0,0 +1,706 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE ViewPatterns #-}
+#if MIN_VERSION_base(4,8,0)
+#else
+{-# LANGUAGE OverlappingInstances #-}
+#endif
+{- |
+Module      : Text.Pandoc.Class.PandocMonad
+Copyright   : Copyright (C) 2016-20 Jesse Rosenthal, John MacFarlane
+License     : GNU GPL, version 2 or above
+
+Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability   : alpha
+Portability : portable
+
+This module defines a type class, 'PandocMonad', for pandoc readers
+and writers.
+-}
+
+module Text.Pandoc.Class.PandocMonad
+  ( PandocMonad(..)
+  , getPOSIXTime
+  , getZonedTime
+  , readFileFromDirs
+  , report
+  , setTrace
+  , setRequestHeader
+  , getLog
+  , setVerbosity
+  , getVerbosity
+  , getMediaBag
+  , setMediaBag
+  , insertMedia
+  , setUserDataDir
+  , getUserDataDir
+  , fetchItem
+  , fetchMediaResource
+  , getInputFiles
+  , setInputFiles
+  , getOutputFile
+  , setOutputFile
+  , setResourcePath
+  , getResourcePath
+  , readDefaultDataFile
+  , readDataFile
+  , fillMediaBag
+  , toLang
+  , setTranslations
+  , translateTerm
+  , makeCanonical
+  ) where
+
+import Prelude
+import Codec.Archive.Zip
+import Control.Monad.Except (MonadError (catchError, throwError),
+                             MonadTrans, lift, when)
+import Data.Digest.Pure.SHA (sha1, showDigest)
+import Data.Maybe (fromMaybe)
+import Data.Time (UTCTime)
+import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds)
+import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime)
+import Network.URI ( escapeURIString, nonStrictRelativeTo,
+                     unEscapeString, parseURIReference, isAllowedInURI,
+                     parseURI, URI(..) )
+import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
+                        isRelative, splitDirectories)
+import System.Random (StdGen)
+import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import Text.Pandoc.Class.CommonState (CommonState (..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
+import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
+import Text.Pandoc.Shared (uriPathToPath)
+import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
+                                 readTranslations)
+import Text.Pandoc.Walk (walkM)
+import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Text as T
+import qualified Debug.Trace
+import qualified System.FilePath.Posix as Posix
+import qualified Text.Pandoc.MediaBag as MB
+import qualified Text.Pandoc.UTF8 as UTF8
+#ifdef EMBED_DATA_FILES
+import Text.Pandoc.Data (dataFiles)
+#endif
+
+-- | The PandocMonad typeclass contains all the potentially
+-- IO-related functions used in pandoc's readers and writers.
+-- Instances of this typeclass may implement these functions
+-- in IO (as in 'PandocIO') or using an internal state that
+-- represents a file system, time, and so on (as in 'PandocPure').
+class (Functor m, Applicative m, Monad m, MonadError PandocError m)
+      => PandocMonad m where
+  -- | Lookup an environment variable.
+  lookupEnv :: T.Text -> m (Maybe T.Text)
+  -- | Get the current (UTC) time.
+  getCurrentTime :: m UTCTime
+  -- | Get the locale's time zone.
+  getCurrentTimeZone :: m TimeZone
+  -- | Return a new generator for random numbers.
+  newStdGen :: m StdGen
+  -- | Return a new unique integer.
+  newUniqueHash :: m Int
+  -- | Retrieve contents and mime type from a URL, raising
+  -- an error on failure.
+  openURL :: T.Text -> m (B.ByteString, Maybe MimeType)
+  -- | Read the lazy ByteString contents from a file path,
+  -- raising an error on failure.
+  readFileLazy :: FilePath -> m BL.ByteString
+  -- | Read the strict ByteString contents from a file path,
+  -- raising an error on failure.
+  readFileStrict :: FilePath -> m B.ByteString
+  -- | Return a list of paths that match a glob, relative to
+  -- the working directory.  See 'System.FilePath.Glob' for
+  -- the glob syntax.
+  glob :: String -> m [FilePath]
+  -- | Returns True if file exists.
+  fileExists :: FilePath -> m Bool
+  -- | Returns the path of data file.
+  getDataFileName :: FilePath -> m FilePath
+  -- | Return the modification time of a file.
+  getModificationTime :: FilePath -> m UTCTime
+  -- | Get the value of the 'CommonState' used by all instances
+  -- of 'PandocMonad'.
+  getCommonState :: m CommonState
+  -- | Set the value of the 'CommonState' used by all instances
+  -- of 'PandocMonad'.
+  -- | Get the value of a specific field of 'CommonState'.
+  putCommonState :: CommonState -> m ()
+  -- | Get the value of a specific field of 'CommonState'.
+  getsCommonState :: (CommonState -> a) -> m a
+  getsCommonState f = f <$> getCommonState
+  -- | Modify the 'CommonState'.
+  modifyCommonState :: (CommonState -> CommonState) -> m ()
+  modifyCommonState f = getCommonState >>= putCommonState . f
+  -- | Output a log message.
+  logOutput :: LogMessage -> m ()
+  -- | Output a debug message to sterr, using 'Debug.Trace.trace',
+  -- if tracing is enabled.  Note: this writes to stderr even in
+  -- pure instances.
+  trace :: T.Text -> m ()
+  trace msg = do
+    tracing <- getsCommonState stTrace
+    when tracing $ Debug.Trace.trace ("[trace] " ++ T.unpack msg) (return ())
+
+-- * Functions defined for all PandocMonad instances
+
+-- | Set the verbosity level.
+setVerbosity :: PandocMonad m => Verbosity -> m ()
+setVerbosity verbosity =
+  modifyCommonState $ \st -> st{ stVerbosity = verbosity }
+
+-- | Get the verbosity level.
+getVerbosity :: PandocMonad m => m Verbosity
+getVerbosity = getsCommonState stVerbosity
+
+-- | Get the accomulated log messages (in temporal order).
+getLog :: PandocMonad m => m [LogMessage]
+getLog = reverse <$> getsCommonState stLog
+
+-- | Log a message using 'logOutput'.  Note that 'logOutput' is
+-- called only if the verbosity level exceeds the level of the
+-- message, but the message is added to the list of log messages
+-- that will be retrieved by 'getLog' regardless of its verbosity level.
+report :: PandocMonad m => LogMessage -> m ()
+report msg = do
+  verbosity <- getsCommonState stVerbosity
+  let level = messageVerbosity msg
+  when (level <= verbosity) $ logOutput msg
+  modifyCommonState $ \st -> st{ stLog = msg : stLog st }
+
+-- | Determine whether tracing is enabled.  This affects
+-- the behavior of 'trace'.  If tracing is not enabled,
+-- 'trace' does nothing.
+setTrace :: PandocMonad m => Bool -> m ()
+setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing}
+
+-- | Set request header to use in HTTP requests.
+setRequestHeader :: PandocMonad m
+                 => T.Text  -- ^ Header name
+                 -> T.Text  -- ^ Value
+                 -> m ()
+setRequestHeader name val = modifyCommonState $ \st ->
+  st{ stRequestHeaders =
+       (name, val) : filter (\(n,_) -> n /= name) (stRequestHeaders st)  }
+
+-- | Initialize the media bag.
+setMediaBag :: PandocMonad m => MediaBag -> m ()
+setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
+
+-- | Retrieve the media bag.
+getMediaBag :: PandocMonad m => m MediaBag
+getMediaBag = getsCommonState stMediaBag
+
+-- | Insert an item into the media bag.
+insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
+insertMedia fp mime bs = do
+  mb <- getMediaBag
+  let mb' = MB.insertMedia fp mime bs mb
+  setMediaBag mb'
+
+-- | Retrieve the input filenames.
+getInputFiles :: PandocMonad m => m [FilePath]
+getInputFiles = getsCommonState stInputFiles
+
+-- | Set the input filenames.
+setInputFiles :: PandocMonad m => [FilePath] -> m ()
+setInputFiles fs = do
+  let sourceURL = case fs of
+                    []    -> Nothing
+                    (x:_) -> case parseURI x of
+                                Just u
+                                  | uriScheme u `elem` ["http:","https:"] ->
+                                      Just $ show u{ uriQuery = "",
+                                                     uriFragment = "" }
+                                _ -> Nothing
+
+  modifyCommonState $ \st -> st{ stInputFiles = fs
+                               , stSourceURL = T.pack <$> sourceURL }
+
+-- | Retrieve the output filename.
+getOutputFile :: PandocMonad m => m (Maybe FilePath)
+getOutputFile = getsCommonState stOutputFile
+
+-- | Set the output filename.
+setOutputFile :: PandocMonad m => Maybe FilePath -> m ()
+setOutputFile mbf = modifyCommonState $ \st -> st{ stOutputFile = mbf }
+
+-- | Retrieve the resource path searched by 'fetchItem'.
+getResourcePath :: PandocMonad m => m [FilePath]
+getResourcePath = getsCommonState stResourcePath
+
+-- | Set the resource path searched by 'fetchItem'.
+setResourcePath :: PandocMonad m => [FilePath] -> m ()
+setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps}
+
+-- | Get the POSIX time.
+getPOSIXTime :: PandocMonad m => m POSIXTime
+getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
+
+-- | Get the zoned time.
+getZonedTime :: PandocMonad m => m ZonedTime
+getZonedTime = do
+  t <- getCurrentTime
+  tz <- getCurrentTimeZone
+  return $ utcToZonedTime tz t
+
+-- | Read file, checking in any number of directories.
+readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe T.Text)
+readFileFromDirs [] _ = return Nothing
+readFileFromDirs (d:ds) f = catchError
+    (Just . T.pack . UTF8.toStringLazy <$> readFileLazy (d </> f))
+    (\_ -> readFileFromDirs ds f)
+
+-- | Convert BCP47 string to a Lang, issuing warning
+-- if there are problems.
+toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang)
+toLang Nothing = return Nothing
+toLang (Just s) =
+  case parseBCP47 s of
+       Left _ -> do
+         report $ InvalidLang s
+         return Nothing
+       Right l -> return (Just l)
+
+-- | Select the language to use with 'translateTerm'.
+-- Note that this does not read a translation file;
+-- that is only done the first time 'translateTerm' is
+-- used.
+setTranslations :: PandocMonad m => Lang -> m ()
+setTranslations lang =
+  modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) }
+
+-- | Load term map.
+getTranslations :: PandocMonad m => m Translations
+getTranslations = do
+  mbtrans <- getsCommonState stTranslations
+  case mbtrans of
+       Nothing -> return mempty  -- no language defined
+       Just (_, Just t) -> return t
+       Just (lang, Nothing) -> do  -- read from file
+         let translationFile = "translations/" <> renderLang lang <> ".yaml"
+         let fallbackFile = "translations/" <> langLanguage lang <> ".yaml"
+         let getTrans fp = do
+               bs <- readDataFile fp
+               case readTranslations (UTF8.toText bs) of
+                    Left e   -> do
+                      report $ CouldNotLoadTranslations (renderLang lang)
+                        (T.pack fp <> ": " <> e)
+                      -- make sure we don't try again...
+                      modifyCommonState $ \st ->
+                        st{ stTranslations = Nothing }
+                      return mempty
+                    Right t -> do
+                      modifyCommonState $ \st ->
+                                  st{ stTranslations = Just (lang, Just t) }
+                      return t
+         catchError (getTrans $ T.unpack translationFile)
+           (\_ ->
+             catchError (getTrans $ T.unpack fallbackFile)
+               (\e -> do
+                 report $ CouldNotLoadTranslations (renderLang lang)
+                          $ case e of
+                               PandocCouldNotFindDataFileError _ ->
+                                 "data file " <> fallbackFile <> " not found"
+                               _ -> ""
+                 -- make sure we don't try again...
+                 modifyCommonState $ \st -> st{ stTranslations = Nothing }
+                 return mempty))
+
+-- | Get a translation from the current term map.
+-- Issue a warning if the term is not defined.
+translateTerm :: PandocMonad m => Term -> m T.Text
+translateTerm term = do
+  translations <- getTranslations
+  case lookupTerm term translations of
+       Just s -> return s
+       Nothing -> do
+         report $ NoTranslation $ T.pack $ show term
+         return ""
+
+-- | Specialized version of parseURIReference that disallows
+-- single-letter schemes.  Reason:  these are usually windows absolute
+-- paths.
+parseURIReference' :: T.Text -> Maybe URI
+parseURIReference' s = do
+  u <- parseURIReference (T.unpack s)
+  case uriScheme u of
+       [_] -> Nothing
+       _   -> Just u
+
+-- | Set the user data directory in common state.
+setUserDataDir :: PandocMonad m
+               => Maybe FilePath
+               -> m ()
+setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp }
+
+-- | Get the user data directory from common state.
+getUserDataDir :: PandocMonad m
+               => m (Maybe FilePath)
+getUserDataDir = getsCommonState stUserDataDir
+
+-- | Fetch an image or other item from the local filesystem or the net.
+-- Returns raw content and maybe mime type.
+fetchItem :: PandocMonad m
+          => T.Text
+          -> m (B.ByteString, Maybe MimeType)
+fetchItem s = do
+  mediabag <- getMediaBag
+  case lookupMedia (T.unpack s) mediabag of
+    Just (mime, bs) -> return (BL.toStrict bs, Just mime)
+    Nothing -> downloadOrRead s
+
+-- | Returns the content and, if available, the MIME type of a resource.
+-- If the given resource location is a valid URI, then download the
+-- resource from that URI. Otherwise, treat the resource identifier as a
+-- local file name.
+--
+-- Note that resources are treated relative to the URL of the first
+-- input source, if any.
+downloadOrRead :: PandocMonad m
+               => T.Text
+               -> m (B.ByteString, Maybe MimeType)
+downloadOrRead s = do
+  sourceURL <- getsCommonState stSourceURL
+  case (sourceURL >>= parseURIReference' .
+                       ensureEscaped, ensureEscaped s) of
+    (Just u, s') -> -- try fetching from relative path at source
+       case parseURIReference' s' of
+            Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` u
+            Nothing -> openURL s' -- will throw error
+    (Nothing, s'@(T.unpack -> ('/':'/':c:_))) | c /= '?' ->  -- protocol-relative URI
+                -- we exclude //? because of //?UNC/ on Windows
+       case parseURIReference' s' of
+            Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` httpcolon
+            Nothing -> openURL s' -- will throw error
+    (Nothing, s') ->
+       case parseURI (T.unpack s') of  -- requires absolute URI
+            Just u' | uriScheme u' == "file:" ->
+                 readLocalFile $ uriPathToPath (T.pack $ uriPath u')
+            -- We don't want to treat C:/ as a scheme:
+            Just u' | length (uriScheme u') > 2 -> openURL (T.pack $ show u')
+            _ -> readLocalFile fp -- get from local file system
+   where readLocalFile f = do
+             resourcePath <- getResourcePath
+             cont <- if isRelative f
+                        then withPaths resourcePath readFileStrict f
+                        else readFileStrict f
+             return (cont, mime)
+         httpcolon = URI{ uriScheme = "http:",
+                          uriAuthority = Nothing,
+                          uriPath = "",
+                          uriQuery = "",
+                          uriFragment = "" }
+         dropFragmentAndQuery = T.takeWhile (\c -> c /= '?' && c /= '#')
+         fp = unEscapeString $ T.unpack $ dropFragmentAndQuery s
+         mime = getMimeType $ case takeExtension fp of
+                     ".gz" -> dropExtension fp
+                     ".svgz" -> dropExtension fp ++ ".svg"
+                     x     -> x
+         ensureEscaped = T.pack . escapeURIString isAllowedInURI . T.unpack . T.map convertSlash
+         convertSlash '\\' = '/'
+         convertSlash x    = x
+
+-- | Retrieve default reference.docx.
+getDefaultReferenceDocx :: PandocMonad m => m Archive
+getDefaultReferenceDocx = do
+  let paths = ["[Content_Types].xml",
+               "_rels/.rels",
+               "docProps/app.xml",
+               "docProps/core.xml",
+               "docProps/custom.xml",
+               "word/document.xml",
+               "word/fontTable.xml",
+               "word/footnotes.xml",
+               "word/comments.xml",
+               "word/numbering.xml",
+               "word/settings.xml",
+               "word/webSettings.xml",
+               "word/styles.xml",
+               "word/_rels/document.xml.rels",
+               "word/_rels/footnotes.xml.rels",
+               "word/theme/theme1.xml"]
+  let toLazy = BL.fromChunks . (:[])
+  let pathToEntry path = do
+        epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
+        contents <- toLazy <$> readDataFile ("docx/" ++ path)
+        return $ toEntry path epochtime contents
+  datadir <- getUserDataDir
+  mbArchive <- case datadir of
+                    Nothing   -> return Nothing
+                    Just d    -> do
+                       exists <- fileExists (d </> "reference.docx")
+                       if exists
+                          then return (Just (d </> "reference.docx"))
+                          else return Nothing
+  case mbArchive of
+     Just arch -> toArchive <$> readFileLazy arch
+     Nothing   -> foldr addEntryToArchive emptyArchive <$>
+                     mapM pathToEntry paths
+
+-- | Retrieve default reference.odt.
+getDefaultReferenceODT :: PandocMonad m => m Archive
+getDefaultReferenceODT = do
+  let paths = ["mimetype",
+               "manifest.rdf",
+               "styles.xml",
+               "content.xml",
+               "meta.xml",
+               "settings.xml",
+               "Configurations2/accelerator/current.xml",
+               "Thumbnails/thumbnail.png",
+               "META-INF/manifest.xml"]
+  let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
+                            contents <- (BL.fromChunks . (:[])) `fmap`
+                                          readDataFile ("odt/" ++ path)
+                            return $ toEntry path epochtime contents
+  datadir <- getUserDataDir
+  mbArchive <- case datadir of
+                    Nothing   -> return Nothing
+                    Just d    -> do
+                       exists <- fileExists (d </> "reference.odt")
+                       if exists
+                          then return (Just (d </> "reference.odt"))
+                          else return Nothing
+  case mbArchive of
+     Just arch -> toArchive <$> readFileLazy arch
+     Nothing   -> foldr addEntryToArchive emptyArchive <$>
+                     mapM pathToEntry paths
+
+-- | Retrieve default reference.pptx.
+getDefaultReferencePptx :: PandocMonad m => m Archive
+getDefaultReferencePptx = do
+  -- We're going to narrow this down substantially once we get it
+  -- working.
+  let paths = [ "[Content_Types].xml"
+              , "_rels/.rels"
+              , "docProps/app.xml"
+              , "docProps/core.xml"
+              , "ppt/_rels/presentation.xml.rels"
+              , "ppt/presProps.xml"
+              , "ppt/presentation.xml"
+              , "ppt/slideLayouts/_rels/slideLayout1.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout2.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout3.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout4.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout5.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout6.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout7.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout8.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout9.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout10.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout11.xml.rels"
+              , "ppt/slideLayouts/slideLayout1.xml"
+              , "ppt/slideLayouts/slideLayout10.xml"
+              , "ppt/slideLayouts/slideLayout11.xml"
+              , "ppt/slideLayouts/slideLayout2.xml"
+              , "ppt/slideLayouts/slideLayout3.xml"
+              , "ppt/slideLayouts/slideLayout4.xml"
+              , "ppt/slideLayouts/slideLayout5.xml"
+              , "ppt/slideLayouts/slideLayout6.xml"
+              , "ppt/slideLayouts/slideLayout7.xml"
+              , "ppt/slideLayouts/slideLayout8.xml"
+              , "ppt/slideLayouts/slideLayout9.xml"
+              , "ppt/slideMasters/_rels/slideMaster1.xml.rels"
+              , "ppt/slideMasters/slideMaster1.xml"
+              , "ppt/slides/_rels/slide1.xml.rels"
+              , "ppt/slides/slide1.xml"
+              , "ppt/slides/_rels/slide2.xml.rels"
+              , "ppt/slides/slide2.xml"
+              , "ppt/slides/_rels/slide3.xml.rels"
+              , "ppt/slides/slide3.xml"
+              , "ppt/slides/_rels/slide4.xml.rels"
+              , "ppt/slides/slide4.xml"
+              , "ppt/tableStyles.xml"
+              , "ppt/theme/theme1.xml"
+              , "ppt/viewProps.xml"
+              -- These relate to notes slides.
+              , "ppt/notesMasters/notesMaster1.xml"
+              , "ppt/notesMasters/_rels/notesMaster1.xml.rels"
+              , "ppt/notesSlides/notesSlide1.xml"
+              , "ppt/notesSlides/_rels/notesSlide1.xml.rels"
+              , "ppt/notesSlides/notesSlide2.xml"
+              , "ppt/notesSlides/_rels/notesSlide2.xml.rels"
+              , "ppt/theme/theme2.xml"
+              ]
+  let toLazy = BL.fromChunks . (:[])
+  let pathToEntry path = do
+        epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
+        contents <- toLazy <$> readDataFile ("pptx/" ++ path)
+        return $ toEntry path epochtime contents
+  datadir <- getUserDataDir
+  mbArchive <- case datadir of
+                    Nothing   -> return Nothing
+                    Just d    -> do
+                       exists <- fileExists (d </> "reference.pptx")
+                       if exists
+                          then return (Just (d </> "reference.pptx"))
+                          else return Nothing
+  case mbArchive of
+     Just arch -> toArchive <$> readFileLazy arch
+     Nothing   -> foldr addEntryToArchive emptyArchive <$>
+                     mapM pathToEntry paths
+
+-- | Read file from user data directory or,
+-- if not found there, from Cabal data directory.
+readDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDataFile fname = do
+  datadir <- getUserDataDir
+  case datadir of
+       Nothing -> readDefaultDataFile fname
+       Just userDir -> do
+         exists <- fileExists (userDir </> fname)
+         if exists
+            then readFileStrict (userDir </> fname)
+            else readDefaultDataFile fname
+
+-- | Read file from from Cabal data directory.
+readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDefaultDataFile "reference.docx" =
+  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx
+readDefaultDataFile "reference.pptx" =
+  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferencePptx
+readDefaultDataFile "reference.odt" =
+  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT
+readDefaultDataFile fname =
+#ifdef EMBED_DATA_FILES
+  case lookup (makeCanonical fname) dataFiles of
+    Nothing       -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname
+    Just contents -> return contents
+#else
+  getDataFileName fname' >>= checkExistence >>= readFileStrict
+    where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
+
+-- | Returns the input filename unchanged if the file exits, and throws
+-- a `PandocCouldNotFindDataFileError` if it doesn't.
+checkExistence :: PandocMonad m => FilePath -> m FilePath
+checkExistence fn = do
+  exists <- fileExists fn
+  if exists
+     then return fn
+     else throwError $ PandocCouldNotFindDataFileError $ T.pack fn
+#endif
+
+-- | Canonicalizes a file path by removing redundant @.@ and @..@.
+makeCanonical :: FilePath -> FilePath
+makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
+ where  transformPathParts = reverse . foldl go []
+        go as     "."  = as
+        go (_:as) ".." = as
+        go as     x    = x : as
+
+-- | Trys to run an action on a file: for each directory given, a
+-- filepath is created from the given filename, and the action is run on
+-- that filepath. Returns the result of the first successful execution
+-- of the action, or throws a @PandocResourceNotFound@ exception if the
+-- action errors for all filepaths.
+withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
+withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp
+withPaths (p:ps) action fp =
+  catchError (action (p </> fp))
+             (\_ -> withPaths ps action fp)
+
+-- | Fetch local or remote resource (like an image) and provide data suitable
+-- for adding it to the MediaBag.
+fetchMediaResource :: PandocMonad m
+              => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString)
+fetchMediaResource src = do
+  (bs, mt) <- downloadOrRead src
+  let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src)
+                      (mt >>= extensionFromMimeType)
+  let bs' = BL.fromChunks [bs]
+  let basename = showDigest $ sha1 bs'
+  let fname = basename <.> T.unpack ext
+  return (fname, mt, bs')
+
+-- | Traverse tree, filling media bag for any images that
+-- aren't already in the media bag.
+fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc
+fillMediaBag d = walkM handleImage d
+  where handleImage :: PandocMonad m => Inline -> m Inline
+        handleImage (Image attr lab (src, tit)) = catchError
+          (do mediabag <- getMediaBag
+              case lookupMedia (T.unpack src) mediabag of
+                Just (_, _) -> return $ Image attr lab (src, tit)
+                Nothing -> do
+                  (fname, mt, bs) <- fetchMediaResource src
+                  insertMedia fname mt bs
+                  return $ Image attr lab (T.pack fname, tit))
+          (\e ->
+              case e of
+                PandocResourceNotFound _ -> do
+                  report $ CouldNotFetchResource src
+                            "replacing image with description"
+                  -- emit alt text
+                  return $ Span ("",["image"],[]) lab
+                PandocHttpError u er -> do
+                  report $ CouldNotFetchResource u
+                            (T.pack $ show er ++ "\rReplacing image with description.")
+                  -- emit alt text
+                  return $ Span ("",["image"],[]) lab
+                _ -> throwError e)
+        handleImage x = return x
+
+-- This requires UndecidableInstances.  We could avoid that
+-- by repeating the definitions below for every monad transformer
+-- we use: ReaderT, WriterT, StateT, RWST.  But this seems to
+-- be harmless.
+instance (MonadTrans t, PandocMonad m, Functor (t m),
+          MonadError PandocError (t m), Monad (t m),
+          Applicative (t m)) => PandocMonad (t m) where
+  lookupEnv = lift . lookupEnv
+  getCurrentTime = lift getCurrentTime
+  getCurrentTimeZone = lift getCurrentTimeZone
+  newStdGen = lift newStdGen
+  newUniqueHash = lift newUniqueHash
+  openURL = lift . openURL
+  readFileLazy = lift . readFileLazy
+  readFileStrict = lift . readFileStrict
+  glob = lift . glob
+  fileExists = lift . fileExists
+  getDataFileName = lift . getDataFileName
+  getModificationTime = lift . getModificationTime
+  getCommonState = lift getCommonState
+  putCommonState = lift . putCommonState
+  logOutput = lift . logOutput
+
+instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where
+  lookupEnv = lift . lookupEnv
+  getCurrentTime = lift getCurrentTime
+  getCurrentTimeZone = lift getCurrentTimeZone
+  newStdGen = lift newStdGen
+  newUniqueHash = lift newUniqueHash
+  openURL = lift . openURL
+  readFileLazy = lift . readFileLazy
+  readFileStrict = lift . readFileStrict
+  glob = lift . glob
+  fileExists = lift . fileExists
+  getDataFileName = lift . getDataFileName
+  getModificationTime = lift . getModificationTime
+  getCommonState = lift getCommonState
+  putCommonState = lift . putCommonState
+  trace msg = do
+    tracing <- getsCommonState stTrace
+    when tracing $ do
+      pos <- getPosition
+      Debug.Trace.trace
+        ("[trace] Parsed " ++ T.unpack msg ++ " at line " ++
+            show (sourceLine pos) ++
+            if sourceName pos == "chunk"
+               then " of chunk"
+               else "")
+        (return ())
+  logOutput = lift . logOutput
-- 
cgit v1.2.3