From ec49643d647080218bedfaf61b3eb9b9383e42c0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 15 Mar 2020 04:49:36 +0100 Subject: Subdivide Text.Pandoc.Class into small modules (#6106) * Extract CommonState into submodule * Extract PandocMonad into submodule * PandocMonad: ensure all functions have Haddock documentation --- src/Text/Pandoc/Class.hs | 732 ++--------------------------------- src/Text/Pandoc/Class/CommonState.hs | 80 ++++ src/Text/Pandoc/Class/PandocMonad.hs | 706 +++++++++++++++++++++++++++++++++ 3 files changed, 823 insertions(+), 695 deletions(-) create mode 100644 src/Text/Pandoc/Class/CommonState.hs create mode 100644 src/Text/Pandoc/Class/PandocMonad.hs (limited to 'src/Text') diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 436238139..b125e3bab 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -74,356 +74,57 @@ module Text.Pandoc.Class ( PandocMonad(..) , Translations ) where -import Prelude -import System.Random (StdGen, next, mkStdGen) -import qualified System.Random as IO (newStdGen) import Codec.Archive.Zip -import qualified Data.CaseInsensitive as CI -import Data.Unique (hashUnique) -import qualified Data.Unique as IO (newUnique) -import qualified Text.Pandoc.UTF8 as UTF8 -import qualified System.Directory as Directory -import Data.Time (UTCTime) -import Text.Pandoc.Logging -import Text.Pandoc.Shared (uriPathToPath) -import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName) -import qualified Data.Time as IO (getCurrentTime) -import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) -import Text.Pandoc.Definition -import Data.Digest.Pure.SHA (sha1, showDigest) -import Data.Maybe (fromMaybe) -import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds - , posixSecondsToUTCTime - , POSIXTime ) -import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc) +import Control.Monad.Except +import Control.Monad.State.Strict import Data.ByteString.Base64 (decodeLenient) -import Network.URI ( escapeURIString, nonStrictRelativeTo, - unEscapeString, parseURIReference, isAllowedInURI, - parseURI, URI(..) ) +import Data.ByteString.Lazy (toChunks) +import Data.Default +import Data.Time (UTCTime) +import Data.Time.Clock.POSIX ( posixSecondsToUTCTime ) +import Data.Time.LocalTime (TimeZone, utc) +import Data.Unique (hashUnique) +import Data.Word (Word8) import Network.HTTP.Client (httpLbs, responseBody, responseHeaders, Request(port, host, requestHeaders), parseRequest, newManager) import Network.HTTP.Client.Internal (addProxy) import Network.HTTP.Client.TLS (tlsManagerSettings) -import System.Environment (getEnv) import Network.HTTP.Types.Header ( hContentType ) import Network.Socket (withSocketsDo) -import Data.ByteString.Lazy (toChunks) -import qualified Control.Exception as E -import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) +import Network.URI ( unEscapeString ) +import Prelude +import System.Directory (createDirectoryIfMissing, getDirectoryContents, + doesDirectoryExist) +import System.Environment (getEnv) +import System.FilePath ((), takeDirectory, normalise) +import System.FilePath.Glob (match, compile) +import System.IO (stderr) +import System.IO.Error +import System.Random (StdGen, next, mkStdGen) +import Text.Pandoc.Class.CommonState (CommonState (..)) +import Text.Pandoc.Class.PandocMonad +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Logging import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) -import Text.Pandoc.Walk (walkM, walk) -import qualified Text.Pandoc.MediaBag as MB +import Text.Pandoc.Translations (Translations) +import Text.Pandoc.Walk (walk) +import qualified Control.Exception as E import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL -import qualified System.Environment as IO (lookupEnv) -import System.FilePath.Glob (match, compile) -import System.Directory (createDirectoryIfMissing, getDirectoryContents, - doesDirectoryExist) -import System.FilePath - ((), (<.>), takeDirectory, takeExtension, dropExtension, - isRelative, normalise, splitDirectories) -import qualified System.FilePath.Glob as IO (glob) -import qualified System.FilePath.Posix as Posix -import qualified System.Directory as IO (getModificationTime) -import Control.Monad.State.Strict -import Control.Monad.Except -import Data.Word (Word8) -import Data.Default -import System.IO.Error -import System.IO (stderr) +import qualified Data.CaseInsensitive as CI import qualified Data.Map as M import qualified Data.Text as T -import Text.Pandoc.Error -import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang) -import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, - readTranslations) -import qualified Debug.Trace -#ifdef EMBED_DATA_FILES -import Text.Pandoc.Data (dataFiles) -#else -import qualified Paths_pandoc as Paths -#endif - --- | The PandocMonad typeclass contains all the potentially --- IO-related functions used in pandoc's readers and writers. --- Instances of this typeclass may implement these functions --- in IO (as in 'PandocIO') or using an internal state that --- represents a file system, time, and so on (as in 'PandocPure'). -class (Functor m, Applicative m, Monad m, MonadError PandocError m) - => PandocMonad m where - -- | Lookup an environment variable. - lookupEnv :: T.Text -> m (Maybe T.Text) - -- | Get the current (UTC) time. - getCurrentTime :: m UTCTime - -- | Get the locale's time zone. - getCurrentTimeZone :: m TimeZone - -- | Return a new generator for random numbers. - newStdGen :: m StdGen - -- | Return a new unique integer. - newUniqueHash :: m Int - -- | Retrieve contents and mime type from a URL, raising - -- an error on failure. - openURL :: T.Text -> m (B.ByteString, Maybe MimeType) - -- | Read the lazy ByteString contents from a file path, - -- raising an error on failure. - readFileLazy :: FilePath -> m BL.ByteString - -- | Read the strict ByteString contents from a file path, - -- raising an error on failure. - readFileStrict :: FilePath -> m B.ByteString - -- | Return a list of paths that match a glob, relative to - -- the working directory. See 'System.FilePath.Glob' for - -- the glob syntax. - glob :: String -> m [FilePath] - -- | Returns True if file exists. - fileExists :: FilePath -> m Bool - -- | Returns the path of data file. - getDataFileName :: FilePath -> m FilePath - -- | Return the modification time of a file. - getModificationTime :: FilePath -> m UTCTime - -- | Get the value of the 'CommonState' used by all instances - -- of 'PandocMonad'. - getCommonState :: m CommonState - -- | Set the value of the 'CommonState' used by all instances - -- of 'PandocMonad'. - -- | Get the value of a specific field of 'CommonState'. - putCommonState :: CommonState -> m () - -- | Get the value of a specific field of 'CommonState'. - getsCommonState :: (CommonState -> a) -> m a - getsCommonState f = f <$> getCommonState - -- | Modify the 'CommonState'. - modifyCommonState :: (CommonState -> CommonState) -> m () - modifyCommonState f = getCommonState >>= putCommonState . f - -- Output a log message. - logOutput :: LogMessage -> m () - -- Output a debug message to sterr, using 'Debug.Trace.trace', - -- if tracing is enabled. Note: this writes to stderr even in - -- pure instances. - trace :: T.Text -> m () - trace msg = do - tracing <- getsCommonState stTrace - when tracing $ Debug.Trace.trace ("[trace] " ++ T.unpack msg) (return ()) - --- * Functions defined for all PandocMonad instances - --- | Set the verbosity level. -setVerbosity :: PandocMonad m => Verbosity -> m () -setVerbosity verbosity = - modifyCommonState $ \st -> st{ stVerbosity = verbosity } - --- | Get the verbosity level. -getVerbosity :: PandocMonad m => m Verbosity -getVerbosity = getsCommonState stVerbosity - --- Get the accomulated log messages (in temporal order). -getLog :: PandocMonad m => m [LogMessage] -getLog = reverse <$> getsCommonState stLog - --- | Log a message using 'logOutput'. Note that 'logOutput' is --- called only if the verbosity level exceeds the level of the --- message, but the message is added to the list of log messages --- that will be retrieved by 'getLog' regardless of its verbosity level. -report :: PandocMonad m => LogMessage -> m () -report msg = do - verbosity <- getsCommonState stVerbosity - let level = messageVerbosity msg - when (level <= verbosity) $ logOutput msg - modifyCommonState $ \st -> st{ stLog = msg : stLog st } - --- | Determine whether tracing is enabled. This affects --- the behavior of 'trace'. If tracing is not enabled, --- 'trace' does nothing. -setTrace :: PandocMonad m => Bool -> m () -setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing} - --- | Set request header to use in HTTP requests. -setRequestHeader :: PandocMonad m - => T.Text -- ^ Header name - -> T.Text -- ^ Value - -> m () -setRequestHeader name val = modifyCommonState $ \st -> - st{ stRequestHeaders = - (name, val) : filter (\(n,_) -> n /= name) (stRequestHeaders st) } - --- | Initialize the media bag. -setMediaBag :: PandocMonad m => MediaBag -> m () -setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb} - --- Retrieve the media bag. -getMediaBag :: PandocMonad m => m MediaBag -getMediaBag = getsCommonState stMediaBag - --- Insert an item into the media bag. -insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m () -insertMedia fp mime bs = do - mb <- getMediaBag - let mb' = MB.insertMedia fp mime bs mb - setMediaBag mb' - --- Retrieve the input filenames. -getInputFiles :: PandocMonad m => m [FilePath] -getInputFiles = getsCommonState stInputFiles - --- Set the input filenames. -setInputFiles :: PandocMonad m => [FilePath] -> m () -setInputFiles fs = do - let sourceURL = case fs of - [] -> Nothing - (x:_) -> case parseURI x of - Just u - | uriScheme u `elem` ["http:","https:"] -> - Just $ show u{ uriQuery = "", - uriFragment = "" } - _ -> Nothing - - modifyCommonState $ \st -> st{ stInputFiles = fs - , stSourceURL = T.pack <$> sourceURL } - --- Retrieve the output filename. -getOutputFile :: PandocMonad m => m (Maybe FilePath) -getOutputFile = getsCommonState stOutputFile - --- Set the output filename. -setOutputFile :: PandocMonad m => Maybe FilePath -> m () -setOutputFile mbf = modifyCommonState $ \st -> st{ stOutputFile = mbf } - --- Retrieve the resource path searched by 'fetchItem'. -getResourcePath :: PandocMonad m => m [FilePath] -getResourcePath = getsCommonState stResourcePath - --- Set the resource path searched by 'fetchItem'. -setResourcePath :: PandocMonad m => [FilePath] -> m () -setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps} - --- Get the POSIX time. -getPOSIXTime :: PandocMonad m => m POSIXTime -getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime - --- Get the zoned time. -getZonedTime :: PandocMonad m => m ZonedTime -getZonedTime = do - t <- getCurrentTime - tz <- getCurrentTimeZone - return $ utcToZonedTime tz t - --- | Read file, checking in any number of directories. -readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe T.Text) -readFileFromDirs [] _ = return Nothing -readFileFromDirs (d:ds) f = catchError - ((Just . T.pack . UTF8.toStringLazy) <$> readFileLazy (d f)) - (\_ -> readFileFromDirs ds f) - --- | 'CommonState' represents state that is used by all --- instances of 'PandocMonad'. Normally users should not --- need to interact with it directly; instead, auxiliary --- functions like 'setVerbosity' and 'withMediaBag' should be used. -data CommonState = CommonState { stLog :: [LogMessage] - -- ^ A list of log messages in reverse order - , stUserDataDir :: Maybe FilePath - -- ^ Directory to search for data files - , stSourceURL :: Maybe T.Text - -- ^ Absolute URL + dir of 1st source file - , stRequestHeaders :: [(T.Text, T.Text)] - -- ^ Headers to add for HTTP requests - , stMediaBag :: MediaBag - -- ^ Media parsed from binary containers - , stTranslations :: Maybe - (Lang, Maybe Translations) - -- ^ Translations for localization - , stInputFiles :: [FilePath] - -- ^ List of input files from command line - , stOutputFile :: Maybe FilePath - -- ^ Output file from command line - , stResourcePath :: [FilePath] - -- ^ Path to search for resources like - -- included images - , stVerbosity :: Verbosity - -- ^ Verbosity level - , stTrace :: Bool - -- ^ Controls whether tracing messages are - -- issued. - } - -instance Default CommonState where - def = CommonState { stLog = [] - , stUserDataDir = Nothing - , stSourceURL = Nothing - , stRequestHeaders = [] - , stMediaBag = mempty - , stTranslations = Nothing - , stInputFiles = [] - , stOutputFile = Nothing - , stResourcePath = ["."] - , stVerbosity = WARNING - , stTrace = False - } - --- | Convert BCP47 string to a Lang, issuing warning --- if there are problems. -toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang) -toLang Nothing = return Nothing -toLang (Just s) = - case parseBCP47 s of - Left _ -> do - report $ InvalidLang s - return Nothing - Right l -> return (Just l) - --- | Select the language to use with 'translateTerm'. --- Note that this does not read a translation file; --- that is only done the first time 'translateTerm' is --- used. -setTranslations :: PandocMonad m => Lang -> m () -setTranslations lang = - modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) } - --- | Load term map. -getTranslations :: PandocMonad m => m Translations -getTranslations = do - mbtrans <- getsCommonState stTranslations - case mbtrans of - Nothing -> return mempty -- no language defined - Just (_, Just t) -> return t - Just (lang, Nothing) -> do -- read from file - let translationFile = "translations/" <> renderLang lang <> ".yaml" - let fallbackFile = "translations/" <> langLanguage lang <> ".yaml" - let getTrans fp = do - bs <- readDataFile fp - case readTranslations (UTF8.toText bs) of - Left e -> do - report $ CouldNotLoadTranslations (renderLang lang) - (T.pack fp <> ": " <> e) - -- make sure we don't try again... - modifyCommonState $ \st -> - st{ stTranslations = Nothing } - return mempty - Right t -> do - modifyCommonState $ \st -> - st{ stTranslations = Just (lang, Just t) } - return t - catchError (getTrans $ T.unpack translationFile) - (\_ -> - catchError (getTrans $ T.unpack fallbackFile) - (\e -> do - report $ CouldNotLoadTranslations (renderLang lang) - $ case e of - PandocCouldNotFindDataFileError _ -> - "data file " <> fallbackFile <> " not found" - _ -> "" - -- make sure we don't try again... - modifyCommonState $ \st -> st{ stTranslations = Nothing } - return mempty)) - --- | Get a translation from the current term map. --- Issue a warning if the term is not defined. -translateTerm :: PandocMonad m => Term -> m T.Text -translateTerm term = do - translations <- getTranslations - case lookupTerm term translations of - Just s -> return s - Nothing -> do - report $ NoTranslation $ T.pack $ show term - return "" +import qualified Data.Time as IO (getCurrentTime) +import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) +import qualified Data.Unique as IO (newUnique) +import qualified System.Directory as Directory +import qualified System.Directory as IO (getModificationTime) +import qualified System.Environment as IO (lookupEnv) +import qualified System.FilePath.Glob as IO (glob) +import qualified System.Random as IO (newStdGen) +import qualified Text.Pandoc.UTF8 as UTF8 -- | Evaluate a 'PandocIO' operation. runIO :: PandocIO a -> IO (Either PandocError a) @@ -519,314 +220,6 @@ alertIndent (l:ls) = do where go l' = do UTF8.hPutStr stderr " " UTF8.hPutStrLn stderr $ T.unpack l' --- | Specialized version of parseURIReference that disallows --- single-letter schemes. Reason: these are usually windows absolute --- paths. -parseURIReference' :: T.Text -> Maybe URI -parseURIReference' s = do - u <- parseURIReference (T.unpack s) - case uriScheme u of - [_] -> Nothing - _ -> Just u - --- | Set the user data directory in common state. -setUserDataDir :: PandocMonad m - => Maybe FilePath - -> m () -setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp } - --- | Get the user data directory from common state. -getUserDataDir :: PandocMonad m - => m (Maybe FilePath) -getUserDataDir = getsCommonState stUserDataDir - --- | Fetch an image or other item from the local filesystem or the net. --- Returns raw content and maybe mime type. -fetchItem :: PandocMonad m - => T.Text - -> m (B.ByteString, Maybe MimeType) -fetchItem s = do - mediabag <- getMediaBag - case lookupMedia (T.unpack s) mediabag of - Just (mime, bs) -> return (BL.toStrict bs, Just mime) - Nothing -> downloadOrRead s - -downloadOrRead :: PandocMonad m - => T.Text - -> m (B.ByteString, Maybe MimeType) -downloadOrRead s = do - sourceURL <- getsCommonState stSourceURL - case (sourceURL >>= parseURIReference' . - ensureEscaped, ensureEscaped s) of - (Just u, s') -> -- try fetching from relative path at source - case parseURIReference' s' of - Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` u - Nothing -> openURL s' -- will throw error - (Nothing, s'@(T.unpack -> ('/':'/':c:_))) | c /= '?' -> -- protocol-relative URI - -- we exclude //? because of //?UNC/ on Windows - case parseURIReference' s' of - Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` httpcolon - Nothing -> openURL s' -- will throw error - (Nothing, s') -> - case parseURI (T.unpack s') of -- requires absolute URI - Just u' | uriScheme u' == "file:" -> - readLocalFile $ uriPathToPath (T.pack $ uriPath u') - -- We don't want to treat C:/ as a scheme: - Just u' | length (uriScheme u') > 2 -> openURL (T.pack $ show u') - _ -> readLocalFile fp -- get from local file system - where readLocalFile f = do - resourcePath <- getResourcePath - cont <- if isRelative f - then withPaths resourcePath readFileStrict f - else readFileStrict f - return (cont, mime) - httpcolon = URI{ uriScheme = "http:", - uriAuthority = Nothing, - uriPath = "", - uriQuery = "", - uriFragment = "" } - dropFragmentAndQuery = T.takeWhile (\c -> c /= '?' && c /= '#') - fp = unEscapeString $ T.unpack $ dropFragmentAndQuery s - mime = getMimeType $ case takeExtension fp of - ".gz" -> dropExtension fp - ".svgz" -> dropExtension fp ++ ".svg" - x -> x - ensureEscaped = T.pack . escapeURIString isAllowedInURI . T.unpack . T.map convertSlash - convertSlash '\\' = '/' - convertSlash x = x - --- Retrieve default reference.docx. -getDefaultReferenceDocx :: PandocMonad m => m Archive -getDefaultReferenceDocx = do - let paths = ["[Content_Types].xml", - "_rels/.rels", - "docProps/app.xml", - "docProps/core.xml", - "docProps/custom.xml", - "word/document.xml", - "word/fontTable.xml", - "word/footnotes.xml", - "word/comments.xml", - "word/numbering.xml", - "word/settings.xml", - "word/webSettings.xml", - "word/styles.xml", - "word/_rels/document.xml.rels", - "word/_rels/footnotes.xml.rels", - "word/theme/theme1.xml"] - let toLazy = BL.fromChunks . (:[]) - let pathToEntry path = do - epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime - contents <- toLazy <$> readDataFile ("docx/" ++ path) - return $ toEntry path epochtime contents - datadir <- getUserDataDir - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- fileExists (d "reference.docx") - if exists - then return (Just (d "reference.docx")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> readFileLazy arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - --- Retrieve default reference.odt. -getDefaultReferenceODT :: PandocMonad m => m Archive -getDefaultReferenceODT = do - let paths = ["mimetype", - "manifest.rdf", - "styles.xml", - "content.xml", - "meta.xml", - "settings.xml", - "Configurations2/accelerator/current.xml", - "Thumbnails/thumbnail.png", - "META-INF/manifest.xml"] - let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime - contents <- (BL.fromChunks . (:[])) `fmap` - readDataFile ("odt/" ++ path) - return $ toEntry path epochtime contents - datadir <- getUserDataDir - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- fileExists (d "reference.odt") - if exists - then return (Just (d "reference.odt")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> readFileLazy arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - -getDefaultReferencePptx :: PandocMonad m => m Archive -getDefaultReferencePptx = do - -- We're going to narrow this down substantially once we get it - -- working. - let paths = [ "[Content_Types].xml" - , "_rels/.rels" - , "docProps/app.xml" - , "docProps/core.xml" - , "ppt/_rels/presentation.xml.rels" - , "ppt/presProps.xml" - , "ppt/presentation.xml" - , "ppt/slideLayouts/_rels/slideLayout1.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout2.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout3.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout4.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout5.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout6.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout7.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout8.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout9.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout10.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout11.xml.rels" - , "ppt/slideLayouts/slideLayout1.xml" - , "ppt/slideLayouts/slideLayout10.xml" - , "ppt/slideLayouts/slideLayout11.xml" - , "ppt/slideLayouts/slideLayout2.xml" - , "ppt/slideLayouts/slideLayout3.xml" - , "ppt/slideLayouts/slideLayout4.xml" - , "ppt/slideLayouts/slideLayout5.xml" - , "ppt/slideLayouts/slideLayout6.xml" - , "ppt/slideLayouts/slideLayout7.xml" - , "ppt/slideLayouts/slideLayout8.xml" - , "ppt/slideLayouts/slideLayout9.xml" - , "ppt/slideMasters/_rels/slideMaster1.xml.rels" - , "ppt/slideMasters/slideMaster1.xml" - , "ppt/slides/_rels/slide1.xml.rels" - , "ppt/slides/slide1.xml" - , "ppt/slides/_rels/slide2.xml.rels" - , "ppt/slides/slide2.xml" - , "ppt/slides/_rels/slide3.xml.rels" - , "ppt/slides/slide3.xml" - , "ppt/slides/_rels/slide4.xml.rels" - , "ppt/slides/slide4.xml" - , "ppt/tableStyles.xml" - , "ppt/theme/theme1.xml" - , "ppt/viewProps.xml" - -- These relate to notes slides. - , "ppt/notesMasters/notesMaster1.xml" - , "ppt/notesMasters/_rels/notesMaster1.xml.rels" - , "ppt/notesSlides/notesSlide1.xml" - , "ppt/notesSlides/_rels/notesSlide1.xml.rels" - , "ppt/notesSlides/notesSlide2.xml" - , "ppt/notesSlides/_rels/notesSlide2.xml.rels" - , "ppt/theme/theme2.xml" - ] - let toLazy = BL.fromChunks . (:[]) - let pathToEntry path = do - epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime - contents <- toLazy <$> readDataFile ("pptx/" ++ path) - return $ toEntry path epochtime contents - datadir <- getUserDataDir - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- fileExists (d "reference.pptx") - if exists - then return (Just (d "reference.pptx")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> readFileLazy arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - - --- | Read file from user data directory or, --- if not found there, from Cabal data directory. -readDataFile :: PandocMonad m => FilePath -> m B.ByteString -readDataFile fname = do - datadir <- getUserDataDir - case datadir of - Nothing -> readDefaultDataFile fname - Just userDir -> do - exists <- fileExists (userDir fname) - if exists - then readFileStrict (userDir fname) - else readDefaultDataFile fname - --- | Read file from from Cabal data directory. -readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString -readDefaultDataFile "reference.docx" = - (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx -readDefaultDataFile "reference.pptx" = - (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferencePptx -readDefaultDataFile "reference.odt" = - (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT -readDefaultDataFile fname = -#ifdef EMBED_DATA_FILES - case lookup (makeCanonical fname) dataFiles of - Nothing -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname - Just contents -> return contents -#else - getDataFileName fname' >>= checkExistence >>= readFileStrict - where fname' = if fname == "MANUAL.txt" then fname else "data" fname - -checkExistence :: PandocMonad m => FilePath -> m FilePath -checkExistence fn = do - exists <- fileExists fn - if exists - then return fn - else throwError $ PandocCouldNotFindDataFileError $ T.pack fn -#endif - -makeCanonical :: FilePath -> FilePath -makeCanonical = Posix.joinPath . transformPathParts . splitDirectories - where transformPathParts = reverse . foldl go [] - go as "." = as - go (_:as) ".." = as - go as x = x : as - -withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a -withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp -withPaths (p:ps) action fp = - catchError (action (p fp)) - (\_ -> withPaths ps action fp) - --- | Fetch local or remote resource (like an image) and provide data suitable --- for adding it to the MediaBag. -fetchMediaResource :: PandocMonad m - => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) -fetchMediaResource src = do - (bs, mt) <- downloadOrRead src - let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) - (mt >>= extensionFromMimeType) - let bs' = BL.fromChunks [bs] - let basename = showDigest $ sha1 bs' - let fname = basename <.> T.unpack ext - return (fname, mt, bs') - --- | Traverse tree, filling media bag for any images that --- aren't already in the media bag. -fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc -fillMediaBag d = walkM handleImage d - where handleImage :: PandocMonad m => Inline -> m Inline - handleImage (Image attr lab (src, tit)) = catchError - (do mediabag <- getMediaBag - case lookupMedia (T.unpack src) mediabag of - Just (_, _) -> return $ Image attr lab (src, tit) - Nothing -> do - (fname, mt, bs) <- fetchMediaResource src - insertMedia fname mt bs - return $ Image attr lab (T.pack fname, tit)) - (\e -> - case e of - PandocResourceNotFound _ -> do - report $ CouldNotFetchResource src - "replacing image with description" - -- emit alt text - return $ Span ("",["image"],[]) lab - PandocHttpError u er -> do - report $ CouldNotFetchResource u - (T.pack $ show er ++ "\rReplacing image with description.") - -- emit alt text - return $ Span ("",["image"],[]) lab - _ -> throwError e) - handleImage x = return x - -- | Extract media from the mediabag into a directory. extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc extractMedia dir d = do @@ -1015,54 +408,3 @@ instance PandocMonad PandocPure where putCommonState x = PandocPure $ lift $ put x logOutput _msg = return () - --- This requires UndecidableInstances. We could avoid that --- by repeating the definitions below for every monad transformer --- we use: ReaderT, WriterT, StateT, RWST. But this seems to --- be harmless. -instance (MonadTrans t, PandocMonad m, Functor (t m), - MonadError PandocError (t m), Monad (t m), - Applicative (t m)) => PandocMonad (t m) where - lookupEnv = lift . lookupEnv - getCurrentTime = lift getCurrentTime - getCurrentTimeZone = lift getCurrentTimeZone - newStdGen = lift newStdGen - newUniqueHash = lift newUniqueHash - openURL = lift . openURL - readFileLazy = lift . readFileLazy - readFileStrict = lift . readFileStrict - glob = lift . glob - fileExists = lift . fileExists - getDataFileName = lift . getDataFileName - getModificationTime = lift . getModificationTime - getCommonState = lift getCommonState - putCommonState = lift . putCommonState - logOutput = lift . logOutput - -instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where - lookupEnv = lift . lookupEnv - getCurrentTime = lift getCurrentTime - getCurrentTimeZone = lift getCurrentTimeZone - newStdGen = lift newStdGen - newUniqueHash = lift newUniqueHash - openURL = lift . openURL - readFileLazy = lift . readFileLazy - readFileStrict = lift . readFileStrict - glob = lift . glob - fileExists = lift . fileExists - getDataFileName = lift . getDataFileName - getModificationTime = lift . getModificationTime - getCommonState = lift getCommonState - putCommonState = lift . putCommonState - trace msg = do - tracing <- getsCommonState stTrace - when tracing $ do - pos <- getPosition - Debug.Trace.trace - ("[trace] Parsed " ++ T.unpack msg ++ " at line " ++ - show (sourceLine pos) ++ - if sourceName pos == "chunk" - then " of chunk" - else "") - (return ()) - logOutput = lift . logOutput diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs new file mode 100644 index 000000000..4a0f66859 --- /dev/null +++ b/src/Text/Pandoc/Class/CommonState.hs @@ -0,0 +1,80 @@ +{- | +Module : Text.Pandoc.Class.CommonState +Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : Jesse Rosenthal +Stability : alpha +Portability : portable + +Common state shared by all pandoc-specific operations, including +those in readers, writers, and Lua filters. +-} + +module Text.Pandoc.Class.CommonState + ( CommonState(..) + , defaultCommonState + ) +where + +import Data.Default (Default (def)) +import Data.Text (Text) +import Text.Pandoc.BCP47 (Lang) +import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING)) +import Text.Pandoc.Translations (Translations) + +-- | 'CommonState' represents state that is used by all +-- instances of 'PandocMonad'. Normally users should not +-- need to interact with it directly; instead, auxiliary +-- functions like 'setVerbosity' and 'withMediaBag' should be used. +data CommonState = CommonState + { stLog :: [LogMessage] + -- ^ A list of log messages in reverse order + , stUserDataDir :: Maybe FilePath + -- ^ Directory to search for data files + , stSourceURL :: Maybe Text + -- ^ Absolute URL + dir of 1st source file + , stRequestHeaders :: [(Text, Text)] + -- ^ Headers to add for HTTP requests + , stMediaBag :: MediaBag + -- ^ Media parsed from binary containers + , stTranslations :: Maybe (Lang, Maybe Translations) + -- ^ Translations for localization + , stInputFiles :: [FilePath] + -- ^ List of input files from command line + , stOutputFile :: Maybe FilePath + -- ^ Output file from command line + , stResourcePath :: [FilePath] + -- ^ Path to search for resources like + -- included images + , stVerbosity :: Verbosity + -- ^ Verbosity level + , stTrace :: Bool + -- ^ Controls whether tracing messages are + -- issued. + } + +-- | The default @'CommonState'@. All fields are initialized as the +-- monoid identity of their resprective type, except for: +-- +-- * @'stResourcePath'@, which is set to @["."]@, +-- * @'stTrace'@, which is set to @'False'@, and +-- * @'stVerbosity'@, which is set to @WARNING@. +defaultCommonState :: CommonState +defaultCommonState = CommonState + { stLog = [] + , stUserDataDir = Nothing + , stSourceURL = Nothing + , stRequestHeaders = [] + , stMediaBag = mempty + , stTranslations = Nothing + , stInputFiles = [] + , stOutputFile = Nothing + , stResourcePath = ["."] + , stVerbosity = WARNING + , stTrace = False + } + +instance Default CommonState where + def = defaultCommonState diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs new file mode 100644 index 000000000..7f31eec96 --- /dev/null +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -0,0 +1,706 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE ViewPatterns #-} +#if MIN_VERSION_base(4,8,0) +#else +{-# LANGUAGE OverlappingInstances #-} +#endif +{- | +Module : Text.Pandoc.Class.PandocMonad +Copyright : Copyright (C) 2016-20 Jesse Rosenthal, John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : Jesse Rosenthal +Stability : alpha +Portability : portable + +This module defines a type class, 'PandocMonad', for pandoc readers +and writers. +-} + +module Text.Pandoc.Class.PandocMonad + ( PandocMonad(..) + , getPOSIXTime + , getZonedTime + , readFileFromDirs + , report + , setTrace + , setRequestHeader + , getLog + , setVerbosity + , getVerbosity + , getMediaBag + , setMediaBag + , insertMedia + , setUserDataDir + , getUserDataDir + , fetchItem + , fetchMediaResource + , getInputFiles + , setInputFiles + , getOutputFile + , setOutputFile + , setResourcePath + , getResourcePath + , readDefaultDataFile + , readDataFile + , fillMediaBag + , toLang + , setTranslations + , translateTerm + , makeCanonical + ) where + +import Prelude +import Codec.Archive.Zip +import Control.Monad.Except (MonadError (catchError, throwError), + MonadTrans, lift, when) +import Data.Digest.Pure.SHA (sha1, showDigest) +import Data.Maybe (fromMaybe) +import Data.Time (UTCTime) +import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds) +import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime) +import Network.URI ( escapeURIString, nonStrictRelativeTo, + unEscapeString, parseURIReference, isAllowedInURI, + parseURI, URI(..) ) +import System.FilePath ((), (<.>), takeExtension, dropExtension, + isRelative, splitDirectories) +import System.Random (StdGen) +import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang) +import Text.Pandoc.Class.CommonState (CommonState (..)) +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Logging +import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia) +import Text.Pandoc.Shared (uriPathToPath) +import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, + readTranslations) +import Text.Pandoc.Walk (walkM) +import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName) +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as T +import qualified Debug.Trace +import qualified System.FilePath.Posix as Posix +import qualified Text.Pandoc.MediaBag as MB +import qualified Text.Pandoc.UTF8 as UTF8 +#ifdef EMBED_DATA_FILES +import Text.Pandoc.Data (dataFiles) +#endif + +-- | The PandocMonad typeclass contains all the potentially +-- IO-related functions used in pandoc's readers and writers. +-- Instances of this typeclass may implement these functions +-- in IO (as in 'PandocIO') or using an internal state that +-- represents a file system, time, and so on (as in 'PandocPure'). +class (Functor m, Applicative m, Monad m, MonadError PandocError m) + => PandocMonad m where + -- | Lookup an environment variable. + lookupEnv :: T.Text -> m (Maybe T.Text) + -- | Get the current (UTC) time. + getCurrentTime :: m UTCTime + -- | Get the locale's time zone. + getCurrentTimeZone :: m TimeZone + -- | Return a new generator for random numbers. + newStdGen :: m StdGen + -- | Return a new unique integer. + newUniqueHash :: m Int + -- | Retrieve contents and mime type from a URL, raising + -- an error on failure. + openURL :: T.Text -> m (B.ByteString, Maybe MimeType) + -- | Read the lazy ByteString contents from a file path, + -- raising an error on failure. + readFileLazy :: FilePath -> m BL.ByteString + -- | Read the strict ByteString contents from a file path, + -- raising an error on failure. + readFileStrict :: FilePath -> m B.ByteString + -- | Return a list of paths that match a glob, relative to + -- the working directory. See 'System.FilePath.Glob' for + -- the glob syntax. + glob :: String -> m [FilePath] + -- | Returns True if file exists. + fileExists :: FilePath -> m Bool + -- | Returns the path of data file. + getDataFileName :: FilePath -> m FilePath + -- | Return the modification time of a file. + getModificationTime :: FilePath -> m UTCTime + -- | Get the value of the 'CommonState' used by all instances + -- of 'PandocMonad'. + getCommonState :: m CommonState + -- | Set the value of the 'CommonState' used by all instances + -- of 'PandocMonad'. + -- | Get the value of a specific field of 'CommonState'. + putCommonState :: CommonState -> m () + -- | Get the value of a specific field of 'CommonState'. + getsCommonState :: (CommonState -> a) -> m a + getsCommonState f = f <$> getCommonState + -- | Modify the 'CommonState'. + modifyCommonState :: (CommonState -> CommonState) -> m () + modifyCommonState f = getCommonState >>= putCommonState . f + -- | Output a log message. + logOutput :: LogMessage -> m () + -- | Output a debug message to sterr, using 'Debug.Trace.trace', + -- if tracing is enabled. Note: this writes to stderr even in + -- pure instances. + trace :: T.Text -> m () + trace msg = do + tracing <- getsCommonState stTrace + when tracing $ Debug.Trace.trace ("[trace] " ++ T.unpack msg) (return ()) + +-- * Functions defined for all PandocMonad instances + +-- | Set the verbosity level. +setVerbosity :: PandocMonad m => Verbosity -> m () +setVerbosity verbosity = + modifyCommonState $ \st -> st{ stVerbosity = verbosity } + +-- | Get the verbosity level. +getVerbosity :: PandocMonad m => m Verbosity +getVerbosity = getsCommonState stVerbosity + +-- | Get the accomulated log messages (in temporal order). +getLog :: PandocMonad m => m [LogMessage] +getLog = reverse <$> getsCommonState stLog + +-- | Log a message using 'logOutput'. Note that 'logOutput' is +-- called only if the verbosity level exceeds the level of the +-- message, but the message is added to the list of log messages +-- that will be retrieved by 'getLog' regardless of its verbosity level. +report :: PandocMonad m => LogMessage -> m () +report msg = do + verbosity <- getsCommonState stVerbosity + let level = messageVerbosity msg + when (level <= verbosity) $ logOutput msg + modifyCommonState $ \st -> st{ stLog = msg : stLog st } + +-- | Determine whether tracing is enabled. This affects +-- the behavior of 'trace'. If tracing is not enabled, +-- 'trace' does nothing. +setTrace :: PandocMonad m => Bool -> m () +setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing} + +-- | Set request header to use in HTTP requests. +setRequestHeader :: PandocMonad m + => T.Text -- ^ Header name + -> T.Text -- ^ Value + -> m () +setRequestHeader name val = modifyCommonState $ \st -> + st{ stRequestHeaders = + (name, val) : filter (\(n,_) -> n /= name) (stRequestHeaders st) } + +-- | Initialize the media bag. +setMediaBag :: PandocMonad m => MediaBag -> m () +setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb} + +-- | Retrieve the media bag. +getMediaBag :: PandocMonad m => m MediaBag +getMediaBag = getsCommonState stMediaBag + +-- | Insert an item into the media bag. +insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m () +insertMedia fp mime bs = do + mb <- getMediaBag + let mb' = MB.insertMedia fp mime bs mb + setMediaBag mb' + +-- | Retrieve the input filenames. +getInputFiles :: PandocMonad m => m [FilePath] +getInputFiles = getsCommonState stInputFiles + +-- | Set the input filenames. +setInputFiles :: PandocMonad m => [FilePath] -> m () +setInputFiles fs = do + let sourceURL = case fs of + [] -> Nothing + (x:_) -> case parseURI x of + Just u + | uriScheme u `elem` ["http:","https:"] -> + Just $ show u{ uriQuery = "", + uriFragment = "" } + _ -> Nothing + + modifyCommonState $ \st -> st{ stInputFiles = fs + , stSourceURL = T.pack <$> sourceURL } + +-- | Retrieve the output filename. +getOutputFile :: PandocMonad m => m (Maybe FilePath) +getOutputFile = getsCommonState stOutputFile + +-- | Set the output filename. +setOutputFile :: PandocMonad m => Maybe FilePath -> m () +setOutputFile mbf = modifyCommonState $ \st -> st{ stOutputFile = mbf } + +-- | Retrieve the resource path searched by 'fetchItem'. +getResourcePath :: PandocMonad m => m [FilePath] +getResourcePath = getsCommonState stResourcePath + +-- | Set the resource path searched by 'fetchItem'. +setResourcePath :: PandocMonad m => [FilePath] -> m () +setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps} + +-- | Get the POSIX time. +getPOSIXTime :: PandocMonad m => m POSIXTime +getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime + +-- | Get the zoned time. +getZonedTime :: PandocMonad m => m ZonedTime +getZonedTime = do + t <- getCurrentTime + tz <- getCurrentTimeZone + return $ utcToZonedTime tz t + +-- | Read file, checking in any number of directories. +readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe T.Text) +readFileFromDirs [] _ = return Nothing +readFileFromDirs (d:ds) f = catchError + (Just . T.pack . UTF8.toStringLazy <$> readFileLazy (d f)) + (\_ -> readFileFromDirs ds f) + +-- | Convert BCP47 string to a Lang, issuing warning +-- if there are problems. +toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang) +toLang Nothing = return Nothing +toLang (Just s) = + case parseBCP47 s of + Left _ -> do + report $ InvalidLang s + return Nothing + Right l -> return (Just l) + +-- | Select the language to use with 'translateTerm'. +-- Note that this does not read a translation file; +-- that is only done the first time 'translateTerm' is +-- used. +setTranslations :: PandocMonad m => Lang -> m () +setTranslations lang = + modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) } + +-- | Load term map. +getTranslations :: PandocMonad m => m Translations +getTranslations = do + mbtrans <- getsCommonState stTranslations + case mbtrans of + Nothing -> return mempty -- no language defined + Just (_, Just t) -> return t + Just (lang, Nothing) -> do -- read from file + let translationFile = "translations/" <> renderLang lang <> ".yaml" + let fallbackFile = "translations/" <> langLanguage lang <> ".yaml" + let getTrans fp = do + bs <- readDataFile fp + case readTranslations (UTF8.toText bs) of + Left e -> do + report $ CouldNotLoadTranslations (renderLang lang) + (T.pack fp <> ": " <> e) + -- make sure we don't try again... + modifyCommonState $ \st -> + st{ stTranslations = Nothing } + return mempty + Right t -> do + modifyCommonState $ \st -> + st{ stTranslations = Just (lang, Just t) } + return t + catchError (getTrans $ T.unpack translationFile) + (\_ -> + catchError (getTrans $ T.unpack fallbackFile) + (\e -> do + report $ CouldNotLoadTranslations (renderLang lang) + $ case e of + PandocCouldNotFindDataFileError _ -> + "data file " <> fallbackFile <> " not found" + _ -> "" + -- make sure we don't try again... + modifyCommonState $ \st -> st{ stTranslations = Nothing } + return mempty)) + +-- | Get a translation from the current term map. +-- Issue a warning if the term is not defined. +translateTerm :: PandocMonad m => Term -> m T.Text +translateTerm term = do + translations <- getTranslations + case lookupTerm term translations of + Just s -> return s + Nothing -> do + report $ NoTranslation $ T.pack $ show term + return "" + +-- | Specialized version of parseURIReference that disallows +-- single-letter schemes. Reason: these are usually windows absolute +-- paths. +parseURIReference' :: T.Text -> Maybe URI +parseURIReference' s = do + u <- parseURIReference (T.unpack s) + case uriScheme u of + [_] -> Nothing + _ -> Just u + +-- | Set the user data directory in common state. +setUserDataDir :: PandocMonad m + => Maybe FilePath + -> m () +setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp } + +-- | Get the user data directory from common state. +getUserDataDir :: PandocMonad m + => m (Maybe FilePath) +getUserDataDir = getsCommonState stUserDataDir + +-- | Fetch an image or other item from the local filesystem or the net. +-- Returns raw content and maybe mime type. +fetchItem :: PandocMonad m + => T.Text + -> m (B.ByteString, Maybe MimeType) +fetchItem s = do + mediabag <- getMediaBag + case lookupMedia (T.unpack s) mediabag of + Just (mime, bs) -> return (BL.toStrict bs, Just mime) + Nothing -> downloadOrRead s + +-- | Returns the content and, if available, the MIME type of a resource. +-- If the given resource location is a valid URI, then download the +-- resource from that URI. Otherwise, treat the resource identifier as a +-- local file name. +-- +-- Note that resources are treated relative to the URL of the first +-- input source, if any. +downloadOrRead :: PandocMonad m + => T.Text + -> m (B.ByteString, Maybe MimeType) +downloadOrRead s = do + sourceURL <- getsCommonState stSourceURL + case (sourceURL >>= parseURIReference' . + ensureEscaped, ensureEscaped s) of + (Just u, s') -> -- try fetching from relative path at source + case parseURIReference' s' of + Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` u + Nothing -> openURL s' -- will throw error + (Nothing, s'@(T.unpack -> ('/':'/':c:_))) | c /= '?' -> -- protocol-relative URI + -- we exclude //? because of //?UNC/ on Windows + case parseURIReference' s' of + Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` httpcolon + Nothing -> openURL s' -- will throw error + (Nothing, s') -> + case parseURI (T.unpack s') of -- requires absolute URI + Just u' | uriScheme u' == "file:" -> + readLocalFile $ uriPathToPath (T.pack $ uriPath u') + -- We don't want to treat C:/ as a scheme: + Just u' | length (uriScheme u') > 2 -> openURL (T.pack $ show u') + _ -> readLocalFile fp -- get from local file system + where readLocalFile f = do + resourcePath <- getResourcePath + cont <- if isRelative f + then withPaths resourcePath readFileStrict f + else readFileStrict f + return (cont, mime) + httpcolon = URI{ uriScheme = "http:", + uriAuthority = Nothing, + uriPath = "", + uriQuery = "", + uriFragment = "" } + dropFragmentAndQuery = T.takeWhile (\c -> c /= '?' && c /= '#') + fp = unEscapeString $ T.unpack $ dropFragmentAndQuery s + mime = getMimeType $ case takeExtension fp of + ".gz" -> dropExtension fp + ".svgz" -> dropExtension fp ++ ".svg" + x -> x + ensureEscaped = T.pack . escapeURIString isAllowedInURI . T.unpack . T.map convertSlash + convertSlash '\\' = '/' + convertSlash x = x + +-- | Retrieve default reference.docx. +getDefaultReferenceDocx :: PandocMonad m => m Archive +getDefaultReferenceDocx = do + let paths = ["[Content_Types].xml", + "_rels/.rels", + "docProps/app.xml", + "docProps/core.xml", + "docProps/custom.xml", + "word/document.xml", + "word/fontTable.xml", + "word/footnotes.xml", + "word/comments.xml", + "word/numbering.xml", + "word/settings.xml", + "word/webSettings.xml", + "word/styles.xml", + "word/_rels/document.xml.rels", + "word/_rels/footnotes.xml.rels", + "word/theme/theme1.xml"] + let toLazy = BL.fromChunks . (:[]) + let pathToEntry path = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime + contents <- toLazy <$> readDataFile ("docx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d "reference.docx") + if exists + then return (Just (d "reference.docx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Retrieve default reference.odt. +getDefaultReferenceODT :: PandocMonad m => m Archive +getDefaultReferenceODT = do + let paths = ["mimetype", + "manifest.rdf", + "styles.xml", + "content.xml", + "meta.xml", + "settings.xml", + "Configurations2/accelerator/current.xml", + "Thumbnails/thumbnail.png", + "META-INF/manifest.xml"] + let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime + contents <- (BL.fromChunks . (:[])) `fmap` + readDataFile ("odt/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d "reference.odt") + if exists + then return (Just (d "reference.odt")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Retrieve default reference.pptx. +getDefaultReferencePptx :: PandocMonad m => m Archive +getDefaultReferencePptx = do + -- We're going to narrow this down substantially once we get it + -- working. + let paths = [ "[Content_Types].xml" + , "_rels/.rels" + , "docProps/app.xml" + , "docProps/core.xml" + , "ppt/_rels/presentation.xml.rels" + , "ppt/presProps.xml" + , "ppt/presentation.xml" + , "ppt/slideLayouts/_rels/slideLayout1.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout2.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout3.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout4.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout5.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout6.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout7.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout8.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout9.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout10.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout11.xml.rels" + , "ppt/slideLayouts/slideLayout1.xml" + , "ppt/slideLayouts/slideLayout10.xml" + , "ppt/slideLayouts/slideLayout11.xml" + , "ppt/slideLayouts/slideLayout2.xml" + , "ppt/slideLayouts/slideLayout3.xml" + , "ppt/slideLayouts/slideLayout4.xml" + , "ppt/slideLayouts/slideLayout5.xml" + , "ppt/slideLayouts/slideLayout6.xml" + , "ppt/slideLayouts/slideLayout7.xml" + , "ppt/slideLayouts/slideLayout8.xml" + , "ppt/slideLayouts/slideLayout9.xml" + , "ppt/slideMasters/_rels/slideMaster1.xml.rels" + , "ppt/slideMasters/slideMaster1.xml" + , "ppt/slides/_rels/slide1.xml.rels" + , "ppt/slides/slide1.xml" + , "ppt/slides/_rels/slide2.xml.rels" + , "ppt/slides/slide2.xml" + , "ppt/slides/_rels/slide3.xml.rels" + , "ppt/slides/slide3.xml" + , "ppt/slides/_rels/slide4.xml.rels" + , "ppt/slides/slide4.xml" + , "ppt/tableStyles.xml" + , "ppt/theme/theme1.xml" + , "ppt/viewProps.xml" + -- These relate to notes slides. + , "ppt/notesMasters/notesMaster1.xml" + , "ppt/notesMasters/_rels/notesMaster1.xml.rels" + , "ppt/notesSlides/notesSlide1.xml" + , "ppt/notesSlides/_rels/notesSlide1.xml.rels" + , "ppt/notesSlides/notesSlide2.xml" + , "ppt/notesSlides/_rels/notesSlide2.xml.rels" + , "ppt/theme/theme2.xml" + ] + let toLazy = BL.fromChunks . (:[]) + let pathToEntry path = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime + contents <- toLazy <$> readDataFile ("pptx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d "reference.pptx") + if exists + then return (Just (d "reference.pptx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Read file from user data directory or, +-- if not found there, from Cabal data directory. +readDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDataFile fname = do + datadir <- getUserDataDir + case datadir of + Nothing -> readDefaultDataFile fname + Just userDir -> do + exists <- fileExists (userDir fname) + if exists + then readFileStrict (userDir fname) + else readDefaultDataFile fname + +-- | Read file from from Cabal data directory. +readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDefaultDataFile "reference.docx" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx +readDefaultDataFile "reference.pptx" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferencePptx +readDefaultDataFile "reference.odt" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT +readDefaultDataFile fname = +#ifdef EMBED_DATA_FILES + case lookup (makeCanonical fname) dataFiles of + Nothing -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname + Just contents -> return contents +#else + getDataFileName fname' >>= checkExistence >>= readFileStrict + where fname' = if fname == "MANUAL.txt" then fname else "data" fname + +-- | Returns the input filename unchanged if the file exits, and throws +-- a `PandocCouldNotFindDataFileError` if it doesn't. +checkExistence :: PandocMonad m => FilePath -> m FilePath +checkExistence fn = do + exists <- fileExists fn + if exists + then return fn + else throwError $ PandocCouldNotFindDataFileError $ T.pack fn +#endif + +-- | Canonicalizes a file path by removing redundant @.@ and @..@. +makeCanonical :: FilePath -> FilePath +makeCanonical = Posix.joinPath . transformPathParts . splitDirectories + where transformPathParts = reverse . foldl go [] + go as "." = as + go (_:as) ".." = as + go as x = x : as + +-- | Trys to run an action on a file: for each directory given, a +-- filepath is created from the given filename, and the action is run on +-- that filepath. Returns the result of the first successful execution +-- of the action, or throws a @PandocResourceNotFound@ exception if the +-- action errors for all filepaths. +withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a +withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp +withPaths (p:ps) action fp = + catchError (action (p fp)) + (\_ -> withPaths ps action fp) + +-- | Fetch local or remote resource (like an image) and provide data suitable +-- for adding it to the MediaBag. +fetchMediaResource :: PandocMonad m + => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) +fetchMediaResource src = do + (bs, mt) <- downloadOrRead src + let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) + (mt >>= extensionFromMimeType) + let bs' = BL.fromChunks [bs] + let basename = showDigest $ sha1 bs' + let fname = basename <.> T.unpack ext + return (fname, mt, bs') + +-- | Traverse tree, filling media bag for any images that +-- aren't already in the media bag. +fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc +fillMediaBag d = walkM handleImage d + where handleImage :: PandocMonad m => Inline -> m Inline + handleImage (Image attr lab (src, tit)) = catchError + (do mediabag <- getMediaBag + case lookupMedia (T.unpack src) mediabag of + Just (_, _) -> return $ Image attr lab (src, tit) + Nothing -> do + (fname, mt, bs) <- fetchMediaResource src + insertMedia fname mt bs + return $ Image attr lab (T.pack fname, tit)) + (\e -> + case e of + PandocResourceNotFound _ -> do + report $ CouldNotFetchResource src + "replacing image with description" + -- emit alt text + return $ Span ("",["image"],[]) lab + PandocHttpError u er -> do + report $ CouldNotFetchResource u + (T.pack $ show er ++ "\rReplacing image with description.") + -- emit alt text + return $ Span ("",["image"],[]) lab + _ -> throwError e) + handleImage x = return x + +-- This requires UndecidableInstances. We could avoid that +-- by repeating the definitions below for every monad transformer +-- we use: ReaderT, WriterT, StateT, RWST. But this seems to +-- be harmless. +instance (MonadTrans t, PandocMonad m, Functor (t m), + MonadError PandocError (t m), Monad (t m), + Applicative (t m)) => PandocMonad (t m) where + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + glob = lift . glob + fileExists = lift . fileExists + getDataFileName = lift . getDataFileName + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + logOutput = lift . logOutput + +instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + glob = lift . glob + fileExists = lift . fileExists + getDataFileName = lift . getDataFileName + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + trace msg = do + tracing <- getsCommonState stTrace + when tracing $ do + pos <- getPosition + Debug.Trace.trace + ("[trace] Parsed " ++ T.unpack msg ++ " at line " ++ + show (sourceLine pos) ++ + if sourceName pos == "chunk" + then " of chunk" + else "") + (return ()) + logOutput = lift . logOutput -- cgit v1.2.3