aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Class.hs
diff options
context:
space:
mode:
authorschrieveslaach <schrieveslaach@online.de>2017-06-12 15:52:29 +0200
committerGitHub <noreply@github.com>2017-06-12 15:52:29 +0200
commit635f299b441e238ccd34e3ad61c5e36f0ca30067 (patch)
tree11cfc34402975bad208f9a48d075fe2ace959e70 /src/Text/Pandoc/Class.hs
parent181c56d4003aa83abed23b95a452c4890aa3797c (diff)
parent23f3c2d7b4796d1af742a74999ce67924bf2abb3 (diff)
downloadpandoc-635f299b441e238ccd34e3ad61c5e36f0ca30067.tar.gz
Merge branch 'master' into textcolor-support
Diffstat (limited to 'src/Text/Pandoc/Class.hs')
-rw-r--r--src/Text/Pandoc/Class.hs126
1 files changed, 98 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index 1afa64c10..14a0b8044 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -1,6 +1,8 @@
-{-# LANGUAGE DeriveFunctor, DeriveDataTypeable, TypeSynonymInstances,
-FlexibleInstances, GeneralizedNewtypeDeriving, FlexibleContexts,
-StandaloneDeriving #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
{-
Copyright (C) 2016 Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -61,6 +63,8 @@ module Text.Pandoc.Class ( PandocMonad(..)
, runIOorExplode
, runPure
, withMediaBag
+ , fillMediaBag
+ , extractMedia
) where
import Prelude hiding (readFile)
@@ -76,8 +80,11 @@ import Text.Pandoc.Compat.Time (UTCTime)
import Text.Pandoc.Logging
import Text.Parsec (ParsecT)
import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
-import Text.Pandoc.MIME (MimeType, getMimeType)
+import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
+import Text.Pandoc.Definition
import Data.Char (toLower)
+import Data.Digest.Pure.SHA (sha1, showDigest)
+import Data.Maybe (fromMaybe)
import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
, posixSecondsToUTCTime
, POSIXTime )
@@ -86,13 +93,16 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo,
unEscapeString, parseURIReference, isAllowedInURI,
parseURI, URI(..) )
import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Text.Pandoc.Walk (walkM, walk)
import qualified Text.Pandoc.MediaBag as MB
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified System.Environment as IO (lookupEnv)
import System.FilePath.Glob (match, compile)
-import System.FilePath ((</>), takeExtension, dropExtension, isRelative)
+import System.Directory (createDirectoryIfMissing)
+import System.FilePath ((</>), (<.>), takeDirectory,
+ takeExtension, dropExtension, isRelative, normalise)
import qualified System.FilePath.Glob as IO (glob)
import qualified System.Directory as IO (getModificationTime)
import Control.Monad as M (fail)
@@ -145,7 +155,7 @@ report :: PandocMonad m => LogMessage -> m ()
report msg = do
verbosity <- getsCommonState stVerbosity
let level = messageVerbosity msg
- when (level <= verbosity) $ do
+ when (level <= verbosity) $
logOutput msg
unless (level == DEBUG) $
modifyCommonState $ \st -> st{ stLog = msg : stLog st }
@@ -213,7 +223,7 @@ runIO :: PandocIO a -> IO (Either PandocError a)
runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
withMediaBag :: PandocMonad m => m a -> m (a, MediaBag)
-withMediaBag ma = ((,)) <$> ma <*> getMediaBag
+withMediaBag ma = (,) <$> ma <*> getMediaBag
runIOorExplode :: PandocIO a -> IO a
runIOorExplode ma = runIO ma >>= handleError
@@ -239,10 +249,13 @@ instance PandocMonad PandocIO where
getCurrentTime = liftIO IO.getCurrentTime
getCurrentTimeZone = liftIO IO.getCurrentTimeZone
newStdGen = liftIO IO.newStdGen
- newUniqueHash = hashUnique <$> (liftIO IO.newUnique)
+ newUniqueHash = hashUnique <$> liftIO IO.newUnique
openURL u = do
report $ Fetching u
- liftIOError IO.openURL u
+ res <- liftIO (IO.openURL u)
+ case res of
+ Right r -> return r
+ Left e -> throwError $ PandocHttpError u e
readFileLazy s = liftIOError BL.readFile s
readFileStrict s = liftIOError B.readFile s
readDataFile mfp fname = liftIOError (IO.readDataFile mfp) fname
@@ -252,7 +265,7 @@ instance PandocMonad PandocIO where
putCommonState x = PandocIO $ lift $ put x
logOutput msg = liftIO $ do
UTF8.hPutStr stderr $ "[" ++
- (map toLower $ show (messageVerbosity msg)) ++ "] "
+ map toLower (show (messageVerbosity msg)) ++ "] "
alertIndent $ lines $ showLogMessage msg
alertIndent :: [String] -> IO ()
@@ -283,14 +296,14 @@ fetchItem :: PandocMonad m
fetchItem sourceURL s = do
mediabag <- getMediaBag
case lookupMedia s mediabag of
- Just (mime, bs) -> return $ (BL.toStrict bs, Just mime)
+ Just (mime, bs) -> return (BL.toStrict bs, Just mime)
Nothing -> downloadOrRead sourceURL s
downloadOrRead :: PandocMonad m
=> Maybe String
-> String
-> m (B.ByteString, Maybe MimeType)
-downloadOrRead sourceURL s = do
+downloadOrRead sourceURL s =
case (sourceURL >>= parseURIReference' .
ensureEscaped, ensureEscaped s) of
(Just u, s') -> -- try fetching from relative path at source
@@ -330,12 +343,73 @@ downloadOrRead sourceURL s = do
convertSlash x = x
withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
-withPaths [] _ fp = throwError $ PandocIOError fp
- (userError "file not found in resource path")
+withPaths [] _ fp = throwError $ PandocResourceNotFound fp
withPaths (p:ps) action fp =
catchError (action (p </> fp))
(\_ -> withPaths ps action fp)
+-- | Traverse tree, filling media bag for any images that
+-- aren't already in the media bag.
+fillMediaBag :: PandocMonad m => Maybe String -> Pandoc -> m Pandoc
+fillMediaBag sourceURL d = walkM handleImage d
+ where handleImage :: PandocMonad m => Inline -> m Inline
+ handleImage (Image attr lab (src, tit)) = catchError
+ (do mediabag <- getMediaBag
+ case lookupMedia src mediabag of
+ Just (_, _) -> return $ Image attr lab (src, tit)
+ Nothing -> do
+ (bs, mt) <- downloadOrRead sourceURL src
+ let ext = fromMaybe (takeExtension src)
+ (mt >>= extensionFromMimeType)
+ let bs' = BL.fromChunks [bs]
+ let basename = showDigest $ sha1 bs'
+ let fname = basename <.> ext
+ insertMedia fname mt bs'
+ return $ Image attr lab (fname, tit))
+ (\e ->
+ case e of
+ PandocResourceNotFound _ -> do
+ report $ CouldNotFetchResource src
+ "replacing image with description"
+ -- emit alt text
+ return $ Span ("",["image"],[]) lab
+ PandocHttpError u er -> do
+ report $ CouldNotFetchResource u
+ (show er ++ "\rReplacing image with description.")
+ -- emit alt text
+ return $ Span ("",["image"],[]) lab
+ _ -> throwError e)
+ handleImage x = return x
+
+-- | Extract media from the mediabag into a directory.
+extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
+extractMedia dir d = do
+ media <- getMediaBag
+ case [fp | (fp, _, _) <- mediaDirectory media] of
+ [] -> return d
+ fps -> do
+ mapM_ (writeMedia dir media) fps
+ return $ walk (adjustImagePath dir fps) d
+
+writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO ()
+writeMedia dir mediabag subpath = do
+ -- we join and split to convert a/b/c to a\b\c on Windows;
+ -- in zip containers all paths use /
+ let fullpath = dir </> normalise subpath
+ let mbcontents = lookupMedia subpath mediabag
+ case mbcontents of
+ Nothing -> throwError $ PandocResourceNotFound subpath
+ Just (_, bs) -> do
+ report $ Extracting fullpath
+ liftIO $ do
+ createDirectoryIfMissing True $ takeDirectory fullpath
+ BL.writeFile fullpath bs
+
+adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
+adjustImagePath dir paths (Image attr lab (src, tit))
+ | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit)
+adjustImagePath _ _ x = x
+
data PureState = PureState { stStdGen :: StdGen
, stWord8Store :: [Word8] -- should be
-- inifinite,
@@ -373,7 +447,7 @@ instance Default PureState where
getPureState :: PandocPure PureState
-getPureState = PandocPure $ lift $ lift $ get
+getPureState = PandocPure $ lift $ lift get
getsPureState :: (PureState -> a) -> PandocPure a
getsPureState f = f <$> getPureState
@@ -433,30 +507,27 @@ instance PandocMonad PandocPure where
modifyPureState $ \st -> st { stUniqStore = us }
return u
_ -> M.fail "uniq store ran out of elements"
- openURL u = throwError $ PandocIOError u $
- userError "Cannot open URL in PandocPure"
+ openURL u = throwError $ PandocResourceNotFound u
readFileLazy fp = do
fps <- getsPureState stFiles
case infoFileContents <$> getFileInfo fp fps of
Just bs -> return (BL.fromStrict bs)
- Nothing -> throwError $ PandocIOError fp
- (userError "File not found in PureState")
+ Nothing -> throwError $ PandocResourceNotFound fp
readFileStrict fp = do
fps <- getsPureState stFiles
case infoFileContents <$> getFileInfo fp fps of
Just bs -> return bs
- Nothing -> throwError $ PandocIOError fp
- (userError "File not found in PureState")
- readDataFile Nothing "reference.docx" = do
+ Nothing -> throwError $ PandocResourceNotFound fp
+ readDataFile Nothing "reference.docx" =
(B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx
- readDataFile Nothing "reference.odt" = do
+ readDataFile Nothing "reference.odt" =
(B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT
readDataFile Nothing fname = do
let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
readFileStrict fname'
readDataFile (Just userDir) fname = do
userDirFiles <- getsPureState stUserDataDir
- case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of
+ case infoFileContents <$> getFileInfo (userDir </> fname) userDirFiles of
Just bs -> return bs
Nothing -> readDataFile Nothing fname
@@ -466,12 +537,12 @@ instance PandocMonad PandocPure where
getModificationTime fp = do
fps <- getsPureState stFiles
- case infoFileMTime <$> (getFileInfo fp fps) of
+ case infoFileMTime <$> getFileInfo fp fps of
Just tm -> return tm
Nothing -> throwError $ PandocIOError fp
(userError "Can't get modification time")
- getCommonState = PandocPure $ lift $ get
+ getCommonState = PandocPure $ lift get
putCommonState x = PandocPure $ lift $ put x
logOutput _msg = return ()
@@ -555,4 +626,3 @@ instance PandocMonad m => PandocMonad (StateT st m) where
getCommonState = lift getCommonState
putCommonState = lift . putCommonState
logOutput = lift . logOutput
-