diff options
author | schrieveslaach <schrieveslaach@online.de> | 2017-06-12 15:52:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-12 15:52:29 +0200 |
commit | 635f299b441e238ccd34e3ad61c5e36f0ca30067 (patch) | |
tree | 11cfc34402975bad208f9a48d075fe2ace959e70 /src/Text/Pandoc/Class.hs | |
parent | 181c56d4003aa83abed23b95a452c4890aa3797c (diff) | |
parent | 23f3c2d7b4796d1af742a74999ce67924bf2abb3 (diff) | |
download | pandoc-635f299b441e238ccd34e3ad61c5e36f0ca30067.tar.gz |
Merge branch 'master' into textcolor-support
Diffstat (limited to 'src/Text/Pandoc/Class.hs')
-rw-r--r-- | src/Text/Pandoc/Class.hs | 126 |
1 files changed, 98 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 1afa64c10..14a0b8044 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -1,6 +1,8 @@ -{-# LANGUAGE DeriveFunctor, DeriveDataTypeable, TypeSynonymInstances, -FlexibleInstances, GeneralizedNewtypeDeriving, FlexibleContexts, -StandaloneDeriving #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleContexts #-} {- Copyright (C) 2016 Jesse Rosenthal <jrosenthal@jhu.edu> @@ -61,6 +63,8 @@ module Text.Pandoc.Class ( PandocMonad(..) , runIOorExplode , runPure , withMediaBag + , fillMediaBag + , extractMedia ) where import Prelude hiding (readFile) @@ -76,8 +80,11 @@ import Text.Pandoc.Compat.Time (UTCTime) import Text.Pandoc.Logging import Text.Parsec (ParsecT) import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime) -import Text.Pandoc.MIME (MimeType, getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) +import Text.Pandoc.Definition import Data.Char (toLower) +import Data.Digest.Pure.SHA (sha1, showDigest) +import Data.Maybe (fromMaybe) import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds , posixSecondsToUTCTime , POSIXTime ) @@ -86,13 +93,16 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import Text.Pandoc.Walk (walkM, walk) import qualified Text.Pandoc.MediaBag as MB import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified System.Environment as IO (lookupEnv) import System.FilePath.Glob (match, compile) -import System.FilePath ((</>), takeExtension, dropExtension, isRelative) +import System.Directory (createDirectoryIfMissing) +import System.FilePath ((</>), (<.>), takeDirectory, + takeExtension, dropExtension, isRelative, normalise) import qualified System.FilePath.Glob as IO (glob) import qualified System.Directory as IO (getModificationTime) import Control.Monad as M (fail) @@ -145,7 +155,7 @@ report :: PandocMonad m => LogMessage -> m () report msg = do verbosity <- getsCommonState stVerbosity let level = messageVerbosity msg - when (level <= verbosity) $ do + when (level <= verbosity) $ logOutput msg unless (level == DEBUG) $ modifyCommonState $ \st -> st{ stLog = msg : stLog st } @@ -213,7 +223,7 @@ runIO :: PandocIO a -> IO (Either PandocError a) runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma withMediaBag :: PandocMonad m => m a -> m (a, MediaBag) -withMediaBag ma = ((,)) <$> ma <*> getMediaBag +withMediaBag ma = (,) <$> ma <*> getMediaBag runIOorExplode :: PandocIO a -> IO a runIOorExplode ma = runIO ma >>= handleError @@ -239,10 +249,13 @@ instance PandocMonad PandocIO where getCurrentTime = liftIO IO.getCurrentTime getCurrentTimeZone = liftIO IO.getCurrentTimeZone newStdGen = liftIO IO.newStdGen - newUniqueHash = hashUnique <$> (liftIO IO.newUnique) + newUniqueHash = hashUnique <$> liftIO IO.newUnique openURL u = do report $ Fetching u - liftIOError IO.openURL u + res <- liftIO (IO.openURL u) + case res of + Right r -> return r + Left e -> throwError $ PandocHttpError u e readFileLazy s = liftIOError BL.readFile s readFileStrict s = liftIOError B.readFile s readDataFile mfp fname = liftIOError (IO.readDataFile mfp) fname @@ -252,7 +265,7 @@ instance PandocMonad PandocIO where putCommonState x = PandocIO $ lift $ put x logOutput msg = liftIO $ do UTF8.hPutStr stderr $ "[" ++ - (map toLower $ show (messageVerbosity msg)) ++ "] " + map toLower (show (messageVerbosity msg)) ++ "] " alertIndent $ lines $ showLogMessage msg alertIndent :: [String] -> IO () @@ -283,14 +296,14 @@ fetchItem :: PandocMonad m fetchItem sourceURL s = do mediabag <- getMediaBag case lookupMedia s mediabag of - Just (mime, bs) -> return $ (BL.toStrict bs, Just mime) + Just (mime, bs) -> return (BL.toStrict bs, Just mime) Nothing -> downloadOrRead sourceURL s downloadOrRead :: PandocMonad m => Maybe String -> String -> m (B.ByteString, Maybe MimeType) -downloadOrRead sourceURL s = do +downloadOrRead sourceURL s = case (sourceURL >>= parseURIReference' . ensureEscaped, ensureEscaped s) of (Just u, s') -> -- try fetching from relative path at source @@ -330,12 +343,73 @@ downloadOrRead sourceURL s = do convertSlash x = x withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a -withPaths [] _ fp = throwError $ PandocIOError fp - (userError "file not found in resource path") +withPaths [] _ fp = throwError $ PandocResourceNotFound fp withPaths (p:ps) action fp = catchError (action (p </> fp)) (\_ -> withPaths ps action fp) +-- | Traverse tree, filling media bag for any images that +-- aren't already in the media bag. +fillMediaBag :: PandocMonad m => Maybe String -> Pandoc -> m Pandoc +fillMediaBag sourceURL d = walkM handleImage d + where handleImage :: PandocMonad m => Inline -> m Inline + handleImage (Image attr lab (src, tit)) = catchError + (do mediabag <- getMediaBag + case lookupMedia src mediabag of + Just (_, _) -> return $ Image attr lab (src, tit) + Nothing -> do + (bs, mt) <- downloadOrRead sourceURL src + let ext = fromMaybe (takeExtension src) + (mt >>= extensionFromMimeType) + let bs' = BL.fromChunks [bs] + let basename = showDigest $ sha1 bs' + let fname = basename <.> ext + insertMedia fname mt bs' + return $ Image attr lab (fname, tit)) + (\e -> + case e of + PandocResourceNotFound _ -> do + report $ CouldNotFetchResource src + "replacing image with description" + -- emit alt text + return $ Span ("",["image"],[]) lab + PandocHttpError u er -> do + report $ CouldNotFetchResource u + (show er ++ "\rReplacing image with description.") + -- emit alt text + return $ Span ("",["image"],[]) lab + _ -> throwError e) + handleImage x = return x + +-- | Extract media from the mediabag into a directory. +extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc +extractMedia dir d = do + media <- getMediaBag + case [fp | (fp, _, _) <- mediaDirectory media] of + [] -> return d + fps -> do + mapM_ (writeMedia dir media) fps + return $ walk (adjustImagePath dir fps) d + +writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO () +writeMedia dir mediabag subpath = do + -- we join and split to convert a/b/c to a\b\c on Windows; + -- in zip containers all paths use / + let fullpath = dir </> normalise subpath + let mbcontents = lookupMedia subpath mediabag + case mbcontents of + Nothing -> throwError $ PandocResourceNotFound subpath + Just (_, bs) -> do + report $ Extracting fullpath + liftIO $ do + createDirectoryIfMissing True $ takeDirectory fullpath + BL.writeFile fullpath bs + +adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline +adjustImagePath dir paths (Image attr lab (src, tit)) + | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) +adjustImagePath _ _ x = x + data PureState = PureState { stStdGen :: StdGen , stWord8Store :: [Word8] -- should be -- inifinite, @@ -373,7 +447,7 @@ instance Default PureState where getPureState :: PandocPure PureState -getPureState = PandocPure $ lift $ lift $ get +getPureState = PandocPure $ lift $ lift get getsPureState :: (PureState -> a) -> PandocPure a getsPureState f = f <$> getPureState @@ -433,30 +507,27 @@ instance PandocMonad PandocPure where modifyPureState $ \st -> st { stUniqStore = us } return u _ -> M.fail "uniq store ran out of elements" - openURL u = throwError $ PandocIOError u $ - userError "Cannot open URL in PandocPure" + openURL u = throwError $ PandocResourceNotFound u readFileLazy fp = do fps <- getsPureState stFiles case infoFileContents <$> getFileInfo fp fps of Just bs -> return (BL.fromStrict bs) - Nothing -> throwError $ PandocIOError fp - (userError "File not found in PureState") + Nothing -> throwError $ PandocResourceNotFound fp readFileStrict fp = do fps <- getsPureState stFiles case infoFileContents <$> getFileInfo fp fps of Just bs -> return bs - Nothing -> throwError $ PandocIOError fp - (userError "File not found in PureState") - readDataFile Nothing "reference.docx" = do + Nothing -> throwError $ PandocResourceNotFound fp + readDataFile Nothing "reference.docx" = (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx - readDataFile Nothing "reference.odt" = do + readDataFile Nothing "reference.odt" = (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT readDataFile Nothing fname = do let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname readFileStrict fname' readDataFile (Just userDir) fname = do userDirFiles <- getsPureState stUserDataDir - case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of + case infoFileContents <$> getFileInfo (userDir </> fname) userDirFiles of Just bs -> return bs Nothing -> readDataFile Nothing fname @@ -466,12 +537,12 @@ instance PandocMonad PandocPure where getModificationTime fp = do fps <- getsPureState stFiles - case infoFileMTime <$> (getFileInfo fp fps) of + case infoFileMTime <$> getFileInfo fp fps of Just tm -> return tm Nothing -> throwError $ PandocIOError fp (userError "Can't get modification time") - getCommonState = PandocPure $ lift $ get + getCommonState = PandocPure $ lift get putCommonState x = PandocPure $ lift $ put x logOutput _msg = return () @@ -555,4 +626,3 @@ instance PandocMonad m => PandocMonad (StateT st m) where getCommonState = lift getCommonState putCommonState = lift . putCommonState logOutput = lift . logOutput - |