From 4f3434586743afb69f00ca91fe6ec9b68b39ae7e Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 8 Jan 2021 18:38:20 +0100 Subject: Update copyright notices for 2021 (#7012) --- src/Text/Pandoc/Lua/Module/MediaBag.hs | 2 +- src/Text/Pandoc/Lua/Module/Pandoc.hs | 2 +- src/Text/Pandoc/Lua/Module/System.hs | 2 +- src/Text/Pandoc/Lua/Module/Types.hs | 2 +- src/Text/Pandoc/Lua/Module/Utils.hs | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/Text/Pandoc/Lua/Module') diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index e5a10217a..715e53885 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Module.MediaBag - Copyright : Copyright © 2017-2020 Albert Krewinkel + Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index 3886568b7..a9ce3866d 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Module.Pandoc - Copyright : Copyright © 2017-2020 Albert Krewinkel + Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel diff --git a/src/Text/Pandoc/Lua/Module/System.hs b/src/Text/Pandoc/Lua/Module/System.hs index 04508e461..bd35babaf 100644 --- a/src/Text/Pandoc/Lua/Module/System.hs +++ b/src/Text/Pandoc/Lua/Module/System.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Lua.Module.System - Copyright : © 2019-2020 Albert Krewinkel + Copyright : © 2019-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel diff --git a/src/Text/Pandoc/Lua/Module/Types.hs b/src/Text/Pandoc/Lua/Module/Types.hs index 999f2e588..bb4f02c3c 100644 --- a/src/Text/Pandoc/Lua/Module/Types.hs +++ b/src/Text/Pandoc/Lua/Module/Types.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Lua.Module.Types - Copyright : © 2019-2020 Albert Krewinkel + Copyright : © 2019-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs index 7595b9c0f..1b04021a7 100644 --- a/src/Text/Pandoc/Lua/Module/Utils.hs +++ b/src/Text/Pandoc/Lua/Module/Utils.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.Module.Utils - Copyright : Copyright © 2017-2020 Albert Krewinkel + Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel -- cgit v1.2.3 From 490065f3ed3dd9377a740ad6fcbc441a658889dd Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 26 Jan 2021 14:19:30 +0100 Subject: Lua: always load built-in Lua scripts from default data-dir The Lua modules `pandoc` and `pandoc.List` are now always loaded from the system's default data directory. Loading from a different directory by overriding the default path, e.g. via `--data-dir`, is no longer supported to avoid unexpected behavior and to address security concerns. --- src/Text/Pandoc/Lua/Init.hs | 18 ++++++++++++++---- src/Text/Pandoc/Lua/Module/Pandoc.hs | 8 ++++---- src/Text/Pandoc/Lua/Packages.hs | 33 +++++++-------------------------- src/Text/Pandoc/Lua/PandocLua.hs | 31 +++++++++++++++++++------------ 4 files changed, 44 insertions(+), 46 deletions(-) (limited to 'src/Text/Pandoc/Lua/Module') diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs index 0a5ce85cb..baa6f0295 100644 --- a/src/Text/Pandoc/Lua/Init.hs +++ b/src/Text/Pandoc/Lua/Init.hs @@ -12,17 +12,18 @@ module Text.Pandoc.Lua.Init ( runLua ) where +import Control.Monad (when) import Control.Monad.Catch (try) import Control.Monad.Trans (MonadIO (..)) import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr) import Foreign.Lua (Lua) import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8) +import Text.Pandoc.Class.PandocMonad (readDataFile) import Text.Pandoc.Class.PandocIO (PandocIO) import Text.Pandoc.Error (PandocError) import Text.Pandoc.Lua.Packages (installPandocPackageSearcher) -import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, - loadScriptFromDataDir, runPandocLua) - +import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, runPandocLua) +import Text.Pandoc.Lua.Util (throwTopMessageAsError') import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Definition as Pandoc import qualified Text.Pandoc.Lua.Module.Pandoc as ModulePandoc @@ -44,7 +45,7 @@ initLuaState = do liftPandocLua Lua.openlibs installPandocPackageSearcher initPandocModule - loadScriptFromDataDir "init.lua" + loadInitScript "init.lua" where initPandocModule :: PandocLua () initPandocModule = do @@ -61,6 +62,15 @@ initLuaState = do -- assign module to global variable liftPandocLua $ Lua.setglobal "pandoc" + loadInitScript :: FilePath -> PandocLua () + loadInitScript scriptFile = do + script <- readDataFile scriptFile + status <- liftPandocLua $ Lua.dostring script + when (status /= Lua.OK) . liftPandocLua $ + throwTopMessageAsError' + (("Couldn't load '" ++ scriptFile ++ "'.\n") ++) + + -- | AST elements are marshaled via normal constructor functions in the -- @pandoc@ module. However, accessing Lua globals from Haskell is -- expensive (due to error handling). Accessing the Lua registry is much diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index a9ce3866d..a8afecd2e 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -25,7 +25,7 @@ import Text.Pandoc.Definition (Block, Inline) import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter, SingletonsList (..)) import Text.Pandoc.Lua.Marshaling () import Text.Pandoc.Lua.PandocLua (PandocLua, addFunction, liftPandocLua, - loadScriptFromDataDir) + loadDefaultModule) import Text.Pandoc.Walk (Walkable) import Text.Pandoc.Options (ReaderOptions (readerExtensions)) import Text.Pandoc.Process (pipeProcess) @@ -38,11 +38,11 @@ import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil import Text.Pandoc.Error --- | Push the "pandoc" on the lua stack. Requires the `list` module to be --- loaded. +-- | Push the "pandoc" package to the Lua stack. Requires the `List` +-- module to be loadable. pushModule :: PandocLua NumResults pushModule = do - loadScriptFromDataDir "pandoc.lua" + loadDefaultModule "pandoc" addFunction "read" readDoc addFunction "pipe" pipeFn addFunction "walk_block" walkBlock diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs index d62fb725d..5949a1a7d 100644 --- a/src/Text/Pandoc/Lua/Packages.hs +++ b/src/Text/Pandoc/Lua/Packages.hs @@ -1,6 +1,3 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.Packages Copyright : Copyright © 2017-2021 Albert Krewinkel @@ -15,13 +12,9 @@ module Text.Pandoc.Lua.Packages ( installPandocPackageSearcher ) where -import Control.Monad.Catch (try) import Control.Monad (forM_) -import Data.ByteString (ByteString) -import Foreign.Lua (Lua, NumResults) -import Text.Pandoc.Error (PandocError) -import Text.Pandoc.Class.PandocMonad (readDataFile) -import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua) +import Foreign.Lua (NumResults) +import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, loadDefaultModule) import qualified Foreign.Lua as Lua import qualified Foreign.Lua.Module.Text as Text @@ -54,24 +47,12 @@ pandocPackageSearcher pkgName = "pandoc.types" -> pushWrappedHsFun Types.pushModule "pandoc.utils" -> pushWrappedHsFun Utils.pushModule "text" -> pushWrappedHsFun Text.pushModule - _ -> searchPureLuaLoader + "pandoc.List" -> pushWrappedHsFun (loadDefaultModule pkgName) + _ -> reportPandocSearcherFailure where pushWrappedHsFun f = liftPandocLua $ do Lua.pushHaskellFunction f return 1 - searchPureLuaLoader = do - let filename = pkgName ++ ".lua" - try (readDataFile filename) >>= \case - Right script -> pushWrappedHsFun (loadStringAsPackage pkgName script) - Left (_ :: PandocError) -> liftPandocLua $ do - Lua.push ("\n\tno file '" ++ filename ++ "' in pandoc's datadir") - return (1 :: NumResults) - -loadStringAsPackage :: String -> ByteString -> Lua NumResults -loadStringAsPackage pkgName script = do - status <- Lua.dostring script - if status == Lua.OK - then return (1 :: NumResults) - else do - msg <- Lua.popValue - Lua.raiseError ("Error while loading `" <> pkgName <> "`.\n" <> msg) + reportPandocSearcherFailure = liftPandocLua $ do + Lua.push ("\n\t" <> pkgName <> "is not one of pandoc's default packages") + return (1 :: NumResults) diff --git a/src/Text/Pandoc/Lua/PandocLua.hs b/src/Text/Pandoc/Lua/PandocLua.hs index 4beac22b7..750e019b6 100644 --- a/src/Text/Pandoc/Lua/PandocLua.hs +++ b/src/Text/Pandoc/Lua/PandocLua.hs @@ -23,24 +23,23 @@ module Text.Pandoc.Lua.PandocLua , runPandocLua , liftPandocLua , addFunction - , loadScriptFromDataDir + , loadDefaultModule ) where -import Control.Monad (when) import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow) import Control.Monad.Except (MonadError (catchError, throwError)) import Control.Monad.IO.Class (MonadIO (liftIO)) import Foreign.Lua (Lua (..), NumResults, Pushable, ToHaskellFunction) import Text.Pandoc.Class.PandocIO (PandocIO) -import Text.Pandoc.Class.PandocMonad (PandocMonad (..), readDataFile) -import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Class.PandocMonad (PandocMonad (..), readDefaultDataFile) +import Text.Pandoc.Error (PandocError (PandocLuaError)) import Text.Pandoc.Lua.Global (Global (..), setGlobals) import Text.Pandoc.Lua.ErrorConversion (errorConversion) import qualified Control.Monad.Catch as Catch +import qualified Data.Text as T import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Class.IO as IO -import qualified Text.Pandoc.Lua.Util as LuaUtil -- | Type providing access to both, pandoc and Lua operations. newtype PandocLua a = PandocLua { unPandocLua :: Lua a } @@ -86,14 +85,22 @@ addFunction name fn = liftPandocLua $ do Lua.pushHaskellFunction fn Lua.rawset (-3) --- | Load a file from pandoc's data directory. -loadScriptFromDataDir :: FilePath -> PandocLua () -loadScriptFromDataDir scriptFile = do - script <- readDataFile scriptFile +-- | Load a pure Lua module included with pandoc. Leaves the result on +-- the stack and returns @NumResults 1@. +-- +-- The script is loaded from the default data directory. We do not load +-- from data directories supplied via command line, as this could cause +-- scripts to be executed even though they had not been passed explicitly. +loadDefaultModule :: String -> PandocLua NumResults +loadDefaultModule name = do + script <- readDefaultDataFile (name <> ".lua") status <- liftPandocLua $ Lua.dostring script - when (status /= Lua.OK) . liftPandocLua $ - LuaUtil.throwTopMessageAsError' - (("Couldn't load '" ++ scriptFile ++ "'.\n") ++) + if status == Lua.OK + then return (1 :: NumResults) + else do + msg <- liftPandocLua Lua.popValue + let err = "Error while loading `" <> name <> "`.\n" <> msg + throwError $ PandocLuaError (T.pack err) -- | Global variables which should always be set. defaultGlobals :: PandocIO [Global] -- cgit v1.2.3 From a5169f68b251e04b0a68a7d93a30bafcb3f85e78 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 4 Feb 2021 19:07:59 +0100 Subject: Lua filters: use same function names in Haskell and Lua --- .hlint.yaml | 1 + src/Text/Pandoc/Lua/Module/MediaBag.hs | 27 ++++++++++++++------------- src/Text/Pandoc/Lua/Module/Pandoc.hs | 31 ++++++++++++++++--------------- 3 files changed, 31 insertions(+), 28 deletions(-) (limited to 'src/Text/Pandoc/Lua/Module') diff --git a/.hlint.yaml b/.hlint.yaml index 6b74014d4..d5ebffd34 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -60,6 +60,7 @@ - Text.Pandoc.Citeproc - Text.Pandoc.Extensions - Text.Pandoc.Lua.Marshaling.Version + - Text.Pandoc.Lua.Module.Pandoc - Text.Pandoc.Lua.Module.Utils - Text.Pandoc.Readers.Odt.ContentReader - Text.Pandoc.Readers.Odt.Namespaces diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index 715e53885..78b699176 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -13,6 +13,7 @@ module Text.Pandoc.Lua.Module.MediaBag ( pushModule ) where +import Prelude hiding (lookup) import Control.Monad (zipWithM_) import Foreign.Lua (Lua, NumResults, Optional) import Text.Pandoc.Class.CommonState (CommonState (..)) @@ -36,10 +37,10 @@ pushModule = do liftPandocLua Lua.newtable addFunction "delete" delete addFunction "empty" empty - addFunction "insert" insertMediaFn + addFunction "insert" insert addFunction "items" items - addFunction "lookup" lookupMediaFn - addFunction "list" mediaDirectoryFn + addFunction "lookup" lookup + addFunction "list" list addFunction "fetch" fetch return 1 @@ -53,11 +54,11 @@ empty :: PandocLua NumResults empty = 0 <$ modifyCommonState (\st -> st { stMediaBag = mempty }) -- | Insert a new item into the media bag. -insertMediaFn :: FilePath - -> Optional MimeType - -> BL.ByteString - -> PandocLua NumResults -insertMediaFn fp optionalMime contents = do +insert :: FilePath + -> Optional MimeType + -> BL.ByteString + -> PandocLua NumResults +insert fp optionalMime contents = do mb <- getMediaBag setMediaBag $ MB.insertMedia fp (Lua.fromOptional optionalMime) contents mb return (Lua.NumResults 0) @@ -66,9 +67,9 @@ insertMediaFn fp optionalMime contents = do items :: PandocLua NumResults items = getMediaBag >>= liftPandocLua . pushIterator -lookupMediaFn :: FilePath - -> PandocLua NumResults -lookupMediaFn fp = do +lookup :: FilePath + -> PandocLua NumResults +lookup fp = do res <- MB.lookupMedia fp <$> getMediaBag liftPandocLua $ case res of Nothing -> 1 <$ Lua.pushnil @@ -77,8 +78,8 @@ lookupMediaFn fp = do Lua.push contents return 2 -mediaDirectoryFn :: PandocLua NumResults -mediaDirectoryFn = do +list :: PandocLua NumResults +list = do dirContents <- MB.mediaDirectory <$> getMediaBag liftPandocLua $ do Lua.newtable diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index a8afecd2e..8d30f9a0c 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -14,6 +14,7 @@ module Text.Pandoc.Lua.Module.Pandoc ( pushModule ) where +import Prelude hiding (read) import Control.Monad (when) import Control.Monad.Except (throwError) import Data.Default (Default (..)) @@ -43,10 +44,10 @@ import Text.Pandoc.Error pushModule :: PandocLua NumResults pushModule = do loadDefaultModule "pandoc" - addFunction "read" readDoc - addFunction "pipe" pipeFn - addFunction "walk_block" walkBlock - addFunction "walk_inline" walkInline + addFunction "read" read + addFunction "pipe" pipe + addFunction "walk_block" walk_block + addFunction "walk_inline" walk_inline return 1 walkElement :: (Walkable (SingletonsList Inline) a, @@ -54,14 +55,14 @@ walkElement :: (Walkable (SingletonsList Inline) a, => a -> LuaFilter -> PandocLua a walkElement x f = liftPandocLua $ walkInlines f x >>= walkBlocks f -walkInline :: Inline -> LuaFilter -> PandocLua Inline -walkInline = walkElement +walk_inline :: Inline -> LuaFilter -> PandocLua Inline +walk_inline = walkElement -walkBlock :: Block -> LuaFilter -> PandocLua Block -walkBlock = walkElement +walk_block :: Block -> LuaFilter -> PandocLua Block +walk_block = walkElement -readDoc :: T.Text -> Optional T.Text -> PandocLua NumResults -readDoc content formatSpecOrNil = liftPandocLua $ do +read :: T.Text -> Optional T.Text -> PandocLua NumResults +read content formatSpecOrNil = liftPandocLua $ do let formatSpec = fromMaybe "markdown" (Lua.fromOptional formatSpecOrNil) res <- Lua.liftIO . runIO $ getReader formatSpec >>= \(rdr,es) -> @@ -79,11 +80,11 @@ readDoc content formatSpecOrNil = liftPandocLua $ do Left e -> Lua.raiseError $ show e -- | Pipes input through a command. -pipeFn :: String - -> [String] - -> BL.ByteString - -> PandocLua NumResults -pipeFn command args input = liftPandocLua $ do +pipe :: String -- ^ path to executable + -> [String] -- ^ list of arguments + -> BL.ByteString -- ^ input passed to process via stdin + -> PandocLua NumResults +pipe command args input = liftPandocLua $ do (ec, output) <- Lua.liftIO $ pipeProcess Nothing command args input case ec of ExitSuccess -> 1 <$ Lua.push output -- cgit v1.2.3 From e227496d3a5d07df8183b8d986ea2aa36c90612c Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 8 Apr 2021 22:14:47 +0200 Subject: Lua filter: respect Inlines/Blocks filter functions in pandoc.walk_* --- src/Text/Pandoc/Lua/Filter.hs | 2 ++ src/Text/Pandoc/Lua/Module/Pandoc.hs | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src/Text/Pandoc/Lua/Module') diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs index 90967f295..01bf90efa 100644 --- a/src/Text/Pandoc/Lua/Filter.hs +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -13,7 +13,9 @@ module Text.Pandoc.Lua.Filter ( LuaFilterFunction , LuaFilter , runFilterFile , walkInlines + , walkInlineLists , walkBlocks + , walkBlockLists , module Text.Pandoc.Lua.Walk ) where import Control.Applicative ((<|>)) diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index 8d30f9a0c..5c14b3a30 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -23,8 +23,10 @@ import Foreign.Lua (Lua, NumResults, Optional, Peekable, Pushable) import System.Exit (ExitCode (..)) import Text.Pandoc.Class.PandocIO (runIO) import Text.Pandoc.Definition (Block, Inline) -import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter, SingletonsList (..)) +import Text.Pandoc.Lua.Filter (LuaFilter, SingletonsList (..), walkInlines, + walkInlineLists, walkBlocks, walkBlockLists) import Text.Pandoc.Lua.Marshaling () +import Text.Pandoc.Lua.Marshaling.List (List (..)) import Text.Pandoc.Lua.PandocLua (PandocLua, addFunction, liftPandocLua, loadDefaultModule) import Text.Pandoc.Walk (Walkable) @@ -51,9 +53,12 @@ pushModule = do return 1 walkElement :: (Walkable (SingletonsList Inline) a, - Walkable (SingletonsList Block) a) + Walkable (SingletonsList Block) a, + Walkable (List Inline) a, + Walkable (List Block) a) => a -> LuaFilter -> PandocLua a -walkElement x f = liftPandocLua $ walkInlines f x >>= walkBlocks f +walkElement x f = liftPandocLua $ + walkInlines f x >>= walkInlineLists f >>= walkBlocks f >>= walkBlockLists f walk_inline :: Inline -> LuaFilter -> PandocLua Inline walk_inline = walkElement -- cgit v1.2.3 From 8511f6fdf6c9fbc2cc926538bca4ae9f554b4ed9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 23 May 2021 22:57:02 -0700 Subject: MediaBag improvements. In the current dev version, we will sometimes add a version of an image with a hashed name, keeping the original version with the original name, which would leave to undesirable duplication. This change separates the media's filename from the media's canonical name (which is the path of the link in the document itself). Filenames are based on SHA1 hashes and assigned automatically. In Text.Pandoc.MediaBag: - Export MediaItem type [API change]. - Change MediaBag type to a map from Text to MediaItem [API change]. - `lookupMedia` now returns a `MediaItem` [API change]. - Change `insertMedia` so it sets the `mediaPath` to a filename based on the SHA1 hash of the contents. This will be used when contents are extracted. In Text.Pandoc.Class.PandocMonad: - Remove `fetchMediaResource` [API change]. Lua MediaBag module has been changed minimally. In the future it would be better, probably, to give Lua access to the full MediaItem type. --- src/Text/Pandoc/Class/IO.hs | 9 ++++--- src/Text/Pandoc/Class/PandocMonad.hs | 43 ++++++++++++++-------------------- src/Text/Pandoc/Lua/Module/MediaBag.hs | 6 ++--- src/Text/Pandoc/MediaBag.hs | 35 ++++++++++++++++++++------- test/Tests/Readers/Docx.hs | 10 ++++---- 5 files changed, 55 insertions(+), 48 deletions(-) (limited to 'src/Text/Pandoc/Lua/Module') diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index bb4e2b732..f12c0a938 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image)) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory) import Text.Pandoc.Walk (walk) import qualified Control.Exception as E import qualified Data.ByteString as B @@ -213,14 +213,13 @@ writeMedia :: (PandocMonad m, MonadIO m) writeMedia dir mediabag subpath = do -- we join and split to convert a/b/c to a\b\c on Windows; -- in zip containers all paths use / - let fullpath = dir unEscapeString (normalise subpath) let mbcontents = lookupMedia subpath mediabag case mbcontents of Nothing -> throwError $ PandocResourceNotFound $ pack subpath - Just (_, bs) -> do - report $ Extracting $ pack fullpath + Just item -> do + let fullpath = dir mediaPath item liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) - logIOError $ BL.writeFile fullpath bs + logIOError $ BL.writeFile fullpath $ mediaContents item -- | If the given Inline element is an image with a @src@ path equal to -- one in the list of @paths@, then prepends @dir@ to the image source; diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index dd6499a73..ae6917e06 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -37,7 +37,6 @@ module Text.Pandoc.Class.PandocMonad , setUserDataDir , getUserDataDir , fetchItem - , fetchMediaResource , getInputFiles , setInputFiles , getOutputFile @@ -57,8 +56,6 @@ module Text.Pandoc.Class.PandocMonad import Codec.Archive.Zip import Control.Monad.Except (MonadError (catchError, throwError), MonadTrans, lift, when) -import Data.Digest.Pure.SHA (sha1, showDigest) -import Data.Maybe (fromMaybe) import Data.List (foldl') import Data.Time (UTCTime) import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds, @@ -67,7 +64,7 @@ import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime) import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) -import System.FilePath ((), (<.>), takeExtension, dropExtension, +import System.FilePath ((), takeExtension, dropExtension, isRelative, splitDirectories) import System.Random (StdGen) import Text.Collate.Lang (Lang(..), parseLang, renderLang) @@ -75,8 +72,8 @@ import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Logging -import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia) +import Text.Pandoc.MIME (MimeType, getMimeType) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia, MediaItem(..)) import Text.Pandoc.Shared (uriPathToPath, safeRead) import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, readTranslations) @@ -376,7 +373,8 @@ fetchItem :: PandocMonad m fetchItem s = do mediabag <- getMediaBag case lookupMedia (T.unpack s) mediabag of - Just (mime, bs) -> return (BL.toStrict bs, Just mime) + Just item -> return (BL.toStrict (mediaContents item), + Just (mediaMimeType item)) Nothing -> downloadOrRead s -- | Returns the content and, if available, the MIME type of a resource. @@ -629,19 +627,6 @@ withPaths (p:ps) action fp = catchError (action (p fp)) (\_ -> withPaths ps action fp) --- | Fetch local or remote resource (like an image) and provide data suitable --- for adding it to the MediaBag. -fetchMediaResource :: PandocMonad m - => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) -fetchMediaResource src = do - (bs, mt) <- fetchItem src - let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) - (mt >>= extensionFromMimeType) - let bs' = BL.fromChunks [bs] - let basename = showDigest $ sha1 bs' - let fname = basename <.> T.unpack ext - return (fname, mt, bs') - -- | Traverse tree, filling media bag for any images that -- aren't already in the media bag. fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc @@ -649,12 +634,18 @@ fillMediaBag d = walkM handleImage d where handleImage :: PandocMonad m => Inline -> m Inline handleImage (Image attr lab (src, tit)) = catchError (do mediabag <- getMediaBag - case lookupMedia (T.unpack src) mediabag of - Just (_, _) -> return $ Image attr lab (src, tit) - Nothing -> do - (fname, mt, bs) <- fetchMediaResource src - insertMedia fname mt bs - return $ Image attr lab (T.pack fname, tit)) + let fp = T.unpack src + src' <- T.pack <$> case lookupMedia fp mediabag of + Just item -> return $ mediaPath item + Nothing -> do + (bs, mt) <- fetchItem src + insertMedia fp mt (BL.fromStrict bs) + mediabag' <- getMediaBag + case lookupMedia fp mediabag' of + Just item -> return $ mediaPath item + Nothing -> throwError $ PandocSomeError $ + src <> " not successfully inserted into MediaBag" + return $ Image attr lab (src', tit)) (\e -> case e of PandocResourceNotFound _ -> do diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index 78b699176..3eed50fca 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -73,9 +73,9 @@ lookup fp = do res <- MB.lookupMedia fp <$> getMediaBag liftPandocLua $ case res of Nothing -> 1 <$ Lua.pushnil - Just (mimeType, contents) -> do - Lua.push mimeType - Lua.push contents + Just item -> do + Lua.push $ MB.mediaMimeType item + Lua.push $ MB.mediaContents item return 2 list :: PandocLua NumResults diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 4a9b4efa1..a65f315fc 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -15,6 +15,7 @@ Definition of a MediaBag object to hold binary resources, and an interface for interacting with it. -} module Text.Pandoc.MediaBag ( + MediaItem(..), MediaBag, deleteMedia, lookupMedia, @@ -28,15 +29,23 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Typeable (Typeable) import System.FilePath -import Text.Pandoc.MIME (MimeType, getMimeTypeDef) +import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType) import Data.Text (Text) import qualified Data.Text as T +import Data.Digest.Pure.SHA (sha1, showDigest) + +data MediaItem = + MediaItem + { mediaMimeType :: MimeType + , mediaPath :: FilePath + , mediaContents :: BL.ByteString + } deriving (Eq, Ord, Show, Data, Typeable) -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map Text (MimeType, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map Text MediaItem) deriving (Semigroup, Monoid, Data, Typeable) instance Show MediaBag where @@ -62,26 +71,34 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (canonicalize fp) (mime, contents) mediamap) - where mime = fromMaybe fallback mbMime + MediaBag (M.insert (canonicalize fp) mediaItem mediamap) + where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <> + "." <> ext + , mediaContents = contents + , mediaMimeType = mt } fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp _ -> getMimeTypeDef fp + mt = fromMaybe fallback mbMime + ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt + -- | Lookup a media item in a 'MediaBag', returning mime type and contents. lookupMedia :: FilePath -> MediaBag - -> Maybe (MimeType, BL.ByteString) + -> Maybe MediaItem lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] mediaDirectory (MediaBag mediamap) = - M.foldrWithKey (\fp (mime,contents) -> - ((T.unpack fp, mime, fromIntegral (BL.length contents)):)) [] mediamap + M.foldrWithKey (\fp item -> + ((T.unpack fp, mediaMimeType item, + fromIntegral (BL.length (mediaContents item))):)) [] mediamap mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = - M.foldrWithKey (\fp (mime,contents) -> - ((T.unpack fp, mime, contents):)) [] mediamap + M.foldrWithKey (\fp item -> + ((T.unpack fp, mediaMimeType item, mediaContents item):)) + [] mediamap diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 2cce70cc5..939ff9939 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -24,7 +24,7 @@ import Test.Tasty.HUnit import Tests.Helpers import Text.Pandoc import qualified Text.Pandoc.Class as P -import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import qualified Text.Pandoc.MediaBag as MB import Text.Pandoc.UTF8 as UTF8 -- We define a wrapper around pandoc that doesn't normalize in the @@ -91,11 +91,11 @@ getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString) getMedia archivePath mediaPath = fmap fromEntry . findEntryByPath ("word/" ++ mediaPath) . toArchive <$> B.readFile archivePath -compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool +compareMediaPathIO :: FilePath -> MB.MediaBag -> FilePath -> IO Bool compareMediaPathIO mediaPath mediaBag docxPath = do docxMedia <- getMedia docxPath mediaPath - let mbBS = case lookupMedia mediaPath mediaBag of - Just (_, bs) -> bs + let mbBS = case MB.lookupMedia mediaPath mediaBag of + Just item -> MB.mediaContents item Nothing -> error ("couldn't find " ++ mediaPath ++ " in media bag") @@ -110,7 +110,7 @@ compareMediaBagIO docxFile = do mb <- runIOorExplode $ readDocx defopts df >> P.getMediaBag bools <- mapM (\(fp, _, _) -> compareMediaPathIO fp mb docxFile) - (mediaDirectory mb) + (MB.mediaDirectory mb) return $ and bools testMediaBagIO :: String -> FilePath -> IO TestTree -- cgit v1.2.3 From 55bcd4b4fb1dced6c6e316db6cd117b52bbadee5 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 10 Jun 2021 18:26:53 +0200 Subject: Lua utils: fix handling of table headers in `from_simple_table` Passing an empty list of header cells now results in an empty table header. Fixes: #7369 --- src/Text/Pandoc/Lua/Module/Utils.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Lua/Module') diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs index 1b04021a7..3ec3afc26 100644 --- a/src/Text/Pandoc/Lua/Module/Utils.hs +++ b/src/Text/Pandoc/Lua/Module/Utils.hs @@ -146,7 +146,7 @@ from_simple_table (SimpleTable capt aligns widths head' body) = do nullAttr (Caption Nothing [Plain capt]) (zipWith (\a w -> (a, toColWidth w)) aligns widths) - (TableHead nullAttr [blockListToRow head']) + (TableHead nullAttr [blockListToRow head' | not (null head') ]) [TableBody nullAttr 0 [] $ map blockListToRow body] (TableFoot nullAttr []) return (NumResults 1) -- cgit v1.2.3