From 5d49cbd35e815dd041e54da511bdd0eeafd400c0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 10 Jan 2018 22:26:12 +0100 Subject: Move filter functions to separate module --- src/Text/Pandoc/App.hs | 100 +++-------------------------------------- src/Text/Pandoc/Filter.hs | 60 +++++++++++++++++++++++++ src/Text/Pandoc/Filter/Json.hs | 97 +++++++++++++++++++++++++++++++++++++++ src/Text/Pandoc/Filter/Lua.hs | 53 ++++++++++++++++++++++ src/Text/Pandoc/Filter/Path.hs | 53 ++++++++++++++++++++++ 5 files changed, 268 insertions(+), 95 deletions(-) create mode 100644 src/Text/Pandoc/Filter.hs create mode 100644 src/Text/Pandoc/Filter/Json.hs create mode 100644 src/Text/Pandoc/Filter/Lua.hs create mode 100644 src/Text/Pandoc/Filter/Path.hs (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 976311e77..26c754cd6 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -46,12 +46,11 @@ import qualified Control.Exception as E import Control.Monad import Control.Monad.Except (catchError, throwError) import Control.Monad.Trans -import Data.Aeson (defaultOptions, eitherDecode', encode) +import Data.Aeson (defaultOptions) import Data.Aeson.TH (deriveJSON) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.Char (toLower, toUpper) -import Data.Foldable (foldrM) import Data.List (find, intercalate, isPrefixOf, isSuffixOf, sort) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) @@ -73,10 +72,9 @@ import Skylighting (Style, Syntax (..), defaultSyntaxMap, parseTheme, pygments) import Skylighting.Parser (addSyntaxDefinition, parseSyntaxDefinition) import System.Console.GetOpt -import System.Directory (Permissions (..), doesFileExist, findExecutable, - getAppUserDataDirectory, getPermissions) -import System.Environment (getArgs, getEnvironment, getProgName) -import System.Exit (ExitCode (..), exitSuccess) +import System.Directory (getAppUserDataDirectory) +import System.Environment (getArgs, getProgName) +import System.Exit (exitSuccess) import System.FilePath import System.IO (nativeNewline, stdout) import qualified System.IO as IO (Newline (..)) @@ -84,10 +82,9 @@ import System.IO.Error (isDoesNotExistError) import Text.Pandoc import Text.Pandoc.BCP47 (Lang (..), parseBCP47) import Text.Pandoc.Builder (setMeta, deleteMeta) +import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.Highlighting (highlightingStyles) -import Text.Pandoc.Lua (LuaException (..), runLuaFilter) import Text.Pandoc.PDF (makePDF) -import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, ordNub, safeRead, tabFilter) @@ -538,48 +535,6 @@ type Transform = Pandoc -> Pandoc isTextFormat :: String -> Bool isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx"] -externalFilter :: MonadIO m - => ReaderOptions -> FilePath -> [String] -> Pandoc -> m Pandoc -externalFilter ropts f args' d = liftIO $ do - exists <- doesFileExist f - isExecutable <- if exists - then executable <$> getPermissions f - else return True - let (f', args'') = if exists - then case map toLower (takeExtension f) of - _ | isExecutable -> ("." f, args') - ".py" -> ("python", f:args') - ".hs" -> ("runhaskell", f:args') - ".pl" -> ("perl", f:args') - ".rb" -> ("ruby", f:args') - ".php" -> ("php", f:args') - ".js" -> ("node", f:args') - ".r" -> ("Rscript", f:args') - _ -> (f, args') - else (f, args') - unless (exists && isExecutable) $ do - mbExe <- findExecutable f' - when (isNothing mbExe) $ - E.throwIO $ PandocFilterError f ("Could not find executable " ++ f') - env <- getEnvironment - let env' = Just - ( ("PANDOC_VERSION", pandocVersion) - : ("PANDOC_READER_OPTIONS", UTF8.toStringLazy (encode ropts)) - : env ) - (exitcode, outbs) <- E.handle filterException $ - pipeProcess env' f' args'' $ encode d - case exitcode of - ExitSuccess -> either (E.throwIO . PandocFilterError f) - return $ eitherDecode' outbs - ExitFailure ec -> E.throwIO $ PandocFilterError f - ("Filter returned error status " ++ show ec) - where filterException :: E.SomeException -> IO a - filterException e = E.throwIO $ PandocFilterError f (show e) - -data Filter = LuaFilter FilePath - | JSONFilter FilePath - deriving (Show) - -- | Data structure for command line options. data Opt = Opt { optTabStop :: Int -- ^ Number of spaces per tab @@ -824,50 +779,6 @@ defaultWriterName x = applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc applyTransforms transforms d = return $ foldr ($) d transforms - -- First we check to see if a filter is found. If not, and if it's - -- not an absolute path, we check to see whether it's in `userdir/filters`. - -- If not, we leave it unchanged. -expandFilterPath :: PandocMonad m => FilePath -> m FilePath -expandFilterPath fp = do - mbDatadir <- getUserDataDir - fpExists <- fileExists fp - if fpExists - then return fp - else case mbDatadir of - Just datadir | isRelative fp -> do - let filterPath = datadir "filters" fp - filterPathExists <- fileExists filterPath - if filterPathExists - then return filterPath - else return fp - _ -> return fp - -applyFilters :: ReaderOptions - -> [Filter] - -> [String] - -> Pandoc - -> PandocIO Pandoc -applyFilters ropts filters args d = do - foldrM ($) d $ map (applyFilter ropts args) filters - -applyFilter :: ReaderOptions - -> [String] - -> Filter - -> Pandoc - -> PandocIO Pandoc -applyFilter ropts args (LuaFilter f) d = do - f' <- expandFilterPath f - let format = case args of - (x:_) -> x - _ -> error "Format not supplied for lua filter" - res <- runLuaFilter ropts f' format d - case res of - Right x -> return x - Left (LuaException s) -> E.throw (PandocFilterError f s) -applyFilter ropts args (JSONFilter f) d = do - f' <- expandFilterPath f - liftIO $ externalFilter ropts f' args d - readSource :: FilePath -> PandocIO Text readSource "-" = liftIO (UTF8.toText <$> BS.getContents) readSource src = case parseURI src of @@ -1722,5 +1633,4 @@ deprecatedOption o msg = -- see https://github.com/jgm/pandoc/pull/4083 -- using generic deriving caused long compilation times $(deriveJSON defaultOptions ''LineEnding) -$(deriveJSON defaultOptions ''Filter) $(deriveJSON defaultOptions ''Opt) diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs new file mode 100644 index 000000000..30c99cc28 --- /dev/null +++ b/src/Text/Pandoc/Filter.hs @@ -0,0 +1,60 @@ +{- +Copyright (C) 2006-2017 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE TemplateHaskell #-} + +{- | + Module : Text.Pandoc.Filter + Copyright : Copyright (C) 2006-2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Programmatically modifications of pandoc documents. +-} +module Text.Pandoc.Filter + ( Filter (..) + , applyFilters + ) where + +import Data.Aeson (defaultOptions) +import Data.Aeson.TH (deriveJSON) +import Data.Foldable (foldrM) +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Options (ReaderOptions) +import qualified Text.Pandoc.Filter.Json as JsonFilter +import qualified Text.Pandoc.Filter.Lua as LuaFilter + +data Filter = LuaFilter FilePath + | JSONFilter FilePath + deriving (Show) + +applyFilters :: ReaderOptions + -> [Filter] + -> [String] + -> Pandoc + -> PandocIO Pandoc +applyFilters ropts filters args d = do + foldrM ($) d $ map applyFilter filters + where + applyFilter (JSONFilter f) = JsonFilter.apply ropts args f + applyFilter (LuaFilter f) = LuaFilter.apply ropts args f + +$(deriveJSON defaultOptions ''Filter) diff --git a/src/Text/Pandoc/Filter/Json.hs b/src/Text/Pandoc/Filter/Json.hs new file mode 100644 index 000000000..681c52720 --- /dev/null +++ b/src/Text/Pandoc/Filter/Json.hs @@ -0,0 +1,97 @@ +{- +Copyright (C) 2006-2018 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Filter + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Programmatically modifications of pandoc documents via JSON filters. +-} +module Text.Pandoc.Filter.Json (apply) where + +import Control.Monad (unless, when) +import Control.Monad.Trans (MonadIO (liftIO)) +import Data.Aeson (eitherDecode', encode) +import Data.Char (toLower) +import Data.Maybe (isNothing) +import System.Directory (executable, doesFileExist, findExecutable, + getPermissions) +import System.Environment (getEnvironment) +import System.Exit (ExitCode (..)) +import System.FilePath ((), takeExtension) +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Error (PandocError (PandocFilterError)) +import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Filter.Path (expandFilterPath) +import Text.Pandoc.Options (ReaderOptions) +import Text.Pandoc.Process (pipeProcess) +import Text.Pandoc.Shared (pandocVersion) +import qualified Control.Exception as E +import qualified Text.Pandoc.UTF8 as UTF8 + +apply :: ReaderOptions + -> [String] + -> FilePath + -> Pandoc + -> PandocIO Pandoc +apply ropts args f d = do + f' <- expandFilterPath f + liftIO $ externalFilter ropts f' args d + +externalFilter :: MonadIO m + => ReaderOptions -> FilePath -> [String] -> Pandoc -> m Pandoc +externalFilter ropts f args' d = liftIO $ do + exists <- doesFileExist f + isExecutable <- if exists + then executable <$> getPermissions f + else return True + let (f', args'') = if exists + then case map toLower (takeExtension f) of + _ | isExecutable -> ("." f, args') + ".py" -> ("python", f:args') + ".hs" -> ("runhaskell", f:args') + ".pl" -> ("perl", f:args') + ".rb" -> ("ruby", f:args') + ".php" -> ("php", f:args') + ".js" -> ("node", f:args') + ".r" -> ("Rscript", f:args') + _ -> (f, args') + else (f, args') + unless (exists && isExecutable) $ do + mbExe <- findExecutable f' + when (isNothing mbExe) $ + E.throwIO $ PandocFilterError f ("Could not find executable " ++ f') + env <- getEnvironment + let env' = Just + ( ("PANDOC_VERSION", pandocVersion) + : ("PANDOC_READER_OPTIONS", UTF8.toStringLazy (encode ropts)) + : env ) + (exitcode, outbs) <- E.handle filterException $ + pipeProcess env' f' args'' $ encode d + case exitcode of + ExitSuccess -> either (E.throwIO . PandocFilterError f) + return $ eitherDecode' outbs + ExitFailure ec -> E.throwIO $ PandocFilterError f + ("Filter returned error status " ++ show ec) + where filterException :: E.SomeException -> IO a + filterException e = E.throwIO $ PandocFilterError f (show e) diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs new file mode 100644 index 000000000..597a31cbc --- /dev/null +++ b/src/Text/Pandoc/Filter/Lua.hs @@ -0,0 +1,53 @@ +{- +Copyright (C) 2006-2018 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Filter.Lua + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Apply Lua filters to modify a pandoc documents programmatically. +-} +module Text.Pandoc.Filter.Lua (apply) where + +import Control.Exception (throw) +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Error (PandocError (PandocFilterError)) +import Text.Pandoc.Filter.Path (expandFilterPath) +import Text.Pandoc.Lua (LuaException (..), runLuaFilter) +import Text.Pandoc.Options (ReaderOptions) + +apply :: ReaderOptions + -> [String] + -> FilePath + -> Pandoc + -> PandocIO Pandoc +apply ropts args f d = do + f' <- expandFilterPath f + let format = case args of + (x:_) -> x + _ -> error "Format not supplied for lua filter" + res <- runLuaFilter ropts f' format d + case res of + Right x -> return x + Left (LuaException s) -> throw (PandocFilterError f s) diff --git a/src/Text/Pandoc/Filter/Path.hs b/src/Text/Pandoc/Filter/Path.hs new file mode 100644 index 000000000..8074bcbb7 --- /dev/null +++ b/src/Text/Pandoc/Filter/Path.hs @@ -0,0 +1,53 @@ +{- +Copyright (C) 2006-2018 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Filter.Path + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Expand paths of filters, searching the data directory. +-} +module Text.Pandoc.Filter.Path + ( expandFilterPath + ) where + +import Text.Pandoc.Class (PandocMonad, fileExists, getUserDataDir) +import System.FilePath ((), isRelative) + + -- First we check to see if a filter is found. If not, and if it's + -- not an absolute path, we check to see whether it's in `userdir/filters`. + -- If not, we leave it unchanged. +expandFilterPath :: PandocMonad m => FilePath -> m FilePath +expandFilterPath fp = do + mbDatadir <- getUserDataDir + fpExists <- fileExists fp + if fpExists + then return fp + else case mbDatadir of + Just datadir | isRelative fp -> do + let filterPath = datadir "filters" fp + filterPathExists <- fileExists filterPath + if filterPathExists + then return filterPath + else return fp + _ -> return fp -- cgit v1.2.3 From 8d5422f36b28bab67b4d13e4a3d2154d0c5024f8 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 10 Jan 2018 22:26:12 +0100 Subject: Lua modules: add function pandoc.utils.run_json_filter Runs a JSON filter on a Pandoc document. --- doc/lua-filters.md | 31 +++++++++++++++++++++++++++++++ src/Text/Pandoc/Lua/Module/Utils.hs | 29 ++++++++++++++++++++++++++--- src/Text/Pandoc/Lua/Packages.hs | 3 ++- 3 files changed, 59 insertions(+), 4 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/doc/lua-filters.md b/doc/lua-filters.md index 9b82155be..6f03360bb 100644 --- a/doc/lua-filters.md +++ b/doc/lua-filters.md @@ -1433,6 +1433,37 @@ functions. print(table.concat(elements[1].numbering, '.')) -- 0.1 print(table.concat(elements[2].numbering, '.')) -- 0.2 +[`run_json_filter (doc, filter[, args])`]{#utils-run_json_filter} + +: Filter the given doc by passing it through the a JSON filter. + + Parameters: + + `doc`: + : the Pandoc document to filter + + `filter`: + : filter to run + + `args`: + : list of arguments passed to the filter. Defaults to + `{FORMAT}`. + + Returns: + + - ([Pandoc](#Pandoc)) Filtered document + + Usage: + + -- Assumes `some_blocks` contains blocks for which a + -- separate literature section is required. + local sub_doc = pandoc.Pandoc(some_blocks, metadata) + sub_doc_with_bib = pandoc.utils.run_json_filter( + sub_doc, + 'pandoc-citeproc' + ) + some_blocks = sub_doc.blocks -- some blocks with bib + [`normalize_date (date_string)`]{#utils-normalize_date} : Parse a date and convert (if possible) to "YYYY-MM-DD" diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs index b453b38d7..ab29cc0c7 100644 --- a/src/Text/Pandoc/Lua/Module/Utils.hs +++ b/src/Text/Pandoc/Lua/Module/Utils.hs @@ -30,22 +30,26 @@ module Text.Pandoc.Lua.Module.Utils ) where import Control.Applicative ((<|>)) +import Data.Default (def) import Foreign.Lua (FromLuaStack, Lua, LuaInteger, NumResults) +import Text.Pandoc.Class (runIO, setUserDataDir) import Text.Pandoc.Definition (Pandoc, Meta, MetaValue, Block, Inline) import Text.Pandoc.Lua.StackInstances () -import Text.Pandoc.Lua.Util (addFunction) +import Text.Pandoc.Lua.Util (addFunction, popValue) import qualified Data.Digest.Pure.SHA as SHA import qualified Data.ByteString.Lazy as BSL import qualified Foreign.Lua as Lua +import qualified Text.Pandoc.Filter.Json as JsonFilter import qualified Text.Pandoc.Shared as Shared -- | Push the "pandoc.utils" module to the lua stack. -pushModule :: Lua NumResults -pushModule = do +pushModule :: Maybe FilePath -> Lua NumResults +pushModule mbDatadir = do Lua.newtable addFunction "hierarchicalize" hierarchicalize addFunction "normalize_date" normalizeDate + addFunction "run_json_filter" (runJsonFilter mbDatadir) addFunction "sha1" sha1 addFunction "stringify" stringify addFunction "to_roman_numeral" toRomanNumeral @@ -62,6 +66,25 @@ hierarchicalize = return . Shared.hierarchicalize normalizeDate :: String -> Lua (Lua.Optional String) normalizeDate = return . Lua.Optional . Shared.normalizeDate +-- | Run a JSON filter on the given document. +runJsonFilter :: Maybe FilePath + -> Pandoc + -> FilePath + -> Lua.Optional [String] + -> Lua NumResults +runJsonFilter mbDatadir doc filterFile optArgs = do + args <- case Lua.fromOptional optArgs of + Just x -> return x + Nothing -> do + Lua.getglobal "FORMAT" + (:[]) <$> popValue + filterRes <- Lua.liftIO . runIO $ do + setUserDataDir mbDatadir + JsonFilter.apply def args filterFile doc + case filterRes of + Left err -> Lua.raiseError (show err) + Right d -> (1 :: NumResults) <$ Lua.push d + -- | Calculate the hash of the given contents. sha1 :: BSL.ByteString -> Lua String diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs index dda2dd2fe..0169d0045 100644 --- a/src/Text/Pandoc/Lua/Packages.hs +++ b/src/Text/Pandoc/Lua/Packages.hs @@ -78,7 +78,8 @@ pandocPackageSearcher luaPkgParams pkgName = "pandoc.mediabag" -> let st = luaPkgCommonState luaPkgParams mbRef = luaPkgMediaBag luaPkgParams in pushWrappedHsFun (MediaBag.pushModule st mbRef) - "pandoc.utils" -> pushWrappedHsFun Utils.pushModule + "pandoc.utils" -> let datadirMb = luaPkgDataDir luaPkgParams + in pushWrappedHsFun (Utils.pushModule datadirMb) _ -> searchPureLuaLoader where pushWrappedHsFun f = do -- cgit v1.2.3