diff options
author | Albert Krewinkel <albert@zeitkraut.de> | 2020-01-15 23:26:00 +0100 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2020-01-15 14:26:00 -0800 |
commit | 672a4bdd1d4a587feaa38613fce64335adaad76d (patch) | |
tree | 17e58c93a24eedba8fc06aa28f661395e1057096 | |
parent | 400b29d10e9ba20479692ff2e2a482bb27bfe09f (diff) | |
download | pandoc-672a4bdd1d4a587feaa38613fce64335adaad76d.tar.gz |
Lua filters: allow filtering of element lists (#6040)
Lists of Inline and Block elements can now be filtered via `Inlines` and
`Blocks` functions, respectively. This is helpful if a filter conversion
depends on the order of elements rather than a single element.
For example, the following filter can be used to remove all spaces
before a citation:
function isSpaceBeforeCite (spc, cite)
return spc and spc.t == 'Space'
and cite and cite.t == 'Cite'
end
function Inlines (inlines)
for i = #inlines-1,1,-1 do
if isSpaceBeforeCite(inlines[i], inlines[i+1]) then
inlines:remove(i)
end
end
return inlines
end
Closes: #6038
-rw-r--r-- | doc/lua-filters.md | 72 | ||||
-rw-r--r-- | pandoc.cabal | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Lua/Filter.hs | 97 | ||||
-rw-r--r-- | src/Text/Pandoc/Lua/Init.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Lua/Marshaling/List.hs | 45 | ||||
-rw-r--r-- | test/Tests/Lua.hs | 25 | ||||
-rw-r--r-- | test/lua/blocks-filter.lua | 8 | ||||
-rw-r--r-- | test/lua/inlines-filter.lua | 19 | ||||
-rw-r--r-- | test/lua/meta.lua | 6 |
9 files changed, 244 insertions, 33 deletions
diff --git a/doc/lua-filters.md b/doc/lua-filters.md index fea3d4c1b..31beed162 100644 --- a/doc/lua-filters.md +++ b/doc/lua-filters.md @@ -136,13 +136,52 @@ Elements without matching functions are left untouched. See [module documentation](#module-pandoc) for a list of pandoc elements. -## Execution order +## Filters on element sequences + +For some filtering tasks, the it is necessary to know the order +in which elements occur in the document. It is not enough then to +inspect a single element at a time. + +There are two special function names, which can be used to define +filters on lists of blocks or lists of inlines. + +[`Inlines (inlines)`]{#inlines-filter} +: If present in a filter, this function will be called on all + lists of inline elements, like the content of a [Para] + (paragraph) block, or the description of an [Image]. The + `inlines` argument passed to the function will be a [List] of + [Inlines] for each call. + +[`Blocks (blocks)`]{#blocks-filter} +: If present in a filter, this function will be called on all + lists of block elements, like the content of a [MetaBlocks] + meta element block, on each item of a list, and the main + content of the [Pandoc] document. The `blocks` argument + passed to the function will be a [List] of [Inlines] for each + call. + +These filter functions are special in that the result must either +be nil, in which case the list is left unchanged, or must be a +list of the correct type, i.e., the same type as the input +argument. Single elements are **not** allowed as return values, +as a single element in this context usually hints at a bug. + +See ["Remove spaces before normal citations"][Inlines filter +example] for an example. + +This functionality has been added in pandoc 2.9.2. + +[Inlines filter example]: #remove-spaces-before-citations + +## Execution Order Element filter functions within a filter set are called in a fixed order, skipping any which are not present: 1. functions for [*Inline* elements](#type-inline), + 2. the [`Inlines`](#inlines-filter) filter function, 2. functions for [*Block* elements](#type-block) , + 2. the [`Blocks`](#inlines-filter) filter function, 3. the [`Meta`](#type-meta) filter function, and last 4. the [`Pandoc`](#type-pandoc) filter function. @@ -368,6 +407,34 @@ function Doc (blocks, meta) end ``` +## Remove spaces before citations + +This filter removes all spaces preceding an "author-in-text" +citation. In Markdown, author-in-text citations (e.g., +`@citekey`), must be preceded by a space. If these spaces are +undesired, they must be removed with a filter. + +``` lua +local function is_space_before_author_in_text(spc, cite) + return spc and spc.t == 'Space' + and cite and cite.t == 'Cite' + -- there must be only a single citation, and it must have + -- mode 'AuthorInText' + and #cite.citations == 1 + and cite.citations[1].mode == 'AuthorInText' +end + +function Inlines (inlines) + -- Go from end to start to avoid problems with shifting indices. + for i = #inlines-1, 1, -1 do + if is_space_before_author_in_text(inlines[i], inlines[i+1]) then + inlines:remove(i) + end + end + return inlines +end +``` + ## Replacing placeholders with their metadata value Lua filter functions are run in the order @@ -1650,15 +1717,18 @@ Usage: [Citation]: #type-citation [Citations]: #type-citation [CommonState]: #type-commonstate +[Image]: #type-image [Inline]: #type-inline [Inlines]: #type-inline [List]: #type-list [ListAttributes]: #type-listattributes [Meta]: #type-meta +[MetaBlocks]: #type-metablocks [MetaValue]: #type-metavalue [MetaValues]: #type-metavalue [LogMessage]: #type-logmessage [Pandoc]: #type-pandoc +[Para]: #type-para [Version]: #type-version [`pandoc.utils.equals`]: #pandoc.utils.equals diff --git a/pandoc.cabal b/pandoc.cabal index 49f6d65c0..c37f4324b 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -605,9 +605,10 @@ library Text.Pandoc.Lua.Marshaling.AST, Text.Pandoc.Lua.Marshaling.AnyValue, Text.Pandoc.Lua.Marshaling.CommonState, + Text.Pandoc.Lua.Marshaling.Context, + Text.Pandoc.Lua.Marshaling.List, Text.Pandoc.Lua.Marshaling.MediaBag, Text.Pandoc.Lua.Marshaling.ReaderOptions, - Text.Pandoc.Lua.Marshaling.Context, Text.Pandoc.Lua.Marshaling.Version, Text.Pandoc.Lua.Module.MediaBag, Text.Pandoc.Lua.Module.Pandoc, @@ -838,4 +839,3 @@ benchmark benchmark-pandoc -Widentities -Werror=missing-home-modules -fhide-source-paths - diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs index 74c7058f3..beef492d7 100644 --- a/src/Text/Pandoc/Lua/Filter.hs +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -2,8 +2,8 @@ {-# LANGUAGE NoImplicitPrelude #-} {- | Module : Text.Pandoc.Lua.Filter -Copyright : © 2012–2019 John MacFarlane, - © 2017-2019 Albert Krewinkel +Copyright : © 2012–2020 John MacFarlane, + © 2017-2020 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Stability : alpha @@ -13,25 +13,23 @@ Types and functions for running Lua filters. module Text.Pandoc.Lua.Filter ( LuaFilterFunction , LuaFilter , runFilterFile - , tryFilter - , runFilterFunction - , walkMWithLuaFilter , walkInlines , walkBlocks - , blockElementNames - , inlineElementNames , module Text.Pandoc.Lua.Walk ) where import Prelude +import Control.Applicative ((<|>)) import Control.Monad (mplus, (>=>)) import Control.Monad.Catch (finally) import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf, showConstr, toConstr, tyconUQname) import Data.Foldable (foldrM) import Data.Map (Map) +import Data.Maybe (fromMaybe) import Foreign.Lua (Lua, Peekable, Pushable) import Text.Pandoc.Definition import Text.Pandoc.Lua.Marshaling () +import Text.Pandoc.Lua.Marshaling.List (List (..)) import Text.Pandoc.Lua.Walk (SingletonsList (..)) import Text.Pandoc.Walk (Walkable (walkM)) @@ -67,7 +65,9 @@ newtype LuaFilter = LuaFilter (Map String LuaFilterFunction) instance Peekable LuaFilter where peek idx = do - let constrs = metaFilterName + let constrs = listOfInlinesFilterName + : listOfBlocksFilterName + : metaFilterName : pandocFilterNames ++ blockElementNames ++ inlineElementNames @@ -109,22 +109,34 @@ elementOrList x = do Right res -> [res] <$ Lua.pop 1 Left _ -> Lua.peekList topOfStack `finally` Lua.pop 1 --- | Try running a filter for the given element -tryFilter :: (Data a, Peekable a, Pushable a) - => LuaFilter -> a -> Lua [a] -tryFilter (LuaFilter fnMap) x = - let filterFnName = showConstr (toConstr x) - catchAllName = tyconUQname $ dataTypeName (dataTypeOf x) - in - case Map.lookup filterFnName fnMap `mplus` Map.lookup catchAllName fnMap of - Just fn -> runFilterFunction fn x *> elementOrList x - Nothing -> return [x] - --- | Apply filter on a sequence of AST elements. +-- | Pop and return a value from the stack; if the value at the top of +-- the stack is @nil@, return the fallback element. +popOption :: Peekable a => a -> Lua a +popOption fallback = fromMaybe fallback . Lua.fromOptional <$> Lua.popValue + +-- | Apply filter on a sequence of AST elements. Both lists and single +-- value are accepted as filter function return values. runOnSequence :: (Data a, Peekable a, Pushable a) => LuaFilter -> SingletonsList a -> Lua (SingletonsList a) -runOnSequence lf (SingletonsList xs) = - SingletonsList <$> mconcatMapM (tryFilter lf) xs +runOnSequence (LuaFilter fnMap) (SingletonsList xs) = + SingletonsList <$> mconcatMapM tryFilter xs + where + tryFilter :: (Data a, Peekable a, Pushable a) => a -> Lua [a] + tryFilter x = + let filterFnName = showConstr (toConstr x) + catchAllName = tyconUQname $ dataTypeName (dataTypeOf x) + in case Map.lookup filterFnName fnMap <|> Map.lookup catchAllName fnMap of + Just fn -> runFilterFunction fn x *> elementOrList x + Nothing -> return [x] + +-- | Try filtering the given value without type error corrections on +-- the return value. +runOnValue :: (Data a, Peekable a, Pushable a) + => String -> LuaFilter -> a -> Lua a +runOnValue filterFnName (LuaFilter fnMap) x = + case Map.lookup filterFnName fnMap of + Just fn -> runFilterFunction fn x *> popOption x + Nothing -> return x -- | Push a value to the stack via a lua filter function. The filter function is -- called with given element as argument and is expected to return an element. @@ -138,7 +150,12 @@ runFilterFunction lf x = do walkMWithLuaFilter :: LuaFilter -> Pandoc -> Lua Pandoc walkMWithLuaFilter f = - walkInlines f >=> walkBlocks f >=> walkMeta f >=> walkPandoc f + walkInlines f + >=> walkInlineLists f + >=> walkBlocks f + >=> walkBlockLists f + >=> walkMeta f + >=> walkPandoc f mconcatMapM :: (Monad m) => (a -> m [a]) -> [a] -> m [a] mconcatMapM f = fmap mconcat . mapM f @@ -146,6 +163,9 @@ mconcatMapM f = fmap mconcat . mapM f hasOneOf :: LuaFilter -> [String] -> Bool hasOneOf (LuaFilter fnMap) = any (\k -> Map.member k fnMap) +contains :: LuaFilter -> String -> Bool +contains (LuaFilter fnMap) = (`Map.member` fnMap) + walkInlines :: Walkable (SingletonsList Inline) a => LuaFilter -> a -> Lua a walkInlines lf = let f :: SingletonsList Inline -> Lua (SingletonsList Inline) @@ -154,6 +174,14 @@ walkInlines lf = then walkM f else return +walkInlineLists :: Walkable (List Inline) a => LuaFilter -> a -> Lua a +walkInlineLists lf = + let f :: List Inline -> Lua (List Inline) + f = runOnValue listOfInlinesFilterName lf + in if lf `contains` listOfInlinesFilterName + then walkM f + else return + walkBlocks :: Walkable (SingletonsList Block) a => LuaFilter -> a -> Lua a walkBlocks lf = let f :: SingletonsList Block -> Lua (SingletonsList Block) @@ -162,13 +190,18 @@ walkBlocks lf = then walkM f else return +walkBlockLists :: Walkable (List Block) a => LuaFilter -> a -> Lua a +walkBlockLists lf = + let f :: List Block -> Lua (List Block) + f = runOnValue listOfBlocksFilterName lf + in if lf `contains` listOfBlocksFilterName + then walkM f + else return + walkMeta :: LuaFilter -> Pandoc -> Lua Pandoc -walkMeta (LuaFilter fnMap) = - case Map.lookup "Meta" fnMap of - Just fn -> walkM (\(Pandoc meta blocks) -> do - meta' <- runFilterFunction fn meta *> singleElement meta - return $ Pandoc meta' blocks) - Nothing -> return +walkMeta lf (Pandoc m bs) = do + m' <- runOnValue "Meta" lf m + return $ Pandoc m' bs walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc walkPandoc (LuaFilter fnMap) = @@ -185,6 +218,12 @@ inlineElementNames = "Inline" : constructorsFor (dataTypeOf (Str mempty)) blockElementNames :: [String] blockElementNames = "Block" : constructorsFor (dataTypeOf (Para [])) +listOfInlinesFilterName :: String +listOfInlinesFilterName = "Inlines" + +listOfBlocksFilterName :: String +listOfBlocksFilterName = "Blocks" + metaFilterName :: String metaFilterName = "Meta" diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs index cf6c71231..5ade83e4d 100644 --- a/src/Text/Pandoc/Lua/Init.hs +++ b/src/Text/Pandoc/Lua/Init.hs @@ -119,6 +119,7 @@ putConstructorsInRegistry = do constrsToReg $ Pandoc.Citation mempty mempty mempty Pandoc.AuthorInText 0 0 putInReg "Attr" -- used for Attr type alias putInReg "ListAttributes" -- used for ListAttributes type alias + putInReg "List" -- pandoc.List where constrsToReg :: Data a => a -> Lua () constrsToReg = mapM_ (putInReg . showConstr) . dataTypeConstrs . dataTypeOf diff --git a/src/Text/Pandoc/Lua/Marshaling/List.hs b/src/Text/Pandoc/Lua/Marshaling/List.hs new file mode 100644 index 000000000..e4fbfc200 --- /dev/null +++ b/src/Text/Pandoc/Lua/Marshaling/List.hs @@ -0,0 +1,45 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE UndecidableInstances #-} +{- | +Module : Text.Pandoc.Lua.Marshaling.List +Copyright : © 2012-2020 John MacFarlane + © 2017-2020 Albert Krewinkel +License : GNU GPL, version 2 or above +Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Stability : alpha + +Marshaling/unmarshaling instances for @pandoc.List@s. +-} +module Text.Pandoc.Lua.Marshaling.List + ( List (..) + ) where + +import Prelude +import Data.Data (Data) +import Foreign.Lua (Peekable, Pushable) +import Text.Pandoc.Walk (Walkable (..)) +import Text.Pandoc.Lua.Util (defineHowTo, pushViaConstructor) + +import qualified Foreign.Lua as Lua + +-- | List wrapper which is marshalled as @pandoc.List@. +newtype List a = List { fromList :: [a] } + deriving (Data, Eq, Show) + +instance Pushable a => Pushable (List a) where + push (List xs) = + pushViaConstructor "List" xs + +instance Peekable a => Peekable (List a) where + peek idx = defineHowTo "get List" $ do + xs <- Lua.peek idx + return $ List xs + +-- List is just a wrapper, so we can reuse the walk instance for +-- unwrapped Hasekll lists. +instance Walkable [a] b => Walkable (List a) b where + walkM f = walkM (fmap fromList . f . List) + query f = query (f . List) diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs index 7683df09f..5e01266c0 100644 --- a/test/Tests/Lua.hs +++ b/test/Tests/Lua.hs @@ -23,7 +23,8 @@ import Text.Pandoc.Arbitrary () import Text.Pandoc.Builder (bulletList, definitionList, displayMath, divWith, doc, doubleQuoted, emph, header, lineBlock, linebreak, math, orderedList, para, plain, rawBlock, - singleQuoted, space, str, strong) + singleQuoted, space, str, strong, + HasMeta (setMeta)) import Text.Pandoc.Class (runIOorExplode, setUserDataDir) import Text.Pandoc.Definition (Block (BlockQuote, Div, Para), Inline (Emph, Str), Attr, Meta, Pandoc, pandocTypesVersion) @@ -129,6 +130,28 @@ tests = map (localOption (QuickCheckTests 20)) (doc $ divWith ("", [], kv_before) (para "nil")) (doc $ divWith ("", [], kv_after) (para "nil")) + , testCase "Filter list of inlines" $ + assertFilterConversion "List of inlines" + "inlines-filter.lua" + (doc $ para ("Hello," <> linebreak <> "World! Wassup?")) + (doc $ para "Hello, World! Wassup?") + + , testCase "Filter list of blocks" $ + assertFilterConversion "List of blocks" + "blocks-filter.lua" + (doc $ para "one." <> para "two." <> para "three.") + (doc $ plain "3") + + , testCase "Filter Meta" $ + let setMetaBefore = setMeta "old" ("old" :: T.Text) + . setMeta "bool" False + setMetaAfter = setMeta "new" ("new" :: T.Text) + . setMeta "bool" True + in assertFilterConversion "Meta filtering" + "meta.lua" + (setMetaBefore . doc $ mempty) + (setMetaAfter . doc $ mempty) + , testCase "Script filename is set" $ assertFilterConversion "unexpected script name" "script-name.lua" diff --git a/test/lua/blocks-filter.lua b/test/lua/blocks-filter.lua new file mode 100644 index 000000000..4e944e922 --- /dev/null +++ b/test/lua/blocks-filter.lua @@ -0,0 +1,8 @@ +function Blocks (blks) + -- verify that this looks like a `pandoc.List` + if not blks.find or not blks.map or not blks.filter then + error("table doesn't seem to be an instance of pandoc.List") + end + -- return plain block containing the number of elements in the list + return {pandoc.Plain {pandoc.Str(tostring(#blks))}} +end diff --git a/test/lua/inlines-filter.lua b/test/lua/inlines-filter.lua new file mode 100644 index 000000000..69608bd77 --- /dev/null +++ b/test/lua/inlines-filter.lua @@ -0,0 +1,19 @@ +function isWorldAfterSpace (fst, snd) + return fst and fst.t == 'LineBreak' + and snd and snd.t == 'Str' and snd.text == 'World!' +end + +function Inlines (inlns) + -- verify that this looks like a `pandoc.List` + if not inlns.find or not inlns.map or not inlns.filter then + error("table doesn't seem to be an instance of pandoc.List") + end + + -- Remove spaces before string "World" + for i = #inlns-1,1,-1 do + if isWorldAfterSpace(inlns[i], inlns[i+1]) then + inlns[i] = pandoc.Space() + end + end + return inlns +end diff --git a/test/lua/meta.lua b/test/lua/meta.lua new file mode 100644 index 000000000..5e2946203 --- /dev/null +++ b/test/lua/meta.lua @@ -0,0 +1,6 @@ +function Meta (meta) + meta.old = nil + meta.new = "new" + meta.bool = (meta.bool == false) + return meta +end |