aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2021-12-08 19:06:48 +0100
committerJohn MacFarlane <jgm@berkeley.edu>2021-12-11 08:59:11 -0800
commit83b5b79c0e4f073198b5af11b9e8a0a4471fcd41 (patch)
tree699ea018e8fe1ef4aa47c49abb2c4708caf2c641
parentbfb3118ebb1f24d8b12a806ef0ade14d5c4575ce (diff)
downloadpandoc-83b5b79c0e4f073198b5af11b9e8a0a4471fcd41.tar.gz
Custom reader: pass list of sources instead of concatenated text
The first argument passed to Lua `Reader` functions is no longer a plain string but a richer data structure. The structure can easily be converted to a string by applying `tostring`, but is also a list with elements that contain each the *text* and *name* of each input source as a property of the respective name. A small example is added to the custom reader documentation, showcasing its use in a reader that creates a syntax-highlighted code block for each source code file passed as input. Existing readers must be updated.
-rw-r--r--data/creole.lua2
-rw-r--r--doc/custom-readers.md55
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc/Lua/Marshal/Sources.hs46
-rw-r--r--src/Text/Pandoc/Lua/Orphans.hs5
-rw-r--r--src/Text/Pandoc/Readers/Custom.hs10
6 files changed, 104 insertions, 15 deletions
diff --git a/data/creole.lua b/data/creole.lua
index 5b7d7f554..590dfc871 100644
--- a/data/creole.lua
+++ b/data/creole.lua
@@ -186,5 +186,5 @@ G = P{ "Doc",
}
function Reader(input, reader_options)
- return lpeg.match(G, input)
+ return lpeg.match(G, tostring(input))
end
diff --git a/doc/custom-readers.md b/doc/custom-readers.md
index afa0caa73..df2de2182 100644
--- a/doc/custom-readers.md
+++ b/doc/custom-readers.md
@@ -17,7 +17,7 @@ install any additional software to do this.
A custom reader is a Lua file that defines a function
called `Reader`, which takes two arguments:
-- a string, the raw input to be parsed
+- the raw input to be parsed, as a list of sources
- optionally, a table of reader options, e.g.
`{ columns = 62, standalone = true }`.
@@ -27,6 +27,16 @@ which is automatically in scope. (Indeed, all of the utility
functions that are available for [Lua filters] are available
in custom readers, too.)
+Each source item corresponds to a file or stream passed to pandoc
+containing its text and name. E.g., if a single file `input.txt`
+is passed to pandoc, then the list of sources will contain just a
+single element `s`, where `s.name == 'input.txt'` and `s.text`
+contains the file contents as a string.
+
+The sources list, as well as each of its elements, can be
+converted to a string via the Lua standard library function
+`tostring`.
+
[Lua filters]: https://pandoc.org/lua-filters.html
[`pandoc` module]: https://pandoc.org/lua-filters.html#module-pandoc
@@ -34,12 +44,20 @@ A minimal example would be
```lua
function Reader(input)
- return pandoc.Pandoc({ pandoc.CodeBlock(input) })
+ return pandoc.Pandoc({ pandoc.CodeBlock(tostring(input)) })
end
```
-This just returns a document containing a big code block with
-all of the input.
+This just returns a document containing a big code block with all
+of the input. Or, to create a separate code block for each input
+file, one might write
+
+``` lua
+function Reader(input)
+ return pandoc.Pandoc(input:map(
+ function (s) return pandoc.CodeBlock(s.text) end))
+end
+```
In a nontrivial reader, you'll want to parse the input.
You can do this using standard Lua library functions
@@ -84,7 +102,7 @@ G = P{ "Pandoc",
}
function Reader(input)
- return lpeg.match(G, input)
+ return lpeg.match(G, tostring(input))
end
```
@@ -277,7 +295,7 @@ function Reader(input, reader_options)
local refs = {}
local thisref = {}
local ids = {}
- for line in string.gmatch(input, "[^\n]*") do
+ for line in string.gmatch(tostring(input), "[^\n]*") do
key, val = string.match(line, "([A-Z][A-Z0-9]) %- (.*)")
if key == "ER" then
-- clean up fields
@@ -550,7 +568,7 @@ G = P{ "Doc",
}
function Reader(input, reader_options)
- return lpeg.match(G, input)
+ return lpeg.match(G, tostring(input))
end
```
@@ -614,7 +632,7 @@ end
function Reader(input)
- local parsed = json.decode(input)
+ local parsed = json.decode(tostring(input))
local blocks = {}
for _,entry in ipairs(parsed.data.children) do
@@ -636,3 +654,24 @@ Similar code can be used to consume JSON output from other APIs.
Note that the content of the text fields is markdown, so we
convert it using `pandoc.read()`.
+
+# Example: syntax-highlighted code files
+
+This is a reader that puts the content of each input file into a
+code block, sets the file's extension as the block's class to
+enable code highlighting, and places the filename as a header
+above each code block.
+
+``` lua
+function to_code_block (source)
+ local _, lang = pandoc.path.split_extension(source.name)
+ return pandoc.Div{
+ pandoc.Header(1, source.name == '' and '<stdin>' or source.name),
+ pandoc.CodeBlock(source.text, {class=lang}),
+ }
+end
+
+function Reader (input, opts)
+ return pandoc.Pandoc(input:map(to_code_block))
+end
+```
diff --git a/pandoc.cabal b/pandoc.cabal
index dcf12bf04..92513b911 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -694,6 +694,7 @@ library
Text.Pandoc.Lua.Marshal.Context,
Text.Pandoc.Lua.Marshal.PandocError,
Text.Pandoc.Lua.Marshal.ReaderOptions,
+ Text.Pandoc.Lua.Marshal.Sources,
Text.Pandoc.Lua.Module.MediaBag,
Text.Pandoc.Lua.Module.Pandoc,
Text.Pandoc.Lua.Module.System,
diff --git a/src/Text/Pandoc/Lua/Marshal/Sources.hs b/src/Text/Pandoc/Lua/Marshal/Sources.hs
new file mode 100644
index 000000000..7b5262ab5
--- /dev/null
+++ b/src/Text/Pandoc/Lua/Marshal/Sources.hs
@@ -0,0 +1,46 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+{- |
+Module : Text.Pandoc.Lua.Marshaling.Sources
+Copyright : © 2021 Albert Krewinkel
+License : GNU GPL, version 2 or above
+Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Marshal 'Sources'.
+-}
+module Text.Pandoc.Lua.Marshal.Sources
+ ( pushSources
+ ) where
+
+import Data.Text (Text)
+import HsLua as Lua
+import Text.Pandoc.Lua.Marshal.List (newListMetatable)
+import Text.Pandoc.Sources (Sources (..))
+import Text.Parsec (SourcePos, sourceName)
+
+-- | Pushes the 'Sources' as a list of lazy Lua objects.
+pushSources :: LuaError e => Pusher e Sources
+pushSources (Sources srcs) = do
+ pushList (pushUD typeSource) srcs
+ newListMetatable "pandoc Sources" $ do
+ pushName "__tostring"
+ pushHaskellFunction $ do
+ sources <- forcePeek $ peekList (peekUD typeSource) (nthBottom 1)
+ pushText . mconcat $ map snd sources
+ return 1
+ rawset (nth 3)
+ setmetatable (nth 2)
+
+-- | Source object type.
+typeSource :: LuaError e => DocumentedType e (SourcePos, Text)
+typeSource = deftype "pandoc input source"
+ [ operation Tostring $ lambda
+ ### liftPure snd
+ <#> udparam typeSource "srcs" "Source to print in native format"
+ =#> functionResult pushText "string" "Haskell representation"
+ ]
+ [ readonly "name" "source name"
+ (pushString, sourceName . fst)
+ , readonly "text" "source text"
+ (pushText, snd)
+ ]
diff --git a/src/Text/Pandoc/Lua/Orphans.hs b/src/Text/Pandoc/Lua/Orphans.hs
index eef05bd27..d5b8f2c5d 100644
--- a/src/Text/Pandoc/Lua/Orphans.hs
+++ b/src/Text/Pandoc/Lua/Orphans.hs
@@ -22,7 +22,9 @@ import Text.Pandoc.Lua.Marshal.CommonState ()
import Text.Pandoc.Lua.Marshal.Context ()
import Text.Pandoc.Lua.Marshal.PandocError()
import Text.Pandoc.Lua.Marshal.ReaderOptions ()
+import Text.Pandoc.Lua.Marshal.Sources (pushSources)
import Text.Pandoc.Lua.ErrorConversion ()
+import Text.Pandoc.Sources (Sources)
instance Pushable Pandoc where
push = pushPandoc
@@ -109,3 +111,6 @@ instance Peekable Version where
instance {-# OVERLAPPING #-} Peekable Attr where
peek = forcePeek . peekAttr
+
+instance Pushable Sources where
+ push = pushSources
diff --git a/src/Text/Pandoc/Readers/Custom.hs b/src/Text/Pandoc/Readers/Custom.hs
index d7336012b..7b6c99ed8 100644
--- a/src/Text/Pandoc/Readers/Custom.hs
+++ b/src/Text/Pandoc/Readers/Custom.hs
@@ -17,7 +17,6 @@ Supports custom parsers written in Lua which produce a Pandoc AST.
module Text.Pandoc.Readers.Custom ( readCustom ) where
import Control.Exception
import Control.Monad (when)
-import Data.Text (Text)
import HsLua as Lua hiding (Operation (Div), render)
import HsLua.Class.Peekable (PeekError)
import Control.Monad.IO.Class (MonadIO)
@@ -26,13 +25,13 @@ import Text.Pandoc.Lua (Global (..), runLua, setGlobals)
import Text.Pandoc.Lua.Util (dofileWithTraceback)
import Text.Pandoc.Options
import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.Sources (Sources, ToSources(..))
-- | Convert custom markup to Pandoc.
readCustom :: (PandocMonad m, MonadIO m, ToSources s)
=> FilePath -> ReaderOptions -> s -> m Pandoc
-readCustom luaFile opts sources = do
- let input = sourcesToText $ toSources sources
+readCustom luaFile opts srcs = do
+ let input = toSources srcs
let globals = [ PANDOC_SCRIPT_FILE luaFile ]
res <- runLua $ do
setGlobals globals
@@ -47,8 +46,7 @@ readCustom luaFile opts sources = do
Right doc -> return doc
parseCustom :: forall e. PeekError e
- => Text
+ => Sources
-> ReaderOptions
-> LuaE e Pandoc
parseCustom = invoke @e "Reader"
-