aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-11-05 22:10:29 -0700
committerGitHub <noreply@github.com>2021-11-05 22:10:29 -0700
commitee2f0021f9b59f0bca6eabf4884641da7a09e21d (patch)
treef29d5325d1d89c736093534d27b62c98a674df57
parentbac6ae9607582233336984c30bba3c586eba6844 (diff)
downloadpandoc-ee2f0021f9b59f0bca6eabf4884641da7a09e21d.tar.gz
Add interface for custom readers written in Lua. (#7671)
New module Text.Pandoc.Readers.Custom, exporting readCustom [API change]. Users can now do `-f myreader.lua` and pandoc will treat the script myreader.lua as a custom reader, which parses an input string to a pandoc AST, using the pandoc module defined for Lua filters. A sample custom reader can be found in data/reader.lua. Closes #7669.
-rw-r--r--MANUAL.txt36
-rw-r--r--data/reader.lua44
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc/App.hs13
-rw-r--r--src/Text/Pandoc/Readers/Custom.hs55
5 files changed, 135 insertions, 14 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 019d80bf0..7e9f9f85a 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -266,6 +266,7 @@ header when requesting a document from a URL:
- `tikiwiki` ([TikiWiki markup])
- `twiki` ([TWiki markup])
- `vimwiki` ([Vimwiki])
+ - the path of a custom Lua reader, see [Custom readers and writers] below
:::
Extensions can be individually enabled or disabled by
@@ -338,7 +339,7 @@ header when requesting a document from a URL:
- `tei` ([TEI Simple])
- `xwiki` ([XWiki markup])
- `zimwiki` ([ZimWiki markup])
- - the path of a custom Lua writer, see [Custom writers] below
+ - the path of a custom Lua writer, see [Custom readers and writers] below
:::
Note that `odt`, `docx`, `epub`, and `pdf` output will not be directed
@@ -6574,19 +6575,35 @@ With these custom styles, you can use your input document as a
reference-doc while creating docx output (see below), and maintain the
same styles in your input and output files.
-# Custom writers
+# Custom readers and writers
-Pandoc can be extended with custom writers written in [Lua]. (Pandoc
-includes a Lua interpreter, so Lua need not be installed separately.)
+Pandoc can be extended with custom readers and writers written
+in [Lua]. (Pandoc includes a Lua interpreter, so Lua need not
+be installed separately.)
-To use a custom writer, simply specify the path to the Lua script
-in place of the output format. For example:
+To use a custom reader or writer, simply specify the path to the
+Lua script in place of the input or output format. For example:
pandoc -t data/sample.lua
+ pandoc -f my_custom_markup_language.lua -t latex -s
-Creating a custom writer requires writing a Lua function for each
-possible element in a pandoc document. To get a documented example
-which you can modify according to your needs, do
+A custom reader is a Lua script that defines one function,
+Reader, which takes a string as input and returns a Pandoc
+AST. See the [Lua filters documentation] for documentation
+of the functions that are available for creating pandoc
+AST elements. For parsing, the [lpeg] parsing library
+is available by default. To see a sample custom reader:
+
+ pandoc --print-default-data-file reader.lua
+
+Reader options are available via the global variable
+`PANDOC_READER_OPTIONS`, as expalined in the [Lua filters
+documentation].
+
+A custom writer is a Lua script that defines a function
+that specifies how to render each element in a Pandoc AST.
+To see a documented example which you can modify according
+to your needs:
pandoc --print-default-data-file sample.lua
@@ -6598,6 +6615,7 @@ default template with the name
subdirectory of your user data directory (see [Templates]).
[Lua]: https://www.lua.org
+[lpeg]: http://www.inf.puc-rio.br/~roberto/lpeg/
# Reproducible builds
diff --git a/data/reader.lua b/data/reader.lua
new file mode 100644
index 000000000..4aca4edd3
--- /dev/null
+++ b/data/reader.lua
@@ -0,0 +1,44 @@
+-- A sample custom reader for a very simple markup language.
+-- This parses a document into paragraphs separated by blank lines.
+-- This is _{italic} and this is *{boldface}
+-- This is an escaped special character: \_, \*, \{, \}
+-- == text makes a level-2 heading
+-- That's it!
+
+-- For better performance we put these functions in local variables:
+local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B =
+ lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
+ lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B
+
+local whitespacechar = S(" \t\r\n")
+local specialchar = S("_*{}\\")
+local escapedchar = P"\\" * specialchar
+ / function (x) return string.sub(x,2) end
+local wordchar = (P(1) - (whitespacechar + specialchar)) + escapedchar
+local spacechar = S(" \t")
+local newline = P"\r"^-1 * P"\n"
+local blanklines = newline * spacechar^0 * newline^1
+local endline = newline - blanklines
+
+-- Grammar
+G = P{ "Pandoc",
+ Pandoc = blanklines^-1 * Ct(V"Block"^0) / pandoc.Pandoc;
+ Block = V"Header" + V"Para";
+ Para = Ct(V"Inline"^1) * blanklines^-1 / pandoc.Para;
+ Header = Ct(Cg(P("=")^1 / function(x) return #x end, "length")
+ * spacechar^1
+ * Cg(Ct(V"Inline"^0), "contents")
+ * blanklines^-1) /
+ function(res) return pandoc.Header(res.length, res.contents) end;
+ Inline = V"Emph" + V"Str" + V"Space" + V"SoftBreak" + V"Special" ;
+ Str = wordchar^1 / pandoc.Str;
+ Space = spacechar^1 / pandoc.Space;
+ SoftBreak = endline / pandoc.SoftBreak;
+ Emph = Ct(P"_{" * Cg(Ct((V"Inline" - P"}")^1), "contents") * P"}") /
+ function(res) return pandoc.Emph(res.contents) end;
+ Special = specialchar / pandoc.Str;
+}
+
+function Reader(input)
+ return lpeg.match(G, input)
+end
diff --git a/pandoc.cabal b/pandoc.cabal
index 8911cdb1b..a86cc71a3 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -642,6 +642,7 @@ library
Text.Pandoc.Readers.Ipynb,
Text.Pandoc.Readers.CSV,
Text.Pandoc.Readers.RTF,
+ Text.Pandoc.Readers.Custom,
Text.Pandoc.Writers,
Text.Pandoc.Writers.Native,
Text.Pandoc.Writers.Docbook,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 20e647456..9eb9c2cf3 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -68,6 +68,7 @@ import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
defaultUserDataDir, tshow)
import Text.Pandoc.Writers.Shared (lookupMetaString)
import Text.Pandoc.Readers.Markdown (yamlToMeta)
+import Text.Pandoc.Readers.Custom (readCustom)
import qualified Text.Pandoc.UTF8 as UTF8
#ifndef _WINDOWS
import System.Posix.IO (stdOutput)
@@ -154,11 +155,13 @@ convertWithOpts opts = do
-> ByteStringReader $ \o t -> sandbox files (r o t)
(reader, readerExts) <-
- if optSandbox opts
- then case runPure (getReader readerName) of
- Left e -> throwError e
- Right (r, rexts) -> return (makeSandboxed r, rexts)
- else getReader readerName
+ if ".lua" `T.isSuffixOf` readerName
+ then return (TextReader (readCustom (T.unpack readerName)), mempty)
+ else if optSandbox opts
+ then case runPure (getReader readerName) of
+ Left e -> throwError e
+ Right (r, rexts) -> return (makeSandboxed r, rexts)
+ else getReader readerName
outputSettings <- optToOutputSettings opts
let format = outputFormat outputSettings
diff --git a/src/Text/Pandoc/Readers/Custom.hs b/src/Text/Pandoc/Readers/Custom.hs
new file mode 100644
index 000000000..83d82a9cc
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Custom.hs
@@ -0,0 +1,55 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+{- |
+ Module : Text.Pandoc.Readers.Custom
+ Copyright : Copyright (C) 2021 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Supports custom parsers written in Lua which produce a Pandoc AST.
+-}
+module Text.Pandoc.Readers.Custom ( readCustom ) where
+import Control.Exception
+import Control.Monad (when)
+import Data.Text (Text)
+import HsLua as Lua hiding (Operation (Div), render)
+import HsLua.Class.Peekable (PeekError)
+import Control.Monad.IO.Class (MonadIO)
+import Text.Pandoc.Definition
+import Text.Pandoc.Lua (Global (..), runLua, setGlobals)
+import Text.Pandoc.Lua.Util (dofileWithTraceback)
+import Text.Pandoc.Options
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+
+-- | Convert custom markup to Pandoc.
+readCustom :: (PandocMonad m, MonadIO m, ToSources s)
+ => FilePath -> ReaderOptions -> s -> m Pandoc
+readCustom luaFile opts sources = do
+ let input = sourcesToText $ toSources sources
+ let globals = [ PANDOC_SCRIPT_FILE luaFile
+ , PANDOC_READER_OPTIONS opts
+ ]
+ res <- runLua $ do
+ setGlobals globals
+ stat <- dofileWithTraceback luaFile
+ -- check for error in lua script (later we'll change the return type
+ -- to handle this more gracefully):
+ when (stat /= Lua.OK)
+ Lua.throwErrorAsException
+ parseCustom input
+ case res of
+ Left msg -> throw msg
+ Right doc -> return doc
+
+parseCustom :: forall e. PeekError e
+ => Text
+ -> LuaE e Pandoc
+parseCustom = invoke @e "Reader"
+