From ee2f0021f9b59f0bca6eabf4884641da7a09e21d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 5 Nov 2021 22:10:29 -0700 Subject: Add interface for custom readers written in Lua. (#7671) New module Text.Pandoc.Readers.Custom, exporting readCustom [API change]. Users can now do `-f myreader.lua` and pandoc will treat the script myreader.lua as a custom reader, which parses an input string to a pandoc AST, using the pandoc module defined for Lua filters. A sample custom reader can be found in data/reader.lua. Closes #7669. --- MANUAL.txt | 36 ++++++++++++++++++------- data/reader.lua | 44 +++++++++++++++++++++++++++++++ pandoc.cabal | 1 + src/Text/Pandoc/App.hs | 13 +++++---- src/Text/Pandoc/Readers/Custom.hs | 55 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 135 insertions(+), 14 deletions(-) create mode 100644 data/reader.lua create mode 100644 src/Text/Pandoc/Readers/Custom.hs diff --git a/MANUAL.txt b/MANUAL.txt index 019d80bf0..7e9f9f85a 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -266,6 +266,7 @@ header when requesting a document from a URL: - `tikiwiki` ([TikiWiki markup]) - `twiki` ([TWiki markup]) - `vimwiki` ([Vimwiki]) + - the path of a custom Lua reader, see [Custom readers and writers] below ::: Extensions can be individually enabled or disabled by @@ -338,7 +339,7 @@ header when requesting a document from a URL: - `tei` ([TEI Simple]) - `xwiki` ([XWiki markup]) - `zimwiki` ([ZimWiki markup]) - - the path of a custom Lua writer, see [Custom writers] below + - the path of a custom Lua writer, see [Custom readers and writers] below ::: Note that `odt`, `docx`, `epub`, and `pdf` output will not be directed @@ -6574,19 +6575,35 @@ With these custom styles, you can use your input document as a reference-doc while creating docx output (see below), and maintain the same styles in your input and output files. -# Custom writers +# Custom readers and writers -Pandoc can be extended with custom writers written in [Lua]. (Pandoc -includes a Lua interpreter, so Lua need not be installed separately.) +Pandoc can be extended with custom readers and writers written +in [Lua]. (Pandoc includes a Lua interpreter, so Lua need not +be installed separately.) -To use a custom writer, simply specify the path to the Lua script -in place of the output format. For example: +To use a custom reader or writer, simply specify the path to the +Lua script in place of the input or output format. For example: pandoc -t data/sample.lua + pandoc -f my_custom_markup_language.lua -t latex -s -Creating a custom writer requires writing a Lua function for each -possible element in a pandoc document. To get a documented example -which you can modify according to your needs, do +A custom reader is a Lua script that defines one function, +Reader, which takes a string as input and returns a Pandoc +AST. See the [Lua filters documentation] for documentation +of the functions that are available for creating pandoc +AST elements. For parsing, the [lpeg] parsing library +is available by default. To see a sample custom reader: + + pandoc --print-default-data-file reader.lua + +Reader options are available via the global variable +`PANDOC_READER_OPTIONS`, as expalined in the [Lua filters +documentation]. + +A custom writer is a Lua script that defines a function +that specifies how to render each element in a Pandoc AST. +To see a documented example which you can modify according +to your needs: pandoc --print-default-data-file sample.lua @@ -6598,6 +6615,7 @@ default template with the name subdirectory of your user data directory (see [Templates]). [Lua]: https://www.lua.org +[lpeg]: http://www.inf.puc-rio.br/~roberto/lpeg/ # Reproducible builds diff --git a/data/reader.lua b/data/reader.lua new file mode 100644 index 000000000..4aca4edd3 --- /dev/null +++ b/data/reader.lua @@ -0,0 +1,44 @@ +-- A sample custom reader for a very simple markup language. +-- This parses a document into paragraphs separated by blank lines. +-- This is _{italic} and this is *{boldface} +-- This is an escaped special character: \_, \*, \{, \} +-- == text makes a level-2 heading +-- That's it! + +-- For better performance we put these functions in local variables: +local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B = + lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V, + lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B + +local whitespacechar = S(" \t\r\n") +local specialchar = S("_*{}\\") +local escapedchar = P"\\" * specialchar + / function (x) return string.sub(x,2) end +local wordchar = (P(1) - (whitespacechar + specialchar)) + escapedchar +local spacechar = S(" \t") +local newline = P"\r"^-1 * P"\n" +local blanklines = newline * spacechar^0 * newline^1 +local endline = newline - blanklines + +-- Grammar +G = P{ "Pandoc", + Pandoc = blanklines^-1 * Ct(V"Block"^0) / pandoc.Pandoc; + Block = V"Header" + V"Para"; + Para = Ct(V"Inline"^1) * blanklines^-1 / pandoc.Para; + Header = Ct(Cg(P("=")^1 / function(x) return #x end, "length") + * spacechar^1 + * Cg(Ct(V"Inline"^0), "contents") + * blanklines^-1) / + function(res) return pandoc.Header(res.length, res.contents) end; + Inline = V"Emph" + V"Str" + V"Space" + V"SoftBreak" + V"Special" ; + Str = wordchar^1 / pandoc.Str; + Space = spacechar^1 / pandoc.Space; + SoftBreak = endline / pandoc.SoftBreak; + Emph = Ct(P"_{" * Cg(Ct((V"Inline" - P"}")^1), "contents") * P"}") / + function(res) return pandoc.Emph(res.contents) end; + Special = specialchar / pandoc.Str; +} + +function Reader(input) + return lpeg.match(G, input) +end diff --git a/pandoc.cabal b/pandoc.cabal index 8911cdb1b..a86cc71a3 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -642,6 +642,7 @@ library Text.Pandoc.Readers.Ipynb, Text.Pandoc.Readers.CSV, Text.Pandoc.Readers.RTF, + Text.Pandoc.Readers.Custom, Text.Pandoc.Writers, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 20e647456..9eb9c2cf3 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -68,6 +68,7 @@ import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, defaultUserDataDir, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString) import Text.Pandoc.Readers.Markdown (yamlToMeta) +import Text.Pandoc.Readers.Custom (readCustom) import qualified Text.Pandoc.UTF8 as UTF8 #ifndef _WINDOWS import System.Posix.IO (stdOutput) @@ -154,11 +155,13 @@ convertWithOpts opts = do -> ByteStringReader $ \o t -> sandbox files (r o t) (reader, readerExts) <- - if optSandbox opts - then case runPure (getReader readerName) of - Left e -> throwError e - Right (r, rexts) -> return (makeSandboxed r, rexts) - else getReader readerName + if ".lua" `T.isSuffixOf` readerName + then return (TextReader (readCustom (T.unpack readerName)), mempty) + else if optSandbox opts + then case runPure (getReader readerName) of + Left e -> throwError e + Right (r, rexts) -> return (makeSandboxed r, rexts) + else getReader readerName outputSettings <- optToOutputSettings opts let format = outputFormat outputSettings diff --git a/src/Text/Pandoc/Readers/Custom.hs b/src/Text/Pandoc/Readers/Custom.hs new file mode 100644 index 000000000..83d82a9cc --- /dev/null +++ b/src/Text/Pandoc/Readers/Custom.hs @@ -0,0 +1,55 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{- | + Module : Text.Pandoc.Readers.Custom + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Supports custom parsers written in Lua which produce a Pandoc AST. +-} +module Text.Pandoc.Readers.Custom ( readCustom ) where +import Control.Exception +import Control.Monad (when) +import Data.Text (Text) +import HsLua as Lua hiding (Operation (Div), render) +import HsLua.Class.Peekable (PeekError) +import Control.Monad.IO.Class (MonadIO) +import Text.Pandoc.Definition +import Text.Pandoc.Lua (Global (..), runLua, setGlobals) +import Text.Pandoc.Lua.Util (dofileWithTraceback) +import Text.Pandoc.Options +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) + +-- | Convert custom markup to Pandoc. +readCustom :: (PandocMonad m, MonadIO m, ToSources s) + => FilePath -> ReaderOptions -> s -> m Pandoc +readCustom luaFile opts sources = do + let input = sourcesToText $ toSources sources + let globals = [ PANDOC_SCRIPT_FILE luaFile + , PANDOC_READER_OPTIONS opts + ] + res <- runLua $ do + setGlobals globals + stat <- dofileWithTraceback luaFile + -- check for error in lua script (later we'll change the return type + -- to handle this more gracefully): + when (stat /= Lua.OK) + Lua.throwErrorAsException + parseCustom input + case res of + Left msg -> throw msg + Right doc -> return doc + +parseCustom :: forall e. PeekError e + => Text + -> LuaE e Pandoc +parseCustom = invoke @e "Reader" + -- cgit v1.2.3