aboutsummaryrefslogtreecommitdiff
path: root/data
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-11-05 22:10:29 -0700
committerGitHub <noreply@github.com>2021-11-05 22:10:29 -0700
commitee2f0021f9b59f0bca6eabf4884641da7a09e21d (patch)
treef29d5325d1d89c736093534d27b62c98a674df57 /data
parentbac6ae9607582233336984c30bba3c586eba6844 (diff)
downloadpandoc-ee2f0021f9b59f0bca6eabf4884641da7a09e21d.tar.gz
Add interface for custom readers written in Lua. (#7671)
New module Text.Pandoc.Readers.Custom, exporting readCustom [API change]. Users can now do `-f myreader.lua` and pandoc will treat the script myreader.lua as a custom reader, which parses an input string to a pandoc AST, using the pandoc module defined for Lua filters. A sample custom reader can be found in data/reader.lua. Closes #7669.
Diffstat (limited to 'data')
-rw-r--r--data/reader.lua44
1 files changed, 44 insertions, 0 deletions
diff --git a/data/reader.lua b/data/reader.lua
new file mode 100644
index 000000000..4aca4edd3
--- /dev/null
+++ b/data/reader.lua
@@ -0,0 +1,44 @@
+-- A sample custom reader for a very simple markup language.
+-- This parses a document into paragraphs separated by blank lines.
+-- This is _{italic} and this is *{boldface}
+-- This is an escaped special character: \_, \*, \{, \}
+-- == text makes a level-2 heading
+-- That's it!
+
+-- For better performance we put these functions in local variables:
+local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B =
+ lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
+ lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B
+
+-- Lexical building blocks shared by the grammar rules below.
+local whitespacechar = S" \t\r\n"
+local specialchar = S"_*{}\\"
+-- A backslash followed by a special character; the capture is the matched
+-- text with its first character (the backslash) removed.
+local escapedchar = (P("\\") * specialchar)
+  / function (matched) return matched:sub(2) end
+-- One character of a word: anything that is neither whitespace nor special,
+-- or else an escaped special character.
+local wordchar = (P(1) - whitespacechar - specialchar) + escapedchar
+local spacechar = S" \t"
+-- A line ending: "\n", optionally preceded by "\r".
+local newline = P"\r\n" + P"\n"
+-- A newline, optional spaces/tabs, then one or more further newlines.
+local blanklines = newline * spacechar^0 * newline^1
+-- A newline that does not begin a run of blank lines.
+local endline = -blanklines * newline
+
+-- Grammar
+-- LPeg grammar for the sample markup language; the start rule is "Pandoc".
+-- Each rule passes its captures to the matching pandoc constructor.
+-- Declared local so that loading this script does not create a global `G`.
+local G = P{ "Pandoc",
+  -- Whole document: optional leading blank lines, then any number of blocks.
+  Pandoc = blanklines^-1 * Ct(V"Block"^0) / pandoc.Pandoc;
+  -- Header is tried before Para.
+  Block = V"Header" + V"Para";
+  -- One or more inlines, optionally followed by blank lines.
+  Para = Ct(V"Inline"^1) * blanklines^-1 / pandoc.Para;
+  -- A run of "=" followed by spaces/tabs; the number of "=" characters is
+  -- passed to pandoc.Header as the level, the inlines as its contents.
+  Header = Ct(Cg(P("=")^1 / function(x) return #x end, "length")
+             * spacechar^1
+             * Cg(Ct(V"Inline"^0), "contents")
+             * blanklines^-1) /
+           function(res) return pandoc.Header(res.length, res.contents) end;
+  -- Emph and Strong come before Special so that "_{" and "*{" open markup
+  -- instead of being read as literal special characters.
+  Inline = V"Emph" + V"Strong" + V"Str" + V"Space" + V"SoftBreak"
+         + V"Special";
+  -- Cs substitutes each escaped character's capture (the character without
+  -- its backslash) into the matched text.  Without it, the nested captures
+  -- would replace the whole match as the argument to pandoc.Str, so
+  -- "foo\_bar" would yield only "_".
+  Str = Cs(wordchar^1) / pandoc.Str;
+  Space = spacechar^1 / pandoc.Space;
+  SoftBreak = endline / pandoc.SoftBreak;
+  -- _{...}: emphasized inlines, up to the closing "}".
+  Emph = Ct(P"_{" * Cg(Ct((V"Inline" - P"}")^1), "contents") * P"}") /
+         function(res) return pandoc.Emph(res.contents) end;
+  -- *{...}: boldface inlines, as described in the header comment of this file.
+  Strong = Ct(P"*{" * Cg(Ct((V"Inline" - P"}")^1), "contents") * P"}") /
+           function(res) return pandoc.Strong(res.contents) end;
+  -- Any special character not consumed by the rules above is kept literally.
+  Special = specialchar / pandoc.Str;
+}
+
+-- Entry point of the custom reader: matches the grammar G against the input
+-- and returns the captures produced by lpeg.match.
+-- `input` is passed through tostring, which leaves a string unchanged and
+-- also accepts values that convert to a string (lpeg.match itself raises an
+-- error on a non-string subject).
+-- NOTE(review): presumably newer pandoc versions pass a sources object
+-- rather than a plain string -- confirm against the pandoc documentation.
+function Reader(input)
+  return lpeg.match(G, tostring(input))
+end