aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-11-07 14:33:18 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-11-07 14:34:56 -0800
commit881b45209e8fd70079e409d05fa7402741501940 (patch)
treede3d9e6464223adca4706edb67172b2f323e4872
parent213913f02554b5cec8ea946538d8ba445d4d3f4e (diff)
downloadpandoc-881b45209e8fd70079e409d05fa7402741501940.tar.gz
Replace old sample custom reader with a full-featured reader for creole.
This is better as an example. And it is faster than pandoc's regular creole parser, which shows that high-performance readers can be developed this way.
-rw-r--r--MANUAL.txt2
-rw-r--r--data/creole.lua197
-rw-r--r--data/reader.lua87
-rw-r--r--pandoc.cabal2
4 files changed, 200 insertions, 88 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index dc973f958..b1f75961b 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -6594,7 +6594,7 @@ of the functions that are available for creating pandoc
AST elements. For parsing, the [lpeg] parsing library
is available by default. To see a sample custom reader:
- pandoc --print-default-data-file reader.lua
+ pandoc --print-default-data-file creole.lua
If you want your custom reader to have access to reader options
(e.g. the tab stop setting), you give your Reader function a
diff --git a/data/creole.lua b/data/creole.lua
new file mode 100644
index 000000000..ffde73638
--- /dev/null
+++ b/data/creole.lua
@@ -0,0 +1,197 @@
+-- A sample custom reader for Creole 1.0 (common wiki markup)
+-- http://www.wikicreole.org/wiki/CheatSheet
+
+-- For better performance we put these functions in local variables:
+local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt =
+ lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
+ lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt
+
+local whitespacechar = S(" \t\r\n")
+local specialchar = S("/*~[]\\{}|")
+local wordchar = (1 - (whitespacechar + specialchar))
+local spacechar = S(" \t")
+local newline = P"\r"^-1 * P"\n"
+local blankline = spacechar^0 * newline
+local endline = newline * #-blankline
+local endequals = spacechar^0 * P"="^0 * spacechar^0 * newline
+local cellsep = spacechar^0 * P"|"
+
+local function trim(s)
+ return (s:gsub("^%s*(.-)%s*$", "%1"))
+end
+
+local function ListItem(lev, ch)
+ local start
+ if ch == nil then
+ start = S"*#"
+ else
+ start = P(ch)
+ end
+ local subitem = function(c)
+ if lev < 6 then
+ return ListItem(lev + 1, c)
+ else
+ return (1 - 1) -- fails
+ end
+ end
+ local parser = spacechar^0
+ * start^lev
+ * #(- start)
+ * spacechar^0
+ * Ct((V"Inline" - (newline * spacechar^0 * S"*#"))^0)
+ * newline
+ * (Ct(subitem("*")^1) / pandoc.BulletList
+ +
+ Ct(subitem("#")^1) / pandoc.OrderedList
+ +
+ Cc(nil))
+ / function (ils, sublist)
+ return { pandoc.Plain(ils), sublist }
+ end
+ return parser
+end
+
+local re = require're'
+x = re.compile[[
+ listname <- {| {:tag: '' -> 'list':} (name s)* |}
+ name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |}
+ s <- ' '*
+]]
+
+-- Grammar
+G = P{ "Doc",
+ Doc = Ct(V"Block"^0)
+ / pandoc.Pandoc ;
+ Block = blankline^0
+ * ( V"Header"
+ + V"HorizontalRule"
+ + V"CodeBlock"
+ + V"List"
+ + V"Table"
+ + V"Para") ;
+ Para = Ct(V"Inline"^1)
+ * newline
+ / pandoc.Para ;
+ HorizontalRule = spacechar^0
+ * P"----"
+ * spacechar^0
+ * newline
+ / pandoc.HorizontalRule;
+ Header = (P("=")^1 / string.len)
+ * spacechar^1
+ * Ct((V"Inline" - endequals)^1)
+ * endequals
+ / pandoc.Header;
+ CodeBlock = P"{{{"
+ * blankline
+ * C((1 - (newline * P"}}}"))^0)
+ * newline
+ * P"}}}"
+ / pandoc.CodeBlock;
+ Placeholder = P"<<<"
+ * C(P(1) - P">>>")^0
+ * P">>>"
+ / function() return pandoc.Div({}) end;
+ List = V"BulletList"
+ + V"OrderedList" ;
+ BulletList = Ct(ListItem(1,'*')^1)
+ / pandoc.BulletList ;
+ OrderedList = Ct(ListItem(1,'#')^1)
+ / pandoc.OrderedList ;
+ Table = (V"TableHeader" + Cc{})
+ * Ct(V"TableRow"^1)
+ / function(headrow, bodyrows)
+ local numcolumns = #(bodyrows[1])
+ local aligns = {}
+ local widths = {}
+ for i = 1,numcolumns do
+ aligns[i] = pandoc.AlignDefault
+ widths[i] = 0
+ end
+ return pandoc.utils.from_simple_table(
+ pandoc.SimpleTable({}, aligns, widths, headrow, bodyrows))
+ end ;
+ TableHeader = Ct(V"HeaderCell"^1)
+ * cellsep^-1
+ * spacechar^0
+ * newline ;
+ TableRow = Ct(V"BodyCell"^1)
+ * cellsep^-1
+ * spacechar^0
+ * newline ;
+ HeaderCell = cellsep
+ * P"="
+ * spacechar^0
+ * Ct((V"Inline" - (newline + cellsep))^0)
+ / function(ils) return { pandoc.Plain(ils) } end ;
+ BodyCell = cellsep
+ * spacechar^0
+ * Ct((V"Inline" - (newline + cellsep))^0)
+ / function(ils) return { pandoc.Plain(ils) } end ;
+ Inline = V"Emph"
+ + V"Strong"
+ + V"LineBreak"
+ + V"Link"
+ + V"URL"
+ + V"Image"
+ + V"Str"
+ + V"Space"
+ + V"SoftBreak"
+ + V"Escaped"
+ + V"Placeholder"
+ + V"Code"
+ + V"Special" ;
+ Str = wordchar^1
+ / pandoc.Str;
+ Escaped = P"~"
+ * C(P(1))
+ / pandoc.Str ;
+ Special = specialchar
+ / pandoc.Str;
+ Space = spacechar^1
+ / pandoc.Space ;
+ SoftBreak = endline
+ * # -(V"HorizontalRule" + V"CodeBlock")
+ / pandoc.SoftBreak ;
+ LineBreak = P"\\\\"
+ / pandoc.LineBreak ;
+ Code = P"{{{"
+ * C((1 - P"}}}")^0)
+ * P"}}}"
+ / trim / pandoc.Code ;
+ Link = P"[["
+ * C((1 - (P"]]" + P"|"))^0)
+ * (P"|" * Ct((V"Inline" - P"]]")^1))^-1 * P"]]"
+ / function(url, desc)
+ local txt = desc or {pandoc.Str(url)}
+ return pandoc.Link(txt, url)
+ end ;
+ Image = P"{{"
+ * #-P"{"
+ * C((1 - (S"}"))^0)
+ * (P"|" * Ct((V"Inline" - P"}}")^1))^-1
+ * P"}}"
+ / function(url, desc)
+ local txt = desc or ""
+ return pandoc.Image(txt, url)
+ end ;
+ URL = P"http"
+ * P"s"^-1
+ * P":"
+ * (1 - (whitespacechar + (S",.?!:;\"'" * #whitespacechar)))^1
+ / function(url)
+ return pandoc.Link(pandoc.Str(url), url)
+ end ;
+ Emph = P"//"
+ * Ct((V"Inline" - P"//")^1)
+ * P"//"
+ / pandoc.Emph ;
+ Strong = P"**"
+ * Ct((V"Inline" -P"**")^1)
+ * P"**"
+ / pandoc.Strong ;
+}
+
+function Reader(input, reader_options)
+ return lpeg.match(G, input)
+end
diff --git a/data/reader.lua b/data/reader.lua
deleted file mode 100644
index e466e6ea1..000000000
--- a/data/reader.lua
+++ /dev/null
@@ -1,87 +0,0 @@
--- A sample custom reader for a very simple markup language.
--- This parses a document into paragraphs separated by blank lines.
--- This is /italic/ and this is *boldface* and this is `code`
--- and `code``with backtick` (doubled `` = ` inside backticks).
--- This is an escaped special character: \_, \*, \\
--- == text makes a level-2 heading
--- That's it!
-
--- For better performance we put these functions in local variables:
-local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt =
- lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
- lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt
-
---- if item is a table, concatenate it to acc;
--- otherwise insert it at the end.
-local function add_item(acc, item)
- if acc == nil then
- acc = {}
- end
- if type(item) == "table" then
- for i = 1,#item do
- add_item(acc, item[i])
- end
- else
- acc[#acc + 1] = item
- end
- return acc
-end
-
-local function Many1(parser)
- return Cf(Cc(nil) * parser^1 , add_item)
-end
-
-local function Many(parser)
- return (Many1(parser) + Cc{})
-end
-
-local whitespacechar = S(" \t\r\n")
-local specialchar = S("/*\\`")
-local wordchar = (1 - (whitespacechar + specialchar))
-local spacechar = S(" \t")
-local newline = P"\r"^-1 * P"\n"
-local blanklines = newline * (spacechar^0 * newline)^1
-local endline = newline - blanklines
-
-local function BetweenDelims(c, parser, constructor)
- local starter = P(c) * #(- whitespacechar)
- local ender = B(1 - whitespacechar) * P(c)
- return starter * Many(parser - ender) * C(ender^-1) /
- function(contents, ender)
- if ender == "" then -- fallback
- return { pandoc.Str(c) , contents }
- else
- return constructor(contents)
- end
- end
-end
-
-
--- Grammar
-G = P{ "Pandoc",
- Pandoc = Many(V"Block") / pandoc.Pandoc;
- Block = blanklines^0 * (V"Header" + V"Para") ;
- Para = Many1(V"Inline") * blanklines^-1 / pandoc.Para;
- Header = (P("=")^1 / string.len)
- * spacechar^1
- * Many(V"Inline" - (spacechar^0 * P("=")^0 * blanklines))
- * spacechar^0
- * P("=")^0
- * blanklines^-1 /
- function(lev, contents) return pandoc.Header(lev, contents) end;
- Inline = V"Emph" + V"Strong" + V"Str" + V"Space" + V"SoftBreak" +
- V"Code" + V"Escaped" + V"Special";
- Str = wordchar^1 / pandoc.Str;
- Escaped = "\\" * C(specialchar) / function(s) return pandoc.Str(s) end;
- Space = spacechar^1 / pandoc.Space;
- SoftBreak = endline / pandoc.SoftBreak;
- Emph = BetweenDelims("/", V"Inline", pandoc.Emph);
- Strong = BetweenDelims("*", V"Inline", pandoc.Strong);
- Code = P"`" * Ct(( (P"``" / "`") + (C(1) - S"`"))^1) * P"`"
- / table.concat / pandoc.Code;
- Special = S"`\\" / pandoc.Str;
-}
-
-function Reader(input, opts)
- return lpeg.match(G, input)
-end
diff --git a/pandoc.cabal b/pandoc.cabal
index a86cc71a3..764b459ba 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -174,6 +174,8 @@ data-files:
data/abbreviations
-- sample lua custom writer
data/sample.lua
+ -- sample lua custom reader
+ data/creole.lua
-- lua init script
data/init.lua
-- pandoc lua module