diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-11-07 14:33:18 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-11-07 14:34:56 -0800 |
commit | 881b45209e8fd70079e409d05fa7402741501940 (patch) | |
tree | de3d9e6464223adca4706edb67172b2f323e4872 | |
parent | 213913f02554b5cec8ea946538d8ba445d4d3f4e (diff) | |
download | pandoc-881b45209e8fd70079e409d05fa7402741501940.tar.gz |
Replace old sample custom reader with a full-featured reader for creole.
This is better as an example. And it is faster than pandoc's
regular creole parser, which shows that high-performance readers
can be developed this way.
-rw-r--r-- | MANUAL.txt | 2 | ||||
-rw-r--r-- | data/creole.lua | 197 | ||||
-rw-r--r-- | data/reader.lua | 87 | ||||
-rw-r--r-- | pandoc.cabal | 2 |
4 files changed, 200 insertions, 88 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index dc973f958..b1f75961b 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -6594,7 +6594,7 @@ of the functions that are available for creating pandoc AST elements. For parsing, the [lpeg] parsing library is available by default. To see a sample custom reader: - pandoc --print-default-data-file reader.lua + pandoc --print-default-data-file creole.lua If you want your custom reader to have access to reader options (e.g. the tab stop setting), you give your Reader function a diff --git a/data/creole.lua b/data/creole.lua new file mode 100644 index 000000000..ffde73638 --- /dev/null +++ b/data/creole.lua @@ -0,0 +1,197 @@ +-- A sample custom reader for Creole 1.0 (common wiki markup) +-- http://www.wikicreole.org/wiki/CheatSheet + +-- For better performance we put these functions in local variables: +local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt = + lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V, + lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt + +local whitespacechar = S(" \t\r\n") +local specialchar = S("/*~[]\\{}|") +local wordchar = (1 - (whitespacechar + specialchar)) +local spacechar = S(" \t") +local newline = P"\r"^-1 * P"\n" +local blankline = spacechar^0 * newline +local endline = newline * #-blankline +local endequals = spacechar^0 * P"="^0 * spacechar^0 * newline +local cellsep = spacechar^0 * P"|" + +local function trim(s) + return (s:gsub("^%s*(.-)%s*$", "%1")) +end + +local function ListItem(lev, ch) + local start + if ch == nil then + start = S"*#" + else + start = P(ch) + end + local subitem = function(c) + if lev < 6 then + return ListItem(lev + 1, c) + else + return (1 - 1) -- fails + end + end + local parser = spacechar^0 + * start^lev + * #(- start) + * spacechar^0 + * Ct((V"Inline" - (newline * spacechar^0 * S"*#"))^0) + * newline + * (Ct(subitem("*")^1) / pandoc.BulletList + + + Ct(subitem("#")^1) / pandoc.OrderedList + + + Cc(nil)) + / function (ils, sublist) + return { pandoc.Plain(ils), sublist } + end + return parser +end + +local re = require're' +x = re.compile[[ + listname <- {| {:tag: '' -> 'list':} (name s)* |} + name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |} + s <- ' '* +]] + +-- Grammar +G = P{ "Doc", + Doc = Ct(V"Block"^0) + / pandoc.Pandoc ; + Block = blankline^0 + * ( V"Header" + + V"HorizontalRule" + + V"CodeBlock" + + V"List" + + V"Table" + + V"Para") ; + Para = Ct(V"Inline"^1) + * newline + / pandoc.Para ; + HorizontalRule = spacechar^0 + * P"----" + * spacechar^0 + * newline + / pandoc.HorizontalRule; + Header = (P("=")^1 / string.len) + * spacechar^1 + * Ct((V"Inline" - endequals)^1) + * endequals + / pandoc.Header; + CodeBlock = P"{{{" + * blankline + * C((1 - (newline * P"}}}"))^0) + * newline + * P"}}}" + / pandoc.CodeBlock; + Placeholder = P"<<<" + * C(P(1) - P">>>")^0 + * P">>>" + / function() return pandoc.Div({}) end; + List = V"BulletList" + + V"OrderedList" ; + BulletList = Ct(ListItem(1,'*')^1) + / pandoc.BulletList ; + OrderedList = Ct(ListItem(1,'#')^1) + / pandoc.OrderedList ; + Table = (V"TableHeader" + Cc{}) + * Ct(V"TableRow"^1) + / function(headrow, bodyrows) + local numcolumns = #(bodyrows[1]) + local aligns = {} + local widths = {} + for i = 1,numcolumns do + aligns[i] = pandoc.AlignDefault + widths[i] = 0 + end + return pandoc.utils.from_simple_table( + pandoc.SimpleTable({}, aligns, widths, headrow, bodyrows)) + end ; + TableHeader = Ct(V"HeaderCell"^1) + * cellsep^-1 + * spacechar^0 + * newline ; + TableRow = Ct(V"BodyCell"^1) + * cellsep^-1 + * spacechar^0 + * newline ; + HeaderCell = cellsep + * P"=" + * spacechar^0 + * Ct((V"Inline" - (newline + cellsep))^0) + / function(ils) return { pandoc.Plain(ils) } end ; + BodyCell = cellsep + * spacechar^0 + * Ct((V"Inline" - (newline + cellsep))^0) + / function(ils) return { pandoc.Plain(ils) } end ; + Inline = V"Emph" + + V"Strong" + + V"LineBreak" + + V"Link" + + V"URL" + + V"Image" + + V"Str" + + V"Space" + + V"SoftBreak" + + V"Escaped" + + V"Placeholder" + + V"Code" + + V"Special" ; + Str = wordchar^1 + / pandoc.Str; + Escaped = P"~" + * C(P(1)) + / pandoc.Str ; + Special = specialchar + / pandoc.Str; + Space = spacechar^1 + / pandoc.Space ; + SoftBreak = endline + * # -(V"HorizontalRule" + V"CodeBlock") + / pandoc.SoftBreak ; + LineBreak = P"\\\\" + / pandoc.LineBreak ; + Code = P"{{{" + * C((1 - P"}}}")^0) + * P"}}}" + / trim / pandoc.Code ; + Link = P"[[" + * C((1 - (P"]]" + P"|"))^0) + * (P"|" * Ct((V"Inline" - P"]]")^1))^-1 * P"]]" + / function(url, desc) + local txt = desc or {pandoc.Str(url)} + return pandoc.Link(txt, url) + end ; + Image = P"{{" + * #-P"{" + * C((1 - (S"}"))^0) + * (P"|" * Ct((V"Inline" - P"}}")^1))^-1 + * P"}}" + / function(url, desc) + local txt = desc or "" + return pandoc.Image(txt, url) + end ; + URL = P"http" + * P"s"^-1 + * P":" + * (1 - (whitespacechar + (S",.?!:;\"'" * #whitespacechar)))^1 + / function(url) + return pandoc.Link(pandoc.Str(url), url) + end ; + Emph = P"//" + * Ct((V"Inline" - P"//")^1) + * P"//" + / pandoc.Emph ; + Strong = P"**" + * Ct((V"Inline" -P"**")^1) + * P"**" + / pandoc.Strong ; +} + +function Reader(input, reader_options) + return lpeg.match(G, input) +end diff --git a/data/reader.lua b/data/reader.lua deleted file mode 100644 index e466e6ea1..000000000 --- a/data/reader.lua +++ /dev/null @@ -1,87 +0,0 @@ --- A sample custom reader for a very simple markup language. --- This parses a document into paragraphs separated by blank lines. --- This is /italic/ and this is *boldface* and this is `code` --- and `code``with backtick` (doubled `` = ` inside backticks). --- This is an escaped special character: \_, \*, \\ --- == text makes a level-2 heading --- That's it! - --- For better performance we put these functions in local variables: -local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt = - lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V, - lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt - ---- if item is a table, concatenate it to acc; --- otherwise insert it at the end. -local function add_item(acc, item) - if acc == nil then - acc = {} - end - if type(item) == "table" then - for i = 1,#item do - add_item(acc, item[i]) - end - else - acc[#acc + 1] = item - end - return acc -end - -local function Many1(parser) - return Cf(Cc(nil) * parser^1 , add_item) -end - -local function Many(parser) - return (Many1(parser) + Cc{}) -end - -local whitespacechar = S(" \t\r\n") -local specialchar = S("/*\\`") -local wordchar = (1 - (whitespacechar + specialchar)) -local spacechar = S(" \t") -local newline = P"\r"^-1 * P"\n" -local blanklines = newline * (spacechar^0 * newline)^1 -local endline = newline - blanklines - -local function BetweenDelims(c, parser, constructor) - local starter = P(c) * #(- whitespacechar) - local ender = B(1 - whitespacechar) * P(c) - return starter * Many(parser - ender) * C(ender^-1) / - function(contents, ender) - if ender == "" then -- fallback - return { pandoc.Str(c) , contents } - else - return constructor(contents) - end - end -end - - --- Grammar -G = P{ "Pandoc", - Pandoc = Many(V"Block") / pandoc.Pandoc; - Block = blanklines^0 * (V"Header" + V"Para") ; - Para = Many1(V"Inline") * blanklines^-1 / pandoc.Para; - Header = (P("=")^1 / string.len) - * spacechar^1 - * Many(V"Inline" - (spacechar^0 * P("=")^0 * blanklines)) - * spacechar^0 - * P("=")^0 - * blanklines^-1 / - function(lev, contents) return pandoc.Header(lev, contents) end; - Inline = V"Emph" + V"Strong" + V"Str" + V"Space" + V"SoftBreak" + - V"Code" + V"Escaped" + V"Special"; - Str = wordchar^1 / pandoc.Str; - Escaped = "\\" * C(specialchar) / function(s) return pandoc.Str(s) end; - Space = spacechar^1 / pandoc.Space; - SoftBreak = endline / pandoc.SoftBreak; - Emph = BetweenDelims("/", V"Inline", pandoc.Emph); - Strong = BetweenDelims("*", V"Inline", pandoc.Strong); - Code = P"`" * Ct(( (P"``" / "`") + (C(1) - S"`"))^1) * P"`" - / table.concat / pandoc.Code; - Special = S"`\\" / pandoc.Str; -} - -function Reader(input, opts) - return lpeg.match(G, input) -end diff --git a/pandoc.cabal b/pandoc.cabal index a86cc71a3..764b459ba 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -174,6 +174,8 @@ data-files: data/abbreviations -- sample lua custom writer data/sample.lua + -- sample lua custom reader + data/creole.lua -- lua init script data/init.lua -- pandoc lua module |