From 464abdc854b21b713e68335e07f2331a623acc97 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 21 Nov 2021 10:15:35 +0100 Subject: Rename doc/custom-reader.lua to doc/custom-readers.md --- doc/custom-readers.lua | 599 ------------------------------------------------- doc/custom-readers.md | 599 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 599 insertions(+), 599 deletions(-) delete mode 100644 doc/custom-readers.lua create mode 100644 doc/custom-readers.md diff --git a/doc/custom-readers.lua b/doc/custom-readers.lua deleted file mode 100644 index bef3ae050..000000000 --- a/doc/custom-readers.lua +++ /dev/null @@ -1,599 +0,0 @@ ---- -author: -- John MacFarlane -date: 'November 18, 2021' -title: Creating Custom Pandoc Readers in Lua ---- - -# Introduction - -If you need to parse a format not already handled by pandoc, -you can create a custom reader using the [Lua] language. -Pandoc has a built-in Lua interpreter, so you needn't -install any additional software to do this. - -[Lua]: https://www.lua.org - -A custom writer is a Lua file that defines a function -called `Reader`, which takes two arguments: - -- a string, the raw input to be parsed -- optionally, a table of reader options, e.g. - `{ columns = 62, standalone = true }`. - -The `Reader` function should return a `Pandoc` AST. -This can be created using functions in the [`pandoc` module], -which is automatically in scope. (Indeed, all of the utility -functions that are available for [Lua filters] are available -in custom readers, too.) - -[Lua filters]: https://pandoc.org/lua-filters.html -[`pandoc` module]: https://pandoc.org/lua-filters.html#module-pandoc - -A minimal example would be - -```lua -function Reader(input) - return pandoc.Pandoc({ pandoc.CodeBlock(input) }) -end -``` - -This just returns a document containing a big code block with -all of the input. - -In a nontrivial reader, you'll want to parse the input. -You can do this using standard Lua library functions -(for example, the [patterns] library), or with the powerful -and fast [lpeg] parsing library, which is automatically in scope. -You can also use external Lua libraries (for example, -an XML parser). - -[patterns]: http://lua-users.org/wiki/PatternsTutorial -[lpeg]: http://www.inf.puc-rio.br/~roberto/lpeg/ - -# Example: plain text reader - -This is a simple example using [lpeg] to parse the input -into space-separated strings and blankline-separated paragraphs. - -```lua --- A sample custom reader that just parses text into blankline-separated --- paragraphs with space-separated words. - --- For better performance we put these functions in local variables: -local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt = - lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V, - lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt - -local whitespacechar = S(" \t\r\n") -local wordchar = (1 - whitespacechar) -local spacechar = S(" \t") -local newline = P"\r"^-1 * P"\n" -local blanklines = newline * (spacechar^0 * newline)^1 -local endline = newline - blanklines - --- Grammar -G = P{ "Pandoc", - Pandoc = Ct(V"Block"^0) / pandoc.Pandoc; - Block = blanklines^0 * V"Para" ; - Para = Ct(V"Inline"^1) / pandoc.Para; - Inline = V"Str" + V"Space" + V"SoftBreak" ; - Str = wordchar^1 / pandoc.Str; - Space = spacechar^1 / pandoc.Space; - SoftBreak = endline / pandoc.SoftBreak; -} - -function Reader(input) - return lpeg.match(G, input) -end -``` - -Example of use: - -``` -% pandoc -f plain.lua -t native -*Hello there*, this is plain text with no formatting -except paragraph breaks. - -- Like this one. -^D -[ Para - [ Str "*Hello" - , Space - , Str "there*," - , Space - , Str "this" - , Space - , Str "is" - , Space - , Str "plain" - , Space - , Str "text" - , Space - , Str "with" - , Space - , Str "no" - , Space - , Str "formatting" - , SoftBreak - , Str "except" - , Space - , Str "paragraph" - , Space - , Str "breaks." - ] -, Para - [ Str "-" - , Space - , Str "Like" - , Space - , Str "this" - , Space - , Str "one." - ] -] -``` - -# Example: a RIS bibliography reader - -This is a parser for [RIS bibliography] files. It can be used -to convert them to CSL JSON or YAML, BibTeX, or BibLaTeX. - -[RIS bibliography]: https://en.wikipedia.org/wiki/RIS_(file_format) - -```lua --- A sample custom reader for RIS bibliography format --- https://en.wikipedia.org/wiki/RIS_(file_format) --- The references are converted to inline pandoc/CSL YAML --- references in the metadata. - -local inspect = require"inspect" - -local types = - { ABST = "article", - ADVS = "motion-picture", - AGGR = "dataset", - ANCIENT = "book", - ART = "graphic", - BILL = "bill", - BLOG = "post-weblog", - BOOK = "book", - CASE = "legal_case", - CHAP = "chapter", - CHART = "graphic", - CLSWK = "book", - COMP = "program", - CONF = "paper-conference", - CPAPER = "paper-conference", - CTLG = "catalog", - DATA = "dataset", - DBASE = "dataset", - DICT = "book", - EBOOK = "book", - ECHAP = "chapter", - EDBOOK = "book", - EJOUR = "article", - WEB = "webpage", - ENCYC = "entry-encyclopedia", - EQUA = "figure", - FIGURE = "figure", - GEN = "entry", - GOVDOC = "report", - GRANT = "report", - HEAR = "report", - ICOMM = "personal_communication", - INPR = "article-journal", - JFULL = "article-journal", - JOUR = "article-journal", - LEGAL = "legal_case", - MANSCPT = "manuscript", - MAP = "map", - MGZN = "article-magazine", - MPCT = "motion-picture", - MULTI = "webpage", - MUSIC = "musical_score", - NEWS = "article-newspaper", - PAMP = "pamphlet", - PAT = "patent", - PCOMM = "personal_communication", - RPRT = "report", - SER = "article", - SLIDE = "graphic", - SOUND = "musical_score", - STAND = "report", - STAT = "legislation", - THES = "thesis", - UNBILL = "bill", - UNPB = "unpublished", - VIDEO = "graphic" - } - -local function inlines(s) - local ils = {} - for t in string.gmatch(s, "%S+") do - if #ils == 0 then - ils = {pandoc.Str(t)} - else - table.insert(ils, pandoc.Space()) - table.insert(ils, pandoc.Str(t)) - end - end - return pandoc.MetaInlines(ils) -end - -local function clean(refpairs) - local ref = {} - for i = 1, #refpairs do - local k,v = table.unpack(refpairs[i]) - if k == "TY" then - ref["type"] = types[v] - elseif k == "VL" then - ref.volume = v - elseif k == "KW" then - ref.keyword = v - elseif k == "PB" then - ref.publisher = v - elseif k == "CY" or k == "PP" then - ref["publisher-place"] = v - elseif k == "SP" then - if ref.page then - ref.page = v .. ref.page - else - ref.page = v - end - elseif k == "EP" then - if ref.page then - ref.page = ref.page .. "-" .. v - else - ref.page = "-" .. v - end - elseif k == "AU" or k == "A1" or k == "A2" or k == "A3" then - if ref.author then - table.insert(ref.author, inlines(v)) - else - ref.author = {inlines(v)} - end - elseif k == "TI" or k == "T1" or k == "CT" or - (k == "BT" and ref.type == "book") then - ref.title = inlines(v) - elseif k == "ET" then - ref.edition = inlines(v) - elseif k == "NV" then - ref["number-of-volumes"] = inlines(v) - elseif k == "AB" then - ref.abstract = inlines(v) - elseif k == "ED" then - if ref.editor then - table.insert(ref.editor, inlines(v)) - else - ref.editor = {inlines(v)} - end - elseif k == "JO" or k == "JF" or k == "T2" or - (k == "BT" and ref.type ~= "book") then - ref["container-title"] = inlines(v) - elseif k == "PY" or k == "Y1" then - ref.issued = v - elseif k == "IS" then - ref.issue = v - elseif k == "SN" then - ref.ISSN = v - elseif k == "L" then - ref.lang = v - elseif k == "UR" or k == "LK" then - ref.URL = v - end - end - return ref -end - -function Reader(input, reader_options) - local refs = {} - local thisref = {} - local ids = {} - for line in string.gmatch(input, "[^\n]*") do - key, val = string.match(line, "([A-Z][A-Z0-9]) %- (.*)") - if key == "ER" then - -- clean up fields - local newref = clean(thisref) - -- ensure we have an id and if not, create a sensible one - if not newref.id then - newref.id = "" - for _,x in ipairs(newref.author) do - newref.id = newref.id .. string.match(pandoc.utils.stringify(x), "%a+") - end - if newref.issued then - newref.id = newref.id .. string.match(newref.issued, "%d+") - end - if ids[newref.id] then -- add disambiguator if needed - newref.id = newref.id .. "-" .. #ids - end - end - table.insert(ids, newref.id) - table.insert(refs, newref) - thisref = {} - elseif key then - table.insert(thisref, {key, val}) - end - end - return pandoc.Pandoc({}, pandoc.Meta { references = refs } ) -end -``` - -Example of use: - -``` -% pandoc -f ris.lua -t bibtex -TY - JOUR -AU - Shannon, Claude E. -PY - 1948 -DA - July -TI - A Mathematical Theory of Communication -T2 - Bell System Technical Journal -SP - 379 -EP - 423 -VL - 27 -ER - -TY - JOUR -T1 - On computable numbers, with an application to the Entscheidungsproblem -A1 - Turing, Alan Mathison -JO - Proc. of London Mathematical Society -VL - 47 -IS - 1 -SP - 230 -EP - 265 -Y1 - 1937 -ER - -^D -@article{Shannon1948, - author = {Shannon, Claude E.}, - title = {A {Mathematical} {Theory} of {Communication}}, - journal = {Bell System Technical Journal}, - volume = {27}, - pages = {379-423}, - year = {1948} -} -@article{Turing1937, - author = {Turing, Alan Mathison}, - title = {On Computable Numbers, with an Application to the - {Entscheidungsproblem}}, - journal = {Proc. of London Mathematical Society}, - volume = {47}, - number = {1}, - pages = {230-265}, - year = {1937} -} -``` - -# Example: a wiki Creole reader - -This is a parser for [Creole common wiki markup]. -It uses an [lpeg] grammar. Fun fact: this custom reader is faster than -pandoc's built-in creole reader! This shows that high-performance -readers can be designed in this way. - -[Creole common wiki markup]: http://www.wikicreole.org/wiki/CheatSheet - - -```lua --- A sample custom reader for Creole 1.0 (common wiki markup) --- http://www.wikicreole.org/wiki/CheatSheet - --- For better performance we put these functions in local variables: -local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt = - lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V, - lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt - -local whitespacechar = S(" \t\r\n") -local specialchar = S("/*~[]\\{}|") -local wordchar = (1 - (whitespacechar + specialchar)) -local spacechar = S(" \t") -local newline = P"\r"^-1 * P"\n" -local blankline = spacechar^0 * newline -local endline = newline * #-blankline -local endequals = spacechar^0 * P"="^0 * spacechar^0 * newline -local cellsep = spacechar^0 * P"|" - -local function trim(s) - return (s:gsub("^%s*(.-)%s*$", "%1")) -end - -local function ListItem(lev, ch) - local start - if ch == nil then - start = S"*#" - else - start = P(ch) - end - local subitem = function(c) - if lev < 6 then - return ListItem(lev + 1, c) - else - return (1 - 1) -- fails - end - end - local parser = spacechar^0 - * start^lev - * #(- start) - * spacechar^0 - * Ct((V"Inline" - (newline * spacechar^0 * S"*#"))^0) - * newline - * (Ct(subitem("*")^1) / pandoc.BulletList - + - Ct(subitem("#")^1) / pandoc.OrderedList - + - Cc(nil)) - / function (ils, sublist) - return { pandoc.Plain(ils), sublist } - end - return parser -end - --- Grammar -G = P{ "Doc", - Doc = Ct(V"Block"^0) - / pandoc.Pandoc ; - Block = blankline^0 - * ( V"Header" - + V"HorizontalRule" - + V"CodeBlock" - + V"List" - + V"Table" - + V"Para") ; - Para = Ct(V"Inline"^1) - * newline - / pandoc.Para ; - HorizontalRule = spacechar^0 - * P"----" - * spacechar^0 - * newline - / pandoc.HorizontalRule; - Header = (P("=")^1 / string.len) - * spacechar^1 - * Ct((V"Inline" - endequals)^1) - * endequals - / pandoc.Header; - CodeBlock = P"{{{" - * blankline - * C((1 - (newline * P"}}}"))^0) - * newline - * P"}}}" - / pandoc.CodeBlock; - Placeholder = P"<<<" - * C(P(1) - P">>>")^0 - * P">>>" - / function() return pandoc.Div({}) end; - List = V"BulletList" - + V"OrderedList" ; - BulletList = Ct(ListItem(1,'*')^1) - / pandoc.BulletList ; - OrderedList = Ct(ListItem(1,'#')^1) - / pandoc.OrderedList ; - Table = (V"TableHeader" + Cc{}) - * Ct(V"TableRow"^1) - / function(headrow, bodyrows) - local numcolumns = #(bodyrows[1]) - local aligns = {} - local widths = {} - for i = 1,numcolumns do - aligns[i] = pandoc.AlignDefault - widths[i] = 0 - end - return pandoc.utils.from_simple_table( - pandoc.SimpleTable({}, aligns, widths, headrow, bodyrows)) - end ; - TableHeader = Ct(V"HeaderCell"^1) - * cellsep^-1 - * spacechar^0 - * newline ; - TableRow = Ct(V"BodyCell"^1) - * cellsep^-1 - * spacechar^0 - * newline ; - HeaderCell = cellsep - * P"=" - * spacechar^0 - * Ct((V"Inline" - (newline + cellsep))^0) - / function(ils) return { pandoc.Plain(ils) } end ; - BodyCell = cellsep - * spacechar^0 - * Ct((V"Inline" - (newline + cellsep))^0) - / function(ils) return { pandoc.Plain(ils) } end ; - Inline = V"Emph" - + V"Strong" - + V"LineBreak" - + V"Link" - + V"URL" - + V"Image" - + V"Str" - + V"Space" - + V"SoftBreak" - + V"Escaped" - + V"Placeholder" - + V"Code" - + V"Special" ; - Str = wordchar^1 - / pandoc.Str; - Escaped = P"~" - * C(P(1)) - / pandoc.Str ; - Special = specialchar - / pandoc.Str; - Space = spacechar^1 - / pandoc.Space ; - SoftBreak = endline - * # -(V"HorizontalRule" + V"CodeBlock") - / pandoc.SoftBreak ; - LineBreak = P"\\\\" - / pandoc.LineBreak ; - Code = P"{{{" - * C((1 - P"}}}")^0) - * P"}}}" - / trim / pandoc.Code ; - Link = P"[[" - * C((1 - (P"]]" + P"|"))^0) - * (P"|" * Ct((V"Inline" - P"]]")^1))^-1 * P"]]" - / function(url, desc) - local txt = desc or {pandoc.Str(url)} - return pandoc.Link(txt, url) - end ; - Image = P"{{" - * #-P"{" - * C((1 - (S"}"))^0) - * (P"|" * Ct((V"Inline" - P"}}")^1))^-1 - * P"}}" - / function(url, desc) - local txt = desc or "" - return pandoc.Image(txt, url) - end ; - URL = P"http" - * P"s"^-1 - * P":" - * (1 - (whitespacechar + (S",.?!:;\"'" * #whitespacechar)))^1 - / function(url) - return pandoc.Link(pandoc.Str(url), url) - end ; - Emph = P"//" - * Ct((V"Inline" - P"//")^1) - * P"//" - / pandoc.Emph ; - Strong = P"**" - * Ct((V"Inline" -P"**")^1) - * P"**" - / pandoc.Strong ; -} - -function Reader(input, reader_options) - return lpeg.match(G, input) -end -``` - -Example of use: - -``` -% pandoc -f creole.lua -t markdown -== Wiki Creole - -You can make things **bold** or //italic// or **//both//** or //**both**//. - -Character formatting extends across line breaks: **bold, -this is still bold. This line deliberately does not end in star-star. - -Not bold. Character formatting does not cross paragraph boundaries. - -You can use [[internal links]] or [[http://www.wikicreole.org|external links]], -give the link a [[internal links|different]] name. -^D -## Wiki Creole - -You can make things **bold** or *italic* or ***both*** or ***both***. - -Character formatting extends across line breaks: \*\*bold, this is still -bold. This line deliberately does not end in star-star. - -Not bold. Character formatting does not cross paragraph boundaries. - -You can use [internal links](internal links) or [external -links](http://www.wikicreole.org), give the link a -[different](internal links) name. -``` - diff --git a/doc/custom-readers.md b/doc/custom-readers.md new file mode 100644 index 000000000..bef3ae050 --- /dev/null +++ b/doc/custom-readers.md @@ -0,0 +1,599 @@ +--- +author: +- John MacFarlane +date: 'November 18, 2021' +title: Creating Custom Pandoc Readers in Lua +--- + +# Introduction + +If you need to parse a format not already handled by pandoc, +you can create a custom reader using the [Lua] language. +Pandoc has a built-in Lua interpreter, so you needn't +install any additional software to do this. + +[Lua]: https://www.lua.org + +A custom writer is a Lua file that defines a function +called `Reader`, which takes two arguments: + +- a string, the raw input to be parsed +- optionally, a table of reader options, e.g. + `{ columns = 62, standalone = true }`. + +The `Reader` function should return a `Pandoc` AST. +This can be created using functions in the [`pandoc` module], +which is automatically in scope. (Indeed, all of the utility +functions that are available for [Lua filters] are available +in custom readers, too.) + +[Lua filters]: https://pandoc.org/lua-filters.html +[`pandoc` module]: https://pandoc.org/lua-filters.html#module-pandoc + +A minimal example would be + +```lua +function Reader(input) + return pandoc.Pandoc({ pandoc.CodeBlock(input) }) +end +``` + +This just returns a document containing a big code block with +all of the input. + +In a nontrivial reader, you'll want to parse the input. +You can do this using standard Lua library functions +(for example, the [patterns] library), or with the powerful +and fast [lpeg] parsing library, which is automatically in scope. +You can also use external Lua libraries (for example, +an XML parser). + +[patterns]: http://lua-users.org/wiki/PatternsTutorial +[lpeg]: http://www.inf.puc-rio.br/~roberto/lpeg/ + +# Example: plain text reader + +This is a simple example using [lpeg] to parse the input +into space-separated strings and blankline-separated paragraphs. + +```lua +-- A sample custom reader that just parses text into blankline-separated +-- paragraphs with space-separated words. + +-- For better performance we put these functions in local variables: +local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt = + lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V, + lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt + +local whitespacechar = S(" \t\r\n") +local wordchar = (1 - whitespacechar) +local spacechar = S(" \t") +local newline = P"\r"^-1 * P"\n" +local blanklines = newline * (spacechar^0 * newline)^1 +local endline = newline - blanklines + +-- Grammar +G = P{ "Pandoc", + Pandoc = Ct(V"Block"^0) / pandoc.Pandoc; + Block = blanklines^0 * V"Para" ; + Para = Ct(V"Inline"^1) / pandoc.Para; + Inline = V"Str" + V"Space" + V"SoftBreak" ; + Str = wordchar^1 / pandoc.Str; + Space = spacechar^1 / pandoc.Space; + SoftBreak = endline / pandoc.SoftBreak; +} + +function Reader(input) + return lpeg.match(G, input) +end +``` + +Example of use: + +``` +% pandoc -f plain.lua -t native +*Hello there*, this is plain text with no formatting +except paragraph breaks. + +- Like this one. +^D +[ Para + [ Str "*Hello" + , Space + , Str "there*," + , Space + , Str "this" + , Space + , Str "is" + , Space + , Str "plain" + , Space + , Str "text" + , Space + , Str "with" + , Space + , Str "no" + , Space + , Str "formatting" + , SoftBreak + , Str "except" + , Space + , Str "paragraph" + , Space + , Str "breaks." + ] +, Para + [ Str "-" + , Space + , Str "Like" + , Space + , Str "this" + , Space + , Str "one." + ] +] +``` + +# Example: a RIS bibliography reader + +This is a parser for [RIS bibliography] files. It can be used +to convert them to CSL JSON or YAML, BibTeX, or BibLaTeX. + +[RIS bibliography]: https://en.wikipedia.org/wiki/RIS_(file_format) + +```lua +-- A sample custom reader for RIS bibliography format +-- https://en.wikipedia.org/wiki/RIS_(file_format) +-- The references are converted to inline pandoc/CSL YAML +-- references in the metadata. + +local inspect = require"inspect" + +local types = + { ABST = "article", + ADVS = "motion-picture", + AGGR = "dataset", + ANCIENT = "book", + ART = "graphic", + BILL = "bill", + BLOG = "post-weblog", + BOOK = "book", + CASE = "legal_case", + CHAP = "chapter", + CHART = "graphic", + CLSWK = "book", + COMP = "program", + CONF = "paper-conference", + CPAPER = "paper-conference", + CTLG = "catalog", + DATA = "dataset", + DBASE = "dataset", + DICT = "book", + EBOOK = "book", + ECHAP = "chapter", + EDBOOK = "book", + EJOUR = "article", + WEB = "webpage", + ENCYC = "entry-encyclopedia", + EQUA = "figure", + FIGURE = "figure", + GEN = "entry", + GOVDOC = "report", + GRANT = "report", + HEAR = "report", + ICOMM = "personal_communication", + INPR = "article-journal", + JFULL = "article-journal", + JOUR = "article-journal", + LEGAL = "legal_case", + MANSCPT = "manuscript", + MAP = "map", + MGZN = "article-magazine", + MPCT = "motion-picture", + MULTI = "webpage", + MUSIC = "musical_score", + NEWS = "article-newspaper", + PAMP = "pamphlet", + PAT = "patent", + PCOMM = "personal_communication", + RPRT = "report", + SER = "article", + SLIDE = "graphic", + SOUND = "musical_score", + STAND = "report", + STAT = "legislation", + THES = "thesis", + UNBILL = "bill", + UNPB = "unpublished", + VIDEO = "graphic" + } + +local function inlines(s) + local ils = {} + for t in string.gmatch(s, "%S+") do + if #ils == 0 then + ils = {pandoc.Str(t)} + else + table.insert(ils, pandoc.Space()) + table.insert(ils, pandoc.Str(t)) + end + end + return pandoc.MetaInlines(ils) +end + +local function clean(refpairs) + local ref = {} + for i = 1, #refpairs do + local k,v = table.unpack(refpairs[i]) + if k == "TY" then + ref["type"] = types[v] + elseif k == "VL" then + ref.volume = v + elseif k == "KW" then + ref.keyword = v + elseif k == "PB" then + ref.publisher = v + elseif k == "CY" or k == "PP" then + ref["publisher-place"] = v + elseif k == "SP" then + if ref.page then + ref.page = v .. ref.page + else + ref.page = v + end + elseif k == "EP" then + if ref.page then + ref.page = ref.page .. "-" .. v + else + ref.page = "-" .. v + end + elseif k == "AU" or k == "A1" or k == "A2" or k == "A3" then + if ref.author then + table.insert(ref.author, inlines(v)) + else + ref.author = {inlines(v)} + end + elseif k == "TI" or k == "T1" or k == "CT" or + (k == "BT" and ref.type == "book") then + ref.title = inlines(v) + elseif k == "ET" then + ref.edition = inlines(v) + elseif k == "NV" then + ref["number-of-volumes"] = inlines(v) + elseif k == "AB" then + ref.abstract = inlines(v) + elseif k == "ED" then + if ref.editor then + table.insert(ref.editor, inlines(v)) + else + ref.editor = {inlines(v)} + end + elseif k == "JO" or k == "JF" or k == "T2" or + (k == "BT" and ref.type ~= "book") then + ref["container-title"] = inlines(v) + elseif k == "PY" or k == "Y1" then + ref.issued = v + elseif k == "IS" then + ref.issue = v + elseif k == "SN" then + ref.ISSN = v + elseif k == "L" then + ref.lang = v + elseif k == "UR" or k == "LK" then + ref.URL = v + end + end + return ref +end + +function Reader(input, reader_options) + local refs = {} + local thisref = {} + local ids = {} + for line in string.gmatch(input, "[^\n]*") do + key, val = string.match(line, "([A-Z][A-Z0-9]) %- (.*)") + if key == "ER" then + -- clean up fields + local newref = clean(thisref) + -- ensure we have an id and if not, create a sensible one + if not newref.id then + newref.id = "" + for _,x in ipairs(newref.author) do + newref.id = newref.id .. string.match(pandoc.utils.stringify(x), "%a+") + end + if newref.issued then + newref.id = newref.id .. string.match(newref.issued, "%d+") + end + if ids[newref.id] then -- add disambiguator if needed + newref.id = newref.id .. "-" .. #ids + end + end + table.insert(ids, newref.id) + table.insert(refs, newref) + thisref = {} + elseif key then + table.insert(thisref, {key, val}) + end + end + return pandoc.Pandoc({}, pandoc.Meta { references = refs } ) +end +``` + +Example of use: + +``` +% pandoc -f ris.lua -t bibtex +TY - JOUR +AU - Shannon, Claude E. +PY - 1948 +DA - July +TI - A Mathematical Theory of Communication +T2 - Bell System Technical Journal +SP - 379 +EP - 423 +VL - 27 +ER - +TY - JOUR +T1 - On computable numbers, with an application to the Entscheidungsproblem +A1 - Turing, Alan Mathison +JO - Proc. of London Mathematical Society +VL - 47 +IS - 1 +SP - 230 +EP - 265 +Y1 - 1937 +ER - +^D +@article{Shannon1948, + author = {Shannon, Claude E.}, + title = {A {Mathematical} {Theory} of {Communication}}, + journal = {Bell System Technical Journal}, + volume = {27}, + pages = {379-423}, + year = {1948} +} +@article{Turing1937, + author = {Turing, Alan Mathison}, + title = {On Computable Numbers, with an Application to the + {Entscheidungsproblem}}, + journal = {Proc. of London Mathematical Society}, + volume = {47}, + number = {1}, + pages = {230-265}, + year = {1937} +} +``` + +# Example: a wiki Creole reader + +This is a parser for [Creole common wiki markup]. +It uses an [lpeg] grammar. Fun fact: this custom reader is faster than +pandoc's built-in creole reader! This shows that high-performance +readers can be designed in this way. + +[Creole common wiki markup]: http://www.wikicreole.org/wiki/CheatSheet + + +```lua +-- A sample custom reader for Creole 1.0 (common wiki markup) +-- http://www.wikicreole.org/wiki/CheatSheet + +-- For better performance we put these functions in local variables: +local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt = + lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V, + lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt + +local whitespacechar = S(" \t\r\n") +local specialchar = S("/*~[]\\{}|") +local wordchar = (1 - (whitespacechar + specialchar)) +local spacechar = S(" \t") +local newline = P"\r"^-1 * P"\n" +local blankline = spacechar^0 * newline +local endline = newline * #-blankline +local endequals = spacechar^0 * P"="^0 * spacechar^0 * newline +local cellsep = spacechar^0 * P"|" + +local function trim(s) + return (s:gsub("^%s*(.-)%s*$", "%1")) +end + +local function ListItem(lev, ch) + local start + if ch == nil then + start = S"*#" + else + start = P(ch) + end + local subitem = function(c) + if lev < 6 then + return ListItem(lev + 1, c) + else + return (1 - 1) -- fails + end + end + local parser = spacechar^0 + * start^lev + * #(- start) + * spacechar^0 + * Ct((V"Inline" - (newline * spacechar^0 * S"*#"))^0) + * newline + * (Ct(subitem("*")^1) / pandoc.BulletList + + + Ct(subitem("#")^1) / pandoc.OrderedList + + + Cc(nil)) + / function (ils, sublist) + return { pandoc.Plain(ils), sublist } + end + return parser +end + +-- Grammar +G = P{ "Doc", + Doc = Ct(V"Block"^0) + / pandoc.Pandoc ; + Block = blankline^0 + * ( V"Header" + + V"HorizontalRule" + + V"CodeBlock" + + V"List" + + V"Table" + + V"Para") ; + Para = Ct(V"Inline"^1) + * newline + / pandoc.Para ; + HorizontalRule = spacechar^0 + * P"----" + * spacechar^0 + * newline + / pandoc.HorizontalRule; + Header = (P("=")^1 / string.len) + * spacechar^1 + * Ct((V"Inline" - endequals)^1) + * endequals + / pandoc.Header; + CodeBlock = P"{{{" + * blankline + * C((1 - (newline * P"}}}"))^0) + * newline + * P"}}}" + / pandoc.CodeBlock; + Placeholder = P"<<<" + * C(P(1) - P">>>")^0 + * P">>>" + / function() return pandoc.Div({}) end; + List = V"BulletList" + + V"OrderedList" ; + BulletList = Ct(ListItem(1,'*')^1) + / pandoc.BulletList ; + OrderedList = Ct(ListItem(1,'#')^1) + / pandoc.OrderedList ; + Table = (V"TableHeader" + Cc{}) + * Ct(V"TableRow"^1) + / function(headrow, bodyrows) + local numcolumns = #(bodyrows[1]) + local aligns = {} + local widths = {} + for i = 1,numcolumns do + aligns[i] = pandoc.AlignDefault + widths[i] = 0 + end + return pandoc.utils.from_simple_table( + pandoc.SimpleTable({}, aligns, widths, headrow, bodyrows)) + end ; + TableHeader = Ct(V"HeaderCell"^1) + * cellsep^-1 + * spacechar^0 + * newline ; + TableRow = Ct(V"BodyCell"^1) + * cellsep^-1 + * spacechar^0 + * newline ; + HeaderCell = cellsep + * P"=" + * spacechar^0 + * Ct((V"Inline" - (newline + cellsep))^0) + / function(ils) return { pandoc.Plain(ils) } end ; + BodyCell = cellsep + * spacechar^0 + * Ct((V"Inline" - (newline + cellsep))^0) + / function(ils) return { pandoc.Plain(ils) } end ; + Inline = V"Emph" + + V"Strong" + + V"LineBreak" + + V"Link" + + V"URL" + + V"Image" + + V"Str" + + V"Space" + + V"SoftBreak" + + V"Escaped" + + V"Placeholder" + + V"Code" + + V"Special" ; + Str = wordchar^1 + / pandoc.Str; + Escaped = P"~" + * C(P(1)) + / pandoc.Str ; + Special = specialchar + / pandoc.Str; + Space = spacechar^1 + / pandoc.Space ; + SoftBreak = endline + * # -(V"HorizontalRule" + V"CodeBlock") + / pandoc.SoftBreak ; + LineBreak = P"\\\\" + / pandoc.LineBreak ; + Code = P"{{{" + * C((1 - P"}}}")^0) + * P"}}}" + / trim / pandoc.Code ; + Link = P"[[" + * C((1 - (P"]]" + P"|"))^0) + * (P"|" * Ct((V"Inline" - P"]]")^1))^-1 * P"]]" + / function(url, desc) + local txt = desc or {pandoc.Str(url)} + return pandoc.Link(txt, url) + end ; + Image = P"{{" + * #-P"{" + * C((1 - (S"}"))^0) + * (P"|" * Ct((V"Inline" - P"}}")^1))^-1 + * P"}}" + / function(url, desc) + local txt = desc or "" + return pandoc.Image(txt, url) + end ; + URL = P"http" + * P"s"^-1 + * P":" + * (1 - (whitespacechar + (S",.?!:;\"'" * #whitespacechar)))^1 + / function(url) + return pandoc.Link(pandoc.Str(url), url) + end ; + Emph = P"//" + * Ct((V"Inline" - P"//")^1) + * P"//" + / pandoc.Emph ; + Strong = P"**" + * Ct((V"Inline" -P"**")^1) + * P"**" + / pandoc.Strong ; +} + +function Reader(input, reader_options) + return lpeg.match(G, input) +end +``` + +Example of use: + +``` +% pandoc -f creole.lua -t markdown +== Wiki Creole + +You can make things **bold** or //italic// or **//both//** or //**both**//. + +Character formatting extends across line breaks: **bold, +this is still bold. This line deliberately does not end in star-star. + +Not bold. Character formatting does not cross paragraph boundaries. + +You can use [[internal links]] or [[http://www.wikicreole.org|external links]], +give the link a [[internal links|different]] name. +^D +## Wiki Creole + +You can make things **bold** or *italic* or ***both*** or ***both***. + +Character formatting extends across line breaks: \*\*bold, this is still +bold. This line deliberately does not end in star-star. + +Not bold. Character formatting does not cross paragraph boundaries. + +You can use [internal links](internal links) or [external +links](http://www.wikicreole.org), give the link a +[different](internal links) name. +``` + -- cgit v1.2.3