diff options
82 files changed, 1380 insertions, 605 deletions
diff --git a/.travis.yml b/.travis.yml index 7461426d1..836586808 100644 --- a/.travis.yml +++ b/.travis.yml @@ -122,7 +122,7 @@ install: cabal install -v1 --disable-optimization --flags="$FLAGS" --enable-tests --force-reinstalls --reorder-goals --max-backjumps=-1 $CABALARGS ;; *) - cabal install --disable-optimization --only-dependencies --flags="$FLAGS" --enable-tests --force-reinstalls --reorder-goals --max-backjumps=-1 $CABALARGS + cabal install --disable-optimization --only-dependencies --flags="$FLAGS" --enable-tests --force-reinstalls --reorder-goals --max-backjumps=-1 $CABALARGS . ;; esac ;; diff --git a/INSTALL.md b/INSTALL.md index 32e71cc3c..aff0b103f 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -39,7 +39,8 @@ - For PDF output, you'll also need LaTeX. Because a full [MacTeX] installation takes more than a gigabyte of disk space, we recommend installing [BasicTeX](http://www.tug.org/mactex/morepackages.html) - (64M) and using the `tlmgr` tool to install additional packages + (64M) or [TinyTeX](https://yihui.name/tinytex/) + and using the `tlmgr` tool to install additional packages as needed. If you get errors warning of fonts not found, try tlmgr install collection-fontsrecommended @@ -92,6 +93,19 @@ [TeX Live](http://www.tug.org/texlive/) via your package manager. (On Debian/Ubuntu, `apt-get install texlive`.) +## Chrome OS + +On Chrome OS, pandoc can be installed using the +[chromebrew](https://github.com/skycocker/chromebrew) package manager +with the command: + +```sh +crew install pandoc +``` + +This will automatically build and configure pandoc for the specific +device you are using. + ## BSD - Pandoc is in the [NetBSD] and [FreeBSD ports] repositories. @@ -14,10 +14,16 @@ quick: stack install --resolver=$(RESOLVER) --ghc-options='$(GHCOPTS)' --install-ghc --flag 'pandoc:embed_data_files' --fast --test --test-arguments='-j4 --hide-successes $(TESTARGS)' quick-cabal: - cabal new-build . 
--ghc-options '$(GHCOPTS)' --flags '+embed_data_files' --enable-tests --only-dependencies - cabal new-build . --ghc-options '$(GHCOPTS)' --flags '+embed_data_files' --enable-tests --disable-optimization + cabal new-configure . --ghc-options '$(GHCOPTS)' --flags '+embed_data_files' --enable-tests + cabal new-build . --disable-optimization cabal new-install --symlink-bindir=$$HOME/.local/bin - cabal new-run test-pandoc --ghc-options '$(GHCOPTS)' --flags '+embed_data_files' --disable-optimization -- --hide-successes $(TESTARGS) + cabal new-run test-pandoc --disable-optimization -- --hide-successes $(TESTARGS) + +full-cabal: + cabal new-configure . --ghc-options '$(GHCOPTS)' --flags '+embed_data_files +weigh-pandoc +trypandoc' --enable-tests --enable-benchmarks + cabal new-build . --disable-optimization + cabal new-install --symlink-bindir=$$HOME/.local/bin + cabal new-run test-pandoc --disable-optimization -- --hide-successes $(TESTARGS) full: stack install --resolver=$(RESOLVER) --flag 'pandoc:embed_data_files' --flag 'pandoc:weigh-pandoc' --flag 'pandoc:trypandoc' --bench --no-run-benchmarks --test --test-arguments='-j4 --hide-successes' --ghc-options '-Wall -Werror -fno-warn-unused-do-bind -O0 -j4 $(GHCOPTS)' @@ -85,11 +91,11 @@ pandoc-$(version)-windows-%.msi: pandoc-windows-%.msi .INTERMEDIATE: pandoc-windows-i386.msi pandoc-windows-x86_64.msi pandoc-windows-i386.msi: - JOBID=$(shell curl 'https://ci.appveyor.com/api/projects/jgm/pandoc' | jq -r '.build.jobs[1].jobId') && \ + JOBID=$(shell curl https://ci.appveyor.com/api/projects/jgm/pandoc | jq '.build.jobs[]| select(.name|test("i386")) | .jobId') && \ wget "https://ci.appveyor.com/api/buildjobs/$$JOBID/artifacts/windows%2F$@" -O $@ pandoc-windows-x86_64.msi: - JOBID=$(shell curl 'https://ci.appveyor.com/api/projects/jgm/pandoc' | jq -r '.build.jobs[0].jobId') && \ + JOBID=$(shell curl https://ci.appveyor.com/api/projects/jgm/pandoc | jq '.build.jobs[]| select(.name|test("x86_64")) | .jobId') && \ wget 
"https://ci.appveyor.com/api/buildjobs/$$JOBID/artifacts/windows%2F$@" -O $@ man/pandoc.1: MANUAL.txt man/pandoc.1.template diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index db6c2eb9c..3ed7011e4 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -19,6 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} import Prelude import Text.Pandoc +import Text.Pandoc.Error (PandocError(..)) +import Control.Monad.Except (throwError) import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.ByteString as B import Criterion.Main @@ -37,14 +39,15 @@ readerBench doc name = $ nf (\i -> either (error . show) id $ runPure (readerFun i)) inp Left _ -> Nothing - where res = runPure $ do - (TextReader r, rexts) - <- either (fail . show) return $ getReader name - (TextWriter w, wexts) - <- either (fail . show) return $ getWriter name - inp <- w def{ writerWrapText = WrapAuto, writerExtensions = wexts } - doc - return (r def{ readerExtensions = rexts }, inp) + where res = runPure $ + case (getReader name, getWriter name) of + (Right (TextReader r, rexts), + Right (TextWriter w, wexts)) -> do + inp <- w def{ writerWrapText = WrapAuto + , writerExtensions = wexts } doc + return $ (r def{ readerExtensions = rexts }, inp) + _ -> throwError $ PandocSomeError + $ "could not get text reader and writer for " ++ name writerBench :: Pandoc -> String @@ -55,11 +58,13 @@ writerBench doc name = Just $ bench (name ++ " writer") $ nf (\d -> either (error . show) id $ runPure (writerFun d)) doc - _ -> Nothing + Left _ -> Nothing where res = runPure $ do - (TextWriter w, wexts) - <- either (fail . 
show) return $ getWriter name - return $ w def{ writerExtensions = wexts } + case (getWriter name) of + Right (TextWriter w, wexts) -> + return $ w def{ writerExtensions = wexts } + _ -> throwError $ PandocSomeError + $ "could not get text writer for " ++ name main :: IO () main = do diff --git a/data/docx/[Content_Types].xml b/data/docx/[Content_Types].xml index 1e888dff9..0c0118a88 100644 --- a/data/docx/[Content_Types].xml +++ b/data/docx/[Content_Types].xml @@ -1,2 +1,2 @@ <?xml version="1.0" encoding="UTF-8"?> -<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="xml" ContentType="application/xml" /><Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml" /><Override PartName="/word/webSettings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml" /><Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml" /><Override PartName="/word/settings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml" /><Override PartName="/word/theme/theme1.xml" ContentType="application/vnd.openxmlformats-officedocument.theme+xml" /><Override PartName="/word/fontTable.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml" /><Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml" /><Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml" /><Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml" /><Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml" /><Override PartName="/word/comments.xml" 
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml" /><Override PartName="/word/footnotes.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml" /></Types> +<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="xml" ContentType="application/xml" /><Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml" /><Override PartName="/word/webSettings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml" /><Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml" /><Override PartName="/word/settings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml" /><Override PartName="/word/theme/theme1.xml" ContentType="application/vnd.openxmlformats-officedocument.theme+xml" /><Override PartName="/word/fontTable.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml" /><Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml" /><Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml" /><Override PartName="/docProps/custom.xml" ContentType="application/vnd.openxmlformats-officedocument.custom-properties+xml"/><Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml" /><Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml" /><Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml" /><Override PartName="/word/footnotes.xml" 
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml" /></Types> diff --git a/data/docx/_rels/.rels b/data/docx/_rels/.rels index 44e5daa3d..1121ed8d2 100644 --- a/data/docx/_rels/.rels +++ b/data/docx/_rels/.rels @@ -1,2 +1,3 @@ <?xml version="1.0" encoding="UTF-8"?> -<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml" /><Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml" /><Relationship Id="rId3" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml" /></Relationships>
\ No newline at end of file +<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml" /><Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml" /><Relationship Id="rId3" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml" /><Relationship Id="rId5" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties" Target="docProps/custom.xml"/> +</Relationships> diff --git a/data/docx/docProps/custom.xml b/data/docx/docProps/custom.xml new file mode 100644 index 000000000..bdd0df8dd --- /dev/null +++ b/data/docx/docProps/custom.xml @@ -0,0 +1,2 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"></Properties> diff --git a/data/pandoc.lua b/data/pandoc.lua index 54370bd1b..370c6a944 100644 --- a/data/pandoc.lua +++ b/data/pandoc.lua @@ -267,8 +267,16 @@ M.MetaInlines = M.MetaValue:create_constructor( -- @tparam {MetaValue,...} meta_values list of meta values M.MetaList = M.MetaValue:create_constructor( 'MetaList', - function (content) return ensureList(content) end + function (content) + if content.tag == 'MetaList' then + return content + end + return ensureList(content) + end ) +for k, v in pairs(List) do + M.MetaList.behavior[k] = v +end --- Meta map -- @function MetaMap @@ -403,7 +411,7 @@ M.Null = M.Block:create_constructor( M.OrderedList = M.Block:create_constructor( "OrderedList", function(items, listAttributes) - listAttributes = listAttributes or {1, M.DefaultStyle, M.DefaultDelim} + listAttributes = listAttributes or M.ListAttributes() 
return {c = {listAttributes, ensureList(items)}} end, {{listAttributes = {"start", "style", "delimiter"}}, "content"} @@ -857,6 +865,34 @@ function M.Citation:new (id, mode, prefix, suffix, note_num, hash) } end +-- ListAttributes +M.ListAttributes = AstElement:make_subtype 'ListAttributes' + +--- Creates a set of list attributes. +-- @function ListAttributes +-- @tparam[opt] integer start number of the first list item +-- @tparam[opt] string style style used for list numbering +-- @tparam[opt] DefaultDelim|Period|OneParen|TwoParens delimiter delimiter of list numbers +-- @treturn table list attributes table +function M.ListAttributes:new (start, style, delimiter) + start = start or 1 + style = style or 'DefaultStyle' + delimiter = delimiter or 'DefaultDelim' + return {start, style, delimiter} +end +M.ListAttributes.behavior._field_names = {start = 1, style = 2, delimiter = 3} +M.ListAttributes.behavior.__index = function (t, k) + return rawget(t, getmetatable(t)._field_names[k]) or + getmetatable(t)[k] +end +M.ListAttributes.behavior.__newindex = function (t, k, v) + if getmetatable(t)._field_names[k] then + rawset(t, getmetatable(t)._field_names[k], v) + else + rawset(t, k, v) + end +end + ------------------------------------------------------------------------ -- Constants diff --git a/data/sample.lua b/data/sample.lua index 6c09442b5..9d6bf0fc7 100644 --- a/data/sample.lua +++ b/data/sample.lua @@ -2,7 +2,8 @@ -- that is very similar to that of pandoc's HTML writer. -- There is one new feature: code blocks marked with class 'dot' -- are piped through graphviz and images are included in the HTML --- output using 'data:' URLs. +-- output using 'data:' URLs. The image format can be controlled +-- via the `image_format` metadata field. -- -- Invoke with: pandoc -t sample.lua -- @@ -12,6 +13,28 @@ -- produce informative error messages if your code contains -- syntax errors. 
+local pipe = pandoc.pipe +local stringify = (require "pandoc.utils").stringify + +-- The global variable PANDOC_DOCUMENT contains the full AST of +-- the document which is going to be written. It can be used to +-- configure the writer. +local meta = PANDOC_DOCUMENT.meta + +-- Choose the image format based on the value of the +-- `image_format` meta value. +local image_format = meta.image_format + and stringify(meta.image_format) + or "png" +local image_mime_type = ({ + jpeg = "image/jpeg", + jpg = "image/jpeg", + gif = "image/gif", + png = "image/png", + svg = "image/svg+xml", + })[image_format] + or error("unsupported image format `" .. image_format .. "`") + -- Character escaping local function escape(s, in_attribute) return s:gsub("[<>&\"']", @@ -44,19 +67,6 @@ local function attributes(attr) return table.concat(attr_table) end --- Run cmd on a temporary file containing inp and return result. -local function pipe(cmd, inp) - local tmp = os.tmpname() - local tmph = io.open(tmp, "w") - tmph:write(inp) - tmph:close() - local outh = io.popen(cmd .. " " .. tmp,"r") - local result = outh:read("*all") - outh:close() - os.remove(tmp) - return result -end - -- Table to store footnotes, so they can be included at the end. local notes = {} @@ -217,8 +227,8 @@ function CodeBlock(s, attr) -- If code block has class 'dot', pipe the contents through dot -- and base64, and include the base64-encoded png as a data: URL. if attr.class and string.match(' ' .. attr.class .. ' ',' dot ') then - local png = pipe("base64", pipe("dot -Tpng", s)) - return '<img src="data:image/png;base64,' .. png .. '"/>' + local img = pipe("base64", {}, pipe("dot", {"-T" .. image_format}, s)) + return '<img src="data:' .. image_mime_type .. ';base64,' .. img .. '"/>' -- otherwise treat as code (one could pipe through a highlighter) else return "<pre><code" .. attributes(attr) .. ">" .. escape(s) .. 
diff --git a/doc/customizing-pandoc.md b/doc/customizing-pandoc.md new file mode 100644 index 000000000..0fdfc8115 --- /dev/null +++ b/doc/customizing-pandoc.md @@ -0,0 +1,157 @@ +--- +author: +- Mauro Bieg +- John MacFarlane +title: Customizing Pandoc +--- + +This document provides a quick overview over the various ways to +customize pandoc's output, with links to fuller documentation +and some examples. + +## Templates + +When the `-s`/`--standalone` option is used, pandoc will +generate a standalone document rather than a fragment. +For example, in HTML output this will include the +`<head>` element; in LaTeX output, it will include the +preamble. + +Pandoc comes with a default template for (almost) every output +format. A template is a plain text file containing variables +that are replaced by text generated by pandoc. For example, +the variable `$body$` will be replaced by the document body, +and `$title$` by the title from metadata. Variables will +be automatically populated by the contents of like-named +metadata fields (with proper escaping). (See +[YAML metadata blocks](/MANUAL.html#extension-yaml_metadata_block) +for documentation on setting metafields in pandoc markdown +documents; the command line option +[`--metadata`](/MANUAL.html#option--metadata) can also be +used.) Values for variables can also be specified directly +from the command line using `--variable` (which does no escaping). + +To look at the default template for an output format, you can do +`pandoc -D FORMAT`, where `FORMAT` is replaced by the name of +the format. You can also replace the defaults with your +own custom templates, either by using the `--template` option +or by putting the custom template in your user data directory +(on linux and macOS, `~/.pandoc/templates/`). + +Note that in many cases you can avoid the need for a custom +template by making use of the `--include-in-header`, +`--include-before-body`, and `--include-after-body` options. 
+ +For more information, see [Templates](/MANUAL.html#templates) in +the pandoc manual. + +### Example: adding structured author data to HTML + +TODO + +### Example: generating documents from YAML metadata + +TODO <!-- Example of generating a structured document, +say, a table, from structured YAML metadata using +just the control structures in pandoc's template +language. --> + +## Reference docx/pptx/odt + +For `docx`, `pptx` or `odt` documents, things are a bit more +complicated. Instead of a single template file, you need to +provide a customized `reference.docx/pptx/odt`. +See the manual for the +[`--reference-doc`](/MANUAL.html#option--reference-doc) option. + +### Example: changing the font and line spacing in a Word docx + +TODO + +## Filters + +Templates are very powerful, but they are only a sort of scaffold to +place your document's body text in. You cannot directly change the +body text using the template. + +If you need to affect the output of the actual body text, you +can use a pandoc filter. A filter is a small program that +transforms the document, between the parsing and the writing phase, +while it is still in pandoc's native format. For example, +a filter might find all the Header elements of a document +and capitalize their text. + +Pandoc's native representation of a document is an +abstract syntax tree (AST), not unlike the HTML DOM. It is +documented +[here](https://hackage.haskell.org/package/pandoc-types/docs/Text-Pandoc-Definition.html). A `Pandoc` document is a chunk of +metadata (`Meta`) and a list of `Block`s. The `Block`s, in +turn, are composed of other `Block`s and `Inline` elements. +(`Block` elements are things like paragraphs, lists, headers, +and code blocks. `Inline` elements are individual words, +links, emphasis, and so on.) Filters operate on these +elements. You can use `pandoc -t native` to learn about the +AST's structure. 
+ +There are two kinds of filters: JSON filters (which transform a +JSON serialization of the pandoc AST, and may be written in any +language that can parse and emit JSON), and Lua filters (which +use an interface built directly into pandoc, and must be written +in the Lua language). If you are writing your own filters, it +is best to use Lua filters, which are more portable (they +require only pandoc itself) and more efficient. See [Lua +filters](lua-filters.html) for documentation and examples. If +you would prefer to write your filter in another language, see +[Filters](filters.html) for a gentle introduction to JSON +filters. + +There's a repository of lua filters at +[pandoc/lua-filters](https://github.com/pandoc/lua-filters) +on GitHub. A number of pandoc filters, written in +Haskell, are available on +[Hackage](https://hackage.haskell.org/packages/search?terms=pandoc+filter) +and can be installed using the `stack` or `cabal` tools. +The wiki also lists [third party +filters](https://github.com/jgm/pandoc/wiki/Pandoc-Filters). 
+ +### Example: capitalizing headers + +TODO + +### Example: code extractor + +TODO + +## Generic Divs and Spans + +TODO +[Divs and Spans](/MANUAL.html#divs-and-spans): generic blocks +that can be transformed with filters + +### Example: colored text + + +### Example: custom styles in docx + +[Custom Styles in Docx](/MANUAL.html#custom-styles-in-docx) + +## Raw attributes + +TODO +[Generic raw attributes](/MANUAL.html#generic-raw-attribute): +to include raw snippets + +## Custom writers + +TODO +[Custom writers](/MANUAL.html#custom-writers) + +## Custom syntax highlighting + +TODO +[Custom syntax highlighting](/MANUAL.html#syntax-highlighting), +provided by the [skylighting +library](https://github.com/jgm/skylighting) + +including highlighting styles + diff --git a/doc/filters.md b/doc/filters.md index a27d657fb..c3edd0e46 100644 --- a/doc/filters.md +++ b/doc/filters.md @@ -12,8 +12,8 @@ reader into pandoc’s intermediate representation of the document---an "abstract syntax tree" or AST---which is then converted by the writer into the target format. The pandoc AST format is defined in the module -`Text.Pandoc.Definition` in -[pandoc-types](https://hackage.haskell.org/package/pandoc-types). +[`Text.Pandoc.Definition` in the `pandoc-types` package +](https://hackage.haskell.org/package/pandoc-types/docs/Text-Pandoc-Definition.html). A "filter" is a program that modifies the AST, between the reader and the writer: diff --git a/doc/lua-filter-types-and-objects.md b/doc/lua-filter-types-and-objects.md index f6d11a928..3e1637819 100644 --- a/doc/lua-filter-types-and-objects.md +++ b/doc/lua-filter-types-and-objects.md @@ -4,18 +4,18 @@ ## Pandoc -blocks +`blocks` : document content ([list] of [blocks]) -meta +`meta` : document meta information ([Meta] object) ## Meta -Meta information on a document; string-indexed collection of [meta -values](#metavalue). This is represented as a string-indexed table containing -[meta values](#MetaValue). 
+Meta information on a document; string-indexed collection of +[meta values](#metavalue). This is represented as a +string-indexed table containing [meta values](#MetaValue). ## MetaValue @@ -24,8 +24,50 @@ Document meta information items. ### MetaBlocks -blocks -: a list of blocks usable as meta value ([list] of [blocks]) +A list of blocks usable as meta value ([list] of [blocks]) + +Fields: + +`tag`, `t` +: the literal `MetaBlocks` (string) + +### MetaBool + +Plain Lua boolean value (boolean) + +### MetaInlines + +List of inlines used in metadata ([list] of [inline]s) + +Fields: + +`tag`, `t` +: the literal `MetaInlines` (string) + +### MetaList + +A list of other [meta value]s. ([list]) + +Fields: + +`tag`, `t` +: the literal `MetaList` (string) + +### MetaMap + +A string-indexed map of meta-values. (table) + +Fields: + +`tag`, `t` +: the literal `MetaMap` (string) + +*Note*: The fields will be shadowed if the map contains a field +with the same name as those listed. + +### MetaString + +Plain Lua string value (string) ## Block @@ -37,96 +79,96 @@ A block quote element content: : block content ([list] of [blocks]) -tag, t +`tag`, `t` : the literal `BlockQuote` (string) ### BulletList A bullet list -content +`content` : list of items ([list] of [blocks]) -tag, t -: the literal `BlockQuote` (string) +`tag`, `t` +: the literal `BulletList` (string) ### CodeBlock Block of code. -text +`text` : code string (string) -attr +`attr` : element attributes (Attr) -identifier +`identifier` : alias for `attr.identifier` (string) -classes +`classes` : alias for `attr.classes` ([list] of strings) -attributes -: alias for `attr.attributes` ([attributes]) +`attributes` +: alias for `attr.attributes` ([Attributes]) -tag, t +`tag`, `t` : the literal `CodeBlock` (string) ### DefinitionList Definition list, containing terms and their explanation. 
-content +`content` : list of items -tag, t +`tag`, `t` : the literal `DefinitionList` (string) ### Div Generic block container with attributes -content +`content` : block content ([list] of [blocks]) -attr +`attr` : element attributes (Attr) -identifier +`identifier` : alias for `attr.identifier` (string) -classes +`classes` : alias for `attr.classes` ([list] of strings) -attributes -: alias for `attr.attributes` ([attributes]) +`attributes` +: alias for `attr.attributes` ([Attributes]) -tag, t +`tag`, `t` : the literal `Div` (string) ### Header Creates a header element. -level +`level` : header level (integer) -content +`content` : inline content ([list] of [inlines]) -attr +`attr` : element attributes (Attr) -identifier +`identifier` : alias for `attr.identifier` (string) -classes +`classes` : alias for `attr.classes` ([list] of strings) -attributes -: alias for `attr.attributes` ([attributes]) +`attributes` +: alias for `attr.attributes` ([Attributes]) -tag, t +`tag`, `t` : the literal `Header` (string) @@ -134,7 +176,7 @@ tag, t A horizontal rule. -tag, t +`tag`, `t` : the literal `HorizontalRule` (string) ### LineBlock @@ -142,10 +184,10 @@ tag, t A line block, i.e. a list of lines, each separated from the next by a newline. -content +`content` : inline content -tag, t +`tag`, `t` : the literal `LineBlock` (string) ### Null @@ -153,7 +195,7 @@ tag, t A null element; this element never produces any output in the target format. -tag, t +`tag`, `t` : the literal `Null` (string) ### OrderedList @@ -162,229 +204,447 @@ An ordered list. 
Parameters: -items +`items` : list items ([list] of [blocks]) -listAttributes -: list parameters +`listAttributes` +: list parameters (ListAttributes) + +`start` +: alias for `listAttributes.start` (integer) + +`style` +: alias for `listAttributes.style` (string) + +`delimiter` +: alias for `listAttributes.delimiter` (string) -tag, t +`tag`, `t` : the literal `OrderedList` (string) ### Para A paragraph -content -: inline content +`content` +: inline content ([list] of [inlines]) -tag, t +`tag`, `t` : the literal `Para` (string) ### Plain Plain text, not a paragraph -content -: inline content +`content` +: inline content ([list] of [inlines]) -tag, t +`tag`, `t` : the literal `Plain` (string) ### RawBlock Raw content of a specified format. -format +`format` : format of content (string) -text +`text` : raw content (string) -tag, t +`tag`, `t` : the literal `RawBlock` (string) ### Table A table. -caption: -: table caption +`caption` +: table caption ([list] of [inlines]) -aligns: -: alignments +`aligns` +: column alignments ([list] of [Alignment]s) -widths: -: column widths +`widths` +: column widths ([list] of numbers) -headers -: header row +`headers` +: header row ([list] of [table cells](#table-cell)) -rows: -: table rows +`rows` +: table rows ([list] of [list]s of [table cells](#table-cell)) -tag, t +`tag`, `t` : the literal `Table` (string) +A [table cell]{#table-cell} is a list of blocks. + +[Alignment]{#Alignment} is a string value indicating the +horizontal alignment of a table column. `AlignLeft`, +`AlignRight`, and `AlignCenter` lead cell content to be +left-aligned, right-aligned, and centered, respectively. The +default alignment is `AlignDefault` (often equivalent to +centered). 
+ ## Inline ### Cite Citation -content +`content` : ([list] of [inlines]) -citations +`citations` : citation entries ([list] of [citations]) -tag, t +`tag`, `t` : the literal `Cite` (string) ### Code Inline code -text +`text` : code string (string) -attr +`attr` : attributes ([Attr]) -identifier +`identifier` : alias for `attr.identifier` (string) -classes +`classes` : alias for `attr.classes` ([list] of strings) -attributes -: alias for `attr.attributes` ([attributes]) +`attributes` +: alias for `attr.attributes` ([Attributes]) -tag, t +`tag`, `t` : the literal `Code` (string) ### Emph Emphasized text -content +`content` : inline content ([list] of [inlines]) -tag, t +`tag`, `t` : the literal `Emph` (string) -<!-- TODO --> - ### Image Image: alt text (list of inlines), target -tag, t -: the literal `Image` (string) +`attr` +: attributes ([Attr]) -### Link -Hyperlink: alt text (list of inlines), target +`caption` +: text used to describe the image ([list] of [inlines]) -tag, t -: the literal `Link` (string) +`src` +: path to the image file (string) + +`title` +: brief image description + +`identifier` +: alias for `attr.identifier` (string) + +`classes` +: alias for `attr.classes` ([list] of strings) + +`attributes` +: alias for `attr.attributes` ([Attributes]) + +`tag`, `t` +: the literal `Image` (string) ### LineBreak Hard line break -tag, t +`tag`, `t` : the literal `LineBreak` (string) +### Link +Hyperlink: alt text (list of inlines), target + +`attr` +: attributes ([Attr]) + +`content` +: text for this link ([list] of [inlines]) + +`target` +: the link target (string) + +`identifier` +: alias for `attr.identifier` (string) + +`classes` +: alias for `attr.classes` ([list] of strings) + +`attributes` +: alias for `attr.attributes` ([Attributes]) + +`tag`, `t` +: the literal `Link` (string) + ### Math TeX math (literal) -tag, t +`mathtype` +: specifier determining whether the math content should be + shown inline (`InlineMath`) or on a separate line + 
(`DisplayMath`) (string) + +`text` +: math content (string) + +`tag`, `t` : the literal `Math` (string) ### Note Footnote or endnote -tag, t +`content` +: ([list] of [blocks]) + +`tag`, `t` : the literal `Note` (string) ### Quoted -Quoted text (list of inlines) +Quoted text + +`quotetype` +: type of quotes to be used; one of `SingleQuote` or + `DoubleQuote` (string) -tag, t +`content` +: quoted text ([list] of [inlines]) + +`tag`, `t` : the literal `Quoted` (string) ### RawInline Raw inline -tag, t +`format` +: the format of the content (string) + +`text` +: raw content (string) + +`tag`, `t` : the literal `RawInline` (string) ### SmallCaps -Small caps text (list of inlines) +Small caps text + +`content` +: ([list] of [inlines]) -tag, t +`tag`, `t` : the literal `SmallCaps` (string) ### SoftBreak Soft line break -tag, t +`tag`, `t` : the literal `SoftBreak` (string) ### Space Inter-word space -tag, t +`tag`, `t` : the literal `Space` (string) ### Span Generic inline container with attributes -attr +`attr` : attributes ([Attr]) -identifier +`content` +: wrapped content ([list] of [inlines]) + +`identifier` : alias for `attr.identifier` (string) -classes +`classes` : alias for `attr.classes` ([list] of strings) -attributes -: alias for `attr.attributes` ([attributes]) +`attributes` +: alias for `attr.attributes` ([Attributes]) -tag, t +`tag`, `t` : the literal `Span` (string) ### Str -Text (string) +Text + +`text` +: content (string) -tag, t +`tag`, `t` : the literal `Str` (string) ### Strikeout -Strikeout text (list of inlines) +Strikeout text + +`content` +: inline content ([list] of [inlines]) -tag, t +`tag`, `t` : the literal `Strikeout` (string) ### Strong -Strongly emphasized text (list of inlines) +Strongly emphasized text -tag, t +`content` +: inline content ([list] of [inlines]) + +`tag`, `t` : the literal `Strong` (string) ### Subscript -Subscripted text (list of inlines) +Subscripted text -tag, t +`content` +: inline content ([list] of [inlines]) + +`tag`, 
`t` : the literal `Subscript` (string) ### Superscript -Superscripted text (list of inlines) +Superscripted text + +`content` +: inline content ([list] of [inlines]) -tag, t +`tag`, `t` : the literal `Superscript` (string) -## Attributes +## Element components + +### Attr + +A set of element attributes + +`identifier` +: element identifier (string) + +`classes` +: element classes ([list] of strings) + +`attributes` +: collection of key/value pairs ([Attributes]) + +### Attributes + +List of key/value pairs. Values can be accessed by using keys as +indices to the list table. + +### Citation + +Single citation entry + +`id` +: citation identifier, e.g., a bibtex key (string) + +`mode` +: citation mode, one of `AuthorInText`, `SuppressAuthor`, or + `NormalCitation` (string) + +`prefix` +: citation prefix ([list] of [inlines]) + +`suffix` +: citation suffix ([list] of [inlines]) + +`note_num` +: note number (integer) + +`hash` +: hash (integer) + +### ListAttributes +List attributes + +`start` +: number of the first list item (integer) + +`style` +: style used for list numbers; possible values are `DefaultStyle`, + `Example`, `Decimal`, `LowerRoman`, `UpperRoman`, + `LowerAlpha`, and `UpperAlpha` (string) + +`delimiter` +: delimiter of list numbers; one of `DefaultDelim`, `Period`, + `OneParen`, and `TwoParens` (string) + +## Hierarchical Element {#Element} + +Hierarchical elements can be either *Sec* (sections) or *Blk* +(blocks). *Blk* elements are treated like [block]s. + +### Sec + +Section elements used to provide hierarchical information on +document contents. 
+ +**Objects of this type are read-only.** + +`level` +: header level (integer) + +`numbering` +: section numbering ([list] of integers) + +`attr` +: header attributes ([Attr]) + +`label` +: header content ([list] of [inlines]) + +`contents` +: list of contents in this section ([list] of hierarchical elements) + +`tag`, `t` +: constant `Sec` (string) + +## ReaderOptions + +Pandoc reader options + +`abbreviations` +: set of known abbreviations (set of strings) + +`columns` +: number of columns in terminal (integer) + +`default_image_extension` +: default extension for images (string) + +`extensions` +: string representation of the syntax extensions bit field + (string) + +`indented_code_classes` +: default classes for indented code blocks (list of strings) + +`standalone` +: whether the input was a standalone document with header + (boolean) + +`strip_comments` +: HTML comments are stripped instead of parsed as raw HTML + (boolean) -List of key/value pairs. Values can be accessed by using keys as indices to the -list table. +`tab_stop` +: width (i.e. equivalent number of spaces) of tab stops + (integer) +`track_changes` +: track changes setting for docx; one of `AcceptChanges`, + `RejectChanges`, and `AllChanges` (string) [block]: #block [blocks]: #block @@ -393,4 +653,5 @@ list table. [inline]: #inline [inlines]: #inline [Attr]: #attr -[attributes]: #attributes +[Attributes]: #attributes +[citations]: #citation diff --git a/doc/lua-filters.md b/doc/lua-filters.md index 917b9cc16..ecc63a4c4 100644 --- a/doc/lua-filters.md +++ b/doc/lua-filters.md @@ -1253,6 +1253,23 @@ Lua functions for pandoc scripts. 
`hash`: : hash number +[`ListAttributes ([start[, style[, delimiter]]])`](#ListAttributes) + +: Creates a set of list attributes + + Parameters: + + `start`: + : number of the first list item (default: 1) + + `style`: + : style used for list numbering (default: `DefaultStyle`) + + `delimiter`: + : delimiter of list numbers (default: `DefaultDelim`) + + Returns: list attributes table + ## Constants [`AuthorInText`]{#AuthorInText} diff --git a/pandoc.cabal b/pandoc.cabal index 16a098b2c..935542e79 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -79,6 +79,7 @@ data-files: data/docx/_rels/.rels data/docx/docProps/app.xml data/docx/docProps/core.xml + data/docx/docProps/custom.xml data/docx/word/document.xml data/docx/word/fontTable.xml data/docx/word/comments.xml @@ -367,7 +368,7 @@ library tagsoup >= 0.14.6 && < 0.15, base64-bytestring >= 0.1 && < 1.1, zlib >= 0.5 && < 0.7, - skylighting >= 0.7.2 && < 0.8, + skylighting >= 0.7.4 && < 0.8, data-default >= 0.4 && < 0.8, temporary >= 1.1 && < 1.4, blaze-html >= 0.9 && < 0.10, @@ -701,6 +702,7 @@ benchmark benchmark-pandoc time, bytestring, containers, base >= 4.8 && < 5, text >= 0.11 && < 1.3, + mtl >= 2.2 && < 2.3, criterion >= 1.0 && < 1.6 if impl(ghc < 8.0) build-depends: semigroups == 0.18.* diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index e47546dfc..92b41aa4e 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -629,6 +629,7 @@ getDefaultReferenceDocx = do "_rels/.rels", "docProps/app.xml", "docProps/core.xml", + "docProps/custom.xml", "word/document.xml", "word/fontTable.xml", "word/footnotes.xml", diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 4b025821c..675396067 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -292,7 +292,8 @@ showLogMessage msg = "Extracting " ++ fp ++ "..." 
NoTitleElement fallback -> "This document format requires a nonempty <title> element.\n" ++ - "Please specify either 'title' or 'pagetitle' in the metadata.\n" ++ + "Please specify either 'title' or 'pagetitle' in the metadata,\n" ++ + "e.g. by using --metadata pagetitle=\"...\" on the command line.\n" ++ "Falling back to '" ++ fallback ++ "'" NoLangSpecified -> "No value for 'lang' was specified in the metadata.\n" ++ diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs index 35611d481..8449d736d 100644 --- a/src/Text/Pandoc/Lua/Init.hs +++ b/src/Text/Pandoc/Lua/Init.hs @@ -111,6 +111,7 @@ putConstructorsInRegistry = do constrsToReg $ Pandoc.MetaList mempty constrsToReg $ Pandoc.Citation mempty mempty mempty Pandoc.AuthorInText 0 0 putInReg "Attr" -- used for Attr type alias + putInReg "ListAttributes" -- used for ListAttributes type alias Lua.pop 1 where constrsToReg :: Data a => a -> Lua () diff --git a/src/Text/Pandoc/Lua/StackInstances.hs b/src/Text/Pandoc/Lua/StackInstances.hs index 931b8c225..2d7b9c583 100644 --- a/src/Text/Pandoc/Lua/StackInstances.hs +++ b/src/Text/Pandoc/Lua/StackInstances.hs @@ -36,9 +36,10 @@ module Text.Pandoc.Lua.StackInstances () where import Prelude import Control.Applicative ((<|>)) -import Control.Monad (when) import Data.Data (showConstr, toConstr) import Foreign.Lua (Lua, Peekable, Pushable, StackIndex) +import Foreign.Lua.Userdata ( ensureUserdataMetatable, pushAnyWithMetatable + , metatableName) import Text.Pandoc.Definition import Text.Pandoc.Extensions (Extensions) import Text.Pandoc.Lua.Util (defineHowTo, pushViaConstructor) @@ -185,7 +186,8 @@ pushBlock = \case Header lvl attr inlns -> pushViaConstructor "Header" lvl inlns (LuaAttr attr) HorizontalRule -> pushViaConstructor "HorizontalRule" LineBlock blcks -> pushViaConstructor "LineBlock" blcks - OrderedList lstAttr list -> pushViaConstructor "OrderedList" list lstAttr + OrderedList lstAttr list -> pushViaConstructor "OrderedList" list + 
(LuaListAttributes lstAttr) Null -> pushViaConstructor "Null" Para blcks -> pushViaConstructor "Para" blcks Plain blcks -> pushViaConstructor "Plain" blcks @@ -207,7 +209,9 @@ peekBlock idx = defineHowTo "get Block value" $ do <$> elementContent "HorizontalRule" -> return HorizontalRule "LineBlock" -> LineBlock <$> elementContent - "OrderedList" -> uncurry OrderedList <$> elementContent + "OrderedList" -> (\(LuaListAttributes lstAttr, lst) -> + OrderedList lstAttr lst) + <$> elementContent "Null" -> return Null "Para" -> Para <$> elementContent "Plain" -> Plain <$> elementContent @@ -289,29 +293,44 @@ instance Pushable LuaAttr where instance Peekable LuaAttr where peek idx = defineHowTo "get Attr value" (LuaAttr <$> Lua.peek idx) +-- | Wrapper for ListAttributes +newtype LuaListAttributes = LuaListAttributes ListAttributes + +instance Pushable LuaListAttributes where + push (LuaListAttributes (start, style, delimiter)) = + pushViaConstructor "ListAttributes" start style delimiter + +instance Peekable LuaListAttributes where + peek = defineHowTo "get ListAttributes value" . + fmap LuaListAttributes . 
Lua.peek + -- -- Hierarchical elements -- instance Pushable Element where push (Blk blk) = Lua.push blk - push (Sec lvl num attr label contents) = do - Lua.newtable - LuaUtil.addField "level" lvl - LuaUtil.addField "numbering" num - LuaUtil.addField "attr" (LuaAttr attr) - LuaUtil.addField "label" label - LuaUtil.addField "contents" contents - pushSecMetaTable - Lua.setmetatable (-2) - where - pushSecMetaTable :: Lua () - pushSecMetaTable = do - inexistant <- Lua.newmetatable "PandocElementSec" - when inexistant $ do - LuaUtil.addField "t" "Sec" - Lua.push "__index" - Lua.pushvalue (-2) - Lua.rawset (-3) + push sec = pushAnyWithMetatable pushElementMetatable sec + where + pushElementMetatable = ensureUserdataMetatable (metatableName sec) $ + LuaUtil.addFunction "__index" indexElement + +instance Peekable Element where + peek idx = Lua.ltype idx >>= \case + Lua.TypeUserdata -> Lua.peekAny idx + _ -> Blk <$> Lua.peek idx + +indexElement :: Element -> String -> Lua Lua.NumResults +indexElement = \case + (Blk _) -> const (1 <$ Lua.pushnil) -- this shouldn't happen + (Sec lvl num attr label contents) -> fmap (return 1) . 
\case + "level" -> Lua.push lvl + "numbering" -> Lua.push num + "attr" -> Lua.push (LuaAttr attr) + "label" -> Lua.push label + "contents" -> Lua.push contents + "tag" -> Lua.push "Sec" + "t" -> Lua.push "Sec" + _ -> Lua.pushnil -- @@ -340,9 +359,32 @@ instance Pushable ReaderOptions where LuaUtil.addField "extensions" extensions LuaUtil.addField "standalone" standalone LuaUtil.addField "columns" columns - LuaUtil.addField "tabStop" tabStop - LuaUtil.addField "indentedCodeClasses" indentedCodeClasses + LuaUtil.addField "tab_stop" tabStop + LuaUtil.addField "indented_code_classes" indentedCodeClasses LuaUtil.addField "abbreviations" abbreviations - LuaUtil.addField "defaultImageExtension" defaultImageExtension - LuaUtil.addField "trackChanges" trackChanges - LuaUtil.addField "stripComments" stripComments + LuaUtil.addField "default_image_extension" defaultImageExtension + LuaUtil.addField "track_changes" trackChanges + LuaUtil.addField "strip_comments" stripComments + + -- add metatable + let indexReaderOptions :: AnyValue -> AnyValue -> Lua Lua.NumResults + indexReaderOptions _tbl (AnyValue key) = do + Lua.ltype key >>= \case + Lua.TypeString -> Lua.peek key >>= \case + "defaultImageExtension" -> Lua.push defaultImageExtension + "indentedCodeClasses" -> Lua.push indentedCodeClasses + "stripComments" -> Lua.push stripComments + "tabStop" -> Lua.push tabStop + "trackChanges" -> Lua.push trackChanges + _ -> Lua.pushnil + _ -> Lua.pushnil + return 1 + Lua.newtable + LuaUtil.addFunction "__index" indexReaderOptions + Lua.setmetatable (Lua.nthFromTop 2) + +-- | Dummy type to allow values of arbitrary Lua type. +newtype AnyValue = AnyValue StackIndex + +instance Peekable AnyValue where + peek = return . 
AnyValue diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 5d95d0e27..da8cc6433 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -397,7 +397,7 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. nonspaceChar :: Stream s m Char => ParserT s st m Char -nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r'] +nonspaceChar = noneOf ['\t', '\n', ' ', '\r'] -- | Skips zero or more spaces or tabs. skipSpaces :: Stream s m Char => ParserT s st m () diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index b06e07a80..dab3d5db2 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1069,11 +1069,11 @@ instance NamedTag (Tag String) where getTagName _ = Nothing isInlineTag :: NamedTag (Tag a) => Tag a -> Bool -isInlineTag t = isInlineTagName || isCommentTag t - where isInlineTagName = case getTagName t of - Just x -> x - `Set.notMember` blockTags - Nothing -> False +isInlineTag t = + isCommentTag t || case getTagName t of + Nothing -> False + Just x -> x `Set.notMember` blockTags || + T.take 1 x == "?" -- processing instr. isBlockTag :: NamedTag (Tag a) => Tag a -> Bool isBlockTag t = isBlockTagName || isTagComment t @@ -1208,8 +1208,10 @@ htmlTag f = try $ do let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_' let isName s = case s of [] -> False - ('?':_) -> True -- processing instruction (c:cs) -> isLetter c && all isNameChar cs + let isPI s = case s of + ('?':_) -> True -- processing instruction + _ -> False let endpos = if ln == 1 then setSourceColumn startpos @@ -1225,7 +1227,7 @@ htmlTag f = try $ do let handleTag tagname = do -- basic sanity check, since the parser is very forgiving -- and finds tags in stuff like x<y) - guard $ isName tagname + guard $ isName tagname || isPI tagname guard $ not $ null tagname -- <https://example.org> should NOT be a tag either. 
-- tagsoup will parse it as TagOpen "https:" [("example.org","")] @@ -1245,7 +1247,7 @@ htmlTag f = try $ do else return (next, "<!--" <> s <> "-->") | otherwise -> fail "bogus comment mode, HTML5 parse error" TagOpen tagname attr -> do - guard $ all (isName . fst) attr + guard $ isPI tagname || all (isName . fst) attr handleTag tagname TagClose tagname -> handleTag tagname diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 7c5619165..26ac781db 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1308,13 +1308,7 @@ isBlockCommand s = treatAsBlock :: Set.Set Text treatAsBlock = Set.fromList - [ "let", "def", "DeclareRobustCommand" - , "newcommand", "renewcommand" - , "newenvironment", "renewenvironment" - , "providecommand", "provideenvironment" - -- newcommand, etc. should be parsed by macroDef, but we need this - -- here so these aren't parsed as inline commands to ignore - , "special", "pdfannot", "pdfstringdef" + [ "special", "pdfannot", "pdfstringdef" , "bibliographystyle" , "maketitle", "makeindex", "makeglossary" , "addcontentsline", "addtocontents", "addtocounter" @@ -1375,6 +1369,7 @@ inline = (mempty <$ comment) <|> (space <$ whitespace) <|> (softbreak <$ endline) <|> word + <|> macroDef <|> inlineCommand' <|> inlineEnvironment <|> inlineGroup @@ -1420,8 +1415,7 @@ end_ t = try (do preamble :: PandocMonad m => LP m Blocks preamble = mempty <$ many preambleBlock where preambleBlock = spaces1 - <|> void macroDef - <|> void blockCommand + <|> void (macroDef <|> blockCommand) <|> void braced <|> (notFollowedBy (begin_ "document") >> void anyTok) @@ -1484,9 +1478,9 @@ authors = try $ do egroup addMeta "author" (map trimInlines auths) -macroDef :: PandocMonad m => LP m Blocks +macroDef :: (Monoid a, PandocMonad m) => LP m a macroDef = - mempty <$ ((commandDef <|> environmentDef) <* doMacros 0) + mempty <$ (commandDef <|> environmentDef) where commandDef = do (name, macro') <- newcommand <|> 
letmacro <|> defmacro guardDisabled Ext_latex_macros <|> @@ -1506,21 +1500,28 @@ macroDef = letmacro :: PandocMonad m => LP m (Text, Macro) letmacro = do controlSeq "let" - Tok _ (CtrlSeq name) _ <- anyControlSeq - optional $ symbol '=' - spaces - contents <- bracedOrToken - return (name, Macro ExpandWhenDefined [] Nothing contents) + (name, contents) <- withVerbatimMode $ do + Tok _ (CtrlSeq name) _ <- anyControlSeq + optional $ symbol '=' + spaces + -- we first parse in verbatim mode, and then expand macros, + -- because we don't want \let\foo\bar to turn into + -- \let\foo hello if we have previously \def\bar{hello} + contents <- bracedOrToken + return (name, contents) + contents' <- doMacros' 0 contents + return (name, Macro ExpandWhenDefined [] Nothing contents') defmacro :: PandocMonad m => LP m (Text, Macro) -defmacro = try $ do - controlSeq "def" - Tok _ (CtrlSeq name) _ <- anyControlSeq - argspecs <- many (argspecArg <|> argspecPattern) +defmacro = try $ -- we use withVerbatimMode, because macros are to be expanded -- at point of use, not point of definition - contents <- withVerbatimMode bracedOrToken - return (name, Macro ExpandWhenUsed argspecs Nothing contents) + withVerbatimMode $ do + controlSeq "def" + Tok _ (CtrlSeq name) _ <- anyControlSeq + argspecs <- many (argspecArg <|> argspecPattern) + contents <- bracedOrToken + return (name, Macro ExpandWhenUsed argspecs Nothing contents) argspecArg :: PandocMonad m => LP m ArgSpec argspecArg = do @@ -1530,8 +1531,8 @@ argspecArg = do argspecPattern :: PandocMonad m => LP m ArgSpec argspecPattern = Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) -> - (toktype' == Symbol || toktype' == Word) && - (txt /= "{" && txt /= "\\" && txt /= "}"))) + (toktype' == Symbol || toktype' == Word) && + (txt /= "{" && txt /= "\\" && txt /= "}"))) newcommand :: PandocMonad m => LP m (Text, Macro) newcommand = do @@ -1540,22 +1541,24 @@ newcommand = do controlSeq "renewcommand" <|> controlSeq "providecommand" <|> 
controlSeq "DeclareRobustCommand" - optional $ symbol '*' - Tok _ (CtrlSeq name) txt <- withVerbatimMode $ anyControlSeq <|> - (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') - spaces - numargs <- option 0 $ try bracketedNum - let argspecs = map (\i -> ArgNum i) [1..numargs] - spaces - optarg <- option Nothing $ Just <$> try bracketedToks - spaces - contents <- withVerbatimMode bracedOrToken - when (mtype == "newcommand") $ do - macros <- sMacros <$> getState - case M.lookup name macros of - Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos - Nothing -> return () - return (name, Macro ExpandWhenUsed argspecs optarg contents) + withVerbatimMode $ do + Tok _ (CtrlSeq name) txt <- do + optional (symbol '*') + anyControlSeq <|> + (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') + spaces + numargs <- option 0 $ try bracketedNum + let argspecs = map (\i -> ArgNum i) [1..numargs] + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + contents <- bracedOrToken + when (mtype == "newcommand") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos + Nothing -> return () + return (name, Macro ExpandWhenUsed argspecs optarg contents) newenvironment :: PandocMonad m => LP m (Text, Macro, Macro) newenvironment = do @@ -1563,24 +1566,23 @@ newenvironment = do Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> controlSeq "renewenvironment" <|> controlSeq "provideenvironment" - optional $ symbol '*' - spaces - name <- untokenize <$> braced - spaces - numargs <- option 0 $ try bracketedNum - let argspecs = map (\i -> ArgNum i) [1..numargs] - spaces - optarg <- option Nothing $ Just <$> try bracketedToks - spaces - startcontents <- withVerbatimMode bracedOrToken - spaces - endcontents <- withVerbatimMode bracedOrToken - when (mtype == "newenvironment") $ do - macros <- sMacros <$> getState - case M.lookup name macros of - Just _ -> report $ 
MacroAlreadyDefined (T.unpack name) pos - Nothing -> return () - return (name, Macro ExpandWhenUsed argspecs optarg startcontents, + withVerbatimMode $ do + optional $ symbol '*' + spaces + name <- untokenize <$> braced + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + let argspecs = map (\i -> ArgNum i) [1..numargs] + startcontents <- spaces >> bracedOrToken + endcontents <- spaces >> bracedOrToken + when (mtype == "newenvironment") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos + Nothing -> return () + return (name, Macro ExpandWhenUsed argspecs optarg startcontents, Macro ExpandWhenUsed [] Nothing endcontents) bracketedNum :: PandocMonad m => LP m Int @@ -1644,7 +1646,9 @@ blockCommand = try $ do let names = ordNub [name', name] let rawDefiniteBlock = do guard $ isBlockCommand name - rawBlock "latex" <$> getRawCommand name (txt <> star) + rawcontents <- getRawCommand name (txt <> star) + (guardEnabled Ext_raw_tex >> return (rawBlock "latex" rawcontents)) + <|> ignore rawcontents -- heuristic: if it could be either block or inline, we -- treat it if block if we have a sequence of block -- commands followed by a newline. 
But we stop if we @@ -1656,7 +1660,10 @@ blockCommand = try $ do guard $ "start" `T.isPrefixOf` n let rawMaybeBlock = try $ do guard $ not $ isInlineCommand name - curr <- rawBlock "latex" <$> getRawCommand name (txt <> star) + rawcontents <- getRawCommand name (txt <> star) + curr <- (guardEnabled Ext_raw_tex >> + return (rawBlock "latex" rawcontents)) + <|> ignore rawcontents rest <- many $ notFollowedBy startCommand *> blockCommand lookAhead $ blankline <|> startCommand return $ curr <> mconcat rest @@ -1757,6 +1764,8 @@ blockCommands = M.fromList , ("input", include "input") , ("subfile", include "subfile") , ("usepackage", include "usepackage") + -- preamble + , ("PackageError", mempty <$ (braced >> braced >> braced)) ] diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 9256217fe..69bbf28d4 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -49,6 +49,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , toksToString , satisfyTok , doMacros + , doMacros' , setpos , anyControlSeq , anySymbol @@ -110,6 +111,8 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), import Text.Pandoc.Shared import Text.Parsec.Pos +-- import Debug.Trace (traceShowId) + newtype DottedNum = DottedNum [Int] deriving (Show) @@ -140,6 +143,7 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sLabels :: M.Map String [Inline] , sHasChapters :: Bool , sToggles :: M.Map String Bool + , sExpanded :: Bool } deriving Show @@ -161,6 +165,7 @@ defaultLaTeXState = LaTeXState{ sOptions = def , sLabels = M.empty , sHasChapters = False , sToggles = M.empty + , sExpanded = False } instance PandocMonad m => HasQuoteContext LaTeXState m where @@ -211,10 +216,14 @@ type LP m = ParserT [Tok] LaTeXState m withVerbatimMode :: PandocMonad m => LP m a -> LP m a withVerbatimMode parser = do - updateState $ \st -> st{ sVerbatimMode = True } - result <- parser - updateState $ \st -> 
st{ sVerbatimMode = False } - return result + alreadyVerbatimMode <- sVerbatimMode <$> getState + if alreadyVerbatimMode + then parser + else do + updateState $ \st -> st{ sVerbatimMode = True } + result <- parser + updateState $ \st -> st{ sVerbatimMode = False } + return result rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) => Bool -> LP m a -> LP m a -> ParserT String s m (a, String) @@ -231,7 +240,7 @@ rawLaTeXParser retokenize parser valParser = do Right toks' -> do res <- lift $ runParserT (do when retokenize $ do -- retokenize, applying macros - doMacros 0 + doMacros ts <- many (satisfyTok (const True)) setInput ts rawparser) @@ -246,8 +255,7 @@ rawLaTeXParser retokenize parser valParser = do applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) => String -> ParserT String s m String applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> - do let retokenize = doMacros 0 *> - (toksToString <$> many (satisfyTok (const True))) + do let retokenize = toksToString <$> many (satisfyTok (const True)) pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate , sMacros = extractMacros pstate } @@ -255,6 +263,7 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> case res of Left e -> fail (show e) Right s' -> return s' + tokenize :: SourceName -> Text -> [Tok] tokenize sourcename = totoks (initialPos sourcename) @@ -368,10 +377,10 @@ toksToString :: [Tok] -> String toksToString = T.unpack . untokenize satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok -satisfyTok f = - try $ do +satisfyTok f = do + doMacros -- apply macros on remaining input stream res <- tokenPrim (T.unpack . 
untoken) updatePos matcher - doMacros 0 -- apply macros on remaining input stream + updateState $ \st -> st{ sExpanded = False } return res where matcher t | f t = Just t | otherwise = Nothing @@ -379,82 +388,97 @@ satisfyTok f = updatePos _spos _ (Tok pos _ _ : _) = pos updatePos spos _ [] = incSourceColumn spos 1 -doMacros :: PandocMonad m => Int -> LP m () -doMacros n = do +doMacros :: PandocMonad m => LP m () +doMacros = do + expanded <- sExpanded <$> getState verbatimMode <- sVerbatimMode <$> getState - unless verbatimMode $ do - inp <- getInput - case inp of - Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : - Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros spos name ts - Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : - Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros spos ("end" <> name) ts - Tok _ (CtrlSeq "expandafter") _ : t : ts - -> do setInput ts - doMacros n - getInput >>= setInput . combineTok t - Tok spos (CtrlSeq name) _ : ts - -> handleMacros spos name ts - _ -> return () - where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts) - | T.all isLetterOrAt w = - Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts - where (x1, x2) = T.break isSpaceOrTab x - combineTok t ts = t:ts - handleMacros spos name ts = do - macros <- sMacros <$> getState - case M.lookup name macros of - Nothing -> return () - Just (Macro expansionPoint argspecs optarg newtoks) -> do - setInput ts - let matchTok (Tok _ toktype txt) = - satisfyTok (\(Tok _ toktype' txt') -> - toktype == toktype' && - txt == txt') - let matchPattern toks = try $ mapM_ matchTok toks - let getargs argmap [] = return argmap - getargs argmap (Pattern toks : rest) = try $ do - matchPattern toks - getargs argmap rest - getargs argmap (ArgNum i : Pattern toks : rest) = - try $ do - x <- mconcat <$> manyTill - (braced <|> ((:[]) <$> anyTok)) - (matchPattern toks) - getargs (M.insert i x argmap) rest - getargs argmap (ArgNum i : rest) = do - x <- try $ spaces >> bracedOrToken - 
getargs (M.insert i x argmap) rest - args <- case optarg of - Nothing -> getargs M.empty argspecs - Just o -> do - x <- option o bracketedToks - getargs (M.singleton 1 x) argspecs - -- first boolean param is true if we're tokenizing - -- an argument (in which case we don't want to - -- expand #1 etc.) - let addTok False (Tok _ (Arg i) _) acc = - case M.lookup i args of - Nothing -> mzero - Just xs -> foldr (addTok True) acc xs - -- see #4007 - addTok _ (Tok _ (CtrlSeq x) txt) - acc@(Tok _ Word _ : _) - | not (T.null txt) && - isLetter (T.last txt) = - Tok spos (CtrlSeq x) (txt <> " ") : acc - addTok _ t acc = setpos spos t : acc - ts' <- getInput - setInput $ foldr (addTok False) ts' newtoks - case expansionPoint of - ExpandWhenUsed -> - if n > 20 -- detect macro expansion loops - then throwError $ PandocMacroLoop (T.unpack name) - else doMacros (n + 1) - ExpandWhenDefined -> return () - + unless (expanded || verbatimMode) $ do + getInput >>= doMacros' 1 >>= setInput + updateState $ \st -> st{ sExpanded = True } + +doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok] +doMacros' n inp = do + case inp of + Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros n spos name ts + Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros n spos ("end" <> name) ts + Tok _ (CtrlSeq "expandafter") _ : t : ts + -> combineTok t <$> doMacros' n ts + Tok spos (CtrlSeq name) _ : ts + -> handleMacros n spos name ts + _ -> return inp + <|> return inp + + where + combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts) + | T.all isLetterOrAt w = + Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts + where (x1, x2) = T.break isSpaceOrTab x + combineTok t ts = t:ts + + matchTok (Tok _ toktype txt) = + satisfyTok (\(Tok _ toktype' txt') -> + toktype == toktype' && + txt == txt') + + matchPattern toks = try $ mapM_ matchTok toks + + getargs argmap [] = return argmap + getargs 
argmap (Pattern toks : rest) = try $ do + matchPattern toks + getargs argmap rest + getargs argmap (ArgNum i : Pattern toks : rest) = + try $ do + x <- mconcat <$> manyTill (braced <|> ((:[]) <$> anyTok)) + (matchPattern toks) + getargs (M.insert i x argmap) rest + getargs argmap (ArgNum i : rest) = do + x <- try $ spaces >> bracedOrToken + getargs (M.insert i x argmap) rest + + addTok False args spos (Tok _ (Arg i) _) acc = + case M.lookup i args of + Nothing -> mzero + Just xs -> foldr (addTok True args spos) acc xs + -- see #4007 + addTok _ _ spos (Tok _ (CtrlSeq x) txt) + acc@(Tok _ Word _ : _) + | not (T.null txt) + , isLetter (T.last txt) = + Tok spos (CtrlSeq x) (txt <> " ") : acc + addTok _ _ spos t acc = setpos spos t : acc + + handleMacros n' spos name ts = do + when (n' > 20) -- detect macro expansion loops + $ throwError $ PandocMacroLoop (T.unpack name) + macros <- sMacros <$> getState + case M.lookup name macros of + Nothing -> mzero + Just (Macro expansionPoint argspecs optarg newtoks) -> do + let getargs' = do + args <- case optarg of + Nothing -> getargs M.empty argspecs + Just o -> do + x <- option o bracketedToks + getargs (M.singleton 1 x) argspecs + rest <- getInput + return (args, rest) + lstate <- getState + res <- lift $ runParserT getargs' lstate "args" ts + case res of + Left _ -> fail $ "Could not parse arguments for " ++ + T.unpack name + Right (args, rest) -> do + -- first boolean param is true if we're tokenizing + -- an argument (in which case we don't want to + -- expand #1 etc.) 
+ let result = foldr (addTok False args spos) rest newtoks + case expansionPoint of + ExpandWhenUsed -> doMacros' (n' + 1) result + ExpandWhenDefined -> return result setpos :: SourcePos -> Tok -> Tok setpos spos (Tok _ tt txt) = Tok spos tt txt diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d1ea7a1a5..5944ecf82 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1586,6 +1586,7 @@ symbol = do <|> try (do lookAhead $ char '\\' notFollowedBy' (() <$ rawTeXBlock) char '\\') + updateLastStrPos return $ return $ B.str [result] -- parses inline code, between n `s and n `s @@ -1632,7 +1633,7 @@ enclosure c = do 3 -> three c 2 -> two c mempty 1 -> one c mempty - _ -> return (return $ B.str cs) + _ -> updateLastStrPos >> return (return $ B.str cs) ender :: PandocMonad m => Char -> Int -> MarkdownParser m () ender c n = try $ do @@ -1716,19 +1717,24 @@ nonEndline = satisfy (/='\n') str :: PandocMonad m => MarkdownParser m (F Inlines) str = do + canRelocateSpace <- notAfterString result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) updateLastStrPos (do guardEnabled Ext_smart abbrevs <- getOption readerAbbreviations if not (null result) && last result == '.' && result `Set.member` abbrevs then try (do ils <- whitespace <|> endline - lookAhead alphaNum + -- ?? 
lookAhead alphaNum + -- replace space after with nonbreaking space + -- if softbreak, move before abbrev if possible (#4635) return $ do ils' <- ils - if ils' == B.space - then return (B.str result <> B.str "\160") - else -- linebreak or softbreak - return (ils' <> B.str result <> B.str "\160")) + case B.toList ils' of + [Space] -> + return (B.str result <> B.str "\160") + [SoftBreak] | canRelocateSpace -> + return (ils' <> B.str result <> B.str "\160") + _ -> return (B.str result <> ils')) <|> return (return (B.str result)) else return (return (B.str result))) <|> return (return (B.str result)) diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 134598c07..6acc88b3d 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -44,7 +44,6 @@ import Control.Monad import Control.Monad.Reader import Control.Monad.Except (throwError) import Data.Bifunctor -import Data.Char (isAlphaNum) import Data.Default import Data.List (intercalate) import Data.List.Split (splitOn) @@ -59,8 +58,8 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (F, enclosed) -import Text.Pandoc.Shared (crFilter, underlineSpan, mapLeft) +import Text.Pandoc.Parsing hiding (F) +import Text.Pandoc.Shared (crFilter, underlineSpan) -- | Read Muse from an input string and return a Pandoc document. 
readMuse :: PandocMonad m @@ -69,9 +68,9 @@ readMuse :: PandocMonad m -> m Pandoc readMuse opts s = do let input = crFilter s - res <- mapLeft (PandocParsecError $ unpack input) `liftM` runReaderT (runParserT parseMuse def{ museOptions = opts } "source" input) def + res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input case res of - Left e -> throwError e + Left e -> throwError $ PandocParsecError (unpack input) e Right d -> return d type F = Future MuseState @@ -83,7 +82,6 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed , museLogMessages :: [LogMessage] , museNotes :: M.Map String (SourcePos, F Blocks) - , museInPara :: Bool -- ^ True when looking for a paragraph terminator } instance Default MuseState where @@ -94,15 +92,17 @@ instance Default MuseState where , museLastStrPos = Nothing , museLogMessages = [] , museNotes = M.empty - , museInPara = False } data MuseEnv = MuseEnv { museInLink :: Bool -- ^ True when parsing a link description to avoid nested links + , museInPara :: Bool -- ^ True when parsing paragraph is not allowed } instance Default MuseEnv where - def = MuseEnv { museInLink = False } + def = MuseEnv { museInLink = False + , museInPara = False + } type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m) @@ -131,18 +131,12 @@ parseMuse = do many directive blocks <- (:) <$> parseBlocks <*> many parseSection st <- getState - let doc = runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st - reportLogMessages - return doc + runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st <$ reportLogMessages -- * Utility functions commonPrefix :: String -> String -> String -commonPrefix _ [] = [] -commonPrefix [] _ = [] -commonPrefix (x:xs) (y:ys) - | x == y = x : commonPrefix xs ys - | otherwise = [] +commonPrefix xs ys = map fst $ takeWhile (uncurry (==)) $ zip xs ys -- | Trim up to one newline 
from the beginning of the string. lchop :: String -> String @@ -159,12 +153,11 @@ dropSpacePrefix lns = where flns = filter (not . all (== ' ')) lns maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns -atStart :: PandocMonad m => MuseParser m a -> MuseParser m a -atStart p = do +atStart :: PandocMonad m => MuseParser m () +atStart = do pos <- getPosition st <- getState guard $ museLastStrPos st /= Just pos - p firstColumn :: PandocMonad m => MuseParser m () firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1) @@ -206,18 +199,16 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs classes = maybe [] words $ lookup "class" attrs - keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"] parseHtmlContent :: PandocMonad m => String -- ^ Tag name -> MuseParser m (Attr, F Blocks) -parseHtmlContent tag = try $ do - indent <- getIndent - attr <- openTag tag - manyTill spaceChar eol - content <- parseBlocksTill $ try $ count indent spaceChar *> closeTag tag - manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline - return (htmlAttrToPandoc attr, content) +parseHtmlContent tag = try $ getIndent >>= \indent -> (,) + <$> fmap htmlAttrToPandoc (openTag tag) + <* manyTill spaceChar eol + <*> allowPara (parseBlocksTill (try $ indentWith indent *> closeTag tag)) + <* manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline -- ** Directive parsers @@ -250,6 +241,9 @@ directive = do -- ** Block parsers +allowPara :: MonadReader MuseEnv m => m a -> m a +allowPara p = local (\s -> s { museInPara = False }) p + -- | Parse section contents until EOF or next header parseBlocks :: PandocMonad m => MuseParser m (F Blocks) @@ -263,10 +257,9 @@ parseBlocks = nextSection = mempty <$ lookAhead headingStart parseEnd = mempty <$ eof 
blockStart = (B.<>) <$> (blockElements <|> emacsNoteBlock) - <*> parseBlocks - listStart = do - updateState (\st -> st { museInPara = False }) - uncurry (B.<>) <$> (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks) + <*> allowPara parseBlocks + listStart = + uncurry (B.<>) <$> allowPara (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks) paraStart = do indent <- length <$> many spaceChar uncurry (B.<>) . first (p indent) <$> paraUntil parseBlocks @@ -282,46 +275,36 @@ parseSection = parseBlocksTill :: PandocMonad m => MuseParser m a -> MuseParser m (F Blocks) -parseBlocksTill end = - try (parseEnd <|> - blockStart <|> - listStart <|> - paraStart) +parseBlocksTill end = continuation where parseEnd = mempty <$ end - blockStart = (B.<>) <$> blockElements <*> continuation - listStart = do - updateState (\st -> st { museInPara = False }) - uncurry (B.<>) <$> anyListUntil (parseEnd <|> continuation) + blockStart = (B.<>) <$> blockElements <*> allowPara continuation + listStart = uncurry (B.<>) <$> allowPara (anyListUntil (parseEnd <|> continuation)) paraStart = uncurry (B.<>) <$> paraUntil (parseEnd <|> continuation) - continuation = parseBlocksTill end + continuation = try $ parseEnd <|> blockStart <|> listStart <|> paraStart listItemContentsUntil :: PandocMonad m => Int -> MuseParser m a -> MuseParser m a -> MuseParser m (F Blocks, a) -listItemContentsUntil col pre end = - try blockStart <|> - try listStart <|> - try paraStart +listItemContentsUntil col pre end = p where + p = try blockStart <|> try listStart <|> try paraStart parsePre = (mempty,) <$> pre parseEnd = (mempty,) <$> end paraStart = do (f, (r, e)) <- paraUntil (parsePre <|> continuation <|> parseEnd) return (f B.<> r, e) blockStart = first <$> ((B.<>) <$> blockElements) - <*> (parsePre <|> continuation <|> parseEnd) + <*> allowPara (parsePre <|> continuation <|> parseEnd) listStart = do - updateState (\st -> st { museInPara = False }) - (f, (r, e)) <- anyListUntil (parsePre <|> 
continuation <|> parseEnd) + (f, (r, e)) <- allowPara $ anyListUntil (parsePre <|> continuation <|> parseEnd) return (f B.<> r, e) continuation = try $ do blank <- optionMaybe blankline skipMany blankline indentWith col - updateState (\st -> st { museInPara = museInPara st && isNothing blank }) - listItemContentsUntil col pre end + local (\s -> s { museInPara = museInPara s && isNothing blank }) p parseBlock :: PandocMonad m => MuseParser m (F Blocks) parseBlock = do @@ -331,25 +314,22 @@ parseBlock = do where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements))) blockElements :: PandocMonad m => MuseParser m (F Blocks) -blockElements = do - updateState (\st -> st { museInPara = False }) - choice [ mempty <$ blankline - , comment - , separator - , example - , exampleTag - , literalTag - , centerTag - , rightTag - , quoteTag - , divTag - , biblioTag - , playTag - , verseTag - , lineBlock - , table - , commentTag - ] +blockElements = (mempty <$ blankline) + <|> comment + <|> separator + <|> example + <|> exampleTag + <|> literalTag + <|> centerTag + <|> rightTag + <|> quoteTag + <|> divTag + <|> biblioTag + <|> playTag + <|> verseTag + <|> lineBlock + <|> table + <|> commentTag -- | Parse a line comment, starting with @;@ in the first column. comment :: PandocMonad m => MuseParser m (F Blocks) @@ -445,9 +425,9 @@ divTag = do -- | Parse @\<biblio>@ tag, the result is the same as @\<div class="biblio">@. -- @\<biblio>@ tag is supported only in Text::Amuse mode. biblioTag :: PandocMonad m => MuseParser m (F Blocks) -biblioTag = do - guardEnabled Ext_amuse - fmap (B.divWith ("", ["biblio"], [])) . snd <$> parseHtmlContent "biblio" +biblioTag = fmap (B.divWith ("", ["biblio"], [])) . snd + <$ guardEnabled Ext_amuse + <*> parseHtmlContent "biblio" -- | Parse @\<play>@ tag, the result is the same as @\<div class="play">@. -- @\<play>@ tag is supported only in Text::Amuse mode. @@ -463,13 +443,11 @@ verseLine = (<>) -- | Parse @\<verse>@ tag. 
verseTag :: PandocMonad m => MuseParser m (F Blocks) -verseTag = try $ do - indent <- getIndent - openTag "verse" - manyTill spaceChar eol - content <- sequence <$> manyTill (count indent spaceChar *> verseLine) (try $ count indent spaceChar *> closeTag "verse") - manyTill spaceChar eol - return $ B.lineBlock <$> content +verseTag = try $ getIndent >>= \indent -> fmap B.lineBlock . sequence + <$ openTag "verse" + <* manyTill spaceChar eol + <*> manyTill (indentWith indent *> verseLine) (try $ indentWith indent *> closeTag "verse") + <* manyTill spaceChar eol -- | Parse @\<comment>@ tag. commentTag :: PandocMonad m => MuseParser m (F Blocks) @@ -483,19 +461,16 @@ commentTag = try $ mempty paraContentsUntil :: PandocMonad m => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Inlines, a) -paraContentsUntil end = do - updateState (\st -> st { museInPara = True }) - (l, e) <- someUntil inline $ try (manyTill spaceChar eol *> end) - updateState (\st -> st { museInPara = False }) - return (trimInlinesF $ mconcat l, e) +paraContentsUntil end = first (trimInlinesF . mconcat) + <$> someUntil inline (try (manyTill spaceChar eol *> local (\s -> s { museInPara = True}) end)) -- | Parse a paragraph. 
paraUntil :: PandocMonad m => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Blocks, a) paraUntil end = do - state <- getState - guard $ not $ museInPara state + inPara <- asks museInPara + guard $ not inPara first (fmap B.para) <$> paraContentsUntil end noteMarker :: PandocMonad m => MuseParser m String @@ -504,6 +479,17 @@ noteMarker = try $ (:) <*> oneOf "123456789" <*> manyTill digit (char ']') +addNote :: PandocMonad m + => String + -> SourcePos + -> F Blocks + -> MuseParser m () +addNote ref pos content = do + oldnotes <- museNotes <$> getState + when (M.member ref oldnotes) + (logMessage $ DuplicateNoteReference ref pos) + updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } + -- Amusewiki version of note -- Parsing is similar to list item, except that note marker is used instead of list marker amuseNoteBlockUntil :: PandocMonad m @@ -513,12 +499,8 @@ amuseNoteBlockUntil end = try $ do guardEnabled Ext_amuse ref <- noteMarker <* spaceChar pos <- getPosition - updateState (\st -> st { museInPara = False }) - (content, e) <- listItemContentsUntil (sourceColumn pos - 1) (fail "x") end - oldnotes <- museNotes <$> getState - when (M.member ref oldnotes) - (logMessage $ DuplicateNoteReference ref pos) - updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } + (content, e) <- allowPara $ listItemContentsUntil (sourceColumn pos - 1) (fail "x") end + addNote ref pos content return (mempty, e) -- Emacs version of note @@ -526,13 +508,10 @@ amuseNoteBlockUntil end = try $ do emacsNoteBlock :: PandocMonad m => MuseParser m (F Blocks) emacsNoteBlock = try $ do guardDisabled Ext_amuse - pos <- getPosition ref <- noteMarker <* skipSpaces - content <- mconcat <$> blocksTillNote - oldnotes <- museNotes <$> getState - when (M.member ref oldnotes) - (logMessage $ DuplicateNoteReference ref pos) - updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } + pos <- getPosition + content <- fmap mconcat 
blocksTillNote + addNote ref pos content return mempty where blocksTillNote = @@ -544,10 +523,8 @@ emacsNoteBlock = try $ do -- | Parse a line block indicated by @\'>\'@ characters. lineBlock :: PandocMonad m => MuseParser m (F Blocks) -lineBlock = try $ do - indent <- getIndent - lns <- (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent) - return $ B.lineBlock <$> sequence lns +lineBlock = try $ getIndent >>= \indent -> fmap B.lineBlock . sequence + <$> (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent) where blankVerseLine = try $ mempty <$ char '>' <* blankline nonblankVerseLine = try (string "> ") *> verseLine @@ -561,8 +538,7 @@ bulletListItemsUntil :: PandocMonad m bulletListItemsUntil indent end = try $ do char '-' void spaceChar <|> lookAhead eol - updateState (\st -> st { museInPara = False }) - (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end) + (x, (xs, e)) <- allowPara $ listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end) return (x:xs, e) -- | Parse a bullet list. @@ -598,8 +574,7 @@ orderedListItemsUntil indent style end = continuation = try $ do pos <- getPosition void spaceChar <|> lookAhead eol - updateState (\st -> st { museInPara = False }) - (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end) + (x, (xs, e)) <- allowPara $ listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end) return (x:xs, e) -- | Parse an ordered list. 
@@ -620,8 +595,7 @@ descriptionsUntil :: PandocMonad m -> MuseParser m ([F Blocks], a) descriptionsUntil indent end = do void spaceChar <|> lookAhead eol - updateState (\st -> st { museInPara = False }) - (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end) + (x, (xs, e)) <- allowPara $ listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end) return (x:xs, e) definitionListItemsUntil :: PandocMonad m @@ -686,12 +660,8 @@ museAppendElement element tbl = MuseFooterRow row -> tbl{ museTableFooters = row : museTableFooters tbl } MuseCaption inlines -> tbl{ museTableCaption = inlines } -tableCell :: PandocMonad m => MuseParser m (F Blocks) -tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd) - where cellEnd = try $ void (many1 spaceChar *> char '|') <|> eol - tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement]) -tableElements = sequence <$> (tableParseElement `sepEndBy1` eol) +tableElements = sequence <$> many1 tableParseElement elementsToTable :: [MuseTableElement] -> MuseTable elementsToTable = foldr museAppendElement emptyTable @@ -710,10 +680,10 @@ tableParseElement = tableParseHeader tableParseRow :: PandocMonad m => Int -- ^ Number of separator characters -> MuseParser m (F [Blocks]) -tableParseRow n = try $ - sequence <$> (tableCell `sepBy2` fieldSep) - where p `sepBy2` sep = (:) <$> p <*> many1 (sep *> p) - fieldSep = many1 spaceChar *> count n (char '|') *> (void (many1 spaceChar) <|> void (lookAhead newline)) +tableParseRow n = try $ sequence <$> tableCells + where tableCells = (:) <$> tableCell sep <*> (tableCells <|> fmap pure (tableCell eol)) + tableCell p = try $ fmap B.plain . trimInlinesF . 
mconcat <$> manyTill inline' p + sep = try $ many1 spaceChar *> count n (char '|') *> lookAhead (void (many1 spaceChar) <|> void eol) -- | Parse a table header row. tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement) @@ -732,7 +702,7 @@ tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement) tableParseCaption = try $ fmap MuseCaption . trimInlinesF . mconcat <$ many spaceChar <* string "|+" - <*> many1Till inline (try $ string "+|") + <*> many1Till inline (try $ string "+|" *> eol) -- ** Inline parsers @@ -803,24 +773,15 @@ whitespace = try $ pure B.space <$ skipMany1 spaceChar br :: PandocMonad m => MuseParser m (F Inlines) br = try $ pure B.linebreak <$ string "<br>" -emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines) -emphasisBetween c = try $ enclosedInlines c c - --- | Parses material enclosed between start and end parsers. -enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser - -> ParserT s st m end -- ^ end parser - -> ParserT s st m a -- ^ content parser (to be used repeatedly) - -> ParserT s st m [a] -enclosed start end parser = try $ - start *> notFollowedBy spaceChar *> many1Till parser end - -enclosedInlines :: (PandocMonad m, Show a, Show b) +emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a - -> MuseParser m b -> MuseParser m (F Inlines) -enclosedInlines start end = try $ trimInlinesF . mconcat - <$> enclosed (atStart start) end inline - <* notFollowedBy (satisfy isAlphaNum) +emphasisBetween p = try $ trimInlinesF . mconcat + <$ atStart + <* p + <* notFollowedBy spaceChar + <*> many1Till inline p + <* notFollowedBy alphaNum -- | Parse an inline tag, such as @\<em>@ and @\<strong>@. inlineTag :: PandocMonad m @@ -875,8 +836,7 @@ verbatimTag = return . B.text classTag :: PandocMonad m => MuseParser m (F Inlines) classTag = do classes <- maybe [] words . 
lookup "name" <$> openTag "class" - res <- manyTill inline $ closeTag "class" - return $ B.spanWith ("", classes, []) <$> mconcat res + fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class") -- | Parse "~~" as nonbreaking space. nbsp :: PandocMonad m => MuseParser m (F Inlines) @@ -884,14 +844,12 @@ nbsp = try $ pure (B.str "\160") <$ string "~~" -- | Parse code markup, indicated by @\'=\'@ characters. code :: PandocMonad m => MuseParser m (F Inlines) -code = try $ do - atStart $ char '=' - contents <- many1Till (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) $ char '=' - guard $ not $ null contents - guard $ head contents `notElem` " \t\n" - guard $ last contents `notElem` " \t\n" - notFollowedBy $ satisfy isAlphaNum - return $ return $ B.code contents +code = try $ fmap pure $ B.code . uncurry (++) + <$ atStart + <* char '=' + <* notFollowedBy (spaceChar <|> newline) + <*> manyUntil (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap pure $ noneOf " \t\n\r=" <* char '=') + <* notFollowedBy alphaNum -- | Parse @\<code>@ tag. codeTag :: PandocMonad m => MuseParser m (F Inlines) @@ -916,24 +874,24 @@ str :: PandocMonad m => MuseParser m (F Inlines) str = return . B.str <$> many1 alphaNum <* updateLastStrPos symbol :: PandocMonad m => MuseParser m (F Inlines) -symbol = return . B.str <$> count 1 nonspaceChar +symbol = pure . B.str . pure <$> nonspaceChar -- | Parse a link or image. linkOrImage :: PandocMonad m => MuseParser m (F Inlines) linkOrImage = try $ do inLink <- asks museInLink guard $ not inLink - local (\s -> s { museInLink = True }) (explicitLink <|> image <|> link) + local (\s -> s { museInLink = True }) (link "URL:" <|> image <|> link "") linkContent :: PandocMonad m => MuseParser m (F Inlines) linkContent = trimInlinesF . 
mconcat <$ char '[' <*> manyTill inline (char ']') --- | Parse a link starting with @URL:@ -explicitLink :: PandocMonad m => MuseParser m (F Inlines) -explicitLink = try $ do - string "[[URL:" +-- | Parse a link starting with (possibly null) prefix +link :: PandocMonad m => String -> MuseParser m (F Inlines) +link prefix = try $ do + string $ "[[" ++ prefix url <- manyTill anyChar $ char ']' content <- option (pure $ B.str url) linkContent char ']' @@ -966,11 +924,3 @@ image = try $ do <*> optionMaybe (many1 digit) <* many spaceChar <*> optionMaybe (oneOf "rlf") - -link :: PandocMonad m => MuseParser m (F Inlines) -link = try $ do - string "[[" - url <- manyTill anyChar $ char ']' - content <- optionMaybe linkContent - char ']' - return $ B.link url "" <$> fromMaybe (return $ B.str url) content diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 28fa7b83e..1938ca171 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -665,11 +665,13 @@ directive' = do optional blanklines let body' = body ++ "\n\n" name = trim $ fromMaybe "" (lookup "name" fields) - imgAttr cl = ("", classes, widthAttr ++ heightAttr) + classes = words $ maybe "" trim (lookup "class" fields) + keyvals = [(k, trim v) | (k, v) <- fields, k /= "name", k /= "class"] + imgAttr cl = ("", classes ++ alignClasses, widthAttr ++ heightAttr) where - classes = words $ maybe "" trim (lookup cl fields) ++ - maybe "" (\x -> "align-" ++ trim x) - (lookup "align" fields) + alignClasses = words $ maybe "" trim (lookup cl fields) ++ + maybe "" (\x -> "align-" ++ trim x) + (lookup "align" fields) scale = case trim <$> lookup "scale" fields of Just v -> case reverse v of '%':vv -> @@ -698,8 +700,9 @@ directive' = do "line-block" -> lineBlockDirective body' "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) "role" -> addNewRole top $ map (second trim) fields - "container" -> B.divWith (name, "container" : words top, []) <$> - parseFromString' 
parseBlocks body' + "container" -> B.divWith + (name, "container" : words top ++ classes, []) <$> + parseFromString' parseBlocks body' "replace" -> B.para <$> -- consumed by substKey parseInlineFromString (trim top) "unicode" -> B.para <$> -- consumed by substKey @@ -717,7 +720,7 @@ directive' = do (l:ls) -> B.divWith ("",["admonition-title"],[]) (B.para (B.str (toUpper l : ls))) [] -> mempty - return $ B.divWith ("",[label],[]) (lab <> bod) + return $ B.divWith (name,label:classes,keyvals) (lab <> bod) "sidebar" -> do let subtit = maybe "" trim $ lookup "subtitle" fields tit <- B.para . B.strong <$> parseInlineFromString @@ -725,21 +728,21 @@ directive' = do then "" else (": " ++ subtit)) bod <- parseFromString' parseBlocks body' - return $ B.divWith ("",["sidebar"],[]) $ tit <> bod + return $ B.divWith (name,"sidebar":classes,keyvals) $ tit <> bod "topic" -> do tit <- B.para . B.strong <$> parseInlineFromString top bod <- parseFromString' parseBlocks body' - return $ B.divWith ("",["topic"],[]) $ tit <> bod + return $ B.divWith (name,"topic":classes,keyvals) $ tit <> bod "default-role" -> mempty <$ updateState (\s -> s { stateRstDefaultRole = case trim top of "" -> stateRstDefaultRole def role -> role }) x | x == "code" || x == "code-block" -> - codeblock (words $ fromMaybe [] $ lookup "class" fields) + codeblock name classes (lookup "number-lines" fields) (trim top) body "aafig" -> do - let attribs = ("", ["aafig"], map (second trimr) fields) + let attribs = (name, ["aafig"], map (second trimr) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body @@ -758,8 +761,7 @@ directive' = do $ B.imageWith attr src "" alt Nothing -> B.imageWith attr src "" alt "class" -> do - let attrs = ("", splitBy isSpace $ trim top, - map (second trimr) fields) + let attrs = (name, words (trim top), map (second trimr) fields) -- directive content or the first immediately 
following element children <- case body of "" -> block @@ -769,7 +771,7 @@ directive' = do pos <- getPosition logMessage $ SkippedContent (".. " ++ other) pos bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' - return $ B.divWith ("",[other],[]) bod + return $ B.divWith (name, other:classes, keyvals) bod tableDirective :: PandocMonad m => String -> [(String, String)] -> String -> RSTParser m Blocks @@ -989,10 +991,11 @@ toChunks = dropWhile null then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}" else s -codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks -codeblock classes numberLines lang body = +codeblock :: String -> [String] -> Maybe String -> String -> String + -> RSTParser m Blocks +codeblock ident classes numberLines lang body = return $ B.codeBlockWith attribs $ stripTrailingNewlines body - where attribs = ("", classes', kvs) + where attribs = (ident, classes', kvs) classes' = "sourceCode" : lang : maybe [] (const ["numberLines"]) numberLines ++ classes @@ -1266,7 +1269,9 @@ simpleTableHeader headless = try $ do rawContent <- if headless then return "" else simpleTableSep '=' >> anyLine - dashes <- simpleDashedLines '=' <|> simpleDashedLines '-' + dashes <- if headless + then simpleDashedLines '=' + else simpleDashedLines '=' <|> simpleDashedLines '-' newline let lines' = map snd dashes let indices = scanl (+) 0 lines' diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 15f0d991f..6fdbcb50e 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -85,12 +85,12 @@ import qualified Text.Pandoc.Builder as B (blockQuote, bulletList, code, import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.Definition (Attr, Block (BulletList, OrderedList), Inline (Space), ListNumberDelim (..), - ListNumberStyle (..), Meta, Pandoc (..), + ListNumberStyle (..), Pandoc (..), nullMeta) import Text.Pandoc.Options (ReaderOptions) -import Text.Pandoc.Parsing 
(F, ParserState, ParserT, blanklines, emailAddress, +import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress, many1Till, orderedListMarker, readWithM, - registerHeader, runF, spaceChar, stateMeta', + registerHeader, spaceChar, stateMeta, stateOptions, uri) import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast) import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space, @@ -126,7 +126,7 @@ parseVimwiki = do spaces eof st <- getState - let meta = runF (stateMeta' st) st + let meta = stateMeta st return $ Pandoc meta (toList bs) -- block parser @@ -444,8 +444,8 @@ ph s = try $ do many spaceChar >>string ('%':s) >> spaceChar contents <- trimInlines . mconcat <$> manyTill inline (lookAhead newline) --use lookAhead because of placeholder in the whitespace parser - let meta' = return $ B.setMeta s contents nullMeta :: F Meta - updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' } + let meta' = B.setMeta s contents nullMeta + updateState $ \st -> st { stateMeta = stateMeta st <> meta' } noHtmlPh :: PandocMonad m => VwParser m () noHtmlPh = try $ diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 9f48080b8..78a5a6a54 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -111,6 +111,7 @@ import qualified Control.Exception as E import Control.Monad (MonadPlus (..), msum, unless) import qualified Control.Monad.State.Strict as S import qualified Data.ByteString.Lazy as BL +import qualified Data.Bifunctor as Bifunctor import Data.Char (isAlpha, isDigit, isLetter, isLower, isSpace, isUpper, toLower) import Data.Data (Data, Typeable) @@ -637,8 +638,7 @@ inDirectory path action = E.bracket -- mapLeft :: (a -> b) -> Either a c -> Either b c -mapLeft f (Left x) = Left (f x) -mapLeft _ (Right x) = Right x +mapLeft = Bifunctor.first -- | Remove intermediate "." and ".." directories from a path. 
-- diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 37fec9f0f..3ec8781be 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -36,18 +36,21 @@ import Control.Arrow ((***)) import Control.Exception import Control.Monad (when) import Data.Char (toLower) +import Data.Data (Data) import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text, pack) import Data.Typeable -import Foreign.Lua (Lua, Pushable) +import Foreign.Lua (Lua, Peekable, Pushable) +import Foreign.Lua.Userdata ( ensureUserdataMetatable, pushAnyWithMetatable + , metatableName) import Text.Pandoc.Class (PandocIO) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Lua.Init (LuaException (LuaException), runPandocLua, registerScriptPath) import Text.Pandoc.Lua.StackInstances () -import Text.Pandoc.Lua.Util (addField, dofileWithTraceback) +import Text.Pandoc.Lua.Util (addField, addFunction, dofileWithTraceback) import Text.Pandoc.Options import Text.Pandoc.Templates import qualified Text.Pandoc.UTF8 as UTF8 @@ -106,17 +109,37 @@ data PandocLuaException = PandocLuaException String instance Exception PandocLuaException +-- | Readonly and lazy pandoc objects. +newtype LazyPandoc = LazyPandoc Pandoc + deriving (Data) + +instance Pushable LazyPandoc where + push lazyDoc = pushAnyWithMetatable pushPandocMetatable lazyDoc + where + pushPandocMetatable = ensureUserdataMetatable (metatableName lazyDoc) $ + addFunction "__index" indexLazyPandoc + +instance Peekable LazyPandoc where + peek = Lua.peekAny + +indexLazyPandoc :: LazyPandoc -> String -> Lua Lua.NumResults +indexLazyPandoc (LazyPandoc (Pandoc meta blks)) field = 1 <$ + case field of + "blocks" -> Lua.push blks + "meta" -> Lua.push meta + _ -> Lua.pushnil + -- | Convert Pandoc to custom markup. 
writeCustom :: FilePath -> WriterOptions -> Pandoc -> PandocIO Text writeCustom luaFile opts doc@(Pandoc meta _) = do res <- runPandocLua $ do + Lua.push (LazyPandoc doc) *> Lua.setglobal "PANDOC_DOCUMENT" registerScriptPath luaFile stat <- dofileWithTraceback luaFile -- check for error in lua script (later we'll change the return type -- to handle this more gracefully): when (stat /= Lua.OK) $ Lua.tostring' (-1) >>= throw . PandocLuaException . UTF8.toString - -- TODO - call hierarchicalize, so we have that info rendered <- docToCustom opts doc context <- metaToJSON opts blockListToCustom diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 524d20fd1..d80b4a7bc 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -349,6 +349,8 @@ writeDocx opts doc@(Pandoc meta _) = do "application/vnd.openxmlformats-officedocument.extended-properties+xml") ,("/docProps/core.xml", "application/vnd.openxmlformats-package.core-properties+xml") + ,("/docProps/custom.xml", + "application/vnd.openxmlformats-officedocument.custom-properties+xml") ,("/word/styles.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml") ,("/word/document.xml", @@ -507,6 +509,19 @@ writeDocx opts doc@(Pandoc meta _) = do ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps + let customProperties :: [(String, String)] + customProperties = [] -- FIXME + let mkCustomProp (k, v) pid = mknode "property" + [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") + ,("pid", show pid) + ,("name", k)] $ mknode "vt:lpwstr" [] v + let customPropsPath = "docProps/custom.xml" + let customProps = mknode "Properties" + [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties") + ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes") + ] $ zipWith mkCustomProp customProperties [(2 :: Int)..] 
+ let customPropsEntry = toEntry customPropsPath epochtime $ renderXml customProps + let relsPath = "_rels/.rels" let rels = mknode "Relationships" [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] $ map (\attrs -> mknode "Relationship" attrs ()) @@ -519,6 +534,9 @@ writeDocx opts doc@(Pandoc meta _) = do , [("Id","rId3") ,("Type","http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties") ,("Target","docProps/core.xml")] + , [("Id","rId5") + ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties") + ,("Target","docProps/custom.xml")] ] let relsEntry = toEntry relsPath epochtime $ renderXml rels @@ -558,7 +576,8 @@ writeDocx opts doc@(Pandoc meta _) = do contentTypesEntry : relsEntry : contentEntry : relEntry : footnoteRelEntry : numEntry : styleEntry : footnotesEntry : commentsEntry : - docPropsEntry : docPropsAppEntry : themeEntry : + docPropsEntry : docPropsAppEntry : customPropsEntry : + themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : imageEntries ++ headerFooterEntries ++ miscRelEntries ++ otherMediaEntries @@ -945,8 +964,13 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do else withParaProp (pCustomStyle "TableCaption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () - let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) - $ blocksToOpenXML opts cell + -- Table cells require a <w:p> element, even an empty one! + -- Not in the spec but in Word 2007, 2010. See #4953. + let cellToOpenXML (al, cell) = do + es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell + if any (\e -> qName (elName e) == "p") es + then return es + else return $ es ++ [mknode "w:p" [] ()] headers' <- mapM cellToOpenXML $ zip aligns headers rows' <- mapM (mapM cellToOpenXML . 
zip aligns) rows let borderProps = mknode "w:tcPr" [] diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index c1b5d0fa4..11d58b90a 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -655,7 +655,10 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do [ (if key == "startFrom" then "firstnumber" else key) ++ "=" ++ mbBraced attr | - (key,attr) <- keyvalAttr ] ++ + (key,attr) <- keyvalAttr, + key `notElem` ["exports", "tangle", "results"] + -- see #4889 + ] ++ (if identifier == "" then [] else [ "label=" ++ ref ]) @@ -1366,19 +1369,27 @@ citationsToBiblatex AuthorInText -> "textcite" NormalCitation -> "autocite" -citationsToBiblatex (c:cs) = do - args <- mapM convertOne (c:cs) - return $ text cmd <> foldl' (<>) empty args - where - cmd = case citationMode c of - SuppressAuthor -> "\\autocites*" - AuthorInText -> "\\textcites" - NormalCitation -> "\\autocites" - convertOne Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - } - = citeArguments p s k +citationsToBiblatex (c:cs) + | all (\cit -> null (citationPrefix cit) && null (citationSuffix cit)) (c:cs) + = do + let cmd = case citationMode c of + SuppressAuthor -> "\\autocite*" + AuthorInText -> "\\textcite" + NormalCitation -> "\\autocite" + return $ text cmd <> + braces (text (intercalate "," (map citationId (c:cs)))) + | otherwise = do + let cmd = case citationMode c of + SuppressAuthor -> "\\autocites*" + AuthorInText -> "\\textcites" + NormalCitation -> "\\autocites" + let convertOne Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + } + = citeArguments p s k + args <- mapM convertOne (c:cs) + return $ text cmd <> foldl' (<>) empty args citationsToBiblatex _ = return empty diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 9a4acb59d..ad8d5c483 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -847,6 
+847,13 @@ blockListToMarkdown opts blocks = do Plain ils : fixBlocks bs fixBlocks (Plain ils : bs) = Para ils : fixBlocks bs + fixBlocks (r@(RawBlock f raw) : b : bs) + | not (null raw) + , last raw /= '\n' = + case b of + Plain{} -> r : fixBlocks (b:bs) + RawBlock{} -> r : fixBlocks (b:bs) + _ -> RawBlock f (raw ++ "\n") : fixBlocks (b:bs) -- #4629 fixBlocks (x : xs) = x : fixBlocks xs fixBlocks [] = [] isListBlock (BulletList _) = True diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index 18aebc364..1374cdde3 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -250,11 +250,11 @@ blockToMuse (Header level (ident,_,_) inlines) = do let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) then empty else "#" <> text ident <> cr - let header' = if topLevel then (text $ replicate level '*') <> space else mempty + let header' = if topLevel then text (replicate level '*') <> space else mempty return $ blankline <> attr' $$ nowrap (header' <> contents) <> blankline -- https://www.gnu.org/software/emacs-muse/manual/muse.html#Horizontal-Rules-and-Anchors blockToMuse HorizontalRule = return $ blankline $$ "----" $$ blankline -blockToMuse (Table caption _ _ headers rows) = do +blockToMuse (Table caption _ _ headers rows) = do caption' <- inlineListToMuse caption headers' <- mapM blockListToMuse headers rows' <- mapM (mapM blockListToMuse) rows @@ -294,10 +294,10 @@ noteToMuse :: PandocMonad m -> Muse m Doc noteToMuse num note = hang (length marker) (text marker) <$> - (local (\env -> env { envInsideBlock = True + local (\env -> env { envInsideBlock = True , envInlineStart = True , envAfterSpace = True - }) $ blockListToMuse note) + }) (blockListToMuse note) where marker = "[" ++ show num ++ "] " diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 1c9481630..ac2ed5b4c 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ 
-36,8 +36,9 @@ import Control.Monad.Except (catchError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.Generics (everywhere', mkT) -import Data.List (isPrefixOf) +import Data.List (isPrefixOf, intercalate) import Data.Maybe (fromMaybe) +import qualified Data.Map as Map import qualified Data.Text.Lazy as TL import System.FilePath (takeDirectory, takeExtension, (<.>)) import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) @@ -49,7 +50,8 @@ import Text.Pandoc.Logging import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.Pandoc.Pretty -import Text.Pandoc.Shared (stringify) +import Text.Pandoc.Shared (stringify, normalizeDate) +import Text.Pandoc.Writers.Shared (lookupMetaString) import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) @@ -81,6 +83,7 @@ pandocToODT :: PandocMonad m -> O m B.ByteString pandocToODT opts doc@(Pandoc meta _) = do let title = docTitle meta + let authors = docAuthors meta lang <- toLang (getLang opts meta) refArchive <- case writerReferenceDoc opts of @@ -123,6 +126,15 @@ pandocToODT opts doc@(Pandoc meta _) = do ) ) let archive' = addEntryToArchive manifestEntry archive + let userDefinedMetaFields = [k | k <- Map.keys (unMeta meta) + , k `notElem` ["title", "lang", "author", "date"]] + let escapedText = text . escapeStringForXML + let userDefinedMeta = + map (\k -> inTags False "meta:user-defined" + [ ("meta_name", escapeStringForXML k) + ,("meta-value-type", "string") + ] (escapedText $ lookupMetaString k meta)) userDefinedMetaFields + let metaTag metafield = inTagsSimple metafield . 
escapedText let metaEntry = toEntry "meta.xml" epochtime $ fromStringLazy $ render Nothing $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" @@ -134,14 +146,21 @@ pandocToODT opts doc@(Pandoc meta _) = do ,("xmlns:meta","urn:oasis:names:tc:opendocument:xmlns:meta:1.0") ,("xmlns:ooo","http://openoffice.org/2004/office") ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#") - ,("office:version","1.2")] ( inTagsSimple "office:meta" $ - ( inTagsSimple "dc:title" - (text $ escapeStringForXML (stringify title)) + ,("office:version","1.2")] ( inTags True "office:meta" [] $ + ( metaTag "dc:title" (stringify title) $$ case lang of - Just l -> inTagsSimple "dc:language" - (text (escapeStringForXML (renderLang l))) + Just l -> metaTag "dc:language" (renderLang l) Nothing -> empty + $$ + metaTag "dc:creator" + (intercalate "; " (map stringify authors)) + $$ + maybe mempty + (metaTag "dc:date") + (normalizeDate (lookupMetaString "date" meta)) + $$ + vcat userDefinedMeta ) ) ) diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index ed2c46d7b..a7bf30aaa 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -41,7 +41,6 @@ module Text.Pandoc.Writers.Shared ( , isDisplayMath , fixDisplayMath , unsmartify - , hasSimpleCells , gridTable , lookupMetaBool , lookupMetaBlocks @@ -55,7 +54,6 @@ module Text.Pandoc.Writers.Shared ( where import Prelude import Control.Monad (zipWithM) -import Data.Monoid (Any (..)) import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object), encode, fromJSON) import Data.Char (chr, ord, isAscii, isSpace) @@ -72,7 +70,6 @@ import Text.Pandoc.Pretty import Text.Pandoc.Shared (stringify) import Text.Pandoc.UTF8 (toStringLazy) import Text.Pandoc.XML (escapeStringForXML) -import Text.Pandoc.Walk (query) import Text.Printf (printf) -- | Create JSON value for template from a 'Meta' and an association list @@ -246,21 +243,6 @@ unsmartify opts ('\8216':xs) = '\'' : unsmartify opts 
xs unsmartify opts (x:xs) = x : unsmartify opts xs unsmartify _ [] = [] --- | True if block is a table that can be represented with --- one line per row. -hasSimpleCells :: Block -> Bool -hasSimpleCells (Table _caption _aligns _widths headers rows) = - all isSimpleCell (concat (headers:rows)) - where - isLineBreak LineBreak = Any True - isLineBreak _ = Any False - hasLineBreak = getAny . query isLineBreak - isSimpleCell [Plain ils] = not (hasLineBreak ils) - isSimpleCell [Para ils ] = not (hasLineBreak ils) - isSimpleCell [] = True - isSimpleCell _ = False -hasSimpleCells _ = False - gridTable :: Monad m => WriterOptions -> (WriterOptions -> [Block] -> m Doc) diff --git a/stack.lts10.yaml b/stack.lts10.yaml index 76fc3b921..3a3bb0ea3 100644 --- a/stack.lts10.yaml +++ b/stack.lts10.yaml @@ -11,9 +11,7 @@ flags: packages: - '.' extra-deps: -- pandoc-citeproc-0.14.4 -- skylighting-0.7.2 -- skylighting-core-0.7.2 +- pandoc-citeproc-0.14.7 - ansi-terminal-0.8.0.2 - tasty-1.0.1.1 - test-framework-0.8.2.0 @@ -26,6 +24,8 @@ extra-deps: - hs-bibutils-6.6.0.0 - hslua-1.0.1 - hslua-module-text-0.2.0 +- skylighting-0.7.4 +- skylighting-core-0.7.4 ghc-options: "$locals": -fhide-source-paths -XNoImplicitPrelude resolver: lts-10.10 diff --git a/stack.lts11.yaml b/stack.lts11.yaml index afacb655f..1325a2b8e 100644 --- a/stack.lts11.yaml +++ b/stack.lts11.yaml @@ -11,7 +11,7 @@ flags: packages: - '.' extra-deps: -- pandoc-citeproc-0.14.4 +- pandoc-citeproc-0.14.7 - skylighting-0.7.2 - skylighting-core-0.7.2 - pandoc-types-1.17.5.1 @@ -22,6 +22,8 @@ extra-deps: - yaml-0.9.0 - hslua-1.0.1 - hslua-module-text-0.2.0 +- skylighting-0.7.4 +- skylighting-core-0.7.4 ghc-options: "$locals": -fhide-source-paths -XNoImplicitPrelude resolver: lts-11.17 diff --git a/stack.lts9.yaml b/stack.lts9.yaml index b12cd57dc..e65a3a618 100644 --- a/stack.lts9.yaml +++ b/stack.lts9.yaml @@ -11,7 +11,7 @@ flags: packages: - '.' 
extra-deps: -- pandoc-citeproc-0.14.4 +- pandoc-citeproc-0.14.7 - hslua-1.0.1 - hslua-module-text-0.2.0 - ansi-terminal-0.8.0.2 @@ -30,8 +30,8 @@ extra-deps: - Cabal-2.4.0.0 - dlist-0.8.0.4 - parsec-3.1.13.0 -- skylighting-core-0.7.3 -- skylighting-0.7.3 +- skylighting-0.7.4 +- skylighting-core-0.7.4 - yaml-0.9.0 - hslua-1.0.0 - hslua-module-text-0.2.0 diff --git a/stack.yaml b/stack.yaml index 986cae642..78b1f5175 100644 --- a/stack.yaml +++ b/stack.yaml @@ -19,13 +19,15 @@ packages: - foundation extra-dep: true extra-deps: -- pandoc-citeproc-0.14.4 +- pandoc-citeproc-0.14.7 - haddock-library-1.6.0 - HsYAML-0.1.1.1 - texmath-0.11.1 - yaml-0.9.0 - hslua-1.0.1 - hslua-module-text-0.2.0 +- skylighting-0.7.4 +- skylighting-core-0.7.4 ghc-options: "$locals": -fhide-source-paths -XNoImplicitPrelude resolver: lts-12.6 diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index eedb99029..514b6bb7b 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -7,6 +7,7 @@ import Data.Text (Text) import qualified Data.Text as T import Test.Tasty import Test.Tasty.QuickCheck +import Test.Tasty.Options (IsOption(defaultValue)) import Tests.Helpers import Text.Pandoc import Text.Pandoc.Arbitrary () @@ -77,5 +78,9 @@ tests = [ testGroup "base tag" , test htmlNativeDivs "<main> followed by text" $ "<main>main content</main>non-main content" =?> doc (divWith ("", [], [("role", "main")]) (plain (text "main content")) <> plain (text "non-main content")) ] - , testProperty "Round trip" (withMaxSuccess 25 roundTrip) + , askOption $ \(QuickCheckTests numtests) -> + testProperty "Round trip" $ + withMaxSuccess (if QuickCheckTests numtests == defaultValue + then 25 + else numtests) roundTrip ] diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index 958a74915..bd63236bd 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -8,6 +8,7 @@ import Data.Text (Text) import qualified Data.Text as T import Test.Tasty import 
Test.Tasty.QuickCheck +import Test.Tasty.Options (IsOption(defaultValue)) import Tests.Helpers import Text.Pandoc import Text.Pandoc.Arbitrary () @@ -156,6 +157,8 @@ tests = , "One character code" =: "=c=" =?> para (code "c") + , "Code with equal sign" =: "=foo = bar=" =?> para (code "foo = bar") + , "Three = characters is not a code" =: "===" =?> para "===" , "Multiline code markup" =: @@ -260,7 +263,11 @@ tests = ] , testGroup "Blocks" - [ testProperty "Round trip" (withMaxSuccess 25 roundTrip) + [ askOption $ \(QuickCheckTests numtests) -> + testProperty "Round trip" $ + withMaxSuccess (if QuickCheckTests numtests == defaultValue + then 25 + else numtests) roundTrip , "Block elements end paragraphs" =: T.unlines [ "First paragraph" , "----" @@ -891,6 +898,17 @@ tests = [[plain "", plain "Foo"], [plain "", plain ""], [plain "bar", plain ""]] + , "Empty cell in the middle" =: + T.unlines + [ " 1 | 2 | 3" + , " 4 | | 6" + , " 7 | 8 | 9" + ] =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0), (AlignDefault, 0.0)] + [] + [[plain "1", plain "2", plain "3"], + [plain "4", mempty, plain "6"], + [plain "7", plain "8", plain "9"]] ] , testGroup "Lists" [ "Bullet list" =: diff --git a/test/command/3123.md b/test/command/3123.md new file mode 100644 index 000000000..b71241cdf --- /dev/null +++ b/test/command/3123.md @@ -0,0 +1,13 @@ +``` +% pandoc -f markdown -t native +<?php echo "1" ; ?> +^D +[RawBlock (Format "html") "<?php echo \"1\" ; ?>"] +``` + +``` +% pandoc -f markdown -t native +a<?php echo "1" ; ?> +^D +[Para [Str "a",RawInline (Format "html") "<?php echo \"1\" ; ?>"]] +``` diff --git a/test/command/3494.md b/test/command/3494.md index 249973fb3..7c480fde6 100644 --- a/test/command/3494.md +++ b/test/command/3494.md @@ -25,7 +25,7 @@ <td style="text-align: left;">thank you</td> </tr> <tr class="odd"> -<td style="text-align: right;"><p><em>blah</em></p></td> +<td style="text-align: right;"><em>blah</em></td> <td style="text-align: 
left;"><em>blah</em></td> <td style="text-align: left;"><em>blah</em></td> </tr> diff --git a/test/command/3577.md b/test/command/3577.md index ca9dba97c..dc88937e9 100644 --- a/test/command/3577.md +++ b/test/command/3577.md @@ -15,11 +15,9 @@ \caption{Subfigure with Subfloat} \end{figure} ^D - <figure> <img src="img1.jpg" alt="Caption 1" /><figcaption>Caption 1</figcaption> </figure> - <figure> <img src="img2.jpg" alt="Caption 2" /><figcaption>Caption 2</figcaption> </figure> diff --git a/test/command/3983.md b/test/command/3983.md index 7eaeb99f0..5d83941e7 100644 --- a/test/command/3983.md +++ b/test/command/3983.md @@ -1,5 +1,5 @@ ``` -pandoc -f latex -t native +pandoc -f latex+raw_tex -t native \def\filename@area{foo:bar:baz} \makeatletter \graphicspath\expandafter{\expandafter{\filename@area}}% @@ -10,7 +10,7 @@ pandoc -f latex -t native ``` ``` -pandoc -f latex -t native +pandoc -f latex+raw_tex -t native \makeatletter \newcommand\urlfootnote@[1]{\footnote{\url@{#1}}} \DeclareRobustCommand{\urlfootnote}{\hyper@normalise\urlfootnote@} @@ -21,7 +21,7 @@ pandoc -f latex -t native ``` ``` -pandoc -f latex -t native +pandoc -f latex+raw_tex -t native \def\foo{bar} \expandafter\bam\foo ^D diff --git a/test/command/4382.md b/test/command/4382.md new file mode 100644 index 000000000..4a840489f --- /dev/null +++ b/test/command/4382.md @@ -0,0 +1,10 @@ +``` +% pandoc -f rst -t native +- + +===== +^D +[BulletList + [[]] +,HorizontalRule] +``` diff --git a/test/command/4527.md b/test/command/4527.md index 984333559..2f54fca60 100644 --- a/test/command/4527.md +++ b/test/command/4527.md @@ -1,7 +1,7 @@ # Raw TeX blocks in CommonMark with and without raw_tex ``` -% pandoc -f latex -t commonmark-raw_tex +% pandoc -f latex+raw_tex -t commonmark-raw_tex \someunknowncommand Hello. @@ -10,7 +10,7 @@ Hello. ``` ``` -% pandoc -f latex -t commonmark+raw_tex +% pandoc -f latex+raw_tex -t commonmark+raw_tex \someunknowncommand Hello. @@ -19,3 +19,18 @@ Hello. Hello. 
``` + +``` +% pandoc -f latex -t native +\maketitle +^D +[] +``` + +``` +% pandoc -f latex -t rst +\maketitle +Hello. +^D +Hello. +``` diff --git a/test/command/4635.md b/test/command/4635.md new file mode 100644 index 000000000..320b83956 --- /dev/null +++ b/test/command/4635.md @@ -0,0 +1,31 @@ +``` +% pandoc -f markdown -t native +(cf. +foo) +^D +[Para [Str "(cf.",SoftBreak,Str "foo)"]] +``` + +``` +% pandoc -f markdown -t native +a (cf. +foo) +^D +[Para [Str "a",Space,Str "(cf.",SoftBreak,Str "foo)"]] +``` + +``` +% pandoc -f markdown -t native +cf. +foo +^D +[Para [Str "cf.\160foo"]] +``` + +``` +% pandoc -f markdown -t native +a cf. +foo +^D +[Para [Str "a",SoftBreak,Str "cf.\160foo"]] +``` diff --git a/test/command/4715.md b/test/command/4715.md new file mode 100644 index 000000000..50873c2b5 --- /dev/null +++ b/test/command/4715.md @@ -0,0 +1,16 @@ +``` +% pandoc -f rst -t native +.. toctree:: + :name: tree1 + :class: foo bar + :caption: Indice dei contenuti + :numbered: + :maxdepth: 3 + + premessa.rst + acquisizione-software.rst + riuso-software.rst +^D +[Div ("tree1",["toctree","foo","bar"],[("caption","Indice dei contenuti"),("numbered",""),("maxdepth","3")]) + [Para [Str "premessa.rst",SoftBreak,Str "acquisizione-software.rst",SoftBreak,Str "riuso-software.rst"]]] + ``` diff --git a/test/command/4960.md b/test/command/4960.md new file mode 100644 index 000000000..7253b533a --- /dev/null +++ b/test/command/4960.md @@ -0,0 +1,22 @@ +``` +% pandoc -t latex --biblatex +[@a1;@a2;@a3] +^D +\autocite{a1,a2,a3} +``` + +``` +% pandoc -t latex --biblatex +@a1 [@a2;@a3] +^D +\textcite{a1,a2,a3} +``` + +``` +% pandoc -t latex --biblatex +[@a1, blah; @a2; see @a3] +^D +\autocites[blah]{a1}{a2}[see][]{a3} +``` + + diff --git a/test/command/ifstrequal.md b/test/command/ifstrequal.md index 4ad04d2e1..24ed7ef54 100644 --- a/test/command/ifstrequal.md +++ b/test/command/ifstrequal.md @@ -5,6 +5,5 @@ \h{a} \h{b} ^D -[Para [Emph [Str "no"]] -,Para [Str "\225",SoftBreak,Str 
"b"]] +[Para [Emph [Str "no"],SoftBreak,Str "\225",SoftBreak,Str "b"]] ``` diff --git a/test/command/macros.md b/test/command/macros.md index d091c2191..0c91944a1 100644 --- a/test/command/macros.md +++ b/test/command/macros.md @@ -4,6 +4,7 @@ $\my+\my$ ^D \newcommand{\my}{\phi} + $\phi+\phi$ ``` @@ -13,6 +14,7 @@ $\phi+\phi$ $\my+\my$ ^D \newcommand{\my}{\phi} + $\my+\my$ ``` @@ -75,6 +77,7 @@ x &= y\\\end{aligned}\] \end{equation} ^D \newcommand{\my}{\phi} + \begin{equation} \phi+\phi \end{equation} @@ -88,6 +91,7 @@ x &= y\\\end{aligned}\] \end{equation} ^D \newcommand{\my}{\phi} + \begin{equation} \my+\my \end{equation} @@ -101,3 +105,21 @@ x &= y\\\end{aligned}\] \newcommand{\my}{\emph{a}} \emph{a} ``` + +<https://tex.stackexchange.com/questions/258/what-is-the-difference-between-let-and-def> + +``` +% pandoc -f latex -t plain +\def\bar{hello} +\let\fooi\bar +\def\fooii{\bar} +\fooi +\fooii + +\def\bar{goodbye} +\fooi +\fooii +^D +hello+hello + +hello+goodbye +``` + diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx Binary files differindex 28d6f035e..d118a6fb0 100644 --- a/test/docx/golden/block_quotes.docx +++ b/test/docx/golden/block_quotes.docx diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx Binary files differindex af85598dc..7068893c1 100644 --- a/test/docx/golden/codeblock.docx +++ b/test/docx/golden/codeblock.docx diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx Binary files differindex 33831dc06..2cdf4c210 100644 --- a/test/docx/golden/comments.docx +++ b/test/docx/golden/comments.docx diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx Binary files differindex 78f56893c..f7e332963 100644 --- a/test/docx/golden/custom_style_no_reference.docx +++ b/test/docx/golden/custom_style_no_reference.docx diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx 
Binary files differindex dfc2c960b..44900181e 100644 --- a/test/docx/golden/custom_style_reference.docx +++ b/test/docx/golden/custom_style_reference.docx diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx Binary files differindex c3f076387..b57686e7d 100644 --- a/test/docx/golden/definition_list.docx +++ b/test/docx/golden/definition_list.docx diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx Binary files differindex c2b6206a3..a51443929 100644 --- a/test/docx/golden/headers.docx +++ b/test/docx/golden/headers.docx diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx Binary files differindex 95a28a098..e1f043ca3 100644 --- a/test/docx/golden/image.docx +++ b/test/docx/golden/image.docx diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx Binary files differindex 1d415e411..46a3a6172 100644 --- a/test/docx/golden/inline_code.docx +++ b/test/docx/golden/inline_code.docx diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx Binary files differindex 9e07bd25d..c214c7eda 100644 --- a/test/docx/golden/inline_formatting.docx +++ b/test/docx/golden/inline_formatting.docx diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx Binary files differindex 62c5943ba..6ae175e4f 100644 --- a/test/docx/golden/inline_images.docx +++ b/test/docx/golden/inline_images.docx diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx Binary files differindex c86f9aecd..7376966b3 100644 --- a/test/docx/golden/link_in_notes.docx +++ b/test/docx/golden/link_in_notes.docx diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx Binary files differindex 652a93569..d5839c517 100644 --- a/test/docx/golden/links.docx +++ b/test/docx/golden/links.docx diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx Binary files differindex 5e900feb1..bcc5e706d 100644 
--- a/test/docx/golden/lists.docx +++ b/test/docx/golden/lists.docx diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx Binary files differindex 278edaa99..bd35f2887 100644 --- a/test/docx/golden/lists_continuing.docx +++ b/test/docx/golden/lists_continuing.docx diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx Binary files differindex 112b824b5..f693fca6f 100644 --- a/test/docx/golden/lists_restarting.docx +++ b/test/docx/golden/lists_restarting.docx diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx Binary files differindex c2a10b828..20f83b3ca 100644 --- a/test/docx/golden/nested_anchors_in_header.docx +++ b/test/docx/golden/nested_anchors_in_header.docx diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx Binary files differindex c6093c18a..197ccff94 100644 --- a/test/docx/golden/notes.docx +++ b/test/docx/golden/notes.docx diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx Binary files differindex 34de65e2e..ace98df83 100644 --- a/test/docx/golden/table_one_row.docx +++ b/test/docx/golden/table_one_row.docx diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx Binary files differindex c27f99736..fef0b16f5 100644 --- a/test/docx/golden/table_with_list_cell.docx +++ b/test/docx/golden/table_with_list_cell.docx diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx Binary files differindex 4fcdd73c3..ec73cfeef 100644 --- a/test/docx/golden/tables.docx +++ b/test/docx/golden/tables.docx diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx Binary files differindex 7b404dba1..dab2c3170 100644 --- a/test/docx/golden/track_changes_deletion.docx +++ b/test/docx/golden/track_changes_deletion.docx diff --git a/test/docx/golden/track_changes_insertion.docx 
b/test/docx/golden/track_changes_insertion.docx Binary files differindex 500a7c239..7b5af8ed5 100644 --- a/test/docx/golden/track_changes_insertion.docx +++ b/test/docx/golden/track_changes_insertion.docx diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx Binary files differindex 05705c040..666cd85eb 100644 --- a/test/docx/golden/track_changes_move.docx +++ b/test/docx/golden/track_changes_move.docx diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx Binary files differindex c1626874d..ef9480059 100644 --- a/test/docx/golden/unicode.docx +++ b/test/docx/golden/unicode.docx diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx Binary files differindex d2ada67fa..dc9453430 100644 --- a/test/docx/golden/verbatim_subsuper.docx +++ b/test/docx/golden/verbatim_subsuper.docx diff --git a/test/lhs-test.html b/test/lhs-test.html index 5fce225df..3a3247982 100644 --- a/test/lhs-test.html +++ b/test/lhs-test.html @@ -29,7 +29,7 @@ a.sourceLine { text-indent: -1em; padding-left: 1em; } pre.numberSource a.sourceLine { position: relative; left: -4em; } pre.numberSource a.sourceLine::before - { content: attr(data-line-number); + { content: attr(title); position: relative; left: -1em; text-align: right; vertical-align: baseline; border: none; pointer-events: all; display: inline-block; -webkit-touch-callout: none; -webkit-user-select: none; @@ -82,9 +82,9 @@ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warni <h1 id="lhs-test">lhs test</h1> <p><code>unsplit</code> is an arrow that takes a pair of values and combines them to return a single value:</p> -<div class="sourceCode" id="cb1"><pre class="sourceCode literate haskell"><code class="sourceCode haskell"><a class="sourceLine" id="cb1-1" data-line-number="1"><span class="ot">unsplit ::</span> (<span class="dt">Arrow</span> a) <span class="ot">=></span> (b <span class="ot">-></span> c <span 
class="ot">-></span> d) <span class="ot">-></span> a (b, c) d</a> -<a class="sourceLine" id="cb1-2" data-line-number="2">unsplit <span class="fu">=</span> arr <span class="fu">.</span> uncurry</a> -<a class="sourceLine" id="cb1-3" data-line-number="3"> <span class="co">-- arr (\op (x,y) -> x `op` y)</span></a></code></pre></div> +<div class="sourceCode" id="cb1"><pre class="sourceCode literate haskell"><code class="sourceCode haskell"><a class="sourceLine" id="cb1-1" title="1"><span class="ot">unsplit ::</span> (<span class="dt">Arrow</span> a) <span class="ot">=></span> (b <span class="ot">-></span> c <span class="ot">-></span> d) <span class="ot">-></span> a (b, c) d</a> +<a class="sourceLine" id="cb1-2" title="2">unsplit <span class="fu">=</span> arr <span class="fu">.</span> <span class="fu">uncurry</span></a> +<a class="sourceLine" id="cb1-3" title="3"> <span class="co">-- arr (\op (x,y) -> x `op` y)</span></a></code></pre></div> <p><code>(***)</code> combines two arrows into a new arrow by running the two arrows on a pair of values (one arrow on the first item of the pair and one arrow on the second item of the pair).</p> diff --git a/test/lhs-test.html+lhs b/test/lhs-test.html+lhs index 78bc1d426..c40f6173f 100644 --- a/test/lhs-test.html+lhs +++ b/test/lhs-test.html+lhs @@ -29,7 +29,7 @@ a.sourceLine { text-indent: -1em; padding-left: 1em; } pre.numberSource a.sourceLine { position: relative; left: -4em; } pre.numberSource a.sourceLine::before - { content: attr(data-line-number); + { content: attr(title); position: relative; left: -1em; text-align: right; vertical-align: baseline; border: none; pointer-events: all; display: inline-block; -webkit-touch-callout: none; -webkit-user-select: none; @@ -82,9 +82,9 @@ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warni <h1 id="lhs-test">lhs test</h1> <p><code>unsplit</code> is an arrow that takes a pair of values and combines them to return a single value:</p> -<div class="sourceCode" 
id="cb1"><pre class="sourceCode literate literatehaskell"><code class="sourceCode literatehaskell"><a class="sourceLine" id="cb1-1" data-line-number="1"><span class="ot">> unsplit ::</span> (<span class="dt">Arrow</span> a) <span class="ot">=></span> (b <span class="ot">-></span> c <span class="ot">-></span> d) <span class="ot">-></span> a (b, c) d</a> -<a class="sourceLine" id="cb1-2" data-line-number="2"><span class="ot">></span> unsplit <span class="fu">=</span> arr <span class="fu">.</span> uncurry</a> -<a class="sourceLine" id="cb1-3" data-line-number="3"><span class="ot">></span> <span class="co">-- arr (\op (x,y) -> x `op` y)</span></a></code></pre></div> +<div class="sourceCode" id="cb1"><pre class="sourceCode literate literatehaskell"><code class="sourceCode literatehaskell"><a class="sourceLine" id="cb1-1" title="1"><span class="ot">> unsplit ::</span> (<span class="dt">Arrow</span> a) <span class="ot">=></span> (b <span class="ot">-></span> c <span class="ot">-></span> d) <span class="ot">-></span> a (b, c) d</a> +<a class="sourceLine" id="cb1-2" title="2"><span class="ot">></span> unsplit <span class="fu">=</span> arr <span class="fu">.</span> <span class="fu">uncurry</span></a> +<a class="sourceLine" id="cb1-3" title="3"><span class="ot">></span> <span class="co">-- arr (\op (x,y) -> x `op` y)</span></a></code></pre></div> <p><code>(***)</code> combines two arrows into a new arrow by running the two arrows on a pair of values (one arrow on the first item of the pair and one arrow on the second item of the pair).</p> diff --git a/test/lhs-test.latex b/test/lhs-test.latex index ba9d294c0..8379864ec 100644 --- a/test/lhs-test.latex +++ b/test/lhs-test.latex @@ -103,7 +103,7 @@ return a single value: \begin{Shaded} \begin{Highlighting}[] \OtherTok{unsplit ::}\NormalTok{ (}\DataTypeTok{Arrow}\NormalTok{ a) }\OtherTok{=>}\NormalTok{ (b }\OtherTok{->}\NormalTok{ c }\OtherTok{->}\NormalTok{ d) }\OtherTok{->}\NormalTok{ a (b, c) d} -\NormalTok{unsplit 
}\FunctionTok{=}\NormalTok{ arr }\FunctionTok{.}\NormalTok{ uncurry} +\NormalTok{unsplit }\FunctionTok{=}\NormalTok{ arr }\FunctionTok{.} \FunctionTok{uncurry} \CommentTok{-- arr (\textbackslash{}op (x,y) -> x `op` y)} \end{Highlighting} \end{Shaded} diff --git a/test/markdown-citations.native b/test/markdown-citations.native index c77ccbbfc..3d37dbae4 100644 --- a/test/markdown-citations.native +++ b/test/markdown-citations.native @@ -13,5 +13,5 @@ ,[Para [Str "Citation",Space,Str "with",Space,Str "a",Space,Str "suffix",Space,Str "and",Space,Str "locator",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Space,Str "pp.\160\&33,",Space,Str "35-37,",Space,Str "and",Space,Str "nowhere",Space,Str "else"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@item1",Space,Str "pp.",Space,Str "33,",Space,Str "35-37,",Space,Str "and",Space,Str "nowhere",Space,Str "else]"],Str "."]] ,[Para [Str "Citation",Space,Str "with",Space,Str "suffix",Space,Str "only",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Space,Str "and",Space,Str "nowhere",Space,Str "else"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@item1",Space,Str "and",Space,Str "nowhere",Space,Str "else]"],Str "."]] ,[Para [Str "Now",Space,Str "some",Space,Str "modifiers.",Note [Para [Str "Like",Space,Str "a",Space,Str "citation",Space,Str "without",Space,Str "author:",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0}] [Str "[-@item1]"],Str ",",Space,Str "and",Space,Str "now",Space,Str "Doe",Space,Str "with",Space,Str "a",Space,Str "locator",Space,Cite [Citation {citationId = "item2", citationPrefix = [], citationSuffix = [Space,Str "p.\160\&44"], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0}] [Str "[-@item2",Space,Str "p.",Space,Str "44]"],Str "."]]]] - ,[Para 
[Str "With",Space,Str "some",Space,Str "markup",Space,Cite [Citation {citationId = "item1", citationPrefix = [Emph [Str "see"]], citationSuffix = [Space,Str "p.",Space,Strong [Str "32"]], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[*see*",Space,Str "@item1",Space,Str "p.",Space,Str "**32**]"],Str "."]]] + ,[Para [Str "With",Space,Str "some",Space,Str "markup",Space,Cite [Citation {citationId = "item1", citationPrefix = [Emph [Str "see"]], citationSuffix = [Space,Str "p.\160",Strong [Str "32"]], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[*see*",Space,Str "@item1",Space,Str "p.",Space,Str "**32**]"],Str "."]]] ,Header 1 ("references",[],[]) [Str "References"]] diff --git a/test/writer.markdown b/test/writer.markdown index 0cc465f1e..de0821970 100644 --- a/test/writer.markdown +++ b/test/writer.markdown @@ -409,6 +409,7 @@ And this is **strong** </tr> </table> <script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script> + Here's a simple block: <div> @@ -446,6 +447,7 @@ foo This should just be an HTML comment: <!-- Comment --> + Multiline: <!-- @@ -455,6 +457,7 @@ Blah <!-- This is another comment. --> + Code block: <!-- Comment --> @@ -462,6 +465,7 @@ Code block: Just plain comment, with trailing spaces on the line: <!-- foo --> + Code: <hr /> |