From 6a235ba60693596af3f13b093b83defa37501e09 Mon Sep 17 00:00:00 2001 From: Alexander Kondratskiy Date: Sat, 13 Jul 2013 02:23:27 -0400 Subject: Checking options before applying syntax highlighting for HTML output --- src/Text/Pandoc/Writers/HTML.hs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 57bf2a349..cfc187e02 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -422,7 +422,10 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do adjCode = if tolhs then unlines . map ("> " ++) . lines $ rawCode else rawCode - case highlight formatHtmlBlock (id',classes',keyvals) adjCode of + hlCode = if writerHighlight opts -- check highlighting options + then highlight formatHtmlBlock (id',classes',keyvals) adjCode + else Nothing + case hlCode of Nothing -> return $ addAttrs opts (id',classes,keyvals) $ H.pre $ H.code $ toHtml adjCode Just h -> modify (\st -> st{ stHighlighting = True }) >> @@ -589,14 +592,17 @@ inlineToHtml opts inline = (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br (Emph lst) -> inlineListToHtml opts lst >>= return . H.em (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong - (Code attr str) -> case highlight formatHtmlInline attr str of + (Code attr str) -> case hlCode of Nothing -> return $ addAttrs opts attr $ H.code $ strToHtml str Just h -> do modify $ \st -> st{ stHighlighting = True } return $ addAttrs opts (id',[],keyvals) h - where (id',_,keyvals) = attr + where (id',_,keyvals) = attr + hlCode = if writerHighlight opts + then highlight formatHtmlInline attr str + else Nothing (Strikeout lst) -> inlineListToHtml opts lst >>= return . 
H.del (SmallCaps lst) -> inlineListToHtml opts lst >>= -- cgit v1.2.3 From 35e2caa05863002acb64ce34504fb4424d2bc441 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Jul 2013 13:47:09 -0700 Subject: Updated a test whose output changed due to last commit. --- tests/Tests/Writers/HTML.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/Tests/Writers/HTML.hs b/tests/Tests/Writers/HTML.hs index 1693f2fc1..84f4db191 100644 --- a/tests/Tests/Writers/HTML.hs +++ b/tests/Tests/Writers/HTML.hs @@ -6,7 +6,6 @@ import Text.Pandoc.Builder import Text.Pandoc import Tests.Helpers import Tests.Arbitrary() -import Text.Highlighting.Kate (languages) -- null if no hl support html :: (ToString a, ToPandoc a) => a -> String html = writeHtmlString def{ writerWrapText = False } . toPandoc @@ -32,9 +31,7 @@ tests :: [Test] tests = [ testGroup "inline code" [ "basic" =: code "@&" =?> "@&" , "haskell" =: codeWith ("",["haskell"],[]) ">>=" - =?> if null languages - then ">>=" - else ">>=" + =?> ">>=" , "nolanguage" =: codeWith ("",["nolanguage"],[]) ">>=" =?> ">>=" ] -- cgit v1.2.3 From f42095b7b72fc3419a661c65d17f46ba3cbc8d62 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Jul 2013 13:48:50 -0700 Subject: Docx writer: Make `--no-highlight` work properly. 
--- src/Text/Pandoc/Writers/Docx.hs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index e899200f6..d579d4fa6 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -214,7 +214,8 @@ writeDocx opts doc@(Pandoc meta _) = do let newstyles = styleToOpenXml $ writerHighlightStyle opts let stylepath = "word/styles.xml" styledoc <- parseXml refArchive stylepath - let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles } + let styledoc' = styledoc{ elContent = elContent styledoc ++ + [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -665,13 +666,16 @@ inlineToOpenXML opts (Math mathType str) = do Right r -> return [r] Left _ -> inlinesToOpenXML opts (readTeXMath str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML _ (Code attrs str) = +inlineToOpenXML opts (Code attrs str) = withTextProp (rStyle "VerbatimChar") - $ case highlight formatOpenXML attrs str of - Nothing -> intercalate [br] - `fmap` (mapM formattedString $ lines str) - Just h -> return h - where formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) + $ if writerHighlight opts + then case highlight formatOpenXML attrs str of + Nothing -> unhighlighted + Just h -> return h + else unhighlighted + where unhighlighted = intercalate [br] `fmap` + (mapM formattedString $ lines str) + formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] [ mknode "w:rPr" [] [ rStyle $ show toktype ] -- cgit v1.2.3 From 7445fc538220c738952b46982e4606fc9f31d206 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Jul 2013 14:45:12 -0700 Subject: Default epub CSS: Removed highlighting styles. 
These are added automatically to individual chapter files, depending on the highlighting style selected on the command line. --- data/epub.css | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/data/epub.css b/data/epub.css index a87fff9bb..cc9c501c5 100644 --- a/data/epub.css +++ b/data/epub.css @@ -12,22 +12,3 @@ h2.author { } h3.date { } ol.toc { padding: 0; margin-left: 1em; } ol.toc li { list-style-type: none; margin: 0; padding: 0; } -/* For source-code highlighting */ -table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode, table.sourceCode pre - { margin: 0; padding: 0; border: 0; vertical-align: baseline; border: none; } -td.lineNumbers { border-right: 1px solid #AAAAAA; text-align: right; color: #AAAAAA; padding-right: 5px; padding-left: 5px; } -td.sourceCode { padding-left: 5px; } -pre.sourceCode { } -code.sourceCode span.kw { color: #007020; font-weight: bold; } -code.sourceCode span.dt { color: #902000; } -code.sourceCode span.dv { color: #40a070; } -code.sourceCode span.bn { color: #40a070; } -code.sourceCode span.fl { color: #40a070; } -code.sourceCode span.ch { color: #4070a0; } -code.sourceCode span.st { color: #4070a0; } -code.sourceCode span.co { color: #60a0b0; font-style: italic; } -code.sourceCode span.ot { color: #007020; } -code.sourceCode span.al { color: red; font-weight: bold; } -code.sourceCode span.fu { color: #06287e; } -code.sourceCode span.re { } -code.sourceCode span.er { color: red; font-weight: bold; } -- cgit v1.2.3 From 851703044e1b266f9dedd9957f4a2c15f3acb504 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Jul 2013 14:48:14 -0700 Subject: Simplified margin fields in default epub CSS file. 
--- data/epub.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/epub.css b/data/epub.css index cc9c501c5..93153d62a 100644 --- a/data/epub.css +++ b/data/epub.css @@ -1,5 +1,5 @@ /* This defines styles and classes used in the book */ -body { margin-left: 5%; margin-right: 5%; margin-top: 5%; margin-bottom: 5%; text-align: justify; font-size: medium; } +body { margin: 5%; text-align: justify; font-size: medium; } code { font-family: monospace; } h1 { text-align: left; } h2 { text-align: left; } -- cgit v1.2.3 From f7c7672c40d0bfff5bff92cfa308ea153e4462ac Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Jul 2013 15:06:09 -0700 Subject: Makefile: --enable-benchmarks no longer works with cabal install. --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 52115818a..b97ab1a5e 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,9 @@ all: cabal-dev configure --enable-tests --enable-benchmarks && cabal-dev build +test: all + cabal test + prof: cabal-dev configure --disable-tests --enable-library-profiling --enable-executable-profiling && cabal-dev build @@ -45,4 +48,4 @@ citeproc-hs: pandoc-types cabal-dev add-source citeproc-hs install: - cabal-dev install --enable-tests --enable-benchmarks + cabal-dev install --enable-tests -- cgit v1.2.3 From a5882bd587f6cf71100b7e3b93250add4e53f9f8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Jul 2013 15:06:25 -0700 Subject: Tweaked travis script to use cabal-dev. 
--- .travis.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 934a8ea5b..ced1a65df 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,9 @@ language: haskell before_install: - - git submodule update --init --recursive + - make prep +# - git submodule update --init --recursive install: - - cabal update - - cabal install --enable-tests -script: cabal configure --enable-tests && cabal build && cabal test + - make install +# - cabal update +# - cabal install --enable-tests +script: cabal-dev configure --enable-tests && cabal-dev build && cabal-dev test -- cgit v1.2.3 From 3b1898dec93de07e73ae7d297cc924515a5df7ca Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Jul 2013 15:08:36 -0700 Subject: Another attempt at the travis build spec. --- .travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index ced1a65df..2d343b778 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,8 @@ language: haskell -before_install: - - make prep +# before_install: # - git submodule update --init --recursive install: - - make install + - make prep # - cabal update # - cabal install --enable-tests script: cabal-dev configure --enable-tests && cabal-dev build && cabal-dev test -- cgit v1.2.3 From 15984adaaa1eea73b268c669a2508635d07b56b1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Jul 2013 15:15:43 -0700 Subject: Another attempt at .travis.yml. 
--- .travis.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2d343b778..1216994db 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,9 @@ language: haskell -# before_install: -# - git submodule update --init --recursive +before_install: + - cabal install cabal-dev + - 'git clone https://github.com/jgm/pandoc-types && cabal-dev add-source pandoc-types' + - 'git clone https://github.com/jgm/citeproc-hs && cabal-dev add-source citeproc-hs' install: - - make prep -# - cabal update -# - cabal install --enable-tests -script: cabal-dev configure --enable-tests && cabal-dev build && cabal-dev test + - cabal-dev install-deps --enable-tests +script: + - 'cabal-dev configure --enable-tests && cabal-dev build && cabal-dev test' -- cgit v1.2.3 From 0b49f810f401b9154b50727d2179d1ec39cd8d3e Mon Sep 17 00:00:00 2001 From: Alexander Kondratskiy Date: Sun, 14 Jul 2013 14:33:58 -0400 Subject: Fixing wrong numbered-list indentation in open document format --- src/Text/Pandoc/Writers/OpenDocument.hs | 12 ++++++----- tests/writer.opendocument | 38 ++++++++++++++++----------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 30f99c3e4..0efbf7580 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -489,14 +489,16 @@ paraStyle parent attrs = do tight = if t then [ ("fo:margin-top" , "0in" ) , ("fo:margin-bottom" , "0in" )] else [] - indent = when (i /= 0 || b || t) $ - selfClosingTag "style:paragraph-properties" $ - [ ("fo:margin-left" , indentVal) + indent = if (i /= 0 || b) + then [ ("fo:margin-left" , indentVal) , ("fo:margin-right" , "0in" ) , ("fo:text-indent" , "0in" ) , ("style:auto-text-indent" , "false" )] - ++ tight - addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) indent + else [] + attributes = indent ++ tight + paraProps = when (not $ null 
attributes) $ + selfClosingTag "style:paragraph-properties" attributes + addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps return pn paraListStyle :: Int -> State WriterState Int diff --git a/tests/writer.opendocument b/tests/writer.opendocument index 8727373a0..9e1661475 100644 --- a/tests/writer.opendocument +++ b/tests/writer.opendocument @@ -741,25 +741,25 @@ - + - + - + - + - + @@ -768,37 +768,37 @@ - + - + - + - + - + - + - + - + - + @@ -822,18 +822,18 @@ - + - + - + - + @@ -846,7 +846,7 @@ - + -- cgit v1.2.3 From 22699a979dad2e61a1d5a923c473c56d50f3c8dc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 14 Jul 2013 16:12:06 -0700 Subject: Updated authors in README. --- README | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README b/README index 43e522976..cade75cee 100644 --- a/README +++ b/README @@ -2738,7 +2738,8 @@ Puneeth Chaganti, Paul Rivier, rodja.trappe, Bradley Kuhn, thsutton, Nathan Gass, Jonathan Daugherty, Jérémy Bobbio, Justin Bogner, qerub, Christopher Sawicki, Kelsey Hightower, Masayoshi Takahashi, Antoine Latter, Ralf Stephan, Eric Seidel, B. Scott Michel, Gavin Beatty, -Sergey Astanin, Arlo O'Keeffe, Denis Laxalde, Brent Yorgey. +Sergey Astanin, Arlo O'Keeffe, Denis Laxalde, Brent Yorgey, David Lazar, +Jamie F. Olson. [markdown]: http://daringfireball.net/projects/markdown/ [reStructuredText]: http://docutils.sourceforge.net/docs/ref/rst/introduction.html -- cgit v1.2.3 From 359ce379cc7f96c185275f7532914459edf72827 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 14 Jul 2013 16:12:28 -0700 Subject: Updated changelog for pending release. --- changelog | 339 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 339 insertions(+) diff --git a/changelog b/changelog index 1f96011cd..6585c3dd3 100644 --- a/changelog +++ b/changelog @@ -1,3 +1,342 @@ +[pending release 1.12] + + * `--toc-level` no longer implies `--toc`. 
+ Reason: EPUB users who don't want a visible TOC may still want + to set the TOC level for in-book navigation. + + * `--help` now prints in and out formats in alphabetical order, and + says something about PDF output. Closes #720. + + * Less verbose output from `--self-contained`. + Now one gets "Fetching [URL]..." for each URL fetched, but not + the full header. + + * All slide formats: Support incremental slide view for definition lists. + + * Added syntax for "pauses" in beamer or revealjs slide shows. + + This gives + + . . . + + a pause. + + [note - no longer seems to work in recent revealjs - perhaps + this should be reverted] + + * Slidy: Use slidy.js rather than slidy.js.gz. + Reason: some browsers have trouble with the gzipped js file, + at least on the local file system. Closes #795. + + * Added `revealjs` output format, for reveal.js HTML 5 slide shows. + Thanks to Jamie F. Olson for the initial patch. + Nested vertical stacks are used for hierarchical structure. + Results for more than one level of nesting may be odd. + + * Use new flexible metadata type. + + + Depend on `pandoc-types` 1.12. This changes the type of + `Meta` to allow structured metadata. (API change: existing + code that pattern-matches on `Meta` will have to be revised.) + + Revised readers and writers to use the new `Meta` type. + + Variables completely shadow metadata. + If many variables with the same name are set, a list is created. + + * `Text.Pandoc.Shared` + + + `openURL` now follows redirects. Closes #701. + + `readDefaultDataFile`: normalize the paths. + This fixes bugs in `--self-contained` on pandoc compiled with + `embed_data_files`. Closes #833. + + Fixed `readDefaultDataFile` so it works on Windows. + + URL-escape pipe characters. + Even though these are legal, `Network.URI` doesn't regard them + as legal in URLs. So we escape them first. Closes #535. + + `openURL`: Print diagnostic output to stderr, not stdout. + + `openURL`: Properly handle `data:` URIs. 
+ + * New module `Text.Pandoc.Writers.Shared` for shared functions used + only in writers. + + + Added `metaToJSON`. This is used in writers to create a + JSON object for use in the templates from the pandoc metadata + and variables. + + Added `getField`, `setField`, `defField` for working with JSON + template contexts. + + * Added `Text.Pandoc.Asciify` utility module. + This exports functions to create ASCII-only versions of identifiers. + + * `Text.Pandoc.SelfContained` + + + Strip off fragment, query of relative URL before treating as + a filename. This fixes `--self-contained` when used with CSS + files that include web fonts using the method described here: + + Closes #739. + + Handle `src` in `embed`, `audio`, `source`, `input` tags. + + * Added `Text.Pandoc.Writers.Custom` and custom output formats. + + pandoc -t data/sample.lua + + will load the script sample.lua and use it as a custom writer. + `data/sample.lua` is provided as an example. (This can be printed + with the new `--print-custom-lua-writer` option.) + + * Added OPML reader and writer. + + + New module `Text.Pandoc.Readers.OPML`. + The `_note` attribute is supported. This is unofficial, but + used e.g. in OmniOutliner and supported by multimarkdown. + We treat the contents as markdown blocks under a section + header. + + New module `Text.Pandoc.Writers.OPML`. + + * Added Haddock reader. + + + New module `Text.Pandoc.Readers.Haddock` (David Lazar). + This is based on Haddock's own lexer/parser. + + * Markdown reader + + + Properly handle blank line at beginning of input. Closes #882. + + Fixed bug in unmatched reference links. The input + `[*infile*] [*outfile*]` was getting improperly parsed: + "infile" was emphasized, but "*outfile*" was literal. Closes #883. + + Check for blank lines first in blocks. (And skip them. This + might speed things up in some cases.) + + Implemented `Ext_ascii_identifiers`. Closes #807. + + Allow internal `+` in citation identifiers. Closes #856. 
+ + Added support for YAML metadata block at the beginning of document. + + * RST reader + + + Don't insert paragraphs where docutils doesn't. + `rst2html` doesn't add `

` tags to list items (even when they are + separated by blank lines) unless there are multiple paragraphs in the + list. This commit changes the RST reader to conform more closely to + what docutils does. Closes #880. + + Improved metadata. Treat initial field list as metadata when + standalone specified. Previously ALL fields "title", "author", + "date" in field lists were treated as metadata, even if not at + the beginning. Use `subtitle` metadata field for subtitle. + + Fixed 'authors' metadata parsing in reST. Semicolons separate + different authors. + + * Textile reader + + + Correctly handle entities. + + * LaTeX reader + + + Support alltt environment. Closes #892. + + Support `\textasciitilde`, `\textasciicircum`. Closes #810. + + Treat `\textsl` as emphasized text reader. Closes #850. + + Skip positional options after `\begin{figure}`. + + * LaTeX writer + + + Don't use ligatures in escaping inline code. + + Fixed footnote numbers in LaTeX/PDF tables. This fixes a bug + wherein notes were numbered incorrectly in tables. Closes #827. + + Always create labels for sections. Previously the labels were only + created when there were links to the section in the document. + Closes #871. + + Stop escaping `|` in LaTeX math. + This caused problems with array environments. Closes #891. + + * ConTeXt writer + + + Properly handle tables without captions. The old output only + worked in MkII. This should work in MkIV as well. Closes #837. + + * MediaWiki reader + + + Allow space before table rows. + + Fixed regression for `URL`. + `<` is no longer allowed in URLs, according to the uri parser + in `Text.Pandoc.Parsing`. Added a test case. + + Correctly handle indented preformatted text without preceding + or following blank line. + + Fixed `|` links inside table cells. Improved attribute parsing. + + Skip attributes on table rows. Previously we just crashed if + rows had attributes, now we ignore them. + + Ignore attributes on headers. 
+ + * HTML writer + + + Fixed `--no-highlight` (Alexander Kondratskiy). + + Don't convert to lowercase in email obfuscation. Closes #839. + + * Man writer + + + Offer more fine-grained control in template. + Now the `title`, `section`, `header`, and `footer` can all be set + individually in metadata. The `description` variable has been + removed. Quotes have been added so that spaces are allowed in the title. + If you have a title that begins + + COMMAND(1) footer here | header here + + pandoc will parse it as before into a title, section, header, and + footer. But you can also specify these elements explicitly. + Closes #885. + + * AsciiDoc writer + + + Support `--atx-headers` (Max Rydahl Andersen). + + Don't print empty identifier blocks `([[]])` on headers (Max + Rydahl Andersen). + + * ODT writer + + + Fixing wrong numbered-list indentation in open document format + (Alexander Kondratskiy). Closes #369. + + * Docx writer + + + Fixed rendering of display math in lists. + In 1.11 and 1.11.1, display math in lists rendered as a new list + item. Now it always appears centered, just as outside of lists, + and in proper display math style, no matter how far indented the + containing list item is. Closes #784. + + Use `w:br` with `w:type` `textWrapping` for linebreaks. + Previously we used `w:cr`. Closes #873. + + Use Compact style for Plain block elements. + This differentiates between tight and loose lists. Closes #775. + + Ignore most components of `reference.docx`. + We take the `word/styles.xml`, `docProps/app.xml`, + `word/theme/theme1.xml`, and `word/fontTable.xml` from + `reference.docx`, ignoring everything else. This should help + with the corruption problems caused when different versions of + Word resave the reference.docx and reorganize things. + + Made `--no-highlight` work properly. + + * EPUB writer + + + Don't add `dc:creator` tags if present in EPUB metadata. + + Add `id="toc-title"` to `h1` in `nav.xhtml`. Closes #799. 
+ + Don't put blank title page in reading sequence. + Set `linear="no"` if no title block. Addresses #797. + + Download webtex images and include as data URLs. + This allows you to use `--webtex` in creating EPUBs. + Math with `--webtex` is automatically made self-contained. + + In `data/epub.css`, removed highlighting styles (which + are no longer needed, since styles are added by the HTML + writer according to `--highlight-style`). Simplified + margin fields. + + * Markdown writer + + + Allow simple tables to be printed as grid tables, + if other table options are disabled. This means you can do + `pandoc -t markdown-pipe_tables-simple_tables-multiline_tables` + and all tables will render as grid tables. + + Put multiple authors on separate lines in pandoc titleblock. + Also, don't wrap long author entries, as new lines get treated + as new authors. + + Only autolink absolute URIs. This fixes a regression, #830. + + Don't wrap attributes in fenced code blocks. + + Support YAML title block (render fields in alphabetical order + to make output predictable). + + Write full metadata in MMD style title blocks. + + * RTF writer + + + Properly handle characters above the 0000-FFFF range. + Uses surrogate pairs. Thanks to Hiromi Ishii for the patch. + + Fixed regression with RTF table of contents. + + Only autolink absolute URIs. This fixes a regression, #830. + + * `Text.Pandoc.PDF` + + + On Windows, create temp dir in working directory. + Reason: the path to the system temp directory may contain tildes, + which causes problems in LaTeX when the username is more than + eight characters. Closes #777. + + * `Text.Pandoc.UTF8` + + + Use strict bytestrings in reading. The use of lazy bytestrings seemed + to cause problems using pandoc on 64-bit Windows 7/8. Closes #874. + + * `Text.Pandoc.Parsing` + + + Further improvements to URI parser. + Don't treat punctuation before percent-encoding as final punctuation. + Don't treat '+' as final punctuation. 
+ + Generalized state type on `readWith` (API change). + + Specialize readWith to `String` input. + + Better error reporting in `readWith`: On error have it print the line + in which the error occurred, with a caret pointing to the column. + + In `ParserState`, replace `stateTitle`, `stateAuthors`, `stateDate` + with `stateMeta`. + + * `Text.Pandoc.XML` + + + Removed `stripTags`. (API change.) + + * `Text.Pandoc.Options` + + + Added `Ext_yaml_metadata_block`. (API change.) + + Added `Ext_ascii_identifiers`. (API change.) + This will force `Ext_auto_identifiers` to use ASCII only. + Set as default for github markdown. + + * `Text.Pandoc.Templates` + + + Changed type of `renderTemplate`: it now takes a JSON context + and a compiled template. (API change.) + + Export `compileTemplate`. (API change.) + + Export `renderTemplate'` that takes a string instead of a compiled + template. (API change.) + + Export `varListToJSON`. (API change.) + + Removed `Empty`. + + Simplified `Template` type to a newtype. + + Templates can now contain "record lookups" in variables; + for example, `author.institution` will retrieve the `institution` + field of the `author` variable. + + Fixed bug retrieving default template for markdown variants. + + * Default template changes + + + DocBook: Use DocBook 4.5 doctype. + + Org: '#+TITLE:' is inserted before the title. + Previously the writer did this. + + LaTeX: Changes to make mathfont work with xelatex. + We need the mathspec library, not just fontspec, for this. + We also need to set options for setmathfont. Closes #734. + + LaTeX: Use `tex-ansi` mapping for `monofont`. + This ensures that straight quotes appear as straight, rather than + being treated as curly. See #889. + + Made `\includegraphics` more flexible in LaTeX template. + Now it can be used with options, if needed. Thanks to Bernhard Weichel. + + DZSlides: title attribute on title section. + + * Removed `blaze_html_0_5` flag, require `blaze-html` >= 0.5. 
+ Reason: < 0.5 does not provide a monoid instance for Attribute, + which is now needed by the HTML writer. + Closes #803. + + * Added `http-conduit` flag, which allows fetching https resources. + It also brings in a large number of dependencies (`http-conduit` + and its dependencies), which is why for now it is an optional flag. + Closes #820. + + * Added CONTRIBUTING.md. + + * Use latest `chicago-author-date.csl` as `default.csl`. + + * `make-windows-installer.bat`: Removed explicit paths for executables. + + * `aeson` is now used instead of `json` for JSON. + + * Set default stack size to 16M. This is needed for some large + conversions, esp. if pandoc is compiled with 64-bit ghc. + + + pandoc (1.11.1) * Markdown reader: -- cgit v1.2.3 From cd399d1b9df197f25f7eee6bb2b5e7d5ec3a3f47 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 14 Jul 2013 20:43:21 -0700 Subject: Updated sample.lua for new metadata. --- data/sample.lua | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/data/sample.lua b/data/sample.lua index 36eb9f6ef..1c82ebe2e 100644 --- a/data/sample.lua +++ b/data/sample.lua @@ -77,16 +77,16 @@ function Doc(body, metadata, variables) add('') add('') add('') - add('' .. metadata.title .. '') + add('' .. (metadata['title'] or '') .. '') add('') add('') - if title ~= "" then - add('

' .. metadata.title .. '

') + if metadata['title'] and metadata['title'] ~= "" then + add('

' .. metadata['title'] .. '

') end - for _, author in pairs(metadata.author) do + for _, author in pairs(metadata['author'] or {}) do add('

' .. author .. '

') end - if date ~= "" then + if metadata['date'] and metadata['date'] ~= "" then add('

' .. metadata.date .. '

') end add(body) -- cgit v1.2.3 From 595149a9bcb12aa832065f591058ab963c9f237e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 15 Jul 2013 19:32:08 -0700 Subject: Fixed YAML code block in README. --- README | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/README b/README index cade75cee..adec7c0e7 100644 --- a/README +++ b/README @@ -1794,19 +1794,19 @@ if a title contains a colon, it must be quoted. The pipe character literally, without need for escaping. This form is necessary when the field contains blank lines: - --- - title: 'This is the title: it contains a colon' - author: - - name: Author One - affiliation: University of Somewhere - - name: Author Two - affiliation: University of Nowhere - tags: [nothing, nothingness] - abstract: | - This is the abstract. - - It consists of two paragraphs. - ... + --- + title: 'This is the title: it contains a colon' + author: + - name: Author One + affiliation: University of Somewhere + - name: Author Two + affiliation: University of Nowhere + tags: [nothing, nothingness] + abstract: | + This is the abstract. + + It consists of two paragraphs. + ... Template variables will be set from the metadata. Thus, for example, in writing HTML, the variable `abstract` will be set to the HTML -- cgit v1.2.3 From 0bd5830ad4cbf056d18595208532082fe674c6d2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Jul 2013 15:37:15 -0700 Subject: HTML reader: Generalized table parser. This commit doesn't change the present behavior at all, but it will make it easier to support non-simple tables in the future. 
--- src/Text/Pandoc/Readers/HTML.hs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f6657a4d1..56d35160c 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -88,7 +88,7 @@ block = choice , pCodeBlock , pList , pHrule - , pSimpleTable + , pTable , pHead , pBody , pPlain @@ -212,8 +212,8 @@ pHrule = do pSelfClosing (=="hr") (const True) return [HorizontalRule] -pSimpleTable :: TagParser [Block] -pSimpleTable = try $ do +pTable :: TagParser [Block] +pTable = try $ do TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) skipMany pBlank caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank @@ -225,6 +225,11 @@ pSimpleTable = try $ do $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") skipMany pBlank TagClose _ <- pSatisfy (~== TagClose "table") + let isSinglePlain [] = True + isSinglePlain [Plain _] = True + isSinglePlain _ = False + let isSimple = all isSinglePlain $ concat (head':rows) + guard isSimple let cols = maximum $ map length rows let aligns = replicate cols AlignLeft let widths = replicate cols 0 @@ -233,7 +238,7 @@ pSimpleTable = try $ do pCell :: String -> TagParser [TableCell] pCell celltype = try $ do skipMany pBlank - res <- pInTags celltype pPlain + res <- pInTags celltype block skipMany pBlank return [res] -- cgit v1.2.3 From 8483b5756fbf45270a84fa3e9174081041ff5558 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Jul 2013 15:49:53 -0700 Subject: HTML reader: Handle non-simple tables (#893). Column widths are divided equally. TODO: Get column widths from col tags if present. 
--- src/Text/Pandoc/Readers/HTML.hs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 56d35160c..35b667fb0 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -217,6 +217,7 @@ pTable = try $ do TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) skipMany pBlank caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank + -- TODO actually read these and take width information from them skipMany $ (pInTags "col" block >> skipMany pBlank) <|> (pInTags "colgroup" block >> skipMany pBlank) head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") @@ -229,10 +230,15 @@ pTable = try $ do isSinglePlain [Plain _] = True isSinglePlain _ = False let isSimple = all isSinglePlain $ concat (head':rows) - guard isSimple - let cols = maximum $ map length rows + let cols = length $ if null head' + then head rows + else head' + -- fail if there are colspans or rowspans + guard $ all (\r -> length r == cols) rows let aligns = replicate cols AlignLeft - let widths = replicate cols 0 + let widths = if isSimple + then replicate cols 0 + else replicate cols (1.0 / fromIntegral cols) return [Table caption aligns widths head' rows] pCell :: String -> TagParser [TableCell] -- cgit v1.2.3 From 94c9825468692a343af7ef1686b1c92e1ec71adf Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Jul 2013 17:03:28 -0700 Subject: HTML reader: read widths from col tags if present. Closes #893. 
--- src/Text/Pandoc/Readers/HTML.hs | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 35b667fb0..0068ab5c1 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -47,7 +47,7 @@ import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) -import Control.Applicative ( (<$>), (<$) ) +import Control.Applicative ( (<$>), (<$), (<*) ) isSpace :: Char -> Bool isSpace ' ' = True @@ -218,8 +218,7 @@ pTable = try $ do skipMany pBlank caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank -- TODO actually read these and take width information from them - skipMany $ (pInTags "col" block >> skipMany pBlank) <|> - (pInTags "colgroup" block >> skipMany pBlank) + widths' <- pColgroup <|> many pCol head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") skipMany pBlank rows <- pOptInTag "tbody" @@ -236,11 +235,29 @@ pTable = try $ do -- fail if there are colspans or rowspans guard $ all (\r -> length r == cols) rows let aligns = replicate cols AlignLeft - let widths = if isSimple - then replicate cols 0 - else replicate cols (1.0 / fromIntegral cols) + let widths = if null widths' + then if isSimple + then replicate cols 0 + else replicate cols (1.0 / fromIntegral cols) + else widths' return [Table caption aligns widths head' rows] +pCol :: TagParser Double +pCol = try $ do + TagOpen _ attribs <- pSatisfy (~== TagOpen "col" []) + optional $ pSatisfy (~== TagClose "col") + skipMany pBlank + return $ case lookup "width" attribs of + Just x | not (null x) && last x == '%' -> + maybe 0.0 id $ safeRead ('0':'.':init x) + _ -> 0.0 + +pColgroup :: TagParser [Double] +pColgroup = try $ do + pSatisfy (~== TagOpen "colgroup" []) + skipMany pBlank + manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank + pCell :: String -> 
TagParser [TableCell] pCell celltype = try $ do skipMany pBlank -- cgit v1.2.3 From b2385d0e9bf13f2fc152a3983893c47f2ab5d4c0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Jul 2013 22:04:59 -0700 Subject: Text.Pandoc.ImageSize: Handle EPS. Closes #903. This change will make EPS images properly sized on conversion to Word. --- src/Text/Pandoc/ImageSize.hs | 24 +++++++++++++++++++++++- src/Text/Pandoc/Writers/Docx.hs | 1 + 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 273a1a428..9b0850efb 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -34,11 +34,12 @@ import Data.ByteString (ByteString, unpack) import qualified Data.ByteString.Char8 as B import Control.Monad import Data.Bits +import Text.Pandoc.Shared (safeRead) -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl -data ImageType = Png | Gif | Jpeg | Pdf deriving Show +data ImageType = Png | Gif | Jpeg | Pdf | Eps deriving Show data ImageSize = ImageSize{ pxX :: Integer @@ -54,6 +55,9 @@ imageType img = case B.take 4 img of "\x47\x49\x46\x38" -> return Gif "\xff\xd8\xff\xe0" -> return Jpeg "%PDF" -> return Pdf + "%!PS" + | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" + -> return Eps _ -> fail "Unknown image type" imageSize :: ByteString -> Maybe ImageSize @@ -63,6 +67,7 @@ imageSize img = do Png -> pngSize img Gif -> gifSize img Jpeg -> jpegSize img + Eps -> epsSize img Pdf -> Nothing -- TODO sizeInPixels :: ImageSize -> (Integer, Integer) @@ -71,6 +76,23 @@ sizeInPixels s = (pxX s, pxY s) sizeInPoints :: ImageSize -> (Integer, Integer) sizeInPoints s = (pxX s * 72 `div` dpiX s, pxY s * 72 `div` dpiY s) +epsSize :: ByteString -> Maybe ImageSize +epsSize img = do + let ls = takeWhile ("%" `B.isPrefixOf`) $ B.lines img + let ls' = dropWhile (not . 
("%%BoundingBox:" `B.isPrefixOf`)) ls + case ls' of + [] -> mzero + (x:_) -> case B.words x of + (_:_:_:ux:uy:[]) -> do + ux' <- safeRead $ B.unpack ux + uy' <- safeRead $ B.unpack uy + return ImageSize{ + pxX = ux' + , pxY = uy' + , dpiX = 72 + , dpiY = 72 } + _ -> mzero + pngSize :: ByteString -> Maybe ImageSize pngSize img = do let (h, rest) = B.splitAt 8 img diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index d579d4fa6..1ed8c2fa5 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -776,6 +776,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do Just Jpeg -> ".jpeg" Just Gif -> ".gif" Just Pdf -> ".pdf" + Just Eps -> ".eps" Nothing -> takeExtension src if null imgext then -- without an extension there is no rule for content type -- cgit v1.2.3 From 7d75b913bd45c41abefbc055163cf455c4b37b65 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 17 Jul 2013 15:06:43 -0700 Subject: LaTeX/Beamer template: Added `classoption` variable. This is intended for class options like `oneside`; it may be repeated with different options. Thanks to Oliver Matthews. --- README | 3 +++ data/templates | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README b/README index adec7c0e7..f86de8cd7 100644 --- a/README +++ b/README @@ -768,6 +768,9 @@ as `title`, `author`, and `date`) as well as the following: : font size (10pt, 11pt, 12pt) for LaTeX documents `documentclass` : document class for LaTeX documents +`classoption` +: option for LaTeX documentclass, e.g. `oneside`; may be repeated + for multiple options `geometry` : options for LaTeX `geometry` class, e.g. 
`margin=1in`; may be repeated for multiple options diff --git a/data/templates b/data/templates index cb23306c2..24dab8e06 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit cb23306c2721d9c1f918f057d7402e03e079476b +Subproject commit 24dab8e06ec3cdc66a6b6db0ebe17a586c2c67f6 -- cgit v1.2.3 From 6c2e76ac617e5972db5d118525e7f6f59f43caac Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 17 Jul 2013 15:38:56 -0700 Subject: Added `ignore_line_breaks` markdown extension. This causes intra-paragraph line breaks to be ignored, rather than being treated as hard line breaks or spaces. This is useful for some East Asian languages, where spaces aren't used between words, but text is separated into lines for readability. --- README | 6 ++++++ src/Text/Pandoc/Options.hs | 1 + src/Text/Pandoc/Readers/Markdown.hs | 1 + 3 files changed, 8 insertions(+) diff --git a/README b/README index f86de8cd7..a65e22a70 100644 --- a/README +++ b/README @@ -2416,6 +2416,12 @@ example, `markdown+hard_line_breaks` is markdown with hard line breaks. Causes all newlines within a paragraph to be interpreted as hard line breaks instead of spaces. +**Extension: `ignore_line_breaks`**\ +Causes newlines within a paragraph to be ignored, rather than being +treated as spaces or as hard line breaks. This option is intended for +use with East Asian languages where spaces are not used between words, +but text is divided into lines for readability. 
+ **Extension: `tex_math_single_backslash`**\ Causes anything between `\(` and `\)` to be interpreted as inline TeX math, and anything between `\[` and `\]` to be interpreted diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index c9a5e27da..61a85cf6e 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -92,6 +92,7 @@ data Extension = | Ext_superscript -- ^ Superscript using ^this^ syntax | Ext_subscript -- ^ Subscript using ~this~ syntax | Ext_hard_line_breaks -- ^ All newlines become hard line breaks + | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored | Ext_literate_haskell -- ^ Enable literate Haskell conventions | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions | Ext_auto_identifiers -- ^ Automatic identifiers for headers diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a3500fbcf..1aa392162 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1566,6 +1566,7 @@ endline = try $ do notFollowedBy' bulletListStart notFollowedBy' anyOrderedListStart (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) + <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) <|> (return $ return B.space) -- -- cgit v1.2.3 From 7c980f39bf1cff941d3e78056fd69e0b371833e3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Jul 2013 20:58:14 -0700 Subject: Improved fetching of external resources. * In Shared, openURL and fetchItem now return an Either, for better error handling. (API change.) * Better error message when fetching a URL fails with `--self-contained`. * EPUB writer: If resource not found, skip it, as in Docx writer. * Closes #916. 
--- src/Text/Pandoc/SelfContained.hs | 5 +++-- src/Text/Pandoc/Shared.hs | 16 +++++++++------- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/EPUB.hs | 13 +++++++++---- src/Text/Pandoc/Writers/ODT.hs | 5 ++--- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index c4613992a..0547bc065 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -40,7 +40,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, ()) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', openURL, readDataFile) +import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err) import Text.Pandoc.UTF8 (toString, fromString) import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) @@ -98,7 +98,7 @@ cssURLs userdata d orig = getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) getItem userdata f = if isAbsoluteURI f - then openURL f + then openURL f >>= either handleErr return else do -- strip off trailing query or fragment part, if relative URL. 
-- this is needed for things like cmunrm.eot?#iefix, @@ -110,6 +110,7 @@ getItem userdata f = exists <- doesFileExist f' cont <- if exists then B.readFile f' else readDataFile userdata f' return (cont, mime) + where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String) getRaw userdata mimetype src = do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 09086da1f..0f2e16d2e 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -95,6 +95,7 @@ import Text.Pandoc.MIME (getMimeType) import System.FilePath ( (), takeExtension, dropExtension ) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S +import qualified Control.Exception as E import Control.Monad (msum, unless) import Text.Pandoc.Pretty (charWidth) import System.Locale (defaultTimeLocale) @@ -586,12 +587,13 @@ readDataFileUTF8 userDir fname = -- | Fetch an image or other item from the local filesystem or the net. -- Returns raw content and maybe mime type. -fetchItem :: String -> String -> IO (BS.ByteString, Maybe String) +fetchItem :: String -> String + -> IO (Either E.SomeException (BS.ByteString, Maybe String)) fetchItem sourceDir s = case s of _ | isAbsoluteURI s -> openURL s | isAbsoluteURI sourceDir -> openURL $ sourceDir ++ "/" ++ s - | otherwise -> do + | otherwise -> E.try $ do let mime = case takeExtension s of ".gz" -> getMimeType $ dropExtension s x -> getMimeType x @@ -600,21 +602,21 @@ fetchItem sourceDir s = return (cont, mime) -- | Read from a URL and return raw data and maybe mime type. 
-openURL :: String -> IO (BS.ByteString, Maybe String) +openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) openURL u | "data:" `isPrefixOf` u = let mime = takeWhile (/=',') $ drop 5 u contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u - in return (contents, Just mime) + in return $ Right (contents, Just mime) #ifdef HTTP_CONDUIT - | otherwise = do + | otherwise = E.try $ do req <- parseUrl u resp <- withManager $ httpLbs req return (BS.concat $ toChunks $ responseBody resp, UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) #else - | otherwise = getBodyAndMimeType `fmap` browse - (do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." + | otherwise = E.try $ getBodyAndMimeType `fmap` browse + (do UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." setOutHandler $ const (return ()) setAllowRedirects True request (getRequest' u')) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1ed8c2fa5..611cddc65 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -726,7 +726,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do Just (_,_,_,elt,_) -> return [elt] Nothing -> do let sourceDir = writerSourceDirectory opts - res <- liftIO $ E.try $ fetchItem sourceDir src + res <- liftIO $ fetchItem sourceDir src case res of Left (_ :: E.SomeException) -> do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index f171a2560..42863ef86 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -123,10 +123,15 @@ writeEPUB opts doc@(Pandoc meta _) = do Pandoc _ blocks <- bottomUpM (transformInline opts' sourceDir picsRef) doc pics <- readIORef picsRef - let readPicEntry (oldsrc, newsrc) = do - (img,_) <- fetchItem sourceDir oldsrc - return $ toEntry newsrc epochtime $ B.fromChunks . 
(:[]) $ img - picEntries <- mapM readPicEntry pics + let readPicEntry entries (oldsrc, newsrc) = do + res <- fetchItem sourceDir oldsrc + case res of + Left e -> do + warn $ "Could not find image `" ++ oldsrc ++ "', skipping..." + return entries + Right (img,_) -> return $ + (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries + picEntries <- foldM readPicEntry [] pics -- handle fonts let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index db27286e8..589010bb9 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -42,7 +42,6 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import Control.Monad (liftM) -import Control.Monad.Trans (liftIO) import Text.Pandoc.XML import Text.Pandoc.Pretty import qualified Control.Exception as E @@ -114,10 +113,10 @@ writeODT opts doc@(Pandoc meta _) = do transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline transformPic sourceDir entriesRef (Image lab (src,_)) = do - res <- liftIO $ E.try $ fetchItem sourceDir src + res <- fetchItem sourceDir src case res of Left (_ :: E.SomeException) -> do - liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." + warn $ "Could not find image `" ++ src ++ "', skipping..." return $ Emph lab Right (img, _) -> do let size = imageSize img -- cgit v1.2.3 From 93e096fe1d23bf60a7ca7fa39fa6e730336338eb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Jul 2013 21:51:11 -0700 Subject: Fixed warning. 
--- src/Text/Pandoc/Writers/EPUB.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 42863ef86..e625931fc 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -126,7 +126,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let readPicEntry entries (oldsrc, newsrc) = do res <- fetchItem sourceDir oldsrc case res of - Left e -> do + Left _ -> do warn $ "Could not find image `" ++ oldsrc ++ "', skipping..." return entries Right (img,_) -> return $ -- cgit v1.2.3 From fd0f8c1a8a03cedf868ea2b26e40bfe00852d0b2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Jul 2013 21:51:23 -0700 Subject: Text.Pandoc.PDF: put temporary output directory in TEXINPUTS. This will help later when we try to download external resources. We can put them in the temp directory. See #917. --- src/Text/Pandoc/PDF.hs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 3227fd0bd..b36f2a0af 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -38,6 +38,7 @@ import System.Exit (ExitCode (..)) import System.FilePath import System.Directory import System.Process +import System.Environment import Control.Exception (evaluate) import System.IO (hClose) import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO) @@ -102,7 +103,12 @@ runTeXProgram program runsLeft tmpDir source = do unless exists $ UTF8.writeFile file source let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", "-output-directory", tmpDir, file] - (exit, out, err) <- readCommand program programArgs + env' <- getEnvironment + let texinputs = maybe (tmpDir ++ ":") ((tmpDir ++ ":") ++) + $ lookup "TEXINPUTS" env' + let env'' = ("TEXINPUTS", texinputs) : + [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] + (exit, out, err) <- readCommand (Just env'') program programArgs if runsLeft > 1 then runTeXProgram 
program (runsLeft - 1) tmpDir source else do @@ -118,12 +124,14 @@ runTeXProgram program runsLeft tmpDir source = do -- Run a command and return exitcode, contents of stdout, and -- contents of stderr. (Based on -- 'readProcessWithExitCode' from 'System.Process'.) -readCommand :: FilePath -- ^ command to run +readCommand :: Maybe [(String, String)] -- ^ environment variables + -> FilePath -- ^ command to run -> [String] -- ^ any arguments -> IO (ExitCode,ByteString,ByteString) -- ^ exit, stdout, stderr -readCommand cmd args = do +readCommand mbenv cmd args = do (Just inh, Just outh, Just errh, pid) <- - createProcess (proc cmd args){ std_in = CreatePipe, + createProcess (proc cmd args){ env = mbenv, + std_in = CreatePipe, std_out = CreatePipe, std_err = CreatePipe } outMVar <- newEmptyMVar -- cgit v1.2.3 From df3546068e8a04b83c7cc3aad31b286c37b86d18 Mon Sep 17 00:00:00 2001 From: David Röthlisberger Date: Sat, 20 Jul 2013 13:02:35 +0100 Subject: README: Fix minor typo --- README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README b/README index a65e22a70..539f40a0f 100644 --- a/README +++ b/README @@ -892,7 +892,7 @@ There are two kinds of headers, Setext and atx. ### Setext-style headers ### A setext-style header is a line of text "underlined" with a row of `=` signs -(for a level one header) of `-` signs (for a level two header): +(for a level one header) or `-` signs (for a level two header): A level-one header ================== -- cgit v1.2.3 From a5df042e28e01dc6e70bbd1019b15d94cf9f06a9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Jul 2013 10:52:00 -0700 Subject: LaTeX template: Added `biblio-style` variable. Closes #920. 
--- README | 2 ++ data/templates | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README b/README index 539f40a0f..d9b003344 100644 --- a/README +++ b/README @@ -790,6 +790,8 @@ as `title`, `author`, and `date`) as well as the following: : color for citation links in LaTeX documents `links-as-notes` : causes links to be printed as footnotes in LaTeX documents +`biblio-style` +: bibliography style in LaTeX, when used with `--natbib` `section` : section number in man pages `header` diff --git a/data/templates b/data/templates index 24dab8e06..120bed772 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 24dab8e06ec3cdc66a6b6db0ebe17a586c2c67f6 +Subproject commit 120bed772b4a235a0d17ea4560a682e23ac293ac -- cgit v1.2.3 From 93877e5a2290ea9e67e2047d3876fc0b7699b4ff Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Jul 2013 11:06:58 -0700 Subject: Added info about CABALDIR. Modified from a patch by achalddave. --- INSTALL | 2 ++ 1 file changed, 2 insertions(+) diff --git a/INSTALL b/INSTALL index fa1cf62f8..8abb14b71 100644 --- a/INSTALL +++ b/INSTALL @@ -28,6 +28,8 @@ now be able to run `pandoc`: pandoc --help + [Not sure where `$CABALDIR` is?](http://www.haskell.org/haskellwiki/Cabal-Install#The_cabal-install_configuration_file) + 4. Make sure the `$CABALDIR/share/man/man1` directory is in your `MANPATH`. You should now be able to access the `pandoc` man page: -- cgit v1.2.3 From 7102254e244b37c91d6b35b4940511a8656edc49 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Jul 2013 12:14:43 -0700 Subject: PDF generation improvements. * `Text.Pandoc.PDF` exports `makePDF` instead of `tex2pdf`. (API change.) * `makePDF` walks the pandoc AST and checks for the existence of images in the local directory. If they are not found, it attempts to find them, either in the directory containing the first source file, or at an absolute URL, or at a URL relative to the base URL of the first command line argument. 
* Closes #917. --- pandoc.hs | 4 ++-- src/Text/Pandoc/MIME.hs | 11 +++++++++-- src/Text/Pandoc/PDF.hs | 50 ++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 56 insertions(+), 9 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index 18124da3a..79bade221 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -31,7 +31,7 @@ writers. -} module Main where import Text.Pandoc -import Text.Pandoc.PDF (tex2pdf) +import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Readers.LaTeX (handleIncludes) import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, safeRead, headerShift, normalize, err, warn ) @@ -1113,7 +1113,7 @@ main = do Right (IOByteStringWriter f) -> f writerOptions doc0 >>= writeBinary Right (PureStringWriter f) | pdfOutput -> do - res <- tex2pdf latexEngine $ f writerOptions doc0 + res <- makePDF latexEngine f writerOptions doc0 case res of Right pdf -> writeBinary pdf Left err' -> err 43 $ UTF8.toStringLazy err' diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index eb54bd48d..d9cb94a33 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -27,7 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Mime type lookup for ODT writer. -} -module Text.Pandoc.MIME ( getMimeType ) +module Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) where import System.FilePath import Data.Char ( toLower ) @@ -37,7 +37,14 @@ import qualified Data.Map as M getMimeType :: FilePath -> Maybe String getMimeType "layout-cache" = Just "application/binary" -- in ODT getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes - where mimeTypes = M.fromList -- List borrowed from happstack-server. 
+ where mimeTypes = M.fromList mimeTypesList + +extensionFromMimeType :: String -> Maybe String +extensionFromMimeType mimetype = M.lookup mimetype reverseMimeTypes + where reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList + +mimeTypesList :: [(String, String)] +mimeTypesList = -- List borrowed from happstack-server. [("gz","application/x-gzip") ,("cabal","application/x-cabal") ,("%","application/x-trash") diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index b36f2a0af..49b455285 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -28,12 +28,13 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of LaTeX documents to PDF. -} -module Text.Pandoc.PDF ( tex2pdf ) where +module Text.Pandoc.PDF ( makePDF ) where import System.IO.Temp import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as BC +import qualified Data.ByteString as BS import System.Exit (ExitCode (..)) import System.FilePath import System.Directory @@ -42,9 +43,15 @@ import System.Environment import Control.Exception (evaluate) import System.IO (hClose) import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO) -import Text.Pandoc.UTF8 as UTF8 import Control.Monad (unless) import Data.List (isInfixOf) +import qualified Data.ByteString.Base64 as B64 +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Definition +import Text.Pandoc.Generic (bottomUpM) +import Text.Pandoc.Shared (fetchItem, warn) +import Text.Pandoc.Options (WriterOptions(..)) +import Text.Pandoc.MIME (extensionFromMimeType) withTempDir :: String -> (FilePath -> IO a) -> IO a withTempDir = @@ -54,12 +61,45 @@ withTempDir = withSystemTempDirectory #endif -tex2pdf :: String -- ^ tex program (pdflatex, lualatex, xelatex) - -> String -- ^ latex source +makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) + -> (WriterOptions -> Pandoc -> String) -- ^ writer + -> 
WriterOptions -- ^ options + -> Pandoc -- ^ document -> IO (Either ByteString ByteString) -tex2pdf program source = withTempDir "tex2pdf." $ \tmpdir -> +makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do + doc' <- handleImages (writerSourceDirectory opts) tmpdir doc + let source = writer opts doc' tex2pdf' tmpdir program source +handleImages :: String -- ^ source directory/base URL + -> FilePath -- ^ temp dir to store images + -> Pandoc -- ^ document + -> IO Pandoc +handleImages baseURL tmpdir = bottomUpM (handleImage' baseURL tmpdir) + +handleImage' :: String + -> FilePath + -> Inline + -> IO Inline +handleImage' baseURL tmpdir (Image ils (src,tit)) = do + exists <- doesFileExist src + if exists + then return $ Image ils (src,tit) + else do + res <- fetchItem baseURL src + case res of + Right (contents, Just mime) -> do + let ext = maybe (takeExtension src) id $ + extensionFromMimeType mime + let basename = UTF8.toString $ B64.encode $ UTF8.fromString src + let fname = tmpdir basename <.> ext + BS.writeFile fname contents + return $ Image ils (fname,tit) + _ -> do + warn $ "Could not find image `" ++ src ++ "', skipping..." + return $ Image ils (src,tit) +handleImage' _ _ x = return x + tex2pdf' :: FilePath -- ^ temp directory for output -> String -- ^ tex program -> String -- ^ tex source -- cgit v1.2.3 From bd1979f1b74fb18baa70c4b77cc58931e980087a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Jul 2013 21:14:38 -0700 Subject: Markdown reader: Improved strong/emph parsing. Using technique from github.com/jgm/Markdown. The new parsing algorithm requires no backtracking, and no keeping track of nesting levels. It will give different results in some edge cases but should not affect most people. 
--- src/Text/Pandoc/Readers/Markdown.hs | 88 +++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 1aa392162..28f69eae4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1340,17 +1340,15 @@ inline = choice [ whitespace , str , endline , code - , fours - , strong - , emph + , strongOrEmph , note , cite , link , image , math , strikeout - , superscript , subscript + , superscript , inlineNote -- after superscript because of ^[link](/foo)^ , autoLink , rawHtmlInline @@ -1455,14 +1453,58 @@ mathInlineWith op cl = try $ do notFollowedBy digit -- to prevent capture of $5 return $ concat words' --- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row --- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub -fours :: Parser [Char] st (F Inlines) -fours = try $ do - x <- char '*' <|> char '_' <|> char '~' <|> char '^' - count 2 $ satisfy (==x) - rest <- many1 (satisfy (==x)) - return $ return $ B.str (x:x:x:rest) +-- Parses material enclosed in *s, **s, _s, or __s. +-- Designed to avoid backtracking. +enclosure :: Char + -> MarkdownParser (F Inlines) +enclosure c = do + cs <- many1 (char c) + (return (B.str cs) <>) <$> whitespace + <|> case length cs of + 3 -> three c + 2 -> two c mempty + 1 -> one c mempty + _ -> return (return $ B.str cs) + +-- Parse inlines til you hit one c or a sequence of two cs. +-- If one c, emit emph and then parse two. +-- If two cs, emit strong and then parse one. +three :: Char -> MarkdownParser (F Inlines) +three c = do + contents <- mconcat <$> many (notFollowedBy (char c) >> inline) + (try (string [c,c,c]) >> return ((B.strong . 
B.emph) <$> contents)) + <|> (try (string [c,c]) >> one c (B.strong <$> contents)) + <|> (char c >> two c (B.emph <$> contents)) + <|> return (return (B.str [c,c,c]) <> contents) + +-- Parse inlines til you hit two c's, and emit strong. +-- If you never do hit two cs, emit ** plus inlines parsed. +two :: Char -> F Inlines -> MarkdownParser (F Inlines) +two c prefix' = do + let ender = try $ string [c,c] + contents <- mconcat <$> many (try $ notFollowedBy ender >> inline) + (ender >> return (B.strong <$> (prefix' <> contents))) + <|> return (return (B.str [c,c]) <> (prefix' <> contents)) + +-- Parse inlines til you hit a c, and emit emph. +-- If you never hit a c, emit * plus inlines parsed. +one :: Char -> F Inlines -> MarkdownParser (F Inlines) +one c prefix' = do + contents <- mconcat <$> many ( (notFollowedBy (char c) >> inline) + <|> try (string [c,c] >> + notFollowedBy (char c) >> + two c prefix') ) + (char c >> return (B.emph <$> (prefix' <> contents))) + <|> return (return (B.str [c]) <> (prefix' <> contents)) + +strongOrEmph :: MarkdownParser (F Inlines) +strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_') + where checkIntraword = do + exts <- getOption readerExtensions + when (Ext_intraword_underscores `Set.member` exts) $ do + pos <- getPosition + lastStrPos <- stateLastStrPos <$> getState + guard $ lastStrPos /= Just pos -- | Parses a list of inlines between start and end delimiters. 
inlinesBetween :: (Show b) @@ -1474,28 +1516,6 @@ inlinesBetween start end = where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace >>~ notFollowedBy' end -emph :: MarkdownParser (F Inlines) -emph = fmap B.emph <$> nested - (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) - where starStart = char '*' >> lookAhead nonspaceChar - starEnd = notFollowedBy' (() <$ strong) >> char '*' - ulStart = checkIntraword >> char '_' >> lookAhead nonspaceChar - ulEnd = notFollowedBy' (() <$ strong) >> char '_' - checkIntraword = do - exts <- getOption readerExtensions - when (Ext_intraword_underscores `Set.member` exts) $ do - pos <- getPosition - lastStrPos <- stateLastStrPos <$> getState - guard $ lastStrPos /= Just pos - -strong :: MarkdownParser (F Inlines) -strong = fmap B.strong <$> nested - (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) - where starStart = string "**" >> lookAhead nonspaceChar - starEnd = try $ string "**" - ulStart = string "__" >> lookAhead nonspaceChar - ulEnd = try $ string "__" - strikeout :: MarkdownParser (F Inlines) strikeout = fmap B.strikeout <$> (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) -- cgit v1.2.3 From 800c5490ec080520268c9c3348f2b4199a21e6db Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 21 Jul 2013 11:44:49 -0700 Subject: LaTeX reader: Don't add spurious ", " to citation suffixes. This is added when needed in Text.Pandoc.Biblio anyway. 
--- src/Text/Pandoc/Readers/LaTeX.hs | 10 +++------- tests/Tests/Readers/LaTeX.hs | 36 ++++++++++++++++++------------------ tests/latex-reader.native | 2 +- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 1a22f2ad2..7c7ae9fef 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -44,7 +44,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad import Text.Pandoc.Builder -import Data.Char (isLetter, isPunctuation, isSpace) +import Data.Char (isLetter) import Control.Applicative import Data.Monoid import System.Environment (getEnv) @@ -986,12 +986,8 @@ addPrefix _ _ = [] addSuffix :: [Inline] -> [Citation] -> [Citation] addSuffix s ks@(_:_) = - let k = last ks - s' = case s of - (Str (c:_):_) - | not (isPunctuation c || isSpace c) -> Str "," : Space : s - _ -> s - in init ks ++ [k {citationSuffix = citationSuffix k ++ s'}] + let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] addSuffix _ _ = [] simpleCiteArgs :: LP [Citation] diff --git a/tests/Tests/Readers/LaTeX.hs b/tests/Tests/Readers/LaTeX.hs index 271b32689..88029b7c2 100644 --- a/tests/Tests/Readers/LaTeX.hs +++ b/tests/Tests/Readers/LaTeX.hs @@ -79,14 +79,14 @@ natbibCitations = testGroup "natbib" =?> para (cite [baseCitation] (rt "\\citet{item1}")) , "suffix" =: "\\citet[p.~30]{item1}" =?> para - (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\citet[p.~30]{item1}")) + (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] (rt "\\citet[p.~30]{item1}")) , "suffix long" =: "\\citet[p.~30, with suffix]{item1}" =?> para (cite [baseCitation{ citationSuffix = - toList $ text ", p.\160\&30, with suffix" }] (rt "\\citet[p.~30, with suffix]{item1}")) + toList $ text "p.\160\&30, with suffix" }] (rt "\\citet[p.~30, with suffix]{item1}")) , "multiple" =: "\\citeauthor{item1} 
\\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}" =?> para (cite [baseCitation{ citationMode = AuthorInText } ,baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str ",",Space,Str "p.\160\&30"] + , citationSuffix = [Str "p.\160\&30"] , citationId = "item2" } ,baseCitation{ citationId = "item3" , citationPrefix = [Str "see",Space,Str "also"] @@ -95,28 +95,28 @@ natbibCitations = testGroup "natbib" , "group" =: "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Str "see"] - , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] } + , citationSuffix = [Str "p.\160\&34\8211\&35"] } ,baseCitation{ citationMode = NormalCitation , citationId = "item3" , citationPrefix = [Str "also"] - , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] } + , citationSuffix = [Str "chap.",Space,Str "3"] } ] (rt "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 
3]{item3}}")) , "suffix and locator" =: "\\citep[pp.~33, 35--37, and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\citep[pp.~33, 35--37, and nowhere else]{item1}")) + , citationSuffix = [Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\citep[pp.~33, 35--37, and nowhere else]{item1}")) , "suffix only" =: "\\citep[and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\citep[and nowhere else]{item1}")) + , citationSuffix = toList $ text "and nowhere else" }] (rt "\\citep[and nowhere else]{item1}")) , "no author" =: "\\citeyearpar{item1}, and now Doe with a locator \\citeyearpar[p.~44]{item2}" =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\citeyearpar{item1}") <> text ", and now Doe with a locator " <> cite [baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str ",",Space,Str "p.\160\&44"] + , citationSuffix = [Str "p.\160\&44"] , citationId = "item2" }] (rt "\\citeyearpar[p.~44]{item2}")) , "markup" =: "\\citep[\\emph{see}][p. \\textbf{32}]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Emph [Str "see"]] - , citationSuffix = [Str ",",Space,Str "p.",Space, + , citationSuffix = [Str "p.",Space, Strong [Str "32"]] }] (rt "\\citep[\\emph{see}][p. 
\\textbf{32}]{item1}")) ] @@ -126,14 +126,14 @@ biblatexCitations = testGroup "biblatex" =?> para (cite [baseCitation] (rt "\\textcite{item1}")) , "suffix" =: "\\textcite[p.~30]{item1}" =?> para - (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\textcite[p.~30]{item1}")) + (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] (rt "\\textcite[p.~30]{item1}")) , "suffix long" =: "\\textcite[p.~30, with suffix]{item1}" =?> para (cite [baseCitation{ citationSuffix = - toList $ text ", p.\160\&30, with suffix" }] (rt "\\textcite[p.~30, with suffix]{item1}")) + toList $ text "p.\160\&30, with suffix" }] (rt "\\textcite[p.~30, with suffix]{item1}")) , "multiple" =: "\\textcites{item1}[p.~30]{item2}[see also][]{item3}" =?> para (cite [baseCitation{ citationMode = AuthorInText } ,baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str ",",Space,Str "p.\160\&30"] + , citationSuffix = [Str "p.\160\&30"] , citationId = "item2" } ,baseCitation{ citationId = "item3" , citationPrefix = [Str "see",Space,Str "also"] @@ -142,28 +142,28 @@ biblatexCitations = testGroup "biblatex" , "group" =: "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Str "see"] - , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] } + , citationSuffix = [Str "p.\160\&34\8211\&35"] } ,baseCitation{ citationMode = NormalCitation , citationId = "item3" , citationPrefix = [Str "also"] - , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] } + , citationSuffix = [Str "chap.",Space,Str "3"] } ] (rt "\\autocites[see][p.~34--35]{item1}[also][chap. 
3]{item3}")) , "suffix and locator" =: "\\autocite[pp.~33, 35--37, and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\autocite[pp.~33, 35--37, and nowhere else]{item1}")) + , citationSuffix = [Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\autocite[pp.~33, 35--37, and nowhere else]{item1}")) , "suffix only" =: "\\autocite[and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\autocite[and nowhere else]{item1}")) + , citationSuffix = toList $ text "and nowhere else" }] (rt "\\autocite[and nowhere else]{item1}")) , "no author" =: "\\autocite*{item1}, and now Doe with a locator \\autocite*[p.~44]{item2}" =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\autocite*{item1}") <> text ", and now Doe with a locator " <> cite [baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str ",",Space,Str "p.\160\&44"] + , citationSuffix = [Str "p.\160\&44"] , citationId = "item2" }] (rt "\\autocite*[p.~44]{item2}")) , "markup" =: "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Emph [Str "see"]] - , citationSuffix = [Str ",",Space,Str "p.",Space, + , citationSuffix = [Str "p.",Space, Strong [Str "32"]] }] (rt "\\autocite[\\emph{see}][p. 
\\textbf{32}]{item1}")) , "parencite" =: "\\parencite{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation }] (rt "\\parencite{item1}")) diff --git a/tests/latex-reader.native b/tests/latex-reader.native index d8769e605..d19196345 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -260,7 +260,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,HorizontalRule ,Header 1 ("",[],[]) [Str "LaTeX"] ,BulletList - [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str ",",Space,Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline "latex" "\\cite[22-23]{smith.1899}"]]] + [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline "latex" "\\cite[22-23]{smith.1899}"]]] ,[Para [RawInline "latex" "\\doublespacing"]] ,[Para [Math InlineMath "2+2=4"]] ,[Para [Math InlineMath "x \\in y"]] -- cgit v1.2.3 From 6f99ad80135c06d30d92ad275d482e841ef1e872 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 21 Jul 2013 12:16:52 -0700 Subject: Biblio: Tweaks to improve default behavior. * A suffix beginning with a digit gets 'p' inserted before it before passing to citeproc-hs, so that bare numbers are treated as page numbers by default. * A suffix not beginning with punctuation has a space added at the beginning (rather than a comma and space, as was done before). * This adding occurs not just in author-in-text citations, but in all citations. The result of these changes (and the last commit) is that `\citep[23]{item1}` in LaTeX will be interpreted properly, with '23' treated as a locator of type 'page'. 
--- src/Text/Pandoc/Biblio.hs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 4dd82dd08..ae371a46d 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -119,12 +119,12 @@ toCslCite :: Citation -> CSL.Cite toCslCite c = let (l, s) = locatorWords $ citationSuffix c (la,lo) = parseLocator l - s' = case (l,s,citationMode c) of - -- treat a bare locator as if it begins with comma + s' = case (l,s) of + -- treat a bare locator as if it begins with space -- so @item1 [blah] is like [@item1, blah] - ("",(x:_),AuthorInText) | not (isPunct x) - -> [Str ",",Space] ++ s - _ -> s + ("",(x:_)) + | not (isPunct x) -> [Space] ++ s + _ -> s isPunct (Str (x:_)) = isPunctuation x isPunct _ = False citMode = case citationMode c of @@ -173,13 +173,21 @@ pLocator :: Parsec [Inline] st String pLocator = try $ do optional $ pMatch (== Str ",") optional pSpace - f <- many1 (notFollowedBy pSpace >> anyToken) + f <- (guardFollowingDigit >> return [Str "p"]) -- "page" the default + <|> many1 (notFollowedBy pSpace >> anyToken) gs <- many1 pWordWithDigits return $ stringify f ++ (' ' : unwords gs) +guardFollowingDigit :: Parsec [Inline] st () +guardFollowingDigit = do + t <- lookAhead anyToken + case t of + Str (d:_) | isDigit d -> return () + _ -> mzero + pWordWithDigits :: Parsec [Inline] st String pWordWithDigits = try $ do - pSpace + optional pSpace r <- many1 (notFollowedBy pSpace >> anyToken) let s = stringify r guard $ any isDigit s -- cgit v1.2.3 From 7f15d888f6a232d13bf671c5a6ebd850d3f10db5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 22 Jul 2013 16:14:47 -0700 Subject: Require highlighting-kate 0.5.5. 0.5.4 has a serious memory leak that affects the mandoc parser. See highlighting-kate#34. 
--- pandoc.cabal | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index 96f15297f..cd04a2faf 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -256,7 +256,7 @@ Library tagsoup >= 0.12.5 && < 0.13, base64-bytestring >= 0.1 && < 1.1, zlib >= 0.5 && < 0.6, - highlighting-kate >= 0.5.4 && < 0.6, + highlighting-kate >= 0.5.5 && < 0.6, data-default >= 0.4 && < 0.6, temporary >= 1.1 && < 1.2, blaze-html >= 0.5 && < 0.7, @@ -358,7 +358,7 @@ Executable pandoc text >= 0.11 && < 0.12, bytestring >= 0.9 && < 0.11, extensible-exceptions >= 0.1 && < 0.2, - highlighting-kate >= 0.5.4 && < 0.6, + highlighting-kate >= 0.5.5 && < 0.6, HTTP >= 4000.0.5 && < 4000.3, citeproc-hs >= 0.3.7 && < 0.4 if impl(ghc >= 7.0.1) @@ -410,7 +410,7 @@ Test-Suite test-pandoc directory >= 1 && < 1.3, filepath >= 1.1 && < 1.4, process >= 1 && < 1.2, - highlighting-kate >= 0.5.4 && < 0.6, + highlighting-kate >= 0.5.5 && < 0.6, Diff >= 0.2 && < 0.4, test-framework >= 0.3 && < 0.9, test-framework-hunit >= 0.2 && < 0.4, -- cgit v1.2.3 From af5705308a4e4271602e7d356fa8602489e31d0e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 23 Jul 2013 20:31:49 -0700 Subject: Test suite changes for new highlighting-kate version. --- tests/lhs-test.html | 2 +- tests/lhs-test.html+lhs | 2 +- tests/lhs-test.latex | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/lhs-test.html b/tests/lhs-test.html index 9cea03a9f..6fc51b1e9 100644 --- a/tests/lhs-test.html +++ b/tests/lhs-test.html @@ -30,7 +30,7 @@ code > span.er { color: #ff0000; font-weight: bold; }

lhs test

unsplit is an arrow that takes a pair of values and combines them to return a single value:

unsplit :: (Arrow a) => (b -> c -> d) -> a (b, c) d
-unsplit = arr . uncurry
+unsplit = arr . uncurry
           -- arr (\op (x,y) -> x `op` y)

(***) combines two arrows into a new arrow by running the two arrows on a pair of values (one arrow on the first item of the pair and one arrow on the second item of the pair).

f *** g = first f >>> second g
diff --git a/tests/lhs-test.html+lhs b/tests/lhs-test.html+lhs index 487a8a26b..bc0935bd1 100644 --- a/tests/lhs-test.html+lhs +++ b/tests/lhs-test.html+lhs @@ -30,7 +30,7 @@ code > span.er { color: #ff0000; font-weight: bold; }

lhs test

unsplit is an arrow that takes a pair of values and combines them to return a single value:

> unsplit :: (Arrow a) => (b -> c -> d) -> a (b, c) d
-> unsplit = arr . uncurry
+> unsplit = arr . uncurry
 >           -- arr (\op (x,y) -> x `op` y)

(***) combines two arrows into a new arrow by running the two arrows on a pair of values (one arrow on the first item of the pair and one arrow on the second item of the pair).

f *** g = first f >>> second g
diff --git a/tests/lhs-test.latex b/tests/lhs-test.latex index 3028063fe..0bfdec6a5 100644 --- a/tests/lhs-test.latex +++ b/tests/lhs-test.latex @@ -76,7 +76,7 @@ return a single value: \begin{Shaded} \begin{Highlighting}[] \OtherTok{unsplit ::} \NormalTok{(}\DataTypeTok{Arrow} \NormalTok{a) }\OtherTok{=>} \NormalTok{(b }\OtherTok{->} \NormalTok{c }\OtherTok{->} \NormalTok{d) }\OtherTok{->} \NormalTok{a (b, c) d} -\NormalTok{unsplit }\FunctionTok{=} \NormalTok{arr }\FunctionTok{.} \FunctionTok{uncurry} +\NormalTok{unsplit }\FunctionTok{=} \NormalTok{arr }\FunctionTok{.} \NormalTok{uncurry} \CommentTok{-- arr (\textbackslash{}op (x,y) -> x `op` y)} \end{Highlighting} \end{Shaded} -- cgit v1.2.3 From 5592666ca44aa0d027cd95bf11cff09825896584 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 23 Jul 2013 22:31:50 -0700 Subject: Text.Pandoc: Added readJSON, writeJSON to the API. Closes #817. --- src/Text/Pandoc.hs | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 86e78ce53..b5b698e09 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -72,9 +72,11 @@ module Text.Pandoc , readOPML , readHaddock , readNative + , readJSON -- * Writers: converting /from/ Pandoc format , Writer (..) , writeNative + , writeJSON , writeMarkdown , writePlain , writeRST @@ -190,9 +192,8 @@ markdown o s = do -- | Association list of formats and readers. 
readers :: [(String, ReaderOptions -> String -> IO Pandoc)] -readers = [("native" , \_ s -> return $ readNative s) - ,("json" , \_ s -> return $ checkJSON - $ decode $ UTF8.fromStringLazy s) +readers = [ ("native" , \_ s -> return $ readNative s) + ,("json" , \o s -> return $ readJSON o s) ,("markdown" , markdown) ,("markdown_strict" , markdown) ,("markdown_phpextra" , markdown) @@ -205,8 +206,8 @@ readers = [("native" , \_ s -> return $ readNative s) ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs ,("html" , \o s -> return $ readHtml o s) ,("latex" , \o s -> return $ readLaTeX o s) - ,("haddock" , \o s -> return $ readHaddock o s) - ] + ,("haddock" , \o s -> return $ readHaddock o s) + ] data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) | IOStringWriter (WriterOptions -> Pandoc -> IO String) @@ -216,12 +217,12 @@ data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) writers :: [ ( String, Writer ) ] writers = [ ("native" , PureStringWriter writeNative) - ,("json" , PureStringWriter $ \_ -> UTF8.toStringLazy . encode) + ,("json" , PureStringWriter writeJSON) ,("docx" , IOByteStringWriter writeDocx) - ,("odt" , IOByteStringWriter writeODT) - ,("epub" , IOByteStringWriter $ \o -> + ,("odt" , IOByteStringWriter writeODT) + ,("epub" , IOByteStringWriter $ \o -> writeEPUB o{ writerEpubVersion = Just EPUB2 }) - ,("epub3" , IOByteStringWriter $ \o -> + ,("epub3" , IOByteStringWriter $ \o -> writeEPUB o{ writerEpubVersion = Just EPUB3 }) ,("fb2" , IOStringWriter writeFB2) ,("html" , PureStringWriter writeHtmlString) @@ -359,3 +360,10 @@ instance (Data a) => ToJsonFilter (a -> IO [a]) where checkJSON :: Maybe a -> a checkJSON Nothing = error "Error parsing JSON" checkJSON (Just r) = r + +readJSON :: ReaderOptions -> String -> Pandoc +readJSON _ = checkJSON . decode . UTF8.fromStringLazy + +writeJSON :: WriterOptions -> Pandoc -> String +writeJSON _ = UTF8.toStringLazy . 
encode + -- cgit v1.2.3 From 85dacbb282f34dc45a810d7c7b5a2d639b70e1f5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 23 Jul 2013 22:52:15 -0700 Subject: Added `--filter` option. This makes it easier to use JSON filters. Instead of doing pandoc -t json | ./filter | pandoc -f json you can just do pandoc --filter ./filter --- README | 8 ++++++++ pandoc.cabal | 1 + pandoc.hs | 25 +++++++++++++++++++++---- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/README b/README index d9b003344..83873a92e 100644 --- a/README +++ b/README @@ -259,6 +259,14 @@ Reader options require different kinds of images. Currently this option only affects the markdown and LaTeX readers. +`--filter=`*PATH* +: Specify an executable to be used as a filter transforming the + Pandoc AST after the input is parsed and before the output is + written. The executable should read JSON from stdin and write + JSON to stdout. The JSON must be formatted like pandoc's own + JSON input and output. Filters may be most easily created in Haskell, + using the utility function `toJsonFilter` from `Text.Pandoc`. + `--normalize` : Normalize the document after reading: merge adjacent `Str` or `Emph` elements, for example, and remove repeated `Space`s. diff --git a/pandoc.cabal b/pandoc.cabal index cd04a2faf..c47273c57 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -360,6 +360,7 @@ Executable pandoc extensible-exceptions >= 0.1 && < 0.2, highlighting-kate >= 0.5.5 && < 0.6, HTTP >= 4000.0.5 && < 4000.3, + process >= 1 && < 1.2, citeproc-hs >= 0.3.7 && < 0.4 if impl(ghc >= 7.0.1) Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind diff --git a/pandoc.hs b/pandoc.hs index 79bade221..94d206103 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -43,6 +43,7 @@ import System.Environment ( getArgs, getProgName ) import System.Exit ( exitWith, ExitCode (..) 
) import System.FilePath import System.Console.GetOpt +import System.Process (readProcess) import Data.Char ( toLower ) import Data.List ( intercalate, isPrefixOf, sort ) import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) @@ -53,6 +54,7 @@ import Control.Exception.Extensible ( throwIO ) import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.CSL as CSL import Control.Monad (when, unless, liftM) +import Data.Foldable (foldrM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B @@ -86,6 +88,12 @@ wrapWords indent c = wrap' (c - indent) (c - indent) isTextFormat :: String -> Bool isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"] +externalFilter :: FilePath -> Pandoc -> IO Pandoc +externalFilter f d = E.catch + (readJSON def `fmap` readProcess f [] (writeJSON def d)) + (\e -> let _ = (e :: E.SomeException) + in err 83 $ "Error running filter `" ++ f ++ "'") + -- | Data structure for command line options. 
data Opt = Opt { optTabStop :: Int -- ^ Number of spaces per tab @@ -272,6 +280,13 @@ options = "STRING") "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" + , Option "" ["filter"] + (ReqArg + (\arg opt -> return opt { optPlugins = externalFilter arg : + optPlugins opt }) + "PROGRAM") + "" -- "External JSON filter" + , Option "" ["normalize"] (NoArg (\opt -> return opt { optTransforms = @@ -876,6 +891,7 @@ main = do , optReferenceLinks = referenceLinks , optWrapText = wrap , optColumns = columns + , optPlugins = plugins , optEmailObfuscation = obfuscationMethod , optIdentifierPrefix = idPrefix , optIndentedCodeClasses = codeBlockClasses @@ -1099,6 +1115,7 @@ main = do reader readerOpts let doc0 = foldr ($) doc transforms + doc1 <- foldrM ($) doc0 plugins let writeBinary :: B.ByteString -> IO () writeBinary = B.writeFile (UTF8.encodePath outputFile) @@ -1109,15 +1126,15 @@ main = do case getWriter writerName' of Left e -> err 9 e - Right (IOStringWriter f) -> f writerOptions doc0 >>= writerFn outputFile - Right (IOByteStringWriter f) -> f writerOptions doc0 >>= writeBinary + Right (IOStringWriter f) -> f writerOptions doc1 >>= writerFn outputFile + Right (IOByteStringWriter f) -> f writerOptions doc1 >>= writeBinary Right (PureStringWriter f) | pdfOutput -> do - res <- makePDF latexEngine f writerOptions doc0 + res <- makePDF latexEngine f writerOptions doc1 case res of Right pdf -> writeBinary pdf Left err' -> err 43 $ UTF8.toStringLazy err' - | otherwise -> selfcontain (f writerOptions doc0 ++ + | otherwise -> selfcontain (f writerOptions doc1 ++ ['\n' | not standalone']) >>= writerFn outputFile . handleEntities where htmlFormat = writerName' `elem` -- cgit v1.2.3 From 2e5edbb27837372f658b1abbe05371be57415847 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 23 Jul 2013 23:17:07 -0700 Subject: Revert "Added `--filter` option." This reverts commit 85dacbb282f34dc45a810d7c7b5a2d639b70e1f5. 
--- README | 8 -------- pandoc.cabal | 1 - pandoc.hs | 25 ++++--------------------- 3 files changed, 4 insertions(+), 30 deletions(-) diff --git a/README b/README index 83873a92e..d9b003344 100644 --- a/README +++ b/README @@ -259,14 +259,6 @@ Reader options require different kinds of images. Currently this option only affects the markdown and LaTeX readers. -`--filter=`*PATH* -: Specify an executable to be used as a filter transforming the - Pandoc AST after the input is parsed and before the output is - written. The executable should read JSON from stdin and write - JSON to stdout. The JSON must be formatted like pandoc's own - JSON input and output. Filters may be most easily created in Haskell, - using the utility function `toJsonFilter` from `Text.Pandoc`. - `--normalize` : Normalize the document after reading: merge adjacent `Str` or `Emph` elements, for example, and remove repeated `Space`s. diff --git a/pandoc.cabal b/pandoc.cabal index c47273c57..cd04a2faf 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -360,7 +360,6 @@ Executable pandoc extensible-exceptions >= 0.1 && < 0.2, highlighting-kate >= 0.5.5 && < 0.6, HTTP >= 4000.0.5 && < 4000.3, - process >= 1 && < 1.2, citeproc-hs >= 0.3.7 && < 0.4 if impl(ghc >= 7.0.1) Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind diff --git a/pandoc.hs b/pandoc.hs index 94d206103..79bade221 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -43,7 +43,6 @@ import System.Environment ( getArgs, getProgName ) import System.Exit ( exitWith, ExitCode (..) 
) import System.FilePath import System.Console.GetOpt -import System.Process (readProcess) import Data.Char ( toLower ) import Data.List ( intercalate, isPrefixOf, sort ) import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) @@ -54,7 +53,6 @@ import Control.Exception.Extensible ( throwIO ) import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.CSL as CSL import Control.Monad (when, unless, liftM) -import Data.Foldable (foldrM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B @@ -88,12 +86,6 @@ wrapWords indent c = wrap' (c - indent) (c - indent) isTextFormat :: String -> Bool isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"] -externalFilter :: FilePath -> Pandoc -> IO Pandoc -externalFilter f d = E.catch - (readJSON def `fmap` readProcess f [] (writeJSON def d)) - (\e -> let _ = (e :: E.SomeException) - in err 83 $ "Error running filter `" ++ f ++ "'") - -- | Data structure for command line options. 
data Opt = Opt { optTabStop :: Int -- ^ Number of spaces per tab @@ -280,13 +272,6 @@ options = "STRING") "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" - , Option "" ["filter"] - (ReqArg - (\arg opt -> return opt { optPlugins = externalFilter arg : - optPlugins opt }) - "PROGRAM") - "" -- "External JSON filter" - , Option "" ["normalize"] (NoArg (\opt -> return opt { optTransforms = @@ -891,7 +876,6 @@ main = do , optReferenceLinks = referenceLinks , optWrapText = wrap , optColumns = columns - , optPlugins = plugins , optEmailObfuscation = obfuscationMethod , optIdentifierPrefix = idPrefix , optIndentedCodeClasses = codeBlockClasses @@ -1115,7 +1099,6 @@ main = do reader readerOpts let doc0 = foldr ($) doc transforms - doc1 <- foldrM ($) doc0 plugins let writeBinary :: B.ByteString -> IO () writeBinary = B.writeFile (UTF8.encodePath outputFile) @@ -1126,15 +1109,15 @@ main = do case getWriter writerName' of Left e -> err 9 e - Right (IOStringWriter f) -> f writerOptions doc1 >>= writerFn outputFile - Right (IOByteStringWriter f) -> f writerOptions doc1 >>= writeBinary + Right (IOStringWriter f) -> f writerOptions doc0 >>= writerFn outputFile + Right (IOByteStringWriter f) -> f writerOptions doc0 >>= writeBinary Right (PureStringWriter f) | pdfOutput -> do - res <- makePDF latexEngine f writerOptions doc1 + res <- makePDF latexEngine f writerOptions doc0 case res of Right pdf -> writeBinary pdf Left err' -> err 43 $ UTF8.toStringLazy err' - | otherwise -> selfcontain (f writerOptions doc1 ++ + | otherwise -> selfcontain (f writerOptions doc0 ++ ['\n' | not standalone']) >>= writerFn outputFile . handleEntities where htmlFormat = writerName' `elem` -- cgit v1.2.3 From 85cc140744b01148da944a58948d9e4a87cb64c4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 25 Jul 2013 09:45:23 -0700 Subject: Textile reader: Improved handling of `
<pre>` blocks.

* Closed #927 (a bug in which `<pre>` in certain contexts was
  not recognized as a code block).
* Remove internal HTML tags in code blocks, rather than printing
  them verbatim.
* Parse attributes on `<pre>` tag for code blocks.
---
 src/Text/Pandoc/Readers/Textile.hs | 14 ++++++++++----
 tests/textile-reader.native        |  2 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index a1687a691..9191f6908 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -57,6 +57,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
+import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
 import Data.Char ( digitToInt, isUpper )
@@ -152,8 +153,10 @@ codeBlockBc = try $ do
 -- | Code Blocks in Textile are between <pre> and </pre>
codeBlockPre :: Parser [Char] ParserState Block codeBlockPre = try $ do - htmlTag (tagOpen (=="pre") null) - result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak) + (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) + result' <- (innerText . parseTags) `fmap` -- remove internal tags + manyTill anyChar (htmlTag (tagClose (=="pre"))) + optional blanklines -- drop leading newline if any let result'' = case result' of '\n':xs -> xs @@ -162,7 +165,10 @@ codeBlockPre = try $ do let result''' = case reverse result'' of '\n':_ -> init result'' _ -> result'' - return $ CodeBlock ("",[],[]) result''' + let classes = words $ fromAttrib "class" t + let ident = fromAttrib "id" t + let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ CodeBlock (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 header :: Parser [Char] ParserState Block @@ -275,7 +281,7 @@ definitionListItem = try $ do -- blocks support, we have to lookAhead for a rawHtmlBlock. blockBreak :: Parser [Char] ParserState () blockBreak = try (newline >> blanklines >> return ()) <|> - (lookAhead rawHtmlBlock >> return ()) + try (optional spaces >> lookAhead rawHtmlBlock >> return ()) -- raw content diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 22a338d38..d14ae02c8 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -139,7 +139,7 @@ Pandoc (Meta {unMeta = fromList []}) ,Header 1 ("",[],[]) [Str "Raw",Space,Str "HTML"] ,Para [Str "However",Str ",",Space,RawInline "html" "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline "html" "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] ,RawBlock "html" "
" -,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold",LineBreak] +,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"] ,RawBlock "html" "
" ,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"] ,RawBlock "html" "
" -- cgit v1.2.3 From fb9f2e4bd5f71c7b515566921c5c5a7bff73c52c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 25 Jul 2013 10:00:11 -0700 Subject: LaTeX reader: Support `\v{}` for hacek. Closes #926. --- src/Text/Pandoc/Readers/LaTeX.hs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 7c7ae9fef..6b5035d93 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -415,6 +415,7 @@ inlineCommands = M.fromList $ , (".", option (str ".") $ try $ tok >>= accent dot) , ("=", option (str "=") $ try $ tok >>= accent macron) , ("c", option (str "c") $ try $ tok >>= accent cedilla) + , ("v", option (str "v") $ try $ tok >>= accent hacek) , ("i", lit "i") , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp)) , (",", pure mempty) @@ -671,6 +672,42 @@ cedilla 's' = 'ş' cedilla 'S' = 'Ş' cedilla c = c +hacek :: Char -> Char +hacek 'A' = 'Ǎ' +hacek 'a' = 'ǎ' +hacek 'C' = 'Č' +hacek 'c' = 'č' +hacek 'D' = 'Ď' +hacek 'd' = 'ď' +hacek 'E' = 'Ě' +hacek 'e' = 'ě' +hacek 'G' = 'Ǧ' +hacek 'g' = 'ǧ' +hacek 'H' = 'Ȟ' +hacek 'h' = 'ȟ' +hacek 'I' = 'Ǐ' +hacek 'i' = 'ǐ' +hacek 'j' = 'ǰ' +hacek 'K' = 'Ǩ' +hacek 'k' = 'ǩ' +hacek 'L' = 'Ľ' +hacek 'l' = 'ľ' +hacek 'N' = 'Ň' +hacek 'n' = 'ň' +hacek 'O' = 'Ǒ' +hacek 'o' = 'ǒ' +hacek 'R' = 'Ř' +hacek 'r' = 'ř' +hacek 'S' = 'Š' +hacek 's' = 'š' +hacek 'T' = 'Ť' +hacek 't' = 'ť' +hacek 'U' = 'Ǔ' +hacek 'u' = 'ǔ' +hacek 'Z' = 'Ž' +hacek 'z' = 'ž' +hacek c = c + tok :: LP Inlines tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) -- cgit v1.2.3 From d5fad2306a27b3fcf2c85782dd13bc8e516a5df9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 25 Jul 2013 20:29:42 -0700 Subject: LaTeX writer: Change `\` to `/` in paths. `/` works even on Windows in LaTeX. `\` will cause major problems if unescaped. 
--- src/Text/Pandoc/Writers/LaTeX.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 2b4a608a7..06a04ade2 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -202,7 +202,8 @@ stringToLaTeX ctx (x:xs) = do _ -> '-' : rest '~' | not isUrl -> "\\textasciitilde{}" ++ rest '^' -> "\\^{}" ++ rest - '\\' -> "\\textbackslash{}" ++ rest + '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows + | otherwise -> "\\textbackslash{}" ++ rest '|' -> "\\textbar{}" ++ rest '<' -> "\\textless{}" ++ rest '>' -> "\\textgreater{}" ++ rest @@ -648,7 +649,8 @@ inlineToLaTeX (Image _ (source, _)) = do let source' = if isAbsoluteURI source then source else unEscapeString source - return $ "\\includegraphics" <> braces (text source') + source'' <- stringToLaTeX URLString source' + return $ "\\includegraphics" <> braces (text source'') inlineToLaTeX (Note contents) = do modify (\s -> s{stInNote = True}) contents' <- blockListToLaTeX contents -- cgit v1.2.3 From a97f39c12e7b47a272575b69ad4cdd38966c043e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Jul 2013 12:40:56 -0700 Subject: Beamer: add allowframebreaks to slide if set in header classes. It's recommended that your bibliography slide have this attribute: # References {.allowframebreaks} This causes multiple slides to be created if necessary, depending on the length of the bibliography. --- README | 10 ++++++++++ src/Text/Pandoc/Writers/LaTeX.hs | 19 +++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/README b/README index d9b003344..4895f0f52 100644 --- a/README +++ b/README @@ -2674,6 +2674,16 @@ using the `-V` option: pandoc -t beamer habits.txt -V theme:Warsaw -o habits.pdf +Note that header attributes will turn into slide attributes +(on a `
` or `
`) in HTML slide formats, allowing you +to style individual slides. In Beamer, the only header attribute +that affects slides is the `allowframebreaks` class, which sets the +`allowframebreaks` option, causing multiple slides to be created +if the content overfills the frame. This is recommended especially for +bibliographies: + + # References {.allowframebreaks} + Literate Haskell support ======================== diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 06a04ade2..aa5bfa623 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -232,7 +232,7 @@ toSlides bs = do elementToBeamer :: Int -> Element -> State WriterState [Block] elementToBeamer _slideLevel (Blk b) = return [b] -elementToBeamer slideLevel (Sec lvl _num (ident,classes,_) tit elts) +elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) | lvl > slideLevel = do bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts return $ Para ( RawInline "latex" "\\begin{block}{" @@ -240,7 +240,7 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,_) tit elts) : bs ++ [RawBlock "latex" "\\end{block}"] | lvl < slideLevel = do bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts - return $ (Header lvl (ident,classes,[]) tit) : bs + return $ (Header lvl (ident,classes,kvs) tit) : bs | otherwise = do -- lvl == slideLevel -- note: [fragile] is required or verbatim breaks let hasCodeBlock (CodeBlock _ _) = [True] @@ -248,17 +248,20 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,_) tit elts) let hasCode (Code _ _) = [True] hasCode _ = [] opts <- gets stOptions - let fragile = if not $ null $ queryWith hasCodeBlock elts ++ + let fragile = not $ null $ queryWith hasCodeBlock elts ++ if writerListings opts then queryWith hasCode elts else [] - then "[fragile]" - else "" - let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ fragile) : + let allowframebreaks = "allowframebreaks" `elem` classes + let 
optionslist = ["fragile" | fragile] ++ + ["allowframebreaks" | allowframebreaks] + let options = if null optionslist + then "" + else "[" ++ intercalate "," optionslist ++ "]" + let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ options) : if tit == [Str "\0"] -- marker for hrule then [] - else (RawInline "latex" "\\frametitle{") : tit ++ - [RawInline "latex" "}"] + else (RawInline "latex" "{") : tit ++ [RawInline "latex" "}"] let slideEnd = RawBlock "latex" "\\end{frame}" -- now carve up slide into blocks if there are sections inside bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts -- cgit v1.2.3 From 1dde96e41879dd2ae39e8e79f8c77914f1885947 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 29 Jul 2013 08:38:29 -0700 Subject: Document that `fancy_lists` turns on `#` as list marker. --- README | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README b/README index 4895f0f52..dbb0150d9 100644 --- a/README +++ b/README @@ -1342,6 +1342,12 @@ capital letter with a period, by at least two spaces.[^2] (C\) 2007 Joe Smith +The `fancy_lists` extension also allows '`#`' to be used as an +ordered list marker in place of a numeral: + + #. one + #. two + **Extension: `startnum`** Pandoc also pays attention to the type of list marker used, and to the -- cgit v1.2.3 From 3c06e2692a8fd7307658498b44401868e1059d61 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 29 Jul 2013 08:38:46 -0700 Subject: Markdown atx headers: Allow `.` or `)` after `#` if no `fancy_lists`. --- src/Text/Pandoc/Readers/Markdown.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 28f69eae4..076706b4e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -494,7 +494,8 @@ addToHeaderList (ident,classes,kvs) text = do atxHeader :: MarkdownParser (F Blocks) atxHeader = try $ do level <- many1 (char '#') >>= return . 
length - notFollowedBy (char '.' <|> char ')') -- this would be a list + notFollowedBy $ guardEnabled Ext_fancy_lists >> + (char '.' <|> char ')') -- this would be a list skipSpaces text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing -- cgit v1.2.3 From 7024664ddae00fc459953bb5d4bbc91d5877be1b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 30 Jul 2013 08:38:13 -0700 Subject: Fixed compilation with http-conduit flag False. --- src/Text/Pandoc/Shared.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 0f2e16d2e..09874299d 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -104,7 +104,6 @@ import System.IO (stderr) import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), renderOptions) import qualified Data.ByteString as BS -import Data.ByteString.Lazy (toChunks) import qualified Data.ByteString.Char8 as B8 #ifdef EMBED_DATA_FILES @@ -114,6 +113,7 @@ import System.FilePath ( joinPath, splitDirectories ) import Paths_pandoc (getDataFileName) #endif #ifdef HTTP_CONDUIT +import Data.ByteString.Lazy (toChunks) import Network.HTTP.Conduit (httpLbs, parseUrl, withManager, responseBody, responseHeaders) import Network.HTTP.Types.Header ( hContentType) @@ -616,7 +616,7 @@ openURL u UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) #else | otherwise = E.try $ getBodyAndMimeType `fmap` browse - (do UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." + (do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." setOutHandler $ const (return ()) setAllowRedirects True request (getRequest' u')) -- cgit v1.2.3 From dceffeb04370e8661dd0534a6e97fd15caaeddcf Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 2 Aug 2013 14:20:44 -0700 Subject: Biblio: Override citeproc-hs's endWithPunct. The new version correctly sees a sentence ending in '.)' as ending with punctuation. 
This fixes a bug which led such sentences to receive an extra period at the end: '.).'. Thanks to Steve Petersen for reporting. --- src/Text/Pandoc/Biblio.hs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index ae371a46d..31c55472e 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -32,7 +32,7 @@ module Text.Pandoc.Biblio ( processBiblio ) where import Data.List import Data.Char ( isDigit, isPunctuation ) import qualified Data.Map as M -import Text.CSL hiding ( Cite(..), Citation(..) ) +import Text.CSL hiding ( Cite(..), Citation(..), endWithPunct ) import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition import Text.Pandoc.Generic @@ -88,6 +88,19 @@ sanitize :: [Inline] -> [Inline] sanitize xs | endWithPunct xs = toCapital xs | otherwise = toCapital (xs ++ [Str "."]) + +-- A replacement for citeproc-hs's endWithPunct, which wrongly treats +-- a sentence ending in '.)' as not ending with punctuation, leading +-- to an extra period. +endWithPunct :: [Inline] -> Bool +endWithPunct [] = True +endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of + [] -> True + (')':c:_) | isEndPunct c -> True + (c:_) | isEndPunct c -> True + | otherwise -> False + where isEndPunct c = c `elem` ".,;:!?" + deNote :: Pandoc -> Pandoc deNote = topDown go where go (Note [Para xs]) = Note $ bottomUp go' [Para $ sanitize xs] -- cgit v1.2.3 From a32417378e8023b5dd8af4d8a9ea66eddb99a0eb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 2 Aug 2013 15:37:09 -0700 Subject: Biblio: Don't interfere with Notes that aren't citation notes. Closes #898: notes not generated from citations were being adjusted (first letter capitalized, for example, against author's intentions). 
--- src/Text/Pandoc/Biblio.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 31c55472e..d0db35ae7 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -53,7 +53,7 @@ processBiblio (Just style) r p = map (map toCslCite) grps) cits_map = M.fromList $ zip grps (citations result) biblioList = map (renderPandoc' style) (bibliography result) - Pandoc m b = bottomUp mvPunct . deNote . bottomUp (processCite style cits_map) $ p' + Pandoc m b = bottomUp mvPunct . deNote . topDown (processCite style cits_map) $ p' in Pandoc m $ b ++ biblioList -- | Substitute 'Cite' elements with formatted citations. @@ -103,7 +103,8 @@ endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of deNote :: Pandoc -> Pandoc deNote = topDown go - where go (Note [Para xs]) = Note $ bottomUp go' [Para $ sanitize xs] + where go (Cite cs [Note [Para xs]]) = + Cite cs [Note $ bottomUp go' [Para $ sanitize xs]] go (Note xs) = Note $ bottomUp go' xs go x = x go' (Note [Para xs]:ys) = -- cgit v1.2.3 From 1567d291a3aed0e55ddaaa65492ab19741e515b5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 Aug 2013 16:39:43 -0700 Subject: Text.Pandoc.JSON: Use To/FromJSON instances from pandoc-types. * These use GHC generics rather than syb, and are faster. * toJsonFilter is now a deprecated synonym of toJSONFilter from Text.Pandoc.JSON. * The deprecated jsonFilter function has been removed. --- src/Text/Pandoc.hs | 74 +++++++++--------------------------------------------- 1 file changed, 12 insertions(+), 62 deletions(-) diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index b5b698e09..db0f0e5fe 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -106,12 +106,13 @@ module Text.Pandoc -- * Miscellaneous , getReader , getWriter - , jsonFilter , ToJsonFilter(..) + , ToJSONFilter(..) 
) where import Text.Pandoc.Definition import Text.Pandoc.Generic +import Text.Pandoc.JSON import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST @@ -146,13 +147,11 @@ import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options import Text.Pandoc.Shared (safeRead, warn) -import Data.ByteString.Lazy (ByteString) +import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate, isSuffixOf) import Data.Version (showVersion) -import Data.Aeson.Generic import Data.Set (Set) -import Data.Data import qualified Data.Set as Set import Text.Parsec import Text.Parsec.Error @@ -211,7 +210,7 @@ readers = [ ("native" , \_ s -> return $ readNative s) data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) | IOStringWriter (WriterOptions -> Pandoc -> IO String) - | IOByteStringWriter (WriterOptions -> Pandoc -> IO ByteString) + | IOByteStringWriter (WriterOptions -> Pandoc -> IO BL.ByteString) -- | Association list of formats and writers. writers :: [ ( String, Writer ) ] @@ -304,66 +303,17 @@ getWriter s = \o -> r o{ writerExtensions = setExts $ getDefaultExtensions writerName } -{-# DEPRECATED jsonFilter "Use toJsonFilter instead" #-} --- | Converts a transformation on the Pandoc AST into a function --- that reads and writes a JSON-encoded string. This is useful --- for writing small scripts. -jsonFilter :: (Pandoc -> Pandoc) -> String -> String -jsonFilter f = UTF8.toStringLazy . encode . f . checkJSON . decode . UTF8.fromStringLazy - --- | 'toJsonFilter' convert a function into a filter that reads pandoc's json output --- from stdin, transforms it by walking the AST and applying the specified --- function, and writes the result as json to stdout. 
Usage example: --- --- > -- capitalize.hs --- > -- compile with: ghc --make capitalize --- > -- run with: pandoc -t json | ./capitalize | pandoc -f json --- > --- > import Text.Pandoc --- > import Data.Char (toUpper) --- > --- > main :: IO () --- > main = toJsonFilter capitalizeStrings --- > --- > capitalizeStrings :: Inline -> Inline --- > capitalizeStrings (Str s) = Str $ map toUpper s --- > capitalizeStrings x = x --- --- The function can be any type @(a -> a)@, @(a -> IO a)@, @(a -> [a])@, --- or @(a -> IO [a])@, where @a@ is an instance of 'Data'. --- So, for example, @a@ can be 'Pandoc', 'Inline', 'Block', ['Inline'], --- ['Block'], 'Meta', 'ListNumberStyle', 'Alignment', 'ListNumberDelim', --- 'QuoteType', etc. See 'Text.Pandoc.Definition'. -class ToJsonFilter a where - toJsonFilter :: a -> IO () - -instance (Data a) => ToJsonFilter (a -> a) where - toJsonFilter f = BL.getContents >>= - BL.putStr . encode . (bottomUp f :: Pandoc -> Pandoc) . checkJSON . decode - -instance (Data a) => ToJsonFilter (a -> IO a) where - toJsonFilter f = BL.getContents >>= - (bottomUpM f :: Pandoc -> IO Pandoc) . checkJSON . decode >>= - BL.putStr . encode - -instance (Data a) => ToJsonFilter (a -> [a]) where - toJsonFilter f = BL.getContents >>= - BL.putStr . encode . (bottomUp (concatMap f) :: Pandoc -> Pandoc) . - checkJSON . decode - -instance (Data a) => ToJsonFilter (a -> IO [a]) where - toJsonFilter f = BL.getContents >>= - (bottomUpM (fmap concat . mapM f) :: Pandoc -> IO Pandoc) - . checkJSON . decode >>= - BL.putStr . encode - -checkJSON :: Maybe a -> a -checkJSON Nothing = error "Error parsing JSON" -checkJSON (Just r) = r +{-# DEPRECATED toJsonFilter "Use toJSONFilter instead" #-} +class ToJSONFilter a => ToJsonFilter a + where toJsonFilter :: a -> IO () + toJsonFilter = toJSONFilter readJSON :: ReaderOptions -> String -> Pandoc -readJSON _ = checkJSON . decode . UTF8.fromStringLazy +readJSON _ = checkJSON . eitherDecode' . 
UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode +checkJSON :: Either String a -> a +checkJSON (Right x) = x +checkJSON (Left e) = error e -- cgit v1.2.3 From a24409d43efa7727f36b1752d384189661e5149c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 Aug 2013 16:44:54 -0700 Subject: pandoc.cabal: Removed support for ghc version < 7.2. There is no point to supporting them, since pandoc-types requires at least ghc 7.2 for GHC generics. --- pandoc.cabal | 46 ++++++---------------------------------------- 1 file changed, 6 insertions(+), 40 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index cd04a2faf..7d4bccc41 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -274,17 +274,8 @@ Library cpp-options: -DEMBED_DATA_FILES -- build-tools: hsb2hs other-modules: Text.Pandoc.Data - if impl(ghc >= 7.0.1) - Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind - else - if impl(ghc >= 6.12) - Ghc-Options: -Wall -fno-warn-unused-do-bind - else - Ghc-Options: -Wall - if impl(ghc >= 7.0.1) - Ghc-Prof-Options: -auto-all -caf-all -rtsopts - else - Ghc-Prof-Options: -auto-all -caf-all + Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind + Ghc-Prof-Options: -auto-all -caf-all -rtsopts Default-Language: Haskell98 Default-Extensions: CPP Other-Extensions: PatternGuards, OverloadedStrings, @@ -361,17 +352,8 @@ Executable pandoc highlighting-kate >= 0.5.5 && < 0.6, HTTP >= 4000.0.5 && < 4000.3, citeproc-hs >= 0.3.7 && < 0.4 - if impl(ghc >= 7.0.1) - Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind - else - if impl(ghc >= 6.12) - Ghc-Options: -Wall -fno-warn-unused-do-bind - else - Ghc-Options: -Wall - if impl(ghc >= 7.0.1) - Ghc-Prof-Options: -auto-all -caf-all -rtsopts -with-rtsopts=-K16m - else - Ghc-Prof-Options: -auto-all -caf-all + Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind + Ghc-Prof-Options: -auto-all -caf-all -rtsopts -with-rtsopts=-K16m if os(windows) 
Cpp-options: -D_WINDOWS Default-Language: Haskell98 @@ -431,17 +413,7 @@ Test-Suite test-pandoc Tests.Writers.HTML Tests.Writers.Markdown Tests.Writers.LaTeX - if impl(ghc >= 7.0.1) - Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind - else - if impl(ghc >= 6.12) - Ghc-Options: -Wall -fno-warn-unused-do-bind - else - Ghc-Options: -Wall - if impl(ghc >= 7) - cpp-options: -D_LIT=lit - else - cpp-options: -D_LIT=$lit + Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind Default-Language: Haskell98 Default-Extensions: CPP @@ -453,11 +425,5 @@ benchmark benchmark-pandoc base >= 4.2 && < 5, syb >= 0.1 && < 0.5, criterion >= 0.5 && < 0.9 - if impl(ghc >= 7.0.1) - Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind - else - if impl(ghc >= 6.12) - Ghc-Options: -Wall -fno-warn-unused-do-bind - else - Ghc-Options: -Wall + Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind Default-Language: Haskell98 -- cgit v1.2.3 From 97b2be599e11bbe7aed73a30d8c7900f4276a3df Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 Aug 2013 17:02:35 -0700 Subject: Text.Pandoc: Don't reexport ToJSONFilter. It's better just to import this from Text.Pandoc.JSON. That way, compiled filters will be smaller in size. --- src/Text/Pandoc.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index db0f0e5fe..27aa02a75 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -107,7 +107,6 @@ module Text.Pandoc , getReader , getWriter , ToJsonFilter(..) - , ToJSONFilter(..) ) where import Text.Pandoc.Definition @@ -303,7 +302,8 @@ getWriter s = \o -> r o{ writerExtensions = setExts $ getDefaultExtensions writerName } -{-# DEPRECATED toJsonFilter "Use toJSONFilter instead" #-} +{-# DEPRECATED toJsonFilter "Use 'toJSONFilter' from 'Text.Pandoc.JSON' instead" #-} +-- | Deprecated. Use @toJSONFilter@ from @Text.Pandoc.JSON@ instead. 
class ToJSONFilter a => ToJsonFilter a where toJsonFilter :: a -> IO () toJsonFilter = toJSONFilter -- cgit v1.2.3 From 4a84b78100f2cfa0f7f7d13a24693a37af60003d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 Aug 2013 23:05:14 -0700 Subject: MediaWiki writer: Use native mediawiki tables instead of HTML. Closes #720. --- src/Text/Pandoc/Writers/MediaWiki.hs | 83 +++++---- tests/tables.mediawiki | 316 ++++++++++++++--------------------- 2 files changed, 164 insertions(+), 235 deletions(-) diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index b3b319c2a..e1bfd18b2 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -36,7 +36,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intersect, intercalate ) +import Data.List ( intersect, intercalate, intersperse ) import Network.URI ( isURI ) import Control.Monad.State @@ -135,25 +135,17 @@ blockToMediaWiki opts (BlockQuote blocks) = do return $ "
" ++ contents ++ "
" blockToMediaWiki opts (Table capt aligns widths headers rows') = do - let alignStrings = map alignmentToString aligns - captionDoc <- if null capt - then return "" - else do - c <- inlineListToMediaWiki opts capt - return $ "" ++ c ++ "\n" - let percent w = show (truncate (100*w) :: Integer) ++ "%" - let coltags = if all (== 0.0) widths - then "" - else unlines $ map - (\w -> "") widths - head' <- if all null headers - then return "" - else do - hs <- tableRowToMediaWiki opts alignStrings 0 headers - return $ "\n" ++ hs ++ "\n\n" - body' <- zipWithM (tableRowToMediaWiki opts alignStrings) [1..] rows' - return $ "\n" ++ captionDoc ++ coltags ++ head' ++ - "\n" ++ unlines body' ++ "\n
\n" + caption <- if null capt + then return "" + else do + c <- inlineListToMediaWiki opts capt + return $ "|+ " ++ trimr c ++ "\n" + let headless = all null headers + let allrows = if headless then rows' else headers:rows' + tableBody <- (concat . intersperse "|-\n") `fmap` + mapM (tableRowToMediaWiki opts headless aligns widths) + (zip [1..] allrows) + return $ "{|\n" ++ caption ++ tableBody ++ "|}\n" blockToMediaWiki opts x@(BulletList items) = do oldUseTags <- get >>= return . stUseTags @@ -285,20 +277,34 @@ vcat = intercalate "\n" -- Auxiliary functions for tables: tableRowToMediaWiki :: WriterOptions - -> [String] - -> Int - -> [[Block]] + -> Bool + -> [Alignment] + -> [Double] + -> (Int, [[Block]]) -> State WriterState String -tableRowToMediaWiki opts alignStrings rownum cols' = do - let celltype = if rownum == 0 then "th" else "td" - let rowclass = case rownum of - 0 -> "header" - x | x `rem` 2 == 1 -> "odd" - _ -> "even" - cols'' <- sequence $ zipWith - (\alignment item -> tableItemToMediaWiki opts celltype alignment item) - alignStrings cols' - return $ "\n" ++ unlines cols'' ++ "" +tableRowToMediaWiki opts headless alignments widths (rownum, cells) = do + cells' <- mapM (\cellData -> + tableCellToMediaWiki opts headless rownum cellData) + $ zip3 alignments widths cells + return $ unlines cells' + +tableCellToMediaWiki :: WriterOptions + -> Bool + -> Int + -> (Alignment, Double, [Block]) + -> State WriterState String +tableCellToMediaWiki opts headless rownum (alignment, width, bs) = do + contents <- blockListToMediaWiki opts bs + let marker = if rownum == 1 && not headless then "!" 
else "|" + let percent w = show (truncate (100*w) :: Integer) ++ "%" + let attrs = ["align=" ++ show (alignmentToString alignment) | + alignment /= AlignDefault && alignment /= AlignLeft] ++ + ["width=\"" ++ percent width ++ "\"" | + width /= 0.0 && rownum == 1] + let attr = if null attrs + then "" + else unwords attrs ++ "|" + return $ marker ++ attr ++ trimr contents alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of @@ -307,17 +313,6 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableItemToMediaWiki :: WriterOptions - -> String - -> String - -> [Block] - -> State WriterState String -tableItemToMediaWiki opts celltype align' item = do - let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++ - x ++ "" - contents <- blockListToMediaWiki opts item - return $ mkcell contents - -- | Convert list of Pandoc block elements to MediaWiki. blockListToMediaWiki :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements diff --git a/tests/tables.mediawiki b/tests/tables.mediawiki index 4836ecd79..efde76559 100644 --- a/tests/tables.mediawiki +++ b/tests/tables.mediawiki @@ -1,212 +1,146 @@ Simple table with caption: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Demonstration of simple table syntax.
RightLeftCenterDefault
12121212
123123123123
1111
+{| +|+ Demonstration of simple table syntax. +!align="right"|Right +!Left +!align="center"|Center +!Default +|- +|align="right"|12 +|12 +|align="center"|12 +|12 +|- +|align="right"|123 +|123 +|align="center"|123 +|123 +|- +|align="right"|1 +|1 +|align="center"|1 +|1 +|} Simple table without caption: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
RightLeftCenterDefault
12121212
123123123123
1111
+{| +!align="right"|Right +!Left +!align="center"|Center +!Default +|- +|align="right"|12 +|12 +|align="center"|12 +|12 +|- +|align="right"|123 +|123 +|align="center"|123 +|123 +|- +|align="right"|1 +|1 +|align="center"|1 +|1 +|} Simple table indented two spaces: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Demonstration of simple table syntax.
RightLeftCenterDefault
12121212
123123123123
1111
+{| +|+ Demonstration of simple table syntax. +!align="right"|Right +!Left +!align="center"|Center +!Default +|- +|align="right"|12 +|12 +|align="center"|12 +|12 +|- +|align="right"|123 +|123 +|align="center"|123 +|123 +|- +|align="right"|1 +|1 +|align="center"|1 +|1 +|} Multiline table with caption: - - ----- - - - - - - - - - - - - - - - - - - - - - -
Here's the caption. It may span multiple lines.
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here's another one. Note the blank line between rows.
+{| +|+ Here's the caption. It may span multiple lines. +!align="center" width="15%"|Centered Header +!width="13%"|Left Aligned +!align="right" width="16%"|Right Aligned +!width="33%"|Default aligned +|- +|align="center"|First +|row +|align="right"|12.0 +|Example of a row that spans multiple lines. +|- +|align="center"|Second +|row +|align="right"|5.0 +|Here's another one. Note the blank line between rows. +|} Multiline table without caption: - ----- - - - - - - - - - - - - - - - - - - - - - -
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here's another one. Note the blank line between rows.
+{| +!align="center" width="15%"|Centered Header +!width="13%"|Left Aligned +!align="right" width="16%"|Right Aligned +!width="33%"|Default aligned +|- +|align="center"|First +|row +|align="right"|12.0 +|Example of a row that spans multiple lines. +|- +|align="center"|Second +|row +|align="right"|5.0 +|Here's another one. Note the blank line between rows. +|} Table without column headers: - - - - - - - - - - - - - - - - - - - - - -
12121212
123123123123
1111
+{| +|align="right"|12 +|12 +|align="center"|12 +|align="right"|12 +|- +|align="right"|123 +|123 +|align="center"|123 +|align="right"|123 +|- +|align="right"|1 +|1 +|align="center"|1 +|align="right"|1 +|} Multiline table without column headers: - ----- - - - - - - - - - - - - - -
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here's another one. Note the blank line between rows.
+{| +|align="center" width="15%"|First +|width="13%"|row +|align="right" width="16%"|12.0 +|width="33%"|Example of a row that spans multiple lines. +|- +|align="center"|Second +|row +|align="right"|5.0 +|Here's another one. Note the blank line between rows. +|} -- cgit v1.2.3 From 5050cff37cbe2dffd7f7f09db11da40d7c1e48d0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 Aug 2013 23:16:54 -0700 Subject: Removed comment that chokes recent cpp. Closes #933. --- src/Text/Pandoc/Parsing.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 0913d8c6c..4ade6def8 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -421,7 +421,6 @@ uri :: Parser [Char] st (String, String) uri = try $ do scheme <- uriScheme char ':' - -- /^[\/\w\u0080-\uffff]+|%[A-Fa-f0-9]+|&#?\w+;|(?:[,]+|[\S])[%&~\w\u0080-\uffff]/ -- We allow punctuation except at the end, since -- we don't want the trailing '.' in 'http://google.com.' We want to allow -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) -- cgit v1.2.3 From 2d6e0b1530e61fa2d6a22d8b61042734b20f0af5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 Aug 2013 14:12:13 -0700 Subject: Remove CPP from default-extensions; add pragmas to modules as needed. 
--- man/make-pandoc-man-pages.hs | 1 + pandoc.cabal | 4 ---- src/Text/Pandoc/Pretty.hs | 2 +- src/Text/Pandoc/UTF8.hs | 1 + src/Text/Pandoc/Writers/EPUB.hs | 6 +----- 5 files changed, 4 insertions(+), 10 deletions(-) diff --git a/man/make-pandoc-man-pages.hs b/man/make-pandoc-man-pages.hs index eca1276eb..008294433 100644 --- a/man/make-pandoc-man-pages.hs +++ b/man/make-pandoc-man-pages.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} -- Create pandoc.1 man and pandoc_markdown.5 man pages from README import Text.Pandoc import qualified Text.Pandoc.UTF8 as UTF8 diff --git a/pandoc.cabal b/pandoc.cabal index 7d4bccc41..7f12a44ae 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -277,7 +277,6 @@ Library Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind Ghc-Prof-Options: -auto-all -caf-all -rtsopts Default-Language: Haskell98 - Default-Extensions: CPP Other-Extensions: PatternGuards, OverloadedStrings, ScopedTypeVariables, GeneralizedNewtypeDeriving, RelaxedPolyRec, DeriveDataTypeable, TypeSynonymInstances, @@ -357,7 +356,6 @@ Executable pandoc if os(windows) Cpp-options: -D_WINDOWS Default-Language: Haskell98 - Default-Extensions: CPP Other-Extensions: PatternGuards, OverloadedStrings, ScopedTypeVariables, GeneralizedNewtypeDeriving, RelaxedPolyRec, DeriveDataTypeable, TypeSynonymInstances, @@ -377,7 +375,6 @@ Executable make-pandoc-man-pages old-time >= 1.0 && < 1.2, time >= 1.2 && < 1.5 Default-Language: Haskell98 - Default-Extensions: CPP Test-Suite test-pandoc Type: exitcode-stdio-1.0 @@ -415,7 +412,6 @@ Test-Suite test-pandoc Tests.Writers.LaTeX Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind Default-Language: Haskell98 - Default-Extensions: CPP benchmark benchmark-pandoc Type: exitcode-stdio-1.0 diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 21121a506..faf2a6797 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, CPP 
#-} {- Copyright (C) 2010 John MacFarlane diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 9fa743cd9..229442543 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {- Copyright (C) 2010 John MacFarlane diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index e625931fc..fb756f196 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards, CPP #-} {- Copyright (C) 2010 John MacFarlane @@ -62,11 +62,7 @@ import Text.Pandoc.MIME (getMimeType) import Prelude hiding (catch) #endif import Control.Exception (catch, SomeException) -#if MIN_VERSION_blaze_html(0,5,0) import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -#else -import Text.Blaze.Renderer.Utf8 (renderHtml) -#endif -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section -- cgit v1.2.3 From dbdb9109c5509b25da4486caec6535e36560a96e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 5 Aug 2013 14:51:17 -0700 Subject: Updated changelog. --- changelog | 198 ++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 149 insertions(+), 49 deletions(-) diff --git a/changelog b/changelog index 6585c3dd3..3166ff017 100644 --- a/changelog +++ b/changelog @@ -5,11 +5,11 @@ to set the TOC level for in the book navigation. * `--help` now prints in and out formats in alphabetical order, and - says something about PDF output. Closes #720. + says something about PDF output (#720). - * Less verbose output from `--self-contained`. - Now one gets "Fetching [URL]..." for each URL fetched, but not - the full header. + * `--self-contained` now returns less verbose output (telling you + which URLs it is fetching, but not giving the full header). In + addition, there are better error messages when fetching a URL fails. 
* All slide formats: Support incremental slide view for definition lists. @@ -26,7 +26,7 @@ * Slidy: Use slidy.js rather than slidy.js.gz. Reason: some browsers have trouble with the gzipped js file, - at least on the local file system. Closes #795. + at least on the local file system (#795). * Added `revealjs` output format, for reveal.js HTML 5 slide shows. Thanks to Jamie F. Olson for the initial patch. @@ -42,19 +42,55 @@ + Variables completely shadow metadata. If many variables with the same name are set, a list is created. + * `Text.Pandoc` + + + Make `toJsonFilter` an alias for `toJSONFilter` from `Text.Pandoc.JSON`. + + Removed `ToJsonFilter` typeclass. `ToJSONFilter` from + `Text.Pandoc.JSON` should be used instead. (Compiling against + pandoc-types instead of pandoc will also produce smaller executables.) + * Removed the deprecated `jsonFilter` function. + + Added `readJSON`, `writeJSON` to the API (#817). + * `Text.Pandoc.Shared` - + `openURL` now follows redirects. Closes #701. + + `openURL` now follows redirects (#701). + + `openURL` and `fetchItem` now return an Either, for + better error handling. (API change.) + `readDefaultDataFile`: normalize the paths. This fixes bugs in `--self-contained` on pandoc compiled with - `embed_data_files`. Closes #833. + `embed_data_files` (#833). + Fixed `readDefaultDataFile` so it works on Windows. + URL-escape pipe characters. Even though these are legal, `Network.URI` doesn't regard them - as legal in URLs. So we escape them first. Closes #535. + as legal in URLs. So we escape them first (#535). + `openURL`: Print diagnostic output to stderr, not stdout. + `openURL`: Properly handle `data:` URIs. + * `Text.Pandoc.Biblio` + + + Override citeproc-hs's `endWithPunct`. + The new version correctly sees a sentence ending in '.)' as ending + with punctuation. This fixes a bug which led such sentences to receive + an extra period at the end: '.).'. Thanks to Steve Petersen for + reporting. 
+ + Don't interfere with Notes that aren't citation notes. + This fixes a bug in which notes not generated from citations were + being altered (e.g. first letter capitalized) (#898). + + Changes in suffix parsing. A suffix beginning with a digit gets 'p' + inserted before it before passing to citeproc-hs, so that bare numbers + are treated as page numbers by default. A suffix not beginning with + punctuation has a space added at the beginning (rather than a comma and + space, as was done before for not-author-in-text citations). + The result of these changes (and the last commit) is that + `\citep[23]{item1}` in LaTeX will be interpreted properly, + with '23' treated as a locator of type 'page'. + + * New module `Text.Pandoc.JSON` in pandoc-types. + + + This provides `ToJSON` and `FromJSON` instances for the basic + pandoc types. They use GHC generics and should be faster than the + old JSON serialization using `Data.Aeson.Generic`. + * New module `Text.Pandoc.Writers.Shared` for shared functions used only in writers. @@ -66,6 +102,9 @@ * Added `Text.Pandoc.Asciify` utility module. This exports functions to create ASCII-only versions of identifiers. + + * `Text.Pandoc.ImageSize`: Handle EPS (#903). This change will make + EPS images properly sized on conversion to Word. * `Text.Pandoc.SelfContained` @@ -73,7 +112,7 @@ a filename. This fixes `--self-contained` when used with CSS files that include web fonts using the method described here: - Closes #739. + (#739). + Handle `src` in `embed`, `audio`, `source`, `input` tags. * Added `Text.Pandoc.Writers.Custom` and custom output formats. @@ -100,15 +139,27 @@ * Markdown reader - + Properly handle blank line at beginning of input. Closes #882. + + Added `ignore_line_breaks` markdown extension. + This causes intra-paragraph line breaks to be ignored, + rather than being treated as hard line breaks or spaces. 
+ This is useful for some East Asian languages, where spaces + aren't used between words, but text is separated into lines + for readability. + Properly handle blank line at beginning of input (#882). + Fixed bug in unmatched reference links. The input `[*infile*] [*outfile*]` was getting improperly parsed: - "infile" was emphasized, but "*outfile*" was literal. Closes #883. + "infile" was emphasized, but "*outfile*" was literal (#883). + Check for blank lines first in blocks. (And skip them. This might speed things up in some cases.) - + Implemented `Ext_ascii_identifiers`. Closes #807. - + Allow internal `+` in citation identifiers. Closes #856. + + Implemented `Ext_ascii_identifiers` (#807). + + Allow internal `+` in citation identifiers (#856). + Added support for YAML metadata block at the beginning of document. + + Improved strong/emph parsing, using the strategy of + . The new parsing algorithm requires + no backtracking, and no keeping track of nesting levels. + It will give different results in some edge cases, but these should + not affect normal uses. + + Allow `.` or `)` after `#` in ATX headers if no `fancy_lists`. * RST reader @@ -116,7 +167,7 @@ `rst2html` doesn't add `<p>` tags to list items (even when they are separated by blank lines) unless there are multiple paragraphs in the list. This commit changes the RST reader to conform more closely to - what docutils does. Closes #880. + what docutils does (#880). + Improved metadata. Treat initial field list as metadata when standalone specified. Previously ALL fields "title", "author", "date" in field lists were treated as metadata, even if not at @@ -127,29 +178,27 @@ * Textile reader + Correctly handle entities. + + Improved handling of `<pre>` blocks.
+    + Fixed a bug in which `<pre>` in certain contexts was
+      not recognized as a code block (#927).
+    + Remove internal HTML tags in code blocks, rather than printing
+      them verbatim.
+    * Parse attributes on `<pre>` tag for code blocks.
 
-  * LaTeX reader
-
-    + Support alltt environment.  Closes  #892.
-    + Support `\textasciitilde`, `\textasciicircum`.  Closes #810.
-    + Treat `\textsl` as emphasized text reader.  Closes #850.
-    + Skip positional options after `\begin{figure}`.
+  * HTML reader
 
-  * LaTeX writer
+    + Handle non-simple tables (#893).  Column widths are read from
+      `col` tags if present, otherwise divided equally.
 
-    + Don't use ligatures in escaping inline code.
-    + Fixed footnote numbers in LaTeX/PDF tables.  This fixes a bug
-      wherein notes were numbered incorrectly in tables.  Closes #827.
-    + Always create labels for sections.  Previously the labels were only
-      created when there were links to the section in the document.
-      Closes #871.
-    + Stop escaping `|` in LaTeX math.
-      This caused problems with array environments.  Closes #891.
-
-  * ConTeXt writer
+  * LaTeX reader
 
-    + Properly handle tables without captions.  The old output only
-      worked in MkII. This should work in MkIV as well.  Closes #837.
+    + Support alltt environment (#892).
+    + Support `\textasciitilde`, `\textasciicircum` (#810).
+    + Treat `\textsl` as emphasized text reader (#850).
+    + Skip positional options after `\begin{figure}`.
+    + Support `\v{}` for hacek (#926).
+    + Don't add spurious ", " to citation suffixes.
+      This is added when needed in `Text.Pandoc.Biblio` anyway.
 
   * MediaWiki reader
 
@@ -164,10 +213,42 @@
       rows had attributes, now we ignore them.
     + Ignore attributes on headers.
 
+  * LaTeX writer
+
+    + Don't use ligatures in escaping inline code.
+    + Fixed footnote numbers in LaTeX/PDF tables.  This fixes a bug
+      wherein notes were numbered incorrectly in tables (#827).
+    + Always create labels for sections.  Previously the labels were only
+      created when there were links to the section in the document (#871).
+    + Stop escaping `|` in LaTeX math.
+      This caused problems with array environments (#891).
+    + Change `\` to `/` in paths.  `/` works even on Windows in LaTeX.
+      `\` will cause major problems if unescaped.
+
+  * Beamer writer
+
+    + When creating beamer slides, add `allowframebreaks` option
+      to the slide if it is one of the header classes.  It's recommended
+      that your bibliography slide have this attribute:
+    
+        # References {.allowframebreaks}
+    
+    This causes multiple slides to be created if necessary, depending
+    on the length of the bibliography.
+
+  * ConTeXt writer
+
+    + Properly handle tables without captions.  The old output only
+      worked in MkII. This should work in MkIV as well (#837).
+
+  * MediaWiki writer
+
+    + Use native mediawiki tables instead of HTML (#720).
+
   * HTML writer
 
     + Fixed `--no-highlight` (Alexander Kondratskiy).
-    + Don't convert to lowercase in email obfuscation.  Closes #839.
+    + Don't convert to lowercase in email obfuscation (#839).
 
   * Man writer
 
@@ -180,8 +261,7 @@
         COMMAND(1) footer here | header here
 
       pandoc will parse it as before into a title, section, header, and
-      footer.  But you can also specify these elements explicitly.
-      Closes #885.
+      footer.  But you can also specify these elements explicitly (#885).
 
   * AsciiDoc writer
 
@@ -192,7 +272,7 @@
   * ODT writer
 
     + Fixing wrong numbered-list indentation in open document format
-      (Alexander Kondratskiy). Closes #369.
+      (Alexander Kondratskiy) (#369).
 
   * Docx writer
 
@@ -200,11 +280,11 @@
       In 1.11 and 1.11.1, display math in lists rendered as a new list
       item.  Now it always appears centered, just as outside of lists,
       and in proper display math style, no matter how far indented the
-      containing list item is.  Closes #784.
+      containing list item is (#784).
     + Use `w:br` with `w:type` `textWrapping` for linebreaks.
-      Previously we used `w:cr`.  Closes #873.
+      Previously we used `w:cr` (#873).
     + Use Compact style for Plain block elements.
-      This differentiates between tight and loose lists.  Closes #775.
+      This differentiates between tight and loose lists (#775).
     + Ignore most components of `reference.docx`.
       We take the `word/styles.xml`, `docProps/app.xml`,
       `word/theme/theme1.xml`, and `word/fontTable.xml` from
@@ -216,7 +296,7 @@
   * EPUB writer
 
     + Don't add `dc:creator` tags if present in EPUB metadata.
-    + Add `id="toc-title"` to `h1` in `nav.xhtml`.  Closes #799.
+    + Add `id="toc-title"` to `h1` in `nav.xhtml` (#799).
     + Don't put blank title page in reading sequence.
       Set `linear="no"` if no title block.  Addresses #797.
     + Download webtex images and include as data URLs.
@@ -226,6 +306,7 @@
       are no longer needed, since styles are added by the HTML
       writer according to `--highlighting-style`).  Simplified
       margin fields.
+    + If resource not found, skip it, as in Docx writer (#916).
 
   * Markdown writer
 
@@ -254,12 +335,22 @@
     + On Windows, create temdir in working directory.
       Reason:  the path to the system temp directory may contain tildes,
       which causes problems in LaTeX when the username is more than
-      eight characters.  Closes #777.
+      eight characters (#777).
+    + Put temporary output directory in `TEXINPUTS`.
+      This will help later when we try to download external resources.
+      We can put them in the temp directory (see #917).
+    + `Text.Pandoc.PDF` exports `makePDF` instead of `tex2pdf`.
+      (API change.)
+    + `makePDF` walks the pandoc AST and checks for the existence of
+      images in the local directory.  If they are not found, it attempts
+      to find them, either in the directory containing the first source
+      file, or at an absolute URL, or at a URL relative to the base URL
+      of the first command line argument (#917).
 
   * `Text.Pandoc.UTF8`
 
     + Use strict bytestrings in reading.  The use of lazy bytestrings seemed
-      to cause problems using pandoc on 64-bit Windows 7/8.  Closes #874.
+      to cause problems using pandoc on 64-bit Windows 7/8 (#874).
 
   * `Text.Pandoc.Parsing`
 
@@ -306,23 +397,26 @@
       Previously the writer did this.
     + LaTeX:  Changes to make mathfont work with xelatex.
       We need the mathspec library, not just fontspec, for this.
-      We also need to set options for setmathfont.  Closes #734.
+      We also need to set options for setmathfont (#734).
     + LaTeX: Use `tex-ansi` mapping for `monofont`.
       This ensures that straight quotes appear as straight, rather than
       being treated as curly.  See #889.
     + Made `\includegraphics` more flexible in LaTeX template.
       Now it can be used with options, if needed.  Thanks to Bernhard Weichel.
+    + LaTeX/Beamer: Added `classoption` variable.
+      This is intended for class options like `oneside`; it may
+      be repeated with different options.  (Thanks to Oliver Matthews.)
+    + LaTeX: Added `biblio-style` variable (#920).
     + DZSlides: title attribute on title section.
 
   * Removed `blaze_html_0_5` flag, require `blaze-html` >= 0.5.
     Reason:  < 0.5 does not provide a monoid instance for Attribute,
-    which is now needed by the HTML writer.
-    Closes #803.
+    which is now needed by the HTML writer (#803).
 
   * Added `http-conduit` flag, which allows fetching https resources.
     It also brings in a large number of dependencies (`http-conduit`
-    and its dependencies), which is why for now it is an optional flag.
-    Closes #820.
+    and its dependencies), which is why for now it is an optional flag
+    (#820).
 
   * Added CONTRIBUTING.md.
 
@@ -335,7 +429,13 @@
   * Set default stack size to 16M.  This is needed for some large
     conversions, esp. if pandoc is compiled with 64-bit ghc.
 
-
+  * Various small documentation improvements.
+    Thanks to achalddave and drothlis for patches.
+    
+  * Removed comment that chokes recent versions of CPP (#933).
+    
+  * Removed support for GHC version < 7.2, since pandoc-types now
+    requires at least GHC 7.2 for GHC generics.
 
 pandoc (1.11.1)
 
-- 
cgit v1.2.3


From b1be9cfaef10f907e3439d4fa91b63b3402b233f Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Mon, 5 Aug 2013 18:41:33 -0700
Subject: Require latest version of texmath.

Closes #935.
---
 pandoc.cabal | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandoc.cabal b/pandoc.cabal
index 7f12a44ae..3dc400d40 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -246,7 +246,7 @@ Library
                  old-locale >= 1 && < 1.1,
                  time >= 1.2 && < 1.5,
                  HTTP >= 4000.0.5 && < 4000.3,
-                 texmath >= 0.6.1.5 && < 0.7,
+                 texmath >= 0.6.3 && < 0.7,
                  xml >= 1.3.12 && < 1.4,
                  random >= 1 && < 1.1,
                  extensible-exceptions >= 0.1 && < 0.2,
-- 
cgit v1.2.3


From 52c5cdb04e6c574f897c948e45084bf9343bf57c Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Tue, 6 Aug 2013 16:19:34 -0700
Subject: Biblio:  Capitalize citation note only if it has a prefix.

So, author names or titles that aren't capitalized will stay
uncapitalized.
---
 changelog                 |  1 +
 src/Text/Pandoc/Biblio.hs | 17 ++++++++++-------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/changelog b/changelog
index 3166ff017..9e46155fd 100644
--- a/changelog
+++ b/changelog
@@ -76,6 +76,7 @@
     + Don't interfere with Notes that aren't citation notes.
       This fixes a bug in which notes not generated from citations were
       being altered (e.g. first letter capitalized) (#898).
+    + Only capitalize footnote citations when they have a prefix.
     + Changes in suffix parsing.  A suffix beginning with a digit gets 'p'
       inserted before it before passing to citeproc-hs, so that bare numbers
       are treated as page numbers by default.  A suffix not beginning with
diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index d0db35ae7..755c779ea 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -84,11 +84,6 @@ mvPunct (Space : x : ys) | isNote x, startWithPunct ys =
 mvPunct (Space : x : ys) | isNote x = x : ys
 mvPunct xs = xs
 
-sanitize :: [Inline] -> [Inline]
-sanitize xs | endWithPunct xs = toCapital xs
-            | otherwise       = toCapital (xs ++ [Str "."])
-
-
 -- A replacement for citeproc-hs's endWithPunct, which wrongly treats
 -- a sentence ending in '.)' as not ending with punctuation, leading
 -- to an extra period.
@@ -103,8 +98,8 @@ endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of
 
 deNote :: Pandoc -> Pandoc
 deNote = topDown go
-  where go (Cite cs [Note [Para xs]]) =
-            Cite cs [Note $ bottomUp go' [Para $ sanitize xs]]
+  where go (Cite (c:cs) [Note xs]) =
+            Cite (c:cs) [Note $ bottomUp go' $ sanitize c xs]
         go (Note xs) = Note $ bottomUp go' xs
         go x = x
         go' (Note [Para xs]:ys) =
@@ -112,6 +107,14 @@ deNote = topDown go
                 then initInline xs ++ ys
                 else xs ++ ys
         go' xs = xs
+        sanitize :: Citation -> [Block] -> [Block]
+        sanitize Citation{citationPrefix = pref} [Para xs] =
+           case (null pref, endWithPunct xs) of
+                (True, False)  -> [Para $ xs ++ [Str "."]]
+                (True, True)   -> [Para xs]
+                (False, False) -> [Para $ toCapital $ xs ++ [Str "."]]
+                (False, True)  -> [Para $ toCapital xs]
+        sanitize _ bs = bs
 
 isTextualCitation :: [Citation] -> Bool
 isTextualCitation (c:_) = citationMode c == AuthorInText
-- 
cgit v1.2.3


From 208a9e076d577fb5c275ec797b539305300f6a95 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Tue, 6 Aug 2013 18:07:27 -0700
Subject: Beamer template:  Added fonttheme variable.

---
 README         | 6 +++---
 data/templates | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/README b/README
index dbb0150d9..bc5d5619d 100644
--- a/README
+++ b/README
@@ -761,7 +761,7 @@ as `title`, `author`, and `date`) as well as the following:
 `revealjs-url`
 :   base URL for reveal.js documents (defaults to `reveal.js`)
 `theme`
-:   reveal.js theme
+:   reveal.js or LaTeX beamer theme
 `transition`
 :   reveal.js transition
 `fontsize`
@@ -777,10 +777,10 @@ as `title`, `author`, and `date`) as well as the following:
 `mainfont`, `sansfont`, `monofont`, `mathfont`
 :   fonts for LaTeX documents (works only with xelatex
     and lualatex)
-`theme`
-:   theme for LaTeX beamer documents
 `colortheme`
 :   colortheme for LaTeX beamer documents
+`fonttheme`
+:   fonttheme for LaTeX beamer documents
 `linkcolor`
 :   color for internal links in LaTeX documents (`red`, `green`,
     `magenta`, `cyan`, `blue`, `black`)
diff --git a/data/templates b/data/templates
index 120bed772..c27f59c01 160000
--- a/data/templates
+++ b/data/templates
@@ -1 +1 @@
-Subproject commit 120bed772b4a235a0d17ea4560a682e23ac293ac
+Subproject commit c27f59c010b0468f01b710cdf3a3c04a450a03e7
-- 
cgit v1.2.3


From 802a78bbf24a95817621ab2bfe665c235231b367 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Tue, 6 Aug 2013 18:13:04 -0700
Subject: Updated changelog

---
 changelog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/changelog b/changelog
index 9e46155fd..42c25f978 100644
--- a/changelog
+++ b/changelog
@@ -407,6 +407,7 @@
     + LaTeX/Beamer: Added `classoption` variable.
       This is intended for class options like `oneside`; it may
       be repeated with different options.  (Thanks to Oliver Matthews.)
+    + Beamer: Added `fonttheme` variable.  (Thanks to Luis Osa.)
     + LaTeX: Added `biblio-style` variable (#920).
     + DZSlides: title attribute on title section.
 
-- 
cgit v1.2.3


From 7d18770b008c12e13c324223304c6703e06f3a4a Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Tue, 6 Aug 2013 23:31:01 -0700
Subject: Added support for MetaBool.

---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 src/Text/Pandoc/Writers/Custom.hs   | 2 ++
 src/Text/Pandoc/Writers/Shared.hs   | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 076706b4e..a880c09de 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -278,7 +278,7 @@ toMetaValue opts x =
 yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue
 yamlToMeta opts (Yaml.String t) = toMetaValue opts t
 yamlToMeta _    (Yaml.Number n) = MetaString $ show n
-yamlToMeta _    (Yaml.Bool b) = MetaString $ map toLower $ show b
+yamlToMeta _    (Yaml.Bool b) = MetaBool b
 yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts)
                                                 $ V.toList xs
 yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m ->
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 732497616..5c82fe0e1 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -110,12 +110,14 @@ instance StackValue [Block] where
 instance StackValue MetaValue where
   push l (MetaMap m) = Lua.push l m
   push l (MetaList xs) = Lua.push l xs
+  push l (MetaBool x) = Lua.push l x
   push l (MetaString s) = Lua.push l s
   push l (MetaInlines ils) = Lua.push l ils
   push l (MetaBlocks bs) = Lua.push l bs
   peek _ _ = undefined
   valuetype (MetaMap _) = Lua.TTABLE
   valuetype (MetaList _) = Lua.TTABLE
+  valuetype (MetaBool _) = Lua.TBOOLEAN
   valuetype (MetaString _) = Lua.TSTRING
   valuetype (MetaInlines _) = Lua.TSTRING
   valuetype (MetaBlocks _) = Lua.TSTRING
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index c6c30d070..e6ec853f8 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -74,6 +74,7 @@ metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $
   Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap
 metaValueToJSON blockWriter inlineWriter (MetaList xs) = liftM toJSON $
   Traversable.mapM (metaValueToJSON blockWriter inlineWriter) xs
+metaValueToJSON _ _ (MetaBool b) = return $ toJSON b
 metaValueToJSON _ _ (MetaString s) = return $ toJSON s
 metaValueToJSON blockWriter _ (MetaBlocks bs) = liftM toJSON $ blockWriter bs
 metaValueToJSON _ inlineWriter (MetaInlines bs) = liftM toJSON $ inlineWriter bs
-- 
cgit v1.2.3


From d44d1664312f0d05ada61eb49a678ef8a04d90d0 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Wed, 7 Aug 2013 08:43:42 -0700
Subject: Allow YAML title blocks to contain only comments.

---
 src/Text/Pandoc/Readers/Markdown.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a880c09de..251554de1 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -231,7 +231,9 @@ yamlTitleBlock = try $ do
   pos <- getPosition
   string "---"
   blankline
-  rawYaml <- unlines <$> manyTill anyLine stopLine
+  rawYamlLines <- manyTill anyLine stopLine
+  -- by including --- and ..., we allow yaml blocks with just comments:
+  let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
   optional blanklines
   opts <- stateOptions <$> getState
   case Yaml.decodeEither' $ UTF8.fromString rawYaml of
@@ -241,6 +243,7 @@ yamlTitleBlock = try $ do
                         then f
                         else B.setMeta (T.unpack k) (yamlToMeta opts v) . f)
                   id hashmap
+       Right Yaml.Null -> return $ return id
        Right _ -> do
                    addWarning (Just pos) "YAML header is not an object"
                    return $ return id
-- 
cgit v1.2.3


From bb61624bb2bba416e1992ecdf101f9660a3edcae Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Wed, 7 Aug 2013 14:30:47 -0700
Subject: Textile reader: Removed raw LaTeX parsing.

This isn't part of Textile.
---
 src/Text/Pandoc/Readers/Textile.hs | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 9191f6908..d4f092d07 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -56,7 +56,6 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
@@ -126,7 +125,6 @@ blockParsers = [ codeBlock
                , commentBlock
                , anyList
                , rawHtmlBlock
-               , rawLaTeXBlock'
                , maybeExplicitBlock "table" table
                , maybeExplicitBlock "p" para
                ]
@@ -292,13 +290,6 @@ rawHtmlBlock = try $ do
   optional blanklines
   return $ RawBlock "html" b
 
--- | Raw block of LaTeX content
-rawLaTeXBlock' :: Parser [Char] ParserState Block
-rawLaTeXBlock' = do
-  guardEnabled Ext_raw_tex
-  RawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-
-
 -- | In textile, paragraphs are separated by blank lines.
 para :: Parser [Char] ParserState Block
 para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak
@@ -373,7 +364,6 @@ inlineParsers = [ str
                 , escapedInline
                 , htmlSpan
                 , rawHtmlInline
-                , rawLaTeXInline'
                 , note
                 , try $ (char '[' *> inlineMarkup <* char ']')
                 , inlineMarkup
@@ -489,12 +479,6 @@ endline = try $ do
 rawHtmlInline :: Parser [Char] ParserState Inline
 rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag
 
--- | Raw LaTeX Inline
-rawLaTeXInline' :: Parser [Char] ParserState Inline
-rawLaTeXInline' = try $ do
-  guardEnabled Ext_raw_tex
-  rawLaTeXInline
-
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
 link :: Parser [Char] ParserState Inline
-- 
cgit v1.2.3


From 802dc9a8b9f206eb3be592ab19067f637eb2a3ee Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Thu, 8 Aug 2013 10:41:39 -0700
Subject: Added Text.Pandoc.Compat.Monoid.

This allows pandoc to compile with base < 4.5, where Data.Monoid
doesn't export `<>`.  Thanks to Dirk Ullrich for the patch.
---
 pandoc.cabal                     |  1 +
 src/Text/Pandoc/Compat/Monoid.hs | 16 ++++++++++++++++
 src/Text/Pandoc/Templates.hs     |  2 +-
 src/Text/Pandoc/Writers/Docx.hs  |  2 +-
 4 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 src/Text/Pandoc/Compat/Monoid.hs

diff --git a/pandoc.cabal b/pandoc.cabal
index 3dc400d40..192b6c5fd 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -335,6 +335,7 @@ Library
                    Text.Pandoc.ImageSize,
                    Text.Pandoc.Slides,
                    Text.Pandoc.Highlighting,
+                   Text.Pandoc.Compat.Monoid,
                    Paths_pandoc
 
   Buildable:       True
diff --git a/src/Text/Pandoc/Compat/Monoid.hs b/src/Text/Pandoc/Compat/Monoid.hs
new file mode 100644
index 000000000..80ffcbbd6
--- /dev/null
+++ b/src/Text/Pandoc/Compat/Monoid.hs
@@ -0,0 +1,16 @@
+{-# LANGUAGE CPP #-}
+module Text.Pandoc.Compat.Monoid ( Monoid(..)
+                                 , (<>)
+                          ) where
+
+#if MIN_VERSION_base(4,5,0)
+import Data.Monoid ((<>), Monoid(..))
+#else
+import Data.Monoid (mappend, Monoid(..))
+#endif
+
+#if MIN_VERSION_base(4,5,0)
+#else
+(<>) :: Monoid m => m -> m -> m
+(<>) = mappend
+#endif
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index c95c84ca8..22a44e735 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -102,7 +102,7 @@ import Control.Applicative
 import qualified Data.Text as T
 import Data.Text (Text)
 import Data.Text.Encoding (encodeUtf8)
-import Data.Monoid ((<>), Monoid(..))
+import Text.Pandoc.Compat.Monoid ((<>), Monoid(..))
 import Data.List (intersperse, nub)
 import System.FilePath ((</>), (<.>))
 import qualified Data.Map as M
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 611cddc65..6bb4d5569 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -35,7 +35,7 @@ import qualified Data.ByteString.Lazy as BL
 import qualified Data.ByteString.Lazy.Char8 as BL8
 import qualified Data.Map as M
 import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Monoid ((<>))
+import Text.Pandoc.Compat.Monoid ((<>))
 import Codec.Archive.Zip
 import Data.Time.Clock.POSIX
 import Text.Pandoc.Definition
-- 
cgit v1.2.3


From 12e7ec40707bfb716bb9add82e4320558e065492 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Thu, 8 Aug 2013 10:42:52 -0700
Subject: Added Text.Pandoc.Compat.TagSoupEntity.

This allows pandoc to compile with tagsoup 0.13.x.
Thanks to Dirk Ullrich for the patch.
---
 pandoc.cabal                            |  3 ++-
 src/Text/Pandoc/Compat/TagSoupEntity.hs | 15 +++++++++++++++
 src/Text/Pandoc/Parsing.hs              |  2 +-
 src/Text/Pandoc/Readers/DocBook.hs      |  2 +-
 src/Text/Pandoc/Readers/OPML.hs         |  2 +-
 src/Text/Pandoc/XML.hs                  |  2 +-
 6 files changed, 21 insertions(+), 5 deletions(-)
 create mode 100644 src/Text/Pandoc/Compat/TagSoupEntity.hs

diff --git a/pandoc.cabal b/pandoc.cabal
index 192b6c5fd..19f8c14ef 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -253,7 +253,7 @@ Library
                  citeproc-hs >= 0.3.7 && < 0.4,
                  pandoc-types >= 1.12 && < 1.13,
                  aeson >= 0.6 && < 0.7,
-                 tagsoup >= 0.12.5 && < 0.13,
+                 tagsoup >= 0.12.5 && < 0.14,
                  base64-bytestring >= 0.1 && < 1.1,
                  zlib >= 0.5 && < 0.6,
                  highlighting-kate >= 0.5.5 && < 0.6,
@@ -336,6 +336,7 @@ Library
                    Text.Pandoc.Slides,
                    Text.Pandoc.Highlighting,
                    Text.Pandoc.Compat.Monoid,
+                   Text.Pandoc.Compat.TagSoupEntity,
                    Paths_pandoc
 
   Buildable:       True
diff --git a/src/Text/Pandoc/Compat/TagSoupEntity.hs b/src/Text/Pandoc/Compat/TagSoupEntity.hs
new file mode 100644
index 000000000..80985aef9
--- /dev/null
+++ b/src/Text/Pandoc/Compat/TagSoupEntity.hs
@@ -0,0 +1,15 @@
+{-# LANGUAGE CPP #-}
+module Text.Pandoc.Compat.TagSoupEntity (lookupEntity
+                          ) where
+
+import qualified Text.HTML.TagSoup.Entity as TE
+
+lookupEntity :: String -> Maybe Char
+#if MIN_VERSION_tagsoup(0,13,0)
+lookupEntity = str2chr . TE.lookupEntity
+  where str2chr :: Maybe String -> Maybe Char
+        str2chr (Just [c]) = Just c
+        str2chr _ = Nothing
+#else
+lookupEntity = TE.lookupEntity
+#endif
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 4ade6def8..2f42aba41 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -161,7 +161,7 @@ import Data.List ( intercalate, transpose )
 import Text.Pandoc.Shared
 import qualified Data.Map as M
 import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
-import Text.HTML.TagSoup.Entity ( lookupEntity )
+import Text.Pandoc.Compat.TagSoupEntity ( lookupEntity )
 import Data.Default
 import qualified Data.Set as Set
 import Control.Monad.Reader
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 0058e889c..6a799e270 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -4,7 +4,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder
 import Text.XML.Light
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Compat.TagSoupEntity (lookupEntity)
 import Data.Generics
 import Data.Monoid
 import Data.Char (isSpace)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index c9726d195..35d01e877 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -6,7 +6,7 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import Text.XML.Light
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Compat.TagSoupEntity (lookupEntity)
 import Data.Generics
 import Data.Monoid
 import Control.Monad.State
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 89ae81a10..c11af9a19 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -38,7 +38,7 @@ module Text.Pandoc.XML ( escapeCharForXML,
 
 import Text.Pandoc.Pretty
 import Data.Char (ord, isAscii, isSpace)
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Compat.TagSoupEntity (lookupEntity)
 
 -- | Escape one character as needed for XML.
 escapeCharForXML :: Char -> String
-- 
cgit v1.2.3


From 9aa9d5cf68386acd127427cc62f6004b2a17057a Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Thu, 8 Aug 2013 10:52:53 -0700
Subject: Revert "Textile reader: Removed raw LaTeX parsing."

This reverts commit bb61624bb2bba416e1992ecdf101f9660a3edcae.
Apparently someone put this there for a reason, since it's in
the test suite.
---
 src/Text/Pandoc/Readers/Textile.hs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index d4f092d07..9191f6908 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -56,6 +56,7 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
@@ -125,6 +126,7 @@ blockParsers = [ codeBlock
                , commentBlock
                , anyList
                , rawHtmlBlock
+               , rawLaTeXBlock'
                , maybeExplicitBlock "table" table
                , maybeExplicitBlock "p" para
                ]
@@ -290,6 +292,13 @@ rawHtmlBlock = try $ do
   optional blanklines
   return $ RawBlock "html" b
 
+-- | Raw block of LaTeX content
+rawLaTeXBlock' :: Parser [Char] ParserState Block
+rawLaTeXBlock' = do
+  guardEnabled Ext_raw_tex
+  RawBlock "latex" <$> (rawLaTeXBlock <* spaces)
+
+
 -- | In textile, paragraphs are separated by blank lines.
 para :: Parser [Char] ParserState Block
 para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak
@@ -364,6 +373,7 @@ inlineParsers = [ str
                 , escapedInline
                 , htmlSpan
                 , rawHtmlInline
+                , rawLaTeXInline'
                 , note
                 , try $ (char '[' *> inlineMarkup <* char ']')
                 , inlineMarkup
@@ -479,6 +489,12 @@ endline = try $ do
 rawHtmlInline :: Parser [Char] ParserState Inline
 rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag
 
+-- | Raw LaTeX Inline
+rawLaTeXInline' :: Parser [Char] ParserState Inline
+rawLaTeXInline' = try $ do
+  guardEnabled Ext_raw_tex
+  rawLaTeXInline
+
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
 link :: Parser [Char] ParserState Inline
-- 
cgit v1.2.3


From 2677e8466311e2becdd7f65f1f6c23d559db14aa Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Thu, 8 Aug 2013 11:09:00 -0700
Subject: Revert "Revert "Added `--filter` option.""

This reverts commit 2e5edbb27837372f658b1abbe05371be57415847.
---
 README       |  8 ++++++++
 pandoc.cabal |  1 +
 pandoc.hs    | 25 +++++++++++++++++++++----
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/README b/README
index bc5d5619d..e85ca1905 100644
--- a/README
+++ b/README
@@ -259,6 +259,14 @@ Reader options
     require different kinds of images.  Currently this option only affects
     the markdown and LaTeX readers.
 
+`--filter=`*PATH*
+:   Specify an executable to be used as a filter transforming the
+    Pandoc AST after the input is parsed and before the output is
+    written.  The executable should read JSON from stdin and write
+    JSON to stdout.  The JSON must be formatted like pandoc's own
+    JSON input and output.  Filters may be most easily created in Haskell,
+    using the utility function `toJsonFilter` from `Text.Pandoc`.
+
 `--normalize`
 :   Normalize the document after reading:  merge adjacent
     `Str` or `Emph` elements, for example, and remove repeated `Space`s.
diff --git a/pandoc.cabal b/pandoc.cabal
index 19f8c14ef..a8dd528ad 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -352,6 +352,7 @@ Executable pandoc
                  extensible-exceptions >= 0.1 && < 0.2,
                  highlighting-kate >= 0.5.5 && < 0.6,
                  HTTP >= 4000.0.5 && < 4000.3,
+                 process >= 1 && < 1.2,
                  citeproc-hs >= 0.3.7 && < 0.4
   Ghc-Options:   -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind
   Ghc-Prof-Options: -auto-all -caf-all -rtsopts -with-rtsopts=-K16m
diff --git a/pandoc.hs b/pandoc.hs
index 79bade221..94d206103 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -43,6 +43,7 @@ import System.Environment ( getArgs, getProgName )
 import System.Exit ( exitWith, ExitCode (..) )
 import System.FilePath
 import System.Console.GetOpt
+import System.Process (readProcess)
 import Data.Char ( toLower )
 import Data.List ( intercalate, isPrefixOf, sort )
 import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable )
@@ -53,6 +54,7 @@ import Control.Exception.Extensible ( throwIO )
 import qualified Text.Pandoc.UTF8 as UTF8
 import qualified Text.CSL as CSL
 import Control.Monad (when, unless, liftM)
+import Data.Foldable (foldrM)
 import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
 import Network.URI (parseURI, isURI, URI(..))
 import qualified Data.ByteString.Lazy as B
@@ -86,6 +88,12 @@ wrapWords indent c = wrap' (c - indent) (c - indent)
 isTextFormat :: String -> Bool
 isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"]
 
+externalFilter :: FilePath -> Pandoc -> IO Pandoc
+externalFilter f d = E.catch
+  (readJSON def `fmap` readProcess f [] (writeJSON def d))
+  (\e -> let _ = (e :: E.SomeException)
+         in err 83 $ "Error running filter `" ++ f ++ "'")
+
 -- | Data structure for command line options.
 data Opt = Opt
     { optTabStop           :: Int     -- ^ Number of spaces per tab
@@ -272,6 +280,13 @@ options =
                    "STRING")
                   "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks"
 
+    , Option "" ["filter"]
+                 (ReqArg
+                  (\arg opt -> return opt { optPlugins = externalFilter arg :
+                                               optPlugins opt })
+                  "PROGRAM")
+                 "" -- "External JSON filter"
+
     , Option "" ["normalize"]
                  (NoArg
                   (\opt -> return opt { optTransforms =
@@ -876,6 +891,7 @@ main = do
               , optReferenceLinks        = referenceLinks
               , optWrapText              = wrap
               , optColumns               = columns
+              , optPlugins               = plugins
               , optEmailObfuscation      = obfuscationMethod
               , optIdentifierPrefix      = idPrefix
               , optIndentedCodeClasses   = codeBlockClasses
@@ -1099,6 +1115,7 @@ main = do
            reader readerOpts
 
   let doc0 = foldr ($) doc transforms
+  doc1 <- foldrM ($) doc0 plugins
 
   let writeBinary :: B.ByteString -> IO ()
       writeBinary = B.writeFile (UTF8.encodePath outputFile)
@@ -1109,15 +1126,15 @@ main = do
 
   case getWriter writerName' of
     Left e -> err 9 e
-    Right (IOStringWriter f) -> f writerOptions doc0 >>= writerFn outputFile
-    Right (IOByteStringWriter f) -> f writerOptions doc0 >>= writeBinary
+    Right (IOStringWriter f) -> f writerOptions doc1 >>= writerFn outputFile
+    Right (IOByteStringWriter f) -> f writerOptions doc1 >>= writeBinary
     Right (PureStringWriter f)
       | pdfOutput -> do
-              res <- makePDF latexEngine f writerOptions doc0
+              res <- makePDF latexEngine f writerOptions doc1
               case res of
                    Right pdf -> writeBinary pdf
                    Left err' -> err 43 $ UTF8.toStringLazy err'
-      | otherwise -> selfcontain (f writerOptions doc0 ++
+      | otherwise -> selfcontain (f writerOptions doc1 ++
                                   ['\n' | not standalone'])
                       >>= writerFn outputFile . handleEntities
           where htmlFormat = writerName' `elem`
-- 
cgit v1.2.3


From b0143bfad6ebc3b2ae1ced2fb3a9bed989c41000 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Thu, 8 Aug 2013 14:05:46 -0700
Subject: Improved INSTALL instructions.

---
 INSTALL | 66 ++++++++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 43 insertions(+), 23 deletions(-)

diff --git a/INSTALL b/INSTALL
index 8abb14b71..8bcea83c2 100644
--- a/INSTALL
+++ b/INSTALL
@@ -9,9 +9,17 @@ Quick install
 -------------
 
 1.  Install the [Haskell platform].  This will give you [GHC] and
-the [cabal-install] build tool.
+    the [cabal-install] build tool, as well as `alex` and `happy`.
+    If you do not use the Haskell platform, you'll need to install
+    `alex` and `happy` separately:
 
-2.  Use `cabal` to install pandoc and its dependencies:
+        cabal install alex happy
+
+2.  Update your package database:
+
+        cabal update
+
+3.  Use `cabal` to install pandoc and its dependencies:
 
         cabal install pandoc
 
@@ -23,15 +31,22 @@ the [cabal-install] build tool.
 
         cabal install
 
-3.  Make sure the `$CABALDIR/bin` directory is in your path.  You should
-now be able to run `pandoc`:
+    Note: If you obtained the source from the git repository (rather
+    than a release tarball), you'll need to do
+
+        git submodule update --init
+
+    to fetch the contents of `data/templates` before `cabal install`.
+
+4.  Make sure the `$CABALDIR/bin` directory is in your path.  You should
+    now be able to run `pandoc`:
 
         pandoc --help
 
     [Not sure where `$CABALDIR` is?](http://www.haskell.org/haskellwiki/Cabal-Install#The_cabal-install_configuration_file)
 
-4.  Make sure the `$CABALDIR/share/man/man1` directory is in your `MANPATH`.
-You should now be able to access the `pandoc` man page:
+5.  Make sure the `$CABALDIR/share/man/man1` directory is in your `MANPATH`.
+    You should now be able to access the `pandoc` man page:
 
         man pandoc
 
@@ -45,14 +60,19 @@ Custom install
 This is a step-by-step procedure that offers maximal control
 over the build and installation.  Most users should use the
 quick install, but this information may be of use to packagers.
-For more details, see the [Cabal User's Guide].
+For more details, see the [Cabal User's Guide].  These instructions
+assume that the pandoc source directory is your working directory.
 
 1.  Install dependencies:  in addition to the [Haskell platform],
-you will need [zip-archive], [blaze-html], and [highlighting-kate].
+    you will need a number of additional libraries.  You can install
+    them all with
+
+        cabal update
+        cabal install --only-dependencies
 
 2.  Configure:
 
-        runghc Setup.hs configure --prefix=DIR --bindir=DIR --libdir=DIR \
+        cabal configure --prefix=DIR --bindir=DIR --libdir=DIR \
           --datadir=DIR --libsubdir=DIR --datasubdir=DIR --docdir=DIR \
           --htmldir=DIR --program-prefix=PREFIX --program-suffix=SUFFIX \
           --mandir=DIR --flags=FLAGSPEC
@@ -64,26 +84,34 @@ you will need [zip-archive], [blaze-html], and [highlighting-kate].
     preceded by a `-` (to force the flag to `false`), and separated
     by spaces.  Pandoc's flags include:
 
-    - `blaze_html_0_5`: Use blaze-html >= 0.5 (default yes)
-    - `embed_data_files`: embed all data files into the binary (default no)
+    - `embed_data_files`: embed all data files into the binary (default no).
+      This is helpful if you want to create a relocatable binary.
+      Note:  if this option is selected, you need to install the
+      `hsb2hs` preprocessor:
+
+          cabal install hsb2hs
+
+    - `http-conduit`:  use the `http-conduit` library to fetch external
+      resources (default yes -- without this, pandoc cannot make SSL
+      connections)
 
 3.  Build:
 
-        runghc Setup.hs build
+        cabal build
 
 4.  Build API documentation:
 
-        runghc Setup.hs haddock --html-location=URL --hyperlink-source
+        cabal haddock --html-location=URL --hyperlink-source
 
 5.  Copy the files:
 
-        runghc Setup.hs copy --destdir=PATH
+        cabal copy --destdir=PATH
 
     The default destdir is `/`.
 
 6.  Register pandoc as a GHC package:
 
-        runghc Setup.hs register
+        cabal register
 
     Package managers may want to use the `--gen-script` option to
     generate a script that can be run to register the package at
@@ -109,7 +137,6 @@ you please.
 [blaze-html]: http://hackage.haskell.org/package/blaze-html
 [Cabal User's Guide]: http://www.haskell.org/cabal/release/latest/doc/users-guide/builders.html#setup-configure-paths
 
-
 Running tests
 -------------
 
@@ -118,13 +145,6 @@ To build the tests:
 
     cabal configure --enable-tests && cabal build
 
-Note: If you obtained the source via git, you should first do
-
-    git submodule update --init data/templates
-
-to populate the templates subdirectory.  (You can skip this step
-if you obtained the source from a release tarball.)
-
 To run the tests:
 
     cabal test
-- 
cgit v1.2.3


From 7d694e15697a4b1cc974b6316a08117afe663a74 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Thu, 8 Aug 2013 15:13:28 -0700
Subject: Added Text.Pandoc.Process (pipeProcess).

A souped-up version of readProcessWithExitCode that uses lazy bytestrings
and allows setting the environment.
---
 pandoc.cabal               |   3 +-
 src/Text/Pandoc/Process.hs | 105 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 src/Text/Pandoc/Process.hs

diff --git a/pandoc.cabal b/pandoc.cabal
index a8dd528ad..8210bfce5 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -324,7 +324,8 @@ Library
                    Text.Pandoc.Templates,
                    Text.Pandoc.XML,
                    Text.Pandoc.Biblio,
-                   Text.Pandoc.SelfContained
+                   Text.Pandoc.SelfContained,
+                   Text.Pandoc.Process
   Other-Modules:   Text.Pandoc.Readers.Haddock.Lex,
                    Text.Pandoc.Readers.Haddock.Parse,
                    Text.Pandoc.Writers.Shared,
diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
new file mode 100644
index 000000000..112c5b974
--- /dev/null
+++ b/src/Text/Pandoc/Process.hs
@@ -0,0 +1,105 @@
+{-
+Copyright (C) 2013 John MacFarlane 
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Process
+   Copyright   : Copyright (C) 2013 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane 
+   Stability   : alpha
+   Portability : portable
+
+ByteString variant of 'readProcessWithExitCode'.
+-}
+module Text.Pandoc.Process (pipeProcess)
+where
+import System.Process
+import System.Exit (ExitCode (..))
+import Control.Exception
+import System.IO (hClose, hFlush)
+import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
+import Control.Monad (unless)
+import qualified Data.ByteString.Lazy as BL
+
+{- |
+Version of 'System.Process.readProcessWithExitCode' that uses lazy bytestrings
+instead of strings and allows setting environment variables.
+
+@readProcessWithExitCode@ creates an external process, reads its
+standard output and standard error strictly, waits until the process
+terminates, and then returns the 'ExitCode' of the process,
+the standard output, and the standard error.
+
+If an asynchronous exception is thrown to the thread executing
+@readProcessWithExitCode@, the forked process will be terminated and
+@readProcessWithExitCode@ will wait (block) until the process has been
+terminated.
+-}
+
+pipeProcess
+    :: Maybe [(String, String)] -- ^ environment variables
+    -> FilePath                 -- ^ Filename of the executable (see 'proc' for details)
+    -> [String]                 -- ^ any arguments
+    -> BL.ByteString            -- ^ standard input
+    -> IO (ExitCode,BL.ByteString,BL.ByteString) -- ^ exitcode, stdout, stderr
+pipeProcess mbenv cmd args input =
+    mask $ \restore -> do
+      (Just inh, Just outh, Just errh, pid) <- createProcess (proc cmd args)
+                                                   { env     = mbenv,
+                                                     std_in  = CreatePipe,
+                                                     std_out = CreatePipe,
+                                                     std_err = CreatePipe }
+      flip onException
+        (do hClose inh; hClose outh; hClose errh;
+            terminateProcess pid; waitForProcess pid) $ restore $ do
+        -- fork off a thread to start consuming stdout
+        out <- BL.hGetContents outh
+        waitOut <- forkWait $ evaluate $ BL.length out
+
+        -- fork off a thread to start consuming stderr
+        err <- BL.hGetContents errh
+        waitErr <- forkWait $ evaluate $ BL.length err
+
+        -- now write and flush any input
+        let writeInput = do
+              unless (BL.null input) $ do
+                BL.hPutStr inh input
+                hFlush inh
+              hClose inh
+
+        writeInput
+
+        -- wait on the output
+        waitOut
+        waitErr
+
+        hClose outh
+        hClose errh
+
+        -- wait on the process
+        ex <- waitForProcess pid
+
+        return (ex, out, err)
+
+forkWait :: IO a -> IO (IO a)
+forkWait a = do
+  res <- newEmptyMVar
+  _ <- mask $ \restore -> forkIO $ try (restore a) >>= putMVar res
+  return (takeMVar res >>= either (\ex -> throwIO (ex :: SomeException)) return)
+
-- 
cgit v1.2.3


From 83f263110f364e87d8c0908b4a52be801aa77802 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Thu, 8 Aug 2013 15:15:20 -0700
Subject: Use pipeProcess in Text.Pandoc.PDF.

---
 src/Text/Pandoc/PDF.hs | 39 +++------------------------------------
 1 file changed, 3 insertions(+), 36 deletions(-)

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 49b455285..b030e2ca7 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -38,11 +38,7 @@ import qualified Data.ByteString as BS
 import System.Exit (ExitCode (..))
 import System.FilePath
 import System.Directory
-import System.Process
 import System.Environment
-import Control.Exception (evaluate)
-import System.IO (hClose)
-import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
 import Control.Monad (unless)
 import Data.List (isInfixOf)
 import qualified Data.ByteString.Base64 as B64
@@ -52,6 +48,8 @@ import Text.Pandoc.Generic (bottomUpM)
 import Text.Pandoc.Shared (fetchItem, warn)
 import Text.Pandoc.Options (WriterOptions(..))
 import Text.Pandoc.MIME (extensionFromMimeType)
+import Text.Pandoc.Process (pipeProcess)
+import qualified Data.ByteString.Lazy as BL
 
 withTempDir :: String -> (FilePath -> IO a) -> IO a
 withTempDir =
@@ -148,7 +146,7 @@ runTeXProgram program runsLeft tmpDir source = do
           $ lookup "TEXINPUTS" env'
     let env'' = ("TEXINPUTS", texinputs) :
                   [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
-    (exit, out, err) <- readCommand (Just env'') program programArgs
+    (exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty
     if runsLeft > 1
        then runTeXProgram program (runsLeft - 1) tmpDir source
        else do
@@ -159,34 +157,3 @@ runTeXProgram program runsLeft tmpDir source = do
                    else return Nothing
          return (exit, out <> err, pdf)
 
--- utility functions
-
--- Run a command and return exitcode, contents of stdout, and
--- contents of stderr. (Based on
--- 'readProcessWithExitCode' from 'System.Process'.)
-readCommand :: Maybe [(String, String)]            -- ^ environment variables
-            -> FilePath                            -- ^ command to run
-            -> [String]                            -- ^ any arguments
-            -> IO (ExitCode,ByteString,ByteString) -- ^ exit, stdout, stderr
-readCommand mbenv cmd args = do
-    (Just inh, Just outh, Just errh, pid) <-
-        createProcess (proc cmd args){ env     = mbenv,
-                                       std_in  = CreatePipe,
-                                       std_out = CreatePipe,
-                                       std_err = CreatePipe }
-    outMVar <- newEmptyMVar
-    -- fork off a thread to start consuming stdout
-    out  <- B.hGetContents outh
-    _ <- forkIO $ evaluate (B.length out) >> putMVar outMVar ()
-    -- fork off a thread to start consuming stderr
-    err  <- B.hGetContents errh
-    _ <- forkIO $ evaluate (B.length err) >> putMVar outMVar ()
-    -- now write and flush any input
-    hClose inh -- done with stdin
-    -- wait on the output
-    takeMVar outMVar
-    takeMVar outMVar
-    hClose outh
-    -- wait on the process
-    ex <- waitForProcess pid
-    return (ex, out, err)
-- 
cgit v1.2.3


From 99bb066bb925134b506d39c8d6694fe81337d9c1 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Thu, 8 Aug 2013 15:15:58 -0700
Subject: Pass writer name as argument to filters.

This way filters can figure out what the target format is
and react appropriately.

Example:

    #!/usr/bin/env runghc
    import Text.Pandoc.JSON
    import Data.Char

    main = toJSONFilter cap
      where cap (Just "html") (Str xs) = Str $ map toUpper xs
            cap _ x = x

This capitalizes text only for html output.
---
 pandoc.cabal       |  2 +-
 pandoc.hs          | 17 +++++++++++------
 src/Text/Pandoc.hs |  5 +----
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/pandoc.cabal b/pandoc.cabal
index 8210bfce5..3903fe606 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -352,8 +352,8 @@ Executable pandoc
                  bytestring >= 0.9 && < 0.11,
                  extensible-exceptions >= 0.1 && < 0.2,
                  highlighting-kate >= 0.5.5 && < 0.6,
+                 aeson >= 0.6 && < 0.7,
                  HTTP >= 4000.0.5 && < 4000.3,
-                 process >= 1 && < 1.2,
                  citeproc-hs >= 0.3.7 && < 0.4
   Ghc-Options:   -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind
   Ghc-Prof-Options: -auto-all -caf-all -rtsopts -with-rtsopts=-K16m
diff --git a/pandoc.hs b/pandoc.hs
index 94d206103..fdf0b35b7 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -37,13 +37,13 @@ import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, safeRead,
                             headerShift, normalize, err, warn )
 import Text.Pandoc.XML ( toEntities, fromEntities )
 import Text.Pandoc.SelfContained ( makeSelfContained )
+import Text.Pandoc.Process (pipeProcess)
 import Text.Highlighting.Kate ( languages, Style, tango, pygments,
          espresso, zenburn, kate, haddock, monochrome )
 import System.Environment ( getArgs, getProgName )
 import System.Exit ( exitWith, ExitCode (..) )
 import System.FilePath
 import System.Console.GetOpt
-import System.Process (readProcess)
 import Data.Char ( toLower )
 import Data.List ( intercalate, isPrefixOf, sort )
 import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable )
@@ -59,6 +59,7 @@ import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
 import Network.URI (parseURI, isURI, URI(..))
 import qualified Data.ByteString.Lazy as B
 import Text.CSL.Reference (Reference(..))
+import Data.Aeson (eitherDecode', encode)
 
 copyrightMessage :: String
 copyrightMessage = "\nCopyright (C) 2006-2013 John MacFarlane\n" ++
@@ -88,9 +89,13 @@ wrapWords indent c = wrap' (c - indent) (c - indent)
 isTextFormat :: String -> Bool
 isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"]
 
-externalFilter :: FilePath -> Pandoc -> IO Pandoc
-externalFilter f d = E.catch
-  (readJSON def `fmap` readProcess f [] (writeJSON def d))
+externalFilter :: FilePath -> [String] -> Pandoc -> IO Pandoc
+externalFilter f args' d = E.catch
+  (do (exitcode, outbs, errbs) <- pipeProcess Nothing f args' $ encode d
+      case exitcode of
+           ExitSuccess    -> return $ either error id $ eitherDecode' outbs
+           ExitFailure _  -> err 83 $ "Error running filter `" ++ UTF8.toStringLazy outbs ++
+                                          UTF8.toStringLazy errbs ++  "'")
   (\e -> let _ = (e :: E.SomeException)
          in err 83 $ "Error running filter `" ++ f ++ "'")
 
@@ -132,7 +137,7 @@ data Opt = Opt
     , optReferenceLinks    :: Bool    -- ^ Use reference links in writing markdown, rst
     , optWrapText          :: Bool    -- ^ Wrap text
     , optColumns           :: Int     -- ^ Line length in characters
-    , optPlugins           :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply
+    , optPlugins           :: [[String] -> Pandoc -> IO Pandoc] -- ^ Plugins to apply
     , optEmailObfuscation  :: ObfuscationMethod
     , optIdentifierPrefix  :: String
     , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks
@@ -1115,7 +1120,7 @@ main = do
            reader readerOpts
 
   let doc0 = foldr ($) doc transforms
-  doc1 <- foldrM ($) doc0 plugins
+  doc1 <- foldrM ($) doc0 $ map ($ [writerName']) plugins
 
   let writeBinary :: B.ByteString -> IO ()
       writeBinary = B.writeFile (UTF8.encodePath outputFile)
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 27aa02a75..703bb876a 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -309,11 +309,8 @@ class ToJSONFilter a => ToJsonFilter a
         toJsonFilter = toJSONFilter
 
 readJSON :: ReaderOptions -> String -> Pandoc
-readJSON _ = checkJSON . eitherDecode' . UTF8.fromStringLazy
+readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
 
 writeJSON :: WriterOptions -> Pandoc -> String
 writeJSON _ = UTF8.toStringLazy . encode
 
-checkJSON :: Either String a -> a
-checkJSON (Right x) = x
-checkJSON (Left e)  = error e
-- 
cgit v1.2.3


From e9de0f0e22b9b64b5684efe81d03539c3f57a71c Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Thu, 8 Aug 2013 23:14:12 -0700
Subject: Preliminary support for new Div and Span elements in writers.

Currently these are "transparent" containers, except in HTML,
where they produce div and span elements with attributes.
---
 data/sample.lua                         | 8 ++++++++
 src/Text/Pandoc/Writers/AsciiDoc.hs     | 2 ++
 src/Text/Pandoc/Writers/ConTeXt.hs      | 2 ++
 src/Text/Pandoc/Writers/Custom.hs       | 5 +++++
 src/Text/Pandoc/Writers/Docbook.hs      | 3 +++
 src/Text/Pandoc/Writers/Docx.hs         | 2 ++
 src/Text/Pandoc/Writers/FB2.hs          | 3 +++
 src/Text/Pandoc/Writers/HTML.hs         | 5 +++++
 src/Text/Pandoc/Writers/LaTeX.hs        | 2 ++
 src/Text/Pandoc/Writers/Man.hs          | 2 ++
 src/Text/Pandoc/Writers/Markdown.hs     | 3 +++
 src/Text/Pandoc/Writers/MediaWiki.hs    | 6 ++++++
 src/Text/Pandoc/Writers/OpenDocument.hs | 2 ++
 src/Text/Pandoc/Writers/Org.hs          | 3 +++
 src/Text/Pandoc/Writers/RST.hs          | 2 ++
 src/Text/Pandoc/Writers/RTF.hs          | 3 +++
 src/Text/Pandoc/Writers/Texinfo.hs      | 5 +++++
 src/Text/Pandoc/Writers/Textile.hs      | 6 ++++++
 18 files changed, 64 insertions(+)

diff --git a/data/sample.lua b/data/sample.lua
index 1c82ebe2e..a7e9d6337 100644
--- a/data/sample.lua
+++ b/data/sample.lua
@@ -177,6 +177,10 @@ function Note(s)
             '"><sup>' .. num .. '</sup></a>'
 end
 
+function Span(s, attr)
+  return "<span" .. attributes(attr) .. ">" .. s .. "</span>"
+end
+
 function Plain(s)
   return s
 end
@@ -299,6 +303,10 @@ function Table(caption, aligns, widths, headers, rows)
   return table.concat(buffer,'\n')
 end
 
+function Div(s, attr)
+  return "\n" .. s .. "
" +end + -- The following code will produce runtime warnings when you haven't defined -- all of the functions you need for the custom writer, so it's useful -- to include when you're working on a writer. diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 6c3c6955e..00cea27e5 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -246,6 +246,7 @@ blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do blockToAsciiDoc opts (DefinitionList items) = do contents <- mapM (definitionListItemToAsciiDoc opts) items return $ cat contents <> blankline +blockToAsciiDoc opts (Div _ bs) = blockListToAsciiDoc opts bs -- | Convert bullet list item (list of blocks) to asciidoc. bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc @@ -383,3 +384,4 @@ inlineToAsciiDoc opts (Note [Plain inlines]) = do return $ text "footnote:[" <> contents <> "]" -- asciidoc can't handle blank lines in notes inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]" +inlineToAsciiDoc opts (Span _ ils) = inlineListToAsciiDoc opts ils diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 32588dc8f..40dc1deb5 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -143,6 +143,7 @@ blockToConTeXt (CodeBlock _ str) = -- blankline because \stoptyping can't have anything after it, inc. 
'}' blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline blockToConTeXt (RawBlock _ _ ) = return empty +blockToConTeXt (Div _ bs) = blockListToConTeXt bs blockToConTeXt (BulletList lst) = do contents <- mapM listItemToConTeXt lst return $ ("\\startitemize" <> if isTightList lst @@ -330,6 +331,7 @@ inlineToConTeXt (Note contents) = do then text "\\footnote{" <> nest 2 contents' <> char '}' else text "\\startbuffer " <> nest 2 contents' <> text "\\stopbuffer\\footnote{\\getbuffer}" +inlineToConTeXt (Span _ ils) = inlineListToConTeXt ils -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: Attr diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 5c82fe0e1..c250a240e 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -178,6 +178,9 @@ blockToCustom lua (OrderedList (num,sty,delim) items) = blockToCustom lua (DefinitionList items) = callfunc lua "DefinitionList" items +blockToCustom lua (Div attr items) = + callfunc lua "Div" items (attrToMap attr) + -- | Convert list of Pandoc block elements to Custom. blockListToCustom :: LuaState -- ^ Options -> [Block] -- ^ List of block elements @@ -240,3 +243,5 @@ inlineToCustom lua (Image alt (src,tit)) = inlineToCustom lua (Note contents) = callfunc lua "Note" contents +inlineToCustom lua (Span attr items) = + callfunc lua "Span" items (attrToMap attr) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 6f4b61a79..2f415f3ee 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -148,6 +148,7 @@ listItemToDocbook opts item = -- | Convert a Pandoc block element to Docbook. 
blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty +blockToDocbook opts (Div _ bs) = blocksToDocbook opts bs blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure @@ -267,6 +268,8 @@ inlineToDocbook opts (Quoted _ lst) = inTagsSimple "quote" $ inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst +inlineToDocbook opts (Span _ ils) = + inlinesToDocbook opts ils inlineToDocbook _ (Code _ str) = inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math t str) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 6bb4d5569..d93254971 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -428,6 +428,7 @@ getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique -- | Convert a Pandoc block element to OpenXML. blockToOpenXML :: WriterOptions -> Block -> WS [Element] blockToOpenXML _ Null = return [] +blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do contents <- withParaProp (pStyle $ "Heading" ++ show lev) $ blockToOpenXML opts (Para lst) @@ -633,6 +634,7 @@ formattedString str = do inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] inlineToOpenXML _ (Str str) = formattedString str inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ") +inlineToOpenXML opts (Span _ ils) = inlinesToOpenXML opts ils inlineToOpenXML opts (Strong lst) = withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst inlineToOpenXML opts (Emph lst) = diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 27f0c8305..2576b2dc2 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -324,6 +324,7 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . 
map (el "p" . el "code") . lines $ s blockToXml (RawBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code") . lines $ s +blockToXml (Div _ bs) = cMapM blockToXml bs blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs blockToXml (OrderedList a bss) = do state <- get @@ -425,6 +426,7 @@ indent = indentBlock -- | Convert a Pandoc's Inline element to FictionBook XML representation. toXml :: Inline -> FBM [Content] toXml (Str s) = return [txt s] +toXml (Span _ ils) = cMapM toXml ils toXml (Emph ss) = list `liftM` wrap "emphasis" ss toXml (Strong ss) = list `liftM` wrap "strong" ss toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss @@ -560,6 +562,7 @@ list = (:[]) plain :: Inline -> String plain (Str s) = s plain (Emph ss) = concat (map plain ss) +plain (Span _ ss) = concat (map plain ss) plain (Strong ss) = concat (map plain ss) plain (Strikeout ss) = concat (map plain ss) plain (Superscript ss) = concat (map plain ss) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index cfc187e02..560c26c76 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -407,6 +407,9 @@ blockToHtml opts (Para [Str ".",Space,Str ".",Space,Str "."]) blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents +blockToHtml opts (Div attr bs) = do + contents <- blockListToHtml opts bs + return $ addAttrs opts attr $ H.div $ nl opts >> contents >> nl opts blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str blockToHtml _ (RawBlock _ _) = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr @@ -590,6 +593,8 @@ inlineToHtml opts inline = (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br + (Span attr ils) -> inlineListToHtml opts ils >>= + return . addAttrs opts attr . 
H.span (Emph lst) -> inlineListToHtml opts lst >>= return . H.em (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong (Code attr str) -> case hlCode of diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index aa5bfa623..37de03e0f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -282,6 +282,7 @@ isLineBreakOrSpace _ = False blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc blockToLaTeX Null = return empty +blockToLaTeX (Div _ bs) = blockListToLaTeX bs blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure @@ -560,6 +561,7 @@ isQuoted _ = False -- | Convert inline element to LaTeX inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc +inlineToLaTeX (Span _ ils) = inlineListToLaTeX ils >>= return . braces inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 0508b6c27..ed66c7c2b 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -160,6 +160,7 @@ blockToMan :: WriterOptions -- ^ Options -> Block -- ^ Block element -> State WriterState Doc blockToMan _ Null = return empty +blockToMan opts (Div _ bs) = blockListToMan opts bs blockToMan opts (Plain inlines) = liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines blockToMan opts (Para inlines) = do @@ -300,6 +301,7 @@ inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) -- | Convert Pandoc inline element to man. 
inlineToMan :: WriterOptions -> Inline -> State WriterState Doc +inlineToMan opts (Span _ ils) = inlineListToMan opts ils inlineToMan opts (Emph lst) = do contents <- inlineListToMan opts lst return $ text "\\f[I]" <> contents <> text "\\f[]" diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 80402a757..d195d8445 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -301,6 +301,7 @@ blockToMarkdown :: WriterOptions -- ^ Options -> Block -- ^ Block element -> State WriterState Doc blockToMarkdown _ Null = return empty +blockToMarkdown opts (Div _ bs) = blockListToMarkdown opts bs blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines return $ contents <> cr @@ -628,6 +629,8 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc +inlineToMarkdown opts (Span _ ils) = + inlineListToMarkdown opts ils inlineToMarkdown opts (Emph lst) = do contents <- inlineListToMarkdown opts lst return $ "*" <> contents <> "*" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index e1bfd18b2..fccf25753 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -83,6 +83,9 @@ blockToMediaWiki :: WriterOptions -- ^ Options blockToMediaWiki _ Null = return "" +blockToMediaWiki opts (Div _ bs) = + blockListToMediaWiki opts bs + blockToMediaWiki opts (Plain inlines) = inlineListToMediaWiki opts inlines @@ -328,6 +331,9 @@ inlineListToMediaWiki opts lst = -- | Convert Pandoc inline element to MediaWiki. 
inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String +inlineToMediaWiki opts (Span _ ils) = + inlineListToMediaWiki opts ils + inlineToMediaWiki opts (Emph lst) = do contents <- inlineListToMediaWiki opts lst return $ "''" ++ contents ++ "''" diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 0efbf7580..d76d0f6ad 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -285,6 +285,7 @@ blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc blockToOpenDocument o bs | Plain b <- bs = inParagraphTags =<< inlinesToOpenDocument o b | Para b <- bs = inParagraphTags =<< inlinesToOpenDocument o b + | Div _ xs <- bs = blocksToOpenDocument o xs | Header i _ b <- bs = setFirstPara >> (inHeaderTags i =<< inlinesToOpenDocument o b) | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b @@ -360,6 +361,7 @@ inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc inlineToOpenDocument o ils | Space <- ils = inTextStyle space + | Span _ xs <- ils = inlinesToOpenDocument o xs | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s | Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 40e8abf7e..34ae532b0 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -106,6 +106,7 @@ escapeString = escapeStringUsing $ blockToOrg :: Block -- ^ Block element -> State WriterState Doc blockToOrg Null = return empty +blockToOrg (Div _ bs) = blockListToOrg bs blockToOrg (Plain inlines) = inlineListToOrg inlines -- title beginning with fig: indicates that the image is a figure blockToOrg (Para [Image txt (src,'f':'i':'g':':':tit)]) = do @@ -229,6 +230,8 @@ inlineListToOrg lst = mapM 
inlineToOrg lst >>= return . hcat -- | Convert Pandoc inline element to Org. inlineToOrg :: Inline -> State WriterState Doc +inlineToOrg (Span _ lst) = + inlineListToOrg lst inlineToOrg (Emph lst) = do contents <- inlineListToOrg lst return $ "/" <> contents <> "/" diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 606793842..4d8daa15b 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -161,6 +161,7 @@ bordered contents c = blockToRST :: Block -- ^ Block element -> State WriterState Doc blockToRST Null = return empty +blockToRST (Div _ bs) = blockListToRST bs blockToRST (Plain inlines) = inlineListToRST inlines -- title beginning with fig: indicates that the image is a figure blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do @@ -338,6 +339,7 @@ inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat -- | Convert Pandoc inline element to RST. inlineToRST :: Inline -> State WriterState Doc +inlineToRST (Span _ ils) = inlineListToRST ils inlineToRST (Emph lst) = do contents <- inlineListToRST lst return $ "*" <> contents <> "*" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 0db1c52c4..7e5d33c50 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -208,6 +208,8 @@ blockToRTF :: Int -- ^ indent level -> Block -- ^ block to convert -> String blockToRTF _ _ Null = "" +blockToRTF indent alignment (Div _ bs) = + concatMap (blockToRTF indent alignment) bs blockToRTF indent alignment (Plain lst) = rtfCompact indent 0 alignment $ inlineListToRTF lst blockToRTF indent alignment (Para lst) = @@ -308,6 +310,7 @@ inlineListToRTF lst = concatMap inlineToRTF lst -- | Convert inline item to RTF. 
inlineToRTF :: Inline -- ^ inline to convert -> String +inlineToRTF (Span _ lst) = inlineListToRTF lst inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}" diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 0f57d14b2..f8b460001 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -123,6 +123,8 @@ blockToTexinfo :: Block -- ^ Block to convert blockToTexinfo Null = return empty +blockToTexinfo (Div _ bs) = blockListToTexinfo bs + blockToTexinfo (Plain lst) = inlineListToTexinfo lst @@ -374,6 +376,9 @@ disallowedInNode c = c `elem` ".,:()" inlineToTexinfo :: Inline -- ^ Inline to convert -> State WriterState Doc +inlineToTexinfo (Span _ lst) = + inlineListToTexinfo lst + inlineToTexinfo (Emph lst) = inlineListToTexinfo lst >>= return . inCmd "emph" diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 3288ce222..3fb554dca 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -101,6 +101,9 @@ blockToTextile :: WriterOptions -- ^ Options blockToTextile _ Null = return "" +blockToTextile opts (Div _ bs) = + blockListToTextile opts bs + blockToTextile opts (Plain inlines) = inlineListToTextile opts inlines @@ -343,6 +346,9 @@ inlineListToTextile opts lst = -- | Convert Pandoc inline element to Textile. inlineToTextile :: WriterOptions -> Inline -> State WriterState String +inlineToTextile opts (Span _ lst) = + inlineListToTextile opts lst + inlineToTextile opts (Emph lst) = do contents <- inlineListToTextile opts lst return $ if '_' `elem` contents -- cgit v1.2.3 From cbfa9321066212b912583481015224f3c944ae21 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Aug 2013 17:23:51 -0700 Subject: Adjustments for new Format newtype. 
--- src/Text/Pandoc/Readers/HTML.hs | 4 ++-- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- src/Text/Pandoc/Readers/RST.hs | 1 + src/Text/Pandoc/Readers/Textile.hs | 6 +++--- src/Text/Pandoc/Writers/AsciiDoc.hs | 8 ++++++-- src/Text/Pandoc/Writers/Custom.hs | 6 ++++++ src/Text/Pandoc/Writers/Docbook.hs | 9 +++++---- src/Text/Pandoc/Writers/Docx.hs | 10 +++++----- src/Text/Pandoc/Writers/EPUB.hs | 6 +++--- src/Text/Pandoc/Writers/HTML.hs | 13 ++++++++----- src/Text/Pandoc/Writers/LaTeX.hs | 13 ++++++++----- src/Text/Pandoc/Writers/Man.hs | 10 ++++++---- src/Text/Pandoc/Writers/MediaWiki.hs | 14 ++++++++------ src/Text/Pandoc/Writers/OpenDocument.hs | 12 +++++++----- src/Text/Pandoc/Writers/RST.hs | 15 +++++++++------ src/Text/Pandoc/Writers/RTF.hs | 12 +++++++----- src/Text/Pandoc/Writers/Texinfo.hs | 19 +++++++++++-------- src/Text/Pandoc/Writers/Textile.hs | 14 ++++++-------- tests/Tests/Arbitrary.hs | 8 ++++---- 19 files changed, 106 insertions(+), 76 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 0068ab5c1..7ca554fa3 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -182,7 +182,7 @@ pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag parseRaw <- getOption readerParseRaw if parseRaw && not (null raw) - then return [RawBlock "html" raw] + then return [RawBlock (Format "html") raw] else return [] pHtmlBlock :: String -> TagParser String @@ -408,7 +408,7 @@ pRawHtmlInline = do result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag parseRaw <- getOption readerParseRaw if parseRaw - then return [RawInline "html" $ renderTags' [result]] + then return [RawInline (Format "html") $ renderTags' [result]] else return [] pInlinesInTags :: String -> ([Inline] -> Inline) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6b5035d93..eb0baedda 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ 
b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-} {- Copyright (C) 2006-2012 John MacFarlane diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 34962b553..df0a8294d 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2006-2010 John MacFarlane diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 9191f6908..8ccd1e227 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -290,13 +290,13 @@ rawHtmlBlock :: Parser [Char] ParserState Block rawHtmlBlock = try $ do (_,b) <- htmlTag isBlockTag optional blanklines - return $ RawBlock "html" b + return $ RawBlock (Format "html") b -- | Raw block of LaTeX content rawLaTeXBlock' :: Parser [Char] ParserState Block rawLaTeXBlock' = do guardEnabled Ext_raw_tex - RawBlock "latex" <$> (rawLaTeXBlock <* spaces) + RawBlock (Format "latex") <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. @@ -487,7 +487,7 @@ endline = try $ do return LineBreak rawHtmlInline :: Parser [Char] ParserState Inline -rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag +rawHtmlInline = RawInline (Format "html") . 
snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline rawLaTeXInline' :: Parser [Char] ParserState Inline diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 00cea27e5..68b525742 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -132,7 +132,9 @@ blockToAsciiDoc opts (Para inlines) = do then text "\\" else empty return $ esc <> contents <> blankline -blockToAsciiDoc _ (RawBlock _ _) = return empty +blockToAsciiDoc _ (RawBlock f s) + | f == "asciidoc" = return $ text s + | otherwise = return empty blockToAsciiDoc _ HorizontalRule = return $ blankline <> text "'''''" <> blankline blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do @@ -347,7 +349,9 @@ inlineToAsciiDoc _ (Math InlineMath str) = return $ "latexmath:[$" <> text str <> "$]" inlineToAsciiDoc _ (Math DisplayMath str) = return $ "latexmath:[\\[" <> text str <> "\\]]" -inlineToAsciiDoc _ (RawInline _ _) = return empty +inlineToAsciiDoc _ (RawInline f s) + | f == "asciidoc" = return $ text s + | otherwise = return empty inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr inlineToAsciiDoc _ Space = return space inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index c250a240e..0234e1e35 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Writers.Custom ( writeCustom ) where import Text.Pandoc.Definition import Text.Pandoc.Options import Data.List ( intersperse ) +import Data.Char ( toLower ) import Scripting.Lua (LuaState, StackValue, callfunc) import qualified Scripting.Lua as Lua import Text.Pandoc.UTF8 (fromString, toString) @@ -78,6 +79,11 @@ instance StackValue a => StackValue [a] where return (Just lst) valuetype _ = Lua.TTABLE +instance StackValue Format where + push lua (Format f) = Lua.push lua (map toLower f) + peek l n = fmap Format `fmap` 
Lua.peek l n + valuetype _ = Lua.TSTRING + instance (StackValue a, StackValue b) => StackValue (M.Map a b) where push lua m = do let xs = M.toList m diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 2f415f3ee..3d150d19b 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2006-2010 John MacFarlane @@ -199,10 +200,10 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = in inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst -blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block --- we allow html for compatibility with earlier versions of pandoc -blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block -blockToDocbook _ (RawBlock _ _) = empty +blockToDocbook _ (RawBlock f str) + | f == "docbook" = text str -- raw XML block + | f == "html" = text str -- allow html for backwards compatibility + | otherwise = empty blockToDocbook _ HorizontalRule = empty -- not semantic blockToDocbook opts (Table caption aligns widths headers rows) = let captionDoc = if null caption diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index d93254971..2483e243f 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -460,8 +460,8 @@ blockToOpenXML opts (Para lst) = do contents <- inlinesToOpenXML opts lst return [mknode "w:p" [] (paraProps ++ contents)] blockToOpenXML _ (RawBlock format str) - | format == "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = return [] + | format == Format "openxml" = return [ x | Elem x <- parseXML str ] + | otherwise = return [] blockToOpenXML opts (BlockQuote blocks) = withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks blockToOpenXML opts (CodeBlock attrs str) = @@ -653,8 +653,8 @@ 
inlineToOpenXML opts (Strikeout lst) = $ inlinesToOpenXML opts lst inlineToOpenXML _ LineBreak = return [br] inlineToOpenXML _ (RawInline f str) - | f == "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = return [] + | f == Format "openxml" = return [ x | Elem x <- parseXML str ] + | otherwise = return [] inlineToOpenXML opts (Quoted quoteType lst) = inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close] where (open, close) = case quoteType of @@ -688,7 +688,7 @@ inlineToOpenXML opts (Note bs) = do let notemarker = mknode "w:r" [] [ mknode "w:rPr" [] (rStyle "FootnoteRef") , mknode "w:footnoteRef" [] () ] - let notemarkerXml = RawInline "openxml" $ ppElement notemarker + let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs insertNoteRef (Para ils : xs) = Para (notemarkerXml : ils) : xs insertNoteRef xs = Para [notemarkerXml] : xs diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index fb756f196..ab14ff8a0 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -103,7 +103,7 @@ writeEPUB opts doc@(Pandoc meta _) = do Just img -> do let coverImage = "cover-image" ++ takeExtension img let cpContent = renderHtml $ writeHtml opts' - (Pandoc meta [RawBlock "html" $ "
\n\"cover\n
"]) + (Pandoc meta [RawBlock (Format "html") $ "
\n\"cover\n
"]) imgContent <- B.readFile img return ( [mkEntry "cover.xhtml" cpContent] , [mkEntry coverImage imgContent] ) @@ -422,7 +422,7 @@ transformInline opts sourceDir picsRef (Image lab (src,tit)) | isAbsoluteURI src = do raw <- makeSelfContained Nothing $ writeHtmlInline opts (Image lab (src,tit)) - return $ RawInline "html" raw + return $ RawInline (Format "html") raw | otherwise = do let src' = unEscapeString src pics <- readIORef picsRef @@ -438,7 +438,7 @@ transformInline opts sourceDir picsRef (Image lab (src,tit)) transformInline opts _ _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained Nothing $ writeHtmlInline opts x - return $ RawInline "html" raw + return $ RawInline (Format "html") raw transformInline _ _ _ x = return x writeHtmlInline :: WriterOptions -> Inline -> String diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 560c26c76..25079574e 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -410,8 +410,9 @@ blockToHtml opts (Para lst) = do blockToHtml opts (Div attr bs) = do contents <- blockListToHtml opts bs return $ addAttrs opts attr $ H.div $ nl opts >> contents >> nl opts -blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str -blockToHtml _ (RawBlock _ _) = return mempty +blockToHtml _ (RawBlock f str) + | f == Format "html" = return $ preEscapedString str + | otherwise = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do let tolhs = isEnabled Ext_literate_haskell opts && @@ -678,12 +679,14 @@ inlineToHtml opts inline = return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag ) - (RawInline "latex" str) -> case writerHTMLMathMethod opts of + (RawInline f str) + | f == Format "latex" -> + case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ toHtml str _ -> return 
mempty - (RawInline "html" str) -> return $ preEscapedString str - (RawInline _ _) -> return mempty + | f == Format "html" -> return $ preEscapedString str + | otherwise -> return mempty (Link [Str str] (s,_)) | "mailto:" `isPrefixOf` s && s == escapeURI ("mailto" ++ str) -> -- autolink diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 37de03e0f..d09ccc3b8 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -356,8 +356,10 @@ blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do Nothing -> rawCodeBlock Just h -> modify (\st -> st{ stHighlighting = True }) >> return (flush $ text h) -blockToLaTeX (RawBlock "latex" x) = return $ text x -blockToLaTeX (RawBlock _ _) = return empty +blockToLaTeX (RawBlock f x) + | f == Format "latex" || f == Format "tex" + = return $ text x + | otherwise = return empty blockToLaTeX (BulletList []) = return empty -- otherwise latex error blockToLaTeX (BulletList lst) = do incremental <- gets stIncremental @@ -630,9 +632,10 @@ inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$' inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]" -inlineToLaTeX (RawInline "latex" str) = return $ text str -inlineToLaTeX (RawInline "tex" str) = return $ text str -inlineToLaTeX (RawInline _ _) = return empty +inlineToLaTeX (RawInline f str) + | f == Format "latex" || f == Format "tex" + = return $ text str + | otherwise = return empty inlineToLaTeX (LineBreak) = return "\\\\" inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index ed66c7c2b..642a002d6 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -167,8 +167,9 @@ blockToMan opts (Para inlines) = do contents <- liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines return $ text ".PP" $$ contents -blockToMan _ (RawBlock "man" str) = 
return $ text str -blockToMan _ (RawBlock _ _) = return empty +blockToMan _ (RawBlock f str) + | f == Format "man" = return $ text str + | otherwise = return empty blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" blockToMan opts (Header level _ inlines) = do contents <- inlineListToMan opts inlines @@ -333,8 +334,9 @@ inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str inlineToMan opts (Math DisplayMath str) = do contents <- inlineListToMan opts $ readTeXMath str return $ cr <> text ".RS" $$ contents $$ text ".RE" -inlineToMan _ (RawInline "man" str) = return $ text str -inlineToMan _ (RawInline _ _) = return empty +inlineToMan _ (RawInline f str) + | f == Format "man" = return $ text str + | otherwise = return empty inlineToMan _ (LineBreak) = return $ cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr inlineToMan _ Space = return space diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index fccf25753..4ffba1100 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -107,9 +107,10 @@ blockToMediaWiki opts (Para inlines) = do then "

" ++ contents ++ "

" else contents ++ if null listLevel then "\n" else "" -blockToMediaWiki _ (RawBlock "mediawiki" str) = return str -blockToMediaWiki _ (RawBlock "html" str) = return str -blockToMediaWiki _ (RawBlock _ _) = return "" +blockToMediaWiki _ (RawBlock f str) + | f == Format "mediawiki" = return str + | f == Format "html" = return str + | otherwise = return "" blockToMediaWiki _ HorizontalRule = return "\n-----\n" @@ -374,9 +375,10 @@ inlineToMediaWiki _ (Str str) = return $ escapeString str inlineToMediaWiki _ (Math _ str) = return $ "" ++ str ++ "" -- note: str should NOT be escaped -inlineToMediaWiki _ (RawInline "mediawiki" str) = return str -inlineToMediaWiki _ (RawInline "html" str) = return str -inlineToMediaWiki _ (RawInline _ _) = return "" +inlineToMediaWiki _ (RawInline f str) + | f == Format "mediawiki" = return str + | f == Format "html" = return str + | otherwise = return "" inlineToMediaWiki _ (LineBreak) = return "
" diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index d76d0f6ad..05c576c20 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards, OverloadedStrings #-} {- Copyright (C) 2008-2010 Andrea Rossato and John MacFarlane. @@ -296,7 +296,9 @@ blockToOpenDocument o bs | Table c a w h r <- bs = setFirstPara >> table c a w h r | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ]) - | RawBlock _ _ <- bs = return empty + | RawBlock f s <- bs = if f == "opendocument" + then preformatted s + else return empty | Null <- bs = return empty | otherwise = return empty where @@ -374,9 +376,9 @@ inlineToOpenDocument o ils | Code _ s <- ils = preformatted s | Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s) | Cite _ l <- ils = inlinesToOpenDocument o l - | RawInline "opendocument" s <- ils = preformatted s - | RawInline "html" s <- ils = preformatted s -- for backwards compat. 
- | RawInline _ _ <- ils = return empty + | RawInline f s <- ils = if f == "opendocument" || f == "html" + then preformatted s + else return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l | Image _ (s,t) <- ils = return $ mkImg s t | Note l <- ils = mkNote l diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 4d8daa15b..5fbbb6afc 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -42,7 +42,7 @@ import Network.URI (isAbsoluteURI) import Text.Pandoc.Pretty import Control.Monad.State import Control.Applicative ( (<$>) ) -import Data.Char (isSpace) +import Data.Char (isSpace, toLower) type Refs = [([Inline], Target)] @@ -176,9 +176,11 @@ blockToRST (Para inlines) | otherwise = do contents <- inlineListToRST inlines return $ contents <> blankline -blockToRST (RawBlock f str) = - return $ blankline <> ".. raw:: " <> text f $+$ - (nest 3 $ text str) $$ blankline +blockToRST (RawBlock f str) + | f == "rst" = return $ text str + | otherwise = return $ blankline <> ".. raw:: " <> + text (map toLower $ unFormat f) $+$ + (nest 3 $ text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline blockToRST (Header level _ inlines) = do @@ -374,8 +376,9 @@ inlineToRST (Math t str) = do then blankline $$ ".. math::" $$ blankline $$ nest 3 (text str) $$ blankline else blankline $$ (".. 
math:: " <> text str) $$ blankline -inlineToRST (RawInline "rst" x) = return $ text x -inlineToRST (RawInline _ _) = return empty +inlineToRST (RawInline f x) + | f == "rst" = return $ text x + | otherwise = return empty inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space -- autolink diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 7e5d33c50..6d2b1229d 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -62,7 +62,7 @@ rtfEmbedImage x@(Image _ (src,_)) = do let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}" return $ if B.null imgdata then x - else RawInline "rtf" raw + else RawInline (Format "rtf") raw else return x rtfEmbedImage x = return x @@ -218,8 +218,9 @@ blockToRTF indent alignment (BlockQuote lst) = concatMap (blockToRTF (indent + indentIncrement) alignment) lst blockToRTF indent _ (CodeBlock _ str) = rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF _ _ (RawBlock "rtf" str) = str -blockToRTF _ _ (RawBlock _ _) = "" +blockToRTF _ _ (RawBlock f str) + | f == Format "rtf" = str + | otherwise = "" blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ @@ -325,8 +326,9 @@ inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" inlineToRTF (Str str) = stringToRTF str inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str inlineToRTF (Cite _ lst) = inlineListToRTF lst -inlineToRTF (RawInline "rtf" str) = str -inlineToRTF (RawInline _ _) = "" +inlineToRTF (RawInline f str) + | f == Format "rtf" = str + | otherwise = "" inlineToRTF (LineBreak) = "\\line " inlineToRTF Space = " " inlineToRTF (Link text (src, _)) = diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index f8b460001..b1fd3d6af 100644 --- 
a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2008-2010 John MacFarlane and Peter Wang @@ -152,10 +153,11 @@ blockToTexinfo (CodeBlock _ str) = do flush (text str) $$ text "@end verbatim" <> blankline -blockToTexinfo (RawBlock "texinfo" str) = return $ text str -blockToTexinfo (RawBlock "latex" str) = - return $ text "@tex" $$ text str $$ text "@end tex" -blockToTexinfo (RawBlock _ _) = return empty +blockToTexinfo (RawBlock f str) + | f == "texinfo" = return $ text str + | f == "latex" || f == "tex" = + return $ text "@tex" $$ text str $$ text "@end tex" + | otherwise = return empty blockToTexinfo (BulletList lst) = do items <- mapM listItemToTexinfo lst @@ -418,10 +420,11 @@ inlineToTexinfo (Cite _ lst) = inlineListToTexinfo lst inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str -inlineToTexinfo (RawInline f str) | f == "latex" || f == "tex" = - return $ text "@tex" $$ text str $$ text "@end tex" -inlineToTexinfo (RawInline "texinfo" str) = return $ text str -inlineToTexinfo (RawInline _ _) = return empty +inlineToTexinfo (RawInline f str) + | f == "latex" || f == "tex" = + return $ text "@tex" $$ text str $$ text "@end tex" + | f == "texinfo" = return $ text str + | otherwise = return empty inlineToTexinfo (LineBreak) = return $ text "@*" inlineToTexinfo Space = return $ char ' ' diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 3fb554dca..27e8b60ec 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -121,10 +121,9 @@ blockToTextile opts (Para inlines) = do then "

" ++ contents ++ "

" else contents ++ if null listLevel then "\n" else "" -blockToTextile _ (RawBlock f str) = - if f == "html" || f == "textile" - then return str - else return "" +blockToTextile _ (RawBlock f str) + | f == Format "html" || f == Format "textile" = return str + | otherwise = return "" blockToTextile _ HorizontalRule = return "
\n" @@ -401,10 +400,9 @@ inlineToTextile _ (Str str) = return $ escapeStringForTextile str inlineToTextile _ (Math _ str) = return $ "" ++ escapeStringForXML str ++ "" -inlineToTextile _ (RawInline f str) = - if f == "html" || f == "textile" - then return str - else return "" +inlineToTextile _ (RawInline f str) + | f == Format "html" || f == Format "textile" = return str + | otherwise = return "" inlineToTextile _ (LineBreak) = return "\n" diff --git a/tests/Tests/Arbitrary.hs b/tests/Tests/Arbitrary.hs index 5939d088d..31c0cb46a 100644 --- a/tests/Tests/Arbitrary.hs +++ b/tests/Tests/Arbitrary.hs @@ -41,8 +41,8 @@ arbInline :: Int -> Gen Inline arbInline n = frequency $ [ (60, liftM Str realString) , (60, return Space) , (10, liftM2 Code arbAttr realString) - , (5, elements [ RawInline "html" "" - , RawInline "latex" "\\my{command}" ]) + , (5, elements [ RawInline (Format "html") "" + , RawInline (Format "latex") "\\my{command}" ]) ] ++ [ x | x <- nesters, n > 1] where nesters = [ (10, liftM Emph $ arbInlines (n-1)) , (10, liftM Strong $ arbInlines (n-1)) @@ -74,9 +74,9 @@ arbBlock :: Int -> Gen Block arbBlock n = frequency $ [ (10, liftM Plain $ arbInlines (n-1)) , (15, liftM Para $ arbInlines (n-1)) , (5, liftM2 CodeBlock arbAttr realString) - , (2, elements [ RawBlock "html" + , (2, elements [ RawBlock (Format "html") "
\n*&*\n
" - , RawBlock "latex" + , RawBlock (Format "latex") "\\begin[opt]{env}\nhi\n{\\end{env}" ]) , (5, do x1 <- choose (1 :: Int, 6) -- cgit v1.2.3 From 210d7b7bce32d6160fbfd558ddab921f8c3506cb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Aug 2013 17:53:11 -0700 Subject: Updated tests for new Format. --- tests/latex-reader.native | 6 +++--- tests/markdown-reader-more.native | 10 +++++----- tests/mediawiki-reader.native | 20 ++++++++++---------- tests/rst-reader.native | 6 +++--- tests/testsuite.native | 36 ++++++++++++++++++------------------ tests/textile-reader.native | 16 ++++++++-------- tests/writer.native | 36 ++++++++++++++++++------------------ 7 files changed, 65 insertions(+), 65 deletions(-) diff --git a/tests/latex-reader.native b/tests/latex-reader.native index d19196345..504e8b701 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -1,5 +1,5 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) -[RawBlock "latex" "\\maketitle" +[RawBlock (Format {unFormat = "latex"}) "\\maketitle" ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] @@ -260,8 +260,8 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,HorizontalRule ,Header 1 ("",[],[]) [Str "LaTeX"] ,BulletList - [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] 
[RawInline "latex" "\\cite[22-23]{smith.1899}"]]] - ,[Para [RawInline "latex" "\\doublespacing"]] + [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline (Format {unFormat = "latex"}) "\\cite[22-23]{smith.1899}"]]] + ,[Para [RawInline (Format {unFormat = "latex"}) "\\doublespacing"]] ,[Para [Math InlineMath "2+2=4"]] ,[Para [Math InlineMath "x \\in y"]] ,[Para [Math InlineMath "\\alpha \\wedge \\omega"]] diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 2e8dc9dde..c88c0ed67 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -2,9 +2,9 @@ ,Header 2 ("blank-line-before-url-in-link-reference",[],[]) [Str "Blank",Space,Str "line",Space,Str "before",Space,Str "URL",Space,Str "in",Space,Str "link",Space,Str "reference"] ,Para [Link [Str "foo"] ("/url",""),Space,Str "and",Space,Link [Str "bar"] ("/url","title")] ,Header 2 ("raw-context-environments",[],[]) [Str "Raw",Space,Str "ConTeXt",Space,Str "environments"] -,Plain [RawInline "tex" "\\placeformula "] -,RawBlock "context" "\\startformula\n L_{1} = L_{2}\n \\stopformula" -,RawBlock "context" "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]" +,Plain [RawInline (Format {unFormat = "tex"}) "\\placeformula "] +,RawBlock (Format {unFormat = "context"}) "\\startformula\n L_{1} = L_{2}\n \\stopformula" +,RawBlock (Format {unFormat = "context"}) "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]" ,Header 2 ("urls-with-spaces",[],[]) [Str "URLs",Space,Str "with",Space,Str "spaces"] ,Para [Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("bar%20baz","title")] ,Para [Link [Str "baz"] ("/foo%20foo",""),Space,Link [Str "bam"] ("/foo%20fee",""),Space,Link [Str "bork"] ("/foo/zee%20zob","title")] @@ -12,13 +12,13 @@ 
,HorizontalRule ,HorizontalRule ,Header 2 ("raw-html-before-header",[],[]) [Str "Raw",Space,Str "HTML",Space,Str "before",Space,Str "header"] -,Para [RawInline "html" "
",RawInline "html" ""] +,Para [RawInline (Format {unFormat = "html"}) "",RawInline (Format {unFormat = "html"}) ""] ,Header 3 ("my-header",[],[]) [Str "my",Space,Str "header"] ,Header 2 ("in-math",[],[]) [Str "$",Space,Str "in",Space,Str "math"] ,Para [Math InlineMath "\\$2 + \\$3"] ,Header 2 ("commented-out-list-item",[],[]) [Str "Commented-out",Space,Str "list",Space,Str "item"] ,BulletList - [[Plain [Str "one",Space,RawInline "html" ""]] + [[Plain [Str "one",Space,RawInline (Format {unFormat = "html"}) ""]] ,[Plain [Str "three"]]] ,Header 2 ("backslash-newline",[],[]) [Str "Backslash",Space,Str "newline"] ,Para [Str "hi",LineBreak,Str "there"] diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index f16518b65..f6e09e45a 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -51,11 +51,11 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "bud"] ,Para [Str "another"] ,Header 2 ("",[],[]) [Str "raw",Space,Str "html"] -,Para [Str "hi",Space,RawInline "html" "",Emph [Str "there"],RawInline "html" "",Str "."] -,Para [RawInline "html" "",Str "inserted",RawInline "html" ""] -,RawBlock "html" "
" +,Para [Str "hi",Space,RawInline (Format {unFormat = "html"}) "",Emph [Str "there"],RawInline (Format {unFormat = "html"}) "",Str "."] +,Para [RawInline (Format {unFormat = "html"}) "",Str "inserted",RawInline (Format {unFormat = "html"}) ""] +,RawBlock (Format {unFormat = "html"}) "
" ,Para [Str "hi",Space,Emph [Str "there"]] -,RawBlock "html" "
" +,RawBlock (Format {unFormat = "html"}) "
" ,Header 2 ("",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"] ,Para [Str "H",Subscript [Str "2"],Str "O",Space,Str "base",Superscript [Emph [Str "exponent"]],Space,Strikeout [Str "hello"]] ,Header 2 ("",[],[]) [Str "inline",Space,Str "code"] @@ -140,7 +140,7 @@ Pandoc (Meta {unMeta = fromList []}) ,[Plain [Str "this",Space,Str "looks",Space,Str "like",Space,Str "a",Space,Str "continuation"]] ,[Plain [Str "and",Space,Str "is",Space,Str "often",Space,Str "used"]] ,[Plain [Str "instead",LineBreak,Str "of",Space,Str "
"]]])]] - ,[Plain [RawInline "mediawiki" "{{{template\n|author=John\n|title=My Book\n}}}"] + ,[Plain [RawInline (Format {unFormat = "mediawiki"}) "{{{template\n|author=John\n|title=My Book\n}}}"] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "five",Space,Str "sub",Space,Str "1"] ,OrderedList (1,DefaultStyle,DefaultDelim) @@ -168,16 +168,16 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Code ("",[],[]) "\160hell\160\160\160\160\160\160yeah"] ,Para [Code ("",[],[]) "Start\160with\160a\160space\160in\160the\160first\160column,",LineBreak,Code ("",[],[]) "(before\160the\160).",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "Then\160your\160block\160format\160will\160be",LineBreak,Code ("",[],[]) "\160\160\160\160maintained.",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "This\160is\160good\160for\160copying\160in\160code\160blocks:",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "def\160function():",LineBreak,Code ("",[],[]) "\160\160\160\160\"\"\"documentation\160string\"\"\"",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\160\160\160\160if\160True:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160True",LineBreak,Code ("",[],[]) "\160\160\160\160else:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160False"] ,Para [Str "Not"] -,RawBlock "html" "
" +,RawBlock (Format {unFormat = "html"}) "
" ,Para [Str "preformatted"] ,Para [Str "Don't",Space,Str "need"] ,Para [Code ("",[],[]) "a\160blank\160line"] ,Para [Str "around",Space,Str "a",Space,Str "preformatted",Space,Str "block."] ,Header 2 ("",[],[]) [Str "templates"] -,RawBlock "mediawiki" "{{Welcome}}" -,RawBlock "mediawiki" "{{Foo:Bar}}" -,RawBlock "mediawiki" "{{Thankyou|all your effort|Me}}" -,Para [Str "Written",Space,RawInline "mediawiki" "{{{date}}}",Space,Str "by",Space,RawInline "mediawiki" "{{{name}}}",Str "."] +,RawBlock (Format {unFormat = "mediawiki"}) "{{Welcome}}" +,RawBlock (Format {unFormat = "mediawiki"}) "{{Foo:Bar}}" +,RawBlock (Format {unFormat = "mediawiki"}) "{{Thankyou|all your effort|Me}}" +,Para [Str "Written",Space,RawInline (Format {unFormat = "mediawiki"}) "{{{date}}}",Space,Str "by",Space,RawInline (Format {unFormat = "mediawiki"}) "{{{name}}}",Str "."] ,Header 2 ("",[],[]) [Str "tables"] ,Table [] [AlignDefault,AlignDefault] [0.0,0.0] [[] diff --git a/tests/rst-reader.native b/tests/rst-reader.native index 49677d958..69e73ae40 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -172,11 +172,11 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [[Para [Str "123-4567"]]])] ,Header 1 ("",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock "html" "
foo
" +,RawBlock (Format {unFormat = "html"}) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock "html" "
\n
\n
\n foo\n
\n
\n
" +,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n foo\n
\n
\n
" ,Header 1 ("",[],[]) [Str "LaTeX",Space,Str "Block"] -,RawBlock "latex" "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" +,RawBlock (Format {unFormat = "latex"}) "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" ,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ".",Space,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str "."] ,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",Space,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",Space,Code ("",[],[]) "",Str "."] diff --git a/tests/testsuite.native b/tests/testsuite.native index d7f5f8864..f9cf606f3 100644 --- a/tests/testsuite.native +++ b/tests/testsuite.native @@ -228,45 +228,45 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "sublist"]]]]])] ,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock "html" "
" +,RawBlock (Format {unFormat = "html"}) "
" ,Plain [Str "foo"] -,RawBlock "html" "
\n" +,RawBlock (Format {unFormat = "html"}) "
\n" ,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,RawBlock "html" "
\n
\n
" +,RawBlock (Format {unFormat = "html"}) "
\n
\n
" ,Plain [Str "foo"] -,RawBlock "html" "
\n
\n
" +,RawBlock (Format {unFormat = "html"}) "
\n
\n
" ,Plain [Str "bar"] -,RawBlock "html" "
\n
\n" +,RawBlock (Format {unFormat = "html"}) "
\n
\n" ,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] -,RawBlock "html" "\n\n\n\n
" +,RawBlock (Format {unFormat = "html"}) "\n\n\n\n\n\n
" ,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"]] -,RawBlock "html" "" +,RawBlock (Format {unFormat = "html"}) "" ,Plain [Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] -,RawBlock "html" "
\n\n\n" +,RawBlock (Format {unFormat = "html"}) "
\n\n\n" ,Para [Str "Here\8217s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,RawBlock "html" "
\n " +,RawBlock (Format {unFormat = "html"}) "
\n " ,Plain [Str "foo"] -,RawBlock "html" "
\n" +,RawBlock (Format {unFormat = "html"}) "
\n" ,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] ,CodeBlock ("",[],[]) "
\n foo\n
" ,Para [Str "As",Space,Str "should",Space,Str "this:"] ,CodeBlock ("",[],[]) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock "html" "
\n
\n
\n " +,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n " ,Plain [Str "foo"] -,RawBlock "html" "
\n
\n
\n" +,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n" ,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] -,RawBlock "html" "\n" +,RawBlock (Format {unFormat = "html"}) "\n" ,Para [Str "Multiline:"] -,RawBlock "html" "\n\n\n" +,RawBlock (Format {unFormat = "html"}) "\n\n\n" ,Para [Str "Code",Space,Str "block:"] ,CodeBlock ("",[],[]) "" ,Para [Str "Just",Space,Str "plain",Space,Str "comment,",Space,Str "with",Space,Str "trailing",Space,Str "spaces",Space,Str "on",Space,Str "the",Space,Str "line:"] -,RawBlock "html" " \n" +,RawBlock (Format {unFormat = "html"}) " \n" ,Para [Str "Code:"] ,CodeBlock ("",[],[]) "
" ,Para [Str "Hr\8217s:"] -,RawBlock "html" "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" +,RawBlock (Format {unFormat = "html"}) "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" ,HorizontalRule ,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] @@ -294,7 +294,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,HorizontalRule ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList - [[Plain [RawInline "tex" "\\cite[22-23]{smith.1899}"]] + [[Plain [RawInline (Format {unFormat = "tex"}) "\\cite[22-23]{smith.1899}"]] ,[Plain [Math InlineMath "2+2=4"]] ,[Plain [Math InlineMath "x \\in y"]] ,[Plain [Math InlineMath "\\alpha \\wedge \\omega"]] @@ -309,7 +309,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "Shoes",Space,Str "($20)",Space,Str "and",Space,Str "socks",Space,Str "($5)."]] ,[Plain [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",Space,Str "emphasized"],Space,Str "23$."]]] ,Para [Str "Here\8217s",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"] -,RawBlock "latex" "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" +,RawBlock (Format {unFormat = "latex"}) "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" ,HorizontalRule ,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] diff --git a/tests/textile-reader.native b/tests/textile-reader.native index d14ae02c8..70b33f31d 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -137,23 +137,23 @@ Pandoc (Meta {unMeta = fromList []}) ,Header 1 ("",[],[]) [Str "Entities"] ,Para [Str "*",LineBreak,Str "&"] ,Header 1 ("",[],[]) [Str "Raw",Space,Str "HTML"] -,Para [Str "However",Str ",",Space,RawInline "html" "",Space,Str 
"raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline "html" "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] -,RawBlock "html" "
" +,Para [Str "However",Str ",",Space,RawInline (Format {unFormat = "html"}) "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline (Format {unFormat = "html"}) "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] +,RawBlock (Format {unFormat = "html"}) "
" ,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"] -,RawBlock "html" "
" +,RawBlock (Format {unFormat = "html"}) "
" ,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"] -,RawBlock "html" "
" +,RawBlock (Format {unFormat = "html"}) "
" ,Para [Str "inlined"] -,RawBlock "html" "
" +,RawBlock (Format {unFormat = "html"}) "
" ,Para [Str "as",Space,Str "well",Str "."] ,BulletList [[Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "\8217",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"]] - ,[Plain [Str "but",Space,Str "this",Space,RawInline "html" "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline "html" ""]]] + ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format {unFormat = "html"}) "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format {unFormat = "html"}) ""]]] ,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ,Header 1 ("",[],[]) [Str "Raw",Space,Str "LaTeX"] ,Para [Str "This",Space,Str "Textile",Space,Str "reader",Space,Str "also",Space,Str "accepts",Space,Str "raw",Space,Str "LaTeX",Space,Str "for",Space,Str "blocks",Space,Str ":"] -,RawBlock "latex" "\\begin{itemize}\n \\item one\n \\item two\n\\end{itemize}" -,Para [Str "and",Space,Str "for",Space,RawInline "latex" "\\emph{inlines}",Str "."] +,RawBlock (Format {unFormat = "latex"}) "\\begin{itemize}\n \\item one\n \\item two\n\\end{itemize}" +,Para [Str "and",Space,Str "for",Space,RawInline (Format {unFormat = "latex"}) "\\emph{inlines}",Str "."] ,Header 1 ("",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"] ,Para [Str "PBS (Public Broadcasting System)"] ,Para [Str "Hi",Str "\8482"] diff --git a/tests/writer.native b/tests/writer.native index d7f5f8864..f9cf606f3 100644 --- a/tests/writer.native +++ b/tests/writer.native @@ -228,45 +228,45 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "sublist"]]]]])] ,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock "html" "
" +,RawBlock (Format {unFormat = "html"}) "
" ,Plain [Str "foo"] -,RawBlock "html" "
\n" +,RawBlock (Format {unFormat = "html"}) "
\n" ,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,RawBlock "html" "
\n
\n
" +,RawBlock (Format {unFormat = "html"}) "
\n
\n
" ,Plain [Str "foo"] -,RawBlock "html" "
\n
\n
" +,RawBlock (Format {unFormat = "html"}) "
\n
\n
" ,Plain [Str "bar"] -,RawBlock "html" "
\n
\n" +,RawBlock (Format {unFormat = "html"}) "
\n
\n" ,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] -,RawBlock "html" "\n\n\n\n
" +,RawBlock (Format {unFormat = "html"}) "\n\n\n\n\n\n
" ,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"]] -,RawBlock "html" "" +,RawBlock (Format {unFormat = "html"}) "" ,Plain [Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] -,RawBlock "html" "
\n\n\n" +,RawBlock (Format {unFormat = "html"}) "
\n\n\n" ,Para [Str "Here\8217s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,RawBlock "html" "
\n " +,RawBlock (Format {unFormat = "html"}) "
\n " ,Plain [Str "foo"] -,RawBlock "html" "
\n" +,RawBlock (Format {unFormat = "html"}) "
\n" ,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] ,CodeBlock ("",[],[]) "
\n foo\n
" ,Para [Str "As",Space,Str "should",Space,Str "this:"] ,CodeBlock ("",[],[]) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock "html" "
\n
\n
\n " +,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n " ,Plain [Str "foo"] -,RawBlock "html" "
\n
\n
\n" +,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n" ,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] -,RawBlock "html" "\n" +,RawBlock (Format {unFormat = "html"}) "\n" ,Para [Str "Multiline:"] -,RawBlock "html" "\n\n\n" +,RawBlock (Format {unFormat = "html"}) "\n\n\n" ,Para [Str "Code",Space,Str "block:"] ,CodeBlock ("",[],[]) "" ,Para [Str "Just",Space,Str "plain",Space,Str "comment,",Space,Str "with",Space,Str "trailing",Space,Str "spaces",Space,Str "on",Space,Str "the",Space,Str "line:"] -,RawBlock "html" " \n" +,RawBlock (Format {unFormat = "html"}) " \n" ,Para [Str "Code:"] ,CodeBlock ("",[],[]) "
" ,Para [Str "Hr\8217s:"] -,RawBlock "html" "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" +,RawBlock (Format {unFormat = "html"}) "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" ,HorizontalRule ,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] @@ -294,7 +294,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,HorizontalRule ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList - [[Plain [RawInline "tex" "\\cite[22-23]{smith.1899}"]] + [[Plain [RawInline (Format {unFormat = "tex"}) "\\cite[22-23]{smith.1899}"]] ,[Plain [Math InlineMath "2+2=4"]] ,[Plain [Math InlineMath "x \\in y"]] ,[Plain [Math InlineMath "\\alpha \\wedge \\omega"]] @@ -309,7 +309,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "Shoes",Space,Str "($20)",Space,Str "and",Space,Str "socks",Space,Str "($5)."]] ,[Plain [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",Space,Str "emphasized"],Space,Str "23$."]]] ,Para [Str "Here\8217s",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"] -,RawBlock "latex" "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" +,RawBlock (Format {unFormat = "latex"}) "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" ,HorizontalRule ,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] -- cgit v1.2.3 From 9152fa1a95346e26bc290b3f5018b2eeb5d4e077 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Aug 2013 18:13:38 -0700 Subject: Use query instead of queryWith. 
--- src/Text/Pandoc/Biblio.hs | 5 +++-- src/Text/Pandoc/Shared.hs | 29 +++++++++++++++++++++++++++-- src/Text/Pandoc/Writers/ConTeXt.hs | 4 ++-- src/Text/Pandoc/Writers/LaTeX.hs | 7 ++++--- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 755c779ea..206b38530 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -36,6 +36,7 @@ import Text.CSL hiding ( Cite(..), Citation(..), endWithPunct ) import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition import Text.Pandoc.Generic +import Text.Pandoc.Walk import Text.Pandoc.Shared (stringify) import Text.Parsec hiding (State) import Control.Monad @@ -48,7 +49,7 @@ processBiblio Nothing _ p = p processBiblio _ [] p = p processBiblio (Just style) r p = let p' = evalState (bottomUpM setHash p) 1 - grps = queryWith getCitation p' + grps = query getCitation p' result = citeproc procOpts style r (setNearNote style $ map (map toCslCite) grps) cits_map = M.fromList $ zip grps (citations result) @@ -121,7 +122,7 @@ isTextualCitation (c:_) = citationMode c == AuthorInText isTextualCitation _ = False -- | Retrieve all citations from a 'Pandoc' docuument. To be used with --- 'queryWith'. +-- 'query'. 
getCitation :: Inline -> [[Citation]] getCitation i | Cite t _ <- i = [t] | otherwise = [] diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 09874299d..2b692dc3c 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DeriveDataTypeable, CPP #-} +{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses #-} {- Copyright (C) 2006-2013 John MacFarlane @@ -79,6 +79,7 @@ module Text.Pandoc.Shared ( ) where import Text.Pandoc.Definition +import Text.Pandoc.Walk import Text.Pandoc.Generic import Text.Pandoc.Builder (Blocks, ToMetaValue(..)) import qualified Text.Pandoc.Builder as B @@ -105,6 +106,7 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), renderOptions) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 +import Text.Pandoc.Compat.Monoid #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -383,7 +385,7 @@ consolidateInlines [] = [] -- | Convert list of inlines to a string with formatting removed. 
stringify :: [Inline] -> String -stringify = queryWith go +stringify = query go where go :: Inline -> [Char] go Space = " " go (Str x) = x @@ -433,6 +435,29 @@ data Element = Blk Block -- lvl num attributes label contents deriving (Eq, Read, Show, Typeable, Data) +instance Walkable Inline Element where + walk f (Blk x) = Blk (walk f x) + walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts) + walkM f (Blk x) = Blk `fmap` walkM f x + walkM f (Sec lev nums attr ils elts) = do + ils' <- walkM f ils + elts' <- walkM f elts + return $ Sec lev nums attr ils' elts' + query f (Blk x) = query f x + query f (Sec _ _ _ ils elts) = query f ils <> query f elts + +instance Walkable Block Element where + walk f (Blk x) = Blk (walk f x) + walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts) + walkM f (Blk x) = Blk `fmap` walkM f x + walkM f (Sec lev nums attr ils elts) = do + ils' <- walkM f ils + elts' <- walkM f elts + return $ Sec lev nums attr ils' elts' + query f (Blk x) = query f x + query f (Sec _ _ _ ils elts) = query f ils <> query f elts + + -- | Convert Pandoc inline list to plain text identifier. HTML -- identifiers must start with a letter, and may contain only -- letters, digits, and the characters _-. 
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 40dc1deb5..0379f8b0a 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -33,7 +33,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options -import Text.Pandoc.Generic (queryWith) +import Text.Pandoc.Walk (query) import Text.Printf ( printf ) import Data.List ( intercalate, isPrefixOf ) import Control.Monad.State @@ -326,7 +326,7 @@ inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents let codeBlock x@(CodeBlock _ _) = [x] codeBlock _ = [] - let codeBlocks = queryWith codeBlock contents + let codeBlocks = query codeBlock contents return $ if null codeBlocks then text "\\footnote{" <> nest 2 contents' <> char '}' else text "\\startbuffer " <> nest 2 contents' <> diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index d09ccc3b8..860ca8349 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -30,6 +30,7 @@ Conversion of 'Pandoc' format into LaTeX. 
-} module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where import Text.Pandoc.Definition +import Text.Pandoc.Walk import Text.Pandoc.Generic import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared @@ -86,7 +87,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do -- see if there are internal links let isInternalLink (Link _ ('#':xs,_)) = [xs] isInternalLink _ = [] - modify $ \s -> s{ stInternalLinks = queryWith isInternalLink blocks } + modify $ \s -> s{ stInternalLinks = query isInternalLink blocks } let template = writerTemplate options -- set stBook depending on documentclass let bookClasses = ["memoir","book","report","scrreprt","scrbook"] @@ -248,9 +249,9 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) let hasCode (Code _ _) = [True] hasCode _ = [] opts <- gets stOptions - let fragile = not $ null $ queryWith hasCodeBlock elts ++ + let fragile = not $ null $ query hasCodeBlock elts ++ if writerListings opts - then queryWith hasCode elts + then query hasCode elts else [] let allowframebreaks = "allowframebreaks" `elem` classes let optionslist = ["fragile" | fragile] ++ -- cgit v1.2.3 From 02a125d0aa8becd258c99b27c5e30116f0cbacb4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Aug 2013 18:45:00 -0700 Subject: Use walk, walkM in place of bottomUp, bottomUpM when possible. They are significantly faster. 
--- src/Text/Pandoc/PDF.hs | 4 ++-- src/Text/Pandoc/Readers/LaTeX.hs | 4 ++-- src/Text/Pandoc/Readers/MediaWiki.hs | 4 ++-- src/Text/Pandoc/Shared.hs | 2 +- src/Text/Pandoc/Writers/Docx.hs | 11 ++++++----- src/Text/Pandoc/Writers/EPUB.hs | 6 +++--- src/Text/Pandoc/Writers/FB2.hs | 8 ++++++-- src/Text/Pandoc/Writers/LaTeX.hs | 3 +-- src/Text/Pandoc/Writers/Markdown.hs | 8 ++++---- src/Text/Pandoc/Writers/ODT.hs | 4 ++-- src/Text/Pandoc/Writers/RTF.hs | 4 ++-- 11 files changed, 31 insertions(+), 27 deletions(-) diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index b030e2ca7..ce20ac1b4 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -44,7 +44,7 @@ import Data.List (isInfixOf) import qualified Data.ByteString.Base64 as B64 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition -import Text.Pandoc.Generic (bottomUpM) +import Text.Pandoc.Walk (walkM) import Text.Pandoc.Shared (fetchItem, warn) import Text.Pandoc.Options (WriterOptions(..)) import Text.Pandoc.MIME (extensionFromMimeType) @@ -73,7 +73,7 @@ handleImages :: String -- ^ source directory/base URL -> FilePath -- ^ temp dir to store images -> Pandoc -- ^ document -> IO Pandoc -handleImages baseURL tmpdir = bottomUpM (handleImage' baseURL tmpdir) +handleImages baseURL tmpdir = walkM (handleImage' baseURL tmpdir) handleImage' :: String -> FilePath diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index eb0baedda..71e1e0ac2 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -35,7 +35,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, ) where import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Biblio (processBiblio) @@ -815,7 +815,7 @@ keyvals :: LP [(String, String)] keyvals = try $ char '[' *> manyTill keyval (char ']') alltt :: String -> LP Blocks -alltt t = bottomUp strToCode <$> parseFromString 
blocks +alltt t = walk strToCode <$> parseFromString blocks (substitute " " "\\ " $ substitute "%" "\\%" $ concat $ intersperse "\\\\\n" $ lines t) where strToCode (Str s) = Code nullAttr s diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 56049e035..8f1ff2776 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -42,7 +42,7 @@ import Text.Pandoc.Options import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) import Text.Pandoc.Parsing hiding ( nested ) -import Text.Pandoc.Generic ( bottomUp ) +import Text.Pandoc.Walk ( walk ) import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead ) import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$)) @@ -342,7 +342,7 @@ preformatted = try $ do spacesStr _ = False if F.all spacesStr contents then return mempty - else return $ B.para $ bottomUp strToCode contents + else return $ B.para $ walk strToCode contents header :: MWParser Blocks header = try $ do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 2b692dc3c..6fd78b188 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -518,7 +518,7 @@ isHeaderBlock _ = False -- | Shift header levels up or down. 
headerShift :: Int -> Pandoc -> Pandoc -headerShift n = bottomUp shift +headerShift n = walk shift where shift :: Block -> Block shift (Header level attr inner) = Header (level + n) attr inner shift x = x diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 2483e243f..aa618b2cc 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -45,6 +45,7 @@ import Text.Pandoc.Shared hiding (Element) import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) +import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () import Text.XML.Light import Text.TeXMath @@ -108,7 +109,7 @@ writeDocx :: WriterOptions -- ^ Writer options -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = bottomUp (concatMap fixDisplayMath) doc + let doc' = walk fixDisplayMath doc refArchive <- liftM (toArchive . toLazy) $ case writerReferenceDocx opts of Just f -> B.readFile f @@ -810,17 +811,17 @@ stripLeadingTrailingSpace = go . reverse . go . reverse where go (Space:xs) = xs go xs = xs -fixDisplayMath :: Block -> [Block] +fixDisplayMath :: Block -> Block fixDisplayMath (Plain lst) | any isDisplayMath lst && not (all isDisplayMath lst) = -- chop into several paragraphs so each displaymath is its own - map (Plain . stripLeadingTrailingSpace) $ + Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || not (isDisplayMath x || isDisplayMath y)) lst fixDisplayMath (Para lst) | any isDisplayMath lst && not (all isDisplayMath lst) = -- chop into several paragraphs so each displaymath is its own - map (Para . stripLeadingTrailingSpace) $ + Div ("",["math"],[]) $ map (Para . 
stripLeadingTrailingSpace) $ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath x = [x] +fixDisplayMath x = x diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index ab14ff8a0..fa2b45036 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -48,7 +48,7 @@ import qualified Text.Pandoc.Shared as Shared import Text.Pandoc.Builder (fromList, setMeta) import Text.Pandoc.Options import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Control.Monad.State import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID @@ -116,7 +116,7 @@ writeEPUB opts doc@(Pandoc meta _) = do -- handle pictures picsRef <- newIORef [] - Pandoc _ blocks <- bottomUpM + Pandoc _ blocks <- walkM (transformInline opts' sourceDir picsRef) doc pics <- readIORef picsRef let readPicEntry entries (oldsrc, newsrc) = do @@ -520,7 +520,7 @@ correlateRefs chapterHeaderLevel bs = -- Replace internal link references using the table produced -- by correlateRefs. replaceRefs :: [(String,String)] -> [Block] -> [Block] -replaceRefs refTable = bottomUp replaceOneRef +replaceRefs refTable = walk replaceOneRef where replaceOneRef x@(Link lab ('#':xs,tit)) = case lookup xs refTable of Just url -> Link lab (url,tit) diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 2576b2dc2..adbe948be 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -45,7 +45,7 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) import Text.Pandoc.Shared (orderedListMarkers) -import Text.Pandoc.Generic (bottomUp) +import Text.Pandoc.Walk -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. 
@@ -423,6 +423,10 @@ indent = indentBlock indentLines ins = let lns = split isLineBreak ins :: [[Inline]] in intercalate [LineBreak] $ map ((Str spacer):) lns +capitalize :: Inline -> Inline +capitalize (Str xs) = Str $ map toUpper xs +capitalize x = x + -- | Convert a Pandoc's Inline element to FictionBook XML representation. toXml :: Inline -> FBM [Content] toXml (Str s) = return [txt s] @@ -432,7 +436,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss toXml (Superscript ss) = list `liftM` wrap "sup" ss toXml (Subscript ss) = list `liftM` wrap "sub" ss -toXml (SmallCaps ss) = cMapM toXml $ bottomUp (map toUpper) ss +toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific inner <- cMapM toXml ss return $ [txt "‘"] ++ inner ++ [txt "’"] diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 860ca8349..7f9a99801 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -31,7 +31,6 @@ Conversion of 'Pandoc' format into LaTeX. 
module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where import Text.Pandoc.Definition import Text.Pandoc.Walk -import Text.Pandoc.Generic import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options @@ -498,7 +497,7 @@ sectionHeader unnumbered ref level lst = do txt <- inlineListToLaTeX lst let noNote (Note _) = Str "" noNote x = x - let lstNoNotes = bottomUp noNote lst + let lstNoNotes = walk noNote lst let star = if unnumbered then text "*" else empty -- footnotes in sections don't work unless you specify an optional -- argument: \section[mysec]{mysec\footnote{blah}} diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index d195d8445..3d0ed8702 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -32,7 +32,7 @@ Markdown: -} module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared @@ -82,7 +82,7 @@ writePlain opts document = where document' = plainify document plainify :: Pandoc -> Pandoc -plainify = bottomUp go +plainify = walk go where go :: Inline -> Inline go (Emph xs) = SmallCaps xs go (Strong xs) = SmallCaps xs @@ -643,13 +643,13 @@ inlineToMarkdown opts (Strikeout lst) = do then "~~" <> contents <> "~~" else "<s>" <> contents <> "</s>" inlineToMarkdown opts (Superscript lst) = do - let lst' = bottomUp escapeSpaces lst + let lst' = walk escapeSpaces lst contents <- inlineListToMarkdown opts lst' return $ if isEnabled Ext_superscript opts then "^" <> contents <> "^" else "<sup>" <> contents <> "</sup>" inlineToMarkdown opts (Subscript lst) = do - let lst' = bottomUp escapeSpaces lst + let lst' = walk escapeSpaces lst contents <- inlineListToMarkdown opts lst' return $ if isEnabled Ext_subscript opts then "~" <> contents <> "~" diff --git a/src/Text/Pandoc/Writers/ODT.hs 
b/src/Text/Pandoc/Writers/ODT.hs index 589010bb9..fb94d9ffb 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem, warn ) import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import Control.Monad (liftM) import Text.Pandoc.XML @@ -63,7 +63,7 @@ writeODT opts doc@(Pandoc meta _) = do -- handle pictures picEntriesRef <- newIORef ([] :: [Entry]) let sourceDir = writerSourceDirectory opts - doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc + doc' <- walkM (transformPic sourceDir picEntriesRef) doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 6d2b1229d..0e8ce2ece 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -34,7 +34,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Generic (bottomUpM) +import Text.Pandoc.Walk import Data.List ( isSuffixOf, intercalate ) import Data.Char ( ord, chr, isDigit, toLower ) import System.FilePath ( takeExtension ) @@ -70,7 +70,7 @@ rtfEmbedImage x = return x -- images embedded as encoded binary data. writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String writeRTFWithEmbeddedImages options doc = - writeRTF options `fmap` bottomUpM rtfEmbedImage doc + writeRTF options `fmap` walkM rtfEmbedImage doc -- | Convert Pandoc to a string in rich text format. 
writeRTF :: WriterOptions -> Pandoc -> String -- cgit v1.2.3 From 6f736dfa7578faab7b90546ee5b2c275185968c8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Aug 2013 19:04:15 -0700 Subject: Added Tests.Walk. This verifies that walk and query match the generic traversals. --- pandoc.cabal | 1 + tests/Tests/Walk.hs | 47 +++++++++++++++++++++++++++++++++++++++++++++++ tests/test-pandoc.hs | 2 ++ 3 files changed, 50 insertions(+) create mode 100644 tests/Tests/Walk.hs diff --git a/pandoc.cabal b/pandoc.cabal index 3903fe606..e22908918 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -406,6 +406,7 @@ Test-Suite test-pandoc Tests.Helpers Tests.Arbitrary Tests.Shared + Tests.Walk Tests.Readers.LaTeX Tests.Readers.Markdown Tests.Readers.RST diff --git a/tests/Tests/Walk.hs b/tests/Tests/Walk.hs new file mode 100644 index 000000000..f6aa1beae --- /dev/null +++ b/tests/Tests/Walk.hs @@ -0,0 +1,47 @@ +{-# LANGUAGE ScopedTypeVariables, FlexibleContexts #-} +module Tests.Walk (tests) where + +import Text.Pandoc.Definition +import Text.Pandoc.Walk +import Test.Framework +import Tests.Helpers +import Data.Char (toUpper) +import Tests.Arbitrary() +import Data.Generics +import Data.Monoid + +tests :: [Test] +tests = [ testGroup "Walk" + [ property "p_walk inlineTrans" (p_walk inlineTrans) + , property "p_walk blockTrans" (p_walk blockTrans) + , property "p_query inlineQuery" (p_query inlineQuery) + , property "p_query blockQuery" (p_query blockQuery) + ] + ] + +p_walk :: (Typeable a, Walkable a Pandoc) + => (a -> a) -> Pandoc -> Bool +p_walk f = (\(d :: Pandoc) -> everywhere (mkT f) d == walk f d) + +p_query :: (Eq a, Typeable a1, Monoid a, Walkable a1 Pandoc) + => (a1 -> a) -> Pandoc -> Bool +p_query f = (\(d :: Pandoc) -> everything mappend (mempty `mkQ` f) d == query f d) + +inlineTrans :: Inline -> Inline +inlineTrans (Str xs) = Str $ map toUpper xs +inlineTrans (Emph xs) = Strong xs +inlineTrans x = x + +blockTrans :: Block -> Block +blockTrans (Plain xs) = Para xs 
+blockTrans (BlockQuote xs) = Div ("",["special"],[]) xs +blockTrans x = x + +inlineQuery :: Inline -> String +inlineQuery (Str xs) = xs +inlineQuery _ = "" + +blockQuery :: Block -> [Int] +blockQuery (Header lev _ _) = [lev] +blockQuery _ = [] + diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index 24b7a8261..67ca5eae2 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -14,11 +14,13 @@ import qualified Tests.Writers.HTML import qualified Tests.Writers.Native import qualified Tests.Writers.Markdown import qualified Tests.Shared +import qualified Tests.Walk import Text.Pandoc.Shared (inDirectory) tests :: [Test] tests = [ testGroup "Old" Tests.Old.tests , testGroup "Shared" Tests.Shared.tests + , testGroup "Walk" Tests.Walk.tests , testGroup "Writers" [ testGroup "Native" Tests.Writers.Native.tests , testGroup "ConTeXt" Tests.Writers.ConTeXt.tests -- cgit v1.2.3 From e279175ea517e2df65fe5d716bc02e383b04fc36 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 Aug 2013 15:58:09 -0700 Subject: Options: Changed `writerSourceDir` to `writerSourceURL` (now a Maybe). Previously we used to store the directory of the first input file, even if it was local, and used this as a base directory for finding images in ODT, EPUB, Docx, and PDF. This has been confusing to many users. It seems better to look for images relative to the current working directory, even if the first file argument is in another directory. writerSourceURL is set to 'Just url' when the first command-line argument is an absolute URL. (So, relative links will be resolved in relation to the first page.) Otherwise, 'Nothing'. The ODT, EPUB, Docx, and PDF writers have been modified accordingly. Note that this change may break some existing workflows. If you have been assuming that relative links will be interpreted relative to the directory of the first file argument, you'll need to make that the current directory before running pandoc. Closes #942. 
--- pandoc.hs | 12 +++++++----- src/Text/Pandoc/Options.hs | 4 ++-- src/Text/Pandoc/PDF.hs | 6 +++--- src/Text/Pandoc/Shared.hs | 20 ++++++++++---------- src/Text/Pandoc/Writers/Docx.hs | 3 +-- src/Text/Pandoc/Writers/EPUB.hs | 21 +++++++-------------- src/Text/Pandoc/Writers/ODT.hs | 9 ++++----- 7 files changed, 34 insertions(+), 41 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index fdf0b35b7..81672e16c 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1034,13 +1034,15 @@ main = do return $ Just csl { CSL.styleAbbrevs = abbrevs } else return Nothing - let sourceDir = case sources of - [] -> "." + let sourceURL = case sources of + [] -> Nothing (x:_) -> case parseURI x of Just u | uriScheme u `elem` ["http:","https:"] -> - show u{ uriPath = "", uriQuery = "", uriFragment = "" } - _ -> takeDirectory x + Just $ show u{ uriPath = "", + uriQuery = "", + uriFragment = "" } + _ -> Nothing let readerOpts = def{ readerSmart = smart || (texLigatures && (laTeXOutput || "context" `isPrefixOf` writerName')) @@ -1074,7 +1076,7 @@ main = do writerColumns = columns, writerEmailObfuscation = obfuscationMethod, writerIdentifierPrefix = idPrefix, - writerSourceDirectory = sourceDir, + writerSourceURL = sourceURL, writerUserDataDir = datadir, writerHtml5 = html5, writerHtmlQTags = htmlQTags, diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 61a85cf6e..c7c37d6b8 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -286,7 +286,7 @@ data WriterOptions = WriterOptions , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML -- and for footnote marks in markdown - , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file + , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory , writerCiteMethod :: CiteMethod -- ^ How to print cites , 
writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations @@ -329,7 +329,7 @@ instance Default WriterOptions where , writerColumns = 72 , writerEmailObfuscation = JavascriptObfuscation , writerIdentifierPrefix = "" - , writerSourceDirectory = "." + , writerSourceURL = Nothing , writerUserDataDir = Nothing , writerCiteMethod = Citeproc , writerBiblioFiles = [] diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index ce20ac1b4..ae611bc37 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -65,17 +65,17 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) -> Pandoc -- ^ document -> IO (Either ByteString ByteString) makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do - doc' <- handleImages (writerSourceDirectory opts) tmpdir doc + doc' <- handleImages (writerSourceURL opts) tmpdir doc let source = writer opts doc' tex2pdf' tmpdir program source -handleImages :: String -- ^ source directory/base URL +handleImages :: Maybe String -- ^ source base URL -> FilePath -- ^ temp dir to store images -> Pandoc -- ^ document -> IO Pandoc handleImages baseURL tmpdir = walkM (handleImage' baseURL tmpdir) -handleImage' :: String +handleImage' :: Maybe String -> FilePath -> Inline -> IO Inline diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 6fd78b188..d670a35bc 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -612,18 +612,18 @@ readDataFileUTF8 userDir fname = -- | Fetch an image or other item from the local filesystem or the net. -- Returns raw content and maybe mime type. 
-fetchItem :: String -> String +fetchItem :: Maybe String -> String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) -fetchItem sourceDir s = - case s of - _ | isAbsoluteURI s -> openURL s - | isAbsoluteURI sourceDir -> openURL $ sourceDir ++ "/" ++ s - | otherwise -> E.try $ do +fetchItem sourceURL s + | isAbsoluteURI s = openURL s + | otherwise = case sourceURL of + Just u -> openURL (u ++ "/" ++ s) + Nothing -> E.try readLocalFile + where readLocalFile = do let mime = case takeExtension s of - ".gz" -> getMimeType $ dropExtension s - x -> getMimeType x - let f = sourceDir </> s - cont <- BS.readFile f + ".gz" -> getMimeType $ dropExtension s + x -> getMimeType x + cont <- BS.readFile s return (cont, mime) -- | Read from a URL and return raw data and maybe mime type. diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index aa618b2cc..c8673ae48 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -728,8 +728,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] Nothing -> do - let sourceDir = writerSourceDirectory opts - res <- liftIO $ fetchItem sourceDir src + res <- liftIO $ fetchItem (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." 
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index fa2b45036..ac0e7610c 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -55,7 +55,7 @@ import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) import Data.Char ( toLower ) -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) #if MIN_VERSION_base(4,6,0) #else @@ -93,7 +93,6 @@ writeEPUB opts doc@(Pandoc meta _) = do then MathML Nothing else writerHTMLMathMethod opts , writerWrapText = False } - let sourceDir = writerSourceDirectory opts' let mbCoverImage = lookup "epub-cover-image" vars -- cover page @@ -117,10 +116,10 @@ writeEPUB opts doc@(Pandoc meta _) = do -- handle pictures picsRef <- newIORef [] Pandoc _ blocks <- walkM - (transformInline opts' sourceDir picsRef) doc + (transformInline opts' picsRef) doc pics <- readIORef picsRef let readPicEntry entries (oldsrc, newsrc) = do - res <- fetchItem sourceDir oldsrc + res <- fetchItem (writerSourceURL opts') oldsrc case res of Left _ -> do warn $ "Could not find image `" ++ oldsrc ++ "', skipping..." 
@@ -414,19 +413,13 @@ showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" transformInline :: WriterOptions - -> FilePath -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images -> Inline -> IO Inline -transformInline opts sourceDir picsRef (Image lab (src,tit)) - | isAbsoluteURI src = do - raw <- makeSelfContained Nothing - $ writeHtmlInline opts (Image lab (src,tit)) - return $ RawInline (Format "html") raw - | otherwise = do +transformInline opts picsRef (Image lab (src,tit)) = do let src' = unEscapeString src pics <- readIORef picsRef - let oldsrc = sourceDir src' + let oldsrc = maybe src' ( src) $ writerSourceURL opts let ext = takeExtension src' newsrc <- case lookup oldsrc pics of Just n -> return n @@ -435,11 +428,11 @@ transformInline opts sourceDir picsRef (Image lab (src,tit)) modifyIORef picsRef ( (oldsrc, new): ) return new return $ Image lab (newsrc, tit) -transformInline opts _ _ (x@(Math _ _)) +transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained Nothing $ writeHtmlInline opts x return $ RawInline (Format "html") raw -transformInline _ _ _ x = return x +transformInline _ _ x = return x writeHtmlInline :: WriterOptions -> Inline -> String writeHtmlInline opts z = trimr $ diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index fb94d9ffb..751a323f5 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -62,8 +62,7 @@ writeODT opts doc@(Pandoc meta _) = do readDataFile datadir "reference.odt" -- handle pictures picEntriesRef <- newIORef ([] :: [Entry]) - let sourceDir = writerSourceDirectory opts - doc' <- walkM (transformPic sourceDir picEntriesRef) doc + doc' <- walkM (transformPic opts picEntriesRef) doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy 
newContents @@ -111,9 +110,9 @@ writeODT opts doc@(Pandoc meta _) = do let archive'' = addEntryToArchive metaEntry archive' return $ fromArchive archive'' -transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline -transformPic sourceDir entriesRef (Image lab (src,_)) = do - res <- fetchItem sourceDir src +transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline +transformPic opts entriesRef (Image lab (src,_)) = do + res <- fetchItem (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do warn $ "Could not find image `" ++ src ++ "', skipping..." -- cgit v1.2.3 From 7b975c2bcc32e5ddd96338afdb32a1ceacdc0980 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 Aug 2013 16:16:24 -0700 Subject: PDF: Add suggestion to use --latex-engine=xelatex on encoding error. --- src/Text/Pandoc/PDF.hs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index ae611bc37..a445e2991 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -109,8 +109,14 @@ tex2pdf' tmpDir program source = do (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source let msg = "Error producing PDF from TeX source." case (exit, mbPdf) of - (ExitFailure _, _) -> return $ Left $ - msg <> "\n" <> extractMsg log' + (ExitFailure _, _) -> do + let logmsg = extractMsg log' + let extramsg = + case logmsg of + x | "! Package inputenc Error" `BC.isPrefixOf` x -> + "\nTry running pandoc with --latex-engine=xelatex." + _ -> "" + return $ Left $ msg <> "\n" <> extractMsg log' <> extramsg (ExitSuccess, Nothing) -> return $ Left msg (ExitSuccess, Just pdf) -> return $ Right pdf -- cgit v1.2.3 From eb0c0b86ed518982eb5d3336e73ff5cb1d59d87c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 Aug 2013 17:13:46 -0700 Subject: ODT/OpenDocument writer: Minor changes for ODF 1.2 conformance. See #939. 
We leave the nonconforming contextual-spacing attribute, which is provided by LibreOffice itself and seems to be supported. --- data/reference.odt | Bin 7058 -> 10702 bytes data/templates | 2 +- src/Text/Pandoc/Writers/ODT.hs | 20 ++++++++++++++------ src/Text/Pandoc/Writers/OpenDocument.hs | 3 ++- tests/writer.opendocument | 2 +- 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/data/reference.odt b/data/reference.odt index 6307119d3..29c1777d7 100644 Binary files a/data/reference.odt and b/data/reference.odt differ diff --git a/data/templates b/data/templates index c27f59c01..0cb55f228 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit c27f59c010b0468f01b710cdf3a3c04a450a03e7 +Subproject commit 0cb55f2289148b106ab78ce8f15efc8d0b8acda0 diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 751a323f5..cc0a06243 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -65,26 +65,30 @@ writeODT opts doc@(Pandoc meta _) = do doc' <- walkM (transformPic opts picEntriesRef) doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime - let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents + let contentEntry = toEntry "content.xml" epochtime + $ fromStringLazy newContents picEntries <- readIORef picEntriesRef - let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries + let archive = foldr addEntryToArchive refArchive + $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive let toFileEntry fp = case getMimeType fp of Nothing -> empty Just m -> selfClosingTag "manifest:file-entry" [("manifest:media-type", m) ,("manifest:full-path", fp) + ,("manifest:version", "1.2") ] - let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ] + let files = [ ent | ent <- filesInArchive archive, + not ("META-INF" `isPrefixOf` ent) ] let 
manifestEntry = toEntry "META-INF/manifest.xml" epochtime $ fromStringLazy $ render Nothing $ text "" $$ ( inTags True "manifest:manifest" - [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0")] + [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0") + ,("manifest:version","1.2")] $ ( selfClosingTag "manifest:file-entry" [("manifest:media-type","application/vnd.oasis.opendocument.text") - ,("manifest:version","1.2") ,("manifest:full-path","/")] $$ vcat ( map toFileEntry $ files ) ) @@ -107,7 +111,11 @@ writeODT opts doc@(Pandoc meta _) = do ) ) ) - let archive'' = addEntryToArchive metaEntry archive' + -- make sure mimetype is first + let mimetypeEntry = toEntry "mimetype" epochtime + $ fromStringLazy "application/vnd.oasis.opendocument.text" + let archive'' = addEntryToArchive mimetypeEntry + $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 05c576c20..3ec5c2073 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -461,7 +461,8 @@ tableStyle :: Int -> [(Char,Double)] -> Doc tableStyle num wcs = let tableId = "Table" ++ show (num + 1) table = inTags True "style:style" - [("style:name", tableId)] $ + [("style:name", tableId) + ,("style:family", "table")] $ selfClosingTag "style:table-properties" [("table:align" , "center")] colStyle (c,0) = selfClosingTag "style:style" diff --git a/tests/writer.opendocument b/tests/writer.opendocument index 9e1661475..1cee01f76 100644 --- a/tests/writer.opendocument +++ b/tests/writer.opendocument @@ -1,5 +1,5 @@ - + -- cgit v1.2.3 From 544d0bbf316e809271cdc64bc2946397c5eff547 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 Aug 2013 17:19:22 -0700 Subject: reference.odt: Change generator in meta.xml to Pandoc. 
--- data/reference.odt | Bin 10702 -> 10595 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/data/reference.odt b/data/reference.odt index 29c1777d7..08385dd4c 100644 Binary files a/data/reference.odt and b/data/reference.odt differ -- cgit v1.2.3 From 3ebdc5b5f0f5bc88f727a36268d55921672899c0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 Aug 2013 16:21:24 -0700 Subject: Text.Pandoc.Compat.Monoid: Small improvements to the (<>) definition. --- src/Text/Pandoc/Compat/Monoid.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Text/Pandoc/Compat/Monoid.hs b/src/Text/Pandoc/Compat/Monoid.hs index 80ffcbbd6..cb7ea2527 100644 --- a/src/Text/Pandoc/Compat/Monoid.hs +++ b/src/Text/Pandoc/Compat/Monoid.hs @@ -11,6 +11,10 @@ import Data.Monoid (mappend, Monoid(..)) #if MIN_VERSION_base(4,5,0) #else +infixr 6 <> + +-- | An infix synonym for 'mappend'. (<>) :: Monoid m => m -> m -> m (<>) = mappend +{-# INLINE (<>) #-} #endif -- cgit v1.2.3 From bd73d73a28acc2863bd52bdc6f0f9d850fa34d84 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 13 Aug 2013 18:25:20 -0700 Subject: Removed `--print-sample-lua-writer`, added `--print-default-data-file`. Closes #943. --- README | 7 +++---- pandoc.hs | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README b/README index e85ca1905..e5de97556 100644 --- a/README +++ b/README @@ -311,9 +311,8 @@ General writer options : Print the default template for an output *FORMAT*. (See `-t` for a list of possible *FORMAT*s.) -`--print-sample-lua-writer` -: Print a sample lua custom writer (see [Custom writers](#custom-writers), - below. +`--print-default-data-file=`*FILE* +: Print a default data file. `--no-wrap` : Disable text wrapping in output. By default, text is wrapped @@ -2759,7 +2758,7 @@ Creating a custom writer requires writing a lua function for each possible element in a pandoc document. 
To get a documented example which you can modify according to your needs, do - pandoc --print-sample-lua-writer + pandoc --print-default-data-file sample.lua Authors ======= diff --git a/pandoc.hs b/pandoc.hs index 81672e16c..1c0a49a73 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -33,8 +33,8 @@ module Main where import Text.Pandoc import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Readers.LaTeX (handleIncludes) -import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, safeRead, - headerShift, normalize, err, warn ) +import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile, + safeRead, headerShift, normalize, err, warn ) import Text.Pandoc.XML ( toEntities, fromEntities ) import Text.Pandoc.SelfContained ( makeSelfContained ) import Text.Pandoc.Process (pipeProcess) @@ -58,6 +58,7 @@ import Data.Foldable (foldrM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B +import qualified Data.ByteString as BS import Text.CSL.Reference (Reference(..)) import Data.Aeson (eitherDecode', encode) @@ -347,13 +348,13 @@ options = "FORMAT") "" -- "Print default template for FORMAT" - , Option "" ["print-sample-lua-writer"] - (NoArg - (\_ -> do - sample <- readDataFileUTF8 Nothing "sample.lua" - UTF8.hPutStr stdout sample - exitWith ExitSuccess)) - "" -- "Print sample lua custom writer" + , Option "" ["print-default-data-file"] + (ReqArg + (\arg _ -> do + readDataFile Nothing arg >>= BS.hPutStr stdout + exitWith ExitSuccess) + "FILE") + "" -- "Print default data file" , Option "" ["no-wrap"] (NoArg -- cgit v1.2.3 From f6b5735d095dcfe2b0ef4aab02ff85acb47140c6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 13 Aug 2013 23:11:33 -0700 Subject: Added module for writing python scripts, with several examples. See scripts subdirectory. 
--- pandoc.hs | 14 ++++++++------ scripts/caps.py | 9 +++++++++ scripts/comments.py | 23 +++++++++++++++++++++++ scripts/deemph.py | 10 ++++++++++ scripts/myemph.py | 11 +++++++++++ scripts/pandoc.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 110 insertions(+), 6 deletions(-) create mode 100755 scripts/caps.py create mode 100755 scripts/comments.py create mode 100755 scripts/deemph.py create mode 100755 scripts/myemph.py create mode 100755 scripts/pandoc.py diff --git a/pandoc.hs b/pandoc.hs index 1c0a49a73..0df4cc555 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -91,14 +91,16 @@ isTextFormat :: String -> Bool isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"] externalFilter :: FilePath -> [String] -> Pandoc -> IO Pandoc -externalFilter f args' d = E.catch - (do (exitcode, outbs, errbs) <- pipeProcess Nothing f args' $ encode d +externalFilter f args' d = E.handle filterException $ + do (exitcode, outbs, errbs) <- pipeProcess Nothing f args' $ encode d case exitcode of ExitSuccess -> return $ either error id $ eitherDecode' outbs - ExitFailure _ -> err 83 $ "Error running filter `" ++ UTF8.toStringLazy outbs ++ - UTF8.toStringLazy errbs ++ "'") - (\e -> let _ = (e :: E.SomeException) - in err 83 $ "Error running filter `" ++ f ++ "'") + ExitFailure _ -> err 83 $ "Error running filter " ++ f ++ "\n" ++ + UTF8.toStringLazy outbs ++ + UTF8.toStringLazy errbs + where filterException :: E.SomeException -> IO Pandoc + filterException e = err 83 $ "Error running filter " ++ f ++ + "\n" ++ show e -- | Data structure for command line options. 
data Opt = Opt diff --git a/scripts/caps.py b/scripts/caps.py new file mode 100755 index 000000000..3ab8bc7a3 --- /dev/null +++ b/scripts/caps.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +from pandoc import toJSONFilter + +def caps(key, value, format): + if key == 'Str': + return {'Str': value.upper()} + +if __name__ == "__main__": + toJSONFilter(caps) diff --git a/scripts/comments.py b/scripts/comments.py new file mode 100755 index 000000000..3767f973a --- /dev/null +++ b/scripts/comments.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +from pandoc import toJSONFilter +import re + +incomment = False + +def comment(k,v,format=""): + global incomment + if k == 'RawBlock': + f, s = v + fmt = f['unFormat'] + if fmt == "html": + if re.search("", s): + incomment = True + return [] + elif re.search("", s): + incomment = False + return [] + if incomment: + return [] # suppress anything in a comment + +if __name__ == "__main__": + toJSONFilter(comment) diff --git a/scripts/deemph.py b/scripts/deemph.py new file mode 100755 index 000000000..c1d532969 --- /dev/null +++ b/scripts/deemph.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python +from pandoc import walk, toJSONFilter +from caps import caps + +def deemph(k,v,f): + if k == 'Emph' and f == 'html': + return walk(v,caps,f) + +if __name__ == "__main__": + toJSONFilter(deemph) diff --git a/scripts/myemph.py b/scripts/myemph.py new file mode 100755 index 000000000..92f6202b4 --- /dev/null +++ b/scripts/myemph.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +from pandoc import toJSONFilter, rawInline + +def myemph(k, v, f): + if k == 'Emph' and f == 'latex': + v.insert(0, rawInline("latex", "\\myemph{")) + v.append(rawInline("latex", "}")) + return v + +if __name__ == "__main__": + toJSONFilter(myemph) diff --git a/scripts/pandoc.py b/scripts/pandoc.py new file mode 100755 index 000000000..8bc3afc03 --- /dev/null +++ b/scripts/pandoc.py @@ -0,0 +1,49 @@ +import sys +import json + +def walk(x, action, format = ""): + if isinstance(x, list): + 
array = [] + for item in x: + if isinstance(item, dict): + for k in item: + res = action(k, item[k], format) + if res is None: + array.append(walk(item, action, format)) + elif isinstance(res, list): + for z in res: + array.append(walk(z, action, format)) + else: + array.append(walk(res, action, format)) + else: + array.append(walk(item, action, format)) + return array + elif isinstance(x, dict): + obj = {} + for k in x: + obj[k] = walk(x[k], action, format) + return obj + else: + return x + +def toJSONFilter(action): + doc = json.loads(sys.stdin.read()) + if len(sys.argv) > 1: + format = sys.argv[1] + else: + format = "" + altered = walk(doc, action, format) + json.dump(altered, sys.stdout) + +def rawInline(format, s): + return {"RawInline": [{"unFormat": format}, s]} + +def rawBlock(format, s): + return {"RawBlock": [{"unFormat": format}, s]} + +def attributes(attrs): + attrs = attrs or [] + ident = attrs["id"] or "" + classes = attrs["classes"] or [] + keyvals = [x for x in attrs and x != "classes" and x != "id"] + return [ident, classes, keyvals] -- cgit v1.2.3 From 96c2d542977e5cc510f315aad7c2ee357fe988c9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 11:48:04 -0700 Subject: Commented python modules/sample scripts. --- scripts/caps.py | 5 +++++ scripts/comments.py | 9 ++++++++- scripts/deemph.py | 10 +++++++--- scripts/myemph.py | 9 ++++++--- scripts/pandoc.py | 33 +++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 7 deletions(-) diff --git a/scripts/caps.py b/scripts/caps.py index 3ab8bc7a3..e29d48854 100755 --- a/scripts/caps.py +++ b/scripts/caps.py @@ -1,4 +1,9 @@ #!/usr/bin/env python + +"""Pandoc filter to convert all regular text to uppercase. +Code, link URLs, etc. are not affected. 
+""" + from pandoc import toJSONFilter def caps(key, value, format): diff --git a/scripts/comments.py b/scripts/comments.py index 3767f973a..5700c3485 100755 --- a/scripts/comments.py +++ b/scripts/comments.py @@ -2,9 +2,16 @@ from pandoc import toJSONFilter import re +"""Pandoc filter that causes everything between +'' and '' +to be ignored. The comment lines must appear on +lines by themselves, with blank lines surrounding +them. +""" + incomment = False -def comment(k,v,format=""): +def comment(k,v,fmt): global incomment if k == 'RawBlock': f, s = v diff --git a/scripts/deemph.py b/scripts/deemph.py index c1d532969..467641b4a 100755 --- a/scripts/deemph.py +++ b/scripts/deemph.py @@ -2,9 +2,13 @@ from pandoc import walk, toJSONFilter from caps import caps -def deemph(k,v,f): - if k == 'Emph' and f == 'html': - return walk(v,caps,f) +"""Pandoc filter that causes emphasized text to be displayed +in ALL CAPS. +""" + +def deemph(key, val, fmt): + if key == 'Emph': + return walk(val, caps, fmt) if __name__ == "__main__": toJSONFilter(deemph) diff --git a/scripts/myemph.py b/scripts/myemph.py index 92f6202b4..0514df317 100755 --- a/scripts/myemph.py +++ b/scripts/myemph.py @@ -1,11 +1,14 @@ #!/usr/bin/env python from pandoc import toJSONFilter, rawInline +"""Pandoc filter that causes emphasis to be rendered using +the custom macro '\myemph{...}' rather than '\emph{...}' +in latex. Other output formats are unaffected. 
+""" + def myemph(k, v, f): if k == 'Emph' and f == 'latex': - v.insert(0, rawInline("latex", "\\myemph{")) - v.append(rawInline("latex", "}")) - return v + return [rawInline("latex", "\\myemph{")] + v + [rawInline("latex","}")] if __name__ == "__main__": toJSONFilter(myemph) diff --git a/scripts/pandoc.py b/scripts/pandoc.py index 8bc3afc03..6d7f84b24 100755 --- a/scripts/pandoc.py +++ b/scripts/pandoc.py @@ -1,7 +1,19 @@ +# Author: John MacFarlane +# Copyright: (C) 2013 John MacFarlane +# License: GPL version 2 or higher + +""" +Functions to aid writing python scripts that process the pandoc +AST serialized as JSON. +""" + import sys import json def walk(x, action, format = ""): + """Walk a tree, applying an action to every object. + Returns a modified tree. + """ if isinstance(x, list): array = [] for item in x: @@ -27,6 +39,20 @@ def walk(x, action, format = ""): return x def toJSONFilter(action): + """Converts an action into a filter that reads a JSON-formatted + pandoc document from stdin, transforms it by walking the tree + with the action, and returns a new JSON-formatted pandoc document + to stdout. The argument is a function action(key, value, format), + where key is the type of the pandoc object (e.g. 'Str', 'Para'), + value is the contents of the object (e.g. a string for 'Str', + a list of inline elements for 'Para'), and format is the target + output format (which will be taken for the first command line + argument if present). If the function returns None, the object + to which it applies will remain unchanged. If it returns an + object, the object will be replaced. If it returns a list, the + list will be spliced in to the list to which the target object + belongs. (So, returning an empty list deletes the object.) + """ doc = json.loads(sys.stdin.read()) if len(sys.argv) > 1: format = sys.argv[1] @@ -36,12 +62,19 @@ def toJSONFilter(action): json.dump(altered, sys.stdout) def rawInline(format, s): + """Returns a 'RawInline' inline object. 
+ """ return {"RawInline": [{"unFormat": format}, s]} def rawBlock(format, s): + """Returns a 'RawBlock' inline object. + """ return {"RawBlock": [{"unFormat": format}, s]} def attributes(attrs): + """Returns an attribute list, constructed from the + dictionary attrs. + """ attrs = attrs or [] ident = attrs["id"] or "" classes = attrs["classes"] or [] -- cgit v1.2.3 From bc3b5f99d6a7437e401f4155e111089341e5a18d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 12:46:48 -0700 Subject: Added graphviz.py example script. --- scripts/graphviz.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100755 scripts/graphviz.py diff --git a/scripts/graphviz.py b/scripts/graphviz.py new file mode 100755 index 000000000..ec31578ab --- /dev/null +++ b/scripts/graphviz.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +"""Pandoc filter to process code blocks with class "graphviz" into +graphviz-generated images. +""" + +import pygraphviz +import hashlib +import os +import sys +from pandoc import toJSONFilter + +def sha1(x): + return hashlib.sha1(x).hexdigest() + +imagedir = "graphviz-images" +files = [] + +def graphviz(key, value, format): + if key == 'CodeBlock': + [[ident,classes,keyvals], code] = value + caption = "caption" + if "graphviz" in classes: + G = pygraphviz.AGraph(string = code) + G.layout() + filename = sha1(code) + if format == "html": + filetype = "png" + elif format == "latex": + filetype = "pdf" + else: + filetype = "png" + alt = [{'Str': caption}] + src = imagedir + '/' + filename + '.' 
+ filetype + if not src in files: + try: + os.mkdir(imagedir) + sys.stderr.write('Created directory ' + imagedir) + except OSError: + pass + G.draw(src) + sys.stderr.write('Created image ' + src) + tit = "" + return {'Para': [{'Image': [alt, [src,tit]]}]} + +if __name__ == "__main__": + toJSONFilter(graphviz) -- cgit v1.2.3 From 3ebecad36287e41f327de4898902c96cce4b18fe Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 13:02:33 -0700 Subject: Improved graphviz.py. It won't regenerate images that have already been built. --- scripts/graphviz.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/graphviz.py b/scripts/graphviz.py index ec31578ab..2d977079b 100755 --- a/scripts/graphviz.py +++ b/scripts/graphviz.py @@ -14,7 +14,6 @@ def sha1(x): return hashlib.sha1(x).hexdigest() imagedir = "graphviz-images" -files = [] def graphviz(key, value, format): if key == 'CodeBlock': @@ -32,14 +31,14 @@ def graphviz(key, value, format): filetype = "png" alt = [{'Str': caption}] src = imagedir + '/' + filename + '.' + filetype - if not src in files: + if not os.path.isfile(src): try: os.mkdir(imagedir) - sys.stderr.write('Created directory ' + imagedir) + sys.stderr.write('Created directory ' + imagedir + '\n') except OSError: pass G.draw(src) - sys.stderr.write('Created image ' + src) + sys.stderr.write('Created image ' + src + '\n') tit = "" return {'Para': [{'Image': [alt, [src,tit]]}]} -- cgit v1.2.3 From 8926230175e1d22199505f52914f4d5767e27f62 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 13:02:54 -0700 Subject: Print stderr output of filters to stderr. 
--- pandoc.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index 0df4cc555..a16ffbc3c 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -47,7 +47,7 @@ import System.Console.GetOpt import Data.Char ( toLower ) import Data.List ( intercalate, isPrefixOf, sort ) import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) -import System.IO ( stdout ) +import System.IO ( stdout, stderr ) import System.IO.Error ( isDoesNotExistError ) import qualified Control.Exception as E import Control.Exception.Extensible ( throwIO ) @@ -93,11 +93,11 @@ isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","ep externalFilter :: FilePath -> [String] -> Pandoc -> IO Pandoc externalFilter f args' d = E.handle filterException $ do (exitcode, outbs, errbs) <- pipeProcess Nothing f args' $ encode d + when (not $ B.null errbs) $ B.hPutStr stderr errbs case exitcode of ExitSuccess -> return $ either error id $ eitherDecode' outbs ExitFailure _ -> err 83 $ "Error running filter " ++ f ++ "\n" ++ - UTF8.toStringLazy outbs ++ - UTF8.toStringLazy errbs + UTF8.toStringLazy outbs where filterException :: E.SomeException -> IO Pandoc filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++ show e -- cgit v1.2.3 From 5d2afd389478667d0c44ddf584bd93b9377e927e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 22:19:39 -0700 Subject: Added scripts/abc.py. --- scripts/abc.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100755 scripts/abc.py diff --git a/scripts/abc.py b/scripts/abc.py new file mode 100755 index 000000000..9c80fd6db --- /dev/null +++ b/scripts/abc.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +""" +Pandoc filter to process code blocks with class "abc" containing +ABC notation into images. Assumes that abcm2ps and ImageMagick's +convert are in the path. Images are put in the abc-images directory. 
+""" + +import hashlib +import os +import sys +from pandoc import toJSONFilter +from subprocess import Popen, PIPE, call + +imagedir = "abc-images" + +def sha1(x): + return hashlib.sha1(x).hexdigest() + +def abc2eps(abc, filetype, outfile): + p = Popen(["abcm2ps", "-O", outfile + '.eps', "-"],stdin=PIPE) + p.stdin.write(abc) + p.communicate() + p.stdin.close() + p = call(["convert", outfile + '.eps', outfile + '.' + filetype]) + +def abc(key, value, format): + if key == 'CodeBlock': + [[ident,classes,keyvals], code] = value + if "abc" in classes: + outfile = imagedir + '/' + sha1(code) + if format == "html": + filetype = "png" + elif format == "latex": + filetype = "pdf" + else: + filetype = "png" + src = outfile + '.' + filetype + if not os.path.isfile(src): + try: + os.mkdir(imagedir) + sys.stderr.write('Created directory ' + imagedir + '\n') + except OSError: + pass + abc2eps(code, filetype, outfile) + sys.stderr.write('Created image ' + src + '\n') + return {'Para': [{'Image': [[], [src,""]]}]} + +if __name__ == "__main__": + toJSONFilter(abc) -- cgit v1.2.3 From b95823f4db6fb9943790bbffd693ca9756ca954e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 22:20:41 -0700 Subject: Minor code reformatting in script examples. --- scripts/caps.py | 3 ++- scripts/comments.py | 3 ++- scripts/deemph.py | 3 ++- scripts/graphviz.py | 3 ++- scripts/myemph.py | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/scripts/caps.py b/scripts/caps.py index e29d48854..b86cd1520 100755 --- a/scripts/caps.py +++ b/scripts/caps.py @@ -1,6 +1,7 @@ #!/usr/bin/env python -"""Pandoc filter to convert all regular text to uppercase. +""" +Pandoc filter to convert all regular text to uppercase. Code, link URLs, etc. are not affected. 
""" diff --git a/scripts/comments.py b/scripts/comments.py index 5700c3485..304af1a2d 100755 --- a/scripts/comments.py +++ b/scripts/comments.py @@ -2,7 +2,8 @@ from pandoc import toJSONFilter import re -"""Pandoc filter that causes everything between +""" +Pandoc filter that causes everything between '' and '' to be ignored. The comment lines must appear on lines by themselves, with blank lines surrounding diff --git a/scripts/deemph.py b/scripts/deemph.py index 467641b4a..f69dac5b8 100755 --- a/scripts/deemph.py +++ b/scripts/deemph.py @@ -2,7 +2,8 @@ from pandoc import walk, toJSONFilter from caps import caps -"""Pandoc filter that causes emphasized text to be displayed +""" +Pandoc filter that causes emphasized text to be displayed in ALL CAPS. """ diff --git a/scripts/graphviz.py b/scripts/graphviz.py index 2d977079b..519a3a9cc 100755 --- a/scripts/graphviz.py +++ b/scripts/graphviz.py @@ -1,6 +1,7 @@ #!/usr/bin/env python -"""Pandoc filter to process code blocks with class "graphviz" into +""" +Pandoc filter to process code blocks with class "graphviz" into graphviz-generated images. """ diff --git a/scripts/myemph.py b/scripts/myemph.py index 0514df317..e527a0b2e 100755 --- a/scripts/myemph.py +++ b/scripts/myemph.py @@ -1,7 +1,8 @@ #!/usr/bin/env python from pandoc import toJSONFilter, rawInline -"""Pandoc filter that causes emphasis to be rendered using +""" +Pandoc filter that causes emphasis to be rendered using the custom macro '\myemph{...}' rather than '\emph{...}' in latex. Other output formats are unaffected. """ -- cgit v1.2.3 From f24c574a1ad931ac64a90ca74be70b4a65d01f08 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 22:51:55 -0700 Subject: Added scripts/tikz.py. 
--- scripts/tikz.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100755 scripts/tikz.py diff --git a/scripts/tikz.py b/scripts/tikz.py new file mode 100755 index 000000000..7e1ed7927 --- /dev/null +++ b/scripts/tikz.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +""" +Pandoc filter to process raw latex tikz environments into images. +Assumes that pdflatex is in the path, and that the standalone +package is available. Also assumes that ImageMagick's convert +is in the path. Images are put in the tikz-images directory. +""" + +import hashlib +import re +import os +import sys +import shutil +from pandoc import toJSONFilter +from subprocess import Popen, PIPE, call +from tempfile import mkdtemp + +imagedir = "tikz-images" + +def sha1(x): + return hashlib.sha1(x).hexdigest() + +def tikz2image(tikz, filetype, outfile): + tmpdir = mkdtemp() + olddir = os.getcwd() + os.chdir(tmpdir) + f = open('tikz.tex', 'w') + f.write("""\\documentclass{standalone} + \\usepackage{tikz} + \\begin{document} + """) + f.write(tikz) + f.write("\n\\end{document}\n") + f.close() + p = call(["pdflatex", 'tikz.tex'], stdout=sys.stderr) + os.chdir(olddir) + if filetype == 'pdf': + shutil.copyfile(tmpdir + '/tikz.pdf', outfile + '.pdf') + else: + call(["convert", tmpdir + '/tikz.pdf', outfile + '.' + filetype]) + shutil.rmtree(tmpdir) + +def tikz(key, value, format): + if key == 'RawBlock': + [fmt, code] = value + if fmt['unFormat'] == "latex" and re.match("\\\\begin{tikzpicture}", code): + outfile = imagedir + '/' + sha1(code) + if format == "html": + filetype = "png" + elif format == "latex": + filetype = "pdf" + else: + filetype = "png" + src = outfile + '.' 
+ filetype + if not os.path.isfile(src): + try: + os.mkdir(imagedir) + sys.stderr.write('Created directory ' + imagedir + '\n') + except OSError: + pass + tikz2image(code, filetype, outfile) + sys.stderr.write('Created image ' + src + '\n') + return {'Para': [{'Image': [[], [src,""]]}]} + +if __name__ == "__main__": + toJSONFilter(tikz) -- cgit v1.2.3 From f5426b4905d14a8fea3ae7258ee3c19fc22d7391 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 22:52:06 -0700 Subject: Small code cleanup. --- scripts/abc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/abc.py b/scripts/abc.py index 9c80fd6db..daecd1070 100755 --- a/scripts/abc.py +++ b/scripts/abc.py @@ -22,7 +22,7 @@ def abc2eps(abc, filetype, outfile): p.stdin.write(abc) p.communicate() p.stdin.close() - p = call(["convert", outfile + '.eps', outfile + '.' + filetype]) + call(["convert", outfile + '.eps', outfile + '.' + filetype]) def abc(key, value, format): if key == 'CodeBlock': -- cgit v1.2.3 From 2a6e6324a6db7151a4bb103227893af47d61f84d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 23:24:27 -0700 Subject: pandoc.py: Fixed bug for {}, removed rawInline, rawBlock. 
--- scripts/pandoc.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/scripts/pandoc.py b/scripts/pandoc.py index 6d7f84b24..f21e9cc83 100755 --- a/scripts/pandoc.py +++ b/scripts/pandoc.py @@ -18,15 +18,18 @@ def walk(x, action, format = ""): array = [] for item in x: if isinstance(item, dict): - for k in item: - res = action(k, item[k], format) - if res is None: - array.append(walk(item, action, format)) - elif isinstance(res, list): - for z in res: - array.append(walk(z, action, format)) - else: - array.append(walk(res, action, format)) + if item == {}: + array.append(walk(item, action, format)) + else: + for k in item: + res = action(k, item[k], format) + if res is None: + array.append(walk(item, action, format)) + elif isinstance(res, list): + for z in res: + array.append(walk(z, action, format)) + else: + array.append(walk(res, action, format)) else: array.append(walk(item, action, format)) return array @@ -61,16 +64,6 @@ def toJSONFilter(action): altered = walk(doc, action, format) json.dump(altered, sys.stdout) -def rawInline(format, s): - """Returns a 'RawInline' inline object. - """ - return {"RawInline": [{"unFormat": format}, s]} - -def rawBlock(format, s): - """Returns a 'RawBlock' inline object. - """ - return {"RawBlock": [{"unFormat": format}, s]} - def attributes(attrs): """Returns an attribute list, constructed from the dictionary attrs. -- cgit v1.2.3 From 3e8bd8aa15a57c3dc87772049aabedeb1e0c7582 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 23:24:45 -0700 Subject: Updated for removed unMeta, unFormat in pandoc-types. 
--- pandoc.cabal | 9 +++++++++ scripts/comments.py | 3 +-- scripts/myemph.py | 7 +++++-- scripts/tikz.py | 2 +- src/Text/Pandoc/Writers/RST.hs | 4 ++-- tests/docbook-reader.native | 2 +- tests/haddock-reader.native | 2 +- tests/html-reader.native | 2 +- tests/latex-reader.native | 8 ++++---- tests/markdown-reader-more.native | 10 +++++----- tests/mediawiki-reader.native | 22 +++++++++++----------- tests/opml-reader.native | 2 +- tests/rst-reader.native | 8 ++++---- tests/s5.native | 2 +- tests/testsuite.native | 38 +++++++++++++++++++------------------- tests/textile-reader.native | 18 +++++++++--------- tests/writer.native | 38 +++++++++++++++++++------------------- 17 files changed, 94 insertions(+), 83 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index e22908918..352da4988 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -112,6 +112,15 @@ Extra-Source-Files: -- generated man pages (produced post-build) man/man1/pandoc.1, man/man5/pandoc_markdown.5, + -- python library and sample python scripts + scripts/abc.py, + scripts/comments.py, + scripts/graphviz.py, + scripts/pandoc.py, + scripts/caps.py, + scripts/deemph.py, + scripts/myemph.py, + scripts/tikz.py, -- tests tests/bodybg.gif, tests/docbook-reader.docbook diff --git a/scripts/comments.py b/scripts/comments.py index 304af1a2d..ded21039c 100755 --- a/scripts/comments.py +++ b/scripts/comments.py @@ -15,8 +15,7 @@ incomment = False def comment(k,v,fmt): global incomment if k == 'RawBlock': - f, s = v - fmt = f['unFormat'] + fmt, s = v if fmt == "html": if re.search("", s): incomment = True diff --git a/scripts/myemph.py b/scripts/myemph.py index e527a0b2e..2a322b385 100755 --- a/scripts/myemph.py +++ b/scripts/myemph.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from pandoc import toJSONFilter, rawInline +from pandoc import toJSONFilter """ Pandoc filter that causes emphasis to be rendered using @@ -7,9 +7,12 @@ the custom macro '\myemph{...}' rather than '\emph{...}' in latex. 
Other output formats are unaffected. """ +def latex(s): + return {'RawInline': ['latex', s]} + def myemph(k, v, f): if k == 'Emph' and f == 'latex': - return [rawInline("latex", "\\myemph{")] + v + [rawInline("latex","}")] + return [latex('\\myemph{')] + v + [latex('}')] if __name__ == "__main__": toJSONFilter(myemph) diff --git a/scripts/tikz.py b/scripts/tikz.py index 7e1ed7927..4ff8b2383 100755 --- a/scripts/tikz.py +++ b/scripts/tikz.py @@ -44,7 +44,7 @@ def tikz2image(tikz, filetype, outfile): def tikz(key, value, format): if key == 'RawBlock': [fmt, code] = value - if fmt['unFormat'] == "latex" and re.match("\\\\begin{tikzpicture}", code): + if fmt == "latex" and re.match("\\\\begin{tikzpicture}", code): outfile = imagedir + '/' + sha1(code) if format == "html": filetype = "png" diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 5fbbb6afc..557658bc8 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -176,10 +176,10 @@ blockToRST (Para inlines) | otherwise = do contents <- inlineListToRST inlines return $ contents <> blankline -blockToRST (RawBlock f str) +blockToRST (RawBlock f@(Format f') str) | f == "rst" = return $ text str | otherwise = return $ blankline <> ".. 
raw:: " <> - text (map toLower $ unFormat f) $+$ + text (map toLower f') $+$ (nest 3 $ text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline diff --git a/tests/docbook-reader.native b/tests/docbook-reader.native index 2d29bb154..8c94fea3e 100644 --- a/tests/docbook-reader.native +++ b/tests/docbook-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 1 ("",[],[]) [Str "Headers"] ,Header 2 ("",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embedded",Space,Str "link"] ("/url","")] diff --git a/tests/haddock-reader.native b/tests/haddock-reader.native index 877719b50..c17c2ddf0 100644 --- a/tests/haddock-reader.native +++ b/tests/haddock-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList []}) +Pandoc (Meta (fromList [])) [Para [Str "This",Space,Str "file",Space,Str "tests",Space,Str "the",Space,Str "Pandoc",Space,Str "reader",Space,Str "for",Space,Str "Haddock.",Space,Str "We've",Space,Str "borrowed",Space,Str "examples",Space,Str "from",Space,Str "Haddock's",Space,Str "documentation:",Space,Link [Str 
"http://www.haskell.org/haddock/doc/html/ch03s08.html"] ("http://www.haskell.org/haddock/doc/html/ch03s08.html","http://www.haskell.org/haddock/doc/html/ch03s08.html"),Str "."] ,Para [Str "The",Space,Str "following",Space,Str "characters",Space,Str "have",Space,Str "special",Space,Str "meanings",Space,Str "in",Space,Str "Haddock,",Space,Str "/,",Space,Str "',",Space,Str "`,",Space,Str "\",",Space,Str "@,",Space,Str "<,",Space,Str "so",Space,Str "they",Space,Str "must",Space,Str "be",Space,Str "escaped."] ,Para [Str "*",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph,",Space,Str "not",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str ">",Space,Str "This",Space,Str "sentence",Space,Str "is",Space,Str "not",Space,Str "code.",Space,Str ">>>",Space,Str "This",Space,Str "is",Space,Str "not",Space,Str "an",Space,Str "example."] diff --git a/tests/html-reader.native b/tests/html-reader.native index 15937e594..8f60f040e 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta (fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Str ".",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] diff --git a/tests/latex-reader.native b/tests/latex-reader.native index 504e8b701..ddee17f9e 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -1,5 +1,5 @@ -Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str 
"July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) -[RawBlock (Format {unFormat = "latex"}) "\\maketitle" +Pandoc (Meta (fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) +[RawBlock (Format "latex") "\\maketitle" ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] @@ -260,8 +260,8 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,HorizontalRule ,Header 1 ("",[],[]) [Str "LaTeX"] ,BulletList - [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline (Format {unFormat = "latex"}) "\\cite[22-23]{smith.1899}"]]] - ,[Para [RawInline (Format {unFormat = "latex"}) "\\doublespacing"]] + [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline (Format "latex") "\\cite[22-23]{smith.1899}"]]] + ,[Para [RawInline (Format "latex") "\\doublespacing"]] ,[Para [Math InlineMath "2+2=4"]] ,[Para [Math InlineMath "x \\in y"]] ,[Para [Math InlineMath "\\alpha \\wedge \\omega"]] diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index c88c0ed67..ca588571f 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -2,9 +2,9 @@ ,Header 2 
("blank-line-before-url-in-link-reference",[],[]) [Str "Blank",Space,Str "line",Space,Str "before",Space,Str "URL",Space,Str "in",Space,Str "link",Space,Str "reference"] ,Para [Link [Str "foo"] ("/url",""),Space,Str "and",Space,Link [Str "bar"] ("/url","title")] ,Header 2 ("raw-context-environments",[],[]) [Str "Raw",Space,Str "ConTeXt",Space,Str "environments"] -,Plain [RawInline (Format {unFormat = "tex"}) "\\placeformula "] -,RawBlock (Format {unFormat = "context"}) "\\startformula\n L_{1} = L_{2}\n \\stopformula" -,RawBlock (Format {unFormat = "context"}) "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]" +,Plain [RawInline (Format "tex") "\\placeformula "] +,RawBlock (Format "context") "\\startformula\n L_{1} = L_{2}\n \\stopformula" +,RawBlock (Format "context") "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]" ,Header 2 ("urls-with-spaces",[],[]) [Str "URLs",Space,Str "with",Space,Str "spaces"] ,Para [Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("bar%20baz","title")] ,Para [Link [Str "baz"] ("/foo%20foo",""),Space,Link [Str "bam"] ("/foo%20fee",""),Space,Link [Str "bork"] ("/foo/zee%20zob","title")] @@ -12,13 +12,13 @@ ,HorizontalRule ,HorizontalRule ,Header 2 ("raw-html-before-header",[],[]) [Str "Raw",Space,Str "HTML",Space,Str "before",Space,Str "header"] -,Para [RawInline (Format {unFormat = "html"}) "",RawInline (Format {unFormat = "html"}) ""] +,Para [RawInline (Format "html") "",RawInline (Format "html") ""] ,Header 3 ("my-header",[],[]) [Str "my",Space,Str "header"] ,Header 2 ("in-math",[],[]) [Str "$",Space,Str "in",Space,Str "math"] ,Para [Math InlineMath "\\$2 + \\$3"] ,Header 2 ("commented-out-list-item",[],[]) [Str "Commented-out",Space,Str "list",Space,Str "item"] ,BulletList - [[Plain [Str "one",Space,RawInline (Format {unFormat = "html"}) ""]] + [[Plain [Str "one",Space,RawInline (Format "html") ""]] ,[Plain [Str "three"]]] 
,Header 2 ("backslash-newline",[],[]) [Str "Backslash",Space,Str "newline"] ,Para [Str "hi",LineBreak,Str "there"] diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index f6e09e45a..81596c7d7 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList []}) +Pandoc (Meta (fromList [])) [Header 1 ("",[],[]) [Str "header"] ,Header 2 ("",[],[]) [Str "header",Space,Str "level",Space,Str "two"] ,Header 3 ("",[],[]) [Str "header",Space,Str "level",Space,Str "3"] @@ -51,11 +51,11 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "bud"] ,Para [Str "another"] ,Header 2 ("",[],[]) [Str "raw",Space,Str "html"] -,Para [Str "hi",Space,RawInline (Format {unFormat = "html"}) "",Emph [Str "there"],RawInline (Format {unFormat = "html"}) "",Str "."] -,Para [RawInline (Format {unFormat = "html"}) "",Str "inserted",RawInline (Format {unFormat = "html"}) ""] -,RawBlock (Format {unFormat = "html"}) "
" +,Para [Str "hi",Space,RawInline (Format "html") "",Emph [Str "there"],RawInline (Format "html") "",Str "."] +,Para [RawInline (Format "html") "",Str "inserted",RawInline (Format "html") ""] +,RawBlock (Format "html") "
" ,Para [Str "hi",Space,Emph [Str "there"]] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Header 2 ("",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"] ,Para [Str "H",Subscript [Str "2"],Str "O",Space,Str "base",Superscript [Emph [Str "exponent"]],Space,Strikeout [Str "hello"]] ,Header 2 ("",[],[]) [Str "inline",Space,Str "code"] @@ -140,7 +140,7 @@ Pandoc (Meta {unMeta = fromList []}) ,[Plain [Str "this",Space,Str "looks",Space,Str "like",Space,Str "a",Space,Str "continuation"]] ,[Plain [Str "and",Space,Str "is",Space,Str "often",Space,Str "used"]] ,[Plain [Str "instead",LineBreak,Str "of",Space,Str "
"]]])]] - ,[Plain [RawInline (Format {unFormat = "mediawiki"}) "{{{template\n|author=John\n|title=My Book\n}}}"] + ,[Plain [RawInline (Format "mediawiki") "{{{template\n|author=John\n|title=My Book\n}}}"] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "five",Space,Str "sub",Space,Str "1"] ,OrderedList (1,DefaultStyle,DefaultDelim) @@ -168,16 +168,16 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Code ("",[],[]) "\160hell\160\160\160\160\160\160yeah"] ,Para [Code ("",[],[]) "Start\160with\160a\160space\160in\160the\160first\160column,",LineBreak,Code ("",[],[]) "(before\160the\160).",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "Then\160your\160block\160format\160will\160be",LineBreak,Code ("",[],[]) "\160\160\160\160maintained.",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "This\160is\160good\160for\160copying\160in\160code\160blocks:",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "def\160function():",LineBreak,Code ("",[],[]) "\160\160\160\160\"\"\"documentation\160string\"\"\"",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\160\160\160\160if\160True:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160True",LineBreak,Code ("",[],[]) "\160\160\160\160else:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160False"] ,Para [Str "Not"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Para [Str "preformatted"] ,Para [Str "Don't",Space,Str "need"] ,Para [Code ("",[],[]) "a\160blank\160line"] ,Para [Str "around",Space,Str "a",Space,Str "preformatted",Space,Str "block."] ,Header 2 ("",[],[]) [Str "templates"] -,RawBlock (Format {unFormat = "mediawiki"}) "{{Welcome}}" -,RawBlock (Format {unFormat = "mediawiki"}) "{{Foo:Bar}}" -,RawBlock (Format {unFormat = "mediawiki"}) "{{Thankyou|all your effort|Me}}" -,Para [Str "Written",Space,RawInline (Format {unFormat = "mediawiki"}) "{{{date}}}",Space,Str "by",Space,RawInline (Format {unFormat = "mediawiki"}) "{{{name}}}",Str "."] +,RawBlock (Format "mediawiki") "{{Welcome}}" +,RawBlock (Format "mediawiki") "{{Foo:Bar}}" +,RawBlock (Format "mediawiki") "{{Thankyou|all your effort|Me}}" +,Para [Str "Written",Space,RawInline (Format "mediawiki") "{{{date}}}",Space,Str "by",Space,RawInline (Format "mediawiki") "{{{name}}}",Str "."] ,Header 2 ("",[],[]) [Str "tables"] ,Table [] [AlignDefault,AlignDefault] [0.0,0.0] [[] diff --git a/tests/opml-reader.native b/tests/opml-reader.native index e71857680..237a16719 100644 --- a/tests/opml-reader.native +++ b/tests/opml-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Dave",Space,Str "Winer"]]),("date",MetaInlines [Str "Thu,",Space,Str "14",Space,Str "Jul",Space,Str "2005",Space,Str "23:41:05",Space,Str "GMT"]),("title",MetaInlines [Str "States"])]}) +Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "Dave",Space,Str "Winer"]]),("date",MetaInlines [Str "Thu,",Space,Str "14",Space,Str "Jul",Space,Str "2005",Space,Str "23:41:05",Space,Str "GMT"]),("title",MetaInlines [Str "States"])])) [Header 1 ("",[],[]) [Str "United",Space,Str "States"] ,Header 2 ("",[],[]) [Str "Far",Space,Str "West"] ,Header 3 ("",[],[]) [Str "Alaska"] diff --git a/tests/rst-reader.native b/tests/rst-reader.native index 69e73ae40..09da2d5ef 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -1,4 +1,4 @@ -Pandoc 
(Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta (fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) [Header 1 ("",[],[]) [Str "Level",Space,Str "one",Space,Str "header"] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 2 ("",[],[]) [Str "Level",Space,Str "two",Space,Str "header"] @@ -172,11 +172,11 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [[Para [Str "123-4567"]]])] ,Header 1 ("",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock (Format {unFormat = "html"}) "
foo
" +,RawBlock (Format "html") "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n foo\n
\n
\n
" +,RawBlock (Format "html") "
\n
\n
\n foo\n
\n
\n
" ,Header 1 ("",[],[]) [Str "LaTeX",Space,Str "Block"] -,RawBlock (Format {unFormat = "latex"}) "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" +,RawBlock (Format "latex") "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" ,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ".",Space,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str "."] ,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",Space,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",Space,Code ("",[],[]) "",Str "."] diff --git a/tests/s5.native b/tests/s5.native index 5796b74a0..def09cf80 100644 --- a/tests/s5.native +++ b/tests/s5.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Sam",Space,Str "Smith"],MetaInlines [Str "Jen",Space,Str "Jones"]]),("date",MetaInlines [Str "July",Space,Str "15,",Space,Str "2006"]),("title",MetaInlines [Str "My",Space,Str "S5",Space,Str "Document"])]}) +Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "Sam",Space,Str "Smith"],MetaInlines [Str "Jen",Space,Str "Jones"]]),("date",MetaInlines [Str "July",Space,Str "15,",Space,Str "2006"]),("title",MetaInlines [Str "My",Space,Str "S5",Space,Str "Document"])])) [Header 1 ("first-slide",[],[]) [Str "First",Space,Str "slide"] ,BulletList [[Plain [Str "first",Space,Str "bullet"]] diff --git a/tests/testsuite.native b/tests/testsuite.native index f9cf606f3..503b3001e 100644 --- a/tests/testsuite.native +++ b/tests/testsuite.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str 
"Test",Space,Str "Suite"])]}) +Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("headers",[],[]) [Str "Headers"] @@ -228,45 +228,45 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "sublist"]]]]])] ,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n" +,RawBlock (Format "html") "
\n" ,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
" +,RawBlock (Format "html") "
\n
\n
" ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
" +,RawBlock (Format "html") "
\n
\n
" ,Plain [Str "bar"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n" +,RawBlock (Format "html") "
\n
\n" ,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] -,RawBlock (Format {unFormat = "html"}) "\n\n\n\n
" +,RawBlock (Format "html") "\n\n\n\n\n\n
" ,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"]] -,RawBlock (Format {unFormat = "html"}) "" +,RawBlock (Format "html") "" ,Plain [Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] -,RawBlock (Format {unFormat = "html"}) "
\n\n\n" +,RawBlock (Format "html") "
\n\n\n" ,Para [Str "Here\8217s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,RawBlock (Format {unFormat = "html"}) "
\n " +,RawBlock (Format "html") "
\n " ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n" +,RawBlock (Format "html") "
\n" ,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] ,CodeBlock ("",[],[]) "
\n foo\n
" ,Para [Str "As",Space,Str "should",Space,Str "this:"] ,CodeBlock ("",[],[]) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n " +,RawBlock (Format "html") "
\n
\n
\n " ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n" +,RawBlock (Format "html") "
\n
\n
\n" ,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] -,RawBlock (Format {unFormat = "html"}) "\n" +,RawBlock (Format "html") "\n" ,Para [Str "Multiline:"] -,RawBlock (Format {unFormat = "html"}) "\n\n\n" +,RawBlock (Format "html") "\n\n\n" ,Para [Str "Code",Space,Str "block:"] ,CodeBlock ("",[],[]) "" ,Para [Str "Just",Space,Str "plain",Space,Str "comment,",Space,Str "with",Space,Str "trailing",Space,Str "spaces",Space,Str "on",Space,Str "the",Space,Str "line:"] -,RawBlock (Format {unFormat = "html"}) " \n" +,RawBlock (Format "html") " \n" ,Para [Str "Code:"] ,CodeBlock ("",[],[]) "
" ,Para [Str "Hr\8217s:"] -,RawBlock (Format {unFormat = "html"}) "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" +,RawBlock (Format "html") "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" ,HorizontalRule ,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] @@ -294,7 +294,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,HorizontalRule ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList - [[Plain [RawInline (Format {unFormat = "tex"}) "\\cite[22-23]{smith.1899}"]] + [[Plain [RawInline (Format "tex") "\\cite[22-23]{smith.1899}"]] ,[Plain [Math InlineMath "2+2=4"]] ,[Plain [Math InlineMath "x \\in y"]] ,[Plain [Math InlineMath "\\alpha \\wedge \\omega"]] @@ -309,7 +309,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "Shoes",Space,Str "($20)",Space,Str "and",Space,Str "socks",Space,Str "($5)."]] ,[Plain [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",Space,Str "emphasized"],Space,Str "23$."]]] ,Para [Str "Here\8217s",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"] -,RawBlock (Format {unFormat = "latex"}) "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" +,RawBlock (Format "latex") "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" ,HorizontalRule ,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 70b33f31d..31ab558d7 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList []}) +Pandoc (Meta (fromList [])) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str 
"Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Str "\8217",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] @@ -137,23 +137,23 @@ Pandoc (Meta {unMeta = fromList []}) ,Header 1 ("",[],[]) [Str "Entities"] ,Para [Str "*",LineBreak,Str "&"] ,Header 1 ("",[],[]) [Str "Raw",Space,Str "HTML"] -,Para [Str "However",Str ",",Space,RawInline (Format {unFormat = "html"}) "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline (Format {unFormat = "html"}) "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] -,RawBlock (Format {unFormat = "html"}) "
" +,Para [Str "However",Str ",",Space,RawInline (Format "html") "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline (Format "html") "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] +,RawBlock (Format "html") "
" ,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Para [Str "inlined"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Para [Str "as",Space,Str "well",Str "."] ,BulletList [[Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "\8217",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"]] - ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format {unFormat = "html"}) "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format {unFormat = "html"}) ""]]] + ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") ""]]] ,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ,Header 1 ("",[],[]) [Str "Raw",Space,Str "LaTeX"] ,Para [Str "This",Space,Str "Textile",Space,Str "reader",Space,Str "also",Space,Str "accepts",Space,Str "raw",Space,Str "LaTeX",Space,Str "for",Space,Str "blocks",Space,Str ":"] -,RawBlock (Format {unFormat = "latex"}) "\\begin{itemize}\n \\item one\n \\item two\n\\end{itemize}" -,Para [Str "and",Space,Str "for",Space,RawInline (Format {unFormat = "latex"}) "\\emph{inlines}",Str "."] +,RawBlock (Format "latex") "\\begin{itemize}\n \\item one\n \\item two\n\\end{itemize}" +,Para [Str "and",Space,Str "for",Space,RawInline (Format "latex") "\\emph{inlines}",Str "."] ,Header 1 ("",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"] ,Para [Str "PBS (Public Broadcasting System)"] ,Para [Str "Hi",Str "\8482"] diff --git a/tests/writer.native b/tests/writer.native index f9cf606f3..503b3001e 100644 --- a/tests/writer.native +++ b/tests/writer.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta (fromList 
[("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("headers",[],[]) [Str "Headers"] @@ -228,45 +228,45 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "sublist"]]]]])] ,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n" +,RawBlock (Format "html") "
\n" ,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
" +,RawBlock (Format "html") "
\n
\n
" ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
" +,RawBlock (Format "html") "
\n
\n
" ,Plain [Str "bar"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n" +,RawBlock (Format "html") "
\n
\n" ,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] -,RawBlock (Format {unFormat = "html"}) "\n\n\n\n
" +,RawBlock (Format "html") "\n\n\n\n\n\n
" ,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"]] -,RawBlock (Format {unFormat = "html"}) "" +,RawBlock (Format "html") "" ,Plain [Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] -,RawBlock (Format {unFormat = "html"}) "
\n\n\n" +,RawBlock (Format "html") "
\n\n\n" ,Para [Str "Here\8217s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,RawBlock (Format {unFormat = "html"}) "
\n " +,RawBlock (Format "html") "
\n " ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n" +,RawBlock (Format "html") "
\n" ,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] ,CodeBlock ("",[],[]) "
\n foo\n
" ,Para [Str "As",Space,Str "should",Space,Str "this:"] ,CodeBlock ("",[],[]) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n " +,RawBlock (Format "html") "
\n
\n
\n " ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n" +,RawBlock (Format "html") "
\n
\n
\n" ,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] -,RawBlock (Format {unFormat = "html"}) "\n" +,RawBlock (Format "html") "\n" ,Para [Str "Multiline:"] -,RawBlock (Format {unFormat = "html"}) "\n\n\n" +,RawBlock (Format "html") "\n\n\n" ,Para [Str "Code",Space,Str "block:"] ,CodeBlock ("",[],[]) "" ,Para [Str "Just",Space,Str "plain",Space,Str "comment,",Space,Str "with",Space,Str "trailing",Space,Str "spaces",Space,Str "on",Space,Str "the",Space,Str "line:"] -,RawBlock (Format {unFormat = "html"}) " \n" +,RawBlock (Format "html") " \n" ,Para [Str "Code:"] ,CodeBlock ("",[],[]) "
" ,Para [Str "Hr\8217s:"] -,RawBlock (Format {unFormat = "html"}) "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" +,RawBlock (Format "html") "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" ,HorizontalRule ,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] @@ -294,7 +294,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,HorizontalRule ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList - [[Plain [RawInline (Format {unFormat = "tex"}) "\\cite[22-23]{smith.1899}"]] + [[Plain [RawInline (Format "tex") "\\cite[22-23]{smith.1899}"]] ,[Plain [Math InlineMath "2+2=4"]] ,[Plain [Math InlineMath "x \\in y"]] ,[Plain [Math InlineMath "\\alpha \\wedge \\omega"]] @@ -309,7 +309,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "Shoes",Space,Str "($20)",Space,Str "and",Space,Str "socks",Space,Str "($5)."]] ,[Plain [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",Space,Str "emphasized"],Space,Str "23$."]]] ,Para [Str "Here\8217s",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"] -,RawBlock (Format {unFormat = "latex"}) "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" +,RawBlock (Format "latex") "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" ,HorizontalRule ,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] -- cgit v1.2.3 From 5af0de23cc60ab02b351e075992f6a936acdf19e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 15 Aug 2013 12:20:57 -0700 Subject: Makefile: Remove *everything* on 'make veryclean'. 
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b97ab1a5e..b8548a648 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ clean: cabal-dev clean veryclean: clean - cabal-dev clean && rm -rf pandoc-types citeproc-hs + rm -rf pandoc-types citeproc-hs dist cabal-dev pandoc-types: git clone https://github.com/jgm/pandoc-types && \ -- cgit v1.2.3 From c45bd6d468b272a2737dcc2a3c9f4afaebf37494 Mon Sep 17 00:00:00 2001 From: Scott Morrison Date: Fri, 16 Aug 2013 10:03:54 +1000 Subject: adding support for breve accents via \u{} while reading LaTeX --- src/Text/Pandoc/Readers/LaTeX.hs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 71e1e0ac2..7c370dd47 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -416,6 +416,7 @@ inlineCommands = M.fromList $ , ("=", option (str "=") $ try $ tok >>= accent macron) , ("c", option (str "c") $ try $ tok >>= accent cedilla) , ("v", option (str "v") $ try $ tok >>= accent hacek) + , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp)) , (",", pure mempty) @@ -708,6 +709,21 @@ hacek 'Z' = 'Ž' hacek 'z' = 'ž' hacek c = c +breve :: Char -> Char +breve 'A' = 'Ă' +breve 'a' = 'ă' +breve 'E' = 'Ĕ' +breve 'e' = 'ĕ' +breve 'G' = 'Ğ' +breve 'g' = 'ğ' +breve 'I' = 'Ĭ' +breve 'i' = 'ĭ' +breve 'O' = 'Ŏ' +breve 'o' = 'ŏ' +breve 'U' = 'Ŭ' +breve 'u' = 'ŭ' +breve c = c + tok :: LP Inlines tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) -- cgit v1.2.3 From 172f020bc5b59950afd29411b7d80200d0b38e83 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 15 Aug 2013 17:21:56 -0700 Subject: Shared: Better error message when default data file not found. 
Listing the full path can confuse people who are using `--self-contained`: they might have intended the file to be found locally. So now we just list the data file name. --- src/Text/Pandoc/Shared.hs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d670a35bc..72b467da5 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -583,8 +583,7 @@ readDefaultDataFile :: FilePath -> IO BS.ByteString readDefaultDataFile fname = #ifdef EMBED_DATA_FILES case lookup (makeCanonical fname) dataFiles of - Nothing -> ioError $ userError - $ "Data file `" ++ fname ++ "' does not exist" + Nothing -> err 97 $ "Could not find data file " ++ fname Just contents -> return contents where makeCanonical = joinPath . transformPathParts . splitDirectories transformPathParts = reverse . foldl go [] @@ -592,7 +591,12 @@ readDefaultDataFile fname = go (_:as) ".." = as go as x = x : as #else - getDataFileName ("data" </> fname) >>= BS.readFile + getDataFileName ("data" </> fname) >>= checkExistence >>= BS.readFile + where checkExistence fn = do + exists <- doesFileExist fn + if exists + then return fn + else err 97 ("Could not find data file " ++ fname) #endif -- | Read file from specified user data directory or, if not found there, from -- cgit v1.2.3 From fdbbbfc9a4dac1b2475262573b4dd1242d5a40a2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 15 Aug 2013 20:06:26 -0700 Subject: Improved INSTALL instructions: * Note binary package for OSX. * Added URL of github wiki page on installing the dev version. --- INSTALL | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/INSTALL b/INSTALL index 8bcea83c2..ba3cb8eb6 100644 --- a/INSTALL +++ b/INSTALL @@ -3,7 +3,10 @@ These instructions explain how to install pandoc from source. Binary packages or ports of pandoc are available for freebsd and several linux distributions, so check your package manager. 
-There is also a Windows installer. +There are also binary installers for Windows and Mac OS X. + +If you are installing the development version from github, see also: +https://github.com/jgm/pandoc/wiki/Installing-the-development-version-of-pandoc Quick install ------------- -- cgit v1.2.3 From d3ebca6f553efa37cb6795dbd72e84051edea356 Mon Sep 17 00:00:00 2001 From: Scott Morrison Date: Fri, 16 Aug 2013 14:48:24 +1000 Subject: LaTeX reader missing \oe and \OE characters --- src/Text/Pandoc/Readers/LaTeX.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 7c370dd47..414e50fc8 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -402,6 +402,8 @@ inlineCommands = M.fromList $ , ("l", lit "ł") , ("ae", lit "æ") , ("AE", lit "Æ") + , ("oe", lit "œ") + , ("OE", lit "Œ") , ("pounds", lit "£") , ("euro", lit "€") , ("copyright", lit "©") -- cgit v1.2.3 From cc91b1d1d320acaf6fb13cbc09e91fe3dc99ed70 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 12:25:08 -0700 Subject: Added *~ and *.pyc to .gitignore. --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 3e9ae6904..98fe2fd6d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*~ dist/* README.* !README.Debian @@ -10,6 +11,7 @@ man/man?/*.html pandoc.cabal.orig *.o *.hi +*.pyc /COPYING.rtf /COPYRIGHT.txt /cabal-dev/ -- cgit v1.2.3 From 309024971c1f4cfea0dc2529cc4b75f02d7def33 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 12:25:21 -0700 Subject: Updated tests for latest pandoc-types changes. 
--- tests/docbook-reader.native | 2 +- tests/haddock-reader.native | 2 +- tests/html-reader.native | 2 +- tests/latex-reader.native | 2 +- tests/mediawiki-reader.native | 2 +- tests/opml-reader.native | 2 +- tests/rst-reader.native | 2 +- tests/s5.native | 2 +- tests/testsuite.native | 2 +- tests/textile-reader.native | 2 +- tests/writer.native | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/docbook-reader.native b/tests/docbook-reader.native index 8c94fea3e..2d29bb154 100644 --- a/tests/docbook-reader.native +++ b/tests/docbook-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 1 ("",[],[]) [Str "Headers"] ,Header 2 ("",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embedded",Space,Str "link"] ("/url","")] diff --git a/tests/haddock-reader.native b/tests/haddock-reader.native index c17c2ddf0..877719b50 100644 --- a/tests/haddock-reader.native +++ b/tests/haddock-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [])) +Pandoc (Meta {unMeta = fromList []}) [Para [Str "This",Space,Str "file",Space,Str "tests",Space,Str "the",Space,Str 
"Pandoc",Space,Str "reader",Space,Str "for",Space,Str "Haddock.",Space,Str "We've",Space,Str "borrowed",Space,Str "examples",Space,Str "from",Space,Str "Haddock's",Space,Str "documentation:",Space,Link [Str "http://www.haskell.org/haddock/doc/html/ch03s08.html"] ("http://www.haskell.org/haddock/doc/html/ch03s08.html","http://www.haskell.org/haddock/doc/html/ch03s08.html"),Str "."] ,Para [Str "The",Space,Str "following",Space,Str "characters",Space,Str "have",Space,Str "special",Space,Str "meanings",Space,Str "in",Space,Str "Haddock,",Space,Str "/,",Space,Str "',",Space,Str "`,",Space,Str "\",",Space,Str "@,",Space,Str "<,",Space,Str "so",Space,Str "they",Space,Str "must",Space,Str "be",Space,Str "escaped."] ,Para [Str "*",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph,",Space,Str "not",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str ">",Space,Str "This",Space,Str "sentence",Space,Str "is",Space,Str "not",Space,Str "code.",Space,Str ">>>",Space,Str "This",Space,Str "is",Space,Str "not",Space,Str "an",Space,Str "example."] diff --git a/tests/html-reader.native b/tests/html-reader.native index 8f60f040e..15937e594 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) +Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Str ".",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] diff --git a/tests/latex-reader.native b/tests/latex-reader.native index ddee17f9e..23e600000 100644 --- a/tests/latex-reader.native 
+++ b/tests/latex-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) +Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [RawBlock (Format "latex") "\\maketitle" ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index 81596c7d7..a424be0ae 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [])) +Pandoc (Meta {unMeta = fromList []}) [Header 1 ("",[],[]) [Str "header"] ,Header 2 ("",[],[]) [Str "header",Space,Str "level",Space,Str "two"] ,Header 3 ("",[],[]) [Str "header",Space,Str "level",Space,Str "3"] diff --git a/tests/opml-reader.native b/tests/opml-reader.native index 237a16719..e71857680 100644 --- a/tests/opml-reader.native +++ b/tests/opml-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "Dave",Space,Str "Winer"]]),("date",MetaInlines [Str "Thu,",Space,Str "14",Space,Str "Jul",Space,Str "2005",Space,Str "23:41:05",Space,Str "GMT"]),("title",MetaInlines [Str "States"])])) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Dave",Space,Str "Winer"]]),("date",MetaInlines [Str "Thu,",Space,Str 
"14",Space,Str "Jul",Space,Str "2005",Space,Str "23:41:05",Space,Str "GMT"]),("title",MetaInlines [Str "States"])]}) [Header 1 ("",[],[]) [Str "United",Space,Str "States"] ,Header 2 ("",[],[]) [Str "Far",Space,Str "West"] ,Header 3 ("",[],[]) [Str "Alaska"] diff --git a/tests/rst-reader.native b/tests/rst-reader.native index 09da2d5ef..abceaaab7 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) +Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Header 1 ("",[],[]) [Str "Level",Space,Str "one",Space,Str "header"] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 2 ("",[],[]) [Str "Level",Space,Str "two",Space,Str "header"] diff --git a/tests/s5.native b/tests/s5.native index def09cf80..5796b74a0 100644 --- a/tests/s5.native +++ b/tests/s5.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "Sam",Space,Str "Smith"],MetaInlines [Str "Jen",Space,Str "Jones"]]),("date",MetaInlines [Str "July",Space,Str "15,",Space,Str "2006"]),("title",MetaInlines [Str "My",Space,Str 
"S5",Space,Str "Document"])])) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Sam",Space,Str "Smith"],MetaInlines [Str "Jen",Space,Str "Jones"]]),("date",MetaInlines [Str "July",Space,Str "15,",Space,Str "2006"]),("title",MetaInlines [Str "My",Space,Str "S5",Space,Str "Document"])]}) [Header 1 ("first-slide",[],[]) [Str "First",Space,Str "slide"] ,BulletList [[Plain [Str "first",Space,Str "bullet"]] diff --git a/tests/testsuite.native b/tests/testsuite.native index 503b3001e..d1b14b24e 100644 --- a/tests/testsuite.native +++ b/tests/testsuite.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("headers",[],[]) [Str "Headers"] diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 31ab558d7..7e709a505 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [])) +Pandoc (Meta {unMeta = fromList []}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str 
"of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Str "\8217",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] diff --git a/tests/writer.native b/tests/writer.native index 503b3001e..d1b14b24e 100644 --- a/tests/writer.native +++ b/tests/writer.native @@ -1,4 +1,4 @@ -Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("headers",[],[]) [Str "Headers"] -- cgit v1.2.3 From ab8c0dcd410282baaa9429f755ad55e6d01a2466 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 12:40:38 -0700 Subject: LaTeX reader: parse label after section command and set id. Closes #951. 
--- src/Text/Pandoc/Readers/LaTeX.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 414e50fc8..50a95c361 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -315,12 +315,14 @@ authors = try $ do addMeta "authors" (map trimInlines auths) section :: Attr -> Int -> LP Blocks -section attr lvl = do +section (ident, classes, kvs) lvl = do hasChapters <- stateHasChapters `fmap` getState let lvl' = if hasChapters then lvl + 1 else lvl skipopts contents <- grouped inline - return $ headerWith attr lvl' contents + lab <- option ident $ try $ spaces >> controlSeq "label" >> + spaces >> braced + return $ headerWith (lab, classes, kvs) lvl' contents inlineCommand :: LP Inlines inlineCommand = try $ do -- cgit v1.2.3 From 441a7aebf8c141612203d1cab0032f8c55e536ed Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 13:02:55 -0700 Subject: LaTeX writer: Avoid problem with footnotes in unnumbered headers. Closes #940. Added test case. 
--- src/Text/Pandoc/Writers/LaTeX.hs | 13 +++++++------ tests/Tests/Writers/LaTeX.hs | 6 ++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 7f9a99801..98553c421 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -498,14 +498,15 @@ sectionHeader unnumbered ref level lst = do let noNote (Note _) = Str "" noNote x = x let lstNoNotes = walk noNote lst + txtNoNotes <- inlineListToLaTeX lstNoNotes let star = if unnumbered then text "*" else empty - -- footnotes in sections don't work unless you specify an optional - -- argument: \section[mysec]{mysec\footnote{blah}} - optional <- if lstNoNotes == lst + -- footnotes in sections don't work (except for starred variants) + -- unless you specify an optional argument: + -- \section[mysec]{mysec\footnote{blah}} + optional <- if unnumbered || lstNoNotes == lst then return empty else do - res <- inlineListToLaTeX lstNoNotes - return $ char '[' <> res <> char ']' + return $ brackets txtNoNotes let stuffing = star <> optional <> braces txt book <- gets stBook opts <- gets stOptions @@ -536,7 +537,7 @@ sectionHeader unnumbered ref level lst = do $$ if unnumbered then "\\addcontentsline{toc}" <> braces (text sectionType) <> - braces txt + braces txtNoNotes else empty -- | Convert list of inline elements to LaTeX. 
diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index b1427d91f..ebde5b97c 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -36,4 +36,10 @@ tests = [ testGroup "code blocks" [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?> "$\\sigma|_{\\{x\\}}$" ] + , testGroup "headers" + [ "unnumbered header" =: + headerWith ("foo",["unnumbered"],[]) 1 + (text "Header 1" <> note (plain $ text "note")) =?> + "\\section*{Header 1\\footnote{note}}\\label{foo}\n\\addcontentsline{toc}{section}{Header 1}\n" + ] ] -- cgit v1.2.3 From 19591df739a6c50a3d0a9af55ba90b883264b21d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 13:05:06 -0700 Subject: Shared: stringify now skips over footnotes. That is usually the right thing to do for section labels, etc. --- src/Text/Pandoc/Shared.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 72b467da5..bf92601ef 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -391,6 +391,7 @@ stringify = query go go (Str x) = x go (Code _ x) = x go (Math _ x) = x + go (Note _) = "" go LineBreak = " " go _ = "" -- cgit v1.2.3 From 399c75da448dc0f90855b43ee44e9d7cf8009f1c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 13:08:11 -0700 Subject: Revert "Shared: stringify now skips over footnotes." This reverts commit 19591df739a6c50a3d0a9af55ba90b883264b21d. This change didn't work; query has already written the contents of the note by the time it gets to Note. 
--- src/Text/Pandoc/Shared.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index bf92601ef..72b467da5 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -391,7 +391,6 @@ stringify = query go go (Str x) = x go (Code _ x) = x go (Math _ x) = x - go (Note _) = "" go LineBreak = " " go _ = "" -- cgit v1.2.3 From 89a7703260703599a033be16e1581a0494326c2b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 13:22:27 -0700 Subject: Shared: Changed stringify so it ignores notes. Also documented this in README. --- README | 1 + src/Text/Pandoc/Shared.hs | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/README b/README index e5de97556..c25d611d6 100644 --- a/README +++ b/README @@ -980,6 +980,7 @@ automatically assigned a unique identifier based on the header text. To derive the identifier from the header text, - Remove all formatting, links, etc. + - Remove all footnotes. - Remove all punctuation, except underscores, hyphens, and periods. - Replace all spaces and newlines with hyphens. - Convert all alphabetic characters to lowercase. diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 72b467da5..eef150351 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -384,8 +384,10 @@ consolidateInlines (x : xs) = x : consolidateInlines xs consolidateInlines [] = [] -- | Convert list of inlines to a string with formatting removed. +-- Footnotes are skipped (since we don't want their contents in link +-- labels). stringify :: [Inline] -> String -stringify = query go +stringify = query go . walk deNote where go :: Inline -> [Char] go Space = " " go (Str x) = x @@ -393,6 +395,8 @@ stringify = query go go (Math _ x) = x go LineBreak = " " go _ = "" + deNote (Note _) = Str "" + deNote x = x -- | Change final list item from @Para@ to @Plain@ if the list contains -- no other @Para@ blocks. 
-- cgit v1.2.3 From caa89efc32d0ebaa34eb9eb8dc110e9af8d6d051 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 20:57:34 -0700 Subject: Added scripts/deflists.py to filter examples. --- pandoc.cabal | 1 + scripts/deflists.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100755 scripts/deflists.py diff --git a/pandoc.cabal b/pandoc.cabal index 352da4988..a3d0dfa83 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -121,6 +121,7 @@ Extra-Source-Files: scripts/deemph.py, scripts/myemph.py, scripts/tikz.py, + scripts/deflists.py, -- tests tests/bodybg.gif, tests/docbook-reader.docbook diff --git a/scripts/deflists.py b/scripts/deflists.py new file mode 100755 index 000000000..502963419 --- /dev/null +++ b/scripts/deflists.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python + +""" +Pandoc filter to convert definition lists to bullet +lists with the defined terms in strong emphasis (for +compatibility with standard markdown). +""" + +from pandoc import toJSONFilter + +def deflists(key, value, format): + if key == 'DefinitionList': + return {'BulletList': [tobullet(t,d) for [t,d] in value]} + +def tobullet(term, defs): + return [{'Para': [{'Strong': term}]}] + [b for d in defs for b in d] + + +if __name__ == "__main__": + toJSONFilter(deflists) -- cgit v1.2.3 From 5a5a2522163d73c3b91db2cb2b73e697a5dcfb23 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 17 Aug 2013 10:29:12 -0700 Subject: Markdown reader: Don't generate blank title, author, date elements. --- src/Text/Pandoc/Readers/Markdown.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 251554de1..906dd10f2 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -221,9 +221,9 @@ pandocTitleBlock = try $ do title' <- title author' <- author date' <- date - return $ B.setMeta "title" title' - . B.setMeta "author" author' - . 
B.setMeta "date" date' + return $ if B.isNull title' then id else B.setMeta "title" title' + . if null author' then id else B.setMeta "author" author' + . if B.isNull date' then id else B.setMeta "date" date' yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) yamlTitleBlock = try $ do -- cgit v1.2.3 From 3117c668a7d245689bfc291d5d9a64cb3178b52c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 15 Aug 2013 22:39:14 -0700 Subject: Markdown reader: Parse span, div tags as Span, Div elements. Assuming markdown_in_html extension is set. --- src/Text/Pandoc/Readers/Markdown.hs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 906dd10f2..535fc02c6 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -446,6 +446,7 @@ block = choice [ mempty <$ blanklines , header , lhsCodeBlock , rawTeXBlock + , divHtml , htmlBlock , table , lineBlock @@ -1355,6 +1356,7 @@ inline = choice [ whitespace , superscript , inlineNote -- after superscript because of ^[link](/foo)^ , autoLink + , spanHtml , rawHtmlInline , escapedChar , rawLaTeXInline' @@ -1755,6 +1757,26 @@ inBrackets parser = do char ']' return $ "[" ++ contents ++ "]" +spanHtml :: MarkdownParser (F Inlines) +spanHtml = try $ do + guardEnabled Ext_markdown_in_html_blocks + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) + contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) + let ident = maybe "" id $ lookup "id" attrs + let classes = maybe [] words $ lookup "class" attrs + let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ B.spanWith (ident, classes, keyvals) <$> contents + +divHtml :: MarkdownParser (F Blocks) +divHtml = try $ do + guardEnabled Ext_markdown_in_html_blocks + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" []) + contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div")) + let ident = maybe 
"" id $ lookup "id" attrs + let classes = maybe [] words $ lookup "class" attrs + let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ B.divWith (ident, classes, keyvals) <$> contents + rawHtmlInline :: MarkdownParser (F Inlines) rawHtmlInline = do guardEnabled Ext_raw_html -- cgit v1.2.3 From 8d441af3da4709fd48a44e860d5a0cd4d35792af Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Aug 2013 14:36:40 -0700 Subject: Adjusted writers and tests for change in parsing of div/span. Textile, MediaWiki, Markdown, Org, RST will emit raw HTML div tags for divs. Otherwise Div and Span are "transparent" block containers. --- src/Text/Pandoc/Writers/Docbook.hs | 2 +- src/Text/Pandoc/Writers/Markdown.hs | 17 ++++++++++----- src/Text/Pandoc/Writers/MediaWiki.hs | 12 +++++++---- src/Text/Pandoc/Writers/Org.hs | 9 +++++++- src/Text/Pandoc/Writers/RST.hs | 6 +++++- src/Text/Pandoc/Writers/Shared.hs | 18 ++++++++++++++++ src/Text/Pandoc/Writers/Textile.hs | 8 ++++++-- tests/testsuite.native | 18 ++++------------ tests/testsuite.txt | 12 +++++------ tests/writer.docbook | 40 ++++++++++++++---------------------- tests/writer.fb2 | 2 +- tests/writer.html | 12 +++-------- tests/writer.markdown | 27 +++++++++++++++++++----- tests/writer.mediawiki | 28 +++++++++++++++++-------- tests/writer.native | 18 ++++------------ tests/writer.opml | 2 +- tests/writer.org | 35 ++++++++++++++++++++++++++----- tests/writer.plain | 5 +++++ tests/writer.rst | 35 ++++++++++++++++++++++++++----- tests/writer.textile | 31 +++++++++++++++++++++++----- 20 files changed, 225 insertions(+), 112 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 3d150d19b..7c03c07dc 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -149,7 +149,7 @@ listItemToDocbook opts item = -- | Convert a Pandoc block element to Docbook. 
blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty -blockToDocbook opts (Div _ bs) = blocksToDocbook opts bs +blockToDocbook opts (Div _ bs) = blocksToDocbook opts $ map plainToPara bs blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 3d0ed8702..623c445df 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2013 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2013 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane @@ -301,7 +301,13 @@ blockToMarkdown :: WriterOptions -- ^ Options -> Block -- ^ Block element -> State WriterState Doc blockToMarkdown _ Null = return empty -blockToMarkdown opts (Div _ bs) = blockListToMarkdown opts bs +blockToMarkdown opts (Div attrs ils) = do + isPlain <- gets stPlain + contents <- blockListToMarkdown opts ils + return $ if isPlain + then contents <> blankline + else tagWithAttrs "div" attrs <> blankline <> + contents <> blankline <> "
" <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines return $ contents <> cr @@ -629,8 +635,9 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc -inlineToMarkdown opts (Span _ ils) = - inlineListToMarkdown opts ils +inlineToMarkdown opts (Span attrs ils) = do + contents <- inlineListToMarkdown opts ils + return $ tagWithAttrs "span" attrs <> contents <> text "" inlineToMarkdown opts (Emph lst) = do contents <- inlineListToMarkdown opts lst return $ "*" <> contents <> "*" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 4ffba1100..61741a61e 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -34,6 +34,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared +import Text.Pandoc.Pretty (render) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.XML ( escapeStringForXML ) import Data.List ( intersect, intercalate, intersperse ) @@ -83,8 +84,10 @@ blockToMediaWiki :: WriterOptions -- ^ Options blockToMediaWiki _ Null = return "" -blockToMediaWiki opts (Div _ bs) = - blockListToMediaWiki opts bs +blockToMediaWiki opts (Div attrs bs) = do + contents <- blockListToMediaWiki opts bs + return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++ + contents ++ "\n\n" ++ "" blockToMediaWiki opts (Plain inlines) = inlineListToMediaWiki opts inlines @@ -332,8 +335,9 @@ inlineListToMediaWiki opts lst = -- | Convert Pandoc inline element to MediaWiki. 
inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String -inlineToMediaWiki opts (Span _ ils) = - inlineListToMediaWiki opts ils +inlineToMediaWiki opts (Span attrs ils) = do + contents <- inlineListToMediaWiki opts ils + return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "" inlineToMediaWiki opts (Emph lst) = do contents <- inlineListToMediaWiki opts lst diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 34ae532b0..51083f52b 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -106,7 +106,14 @@ escapeString = escapeStringUsing $ blockToOrg :: Block -- ^ Block element -> State WriterState Doc blockToOrg Null = return empty -blockToOrg (Div _ bs) = blockListToOrg bs +blockToOrg (Div attrs bs) = do + contents <- blockListToOrg bs + let startTag = tagWithAttrs "div" attrs + let endTag = text "" + return $ blankline $$ "#+BEGIN_HTML" $$ + nest 2 startTag $$ "#+END_HTML" $$ blankline $$ + contents $$ blankline $$ "#+BEGIN_HTML" $$ + nest 2 endTag $$ "#+END_HTML" $$ blankline blockToOrg (Plain inlines) = inlineListToOrg inlines -- title beginning with fig: indicates that the image is a figure blockToOrg (Para [Image txt (src,'f':'i':'g':':':tit)]) = do diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 557658bc8..70c6b4421 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -161,7 +161,11 @@ bordered contents c = blockToRST :: Block -- ^ Block element -> State WriterState Doc blockToRST Null = return empty -blockToRST (Div _ bs) = blockListToRST bs +blockToRST (Div attr bs) = do + contents <- blockListToRST bs + let startTag = ".. raw:: html" $+$ nest 3 (tagWithAttrs "div" attr) + let endTag = ".. 
raw:: html" $+$ nest 3 "" + return $ blankline <> startTag $+$ contents $+$ endTag $$ blankline blockToRST (Plain inlines) = inlineListToRST inlines -- title beginning with fig: indicates that the image is a figure blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index e6ec853f8..89923822c 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2013 John MacFarlane @@ -32,9 +33,12 @@ module Text.Pandoc.Writers.Shared ( , getField , setField , defField + , tagWithAttrs ) where import Text.Pandoc.Definition +import Text.Pandoc.Pretty +import Text.Pandoc.XML (escapeStringForXML) import Control.Monad (liftM) import Text.Pandoc.Options (WriterOptions(..)) import qualified Data.HashMap.Strict as H @@ -120,3 +124,17 @@ defField field val (Object hashmap) = where f _newval oldval = oldval defField _ _ x = x +-- Produce an HTML tag with the given pandoc attributes. 
+tagWithAttrs :: String -> Attr -> Doc +tagWithAttrs tag (ident,classes,kvs) = hsep + ["<" <> text tag + ,if null ident + then empty + else "id=" <> doubleQuotes (text ident) + ,if null classes + then empty + else "class=" <> doubleQuotes (text (unwords classes)) + ] + <> hsep (map (\(k,v) -> text k <> "=" <> + doubleQuotes (text (escapeStringForXML v))) kvs) + <> ">" diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 27e8b60ec..7c102cc86 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Writers.Textile ( writeTextile ) where import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared +import Text.Pandoc.Pretty (render) import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.XML ( escapeStringForXML ) @@ -101,8 +102,11 @@ blockToTextile :: WriterOptions -- ^ Options blockToTextile _ Null = return "" -blockToTextile opts (Div _ bs) = - blockListToTextile opts bs +blockToTextile opts (Div attr bs) = do + let startTag = render Nothing $ tagWithAttrs "div" attr + let endTag = "" + contents <- blockListToTextile opts bs + return $ startTag ++ "\n\n" ++ contents ++ "\n\n" ++ endTag ++ "\n" blockToTextile opts (Plain inlines) = inlineListToTextile opts inlines diff --git a/tests/testsuite.native b/tests/testsuite.native index d1b14b24e..678d7595f 100644 --- a/tests/testsuite.native +++ b/tests/testsuite.native @@ -228,15 +228,9 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "sublist"]]]]])] ,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock (Format "html") "
" -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n" +,Div ("",[],[]) [Plain [Str "foo"]] ,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,RawBlock (Format "html") "
\n
\n
" -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n
\n
" -,Plain [Str "bar"] -,RawBlock (Format "html") "
\n
\n" +,Div ("",[],[]) [Div ("",[],[]) [Div ("",[],[]) [Plain [Str "foo"]]],Div ("",[],[]) [Plain [Str "bar"]]] ,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] ,RawBlock (Format "html") "\n\n\n\n
" ,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"]] @@ -244,17 +238,13 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,Plain [Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] ,RawBlock (Format "html") "
\n\n\n" ,Para [Str "Here\8217s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,RawBlock (Format "html") "
\n " -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n" +,Div ("",[],[]) [Plain [Str "foo"]] ,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] ,CodeBlock ("",[],[]) "
\n foo\n
" ,Para [Str "As",Space,Str "should",Space,Str "this:"] ,CodeBlock ("",[],[]) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock (Format "html") "
\n
\n
\n " -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n
\n
\n" +,Div ("",[],[]) [Div ("",[],[]) [Div ("",[],[]) [Plain [Str "foo"]]]] ,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] ,RawBlock (Format "html") "\n" ,Para [Str "Multiline:"] diff --git a/tests/testsuite.txt b/tests/testsuite.txt index 3bb5d8cb5..4ddaae23f 100644 --- a/tests/testsuite.txt +++ b/tests/testsuite.txt @@ -377,7 +377,7 @@ Interpreted markdown in a table: Here's a simple block:
- foo +foo
This should be a code block, though: @@ -393,11 +393,11 @@ As should this: Now, nested:
-
-
- foo -
-
+
+
+ foo +
+
This should just be an HTML comment: diff --git a/tests/writer.docbook b/tests/writer.docbook index 1e77a61ed..e427d8ffc 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -862,22 +862,18 @@ These should not be escaped: \$ \\ \> \[ \{ Simple block on one line: -
- foo -
+ + foo + And nested without indentation: -
-
-
- foo -
-
-
- bar -
-
+ + foo + + + bar + Interpreted markdown in a table: @@ -896,10 +892,9 @@ These should not be escaped: \$ \\ \> \[ \{ Here’s a simple block: -
- - foo -
+ + foo + This should be a code block, though: @@ -917,14 +912,9 @@ These should not be escaped: \$ \\ \> \[ \{ Now, nested: -
-
-
- - foo -
-
-
+ + foo + This should just be an HTML comment: diff --git a/tests/writer.fb2 b/tests/writer.fb2 index 0bcbf1c2a..8106d2b91 100644 --- a/tests/writer.fb2 +++ b/tests/writer.fb2 @@ -1,2 +1,2 @@ -Pandoc Test SuiteJohnMacFarlaneAnonymousJuly 17, 2006pandoc<p>Pandoc Test Suite</p>

John MacFarlane

Anonymous

July 17, 2006

This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.

——————————

<p>Headers</p>
<p>Level 2 with an embedded link </url></p>
<p>Level 3 with emphasis</p>
<p>Level 4</p>
<p>Level 5</p>
<p>Level 1</p>
<p>Level 2 with emphasis</p>
<p>Level 3</p>

with no blank line

<p>Level 2</p>

with no blank line

——————————

<p>Paragraphs</p>

Here’s a regular paragraph.

In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.

Here’s one with a bullet. * criminey.

There should be a hard line breakhere.

——————————

<p>Block Quotes</p>

E-mail style:

This is a block quote. It is pretty short.

Code in a block quote:

sub status {

print "working";

}

A list:

 1. item one

 2. item two

Nested block quotes:

nested

nested

This should not be a block quote: 2 > 1.

And a following paragraph.

——————————

<p>Code Blocks</p>

Code:

---- (should be four hyphens)

sub status {

print "working";

}

this code block is indented by one tab

And:

this code block is indented by two tabs

These should not be escaped: \$ \\ \> \[ \{

——————————

<p>Lists</p>
<p>Unordered</p>

Asterisks tight:

• asterisk 1

• asterisk 2

• asterisk 3

Asterisks loose:

• asterisk 1

• asterisk 2

• asterisk 3

Pluses tight:

• Plus 1

• Plus 2

• Plus 3

Pluses loose:

• Plus 1

• Plus 2

• Plus 3

Minuses tight:

• Minus 1

• Minus 2

• Minus 3

Minuses loose:

• Minus 1

• Minus 2

• Minus 3

<p>Ordered</p>

Tight:

 1. First

 2. Second

 3. Third

and:

 1. One

 2. Two

 3. Three

Loose using tabs:

 1. First

 2. Second

 3. Third

and using spaces:

 1. One

 2. Two

 3. Three

Multiple paragraphs:

 1. Item 1, graf one.Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.

 2. Item 2.

 3. Item 3.

<p>Nested</p>

• Tab

◦ Tab

* Tab

Here’s another:

 1. First

 2. Second:

   • Fee

   • Fie

   • Foe

 3. Third

Same thing but with paragraphs:

 1. First

 2. Second:

   • Fee

   • Fie

   • Foe

 3. Third

<p>Tabs and spaces</p>

• this is a list item indented with tabs

• this is a list item indented with spaces

◦ this is an example list item indented with tabs

◦ this is an example list item indented with spaces

<p>Fancy list markers</p>

 (2) begins with 2

 (3) and now 3with a continuation

 (3) iv. sublist with roman numerals, starting with 4

 (3) v. more items

 (3) v. (A) a subsublist

 (3) v. (B) a subsublist

Nesting:

 A. Upper Alpha

 A. I. Upper Roman.

 A. I. (6) Decimal start with 6

 A. I. (6) c) Lower alpha with paren

Autonumbering:

 1. Autonumber.

 2. More.

 2. 1. Nested.

Should not be a list item:

M.A. 2007

B. Williams

——————————

<p>Definition Lists</p>

Tight using spaces:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Tight using tabs:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Loose:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Multiple blocks with italics:

apple

    red fruit    contains seeds, crisp, pleasant to taste

orange

    orange fruit

    { orange code block }

    orange block quote

Multiple definitions, tight:

apple

    red fruit    computer

orange

    orange fruit    bank

Multiple definitions, loose:

apple

    red fruit    computer

orange

    orange fruit    bank

Blank line after term, indented marker, alternate markers:

apple

    red fruit    computer

orange

    orange fruit

 1. sublist

 2. sublist

<p>HTML Blocks</p>

Simple block on one line:

<div>

foo

</div>

And nested without indentation:

<div>

<div>

<div>

foo

</div>

</div>

<div>

bar

</div>

</div>

Interpreted markdown in a table:

<table>

<tr>

<td>

This is emphasized

</td>

<td>

And this is strong

</td>

</tr>

</table>

<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script>

Here’s a simple block:

<div>

foo

</div>

This should be a code block, though:

<div>

foo

</div>

As should this:

<div>foo</div>

Now, nested:

<div>

<div>

<div>

foo

</div>

</div>

</div>

This should just be an HTML comment:

<!-- Comment -->

Multiline:

<!--

Blah

Blah

-->

<!--

This is another comment.

-->

Code block:

<!-- Comment -->

Just plain comment, with trailing spaces on the line:

<!-- foo -->

Code:

<hr />

Hr’s:

<hr>

<hr />

<hr />

<hr>

<hr />

<hr />

<hr class="foo" id="bar" />

<hr class="foo" id="bar" />

<hr class="foo" id="bar">

——————————

<p>Inline Markup</p>

This is emphasized, and so is this.

This is strong, and so is this.

An emphasized link[1].

This is strong and em.

So is this word.

This is strong and em.

So is this word.

This is code: >, $, \, \$, <html>.

This is strikeout.

Superscripts: abcd ahello ahello there.

Subscripts: H2O, H23O, Hmany of themO.

These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d.

——————————

<p>Smart quotes, ellipses, dashes</p>

“Hello,” said the spider. “‘Shelob’ is my name.”

‘A’, ‘B’, and ‘C’ are letters.

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’

‘He said, “I want to go.”’ Were you alive in the 70’s?

Here is some quoted ‘code’ and a “quoted link[2]”.

Some dashes: one—two — three—four — five.

Dashes between numbers: 5–7, 255–66, 1987–1999.

Ellipses…and…and….

——————————

<p>LaTeX</p>

• 

• 2+2=4

• x \in y

• \alpha \wedge \omega

• 223

• p-Tree

• Here’s some display math: \frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}

• Here’s one that has a line break in it: \alpha + \omega \times x^2.

These shouldn’t be math:

• To get the famous equation, write $e = mc^2$.

• $22,000 is a lot of money. So is $34,000. (It worked if “lot” is emphasized.)

• Shoes ($20) and socks ($5).

• Escaped $: $73 this should be emphasized 23$.

Here’s a LaTeX table:

\begin{tabular}{|l|l|}\hline

Animal & Number \\ \hline

Dog & 2 \\

Cat & 1 \\ \hline

\end{tabular}

——————————

<p>Special Characters</p>

Here is some unicode:

• I hat: Î

• o umlaut: ö

• section: §

• set membership: ∈

• copyright: ©

AT&T has an ampersand in their name.

AT&T is another way to write it.

This & that.

4 < 5.

6 > 5.

Backslash: \

Backtick: `

Asterisk: *

Underscore: _

Left brace: {

Right brace: }

Left bracket: [

Right bracket: ]

Left paren: (

Right paren: )

Greater-than: >

Hash: #

Period: .

Bang: !

Plus: +

Minus: -

——————————

<p>Links</p>
<p>Explicit</p>

Just a URL[3].

URL and title[4].

URL and title[5].

URL and title[6].

URL and title[7]

URL and title[8]

with_underscore[9]

Email link[10]

Empty[11].

<p>Reference</p>

Foo bar[12].

Foo bar[13].

Foo bar[14].

With embedded [brackets][15].

b[16] by itself should be a link.

Indented once[17].

Indented twice[18].

Indented thrice[19].

This should [not][] be a link.

[not]: /url

Foo bar[20].

Foo biz[21].

<p>With ampersands</p>

Here’s a link with an ampersand in the URL[22].

Here’s a link with an amersand in the link text: AT&T[23].

Here’s an inline link[24].

Here’s an inline link in pointy braces[25].

<p>Autolinks</p>

With an ampersand: http://example.com/?foo=1&bar=2[26]

• In a list?

• http://example.com/[27]

• It should.

An e-mail address: nobody@nowhere.net[28]

Blockquoted: http://example.com/[29]

Auto-links should not occur here: <http://example.com/>

or here: <http://example.com/>

——————————

<p>Images</p>

From “Voyage dans la Lune” by Georges Melies (1902):

lalune

Here is a movie movie icon.

——————————

<p>Footnotes</p>

Here is a footnote reference,[30] and another.[31] This should not be a footnote reference, because it contains a space.[^my note] Here is an inline note.[32]

Notes can go in quotes.[33]

 1. And in list items.[34]

This paragraph should not be part of the note, as it is not indented.

<p>1</p>

/url

<p>2</p>

http://example.com/?foo=1&bar=2

<p>3</p>

/url/

<p>4</p>

title: /url/

<p>5</p>

title preceded by two spaces: /url/

<p>6</p>

title preceded by a tab: /url/

<p>7</p>

title with "quotes" in it: /url/

<p>8</p>

title with single quotes: /url/

<p>9</p>

/url/with_underscore

<p>10</p>

mailto:nobody@nowhere.net

<p>11</p>

<p>12</p>

/url/

<p>13</p>

/url/

<p>14</p>

/url/

<p>15</p>

/url/

<p>16</p>

/url/

<p>17</p>

/url

<p>18</p>

/url

<p>19</p>

/url

<p>20</p>

Title with "quotes" inside: /url/

<p>21</p>

Title with "quote" inside: /url/

<p>22</p>

http://example.com/?foo=1&bar=2

<p>23</p>

AT&T: http://att.com/

<p>24</p>

/script?foo=1&bar=2

<p>25</p>

/script?foo=1&bar=2

<p>26</p>

http://example.com/?foo=1&bar=2

<p>27</p>

http://example.com/

<p>28</p>

mailto:nobody@nowhere.net

<p>29</p>

http://example.com/

<p>30</p>

Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.

<p>31</p>

Here’s the long note. This one contains multiple blocks.

Subsequent blocks are indented to show that they belong to the footnote (as with list items).

{ <code> }

If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.

<p>32</p>

This is easier to type. Inline notes may contain links[32] and ] verbatim characters, as well as [bracketed text].

<p>33</p>

In quote.

<p>34</p>

In list.

/9j/4AAQSkZJRgABAQEASABIAAD//gBQVGhpcyBhcnQgaXMgaW4gdGhlIHB1YmxpYyBkb21haW4uIEtldmluIEh1Z2hlcywga2V2aW5oQGVpdC5jb20sIFNlcHRlbWJlciAxOTk1/9sAQwABAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB/9sAQwEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB/8AAEQgAFgAUAwEiAAIRAQMRAf/EABoAAQACAwEAAAAAAAAAAAAAAAAICQUGCgf/xAAjEAABBQEAAwABBQAAAAAAAAAGAwQFBwgCAAEJChEVOXa3/8QAFgEBAQEAAAAAAAAAAAAAAAAABggA/8QAJhEBAAECBQEJAAAAAAAAAAAAAQIAAwQFBhEhszE0NlFUcXR1tP/aAAwDAQACEQMRAD8AqQzziPNmpiqnIO1q4H+WkB84MdlzRSuM82/jVw/JCORtRmQz5d2VTy6WmS2eSYx3U/qkSRbgFsqRzH2Is4/mCluXc33vy8xTnJjTNqV/T8LKmkhr8Hq1da2aOvTfIh2CFeNt+GxFBP8AJFdFUbPWh+4FdXV7OtZOMR7mK9lBWNN+JBmMQ5cwmfH8DEFhTZUCRlE6CBq/ds/nBh9oYygeY1L9FnCUnBSN1t+w0l9bNomx1cllsOrL9OCTKtKOIqua6UVjP0dEvTyM7gp/3whbkAD0ScX3r6MLg+C2/XsMhCnJRn/5cVNHyJHiX6JKIFhhqnFeagm9BIgjfcJyNBTZiROBUk6Mp8CJRmT4NWU2MatV7n495DPk/wAbMJSRJOTBDItq0KR5s/nJN7LPW8AJWtYAoKQaDp+u4XShxgXhYcbHoxNTllCwETGQ8ag2jmDVsk8w/wCOp/C/hn+mWV/utpePH+D5wmF39NY6UakjUYR1Dn0YgRM5zQAAAMdfAA4AOAOArjkMNQ3vgm7UKtBR+m9QHFD5tpnDtpy+t2R20gK/OsmFtuDpaL5mVyiT5qdEVAvZci5ch5VoSGKbwlWTBr0RPoZT07av9lHfrXo6yLApWMugKpPM9SV1cDm65s/wkOHZBojoqiM+6GpMSj4FhtayNAUi5H3LfQBG2KWssFoSPuJdKyMLKtpuLi+e3jwFICUg7CSHsNVlYlKdizOTvKdq3KTsG8pQirsAG6vAB5FdhP490U4gfjxi+DedoqO4YftmKdKNulO26jiOv+2Ga/bftVNFXpHtVHrpLpRFJTpP3z77T469++fTx48e4LueE+NY6UKk7UniLP8A7rNf3X6//9k=/9j/4AAQSkZJRgABAQEAeAB4AAD/2wBDAAYEBQYFBAYGBQYHBwYIChAKCgkJChQODwwQFxQYGBcUFhYaHSUfGhsjHBYWICwgIyYnKSopGR8tMC0oMCUoKSj/2wBDAQcHBwoIChMKChMoGhYaKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCj/wAARCAD6APoDAREAAhEBAxEB/8QAHAAAAAcBAQAAAAAAAAAAAAAAAQIDBAUGBwAI/8QAPhAAAgEDAwIEBAQFAgUFAAMAAQIDAAQRBRIhBjETIkFRB2FxgRQykaEjQlKxwRXwFjNictEIJEPh8SZTgv/EABcBAQEBAQAAAAAAAAAAAAAAAAABAgT/xAAbEQEBAQEAAwEAAAAAAAAAAAAAARECEiExQf/aAAwDAQACEQMRAD8A2t0YoQpwT2qVzMV+N3UHgrDY2eoM0y58VEbgfp9K1yMRmnuJ5h40jyYHGSeKrWE8u2QAApOMdqGCsmT8h70TAJwMAZx249aKBy4c9vTNUC0zDCgmmmG7Ockjkj1PrUTAjcy5XP0ouCgHae4IomOJHhgI
c55PHY0Uk5IXLMcUBQ27n96JYO2MYLebHtRBA7BcMx29sdxQJqwZRtIP+BQKpjHHc+xzigNGoAO/k+nPAoAYlee5oBiGeWySO9AJCgY5PHagFCADzj2GaA2N2TkjA/U0HMwbPPeiyBLDfkkj04FCl1cBMgn6URwYFGySR6D2oAeQDAxnHGKAhU4IbGc+tFwnwDj9aK7f8v2oNu+IHxNvJdXmt9EmKWSqArA/mPvxUxMZNe3Ml1dvNcMzSSEsxPOferJhht/OWyAPc0UfdgDcuM8n50AMCykZFARsngcY/egTcbjnJz9O9AB2kZGSQOcUCX8x83bntQCMruJ4B7D1oCyOGzxtJ9M80CAdg5UjFE0aFJrghLeNpHY4IRdx/QUNWCw6D6q1EZttEvirHAZ4ig/U4qw1b9H+CHVN3Mq6hJaWMJ5ZjJ4hA/7R3P3q3ET+pf8Ap/lWNm03XkkkA8qTW+3PHupP9qxopV78G+s7VSV0+OcAn/kzqSfscVvIKzqPTWu6XKE1LSL+Bhz5oDg/cd6lEZzGwLrtPqrA8frUCJfcw9gfegUjZsEAffNADyHt78UAjCjzDJxRcO5Pw3gwCGOVJQp8ZncMGOeNoxwMY96GCbQffFFcUXKjDDt2NEo+N3yyM5z3okKuqJgIzONoJyuMGi4QfGcqSfXBoYHJx659qKIRnnsfUGgJn/poJYoTIGLY+eDzQFlQK2G/KCTmgbspfO0qce/agPGcR7nHf9vnQFfBPlOc88Gg7uucc/M0Bd208YJJweKAYrea4kKQICRGW5IUYUZJ570DYqcknt3FE0VuVyDzj1oamOlulda6puvC0a0eZVIWSbtGn1Y1NNbX0x8ENH0qL8X1NdtqDoNxiQbIh8u+WpqL70Tc6fcxypouiRadbW8hhLFFXcB7Edz+tNFvEZxkmmgShbA9PlUA+Hgg/wBqDgmBkd6ArJuJBGR7VdEdqWgaVqMfh6hp9pcLj/5Ig2KaKJrvwW6S1EFoLaWwmPIe2fAz81ORTRm3UfwI1mzBbRL+K/ReyS/w3x/b+1Wexmev9O6xoE2zWdOubUDszr5T9G7H9auCJj2n3PPrUXTlGBB2kYx96GlQMjJJHuRRXBgDgk8DtRKH8w4OfYA0SUlIMsFXJ4oujHH8ufnRRGOSNoJNAeFC77F2jPucfvQFEqgY3nj/AKaCUY58wwq54AoCzOmVMke9QeRnGR7ZoEIF7pnaTk49KDpSSwQntQJsGKjgggZ9uDQc4OOe1Am2UCkHOR7dqA8t/cSW8MEkrGGEsUTPCk4zj9KJT3pzQtS6m1aPT9Jh8SVxlmJwqL/UfYURuuhfArR7f8NLrF1cXciKDJCrbI2b7c4+9NGtaRptrpdqltYW0VtAn5Y41wBUodvGjqUdQyn0YZqAIreOBFSFFRF7BQAKA1xcRwKplcJuOBn1NAR7y2ikWMzoZnGVQHJNAuQcD3oBKkD2FBy8jnvQFxnjjmg4rxwKBMqCBtPNA3vbCC+tngvYo54HGGSRQQR9DV0Y91n8DNOvFkuOmZmsrk5PgSNuiY98D1X+1XRhWu6DqWgX72er2j2069t/ZvmD2IoGG7jbnj1FFlB224PB+VClN4DYJHyAojmPGCck8cetCAxgjPp6UaAGKtx6+9ATAXO7nFBw8HHLN+goJhBuj2FeAcnmgNazW8U0vjweODGyqpYrsYjytx3x3oGa5LEEjH9XvQGlgmjjMmQq4HBPfPYgevagG5nhe3tkFuInQHxJQTmQntn0wKBKTlAeDx60DSY+U9zn+mgsnQvROr9Y3W2xi8KxV8SXUnCrjvj1Y/IUR6c6A6H03o6wMVgrSXMoBmuX/NIf8Djt/eiLfjJwO9ZBiOfmKDhktzQAzYBLZ8oyaDF+rOptVv8AUjNZL4tjA/lT+kr3wvqTQX/pi3Y+DqFxKXurmFWAaPaVzg4I/b0oHlxqV7penRTXFu93dPLsESYB2k8n7CgnradLq1WaIOFI/K42
sPkRQCg3Kcd6Dgp3d6AdrGg5VxnjmgKWB8uQGxnFAUgKuSefSghuqNC0jXbAWGtxQyJKdsYc4YMf6T6GtDzR8S/hnqfSUz3NvuvNILYSZR5o+ezj/Pb6UGfLzyD/AJoFFySQVBHpQDJ5kGByPahAbWxn5+po0OF3D+XPtQJsNwOe+aAuygmMkebgHnHFALHYpJwSeGz2oGpOJWAI49BQEZlYAHkg4oARVOMvtBIJJ7AUAX6xxSOsUgmjViFcKRuHviiVfvhT8NZuqpk1LVFeHRkPlHZpznsP+n50qPS+mWVppdnFa2cEcFtGu1I41ChR8qyHVxK8cLPDD4kgGVQHBNAa0maaBJGTYzDJXOcUCy5JOaA2OMfoaArkheM7vlQNYNOtoWLJCgLHJwo5NApPKLaNpGRQB6j2oGmnRvcyNd3O/DkeErLhkWgklIdCyZOCRzxzQEeRxhdpUnncBkD5UCxXjJ7+tAlctMsIMLohz5mcZAH09aBQYdQwyAeaAuA7MAQxHH0oG1481nbGVInuWU5Kr+bHrgepoKB1u+o6jqlvBH05NevEBPBK0pQR4I4BHZj+1Bb9IS7lsFtNWtYwDGFYB/EXHqpJ7/WtQYx8VfhGbdZtV6Uh8gy81mpyR6koPb5UGKY4YkeYd88fbFAI5AC98c5oQBb+U9+9GnN5RgDgjOPWgAN3yMfWgAqc91/UUD2RSSRg9+49KCR6e0WfX9WS0icRwgb55WOFijH5nP0FBYNRi6dSR7HRNPmu0hOW1GaXaZMdwBwAP3oynE0XRYrFtV02wS4ECj8dp1wcsE7eJEf39qlFZ616ZttPu7Kbp9Zbi0vYzNCcgjHqoHuKsEp8LPh7P1PqjXerxywaXaviRSu1pWH8g+XuflQemIIY7S3SK3hVIo12pGoAAA7AClEL1N1RH0/oTalcwx+IACLaSQKx59Ppmshv0D1jH1ZbTubU27xkkAnKsuSMg/UUFluLlLaJXETyecKAg554zigXiubeRnSKeJ5FOGVXBIPsaBLULoWkIfw3kYsAqIOSTQJMbpm3oqlmwACeF9yfn+1A+Bx34oE5IY5P+YFbnPIzQKAckHuRQCAQOO1AL8r9KDhkZOT9M8UCcrxgAyYJzwD70CT3Itxm8kgi3fly+P7/AOKA9pskhEkZysnOfeg6RH8w3tgjAHtQRZ1uystSg0m5eRJ2UbHceV8fP3oJkBSAVII9xQFdSRwKDDvjN8L/AMSJte6chxcgFrm1QcSf9aj39x61YMH8Q+CkfhqpQncxBDH5H6VRwXJ/Ke1Am2QchuMYOaNFSAVznB9qAm8f10D2RmX8jDHP3oLbebtA6ej0m2LrfX6LcX7IMskf8kf6HcffIoG8yTadZxSTxCK3kRZUwSFfkruIJ78GhiS6Y1OS3160uZJFWO5bwZtxzuQ8bcfPNMZXvo2wsLnQ9R0q/maJNNv5Yo3bjCuMAHPzqA2jdUan0lF0/ZXcElxp9zE+5WVd/DE71IPPB7H2po1bSNXsdYthLp1ykyEcj+ZT817ioITrnoux6vs1gv5JYnjz4ckZ/Ln5etA+6N0BemdBttMina4WEFfFdQpIJJ7D60E5I4Vo9qnnsQO1A3k0yzeTxhCizZJ3qNpz9RQO449igMSxHGW5NAIwBtUAUAMORkfegMhG3jtQD8+fvQGXJz7UAHuRQA5YDI5FB0qCQA5yaCs2/SFit/Jd3AmvJ2bO64ctt5zwD2oLMilVAUDgcAelAJLbhgZz3oGN9HPIYmhtrWRw2czjt7Y+dA+h3mJS67W9gc0AvuLYANADpkZABHY85oPOnxy+Hx06Z+odGjC2jt/7qBRwjH+cY9D6/OrKMebcceHwfaqCYIyDgZ96GhHOFJI4/WjQpXnsaCz9J6fDqGvRC8OLO3Vri5PB/hqMkfc4H3oDT3UupapcXrKS9zISgDdhnAGPbsKC5aLLBHq9p01c6bbagPE23kpJYhmz5IySAAMj6nNGdRnT2lu3
V9vaQQrJDHfCMFj5kAfufsMUFogu5H0jrLUYXK+Lq0aRse/lf/8AOKlFfudagvbnQpNQRmtILydCwPdCQcgMOMZFQanPoeiawBd9M6s9jeKPK1vKQp+RFA4septa6fuFtuqbRrmzx5b+BAdo927A+vsflQXfTr2z1O3W5025juIW/mjOR9KAZI914khaRNo4XdwT9KAl3b2+oWpjMoZWbOVfnI9sUCrXUNssUU8w3sQoJH5jQLvwQQC3NAKvuUPtK54waDg23v6UA7weBnNAIOBigMr+hoOjdZQdhBx3waAVG0Z7UBWfAOQSflQChyNxBAxQRutarb6bHALi9trSW4kEcJnGd7ewFA/j8QEK/IA/MBjmgWDDBB7igj9dupLTTbiaHZ4oQ7A7bQW9ATVgwXSNV6onl8azW6t45pWdxHIxWA/zNtz7A8Glg2S1u7fX+nt0J/H2c4MMhmQoW9GBUjj60g8sfEHpebpDqi4sHLG2Y77eQ486E8fcdvtVFekGW4UfegKVAAKgnFGhuDzxQXbpDTZF6a13UnUqrCOzQ5wGZmXIJ+lE0ppkEK6nJcRWcTW9hA08iKcjcowpye/mxQ0+6VRbC/jvLm48L8LG9y8pIOXxkDnuSTipqHXQMng3es9S3fhn8DbvcZI5Mr/lH700dc3Dad8NtPs4nU6jeXD6nMCwBRF5XOfU8YHrTNJFF1X8RawW1jc4GxTKNrZB385yPkBTFw1stSu7Ni9tPLGSQfK5Aphi8J8UNUm6fn0u72yvJ5fGbuF/39aYYtGgadp9/axXnRetzaXqnhqZI3bEcj4547Ak/X6UxFisPiXe6NMdO65057eQAr+LhUlHHbOPX07UwWXpQ6BqMo1LpgW0sioVI8Qgxk+684qC028M5890Y3kHKbUwF+lA4LDOzu2M4FAOG3DaoI9cntQdJxzQEyR259f/AKoGl5fSQRFo7ZpB/MhYIR9zxQdayyXKb7gqox5Yo2yB9WHc0DPUIWnhWKxkuYFRs5gcKWbPY59KBkx6isVeSGW31JNwHhyOUkA+o8v9qCfjkMo/LJFKqBmRvSgc2swnRyFcYODuXGfpQMtRsLK8vYJL+wjuGiUtHK6hghz6Z7H6UEmCsig84I9RigiruC9t0DaaVmIIHhTOQMeuGwT9qCJ1ywv9T0U29xFFiaVBJGHz5M5ODgYPY/arKJPTtLW1t44i7SKq48w8x+ZPrTRJoipGFQAAdgKgzX47dMJrXSrXkUe6807MykDkp/MP8/aro80FQyZ+tUJ7hvH0x270XQ7KGtXvIk0T4c9P2bIhkvpnvJVfjIxhf7qftRDXpu0/1DpzXltUlkvmWMBI+2zdnn64oYa6yX0XTm0i4jQ3t6wmuV53xov5UPpyeeKyLbpFtZ6Xpmn6TqNq7/ic6pqQRR/DVf8Alq2fTOP0FXBnXU+ox32o3lzeW+JrxlMXHKR9wfbJ/tVWK5f3AnaAjafCTwwcY4BOM/qKKSjA4Dg8j37UHZKkE5P0olSFlcLDdJPbTNBOigjxOVZu3+80Rbbnrq9l0t9I6isRd2rgKpPlZMdyre9An07oupoh1zo2+lea2fMlr+WZFx7ZwwqWDVug/ihDq7R6b1EPwmpMNokPlVj8xng1BqEUe1EAJOMDOc5oDSxq6YYeuaAJF4oCBUQ7mJ45zQHYB14wR86AVjBXyjge1AEcRTHlA9hQE8kbgEohJ5yQM0ETHNqMOr3IZQ9tIMQyEjKt7D3FBLqywRPJKTuxlj3zQI3Ut14e+yhWRj28Q7RjH60EfpF3rU/jLqFrHbS4/hqpJXH19aCRa8jgiVr1xGwXzYyf99qA9tc29/aRXFnKs1vINyOO2KByoxwe9AYocHGKBvdwLcWzxSLuR1KuD6gjBoPHXWujt071Nf6YSSkUnkJ4yp5H7f2rQgWAA3Y+1An4j/1t+tBrHxKuYS+gx24LRx6ZFtI/lz60FY0+/v8ASphNpd68EpXY5AGNvzFF1YOi
rZbzVrvX9dkNxZWH8eeaY5Lyj8qj7kcVlETqOqXd/HrPUNzcNE16Tbwxf1JkEgD2AA/etBte9R2Oq2cv+p6XHJfBFjgmjkMaRgAAEqO5o1FWfbgjsR8+9AlI5CgEggeoNAq0iug8uD7g80KKmCcZ7fPmjJzJfT/hWtjJvhOPK/OOe49u9A96X1W90/VrRtNkkSfxQF8I5yScdvX6UGidSLpfVFzcvbRiy6kgZBGysFW7B9T7HHNSjU+o9S1iz0e2uNLmX8RYxJ+KgYeVwVGTn5d6gjug/iU3UOt/6TewQpP59skL5B29x/8AYoNHPB78Ggb2l3bXO78PKsoyVyvIBHBFAoSkbfyhn4GfWgTnmWFN7ybAvc4Jz9hQRdx1dp0S3Dw+JJHbDdPIUZUjX3yRz9Bmrgzbqb4x9Oxho4bB751O5HPkXPsc80wQHT/xrJ1IHUbGKO0kdRiBiAgz+YjnsPpTKN/tLy3vLOK5t5klt5F3LKhyCPemAYLuK5XMDEj1OCP71ArGWLMPT0oIbU7h11u2t49OllWWNm/FIRsjI4AI/egfQ2ktpbww2XgxoDl9wJ49cUCHUGv2GixM13Mkcm0squwUH5/SrgxDW/jFcXOteHb3otrKEEiRISRM3zGc49v1qDT+gfiBpvV7y2unxTxywRhz4ozuHbOR2+9Bmf8A6kNIEWpaZqiooEqtBIR6kHI/atfRjDEt3AKjgVQjug9j+lQWh72e/htTOzyeCnhHPomeMYoJvQum7vVD47K9jpsQBkvZ5NoAHcgUAa7rKamE0Lp9Xh0G1OZZTwZSO8jn9cCsivdS38F9cJDZIY7G2URxKe5x/MT7nNaEKrENwAFPPlosFwS2cd/cc0UlIm3JOeKDo2LH+UA0SjgDk98URzPiJ2449e/NAbS7v8PdpKkpikQ5WQLkqccGgmYNQmXWLeQLG9wVRQVPlcj+Yn3xQa98OviAjz3WjdXSpFdliEuJCNjDtsJ7enepRdel+kdL0rqOTVdIsoYklV1dixO3nunpg9jUCnU3WMeka5b2EUcl3JInmigQs6ZPlJAHY8+vpQP9O1m3nthNo0cTwM2JDwoVj6H5gd6CbhtUiVn8TcXO4ktkZ+We1BAf8Z6fZ2uqXWpyxQrbStGseQzMB2IA961B59+IHXmodXal+HsPFh04HbHCo25+bY/zQWv4f/CCxvII73qC8iuXYb1tYZeF9txHf6U3BatX+DvSl86x6cr2dwjbnEUmcj6Enj6U8hLdJdEX/SmowJp2tTT6Oc+La3HO0442+3NBf1LmRUjjQAfmc+nyHvWQockYyQcY3CgaabaPZxGNnaUFi3mPb6f+KA2q3RstNurnBxDE0mPfCk1YPMemaP1L8RtYN9fJPc2aMUaVmCKg54H0z6VRYendf6Z6T1W56a6j6fgfwJyguhGJmPzbIzjHtSjTn0zSunbi01fSkt9Os5GAmWNCDOGxtXb6HnNZEZ8etOF90DPKFy1rKk3zAzg/3rXI8u7zvOTg4zVoTLDJ81QWDTb2SwuvFgcrkbXwM5H0PFGqsjpd6+kcT61Nc2ieb8OikFc/9PA+WfSjKA1nWBzpFlZ/hLWM4KH8zsPVj6mghN4IyQRk5NGo5BkFmyAfSgVjChdpGO/FAXYpOHLBe/FAQqoBJbA9sUBGxgtgEj/eaCf6DGjt1TZf8RNGumKS7mQZQkDIB+WaMrf8Ub/ovV7V20JIYL62K4khhCLOCcEcAdu9BmCuEQvxvyFUg42+v+/rQaj0zax/EXRY9Nns0t9TtM+BqCKAjEclXA98jn+1Si7Cz6u6O0tLjTrxLu2tQJJrDwcKE/m2M2SfeoLrpupDV9Mh1OytUS2vIN8m4BZQf6T7+vPpj50GfdK9L6rJqk1y1y0elRDKRqdjHHoyDhjx39e9BZr7fagW0j3kul3iETRqHkeF8ZBUjkZIxjtk5rQ86dW6r+O1OcW0UtvaRsY4oWfLKBxz7k/5NA46P6X1rqS6
WPS7V9v88rAqi/f3oN46X6C1DSotkus+BIwKl8hn2+3PapROXPT2t20bPY6kJ5UGYmbIfIHGW5z68VBI6DrzzWSrrAjtrwFUbDja5OBlfuaCbluJLeNwIpLiVF3bVXAP0Pv8qBxLO8cYcW7vnuqkAigNFKs8CyxlwG/lcYI+1A31ayF/pt1auSFmiaM/LIxVgwfoO413o3qqfSLyUSwodogAyZVGcbPTPr71aNDvendJ6wtbu7Fi1lezK0bS4VZMjtnFZE0bC5u9Jh0qRAr2yw4uWx59vBI44PegN1tpbap0lqOk2sipLPB4aFsnHbBNOR5A1exFhqFxbeKkngyMhdOxIPcVuhiZFz/zBUEwcKvYnP6fWi0+6chjn6h062uATFLcRrIMnzAsO9EehNR+GvTV3GUh0+O2YsGaWHIf9amjIfib0no3S0VtFY3M000zMzLJtLKvvkenyx96oz0rwNjA8cj2osFLbVAbOc9jRQiXOAwxnj3oBlAxwDj37UDY+vHOQeTQBIdqjcPMfnQwJclWyBgCjJBFeefw4VaVycBUGST2wAKD0L8H9C1rSIILjWLSCytY1lZASVnlL4PI/wD8+vvUo1uwbxI5GkjdVc7isvOBjtj2qBWKFZiQ8CJCB5FHYj5jHFArDbQ20ey3RY1HOAOPsKCH1u61CPSLt9MtlXUHUrbCbJBbPdtvYetXR5T1y2udD6lni1ErJdJLvlK4wWOCePvVgsV/8Sr67UW1vA0NiowIonMe4+7FeT9ARQRmodWa9EYpPBhs1Tygw26rk9xknkn70EjonxZ17TXjAeKTkZ3L+YZ7N8vpSjX+lOpNM6umgkMG3EgBV1DYbG4kewz2NZGkC43CP8MPFBONysMAD50DaHVH8S6N1a+BaxMUjd280pA5wPb296DrXWLK9WNoJdtwybxDKPDcAnHIoJBifTBzzmgaz2UFzPFNNbwvLCcxuyglT7igdRRKg8qAZ5JAAzQEnuYoHiSWQIZW2ID/ADH2H6UERr12BY6hueIQJaO7SK/nHfnHtx3pyPGWoN4jynuCfU963RF+DL/UtQWTkjaWY/8ATnHFGql+j1VerdJY8r+KiJz/ANwoy9C/EjqSbpbRY723RJC8ojIcZ4IJ/wAVkecer9en1+9FzeLCCq4URjgDP7mtLhteadBY2kMczyHUpcO0YxtiUjgH/q9celAiLy1kjCX1ruyMLNGdrj0+h+lE0+t+kNQltJ7yKS3jgiTxUFw/hySp7qp70NV6YEBgWUNjBoaKeAODnHrRoVgDnBP0ozpxZ2f4y5trVeGuJFiBPpk4zQep9C0LTembS30fQbWP8ZsDyTugZgf6ix9fYZpbgmbXSmXULaa6kMzpltzcjJ//AGpaLCY1CDsF74PrUCgHY0HbSx7Z96BGUfxB2xjtQZ11t0Tb6jNfyw2wM18gV5AcBdpzyPnV0Yp1F0o/TEczXjXaTOQYpIk3QlT3B9Rj0zVl0VKbVppImheUSwbsgFfXHc0De0tri/ujFYQSSyfmKopPHqaDV/g9p+padr/gkSRTzKu0kZRlPLYPbOPf2pg9GWzRCMJAFxH5do4wayKX1z/G0CdzqLWRkiaTxQBLudclQvovbv3oMU/4Z67uwnUAt3u1Zw42yhmx3/Ln8v0oN86L1d00i3i1UiGQIocNnEbnkqT2xgiguEbI4DIysp7EHNAZnxQQ/Usksej3EsCl5EUthR5sY52/Mjigr6Qrp3R15LqEcIlmgdpFGAsY2navPJApyPJtwd8rnGBuJz6Gt0MzGSTyf0qCwSKA5ZsAjnn2otTXQYj/AOMNIDqCrXUZwf8AuGDRG6fF6Gyfo6+ub0CR4EPgIScLIeAcfc1keatN0661a+S3sYTPKzAbV9B7/StLrQ/iXp9pYLp8elWsUM11AzXMqt53I7g7j244oiB6W6Tn6j2TeAy2FspTeB+Z+/8AmgtnWlvpdl1Dp1pq07Ja20GFQpuDHHAwPf39KDHr
iVTKSPOCeBnHHtQGsrG5v5pfwcTOIlMjgEeVfck0XRIreS7uUigRpJXOEVe5PtRE/wBJ9HaxqvUcdhNFJp0lviaSWVcMgzxgdySeBipo9T6O8NppUJ1K4iW5KgSvIyqxb5jPH0paJm1NvKivE6Mp7MpyP1FQLRTwy58F0cjuAckfagOTtO3+Y8igMWCIWbOPlzQNhNBOWEbq5Q+YKc4+tAD7JEZgQfXj0oI/VtIttXsZLW5hRopByCP/ADVlGRa78Erae63aXK1tG3LAncM/Kmh10l8IZdBv4rxtTE0yggJsyoz6/P0po0zSNKEMdo9xGnjxuzkqMAEgjj7GmialjWQMgyCRyQcGoITqHT7q/a30+G2jFmwLSzl8GPBGFA9c5NBPRwJDbpHCipEi4CjtigqfWltqCaG8WhNbxyzOBIs8W8FcY4+dWQQ/wtuZdIGqadrknhy2u2QyOSEZOeRngY+XvTBZZevOmhC8janbqiZ53Zzj2FMFcs+sh1ZqsFrp8UkGkrlpbh+DNzhVX5Z5NQTfXyWUXSV2t+wW3EZ5I4HHt61eYPI0mA5C9snFaoLsPv8AvUEk5JcA8cZG480WnOlXX4PVLO4yQ0cyP244Yf8AiiPUfUump1B0/c2O8xfi4v8AmL3UcGpgw/SujNX0Trj8PpckimOMvHO/kEg9Rjs3PpV0aFq/Qqa1ZJ/qcrverEqNOwGM+uMfemiVtrKbQdMNjp9rvtkhPht6mU5yT8u1BkvXg1qXUtOvddgRY1R3j2YHiMvZSD27CgzSCyuNQ1KK0giL3Mz4VAOc/wDignoNNOnaHeiW8hgkku/Al2+Ziqgn09M0ETp0qpqSmGKOdFcEeLwMfPHag3JLuCRtPmQWsDhNphtVAcn1w3BPFSwDdWGpX1/OYdOtbbSrlQ80szHdn0GR24/c1AbWemdatLbTJdGvJIJypDQwMV3exwOPatSz9Ei/R+txy2mr3evyHV4miWIDhGwwyGA7nGRS2YNZAUBWYDdjGayEvxMYB3nYu4KCfU+woG93c2enWs1xcPFDCp8zEgDPzq4GGgz22saS1zZSZhkdsFePXt86YHWmySeLNDMYikZ4YNlvvUD+VARxQJqgwRQHUAAe2O1AWOFRM8mDlgB37fagb6reXFt4ItLZJnZsuWfaI0Hdjwcn2FAz0nWX1i4u4xY3VpFbv4eZ1x4vGdy/KgkriN2aMRlQoOW3DOR/5qwYr1P1tp2pdS3WnanKkGh24kRl2eaYgcb/AFxnnAqiv9Jno0dRLJPbtdQtkNPIALaMnODsPPpSjbdK0DTbWQXui+Gsco3BU5hPP5gPT7VkU74t6PZHpq/1N5ZZbwrtRnmOwDPOFJwPsK1xR5ybudw788VaC5X2WoJRULSBpAe5PA/aiinAZnHck4A70THq/Qr23/4Y0u4lmCpLBGA7epxjH60Du+WGOBvFlFuWOFcYyCfbPrUojri6k06xX8PFNfBUJ3ltzM3scVBjfVvVXVNit5dapNDZGQGK3shjeAe7YHIwAOT3zVggNTiu+orrR4p7m+upJFR7h3TPhggDaoBwRjnPH5hV0af0xotnoD3l5dWdrY2YjGLhwPEHoef996CC6m0HpuPpk3Wny2s9sJPHJ3AeI2D39T37UGU9QTDULuGPSLPwIyoVIYk2lj6/X70G2/DPp0hVudRuBLcwxhRGkeEjB9M+p96DSLprVHiieaAE+YxHkke4H1qUOIBawL4uAuc8nvj71AwjRtQ1eO78QNp9odyLju/qT8uf70Gb6r8SpLzryy0vp+4NxYSSCEswI2u2Rn3OOD9qC4dVamen9NlaC7tUaIFvCmnHiy4HJXJxnOeDVwed+rOvLnqSyWO4jZSru/kc7ck+30GKosXw2+KmqaDJDY3jR3OmqNoRhtZAP6SP7Ggtmt63qbTJ1XLazJpslwBFblypEOAA7L2OWANS+xrnR2vW3UmjJeWp8wJSRf6WHeoJdSPMCRmgMq8DmgBp
NsgUIxBGSccD70DczmS8MDWoe28MN4+f588rj980CktuJZYpFdlKZ4B4OfegQ1hpIrVjbsRMBhBj8x9qsHnX/hm36y1O/u9V1ddPmS6aD8OkQdyxOSe4OMmqLUvwQsYY4Xjv7m4YEEhwFyMY7fXB5pRbvhp0jqfSMV7b6jqZvLGQAwxAEBDk54PuD6VkVb49a5DBpiaNaeF/FIaQDumOwpzMGDEZQZ5+VboR8In1I+9QWDY6gFn7jjHpQhtJEFbAGBnijT0P8H7qPVOh47a42yNaymPBOcDupoykep+m73V7g/8Av2itQowvJIx6j5/OpRjfUWrax051RPY6LqFy8YACkebO4Z7HjNWCY0ToW2utJbqPreW5na4O4R78cehY9+fQVKLX05p1ro97awC4kX8VFmJLeEKdoyfOxJPbHbHYVAz0rqKPWNauri9t1ktJgILYgEiNFJ/Op9STmrBBdeaFCo0y3jt444DI3jLE204Y5DD+9UPPhv0NaRtPq99mSLOy22nOfdh75oNC0vT7m1uJGvGiii3AW8UDbQAeDu9zUohLlhouqap1VciF4I1FtbxSthtobBIPuTn7VBJ2vUth1TYk2ULi9iALwyKQYz6Z9CM0Ft060/DWEcDHe2Mucdye9BFW3SekWt3LPb2cUTsd2UGCG9x7VYMzufhzdX/WmoXj+BPpx3I7XZMmXYckc8EVRKaP8I+nXikLQuxOQSTnBzj6UEjonw90XSrq3S3s7dplJcl1EhGDx396lFx1TQ4NVjaC7UNCU2lAO/8AvNWDCLp9X+E/WgWImXSp2LRq7eSRT3B9iP8AFSjd9P1+21TRodVsMS2rLmTbyUGOePXFQSltcLcW0c1vh4mXcjDswoDLdRm4FvISsgXeTghDzjAPbPyzmgVFxCzuiOC0WN3sM+5oDqySJmNg3rx7UFb60tNUubGJdFdEvhIdryflUFSM49TVgyTQenJemOorf/U4H1Fpp1edAh/hOQTuQ9375JA4q0bnbPBcxxT20wkjKkqYzlT9ayGWu38um2MbLEJ7iRtoUds+/wAgBzQeW/iHqi6j1PdzeL44HkL9txHtWhVUOVyvHNB2F9zQT8hUAhAdp5FCG0mSAzE4HGDRppvwL1bwOpJbEsFiuYyVX3deR98Zoy2ZtRgmjkSRZocEp51K7se3vUow/rfpFE124mVpfD4mk2MWdCc4A+VWCH1281/UmFnpklzPYRFBEG8uGC4yQfcn9alGgaJo95rRsbi53WaxwrHOm7BjYcHnvz/moJaw07pXSI5IW1K0CQnDhpAWB9R796CudY62msTRW+gadI8KnDXMkLLv9MA8HGOKC3dAXF1dRfh75f41moi4G0AdwcfTj7UFhv7RjqMBV5AJFZHkR8FRjIx6CgpXVNjJ1JqNn07p26CztSJLlpIydyj2J+fGaC+afplrazqLa3SKNIggx3IHYUEsBk4wQc4oK11L1z070/M9rql6wuVA3QopLcjNBDwfFboqeSO2W7kQNxuaEhQfnQLt8TuireVoV1UeXnckTFT9DigHRuv+mbu9ZV1W3Nyc4IRlVl9O47/KgtU+s6baw+JcX1umRkAuM/p3oK/1t0rYdX6cqXKESqN8Ug/Mp9P1qwZ702mo9GdUTWJsmOn3EY8CAORGXJAwScjJ5q0bJDNLb6YklxbKsgA3wwndg9sDtWQN3aw3ZKTwLLEQOGORn6ehoG1vYAw3FikRt7JSuH3Hc/GSc5P0oHn4aO2uGuYyiose044wBQIm6F1dwfh5ARs8R8L2BHGfnQHv9PS4PjxrGLtFKxysm4qD3oCxboIIo7e1jhQHzAYUJ8wP8VYM56j1ktaal1BMrS2sAaK1OQDD6eUepY9yfQVR5zv7hrmaSaRtzyHJY0DcE4BPIPb5UBwOO4oJYzFvzEYHAH9XvQhNZN7AEgDOSDRo/wBA1SXRdVtNQgb/AJUgfBHcZ/8AGaGPVlhPbarZ2t5CEeORBKje2RUr
I1zYxTBhMinIwcjvUEcugWkO4AMisMEA8N69u3yoERrOhWNxNYy6hapcxAeJHM+D8u9ASLStLlm/EWdpZyxy+Z3RQ3I5B44oJKTT4blFWSNBEOeBg5+goFYbOK1TKhIxnIbGMH50Cpcyo6AMrIcM2OD68Ggb6Lbbllu5Cd88hYBu6rztWglSNkfm4P70Gaat8Rba96w0vp3R2mhufxyi4kO3YyDOV9+f8VYMw+P0cP8Ax4JVuEKzW8bEr5tuMj0+lUZ7Y2X4288GK+towRlZXYqv9uKCQi6YmbT2u11GzaJWKnYxbBB49KCFnhubdiwL+U8OhP60ElpXUFxY6nDdXQF0qYbZKxwT9vWg3npb42aHcmC11C3uLSQjEkpIdQfr3xUondP6x6e6tv7e101hczRzrNtaFiFC/wAxPYHtUF+lj8bYCTgMG59cUC4OBQQOo2eoXepFTeL/AKYQN1sEwWx6Fu+DQLX2kw3Ok3Vjas9qJ48Exd1PHb9KCE6R0G86Ut7mK71KK4gklM7TyKRIBjtjtjj96AOreudJsrMJbXksk8jBCbVdzRjONxBHP/3QQ9x1jcWGkERWWqXdpMPCt7x4wfFOOWPbA+fAqwZr8TJ9Qbp2ymvEjsrSTEVvawyHz45Lv6E4wPqaoyl8g91OKDlYEc8mgKW5PH7UE80f8PPHl7gDFAZkUjawUIQG8w5ouknAUbl9Dg59KK1X4Z9XXFvo8mlRXax3KHfBG8Rk3qe6jHOc54+dMZO7jr/qK8vWtba4tYu38TwvDOMc8N60wPLbrW10PS7pnvrnUtbAKobgBUUk9jg8f/lMC2rydMdRSaRqWoLEbx4UefwxkL2BVvvn9Klgv3Tp0lPxFno6wJ4IVmEOMEHsf2xUD2e4ks7n+NGDaCMu8q8lCMcEfPNA6tW/EwrMybEYZUHnI9Cf/FAzu7G4LXTWs38SRNqhs4Bz3z9KCO6x07UNT6altNMvEs7xkC7nPlI9R2/egwbrDT+r+kupLCeK9nu3KBYGRy+QvdWFWCU6avtA6h1iKDqLRhpmpvkxz2p8JGb3z7k557VRX77TdHteuPBut401CAouyXGcdvmASaC069030brYaay/CwPFwWspRErfUN7UGZX2hWSiY6ZrMc0CvhUlBUk/UcGgiLq2mtG8F54yDwfDfIoGkrRsSZXwOB2zmgsvw06XHVfUcFvI22xQ753Ze4H8v3OBUo9a6XodjpltHbabDHZIhVsQqBuA9DxznFQTQUe5oDYGMnn7UEbpV3JqDyz+BJFbBtsXiDBf/qx6CgDXL42cSRwGM3UzBQrHGFzy32oG2i2kKTSI80lzMow0rqQoyew+3tQO59KtJJRI9rEzgg7igz3z/egZ6paJdGGwW4eBXy7pGeXUdwT6A5qwebPjJrcOr9TvbWZQ2Onr+Gi2nIJHcj7+vyqigOuRk+vtxQAqEk4BU0ABj7j96CzzKxYD0GeM0CQG3OMAjkfOgbSZwzE4PJyfWi6caTdzaffW95akrPFIrLg/tRHpGzs9C6t0W31FrO3Y43MrcbH9c/eloZ6v0JpWoKlrHHBbScSFEHYc5Pz71NDXUoJrK1g0dvw9qsspW3nXaN6KMrHnH5ieSfan0U3Rr1uidaRbiwk8BUSS6naQkjc3ZcHaRnn70wbja6lZX+nw3NvMksM+FXnPJ9DUCeoXj2hSG2t2km7op4U/f/FAvHaNePb3N0jRzRA7VD8Akc9u/FAvcxnawZQ3HYtjJoKfDFAdeub6Vo1dSULIBtTbgYOfU8jNWUVvrm5ih0m1urixhlsI5HJliOwR5/LkkHHJPamjHdQ0HWdemlutN0+YWBYtC0rHDhjwVz3zWgx1n4e9U6QE8XT5Zd//APR5se9XBXbjS9S0zAvbO5tyWKAOhG4/KpQ3ZHXO5JFK98qRUGhfCbph77Uvx91pv463wVjR0LR7s483796WjW7rTrXpHWrSW2YK7lmXTbaIFpCRjjHOOSeeBipa
NLtXuIre3R43lnkGXYADZnnmoH6Dkbzn6UDGWe9a/hKG3jsW8riQMJS3svp86B6zgMQmDtGT8qDNb6XUpOoPx72tzOkjFYowOduDwDjj70Gg6Wsq2KNeAJKRuKk52fIn5DvVkENrvW+iaSAsl0txI2Asdud5Yk4A4pgzv4l9ST6JZSXbyyprWpw+FFa5G21gz5icfzH3pgwCSQlh688+tUEwjjngZ7UHAHuWAHpn+1AXj5frQWXOGBZcKSe3c/rQIyAtnI27eBj/ADQIMAuH8vHBB9DQwVpPLjOckEZ/ahi9/Czqj/S9VhtLm6aG1uZFUsT5VOfX5Gpg9GiNJArxsrxsv1BHypYGF7pljeG3kvLZSlqzNGGxhSRjP6GoG1yLUWiWc2nSPA4KJGItyYHYHHarop3wu0jWYNUvzriNBp8ErraRMANxJ/N7nA7ZqDUHgSQLvAbacjI7H3oOuIFuYzGS68jJRyp4+lA0m0yDwGjiTw3bzBwTuDe+TQVbV+mLmW1NtbSok9weZiC2zPLEZ+/FBM6Xo40/TYdL8Jr21G4vJcuCck55HbFWUKQ/h4tR/DTz2o8TK21qmMgKOf8AfpmrokljG1i5BHI57D5VNorut9Lab1LA638W6H8sboNrqQckq3pntV0RWsfD6K7SGC3vTFahQJY2iVmkI9d+Mimie0Hp2DQ7AQacio3JZgqjcT3zxk1KHGldPWtnqMupS5uNTmGGnk5KjGNqf0r8qgmkhVGcquGblm96BDUZZYLGVrdN8+MIvux7UGKTdXdbaRrFvbaxbWN4d58BmwCjHPORycLx2q4LNe9S9TdN6I13qkWmzSXdwBCGlO4hiMKAB2A9ag0WySQwpLLtMjDcQBhVz6CgoHxF17XbnUYunulgsUsu4TTvjIUAEhR+x4qwZwtkOi7651PXJobm4tohHbQhdgecjnaPZeOfeqMy1vWLvWNQlvb+ZpJpWyT2A9gB6Cgjy5AO4A5oAGRgBR39aA7Z8MkFtxPY0AbV9UGfpQWhj5TkBQGxuBoEGG1iQCyHvj1oELgSkK6oRnIBIxzRdJ28Q3+fHiAds96LoH3AF1wCfyijNbF8JviI1rbQ6Pq/iSopxFOx5Uf0n3qUbWQk8II2SRyDPuCDUHMpSIiJQSBwDwKAgTxApnEbyIQ+APyn0+9A5B3AgfmoEWgcb3VlMpGFYjt+negSs7zxH/C3RWO9UElM8SAHG5fl/agNdXcEbJAZ1WadvDQDJO7Gf7c0ED1dqWv2enzw6Rb24nZfJd3EwREHqxyMZHzOKBbT7H8PZWTK5uZ9o3Xm1SzEry5PsT7UFF+JnUezSZ9LttRs2km2m5KSFWXDZI491AyBzVwK6J8T7CRtPjee0tbaGAtdNISdoXgLEo5JJxyfSmC6aF1fo2vELZXDJOxwkMybHYe4HtUFiJWJd8rKqjuTQcZV8SNI0dy43BlGVA+ZoBniE0RU7tp77Tg5zQUv4hNrU2tdNWOhylPEnaS5UHGYlAzn5cn74oHGt6l0z07k6nJC123HhKPFlbPptGTj9q0Kx0XMvU+ty6vqQtpWlZo4LOdGDWsak8Aflycgk+v2qC8a1q8OnaXLPOz2kCIWkZ+CqjgYx6n0xTBkmp9Sabp0KdRyI5vJkaGw08MVKIDw8jA557896QY1q2o3eqXr3N7O8skjnlnzgn5e1UMCGV/MOM0BJFOVwfX0oFtgZTjkg8UBtpOeO1B3hg85FBZXVMM+VJAA2+h96BO4IMJ/p7qvuKBKacmOKB5CYYx5UzgDPfHzoGwD5OApJHAHrQEAk8NWdNpx+XdkA0AIGhkGWOQcgg8UGw/DP4kmwhi07WCTZqAiSbstGc4+pFKNvs5o7q2Sa3kWaJxkOp4NZDOHUh/qL2k8LW78eG7kbZv+0+/yoEZp7fUpX/BXpgvYZTCSRtO7vtwe/vQdY6jeyatPp91FEPw8aSNMoYbi2cADt6Z70DS60KW7luJdV1JniJzBtURG
2b0KN7+/vQKSWUWnLLqN3cSLP4ex5Y8jxiPykr23+nzzigpXUPVOu6Vqmmf8UWttb9OXDqkjRnfI3H849uRkDOKC0axLFr+nLB0rrUMM0bqCIGGGX1AH09qsGc9UfBiTV9Vhu9Pu5oPGLNeG6bczN7jHvVEr058Gre3sLeDVrmOdo3LmSFNjEH+XdntQXO51XQOl5YrCKGWa8SMYS3tzMyLjjJHb9alDqz1S5ktJ7nVdLmSVDiOONfEMiE4Xy+h9xUDm11CaTxEOn3VmpHFxKFCr9s5GPmKAus6jb9M6RJf3c88yxpjcxL7uM5OO3HrQVTSupoOuYdZMTSpptriNFtXKXDA/Pjhs9h2xQOJ7Xpnpa2S91WK2swqjbGw3ysfcnuxrQsGmapYvpwvra1FtDL52Mi+Gx49sZoMb+LXV0t+jWl3OYLGTO2zjx4h2nyszHsG4/Sgxt5ZJmEsjl3PB3GgSlyXBxkDnNADseB2PsaA8SoXQyFgmQCV70B5R/EIjYmPJxnvigEKcYDfrQF2/X9aCwqC7l2zwfT/FAJJ5747E5oGTqZArKOfccftQHZWwmOD23Z4NAEkTEBmwR7g5AoG7KSSE5HyFAMTyQsMHa/cEcEc5oL58P+v7rppdryPNAXx+GfsQe7Z9D2/Wg3zSdX0fqzT08F433eYwscOpHt9PcVkQPW632mX9vfWdrbXiRgrIdu2eHIwGD57+nIoKdc/ELVdC0u5afp27SUtta5vJMMzk+UDjzYHtQWDpj4gxXmif/wAitXZkGZzFGW8MehZO+PmM0Fibr/poWQmF6xXA2xmFg59sKRk0C2nQP1KFvdb0vwIUJNtDKcsVP8zD0Jx2oJDTNA0vR5p7qzs44pJOXkUc/SgNfa/pNvbF5L2JgTsCo2WJzjGKA1jcNcxOF/m/I0zbt4+gxgenNA5s7CCxWWYQxpNLgyMiY3nt2oHajcuexAoEL1C1uyFkVHO1ixHb17/KgwT4rdXWep6oul2OpywaTYqVLxDyySDjaM9x6Z+tWDPdB1TW7Wa9sumpGlursqMQRlpXwd3BA4571RcdN0i41G7h/wBSmNxqdkwn1O6u5/4cAXlYgcnngE/pQNvih8S211obHRyYbWInfJG/Ex9MD2+tBmNzcTXTtJcSSSSHH5jngDAH0oEdxbg5A9wKAzPwO+fegSLfLJoFoR5fUfegXOcHGAT8qAM+XaBzQCCwGDuyPlQWJpV37Q5TIweM7u/f9qA9vNYpFML2KaR8YiaJgAh55PvQRvnYoqA+MThcDnPai4PKWQlH3K6tggjnPaiEmz4ZznBPfFAVWG0AZDH1z2HzoE7lhv8AJIHI/mGRQI8AEeuOMGgndN6pu7V4RJLKY4WDLhypXj0oNL6d+MMS4ttetDdQgDNwAPEwPRh2NZGkabrvTXVZiexvba5aI+ILWZQG3Y4IDdvtQScvTdjK8chtFjkQ+VlYgqPXGKA17daV07apJrV9CsZbELTgbu3YY78UEHf9evJ4K9P6JqGoiR1XxjCUjAJ5OT3NBM9YdSWPTenwy3t7bWbSuBunUthfUhRyT+1BA2vXnS/jGdeo9LnIHljeLwSCe5zgmgejrOz1S3kGhazoaXYGAJ5Sw3e38uaCsJfX02rPD1XfXtvcCXdBJGjLbOO+EK88Y7nIoLB1H1Bb6WkN1ddSQ29io/5MZEjzt7DGTjj2FXNGUfEH4wRaxCtnp2kwGGM7llvBvIOO4XOAe/fNMwZbqusalfLBHfzSvFH+SIgKo+igYqiwWfWV30rpp03p6exJnUPJexQnxuR+Ulu2PkKCrT6jcSiTxZnbxCWcFidxPJJ96BBDlQ35fbFAbahBHJIGO+KAIwg5YnB455oDkKcbW7UAOowSMjOORQcCVXPp+9AqDkHcDmgMNpHPcUBTuzQWCJXcFvKR2Y0CMiqjnz4UcH/6oG8gKluwxyAfSjQ6ylEJBJfv37/OiYQMjbjkZXOSP/FEELAEkNn1waBC
XdjORnPoc80CZeTb5wQe2BQAm4y7jlhjvQcWO3cW4GB7ZpgGC5a3uUeNyGQ5GCR+45pgt+l/EzqLTgxj1O5Zc4CSOXCj70wOE+Jd/Pq0V7qVvb3bISAWUBhnuc+/2pgvkHx0soLaNIdKkEiqR5yDg47cYpgresfELSNc1n8VrFtAw2ZBiiywwcgeb14x2xTBYNA13ozUo/GOqWVizDc1nf6crIh/7wOf1rOURvVupdE6ncpFeakiSWsZdbjSLfw1Zs+VVyMHA75xWsFDHU0idTJdf65rT28YKpPvHjKp9Bk49qYK/rGpXF/qU9zPdyzyyMSJJAAx9ifnVlwNZ7vxYEh8GAEHO8DDH5H5U0IPK8jHxSzFQAMnOKgJkFwPT6UBlAII5z3zQCjnA5OKBXeuAT9KABMA208DtxQHRhzgUC3KjJx9z2oAP6/L3NAHC8Hg/XNAcNtGe4oEy5yeaCdLk7yx2qQCAO1AmXZSSexHbPNAhJuLJkgjvzRonuAJy2cd8UCbyOi453Dj7GiYTZyVPPl74oYLJIyq68EH3Gf3oYTDEjLbiP1FEDHOUOdgOfXIBFAm77j5Mnng/WgLI5UE8Eg9iO9AnuJbuQx5wOBQFRxuIbOc54PrQK28ws76F722EyI4d4HJUOPb35yKBm8oeQsi4BPbPb71RyYIHmPGRg00GRsbsHIPY0Bg52AEEseBUCQJyOPXtQDtcdvvjtQCQ/GBwKAuXU4PrQBvZE5IGeO1AffjBJ57UBvEO045HagFWAAzktQKIzBeMg0C5kz5mOG/WgMrDJJ7jmgEnIyOccYoA3cEMRj05oC7/wDeBQTduzEoNxwcZGaAJOWfPNAlISVOT60aIQfkj+amgJ3bnnigKeFGPQUCf/x0Smw4V8exogX/AOY3+/SgKxKxeU459KAgJOckntQJkkcgkGgAAFFz/XQEmJaY7jnk96BM9yPQelAf/wCX7UBv/jagGP8AKB6ZoDf00HMfO/0oAH5TQA/5moECSMDPFAvGASMjPP8AigVX+b60BW7/AHoHEJJD55oDd4snv70CsSjCcDmgVAAbgYoGYJ3nk9qBUAYHAoP/2Q==
\ No newline at end of file +Pandoc Test SuiteJohnMacFarlaneAnonymousJuly 17, 2006pandoc<p>Pandoc Test Suite</p>

John MacFarlane

Anonymous

July 17, 2006

This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.

——————————

<p>Headers</p>
<p>Level 2 with an embedded link </url></p>
<p>Level 3 with emphasis</p>
<p>Level 4</p>
<p>Level 5</p>
<p>Level 1</p>
<p>Level 2 with emphasis</p>
<p>Level 3</p>

with no blank line

<p>Level 2</p>

with no blank line

——————————

<p>Paragraphs</p>

Here’s a regular paragraph.

In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.

Here’s one with a bullet. * criminey.

There should be a hard line breakhere.

——————————

<p>Block Quotes</p>

E-mail style:

This is a block quote. It is pretty short.

Code in a block quote:

sub status {

print "working";

}

A list:

 1. item one

 2. item two

Nested block quotes:

nested

nested

This should not be a block quote: 2 > 1.

And a following paragraph.

——————————

<p>Code Blocks</p>

Code:

---- (should be four hyphens)

sub status {

print "working";

}

this code block is indented by one tab

And:

this code block is indented by two tabs

These should not be escaped: \$ \\ \> \[ \{

——————————

<p>Lists</p>
<p>Unordered</p>

Asterisks tight:

• asterisk 1

• asterisk 2

• asterisk 3

Asterisks loose:

• asterisk 1

• asterisk 2

• asterisk 3

Pluses tight:

• Plus 1

• Plus 2

• Plus 3

Pluses loose:

• Plus 1

• Plus 2

• Plus 3

Minuses tight:

• Minus 1

• Minus 2

• Minus 3

Minuses loose:

• Minus 1

• Minus 2

• Minus 3

<p>Ordered</p>

Tight:

 1. First

 2. Second

 3. Third

and:

 1. One

 2. Two

 3. Three

Loose using tabs:

 1. First

 2. Second

 3. Third

and using spaces:

 1. One

 2. Two

 3. Three

Multiple paragraphs:

 1. Item 1, graf one.Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.

 2. Item 2.

 3. Item 3.

<p>Nested</p>

• Tab

◦ Tab

* Tab

Here’s another:

 1. First

 2. Second:

   • Fee

   • Fie

   • Foe

 3. Third

Same thing but with paragraphs:

 1. First

 2. Second:

   • Fee

   • Fie

   • Foe

 3. Third

<p>Tabs and spaces</p>

• this is a list item indented with tabs

• this is a list item indented with spaces

◦ this is an example list item indented with tabs

◦ this is an example list item indented with spaces

<p>Fancy list markers</p>

 (2) begins with 2

 (3) and now 3with a continuation

 (3) iv. sublist with roman numerals, starting with 4

 (3) v. more items

 (3) v. (A) a subsublist

 (3) v. (B) a subsublist

Nesting:

 A. Upper Alpha

 A. I. Upper Roman.

 A. I. (6) Decimal start with 6

 A. I. (6) c) Lower alpha with paren

Autonumbering:

 1. Autonumber.

 2. More.

 2. 1. Nested.

Should not be a list item:

M.A. 2007

B. Williams

——————————

<p>Definition Lists</p>

Tight using spaces:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Tight using tabs:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Loose:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Multiple blocks with italics:

apple

    red fruit    contains seeds, crisp, pleasant to taste

orange

    orange fruit

    { orange code block }

    orange block quote

Multiple definitions, tight:

apple

    red fruit    computer

orange

    orange fruit    bank

Multiple definitions, loose:

apple

    red fruit    computer

orange

    orange fruit    bank

Blank line after term, indented marker, alternate markers:

apple

    red fruit    computer

orange

    orange fruit

 1. sublist

 2. sublist

<p>HTML Blocks</p>

Simple block on one line:

foo

And nested without indentation:

foobar

Interpreted markdown in a table:

<table>

<tr>

<td>

This is emphasized

</td>

<td>

And this is strong

</td>

</tr>

</table>

<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script>

Here’s a simple block:

foo

This should be a code block, though:

<div>

foo

</div>

As should this:

<div>foo</div>

Now, nested:

foo

This should just be an HTML comment:

<!-- Comment -->

Multiline:

<!--

Blah

Blah

-->

<!--

This is another comment.

-->

Code block:

<!-- Comment -->

Just plain comment, with trailing spaces on the line:

<!-- foo -->

Code:

<hr />

Hr’s:

<hr>

<hr />

<hr />

<hr>

<hr />

<hr />

<hr class="foo" id="bar" />

<hr class="foo" id="bar" />

<hr class="foo" id="bar">

——————————

<p>Inline Markup</p>

This is emphasized, and so is this.

This is strong, and so is this.

An emphasized link[1].

This is strong and em.

So is this word.

This is strong and em.

So is this word.

This is code: >, $, \, \$, <html>.

This is strikeout.

Superscripts: abcd ahello ahello there.

Subscripts: H2O, H23O, Hmany of themO.

These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d.

——————————

<p>Smart quotes, ellipses, dashes</p>

“Hello,” said the spider. “‘Shelob’ is my name.”

‘A’, ‘B’, and ‘C’ are letters.

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’

‘He said, “I want to go.”’ Were you alive in the 70’s?

Here is some quoted ‘code’ and a “quoted link[2]”.

Some dashes: one—two — three—four — five.

Dashes between numbers: 5–7, 255–66, 1987–1999.

Ellipses…and…and….

——————————

<p>LaTeX</p>

• 

• 2+2=4

• x \in y

• \alpha \wedge \omega

• 223

• p-Tree

• Here’s some display math: \frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}

• Here’s one that has a line break in it: \alpha + \omega \times x^2.

These shouldn’t be math:

• To get the famous equation, write $e = mc^2$.

• $22,000 is a lot of money. So is $34,000. (It worked if “lot” is emphasized.)

• Shoes ($20) and socks ($5).

• Escaped $: $73 this should be emphasized 23$.

Here’s a LaTeX table:

\begin{tabular}{|l|l|}\hline

Animal & Number \\ \hline

Dog & 2 \\

Cat & 1 \\ \hline

\end{tabular}

——————————

<p>Special Characters</p>

Here is some unicode:

• I hat: Î

• o umlaut: ö

• section: §

• set membership: ∈

• copyright: ©

AT&T has an ampersand in their name.

AT&T is another way to write it.

This & that.

4 < 5.

6 > 5.

Backslash: \

Backtick: `

Asterisk: *

Underscore: _

Left brace: {

Right brace: }

Left bracket: [

Right bracket: ]

Left paren: (

Right paren: )

Greater-than: >

Hash: #

Period: .

Bang: !

Plus: +

Minus: -

——————————

<p>Links</p>
<p>Explicit</p>

Just a URL[3].

URL and title[4].

URL and title[5].

URL and title[6].

URL and title[7]

URL and title[8]

with_underscore[9]

Email link[10]

Empty[11].

<p>Reference</p>

Foo bar[12].

Foo bar[13].

Foo bar[14].

With embedded [brackets][15].

b[16] by itself should be a link.

Indented once[17].

Indented twice[18].

Indented thrice[19].

This should [not][] be a link.

[not]: /url

Foo bar[20].

Foo biz[21].

<p>With ampersands</p>

Here’s a link with an ampersand in the URL[22].

Here’s a link with an amersand in the link text: AT&T[23].

Here’s an inline link[24].

Here’s an inline link in pointy braces[25].

<p>Autolinks</p>

With an ampersand: http://example.com/?foo=1&bar=2[26]

• In a list?

• http://example.com/[27]

• It should.

An e-mail address: nobody@nowhere.net[28]

Blockquoted: http://example.com/[29]

Auto-links should not occur here: <http://example.com/>

or here: <http://example.com/>

——————————

<p>Images</p>

From “Voyage dans la Lune” by Georges Melies (1902):

lalune

Here is a movie movie icon.

——————————

<p>Footnotes</p>

Here is a footnote reference,[30] and another.[31] This should not be a footnote reference, because it contains a space.[^my note] Here is an inline note.[32]

Notes can go in quotes.[33]

 1. And in list items.[34]

This paragraph should not be part of the note, as it is not indented.

<p>1</p>

/url

<p>2</p>

http://example.com/?foo=1&bar=2

<p>3</p>

/url/

<p>4</p>

title: /url/

<p>5</p>

title preceded by two spaces: /url/

<p>6</p>

title preceded by a tab: /url/

<p>7</p>

title with "quotes" in it: /url/

<p>8</p>

title with single quotes: /url/

<p>9</p>

/url/with_underscore

<p>10</p>

mailto:nobody@nowhere.net

<p>11</p>

<p>12</p>

/url/

<p>13</p>

/url/

<p>14</p>

/url/

<p>15</p>

/url/

<p>16</p>

/url/

<p>17</p>

/url

<p>18</p>

/url

<p>19</p>

/url

<p>20</p>

Title with "quotes" inside: /url/

<p>21</p>

Title with "quote" inside: /url/

<p>22</p>

http://example.com/?foo=1&bar=2

<p>23</p>

AT&T: http://att.com/

<p>24</p>

/script?foo=1&bar=2

<p>25</p>

/script?foo=1&bar=2

<p>26</p>

http://example.com/?foo=1&bar=2

<p>27</p>

http://example.com/

<p>28</p>

mailto:nobody@nowhere.net

<p>29</p>

http://example.com/

<p>30</p>

Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.

<p>31</p>

Here’s the long note. This one contains multiple blocks.

Subsequent blocks are indented to show that they belong to the footnote (as with list items).

{ <code> }

If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.

<p>32</p>

This is easier to type. Inline notes may contain links[32] and ] verbatim characters, as well as [bracketed text].

<p>33</p>

In quote.

<p>34</p>

In list.

/9j/4AAQSkZJRgABAQEASABIAAD//gBQVGhpcyBhcnQgaXMgaW4gdGhlIHB1YmxpYyBkb21haW4uIEtldmluIEh1Z2hlcywga2V2aW5oQGVpdC5jb20sIFNlcHRlbWJlciAxOTk1/9sAQwABAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB/9sAQwEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB/8AAEQgAFgAUAwEiAAIRAQMRAf/EABoAAQACAwEAAAAAAAAAAAAAAAAICQUGCgf/xAAjEAABBQEAAwABBQAAAAAAAAAGAwQFBwgCAAEJChEVOXa3/8QAFgEBAQEAAAAAAAAAAAAAAAAABggA/8QAJhEBAAECBQEJAAAAAAAAAAAAAQIAAwQFBhEhszE0NlFUcXR1tP/aAAwDAQACEQMRAD8AqQzziPNmpiqnIO1q4H+WkB84MdlzRSuM82/jVw/JCORtRmQz5d2VTy6WmS2eSYx3U/qkSRbgFsqRzH2Is4/mCluXc33vy8xTnJjTNqV/T8LKmkhr8Hq1da2aOvTfIh2CFeNt+GxFBP8AJFdFUbPWh+4FdXV7OtZOMR7mK9lBWNN+JBmMQ5cwmfH8DEFhTZUCRlE6CBq/ds/nBh9oYygeY1L9FnCUnBSN1t+w0l9bNomx1cllsOrL9OCTKtKOIqua6UVjP0dEvTyM7gp/3whbkAD0ScX3r6MLg+C2/XsMhCnJRn/5cVNHyJHiX6JKIFhhqnFeagm9BIgjfcJyNBTZiROBUk6Mp8CJRmT4NWU2MatV7n495DPk/wAbMJSRJOTBDItq0KR5s/nJN7LPW8AJWtYAoKQaDp+u4XShxgXhYcbHoxNTllCwETGQ8ag2jmDVsk8w/wCOp/C/hn+mWV/utpePH+D5wmF39NY6UakjUYR1Dn0YgRM5zQAAAMdfAA4AOAOArjkMNQ3vgm7UKtBR+m9QHFD5tpnDtpy+t2R20gK/OsmFtuDpaL5mVyiT5qdEVAvZci5ch5VoSGKbwlWTBr0RPoZT07av9lHfrXo6yLApWMugKpPM9SV1cDm65s/wkOHZBojoqiM+6GpMSj4FhtayNAUi5H3LfQBG2KWssFoSPuJdKyMLKtpuLi+e3jwFICUg7CSHsNVlYlKdizOTvKdq3KTsG8pQirsAG6vAB5FdhP490U4gfjxi+DedoqO4YftmKdKNulO26jiOv+2Ga/bftVNFXpHtVHrpLpRFJTpP3z77T469++fTx48e4LueE+NY6UKk7UniLP8A7rNf3X6//9k=/9j/4AAQSkZJRgABAQEAeAB4AAD/2wBDAAYEBQYFBAYGBQYHBwYIChAKCgkJChQODwwQFxQYGBcUFhYaHSUfGhsjHBYWICwgIyYnKSopGR8tMC0oMCUoKSj/2wBDAQcHBwoIChMKChMoGhYaKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCj/wAARCAD6APoDAREAAhEBAxEB/8QAHAAAAAcBAQAAAAAAAAAAAAAAAQIDBAUGBwAI/8QAPhAAAgEDAwIEBAQFAgUFAAMAAQIDAAQRBRIhBjETIkFRB2FxgRQykaEjQlKxwRXwFjNictEIJEPh8SZTgv/EABcBAQEBAQAAAAAAAAAAAAAAAAABAgT/xAAbEQEBAQEAAwEAAAAAAAAAAAAAARECEiExQf/aAAwDAQACEQMRAD8A2t0YoQpwT2qVzMV+N3UHgrDY2eoM0y58VEbgfp9K1yMRmnuJ5h40jyYHGSeKrWE8u2QAApOMdqGCsmT8h70TAJwMAZx249aKBy4c9vTNUC0zDCgmmmG7Ockjkj1PrUTAjcy5XP0ouCgHae4IomOJHhgI
c55PHY0Uk5IXLMcUBQ27n96JYO2MYLebHtRBA7BcMx29sdxQJqwZRtIP+BQKpjHHc+xzigNGoAO/k+nPAoAYlee5oBiGeWySO9AJCgY5PHagFCADzj2GaA2N2TkjA/U0HMwbPPeiyBLDfkkj04FCl1cBMgn6URwYFGySR6D2oAeQDAxnHGKAhU4IbGc+tFwnwDj9aK7f8v2oNu+IHxNvJdXmt9EmKWSqArA/mPvxUxMZNe3Ml1dvNcMzSSEsxPOferJhht/OWyAPc0UfdgDcuM8n50AMCykZFARsngcY/egTcbjnJz9O9AB2kZGSQOcUCX8x83bntQCMruJ4B7D1oCyOGzxtJ9M80CAdg5UjFE0aFJrghLeNpHY4IRdx/QUNWCw6D6q1EZttEvirHAZ4ig/U4qw1b9H+CHVN3Mq6hJaWMJ5ZjJ4hA/7R3P3q3ET+pf8Ap/lWNm03XkkkA8qTW+3PHupP9qxopV78G+s7VSV0+OcAn/kzqSfscVvIKzqPTWu6XKE1LSL+Bhz5oDg/cd6lEZzGwLrtPqrA8frUCJfcw9gfegUjZsEAffNADyHt78UAjCjzDJxRcO5Pw3gwCGOVJQp8ZncMGOeNoxwMY96GCbQffFFcUXKjDDt2NEo+N3yyM5z3okKuqJgIzONoJyuMGi4QfGcqSfXBoYHJx659qKIRnnsfUGgJn/poJYoTIGLY+eDzQFlQK2G/KCTmgbspfO0qce/agPGcR7nHf9vnQFfBPlOc88Gg7uucc/M0Bd208YJJweKAYrea4kKQICRGW5IUYUZJ570DYqcknt3FE0VuVyDzj1oamOlulda6puvC0a0eZVIWSbtGn1Y1NNbX0x8ENH0qL8X1NdtqDoNxiQbIh8u+WpqL70Tc6fcxypouiRadbW8hhLFFXcB7Edz+tNFvEZxkmmgShbA9PlUA+Hgg/wBqDgmBkd6ArJuJBGR7VdEdqWgaVqMfh6hp9pcLj/5Ig2KaKJrvwW6S1EFoLaWwmPIe2fAz81ORTRm3UfwI1mzBbRL+K/ReyS/w3x/b+1Wexmev9O6xoE2zWdOubUDszr5T9G7H9auCJj2n3PPrUXTlGBB2kYx96GlQMjJJHuRRXBgDgk8DtRKH8w4OfYA0SUlIMsFXJ4oujHH8ufnRRGOSNoJNAeFC77F2jPucfvQFEqgY3nj/AKaCUY58wwq54AoCzOmVMke9QeRnGR7ZoEIF7pnaTk49KDpSSwQntQJsGKjgggZ9uDQc4OOe1Am2UCkHOR7dqA8t/cSW8MEkrGGEsUTPCk4zj9KJT3pzQtS6m1aPT9Jh8SVxlmJwqL/UfYURuuhfArR7f8NLrF1cXciKDJCrbI2b7c4+9NGtaRptrpdqltYW0VtAn5Y41wBUodvGjqUdQyn0YZqAIreOBFSFFRF7BQAKA1xcRwKplcJuOBn1NAR7y2ikWMzoZnGVQHJNAuQcD3oBKkD2FBy8jnvQFxnjjmg4rxwKBMqCBtPNA3vbCC+tngvYo54HGGSRQQR9DV0Y91n8DNOvFkuOmZmsrk5PgSNuiY98D1X+1XRhWu6DqWgX72er2j2069t/ZvmD2IoGG7jbnj1FFlB224PB+VClN4DYJHyAojmPGCck8cetCAxgjPp6UaAGKtx6+9ATAXO7nFBw8HHLN+goJhBuj2FeAcnmgNazW8U0vjweODGyqpYrsYjytx3x3oGa5LEEjH9XvQGlgmjjMmQq4HBPfPYgevagG5nhe3tkFuInQHxJQTmQntn0wKBKTlAeDx60DSY+U9zn+mgsnQvROr9Y3W2xi8KxV8SXUnCrjvj1Y/IUR6c6A6H03o6wMVgrSXMoBmuX/NIf8Djt/eiLfjJwO9ZBiOfmKDhktzQAzYBLZ8oyaDF+rOptVv8AUjNZL4tjA/lT+kr3wvqTQX/pi3Y+DqFxKXurmFWAaPaVzg4I/b0oHlxqV7penRTXFu93dPLsESYB2k8n7CgnradLq1WaIOFI/K42
sPkRQCg3Kcd6Dgp3d6AdrGg5VxnjmgKWB8uQGxnFAUgKuSefSghuqNC0jXbAWGtxQyJKdsYc4YMf6T6GtDzR8S/hnqfSUz3NvuvNILYSZR5o+ezj/Pb6UGfLzyD/AJoFFySQVBHpQDJ5kGByPahAbWxn5+po0OF3D+XPtQJsNwOe+aAuygmMkebgHnHFALHYpJwSeGz2oGpOJWAI49BQEZlYAHkg4oARVOMvtBIJJ7AUAX6xxSOsUgmjViFcKRuHviiVfvhT8NZuqpk1LVFeHRkPlHZpznsP+n50qPS+mWVppdnFa2cEcFtGu1I41ChR8qyHVxK8cLPDD4kgGVQHBNAa0maaBJGTYzDJXOcUCy5JOaA2OMfoaArkheM7vlQNYNOtoWLJCgLHJwo5NApPKLaNpGRQB6j2oGmnRvcyNd3O/DkeErLhkWgklIdCyZOCRzxzQEeRxhdpUnncBkD5UCxXjJ7+tAlctMsIMLohz5mcZAH09aBQYdQwyAeaAuA7MAQxHH0oG1481nbGVInuWU5Kr+bHrgepoKB1u+o6jqlvBH05NevEBPBK0pQR4I4BHZj+1Bb9IS7lsFtNWtYwDGFYB/EXHqpJ7/WtQYx8VfhGbdZtV6Uh8gy81mpyR6koPb5UGKY4YkeYd88fbFAI5AC98c5oQBb+U9+9GnN5RgDgjOPWgAN3yMfWgAqc91/UUD2RSSRg9+49KCR6e0WfX9WS0icRwgb55WOFijH5nP0FBYNRi6dSR7HRNPmu0hOW1GaXaZMdwBwAP3oynE0XRYrFtV02wS4ECj8dp1wcsE7eJEf39qlFZ616ZttPu7Kbp9Zbi0vYzNCcgjHqoHuKsEp8LPh7P1PqjXerxywaXaviRSu1pWH8g+XuflQemIIY7S3SK3hVIo12pGoAAA7AClEL1N1RH0/oTalcwx+IACLaSQKx59Ppmshv0D1jH1ZbTubU27xkkAnKsuSMg/UUFluLlLaJXETyecKAg554zigXiubeRnSKeJ5FOGVXBIPsaBLULoWkIfw3kYsAqIOSTQJMbpm3oqlmwACeF9yfn+1A+Bx34oE5IY5P+YFbnPIzQKAckHuRQCAQOO1AL8r9KDhkZOT9M8UCcrxgAyYJzwD70CT3Itxm8kgi3fly+P7/AOKA9pskhEkZysnOfeg6RH8w3tgjAHtQRZ1uystSg0m5eRJ2UbHceV8fP3oJkBSAVII9xQFdSRwKDDvjN8L/AMSJte6chxcgFrm1QcSf9aj39x61YMH8Q+CkfhqpQncxBDH5H6VRwXJ/Ke1Am2QchuMYOaNFSAVznB9qAm8f10D2RmX8jDHP3oLbebtA6ej0m2LrfX6LcX7IMskf8kf6HcffIoG8yTadZxSTxCK3kRZUwSFfkruIJ78GhiS6Y1OS3160uZJFWO5bwZtxzuQ8bcfPNMZXvo2wsLnQ9R0q/maJNNv5Yo3bjCuMAHPzqA2jdUan0lF0/ZXcElxp9zE+5WVd/DE71IPPB7H2po1bSNXsdYthLp1ykyEcj+ZT817ioITrnoux6vs1gv5JYnjz4ckZ/Ln5etA+6N0BemdBttMina4WEFfFdQpIJJ7D60E5I4Vo9qnnsQO1A3k0yzeTxhCizZJ3qNpz9RQO449igMSxHGW5NAIwBtUAUAMORkfegMhG3jtQD8+fvQGXJz7UAHuRQA5YDI5FB0qCQA5yaCs2/SFit/Jd3AmvJ2bO64ctt5zwD2oLMilVAUDgcAelAJLbhgZz3oGN9HPIYmhtrWRw2czjt7Y+dA+h3mJS67W9gc0AvuLYANADpkZABHY85oPOnxy+Hx06Z+odGjC2jt/7qBRwjH+cY9D6/OrKMebcceHwfaqCYIyDgZ96GhHOFJI4/WjQpXnsaCz9J6fDqGvRC8OLO3Vri5PB/hqMkfc4H3oDT3UupapcXrKS9zISgDdhnAGPbsKC5aLLBHq9p01c6bbagPE23kpJYhmz5IySAAMj6nNGdRnT2lu3
V9vaQQrJDHfCMFj5kAfufsMUFogu5H0jrLUYXK+Lq0aRse/lf/8AOKlFfudagvbnQpNQRmtILydCwPdCQcgMOMZFQanPoeiawBd9M6s9jeKPK1vKQp+RFA4septa6fuFtuqbRrmzx5b+BAdo927A+vsflQXfTr2z1O3W5025juIW/mjOR9KAZI914khaRNo4XdwT9KAl3b2+oWpjMoZWbOVfnI9sUCrXUNssUU8w3sQoJH5jQLvwQQC3NAKvuUPtK54waDg23v6UA7weBnNAIOBigMr+hoOjdZQdhBx3waAVG0Z7UBWfAOQSflQChyNxBAxQRutarb6bHALi9trSW4kEcJnGd7ewFA/j8QEK/IA/MBjmgWDDBB7igj9dupLTTbiaHZ4oQ7A7bQW9ATVgwXSNV6onl8azW6t45pWdxHIxWA/zNtz7A8Glg2S1u7fX+nt0J/H2c4MMhmQoW9GBUjj60g8sfEHpebpDqi4sHLG2Y77eQ486E8fcdvtVFekGW4UfegKVAAKgnFGhuDzxQXbpDTZF6a13UnUqrCOzQ5wGZmXIJ+lE0ppkEK6nJcRWcTW9hA08iKcjcowpye/mxQ0+6VRbC/jvLm48L8LG9y8pIOXxkDnuSTipqHXQMng3es9S3fhn8DbvcZI5Mr/lH700dc3Dad8NtPs4nU6jeXD6nMCwBRF5XOfU8YHrTNJFF1X8RawW1jc4GxTKNrZB385yPkBTFw1stSu7Ni9tPLGSQfK5Aphi8J8UNUm6fn0u72yvJ5fGbuF/39aYYtGgadp9/axXnRetzaXqnhqZI3bEcj4547Ak/X6UxFisPiXe6NMdO65057eQAr+LhUlHHbOPX07UwWXpQ6BqMo1LpgW0sioVI8Qgxk+684qC028M5890Y3kHKbUwF+lA4LDOzu2M4FAOG3DaoI9cntQdJxzQEyR259f/AKoGl5fSQRFo7ZpB/MhYIR9zxQdayyXKb7gqox5Yo2yB9WHc0DPUIWnhWKxkuYFRs5gcKWbPY59KBkx6isVeSGW31JNwHhyOUkA+o8v9qCfjkMo/LJFKqBmRvSgc2swnRyFcYODuXGfpQMtRsLK8vYJL+wjuGiUtHK6hghz6Z7H6UEmCsig84I9RigiruC9t0DaaVmIIHhTOQMeuGwT9qCJ1ywv9T0U29xFFiaVBJGHz5M5ODgYPY/arKJPTtLW1t44i7SKq48w8x+ZPrTRJoipGFQAAdgKgzX47dMJrXSrXkUe6807MykDkp/MP8/aro80FQyZ+tUJ7hvH0x270XQ7KGtXvIk0T4c9P2bIhkvpnvJVfjIxhf7qftRDXpu0/1DpzXltUlkvmWMBI+2zdnn64oYa6yX0XTm0i4jQ3t6wmuV53xov5UPpyeeKyLbpFtZ6Xpmn6TqNq7/ic6pqQRR/DVf8Alq2fTOP0FXBnXU+ox32o3lzeW+JrxlMXHKR9wfbJ/tVWK5f3AnaAjafCTwwcY4BOM/qKKSjA4Dg8j37UHZKkE5P0olSFlcLDdJPbTNBOigjxOVZu3+80Rbbnrq9l0t9I6isRd2rgKpPlZMdyre9An07oupoh1zo2+lea2fMlr+WZFx7ZwwqWDVug/ihDq7R6b1EPwmpMNokPlVj8xng1BqEUe1EAJOMDOc5oDSxq6YYeuaAJF4oCBUQ7mJ45zQHYB14wR86AVjBXyjge1AEcRTHlA9hQE8kbgEohJ5yQM0ETHNqMOr3IZQ9tIMQyEjKt7D3FBLqywRPJKTuxlj3zQI3Ut14e+yhWRj28Q7RjH60EfpF3rU/jLqFrHbS4/hqpJXH19aCRa8jgiVr1xGwXzYyf99qA9tc29/aRXFnKs1vINyOO2KByoxwe9AYocHGKBvdwLcWzxSLuR1KuD6gjBoPHXWujt071Nf6YSSkUnkJ4yp5H7f2rQgWAA3Y+1An4j/1t+tBrHxKuYS+gx24LRx6ZFtI/lz60FY0+/v8ASphNpd68EpXY5AGNvzFF1YOi
rZbzVrvX9dkNxZWH8eeaY5Lyj8qj7kcVlETqOqXd/HrPUNzcNE16Tbwxf1JkEgD2AA/etBte9R2Oq2cv+p6XHJfBFjgmjkMaRgAAEqO5o1FWfbgjsR8+9AlI5CgEggeoNAq0iug8uD7g80KKmCcZ7fPmjJzJfT/hWtjJvhOPK/OOe49u9A96X1W90/VrRtNkkSfxQF8I5yScdvX6UGidSLpfVFzcvbRiy6kgZBGysFW7B9T7HHNSjU+o9S1iz0e2uNLmX8RYxJ+KgYeVwVGTn5d6gjug/iU3UOt/6TewQpP59skL5B29x/8AYoNHPB78Ggb2l3bXO78PKsoyVyvIBHBFAoSkbfyhn4GfWgTnmWFN7ybAvc4Jz9hQRdx1dp0S3Dw+JJHbDdPIUZUjX3yRz9Bmrgzbqb4x9Oxho4bB751O5HPkXPsc80wQHT/xrJ1IHUbGKO0kdRiBiAgz+YjnsPpTKN/tLy3vLOK5t5klt5F3LKhyCPemAYLuK5XMDEj1OCP71ArGWLMPT0oIbU7h11u2t49OllWWNm/FIRsjI4AI/egfQ2ktpbww2XgxoDl9wJ49cUCHUGv2GixM13Mkcm0squwUH5/SrgxDW/jFcXOteHb3otrKEEiRISRM3zGc49v1qDT+gfiBpvV7y2unxTxywRhz4ozuHbOR2+9Bmf8A6kNIEWpaZqiooEqtBIR6kHI/atfRjDEt3AKjgVQjug9j+lQWh72e/htTOzyeCnhHPomeMYoJvQum7vVD47K9jpsQBkvZ5NoAHcgUAa7rKamE0Lp9Xh0G1OZZTwZSO8jn9cCsivdS38F9cJDZIY7G2URxKe5x/MT7nNaEKrENwAFPPlosFwS2cd/cc0UlIm3JOeKDo2LH+UA0SjgDk98URzPiJ2449e/NAbS7v8PdpKkpikQ5WQLkqccGgmYNQmXWLeQLG9wVRQVPlcj+Yn3xQa98OviAjz3WjdXSpFdliEuJCNjDtsJ7enepRdel+kdL0rqOTVdIsoYklV1dixO3nunpg9jUCnU3WMeka5b2EUcl3JInmigQs6ZPlJAHY8+vpQP9O1m3nthNo0cTwM2JDwoVj6H5gd6CbhtUiVn8TcXO4ktkZ+We1BAf8Z6fZ2uqXWpyxQrbStGseQzMB2IA961B59+IHXmodXal+HsPFh04HbHCo25+bY/zQWv4f/CCxvII73qC8iuXYb1tYZeF9txHf6U3BatX+DvSl86x6cr2dwjbnEUmcj6Enj6U8hLdJdEX/SmowJp2tTT6Oc+La3HO0442+3NBf1LmRUjjQAfmc+nyHvWQockYyQcY3CgaabaPZxGNnaUFi3mPb6f+KA2q3RstNurnBxDE0mPfCk1YPMemaP1L8RtYN9fJPc2aMUaVmCKg54H0z6VRYendf6Z6T1W56a6j6fgfwJyguhGJmPzbIzjHtSjTn0zSunbi01fSkt9Os5GAmWNCDOGxtXb6HnNZEZ8etOF90DPKFy1rKk3zAzg/3rXI8u7zvOTg4zVoTLDJ81QWDTb2SwuvFgcrkbXwM5H0PFGqsjpd6+kcT61Nc2ieb8OikFc/9PA+WfSjKA1nWBzpFlZ/hLWM4KH8zsPVj6mghN4IyQRk5NGo5BkFmyAfSgVjChdpGO/FAXYpOHLBe/FAQqoBJbA9sUBGxgtgEj/eaCf6DGjt1TZf8RNGumKS7mQZQkDIB+WaMrf8Ub/ovV7V20JIYL62K4khhCLOCcEcAdu9BmCuEQvxvyFUg42+v+/rQaj0zax/EXRY9Nns0t9TtM+BqCKAjEclXA98jn+1Si7Cz6u6O0tLjTrxLu2tQJJrDwcKE/m2M2SfeoLrpupDV9Mh1OytUS2vIN8m4BZQf6T7+vPpj50GfdK9L6rJqk1y1y0elRDKRqdjHHoyDhjx39e9BZr7fagW0j3kul3iETRqHkeF8ZBUjkZIxjtk5rQ86dW6r+O1OcW0UtvaRsY4oWfLKBxz7k/5NA46P6X1rqS6
WPS7V9v88rAqi/f3oN46X6C1DSotkus+BIwKl8hn2+3PapROXPT2t20bPY6kJ5UGYmbIfIHGW5z68VBI6DrzzWSrrAjtrwFUbDja5OBlfuaCbluJLeNwIpLiVF3bVXAP0Pv8qBxLO8cYcW7vnuqkAigNFKs8CyxlwG/lcYI+1A31ayF/pt1auSFmiaM/LIxVgwfoO413o3qqfSLyUSwodogAyZVGcbPTPr71aNDvendJ6wtbu7Fi1lezK0bS4VZMjtnFZE0bC5u9Jh0qRAr2yw4uWx59vBI44PegN1tpbap0lqOk2sipLPB4aFsnHbBNOR5A1exFhqFxbeKkngyMhdOxIPcVuhiZFz/zBUEwcKvYnP6fWi0+6chjn6h062uATFLcRrIMnzAsO9EehNR+GvTV3GUh0+O2YsGaWHIf9amjIfib0no3S0VtFY3M000zMzLJtLKvvkenyx96oz0rwNjA8cj2osFLbVAbOc9jRQiXOAwxnj3oBlAxwDj37UDY+vHOQeTQBIdqjcPMfnQwJclWyBgCjJBFeefw4VaVycBUGST2wAKD0L8H9C1rSIILjWLSCytY1lZASVnlL4PI/wD8+vvUo1uwbxI5GkjdVc7isvOBjtj2qBWKFZiQ8CJCB5FHYj5jHFArDbQ20ey3RY1HOAOPsKCH1u61CPSLt9MtlXUHUrbCbJBbPdtvYetXR5T1y2udD6lni1ErJdJLvlK4wWOCePvVgsV/8Sr67UW1vA0NiowIonMe4+7FeT9ARQRmodWa9EYpPBhs1Tygw26rk9xknkn70EjonxZ17TXjAeKTkZ3L+YZ7N8vpSjX+lOpNM6umgkMG3EgBV1DYbG4kewz2NZGkC43CP8MPFBONysMAD50DaHVH8S6N1a+BaxMUjd280pA5wPb296DrXWLK9WNoJdtwybxDKPDcAnHIoJBifTBzzmgaz2UFzPFNNbwvLCcxuyglT7igdRRKg8qAZ5JAAzQEnuYoHiSWQIZW2ID/ADH2H6UERr12BY6hueIQJaO7SK/nHfnHtx3pyPGWoN4jynuCfU963RF+DL/UtQWTkjaWY/8ATnHFGql+j1VerdJY8r+KiJz/ANwoy9C/EjqSbpbRY723RJC8ojIcZ4IJ/wAVkecer9en1+9FzeLCCq4URjgDP7mtLhteadBY2kMczyHUpcO0YxtiUjgH/q9celAiLy1kjCX1ruyMLNGdrj0+h+lE0+t+kNQltJ7yKS3jgiTxUFw/hySp7qp70NV6YEBgWUNjBoaKeAODnHrRoVgDnBP0ozpxZ2f4y5trVeGuJFiBPpk4zQep9C0LTembS30fQbWP8ZsDyTugZgf6ix9fYZpbgmbXSmXULaa6kMzpltzcjJ//AGpaLCY1CDsF74PrUCgHY0HbSx7Z96BGUfxB2xjtQZ11t0Tb6jNfyw2wM18gV5AcBdpzyPnV0Yp1F0o/TEczXjXaTOQYpIk3QlT3B9Rj0zVl0VKbVppImheUSwbsgFfXHc0De0tri/ujFYQSSyfmKopPHqaDV/g9p+padr/gkSRTzKu0kZRlPLYPbOPf2pg9GWzRCMJAFxH5do4wayKX1z/G0CdzqLWRkiaTxQBLudclQvovbv3oMU/4Z67uwnUAt3u1Zw42yhmx3/Ln8v0oN86L1d00i3i1UiGQIocNnEbnkqT2xgiguEbI4DIysp7EHNAZnxQQ/Usksej3EsCl5EUthR5sY52/Mjigr6Qrp3R15LqEcIlmgdpFGAsY2navPJApyPJtwd8rnGBuJz6Gt0MzGSTyf0qCwSKA5ZsAjnn2otTXQYj/AOMNIDqCrXUZwf8AuGDRG6fF6Gyfo6+ub0CR4EPgIScLIeAcfc1keatN0661a+S3sYTPKzAbV9B7/StLrQ/iXp9pYLp8elWsUM11AzXMqt53I7g7j244oiB6W6Tn6j2TeAy2FspTeB+Z+/8AmgtnWlvpdl1Dp1pq07Ja20GFQpuDHHAwPf39KDHr
iVTKSPOCeBnHHtQGsrG5v5pfwcTOIlMjgEeVfck0XRIreS7uUigRpJXOEVe5PtRE/wBJ9HaxqvUcdhNFJp0lviaSWVcMgzxgdySeBipo9T6O8NppUJ1K4iW5KgSvIyqxb5jPH0paJm1NvKivE6Mp7MpyP1FQLRTwy58F0cjuAckfagOTtO3+Y8igMWCIWbOPlzQNhNBOWEbq5Q+YKc4+tAD7JEZgQfXj0oI/VtIttXsZLW5hRopByCP/ADVlGRa78Erae63aXK1tG3LAncM/Kmh10l8IZdBv4rxtTE0yggJsyoz6/P0po0zSNKEMdo9xGnjxuzkqMAEgjj7GmialjWQMgyCRyQcGoITqHT7q/a30+G2jFmwLSzl8GPBGFA9c5NBPRwJDbpHCipEi4CjtigqfWltqCaG8WhNbxyzOBIs8W8FcY4+dWQQ/wtuZdIGqadrknhy2u2QyOSEZOeRngY+XvTBZZevOmhC8janbqiZ53Zzj2FMFcs+sh1ZqsFrp8UkGkrlpbh+DNzhVX5Z5NQTfXyWUXSV2t+wW3EZ5I4HHt61eYPI0mA5C9snFaoLsPv8AvUEk5JcA8cZG480WnOlXX4PVLO4yQ0cyP244Yf8AiiPUfUump1B0/c2O8xfi4v8AmL3UcGpgw/SujNX0Trj8PpckimOMvHO/kEg9Rjs3PpV0aFq/Qqa1ZJ/qcrverEqNOwGM+uMfemiVtrKbQdMNjp9rvtkhPht6mU5yT8u1BkvXg1qXUtOvddgRY1R3j2YHiMvZSD27CgzSCyuNQ1KK0giL3Mz4VAOc/wDignoNNOnaHeiW8hgkku/Al2+Ziqgn09M0ETp0qpqSmGKOdFcEeLwMfPHag3JLuCRtPmQWsDhNphtVAcn1w3BPFSwDdWGpX1/OYdOtbbSrlQ80szHdn0GR24/c1AbWemdatLbTJdGvJIJypDQwMV3exwOPatSz9Ei/R+txy2mr3evyHV4miWIDhGwwyGA7nGRS2YNZAUBWYDdjGayEvxMYB3nYu4KCfU+woG93c2enWs1xcPFDCp8zEgDPzq4GGgz22saS1zZSZhkdsFePXt86YHWmySeLNDMYikZ4YNlvvUD+VARxQJqgwRQHUAAe2O1AWOFRM8mDlgB37fagb6reXFt4ItLZJnZsuWfaI0Hdjwcn2FAz0nWX1i4u4xY3VpFbv4eZ1x4vGdy/KgkriN2aMRlQoOW3DOR/5qwYr1P1tp2pdS3WnanKkGh24kRl2eaYgcb/AFxnnAqiv9Jno0dRLJPbtdQtkNPIALaMnODsPPpSjbdK0DTbWQXui+Gsco3BU5hPP5gPT7VkU74t6PZHpq/1N5ZZbwrtRnmOwDPOFJwPsK1xR5ybudw788VaC5X2WoJRULSBpAe5PA/aiinAZnHck4A70THq/Qr23/4Y0u4lmCpLBGA7epxjH60Du+WGOBvFlFuWOFcYyCfbPrUojri6k06xX8PFNfBUJ3ltzM3scVBjfVvVXVNit5dapNDZGQGK3shjeAe7YHIwAOT3zVggNTiu+orrR4p7m+upJFR7h3TPhggDaoBwRjnPH5hV0af0xotnoD3l5dWdrY2YjGLhwPEHoef996CC6m0HpuPpk3Wny2s9sJPHJ3AeI2D39T37UGU9QTDULuGPSLPwIyoVIYk2lj6/X70G2/DPp0hVudRuBLcwxhRGkeEjB9M+p96DSLprVHiieaAE+YxHkke4H1qUOIBawL4uAuc8nvj71AwjRtQ1eO78QNp9odyLju/qT8uf70Gb6r8SpLzryy0vp+4NxYSSCEswI2u2Rn3OOD9qC4dVamen9NlaC7tUaIFvCmnHiy4HJXJxnOeDVwed+rOvLnqSyWO4jZSru/kc7ck+30GKosXw2+KmqaDJDY3jR3OmqNoRhtZAP6SP7Ggtmt63qbTJ1XLazJpslwBFblypEOAA7L2OWANS+xrnR2vW3UmjJeWp8wJSRf6WHeoJdSPMCRmgMq8DmgBp
NsgUIxBGSccD70DczmS8MDWoe28MN4+f588rj980CktuJZYpFdlKZ4B4OfegQ1hpIrVjbsRMBhBj8x9qsHnX/hm36y1O/u9V1ddPmS6aD8OkQdyxOSe4OMmqLUvwQsYY4Xjv7m4YEEhwFyMY7fXB5pRbvhp0jqfSMV7b6jqZvLGQAwxAEBDk54PuD6VkVb49a5DBpiaNaeF/FIaQDumOwpzMGDEZQZ5+VboR8In1I+9QWDY6gFn7jjHpQhtJEFbAGBnijT0P8H7qPVOh47a42yNaymPBOcDupoykep+m73V7g/8Av2itQowvJIx6j5/OpRjfUWrax051RPY6LqFy8YACkebO4Z7HjNWCY0ToW2utJbqPreW5na4O4R78cehY9+fQVKLX05p1ro97awC4kX8VFmJLeEKdoyfOxJPbHbHYVAz0rqKPWNauri9t1ktJgILYgEiNFJ/Op9STmrBBdeaFCo0y3jt444DI3jLE204Y5DD+9UPPhv0NaRtPq99mSLOy22nOfdh75oNC0vT7m1uJGvGiii3AW8UDbQAeDu9zUohLlhouqap1VciF4I1FtbxSthtobBIPuTn7VBJ2vUth1TYk2ULi9iALwyKQYz6Z9CM0Ft060/DWEcDHe2Mucdye9BFW3SekWt3LPb2cUTsd2UGCG9x7VYMzufhzdX/WmoXj+BPpx3I7XZMmXYckc8EVRKaP8I+nXikLQuxOQSTnBzj6UEjonw90XSrq3S3s7dplJcl1EhGDx396lFx1TQ4NVjaC7UNCU2lAO/8AvNWDCLp9X+E/WgWImXSp2LRq7eSRT3B9iP8AFSjd9P1+21TRodVsMS2rLmTbyUGOePXFQSltcLcW0c1vh4mXcjDswoDLdRm4FvISsgXeTghDzjAPbPyzmgVFxCzuiOC0WN3sM+5oDqySJmNg3rx7UFb60tNUubGJdFdEvhIdryflUFSM49TVgyTQenJemOorf/U4H1Fpp1edAh/hOQTuQ9375JA4q0bnbPBcxxT20wkjKkqYzlT9ayGWu38um2MbLEJ7iRtoUds+/wAgBzQeW/iHqi6j1PdzeL44HkL9txHtWhVUOVyvHNB2F9zQT8hUAhAdp5FCG0mSAzE4HGDRppvwL1bwOpJbEsFiuYyVX3deR98Zoy2ZtRgmjkSRZocEp51K7se3vUow/rfpFE124mVpfD4mk2MWdCc4A+VWCH1281/UmFnpklzPYRFBEG8uGC4yQfcn9alGgaJo95rRsbi53WaxwrHOm7BjYcHnvz/moJaw07pXSI5IW1K0CQnDhpAWB9R796CudY62msTRW+gadI8KnDXMkLLv9MA8HGOKC3dAXF1dRfh75f41moi4G0AdwcfTj7UFhv7RjqMBV5AJFZHkR8FRjIx6CgpXVNjJ1JqNn07p26CztSJLlpIydyj2J+fGaC+afplrazqLa3SKNIggx3IHYUEsBk4wQc4oK11L1z070/M9rql6wuVA3QopLcjNBDwfFboqeSO2W7kQNxuaEhQfnQLt8TuireVoV1UeXnckTFT9DigHRuv+mbu9ZV1W3Nyc4IRlVl9O47/KgtU+s6baw+JcX1umRkAuM/p3oK/1t0rYdX6cqXKESqN8Ug/Mp9P1qwZ702mo9GdUTWJsmOn3EY8CAORGXJAwScjJ5q0bJDNLb6YklxbKsgA3wwndg9sDtWQN3aw3ZKTwLLEQOGORn6ehoG1vYAw3FikRt7JSuH3Hc/GSc5P0oHn4aO2uGuYyiose044wBQIm6F1dwfh5ARs8R8L2BHGfnQHv9PS4PjxrGLtFKxysm4qD3oCxboIIo7e1jhQHzAYUJ8wP8VYM56j1ktaal1BMrS2sAaK1OQDD6eUepY9yfQVR5zv7hrmaSaRtzyHJY0DcE4BPIPb5UBwOO4oJYzFvzEYHAH9XvQhNZN7AEgDOSDRo/wBA1SXRdVtNQgb/AJUgfBHcZ/8AGaGPVlhPbarZ2t5CEeORBKje2RUr
I1zYxTBhMinIwcjvUEcugWkO4AMisMEA8N69u3yoERrOhWNxNYy6hapcxAeJHM+D8u9ASLStLlm/EWdpZyxy+Z3RQ3I5B44oJKTT4blFWSNBEOeBg5+goFYbOK1TKhIxnIbGMH50Cpcyo6AMrIcM2OD68Ggb6Lbbllu5Cd88hYBu6rztWglSNkfm4P70Gaat8Rba96w0vp3R2mhufxyi4kO3YyDOV9+f8VYMw+P0cP8Ax4JVuEKzW8bEr5tuMj0+lUZ7Y2X4288GK+towRlZXYqv9uKCQi6YmbT2u11GzaJWKnYxbBB49KCFnhubdiwL+U8OhP60ElpXUFxY6nDdXQF0qYbZKxwT9vWg3npb42aHcmC11C3uLSQjEkpIdQfr3xUondP6x6e6tv7e101hczRzrNtaFiFC/wAxPYHtUF+lj8bYCTgMG59cUC4OBQQOo2eoXepFTeL/AKYQN1sEwWx6Fu+DQLX2kw3Ok3Vjas9qJ48Exd1PHb9KCE6R0G86Ut7mK71KK4gklM7TyKRIBjtjtjj96AOreudJsrMJbXksk8jBCbVdzRjONxBHP/3QQ9x1jcWGkERWWqXdpMPCt7x4wfFOOWPbA+fAqwZr8TJ9Qbp2ymvEjsrSTEVvawyHz45Lv6E4wPqaoyl8g91OKDlYEc8mgKW5PH7UE80f8PPHl7gDFAZkUjawUIQG8w5ouknAUbl9Dg59KK1X4Z9XXFvo8mlRXax3KHfBG8Rk3qe6jHOc54+dMZO7jr/qK8vWtba4tYu38TwvDOMc8N60wPLbrW10PS7pnvrnUtbAKobgBUUk9jg8f/lMC2rydMdRSaRqWoLEbx4UefwxkL2BVvvn9Klgv3Tp0lPxFno6wJ4IVmEOMEHsf2xUD2e4ks7n+NGDaCMu8q8lCMcEfPNA6tW/EwrMybEYZUHnI9Cf/FAzu7G4LXTWs38SRNqhs4Bz3z9KCO6x07UNT6altNMvEs7xkC7nPlI9R2/egwbrDT+r+kupLCeK9nu3KBYGRy+QvdWFWCU6avtA6h1iKDqLRhpmpvkxz2p8JGb3z7k557VRX77TdHteuPBut401CAouyXGcdvmASaC069030brYaay/CwPFwWspRErfUN7UGZX2hWSiY6ZrMc0CvhUlBUk/UcGgiLq2mtG8F54yDwfDfIoGkrRsSZXwOB2zmgsvw06XHVfUcFvI22xQ753Ze4H8v3OBUo9a6XodjpltHbabDHZIhVsQqBuA9DxznFQTQUe5oDYGMnn7UEbpV3JqDyz+BJFbBtsXiDBf/qx6CgDXL42cSRwGM3UzBQrHGFzy32oG2i2kKTSI80lzMow0rqQoyew+3tQO59KtJJRI9rEzgg7igz3z/egZ6paJdGGwW4eBXy7pGeXUdwT6A5qwebPjJrcOr9TvbWZQ2Onr+Gi2nIJHcj7+vyqigOuRk+vtxQAqEk4BU0ABj7j96CzzKxYD0GeM0CQG3OMAjkfOgbSZwzE4PJyfWi6caTdzaffW95akrPFIrLg/tRHpGzs9C6t0W31FrO3Y43MrcbH9c/eloZ6v0JpWoKlrHHBbScSFEHYc5Pz71NDXUoJrK1g0dvw9qsspW3nXaN6KMrHnH5ieSfan0U3Rr1uidaRbiwk8BUSS6naQkjc3ZcHaRnn70wbja6lZX+nw3NvMksM+FXnPJ9DUCeoXj2hSG2t2km7op4U/f/FAvHaNePb3N0jRzRA7VD8Akc9u/FAvcxnawZQ3HYtjJoKfDFAdeub6Vo1dSULIBtTbgYOfU8jNWUVvrm5ih0m1urixhlsI5HJliOwR5/LkkHHJPamjHdQ0HWdemlutN0+YWBYtC0rHDhjwVz3zWgx1n4e9U6QE8XT5Zd//APR5se9XBXbjS9S0zAvbO5tyWKAOhG4/KpQ3ZHXO5JFK98qRUGhfCbph77Uvx91pv463wVjR0LR7s483796WjW7rTrXpHWrSW2YK7lmXTbaIFpCRjjHOOSeeBipa
NLtXuIre3R43lnkGXYADZnnmoH6Dkbzn6UDGWe9a/hKG3jsW8riQMJS3svp86B6zgMQmDtGT8qDNb6XUpOoPx72tzOkjFYowOduDwDjj70Gg6Wsq2KNeAJKRuKk52fIn5DvVkENrvW+iaSAsl0txI2Asdud5Yk4A4pgzv4l9ST6JZSXbyyprWpw+FFa5G21gz5icfzH3pgwCSQlh688+tUEwjjngZ7UHAHuWAHpn+1AXj5frQWXOGBZcKSe3c/rQIyAtnI27eBj/ADQIMAuH8vHBB9DQwVpPLjOckEZ/ahi9/Czqj/S9VhtLm6aG1uZFUsT5VOfX5Gpg9GiNJArxsrxsv1BHypYGF7pljeG3kvLZSlqzNGGxhSRjP6GoG1yLUWiWc2nSPA4KJGItyYHYHHarop3wu0jWYNUvzriNBp8ErraRMANxJ/N7nA7ZqDUHgSQLvAbacjI7H3oOuIFuYzGS68jJRyp4+lA0m0yDwGjiTw3bzBwTuDe+TQVbV+mLmW1NtbSok9weZiC2zPLEZ+/FBM6Xo40/TYdL8Jr21G4vJcuCck55HbFWUKQ/h4tR/DTz2o8TK21qmMgKOf8AfpmrokljG1i5BHI57D5VNorut9Lab1LA638W6H8sboNrqQckq3pntV0RWsfD6K7SGC3vTFahQJY2iVmkI9d+Mimie0Hp2DQ7AQacio3JZgqjcT3zxk1KHGldPWtnqMupS5uNTmGGnk5KjGNqf0r8qgmkhVGcquGblm96BDUZZYLGVrdN8+MIvux7UGKTdXdbaRrFvbaxbWN4d58BmwCjHPORycLx2q4LNe9S9TdN6I13qkWmzSXdwBCGlO4hiMKAB2A9ag0WySQwpLLtMjDcQBhVz6CgoHxF17XbnUYunulgsUsu4TTvjIUAEhR+x4qwZwtkOi7651PXJobm4tohHbQhdgecjnaPZeOfeqMy1vWLvWNQlvb+ZpJpWyT2A9gB6Cgjy5AO4A5oAGRgBR39aA7Z8MkFtxPY0AbV9UGfpQWhj5TkBQGxuBoEGG1iQCyHvj1oELgSkK6oRnIBIxzRdJ28Q3+fHiAds96LoH3AF1wCfyijNbF8JviI1rbQ6Pq/iSopxFOx5Uf0n3qUbWQk8II2SRyDPuCDUHMpSIiJQSBwDwKAgTxApnEbyIQ+APyn0+9A5B3AgfmoEWgcb3VlMpGFYjt+negSs7zxH/C3RWO9UElM8SAHG5fl/agNdXcEbJAZ1WadvDQDJO7Gf7c0ED1dqWv2enzw6Rb24nZfJd3EwREHqxyMZHzOKBbT7H8PZWTK5uZ9o3Xm1SzEry5PsT7UFF+JnUezSZ9LttRs2km2m5KSFWXDZI491AyBzVwK6J8T7CRtPjee0tbaGAtdNISdoXgLEo5JJxyfSmC6aF1fo2vELZXDJOxwkMybHYe4HtUFiJWJd8rKqjuTQcZV8SNI0dy43BlGVA+ZoBniE0RU7tp77Tg5zQUv4hNrU2tdNWOhylPEnaS5UHGYlAzn5cn74oHGt6l0z07k6nJC123HhKPFlbPptGTj9q0Kx0XMvU+ty6vqQtpWlZo4LOdGDWsak8Aflycgk+v2qC8a1q8OnaXLPOz2kCIWkZ+CqjgYx6n0xTBkmp9Sabp0KdRyI5vJkaGw08MVKIDw8jA557896QY1q2o3eqXr3N7O8skjnlnzgn5e1UMCGV/MOM0BJFOVwfX0oFtgZTjkg8UBtpOeO1B3hg85FBZXVMM+VJAA2+h96BO4IMJ/p7qvuKBKacmOKB5CYYx5UzgDPfHzoGwD5OApJHAHrQEAk8NWdNpx+XdkA0AIGhkGWOQcgg8UGw/DP4kmwhi07WCTZqAiSbstGc4+pFKNvs5o7q2Sa3kWaJxkOp4NZDOHUh/qL2k8LW78eG7kbZv+0+/yoEZp7fUpX/BXpgvYZTCSRtO7vtwe/vQdY6jeyatPp91FEPw8aSNMoYbi2cADt6Z70DS60KW7luJdV1JniJzBtURG
2b0KN7+/vQKSWUWnLLqN3cSLP4ex5Y8jxiPykr23+nzzigpXUPVOu6Vqmmf8UWttb9OXDqkjRnfI3H849uRkDOKC0axLFr+nLB0rrUMM0bqCIGGGX1AH09qsGc9UfBiTV9Vhu9Pu5oPGLNeG6bczN7jHvVEr058Gre3sLeDVrmOdo3LmSFNjEH+XdntQXO51XQOl5YrCKGWa8SMYS3tzMyLjjJHb9alDqz1S5ktJ7nVdLmSVDiOONfEMiE4Xy+h9xUDm11CaTxEOn3VmpHFxKFCr9s5GPmKAus6jb9M6RJf3c88yxpjcxL7uM5OO3HrQVTSupoOuYdZMTSpptriNFtXKXDA/Pjhs9h2xQOJ7Xpnpa2S91WK2swqjbGw3ysfcnuxrQsGmapYvpwvra1FtDL52Mi+Gx49sZoMb+LXV0t+jWl3OYLGTO2zjx4h2nyszHsG4/Sgxt5ZJmEsjl3PB3GgSlyXBxkDnNADseB2PsaA8SoXQyFgmQCV70B5R/EIjYmPJxnvigEKcYDfrQF2/X9aCwqC7l2zwfT/FAJJ5747E5oGTqZArKOfccftQHZWwmOD23Z4NAEkTEBmwR7g5AoG7KSSE5HyFAMTyQsMHa/cEcEc5oL58P+v7rppdryPNAXx+GfsQe7Z9D2/Wg3zSdX0fqzT08F433eYwscOpHt9PcVkQPW632mX9vfWdrbXiRgrIdu2eHIwGD57+nIoKdc/ELVdC0u5afp27SUtta5vJMMzk+UDjzYHtQWDpj4gxXmif/wAitXZkGZzFGW8MehZO+PmM0Fibr/poWQmF6xXA2xmFg59sKRk0C2nQP1KFvdb0vwIUJNtDKcsVP8zD0Jx2oJDTNA0vR5p7qzs44pJOXkUc/SgNfa/pNvbF5L2JgTsCo2WJzjGKA1jcNcxOF/m/I0zbt4+gxgenNA5s7CCxWWYQxpNLgyMiY3nt2oHajcuexAoEL1C1uyFkVHO1ixHb17/KgwT4rdXWep6oul2OpywaTYqVLxDyySDjaM9x6Z+tWDPdB1TW7Wa9sumpGlursqMQRlpXwd3BA4571RcdN0i41G7h/wBSmNxqdkwn1O6u5/4cAXlYgcnngE/pQNvih8S211obHRyYbWInfJG/Ex9MD2+tBmNzcTXTtJcSSSSHH5jngDAH0oEdxbg5A9wKAzPwO+fegSLfLJoFoR5fUfegXOcHGAT8qAM+XaBzQCCwGDuyPlQWJpV37Q5TIweM7u/f9qA9vNYpFML2KaR8YiaJgAh55PvQRvnYoqA+MThcDnPai4PKWQlH3K6tggjnPaiEmz4ZznBPfFAVWG0AZDH1z2HzoE7lhv8AJIHI/mGRQI8AEeuOMGgndN6pu7V4RJLKY4WDLhypXj0oNL6d+MMS4ttetDdQgDNwAPEwPRh2NZGkabrvTXVZiexvba5aI+ILWZQG3Y4IDdvtQScvTdjK8chtFjkQ+VlYgqPXGKA17daV07apJrV9CsZbELTgbu3YY78UEHf9evJ4K9P6JqGoiR1XxjCUjAJ5OT3NBM9YdSWPTenwy3t7bWbSuBunUthfUhRyT+1BA2vXnS/jGdeo9LnIHljeLwSCe5zgmgejrOz1S3kGhazoaXYGAJ5Sw3e38uaCsJfX02rPD1XfXtvcCXdBJGjLbOO+EK88Y7nIoLB1H1Bb6WkN1ddSQ29io/5MZEjzt7DGTjj2FXNGUfEH4wRaxCtnp2kwGGM7llvBvIOO4XOAe/fNMwZbqusalfLBHfzSvFH+SIgKo+igYqiwWfWV30rpp03p6exJnUPJexQnxuR+Ulu2PkKCrT6jcSiTxZnbxCWcFidxPJJ96BBDlQ35fbFAbahBHJIGO+KAIwg5YnB455oDkKcbW7UAOowSMjOORQcCVXPp+9AqDkHcDmgMNpHPcUBTuzQWCJXcFvKR2Y0CMiqjnz4UcH/6oG8gKluwxyAfSjQ6ylEJBJfv37/OiYQMjbjkZXOSP/FEELAEkNn1waBC
XdjORnPoc80CZeTb5wQe2BQAm4y7jlhjvQcWO3cW4GB7ZpgGC5a3uUeNyGQ5GCR+45pgt+l/EzqLTgxj1O5Zc4CSOXCj70wOE+Jd/Pq0V7qVvb3bISAWUBhnuc+/2pgvkHx0soLaNIdKkEiqR5yDg47cYpgresfELSNc1n8VrFtAw2ZBiiywwcgeb14x2xTBYNA13ozUo/GOqWVizDc1nf6crIh/7wOf1rOURvVupdE6ncpFeakiSWsZdbjSLfw1Zs+VVyMHA75xWsFDHU0idTJdf65rT28YKpPvHjKp9Bk49qYK/rGpXF/qU9zPdyzyyMSJJAAx9ifnVlwNZ7vxYEh8GAEHO8DDH5H5U0IPK8jHxSzFQAMnOKgJkFwPT6UBlAII5z3zQCjnA5OKBXeuAT9KABMA208DtxQHRhzgUC3KjJx9z2oAP6/L3NAHC8Hg/XNAcNtGe4oEy5yeaCdLk7yx2qQCAO1AmXZSSexHbPNAhJuLJkgjvzRonuAJy2cd8UCbyOi453Dj7GiYTZyVPPl74oYLJIyq68EH3Gf3oYTDEjLbiP1FEDHOUOdgOfXIBFAm77j5Mnng/WgLI5UE8Eg9iO9AnuJbuQx5wOBQFRxuIbOc54PrQK28ws76F722EyI4d4HJUOPb35yKBm8oeQsi4BPbPb71RyYIHmPGRg00GRsbsHIPY0Bg52AEEseBUCQJyOPXtQDtcdvvjtQCQ/GBwKAuXU4PrQBvZE5IGeO1AffjBJ57UBvEO045HagFWAAzktQKIzBeMg0C5kz5mOG/WgMrDJJ7jmgEnIyOccYoA3cEMRj05oC7/wDeBQTduzEoNxwcZGaAJOWfPNAlISVOT60aIQfkj+amgJ3bnnigKeFGPQUCf/x0Smw4V8exogX/AOY3+/SgKxKxeU459KAgJOckntQJkkcgkGgAAFFz/XQEmJaY7jnk96BM9yPQelAf/wCX7UBv/jagGP8AKB6ZoDf00HMfO/0oAH5TQA/5moECSMDPFAvGASMjPP8AigVX+b60BW7/AHoHEJJD55oDd4snv70CsSjCcDmgVAAbgYoGYJ3nk9qBUAYHAoP/2Q==
\ No newline at end of file diff --git a/tests/writer.html b/tests/writer.html index b0227e21b..e8e619f44 100644 --- a/tests/writer.html +++ b/tests/writer.html @@ -324,7 +324,6 @@ These should not be escaped: \$ \\ \> \[ \{
foo
-

And nested without indentation:

@@ -336,7 +335,6 @@ foo bar
-

Interpreted markdown in a table:

@@ -353,10 +351,8 @@ And this is strong

Here’s a simple block:

- foo
-

This should be a code block, though:

<div>
     foo
@@ -365,14 +361,12 @@ foo
 
<div>foo</div>

Now, nested:

-
-
- +
+
foo
-
- +

This should just be an HTML comment:

diff --git a/tests/writer.markdown b/tests/writer.markdown index 2201ac8d1..7d67e4e87 100644 --- a/tests/writer.markdown +++ b/tests/writer.markdown @@ -356,20 +356,31 @@ HTML Blocks Simple block on one line:
+ foo +
And nested without indentation:
+
+
+ foo +
+
+
+ bar +
+
Interpreted markdown in a table: @@ -390,8 +401,9 @@ And this is **strong** Here’s a simple block:
- + foo +
This should be a code block, though: @@ -407,12 +419,17 @@ As should this: Now, nested:
-
-
- + +
+ +
+ foo +
-
+ +
+
This should just be an HTML comment: diff --git a/tests/writer.mediawiki b/tests/writer.mediawiki index 7eccc44e8..2f3726285 100644 --- a/tests/writer.mediawiki +++ b/tests/writer.mediawiki @@ -311,22 +311,30 @@ Blank line after term, indented marker, alternate markers: Simple block on one line:
+ foo -
+
And nested without indentation:
+
+
+ foo +
+
+ bar -
+
+
Interpreted markdown in a table:
@@ -345,10 +353,10 @@ And this is '''strong''' Here’s a simple block:
- + foo -
+ This should be a code block, though:
<div>
@@ -360,14 +368,18 @@ As should this:
 Now, nested:
 
 
-
-
- + +
+ +
+ foo +
-
+
+
This should just be an HTML comment: diff --git a/tests/writer.native b/tests/writer.native index d1b14b24e..678d7595f 100644 --- a/tests/writer.native +++ b/tests/writer.native @@ -228,15 +228,9 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "sublist"]]]]])] ,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock (Format "html") "
" -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n" +,Div ("",[],[]) [Plain [Str "foo"]] ,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,RawBlock (Format "html") "
\n
\n
" -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n
\n
" -,Plain [Str "bar"] -,RawBlock (Format "html") "
\n
\n" +,Div ("",[],[]) [Div ("",[],[]) [Div ("",[],[]) [Plain [Str "foo"]]],Div ("",[],[]) [Plain [Str "bar"]]] ,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] ,RawBlock (Format "html") "
\n\n\n\n
" ,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"]] @@ -244,17 +238,13 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,Plain [Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] ,RawBlock (Format "html") "
\n\n\n" ,Para [Str "Here\8217s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,RawBlock (Format "html") "
\n " -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n" +,Div ("",[],[]) [Plain [Str "foo"]] ,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] ,CodeBlock ("",[],[]) "
\n foo\n
" ,Para [Str "As",Space,Str "should",Space,Str "this:"] ,CodeBlock ("",[],[]) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock (Format "html") "
\n
\n
\n " -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n
\n
\n" +,Div ("",[],[]) [Div ("",[],[]) [Div ("",[],[]) [Plain [Str "foo"]]]] ,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] ,RawBlock (Format "html") "\n" ,Para [Str "Multiline:"] diff --git a/tests/writer.opml b/tests/writer.opml index b0954a439..228cad247 100644 --- a/tests/writer.opml +++ b/tests/writer.opml @@ -44,7 +44,7 @@ - + diff --git a/tests/writer.org b/tests/writer.org index b8058a406..85016f352 100644 --- a/tests/writer.org +++ b/tests/writer.org @@ -359,7 +359,13 @@ And nested without indentation: #+BEGIN_HTML
+#+END_HTML + +#+BEGIN_HTML
+#+END_HTML + +#+BEGIN_HTML
#+END_HTML @@ -367,7 +373,13 @@ foo #+BEGIN_HTML
+#+END_HTML + +#+BEGIN_HTML
+#+END_HTML + +#+BEGIN_HTML
#+END_HTML @@ -375,6 +387,9 @@ bar #+BEGIN_HTML
+#+END_HTML + +#+BEGIN_HTML
#+END_HTML @@ -407,7 +422,6 @@ Here's a simple block: #+BEGIN_HTML
- #+END_HTML foo @@ -434,16 +448,27 @@ Now, nested: #+BEGIN_HTML
-
-
- +#+END_HTML + +#+BEGIN_HTML +
+#+END_HTML + +#+BEGIN_HTML +
#+END_HTML foo #+BEGIN_HTML
-
+#+END_HTML + +#+BEGIN_HTML +
+#+END_HTML + +#+BEGIN_HTML
#+END_HTML diff --git a/tests/writer.plain b/tests/writer.plain index cc61916d2..60e7bb329 100644 --- a/tests/writer.plain +++ b/tests/writer.plain @@ -352,10 +352,13 @@ HTML Blocks Simple block on one line: foo + And nested without indentation: foo + bar + Interpreted markdown in a table: This is emphasized @@ -363,6 +366,7 @@ And this is strong Here’s a simple block: foo + This should be a code block, though:
@@ -376,6 +380,7 @@ As should this: Now, nested: foo + This should just be an HTML comment: Multiline: diff --git a/tests/writer.rst b/tests/writer.rst index 41da5bc73..68bc4a06c 100644 --- a/tests/writer.rst +++ b/tests/writer.rst @@ -394,7 +394,13 @@ And nested without indentation: .. raw:: html
+ +.. raw:: html +
+ +.. raw:: html +
foo @@ -402,7 +408,13 @@ foo .. raw:: html
+ +.. raw:: html +
+ +.. raw:: html +
bar @@ -410,6 +422,9 @@ bar .. raw:: html
+ +.. raw:: html +
Interpreted markdown in a table: @@ -442,7 +457,6 @@ Here’s a simple block: .. raw:: html
- foo @@ -469,16 +483,27 @@ Now, nested: .. raw:: html
-
-
- + +.. raw:: html + +
+ +.. raw:: html + +
foo .. raw:: html
-
+ +.. raw:: html + +
+ +.. raw:: html +
This should just be an HTML comment: diff --git a/tests/writer.textile b/tests/writer.textile index 31789a2b0..5042f79cb 100644 --- a/tests/writer.textile +++ b/tests/writer.textile @@ -352,20 +352,33 @@ h1(#html-blocks). HTML Blocks Simple block on one line:
+ foo +
And nested without indentation:
+
+
+ foo +
+ +
+
+ bar +
+ +
Interpreted markdown in a table: @@ -386,8 +399,9 @@ And this is *strong* Here's a simple block:
- + foo +
This should be a code block, though: @@ -405,12 +419,19 @@ bc.
foo
Now, nested:
-
-
- + +
+ +
+ foo + +
+ +
-
+ +
This should just be an HTML comment: -- cgit v1.2.3 From 70386a6a54c54189b8456b547a657873481a70b7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Aug 2013 15:36:54 -0700 Subject: Removed scripts directory. This has been put in its own github repo: https://github.com/jgm/pandoc-filters-python --- pandoc.cabal | 10 ------- scripts/abc.py | 50 ----------------------------------- scripts/caps.py | 15 ----------- scripts/comments.py | 30 --------------------- scripts/deemph.py | 15 ----------- scripts/deflists.py | 20 -------------- scripts/graphviz.py | 47 --------------------------------- scripts/myemph.py | 18 ------------- scripts/pandoc.py | 75 ----------------------------------------------------- scripts/tikz.py | 67 ----------------------------------------------- 10 files changed, 347 deletions(-) delete mode 100755 scripts/abc.py delete mode 100755 scripts/caps.py delete mode 100755 scripts/comments.py delete mode 100755 scripts/deemph.py delete mode 100755 scripts/deflists.py delete mode 100755 scripts/graphviz.py delete mode 100755 scripts/myemph.py delete mode 100755 scripts/pandoc.py delete mode 100755 scripts/tikz.py diff --git a/pandoc.cabal b/pandoc.cabal index a3d0dfa83..e22908918 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -112,16 +112,6 @@ Extra-Source-Files: -- generated man pages (produced post-build) man/man1/pandoc.1, man/man5/pandoc_markdown.5, - -- python library and sample python scripts - scripts/abc.py, - scripts/comments.py, - scripts/graphviz.py, - scripts/pandoc.py, - scripts/caps.py, - scripts/deemph.py, - scripts/myemph.py, - scripts/tikz.py, - scripts/deflists.py, -- tests tests/bodybg.gif, tests/docbook-reader.docbook diff --git a/scripts/abc.py b/scripts/abc.py deleted file mode 100755 index daecd1070..000000000 --- a/scripts/abc.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python - -""" -Pandoc filter to process code blocks with class "abc" containing -ABC notation into images. 
Assumes that abcm2ps and ImageMagick's -convert are in the path. Images are put in the abc-images directory. -""" - -import hashlib -import os -import sys -from pandoc import toJSONFilter -from subprocess import Popen, PIPE, call - -imagedir = "abc-images" - -def sha1(x): - return hashlib.sha1(x).hexdigest() - -def abc2eps(abc, filetype, outfile): - p = Popen(["abcm2ps", "-O", outfile + '.eps', "-"],stdin=PIPE) - p.stdin.write(abc) - p.communicate() - p.stdin.close() - call(["convert", outfile + '.eps', outfile + '.' + filetype]) - -def abc(key, value, format): - if key == 'CodeBlock': - [[ident,classes,keyvals], code] = value - if "abc" in classes: - outfile = imagedir + '/' + sha1(code) - if format == "html": - filetype = "png" - elif format == "latex": - filetype = "pdf" - else: - filetype = "png" - src = outfile + '.' + filetype - if not os.path.isfile(src): - try: - os.mkdir(imagedir) - sys.stderr.write('Created directory ' + imagedir + '\n') - except OSError: - pass - abc2eps(code, filetype, outfile) - sys.stderr.write('Created image ' + src + '\n') - return {'Para': [{'Image': [[], [src,""]]}]} - -if __name__ == "__main__": - toJSONFilter(abc) diff --git a/scripts/caps.py b/scripts/caps.py deleted file mode 100755 index b86cd1520..000000000 --- a/scripts/caps.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python - -""" -Pandoc filter to convert all regular text to uppercase. -Code, link URLs, etc. are not affected. -""" - -from pandoc import toJSONFilter - -def caps(key, value, format): - if key == 'Str': - return {'Str': value.upper()} - -if __name__ == "__main__": - toJSONFilter(caps) diff --git a/scripts/comments.py b/scripts/comments.py deleted file mode 100755 index ded21039c..000000000 --- a/scripts/comments.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python -from pandoc import toJSONFilter -import re - -""" -Pandoc filter that causes everything between -'' and '' -to be ignored. 
The comment lines must appear on -lines by themselves, with blank lines surrounding -them. -""" - -incomment = False - -def comment(k,v,fmt): - global incomment - if k == 'RawBlock': - fmt, s = v - if fmt == "html": - if re.search("", s): - incomment = True - return [] - elif re.search("", s): - incomment = False - return [] - if incomment: - return [] # suppress anything in a comment - -if __name__ == "__main__": - toJSONFilter(comment) diff --git a/scripts/deemph.py b/scripts/deemph.py deleted file mode 100755 index f69dac5b8..000000000 --- a/scripts/deemph.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python -from pandoc import walk, toJSONFilter -from caps import caps - -""" -Pandoc filter that causes emphasized text to be displayed -in ALL CAPS. -""" - -def deemph(key, val, fmt): - if key == 'Emph': - return walk(val, caps, fmt) - -if __name__ == "__main__": - toJSONFilter(deemph) diff --git a/scripts/deflists.py b/scripts/deflists.py deleted file mode 100755 index 502963419..000000000 --- a/scripts/deflists.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env python - -""" -Pandoc filter to convert definition lists to bullet -lists with the defined terms in strong emphasis (for -compatibility with standard markdown). -""" - -from pandoc import toJSONFilter - -def deflists(key, value, format): - if key == 'DefinitionList': - return {'BulletList': [tobullet(t,d) for [t,d] in value]} - -def tobullet(term, defs): - return [{'Para': [{'Strong': term}]}] + [b for d in defs for b in d] - - -if __name__ == "__main__": - toJSONFilter(deflists) diff --git a/scripts/graphviz.py b/scripts/graphviz.py deleted file mode 100755 index 519a3a9cc..000000000 --- a/scripts/graphviz.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python - -""" -Pandoc filter to process code blocks with class "graphviz" into -graphviz-generated images. 
-""" - -import pygraphviz -import hashlib -import os -import sys -from pandoc import toJSONFilter - -def sha1(x): - return hashlib.sha1(x).hexdigest() - -imagedir = "graphviz-images" - -def graphviz(key, value, format): - if key == 'CodeBlock': - [[ident,classes,keyvals], code] = value - caption = "caption" - if "graphviz" in classes: - G = pygraphviz.AGraph(string = code) - G.layout() - filename = sha1(code) - if format == "html": - filetype = "png" - elif format == "latex": - filetype = "pdf" - else: - filetype = "png" - alt = [{'Str': caption}] - src = imagedir + '/' + filename + '.' + filetype - if not os.path.isfile(src): - try: - os.mkdir(imagedir) - sys.stderr.write('Created directory ' + imagedir + '\n') - except OSError: - pass - G.draw(src) - sys.stderr.write('Created image ' + src + '\n') - tit = "" - return {'Para': [{'Image': [alt, [src,tit]]}]} - -if __name__ == "__main__": - toJSONFilter(graphviz) diff --git a/scripts/myemph.py b/scripts/myemph.py deleted file mode 100755 index 2a322b385..000000000 --- a/scripts/myemph.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python -from pandoc import toJSONFilter - -""" -Pandoc filter that causes emphasis to be rendered using -the custom macro '\myemph{...}' rather than '\emph{...}' -in latex. Other output formats are unaffected. -""" - -def latex(s): - return {'RawInline': ['latex', s]} - -def myemph(k, v, f): - if k == 'Emph' and f == 'latex': - return [latex('\\myemph{')] + v + [latex('}')] - -if __name__ == "__main__": - toJSONFilter(myemph) diff --git a/scripts/pandoc.py b/scripts/pandoc.py deleted file mode 100755 index f21e9cc83..000000000 --- a/scripts/pandoc.py +++ /dev/null @@ -1,75 +0,0 @@ -# Author: John MacFarlane -# Copyright: (C) 2013 John MacFarlane -# License: GPL version 2 or higher - -""" -Functions to aid writing python scripts that process the pandoc -AST serialized as JSON. 
-""" - -import sys -import json - -def walk(x, action, format = ""): - """Walk a tree, applying an action to every object. - Returns a modified tree. - """ - if isinstance(x, list): - array = [] - for item in x: - if isinstance(item, dict): - if item == {}: - array.append(walk(item, action, format)) - else: - for k in item: - res = action(k, item[k], format) - if res is None: - array.append(walk(item, action, format)) - elif isinstance(res, list): - for z in res: - array.append(walk(z, action, format)) - else: - array.append(walk(res, action, format)) - else: - array.append(walk(item, action, format)) - return array - elif isinstance(x, dict): - obj = {} - for k in x: - obj[k] = walk(x[k], action, format) - return obj - else: - return x - -def toJSONFilter(action): - """Converts an action into a filter that reads a JSON-formatted - pandoc document from stdin, transforms it by walking the tree - with the action, and returns a new JSON-formatted pandoc document - to stdout. The argument is a function action(key, value, format), - where key is the type of the pandoc object (e.g. 'Str', 'Para'), - value is the contents of the object (e.g. a string for 'Str', - a list of inline elements for 'Para'), and format is the target - output format (which will be taken for the first command line - argument if present). If the function returns None, the object - to which it applies will remain unchanged. If it returns an - object, the object will be replaced. If it returns a list, the - list will be spliced in to the list to which the target object - belongs. (So, returning an empty list deletes the object.) - """ - doc = json.loads(sys.stdin.read()) - if len(sys.argv) > 1: - format = sys.argv[1] - else: - format = "" - altered = walk(doc, action, format) - json.dump(altered, sys.stdout) - -def attributes(attrs): - """Returns an attribute list, constructed from the - dictionary attrs. 
- """ - attrs = attrs or [] - ident = attrs["id"] or "" - classes = attrs["classes"] or [] - keyvals = [x for x in attrs and x != "classes" and x != "id"] - return [ident, classes, keyvals] diff --git a/scripts/tikz.py b/scripts/tikz.py deleted file mode 100755 index 4ff8b2383..000000000 --- a/scripts/tikz.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python - -""" -Pandoc filter to process raw latex tikz environments into images. -Assumes that pdflatex is in the path, and that the standalone -package is available. Also assumes that ImageMagick's convert -is in the path. Images are put in the tikz-images directory. -""" - -import hashlib -import re -import os -import sys -import shutil -from pandoc import toJSONFilter -from subprocess import Popen, PIPE, call -from tempfile import mkdtemp - -imagedir = "tikz-images" - -def sha1(x): - return hashlib.sha1(x).hexdigest() - -def tikz2image(tikz, filetype, outfile): - tmpdir = mkdtemp() - olddir = os.getcwd() - os.chdir(tmpdir) - f = open('tikz.tex', 'w') - f.write("""\\documentclass{standalone} - \\usepackage{tikz} - \\begin{document} - """) - f.write(tikz) - f.write("\n\\end{document}\n") - f.close() - p = call(["pdflatex", 'tikz.tex'], stdout=sys.stderr) - os.chdir(olddir) - if filetype == 'pdf': - shutil.copyfile(tmpdir + '/tikz.pdf', outfile + '.pdf') - else: - call(["convert", tmpdir + '/tikz.pdf', outfile + '.' + filetype]) - shutil.rmtree(tmpdir) - -def tikz(key, value, format): - if key == 'RawBlock': - [fmt, code] = value - if fmt == "latex" and re.match("\\\\begin{tikzpicture}", code): - outfile = imagedir + '/' + sha1(code) - if format == "html": - filetype = "png" - elif format == "latex": - filetype = "pdf" - else: - filetype = "png" - src = outfile + '.' 
+ filetype - if not os.path.isfile(src): - try: - os.mkdir(imagedir) - sys.stderr.write('Created directory ' + imagedir + '\n') - except OSError: - pass - tikz2image(code, filetype, outfile) - sys.stderr.write('Created image ' + src + '\n') - return {'Para': [{'Image': [[], [src,""]]}]} - -if __name__ == "__main__": - toJSONFilter(tikz) -- cgit v1.2.3 From 05cc3a5c2182072ba93ffe4ac8b6661c098202e7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Aug 2013 15:58:08 -0700 Subject: Improved `--filter` documentation. --- README | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/README b/README index c25d611d6..c1429aec3 100644 --- a/README +++ b/README @@ -259,13 +259,30 @@ Reader options require different kinds of images. Currently this option only affects the markdown and LaTeX readers. -`--filter=`*PATH* +`--filter=`*EXECUTABLE* : Specify an executable to be used as a filter transforming the Pandoc AST after the input is parsed and before the output is written. The executable should read JSON from stdin and write JSON to stdout. The JSON must be formatted like pandoc's own - JSON input and output. Filters may be most easily created in Haskell, - using the utility function `toJsonFilter` from `Text.Pandoc`. + JSON input and output. The name of the output format will be + passed to the filter as the first argument. Hence, + + pandoc --filter ./caps.py -t latex + + is equivalent to + + pandoc -t json | ./caps.py latex | pandoc -f json -t latex + + The latter form may be useful for debugging filters. + + Filters may be written in any language. `Text.Pandoc.JSON` + exports `toJSONFilter` to facilitate writing filters in Haskell. + Those who would prefer to write filters in pandoc can use the + module `pandoc.py`: see + for the module and several examples. Note that the *EXECUTABLE* + will be sought in the user's `PATH`, and not in the working directory, + if no directory is provided. 
If you want to run a script in the + working directory, preface the filename with `./`. `--normalize` : Normalize the document after reading: merge adjacent -- cgit v1.2.3 From af786829a0d64e373218f4c84c105796e9663b6f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Aug 2013 16:22:56 -0700 Subject: Parsing: Added stateMeta' to ParserState. --- src/Text/Pandoc/Parsing.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 2f42aba41..c16d5bb1d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -801,6 +801,7 @@ data ParserState = ParserState stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) stateMeta :: Meta, -- ^ Document metadata + stateMeta' :: F Meta, -- ^ Document metadata stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links) stateIdentifiers :: [String], -- ^ List of header identifiers used @@ -834,6 +835,7 @@ defaultParserState = stateNotes = [], stateNotes' = [], stateMeta = nullMeta, + stateMeta' = return nullMeta, stateHeaderTable = [], stateHeaders = M.empty, stateIdentifiers = [], -- cgit v1.2.3 From 0e2605ffdf69b7a6a7c942a986dec4283a886e82 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Aug 2013 18:39:04 -0700 Subject: Allow multiple YAML metadata blocks in document. 
--- README | 27 +++++---- src/Text/Pandoc/Readers/Markdown.hs | 107 +++++++++++++++++++----------------- 2 files changed, 73 insertions(+), 61 deletions(-) diff --git a/README b/README index c1429aec3..7a2b01f49 100644 --- a/README +++ b/README @@ -1815,14 +1815,21 @@ YAML metadata block **Extension: `yaml_metadata_block`** -If the file begins with a YAML object, delimited by a line of three -hyphens (`---`) at the top and a line of three hyphens (`---`) or three -dots (`...`) at the bottom, metadata will be taken from the fields -of the YAML object. Metadata can contain lists and objects (nested -arbitrarily), but all string scalars will be interpreted as markdown. - -Fields with names ending in an underscore will be ignored by -pandoc. (They may be given a role by external processors.) +A YAML metadata block is a valid YAML object, delimited by a line of three +hyphens (`---`) at the top and a line of three hyphens (`---`) or three dots +(`...`) at the bottom. A YAML metadata block may occur anywhere in the +document, but if it is not at the beginning, it must be preceded by a blank +line. + +Metadata will be taken from the fields of the YAML object and added to any +existing document metadata. Metadata can contain lists and objects (nested +arbitrarily), but all string scalars will be interpreted as markdown. Fields +with names ending in an underscore will be ignored by pandoc. (They may be +given a role by external processors.) + +A document may contain multiple metadata blocks. The metadata fields will +be combined through a *left-biased union*: if two metadata blocks attempt +to set the same field, the value from the first block will be taken. Note that YAML escaping rules must be followed. Thus, for example, if a title contains a colon, it must be quoted. The pipe character @@ -1844,8 +1851,8 @@ when the field contains blank lines: It consists of two paragraphs. ... -Template variables will be set from the metadata. 
Thus, for example, -in writing HTML, the variable `abstract` will be set to the HTML +Template variables will be set automatically from the metadata. Thus, for +example, in writing HTML, the variable `abstract` will be set to the HTML equivalent of the markdown in the `abstract` field:

    <p>This is the abstract.</p>

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 535fc02c6..a653c2e98 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -203,13 +203,10 @@ dateLine = try $ do skipSpaces trimInlinesF . mconcat <$> manyTill inline newline -titleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) -titleBlock = pandocTitleBlock - <|> yamlTitleBlock - <|> mmdTitleBlock - <|> return (return id) +titleBlock :: MarkdownParser () +titleBlock = pandocTitleBlock <|> mmdTitleBlock -pandocTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) +pandocTitleBlock :: MarkdownParser () pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') @@ -217,16 +214,18 @@ pandocTitleBlock = try $ do author <- option (return []) authorsLine date <- option mempty dateLine optional blanklines - return $ do - title' <- title - author' <- author - date' <- date - return $ if B.isNull title' then id else B.setMeta "title" title' - . if null author' then id else B.setMeta "author" author' - . if B.isNull date' then id else B.setMeta "date" date' - -yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) -yamlTitleBlock = try $ do + let meta' = do title' <- title + author' <- author + date' <- date + return $ + ( if B.isNull title' then id else B.setMeta "title" title' + . if null author' then id else B.setMeta "author" author' + . 
if B.isNull date' then id else B.setMeta "date" date' ) + nullMeta + updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + +yamlMetaBlock :: MarkdownParser (F Blocks) +yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block pos <- getPosition string "---" @@ -236,33 +235,39 @@ yamlTitleBlock = try $ do let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines opts <- stateOptions <$> getState - case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> return $ return $ - H.foldrWithKey (\k v f -> - if ignorable k - then f - else B.setMeta (T.unpack k) (yamlToMeta opts v) . f) - id hashmap - Right Yaml.Null -> return $ return id - Right _ -> do - addWarning (Just pos) "YAML header is not an object" - return $ return id - Left err' -> do - case err' of - InvalidYaml (Just YamlParseException{ - yamlProblem = problem - , yamlContext = _ctxt - , yamlProblemMark = Yaml.YamlMark { - yamlLine = yline - , yamlColumn = ycol - }}) -> - addWarning (Just $ setSourceLine - (setSourceColumn pos (sourceColumn pos + ycol)) - (sourceLine pos + 1 + yline)) - $ "Could not parse YAML header: " ++ problem - _ -> addWarning (Just pos) - $ "Could not parse YAML header: " ++ show err' - return $ return id + meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of + Right (Yaml.Object hashmap) -> return $ return $ + H.foldrWithKey (\k v m -> + if ignorable k + then m + else B.setMeta (T.unpack k) + (yamlToMeta opts v) m) + nullMeta hashmap + Right Yaml.Null -> return $ return nullMeta + Right _ -> do + addWarning (Just pos) "YAML header is not an object" + return $ return nullMeta + Left err' -> do + case err' of + InvalidYaml (Just YamlParseException{ + yamlProblem = problem + , yamlContext = _ctxt + , yamlProblemMark = Yaml.YamlMark { + yamlLine = yline + , yamlColumn = ycol + }}) -> + addWarning (Just $ setSourceLine + (setSourceColumn pos + (sourceColumn pos + ycol)) + (sourceLine pos + 1 + yline)) + $ "Could not 
parse YAML header: " ++ + problem + _ -> addWarning (Just pos) + $ "Could not parse YAML header: " ++ + show err' + return $ return nullMeta + updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + return mempty -- ignore fields ending with _ ignorable :: Text -> Bool @@ -295,13 +300,13 @@ yamlToMeta _ _ = MetaString "" stopLine :: MarkdownParser () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () -mmdTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) +mmdTitleBlock :: MarkdownParser () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block kvPairs <- many1 kvPair blanklines - return $ return $ \(Pandoc m bs) -> - Pandoc (foldl (\m' (k,v) -> addMetaField k v m') m kvPairs) bs + updateState $ \st -> st{ stateMeta' = stateMeta' st <> + return (Meta $ M.fromList kvPairs) } kvPair :: MarkdownParser (String, MetaValue) kvPair = try $ do @@ -318,15 +323,14 @@ parseMarkdown = do updateState $ \state -> state { stateOptions = let oldOpts = stateOptions state in oldOpts{ readerParseRaw = True } } - titleTrans <- option (return id) titleBlock + optional titleBlock blocks <- parseBlocks st <- getState + let meta = runF (stateMeta' st) st + let Pandoc _ bs = B.doc $ runF blocks st mbsty <- getOption readerCitationStyle refs <- getOption readerReferences - return $ processBiblio mbsty refs - $ runF titleTrans st - $ B.doc - $ runF blocks st + return $ processBiblio mbsty refs $ Pandoc meta bs addWarning :: Maybe SourcePos -> String -> MarkdownParser () addWarning mbpos msg = @@ -442,6 +446,7 @@ parseBlocks = mconcat <$> manyTill block eof block :: MarkdownParser (F Blocks) block = choice [ mempty <$ blanklines , codeBlockFenced + , yamlMetaBlock , guardEnabled Ext_latex_macros *> (macro >>= return . return) , header , lhsCodeBlock -- cgit v1.2.3 From e8ddcfd997bd1733b715a4321f0e57c7860071d2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Aug 2013 16:03:22 -0700 Subject: Scale LaTeX tables so they don't exceed columnwidth. 
--- src/Text/Pandoc/Writers/LaTeX.hs | 6 +++- tests/tables.latex | 64 ++++++++++++++++++++-------------------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 98553c421..ab579a326 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -470,9 +470,13 @@ tableRowToLaTeX header aligns widths cols = do AlignRight -> "\\raggedleft" AlignCenter -> "\\centering" AlignDefault -> "\\raggedright" + -- scale factor compensates for extra space between columns + -- so the whole table isn't larger than columnwidth + let scaleFactor = 0.97 ** fromIntegral (length aligns) let toCell 0 _ c = c toCell w a c = "\\begin{minipage}" <> valign <> - braces (text (printf "%.2f\\columnwidth" w)) <> + braces (text (printf "%.2f\\columnwidth" + (w * scaleFactor))) <> (halign a <> cr <> c <> cr) <> "\\end{minipage}" let cells = zipWith3 toCell widths aligns renderedCells return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}" diff --git a/tests/tables.latex b/tests/tables.latex index 82abeb9a5..c27e10461 100644 --- a/tests/tables.latex +++ b/tests/tables.latex @@ -54,34 +54,34 @@ Multiline table with caption: \begin{longtable}[c]{@{}clrl@{}} \hline\noalign{\medskip} -\begin{minipage}[b]{0.15\columnwidth}\centering +\begin{minipage}[b]{0.13\columnwidth}\centering Centered Header -\end{minipage} & \begin{minipage}[b]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[b]{0.12\columnwidth}\raggedright Left Aligned -\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[b]{0.14\columnwidth}\raggedleft Right Aligned -\end{minipage} & \begin{minipage}[b]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[b]{0.30\columnwidth}\raggedright Default aligned \end{minipage} \\\noalign{\medskip} \hline\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering 
+\begin{minipage}[t]{0.13\columnwidth}\centering First -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 12.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} \\\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering Second -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 5.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. 
\end{minipage} \\\noalign{\medskip} @@ -94,34 +94,34 @@ Multiline table without caption: \begin{longtable}[c]{@{}clrl@{}} \hline\noalign{\medskip} -\begin{minipage}[b]{0.15\columnwidth}\centering +\begin{minipage}[b]{0.13\columnwidth}\centering Centered Header -\end{minipage} & \begin{minipage}[b]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[b]{0.12\columnwidth}\raggedright Left Aligned -\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[b]{0.14\columnwidth}\raggedleft Right Aligned -\end{minipage} & \begin{minipage}[b]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[b]{0.30\columnwidth}\raggedright Default aligned \end{minipage} \\\noalign{\medskip} \hline\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering First -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 12.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} \\\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering Second -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 5.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. 
\end{minipage} \\\noalign{\medskip} @@ -145,23 +145,23 @@ Multiline table without column headers: \begin{longtable}[c]{@{}clrl@{}} \hline\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering First -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 12.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} \\\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering Second -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 5.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. \end{minipage} \\\noalign{\medskip} -- cgit v1.2.3 From 7048c130ec9d128dd1c9d1ddf8e7ce3c15eaf435 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Aug 2013 23:01:23 -0700 Subject: Create Cite element even if no matching reference in the biblio. * Add ??? as fallback text for non-resolved citations. * Biblio: Put references (including a header at the end of the document, if one exists) inside a Div with class "references". This gives some control over styling of references, and allows scripts to manipulate them. * Markdown writer: Print markdown citation codes, and disable printing of references, if `citations` extension is enabled. 
NOTE: It would be good to improve what citeproc-hs does for a nonexistent key. --- src/Text/Pandoc/Biblio.hs | 5 +- src/Text/Pandoc/Readers/Markdown.hs | 21 ++++---- src/Text/Pandoc/Writers/Markdown.hs | 35 +++++++------ tests/Tests/Old.hs | 2 +- tests/markdown-citations.chicago-author-date.txt | 10 +++- tests/markdown-citations.ieee.txt | 40 ++++++++------- tests/markdown-citations.mhra.txt | 62 ++++++++++++++---------- 7 files changed, 105 insertions(+), 70 deletions(-) diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 206b38530..1c0975f11 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -55,7 +55,10 @@ processBiblio (Just style) r p = cits_map = M.fromList $ zip grps (citations result) biblioList = map (renderPandoc' style) (bibliography result) Pandoc m b = bottomUp mvPunct . deNote . topDown (processCite style cits_map) $ p' - in Pandoc m $ b ++ biblioList + (bs, lastb) = case reverse b of + x@(Header _ _ _) : xs -> (reverse xs, [x]) + _ -> (b, []) + in Pandoc m $ bs ++ [Div ("",["references"],[]) (lastb ++ biblioList)] -- | Substitute 'Cite' elements with formatted citations. processCite :: Style -> M.Map [Citation] [FormattedOutput] -> Inline -> Inline diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a653c2e98..05662d9b5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -55,7 +55,6 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) import Text.Pandoc.Biblio (processBiblio) -import qualified Text.CSL as CSL import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad @@ -1797,11 +1796,13 @@ rawHtmlInline = do cite :: MarkdownParser (F Inlines) cite = do guardEnabled Ext_citations - getOption readerReferences >>= guard . not . 
null - citations <- textualCite <|> normalCite - return $ flip B.cite mempty <$> citations + citations <- textualCite <|> (fmap (flip B.cite unknownC) <$> normalCite) + return citations + +unknownC :: Inlines +unknownC = B.str "???" -textualCite :: MarkdownParser (F [Citation]) +textualCite :: MarkdownParser (F Inlines) textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1813,8 +1814,12 @@ textualCite = try $ do } mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite case mbrest of - Just rest -> return $ (first:) <$> rest - Nothing -> option (return [first]) $ bareloc first + Just rest -> return $ (flip B.cite unknownC . (first:)) <$> rest + Nothing -> (fmap (flip B.cite unknownC) <$> bareloc first) <|> + return (do st <- askF + return $ case M.lookup key (stateExamples st) of + Just n -> B.str (show n) + _ -> B.cite [first] unknownC) bareloc :: Citation -> MarkdownParser (F [Citation]) bareloc c = try $ do @@ -1846,8 +1851,6 @@ citeKey = try $ do let internal p = try $ p >>~ lookAhead (letter <|> digit) rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/") let key = first:rest - citations' <- map CSL.refId <$> getOption readerReferences - guard $ key `elem` citations' return (suppress_author, key) suffix :: MarkdownParser (F Inlines) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 623c445df..d617954dd 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -186,7 +186,12 @@ pandocToMarkdown opts (Pandoc meta blocks) = do let toc = if writerTableOfContents opts then tableOfContents opts headerBlocks else empty - body <- blockListToMarkdown opts blocks + -- Strip off final 'references' header if markdown citations enabled + let blocks' = case reverse blocks of + (Div (_,["references"],_) _):xs + | isEnabled Ext_citations opts -> reverse xs + _ -> blocks + body <- blockListToMarkdown opts blocks' st <- get notes' <- 
notesToMarkdown opts (reverse $ stNotes st) st' <- get -- note that the notes may contain refs @@ -304,10 +309,10 @@ blockToMarkdown _ Null = return empty blockToMarkdown opts (Div attrs ils) = do isPlain <- gets stPlain contents <- blockListToMarkdown opts ils - return $ if isPlain + return $ if isPlain || not (isEnabled Ext_markdown_in_html_blocks opts) then contents <> blankline else tagWithAttrs "div" attrs <> blankline <> - contents <> blankline <> "
" <> blankline + contents <> blankline <> "
" <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines return $ contents <> cr @@ -711,17 +716,20 @@ inlineToMarkdown opts (LineBreak) | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr | otherwise = return $ " " <> cr inlineToMarkdown _ Space = return space -inlineToMarkdown opts (Cite (c:cs) lst@[RawInline "latex" _]) +inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst +inlineToMarkdown opts (Cite (c:cs) lst) | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst - | citationMode c == AuthorInText = do - suffs <- inlineListToMarkdown opts $ citationSuffix c - rest <- mapM convertOne cs - let inbr = suffs <+> joincits rest - br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' - return $ text ("@" ++ citationId c) <+> br - | otherwise = do - cits <- mapM convertOne (c:cs) - return $ text "[" <> joincits cits <> text "]" + | otherwise = + if citationMode c == AuthorInText + then do + suffs <- inlineListToMarkdown opts $ citationSuffix c + rest <- mapM convertOne cs + let inbr = suffs <+> joincits rest + br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' + return $ text ("@" ++ citationId c) <+> br + else do + cits <- mapM convertOne (c:cs) + return $ text "[" <> joincits cits <> text "]" where joincits = hcat . intersperse (text "; ") . filter (not . 
isEmpty) convertOne Citation { citationId = k @@ -738,7 +746,6 @@ inlineToMarkdown opts (Cite (c:cs) lst@[RawInline "latex" _]) return $ pdoc <+> r modekey SuppressAuthor = "-" modekey _ = "" -inlineToMarkdown opts (Cite _ lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Link txt (src, tit)) = do linktext <- inlineListToMarkdown opts txt let linktitle = if null tit diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 0ba240084..8609781d0 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -196,7 +196,7 @@ markdownCitationTests ++ [test "natbib" wopts "markdown-citations.txt" "markdown-citations.txt"] where - ropts = ["-r", "markdown", "-w", "markdown", "--bibliography", + ropts = ["-r", "markdown", "-w", "markdown-citations", "--bibliography", "biblio.bib", "--no-wrap"] wopts = ["-r", "markdown", "-w", "markdown", "--no-wrap", "--natbib"] styleToTest style = test style (ropts ++ ["--csl", style ++ ".csl"]) diff --git a/tests/markdown-citations.chicago-author-date.txt b/tests/markdown-citations.chicago-author-date.txt index de242300d..81d7482cb 100644 --- a/tests/markdown-citations.chicago-author-date.txt +++ b/tests/markdown-citations.chicago-author-date.txt @@ -1,9 +1,9 @@ Pandoc with citeproc-hs ======================= -- [@nonexistent] +- ([CSL BIBLIOGRAPHIC DATA ERROR: reference "nonexistent" not found.]) -- @nonexistent +- ([CSL BIBLIOGRAPHIC DATA ERROR: reference "nonexistent" not found.]) - Doe (2005) says blah. @@ -29,15 +29,21 @@ Pandoc with citeproc-hs - With some markup (*see* Doe 2005, 32). +
<div class="references"> + References ========== +“Nonexistent Not Found!” + Doe, John. 2005. *First Book*. Cambridge: Cambridge University Press. ———. 2006. “Article.” *Journal of Generic Studies* 6: 33–34. Doe, John, and Jenny Roe. 2007. “Why Water Is Wet.” In *Third Book*, edited by Sam Smith. Oxford: Oxford University Press. +</div>
+ [^1]: Doe and Roe (2007, 12) and a citation without locators (Doe and Roe 2007). [^2]: Some citations (see Doe 2005, chap. 3; Doe and Roe 2007; Doe 2006). diff --git a/tests/markdown-citations.ieee.txt b/tests/markdown-citations.ieee.txt index a397e3f38..4085a7c63 100644 --- a/tests/markdown-citations.ieee.txt +++ b/tests/markdown-citations.ieee.txt @@ -1,45 +1,51 @@ Pandoc with citeproc-hs ======================= -- [@nonexistent] +- [] -- @nonexistent +- -- Reference 1 says blah. +- Reference 2 says blah. -- Reference 1 says blah. +- Reference 2 says blah. -- Reference 1 says blah. +- Reference 2 says blah. -- Reference 1 [3] says blah. +- Reference 2 [4] says blah. - In a note.[^1] -- A citation group [1], [3]. +- A citation group [2], [4]. -- Another one [1]. +- Another one [2]. - And another one in a note.[^2] -- Citation with a suffix and locator [1]. +- Citation with a suffix and locator [2]. -- Citation with suffix only [1]. +- Citation with suffix only [2]. - Now some modifiers.[^3] -- With some markup [1]. +- With some markup [2]. + +
<div class="references"> References ========== -[1] J. Doe, *First Book*. Cambridge: Cambridge University Press, 2005. +[1]“nonexistent not found!” . + +[2] J. Doe, *First Book*. Cambridge: Cambridge University Press, 2005. + +[3] J. Doe, “Article,” *Journal of Generic Studies*, vol. 6, pp. 33–34, 2006. -[2] J. Doe, “Article,” *Journal of Generic Studies*, vol. 6, pp. 33–34, 2006. +[4] J. Doe and J. Roe, “Why Water Is Wet,” in *Third Book*, S. Smith, Ed. Oxford: Oxford University Press, 2007. -[3] J. Doe and J. Roe, “Why Water Is Wet,” in *Third Book*, S. Smith, Ed. Oxford: Oxford University Press, 2007. + +</div>
-[^1]: Reference 3 and a citation without locators [3]. +[^1]: Reference 4 and a citation without locators [4]. -[^2]: Some citations [1–3]. +[^2]: Some citations [2–4]. -[^3]: Like a citation without author: [1], and now Doe with a locator [2]. +[^3]: Like a citation without author: [2], and now Doe with a locator [3]. diff --git a/tests/markdown-citations.mhra.txt b/tests/markdown-citations.mhra.txt index d33a1b94b..01d9c45ca 100644 --- a/tests/markdown-citations.mhra.txt +++ b/tests/markdown-citations.mhra.txt @@ -1,33 +1,35 @@ Pandoc with citeproc-hs ======================= -- [@nonexistent] +- [^1] -- @nonexistent +- [^2] -- John Doe[^1] says blah. +- John Doe[^3] says blah. -- Doe[^2] says blah. +- Doe[^4] says blah. -- Doe[^3] says blah. +- Doe[^5] says blah. -- Doe[^4] says blah. +- Doe[^6] says blah. -- In a note.[^5] +- In a note.[^7] -- A citation group.[^6] +- A citation group.[^8] -- Another one.[^7] +- Another one.[^9] -- And another one in a note.[^8] +- And another one in a note.[^10] -- Citation with a suffix and locator.[^9] +- Citation with a suffix and locator.[^11] -- Citation with suffix only.[^10] +- Citation with suffix only.[^12] -- Now some modifiers.[^11] +- Now some modifiers.[^13] -- With some markup.[^12] +- With some markup.[^14] + +
<div class="references"> References ========== @@ -38,26 +40,34 @@ Doe, John, ‘Article’, *Journal of Generic Studies*, 6 (2006), 33–34. Doe, John, and Jenny Roe, ‘Why Water Is Wet’, in *Third Book*, ed. by Sam Smith (Oxford: Oxford University Press, 2007). -[^1]: *First Book* (Cambridge: Cambridge University Press, 2005). +‘Nonexistent Not Found!’. + +</div>
+ +[^1]: [CSL BIBLIOGRAPHIC DATA ERROR: reference "nonexistent" not found.]. + +[^2]: [CSL STYLE ERROR: reference with no printed form.]. + +[^3]: *First Book* (Cambridge: Cambridge University Press, 2005). -[^2]: *First Book*, p. 30. +[^4]: *First Book*, p. 30. -[^3]: *First Book*, p. 30, with suffix. +[^5]: *First Book*, p. 30, with suffix. -[^4]: *First Book*; ‘Article’, *Journal of Generic Studies*, 6 (2006), 33–34 (p. 30); see also John Doe and Jenny Roe, ‘Why Water Is Wet’, in *Third Book*, ed. by Sam Smith (Oxford: Oxford University Press, 2007). +[^6]: *First Book*; ‘Article’, *Journal of Generic Studies*, 6 (2006), 33–34 (p. 30); see also John Doe and Jenny Roe, ‘Why Water Is Wet’, in *Third Book*, ed. by Sam Smith (Oxford: Oxford University Press, 2007). -[^5]: Doe and Roe, p. 12 and a citation without locators Doe and Roe. +[^7]: Doe and Roe, p. 12 and a citation without locators Doe and Roe. -[^6]: See Doe, *First Book*, chap. 3; also Doe and Roe, pp. 34–35. +[^8]: See Doe, *First Book*, chap. 3; also Doe and Roe, pp. 34–35. -[^7]: See Doe, *First Book*, pp. 34–35. +[^9]: See Doe, *First Book*, pp. 34–35. -[^8]: Some citations see Doe, *First Book*, chap. 3; Doe and Roe; Doe, ‘Article’, 33–34. +[^10]: Some citations see Doe, *First Book*, chap. 3; Doe and Roe; Doe, ‘Article’, 33–34. -[^9]: Doe, *First Book*, pp. 33, 35–37, and nowhere else. +[^11]: Doe, *First Book*, pp. 33, 35–37, and nowhere else. -[^10]: Doe, *First Book* and nowhere else. +[^12]: Doe, *First Book* and nowhere else. -[^11]: Like a citation without author: *First Book*, and now Doe with a locator ‘Article’, 33–34 (p. 44). +[^13]: Like a citation without author: *First Book*, and now Doe with a locator ‘Article’, 33–34 (p. 44). -[^12]: *See* Doe, *First Book*, p. 32. +[^14]: *See* Doe, *First Book*, p. 32. 
-- cgit v1.2.3 From 0b5156cc7e6e77bb072e6f4e09f6468f6d8a8f60 Mon Sep 17 00:00:00 2001 From: Scott Morrison Date: Wed, 21 Aug 2013 16:04:06 +1000 Subject: adding some cedilla characters to the LaTeX reader --- src/Text/Pandoc/Readers/LaTeX.hs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 414e50fc8..37cec697b 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -673,6 +673,14 @@ cedilla 'c' = 'ç' cedilla 'C' = 'Ç' cedilla 's' = 'ş' cedilla 'S' = 'Ş' +cedilla 't' = 'ţ' +cedilla 'T' = 'Ţ' +cedilla 'e' = 'ȩ' +cedilla 'E' = 'Ȩ' +cedilla 'h' = 'ḩ' +cedilla 'H' = 'Ḩ' +cedilla 'o' = 'o̧' +cedilla 'O' = ''O̧ cedilla c = c hacek :: Char -> Char -- cgit v1.2.3 From 5b97b150cc63a5cab48a544e2b15881409702781 Mon Sep 17 00:00:00 2001 From: Scott Morrison Date: Wed, 21 Aug 2013 16:10:42 +1000 Subject: cedilla-o breaks the compile, removing again --- src/Text/Pandoc/Readers/LaTeX.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 37cec697b..20ed88717 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -679,8 +679,6 @@ cedilla 'e' = 'ȩ' cedilla 'E' = 'Ȩ' cedilla 'h' = 'ḩ' cedilla 'H' = 'Ḩ' -cedilla 'o' = 'o̧' -cedilla 'O' = ''O̧ cedilla c = c hacek :: Char -> Char -- cgit v1.2.3 From 1d91e2cdb380a22b8d988291d726dd1612318b80 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 21 Aug 2013 20:07:36 -0700 Subject: LaTeX reader: Added o-cedilla. 
--- src/Text/Pandoc/Readers/LaTeX.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index ded57df5a..028d83e24 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -681,6 +681,8 @@ cedilla 'e' = 'ȩ' cedilla 'E' = 'Ȩ' cedilla 'h' = 'ḩ' cedilla 'H' = 'Ḩ' +cedilla 'o' = 'o̧' +cedilla 'O' = 'O̧' cedilla c = c hacek :: Char -> Char -- cgit v1.2.3 From 5f09cf7ff033ae11c5094fe39f8cd2ac11657229 Mon Sep 17 00:00:00 2001 From: Florian Eitel Date: Thu, 22 Aug 2013 20:15:36 +0200 Subject: Write id for code block to label attr in latex when listing is used The code: ~~~{#test} asdf ~~~ gets compiled to html:
    <pre id="test">
    <code>asdf
    </code></pre>
So it is possible to link to the identifier `test` But this doesn't happen on latex When using the listings package (`--listings`) it is possible to set the identifier using the `label=test` property: \begin{lstlisting}[label=id] hi \end{lstlisting} And this is exactly what this patch is doing. Modified LaTeX Reader/Writer and added tests for this. --- src/Text/Pandoc/Readers/LaTeX.hs | 3 ++- src/Text/Pandoc/Writers/LaTeX.hs | 8 ++++++-- tests/Tests/Readers/LaTeX.hs | 7 +++++++ tests/Tests/Writers/LaTeX.hs | 7 +++++++ 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index ded57df5a..b785a9852 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -47,6 +47,7 @@ import Text.Pandoc.Builder import Data.Char (isLetter) import Control.Applicative import Data.Monoid +import Data.Maybe (fromMaybe) import System.Environment (getEnv) import System.FilePath (replaceExtension, ()) import Data.List (intercalate, intersperse) @@ -901,7 +902,7 @@ environments = M.fromList lookup "numbers" options == Just "left" ] ++ maybe [] (:[]) (lookup "language" options >>= fromListingsLanguage) - let attr = ("",classes,kvs) + let attr = (fromMaybe "" (lookup "label" options),classes,kvs) codeBlockWith attr <$> (verbEnv "lstlisting")) , ("minted", do options <- option [] keyvals lang <- grouped (many1 $ satisfy (/='}')) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index ab579a326..bf056001f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -313,7 +313,7 @@ blockToLaTeX (BlockQuote lst) = do _ -> do contents <- blockListToLaTeX lst return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" -blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do +blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do opts <- gets stOptions case () of _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` 
classes && @@ -344,7 +344,11 @@ blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do [ (if key == "startFrom" then "firstnumber" else key) ++ "=" ++ attr | - (key,attr) <- keyvalAttr ] + (key,attr) <- keyvalAttr ] ++ + (if identifier == "" + then [] + else [ "label=" ++ identifier ]) + else [] printParams | null params = empty diff --git a/tests/Tests/Readers/LaTeX.hs b/tests/Tests/Readers/LaTeX.hs index 88029b7c2..dff6e4537 100644 --- a/tests/Tests/Readers/LaTeX.hs +++ b/tests/Tests/Readers/LaTeX.hs @@ -55,6 +55,13 @@ tests = [ testGroup "basic" "hi % this is a comment\nthere\n" =?> para "hi there" ] + , testGroup "code blocks" + [ "identifier" =: + "\\begin{lstlisting}[label=test]\\end{lstlisting}" =?> codeBlockWith ("test", [], [("label","test")]) "" + , "no identifier" =: + "\\begin{lstlisting}\\end{lstlisting}" =?> codeBlock "" + ] + , testGroup "citations" [ natbibCitations , biblatexCitations diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index ebde5b97c..5f702a85d 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -10,6 +10,9 @@ import Tests.Arbitrary() latex :: (ToString a, ToPandoc a) => a -> String latex = writeLaTeX def . toPandoc +latexListing :: (ToString a, ToPandoc a) => a -> String +latexListing = writeLaTeX def{ writerListings = True } . 
toPandoc + {- "my test" =: X =?> Y @@ -31,6 +34,10 @@ tests :: [Test] tests = [ testGroup "code blocks" [ "in footnotes" =: note (para "hi" <> codeBlock "hi") =?> "\\footnote{hi\n\n\\begin{Verbatim}\nhi\n\\end{Verbatim}\n}" + , test latexListing "identifier" $ codeBlockWith ("id",[],[]) "hi" =?> + ("\\begin{lstlisting}[label=id]\nhi\n\\end{lstlisting}" :: String) + , test latexListing "no identifier" $ codeBlock "hi" =?> + ("\\begin{lstlisting}\nhi\n\\end{lstlisting}" :: String) ] , testGroup "math" [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?> -- cgit v1.2.3 From 6e222ce2252637da77a8d7ab460af4f0ce0841c1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 24 Aug 2013 12:54:39 -0700 Subject: Improved error reporting on filters. Avoid showing spurious output and avoid double error messages. --- pandoc.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index a16ffbc3c..8eed67544 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -91,14 +91,14 @@ isTextFormat :: String -> Bool isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"] externalFilter :: FilePath -> [String] -> Pandoc -> IO Pandoc -externalFilter f args' d = E.handle filterException $ - do (exitcode, outbs, errbs) <- pipeProcess Nothing f args' $ encode d +externalFilter f args' d = do + (exitcode, outbs, errbs) <- E.handle filterException $ + pipeProcess Nothing f args' $ encode d when (not $ B.null errbs) $ B.hPutStr stderr errbs case exitcode of ExitSuccess -> return $ either error id $ eitherDecode' outbs - ExitFailure _ -> err 83 $ "Error running filter " ++ f ++ "\n" ++ - UTF8.toStringLazy outbs - where filterException :: E.SomeException -> IO Pandoc + ExitFailure _ -> err 83 $ "Error running filter " ++ f + where filterException :: E.SomeException -> IO a filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++ show e -- cgit v1.2.3 From 74250b6c351180cb350150b8069824111193b913 Mon Sep 17 00:00:00 2001 
From: John MacFarlane Date: Sat, 24 Aug 2013 16:10:13 -0700 Subject: Moved most of Text.Pandoc.Readers.TeXMath to texmath 0.6.4. --- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/TeXMath.hs | 84 +------------------------------------- 2 files changed, 3 insertions(+), 83 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index e22908918..ac28ad068 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -246,7 +246,7 @@ Library old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5, HTTP >= 4000.0.5 && < 4000.3, - texmath >= 0.6.3 && < 0.7, + texmath >= 0.6.4 && < 0.7, xml >= 1.3.12 && < 1.4, random >= 1 && < 1.1, extensible-exceptions >= 0.1 && < 0.2, diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index fe49a992e..1f7088f72 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -30,93 +30,13 @@ Conversion of TeX math to a list of 'Pandoc' inline elements. module Text.Pandoc.Readers.TeXMath ( readTeXMath ) where import Text.Pandoc.Definition -import Text.TeXMath.Types -import Text.TeXMath.Parser +import Text.TeXMath -- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. -- Defaults to raw formula between @$@ characters if entire formula -- can't be converted. 
readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings) -> [Inline] -readTeXMath inp = case texMathToPandoc inp of +readTeXMath inp = case texMathToPandoc DisplayInline inp of Left _ -> [Str ("$" ++ inp ++ "$")] Right res -> res - -texMathToPandoc :: String -> Either String [Inline] -texMathToPandoc inp = inp `seq` - case parseFormula inp of - Left err -> Left err - Right exps -> case expsToInlines exps of - Nothing -> Left "Formula too complex for [Inline]" - Just r -> Right r - -expsToInlines :: [Exp] -> Maybe [Inline] -expsToInlines xs = do - res <- mapM expToInlines xs - return (concat res) - -expToInlines :: Exp -> Maybe [Inline] -expToInlines (ENumber s) = Just [Str s] -expToInlines (EIdentifier s) = Just [Emph [Str s]] -expToInlines (EMathOperator s) = Just [Str s] -expToInlines (ESymbol t s) = Just $ addSpace t (Str s) - where addSpace Op x = [x, thinspace] - addSpace Bin x = [medspace, x, medspace] - addSpace Rel x = [widespace, x, widespace] - addSpace Pun x = [x, thinspace] - addSpace _ x = [x] - thinspace = Str "\x2006" - medspace = Str "\x2005" - widespace = Str "\x2004" -expToInlines (EStretchy x) = expToInlines x -expToInlines (EDelimited start end xs) = do - xs' <- mapM expToInlines xs - return $ [Str start] ++ concat xs' ++ [Str end] -expToInlines (EGrouped xs) = expsToInlines xs -expToInlines (ESpace "0.167em") = Just [Str "\x2009"] -expToInlines (ESpace "0.222em") = Just [Str "\x2005"] -expToInlines (ESpace "0.278em") = Just [Str "\x2004"] -expToInlines (ESpace "0.333em") = Just [Str "\x2004"] -expToInlines (ESpace "1em") = Just [Str "\x2001"] -expToInlines (ESpace "2em") = Just [Str "\x2001\x2001"] -expToInlines (ESpace _) = Just [Str " "] -expToInlines (EBinary _ _ _) = Nothing -expToInlines (ESub x y) = do - x' <- expToInlines x - y' <- expToInlines y - return $ x' ++ [Subscript y'] -expToInlines (ESuper x y) = do - x' <- expToInlines x - y' <- expToInlines y - return $ x' ++ [Superscript y'] -expToInlines (ESubsup x y z) = do 
- x' <- expToInlines x - y' <- expToInlines y - z' <- expToInlines z - return $ x' ++ [Subscript y'] ++ [Superscript z'] -expToInlines (EDown x y) = expToInlines (ESub x y) -expToInlines (EUp x y) = expToInlines (ESuper x y) -expToInlines (EDownup x y z) = expToInlines (ESubsup x y z) -expToInlines (EText TextNormal x) = Just [Str x] -expToInlines (EText TextBold x) = Just [Strong [Str x]] -expToInlines (EText TextMonospace x) = Just [Code nullAttr x] -expToInlines (EText TextItalic x) = Just [Emph [Str x]] -expToInlines (EText _ x) = Just [Str x] -expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) = - case accent of - '\x203E' -> Just [Emph [Str [c,'\x0304']]] -- bar - '\x00B4' -> Just [Emph [Str [c,'\x0301']]] -- acute - '\x0060' -> Just [Emph [Str [c,'\x0300']]] -- grave - '\x02D8' -> Just [Emph [Str [c,'\x0306']]] -- breve - '\x02C7' -> Just [Emph [Str [c,'\x030C']]] -- check - '.' -> Just [Emph [Str [c,'\x0307']]] -- dot - '\x00B0' -> Just [Emph [Str [c,'\x030A']]] -- ring - '\x20D7' -> Just [Emph [Str [c,'\x20D7']]] -- arrow right - '\x20D6' -> Just [Emph [Str [c,'\x20D6']]] -- arrow left - '\x005E' -> Just [Emph [Str [c,'\x0302']]] -- hat - '\x0302' -> Just [Emph [Str [c,'\x0302']]] -- hat - '~' -> Just [Emph [Str [c,'\x0303']]] -- tilde - _ -> Nothing -expToInlines _ = Nothing - - -- cgit v1.2.3 From deb59b62354e38df9c85ce6985e5c28dd2301ee7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 24 Aug 2013 22:27:08 -0700 Subject: Removed dependency on citeproc-hs. Going forward we'll use pandoc-citeproc, as an external filter. The `--bibliography`, `--csl`, and `--citation-abbreviation` fields have been removed. Instead one must include `bibliography`, `csl`, or `csl-abbrevs` fields in the document's YAML metadata. The filter can then be used as follows: pandoc --filter pandoc-citeproc The `Text.Pandoc.Biblio` module has been removed. Henceforth, `Text.CSL.Pandoc` from pandoc-citations can be used by library users. 
The Markdown and LaTeX readers no longer format bibliographies and citations. That must be done using `processCites` or `processCites'` from Text.CSL.Pandoc. All bibliography-related fields have been removed from `ReaderOptions` and `WriterOptions`: `writerBiblioFiles`, `readerReferences`, `readerCitationStyle`. API change. --- README | 105 ++++----- data/default.csl | 458 ------------------------------------ pandoc.cabal | 7 +- pandoc.hs | 68 +----- src/Text/Pandoc/Biblio.hs | 216 ----------------- src/Text/Pandoc/Options.hs | 7 - src/Text/Pandoc/Readers/LaTeX.hs | 5 +- src/Text/Pandoc/Readers/Markdown.hs | 5 +- src/Text/Pandoc/Writers/LaTeX.hs | 8 +- tests/Tests/Old.hs | 14 -- 10 files changed, 50 insertions(+), 843 deletions(-) delete mode 100644 data/default.csl delete mode 100644 src/Text/Pandoc/Biblio.hs diff --git a/README b/README index 7a2b01f49..f85e62e14 100644 --- a/README +++ b/README @@ -598,54 +598,6 @@ Options affecting specific writers Citation rendering ------------------ -`--bibliography=`*FILE* -: Specify bibliography database to be used in resolving - citations. The database type will be determined from the - extension of *FILE*, which may be `.mods` (MODS format), - `.bib` (BibLaTeX format, which will normally work for BibTeX - files as well), `.bibtex` (BibTeX format), - `.ris` (RIS format), `.enl` (EndNote format), - `.xml` (EndNote XML format), `.wos` (ISI format), - `.medline` (MEDLINE format), `.copac` (Copac format), - or `.json` (citeproc JSON). If you want to use multiple - bibliographies, just use this option repeatedly. - -`--csl=`*FILE* -: Specify [CSL] style to be used in formatting citations and - the bibliography. If *FILE* is not found, pandoc will look - for it in - - $HOME/.csl - - in unix, - - C:\Documents And Settings\USERNAME\Application Data\csl - - in Windows XP, and - - C:\Users\USERNAME\AppData\Roaming\csl - - in Windows 7.
If the `--csl` option is not specified, pandoc - will use a default style: either `default.csl` in the - user data directory (see `--data-dir`), or, if that is - not present, the Chicago author-date style. - -`--citation-abbreviations=`*FILE* -: Specify a file containing abbreviations for journal titles and - other bibliographic fields (indicated by setting `form="short"` - in the CSL node for the field). The format is described at - . - Here is a short example: - - { "default": { - "container-title": { - "Lloyd's Law Reports": "Lloyd's Rep", - "Estates Gazette": "EG", - "Scots Law Times": "SLT" - } - } - } - `--natbib` : Use natbib for citations in LaTeX output. @@ -2378,9 +2330,14 @@ Citations **Extension: `citations`** -Pandoc can automatically generate citations and a bibliography in a number of -styles (using Andrea Rossato's `hs-citeproc`). In order to use this feature, -you will need a bibliographic database in one of the following formats: +Using an external filter, `pandoc-citeproc`, pandoc can automatically generate +citations and a bibliography in a number of styles. Basic usage is + + pandoc --filter pandoc-citeproc myinput.txt + +In order to use this feature, you will need to specify a bibliography file +using the `bibliography` metadata field in a YAML metadata section. +The bibliography may have any of these formats: Format File extension ------------ -------------- @@ -2398,18 +2355,40 @@ you will need a bibliographic database in one of the following formats: Note that `.bib` can generally be used with both BibTeX and BibLaTeX files, but you can use `.bibtex` to force BibTeX. -You will need to specify the bibliography file using the `--bibliography` -command-line option (which may be repeated if you have several -bibliographies). - -By default, pandoc will use a Chicago author-date format for citations -and references. To use another style, you will need to use the -`--csl` option to specify a [CSL] 1.0 style file. 
A primer on -creating and modifying CSL styles can be found at -. -A repository of CSL styles can be found at -. -See also for easy browsing. +Alternatively you can use a `references` field in the document's YAML +metadata. This should include an array of YAML-encoded references, +for example: + + --- + references: + - id: fenner2012a + title: One-click science marketing + author: + - family: Fenner + given: Martin + container-title: Nature Materials + volume: 11 + URL: 'http://dx.doi.org/10.1038/nmat3283' + DOI: 10.1038/nmat3283 + issue: 4 + publisher: Nature Publishing Group + page: 261-263 + type: article-journal + issued: + year: 2012 + month: 3 + ... + +(The program `mods2yaml`, which comes with `pandoc-citeproc`, can help produce +these from a MODS reference collection.) + +By default, `pandoc-citeproc` will use a Chicago author-date format for +citations and references. To use another style, you will need to specify +a [CSL] 1.0 style file in the `csl` metadata field. A primer on creating and +modifying CSL styles can be found at +. A repository of CSL styles +can be found at . See also + for easy browsing. Citations go inside square brackets and are separated by semicolons. 
Each citation must have a key, composed of '@' + the citation diff --git a/data/default.csl b/data/default.csl deleted file mode 100644 index 83a70d0b5..000000000 --- a/data/default.csl +++ /dev/null @@ -1,458 +0,0 @@ - - diff --git a/pandoc.cabal b/pandoc.cabal index ac28ad068..0ab990a17 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -99,8 +99,6 @@ Data-Files: data/slideous/slideous.js, -- data for dzslides writer data/dzslides/template.html, - -- data for citeproc - data/default.csl, -- sample lua custom writer data/sample.lua -- documentation @@ -250,7 +248,6 @@ Library xml >= 1.3.12 && < 1.4, random >= 1 && < 1.1, extensible-exceptions >= 0.1 && < 0.2, - citeproc-hs >= 0.3.7 && < 0.4, pandoc-types >= 1.12 && < 1.13, aeson >= 0.6 && < 0.7, tagsoup >= 0.12.5 && < 0.14, @@ -323,7 +320,6 @@ Library Text.Pandoc.UTF8, Text.Pandoc.Templates, Text.Pandoc.XML, - Text.Pandoc.Biblio, Text.Pandoc.SelfContained, Text.Pandoc.Process Other-Modules: Text.Pandoc.Readers.Haddock.Lex, @@ -353,8 +349,7 @@ Executable pandoc extensible-exceptions >= 0.1 && < 0.2, highlighting-kate >= 0.5.5 && < 0.6, aeson >= 0.6 && < 0.7, - HTTP >= 4000.0.5 && < 4000.3, - citeproc-hs >= 0.3.7 && < 0.4 + HTTP >= 4000.0.5 && < 4000.3 Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind Ghc-Prof-Options: -auto-all -caf-all -rtsopts -with-rtsopts=-K16m if os(windows) diff --git a/pandoc.hs b/pandoc.hs index 8eed67544..6ad5694f1 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -35,7 +35,7 @@ import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Readers.LaTeX (handleIncludes) import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile, safeRead, headerShift, normalize, err, warn ) -import Text.Pandoc.XML ( toEntities, fromEntities ) +import Text.Pandoc.XML ( toEntities ) import Text.Pandoc.SelfContained ( makeSelfContained ) import Text.Pandoc.Process (pipeProcess) import Text.Highlighting.Kate ( languages, Style, tango, pygments, @@ -46,20 +46,18 @@ import System.FilePath import 
System.Console.GetOpt import Data.Char ( toLower ) import Data.List ( intercalate, isPrefixOf, sort ) -import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) +import System.Directory ( getAppUserDataDirectory, findExecutable ) import System.IO ( stdout, stderr ) import System.IO.Error ( isDoesNotExistError ) import qualified Control.Exception as E import Control.Exception.Extensible ( throwIO ) import qualified Text.Pandoc.UTF8 as UTF8 -import qualified Text.CSL as CSL import Control.Monad (when, unless, liftM) import Data.Foldable (foldrM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString as BS -import Text.CSL.Reference (Reference(..)) import Data.Aeson (eitherDecode', encode) copyrightMessage :: String @@ -70,7 +68,7 @@ copyrightMessage = "\nCopyright (C) 2006-2013 John MacFarlane\n" ++ compileInfo :: String compileInfo = - "\nCompiled with citeproc-hs " ++ VERSION_citeproc_hs ++ ", texmath " ++ + "\nCompiled with texmath " ++ VERSION_texmath ++ ", highlighting-kate " ++ VERSION_highlighting_kate ++ ".\nSyntax highlighting is supported for the following languages:\n " ++ wrapWords 4 78 @@ -146,9 +144,6 @@ data Opt = Opt , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks , optDataDir :: Maybe FilePath , optCiteMethod :: CiteMethod -- ^ Method to output cites - , optBibliography :: [String] - , optCslFile :: Maybe FilePath - , optAbbrevsFile :: Maybe FilePath , optListings :: Bool -- ^ Use listings package for code blocks , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf , optSlideLevel :: Maybe Int -- ^ Header level that creates slides @@ -203,9 +198,6 @@ defaultOpts = Opt , optIndentedCodeClasses = [] , optDataDir = Nothing , optCiteMethod = Citeproc - , optBibliography = [] - , optCslFile = Nothing - , optAbbrevsFile = Nothing , 
optListings = False , optLaTeXEngine = "pdflatex" , optSlideLevel = Nothing @@ -650,24 +642,6 @@ options = "PROGRAM") "" -- "Name of latex program to use in generating PDF" - , Option "" ["bibliography"] - (ReqArg - (\arg opt -> return opt { optBibliography = (optBibliography opt) ++ [arg] }) - "FILENAME") - "" - - , Option "" ["csl"] - (ReqArg - (\arg opt -> return opt { optCslFile = Just arg }) - "FILENAME") - "" - - , Option "" ["citation-abbreviations"] - (ReqArg - (\arg opt -> return opt { optAbbrevsFile = Just arg }) - "FILENAME") - "" - , Option "" ["natbib"] (NoArg (\opt -> return opt { optCiteMethod = Natbib })) @@ -904,9 +878,6 @@ main = do , optIdentifierPrefix = idPrefix , optIndentedCodeClasses = codeBlockClasses , optDataDir = mbDataDir - , optBibliography = reffiles - , optCslFile = mbCsl - , optAbbrevsFile = cslabbrevs , optCiteMethod = citeMethod , optListings = listings , optLaTeXEngine = latexEngine @@ -1007,36 +978,6 @@ main = do $ lines dztempl return $ ("dzslides-core", dzcore) : variables' else return variables' - - -- unescape reference ids, which may contain XML entities, so - -- that we can do lookups with regular string equality - let unescapeRefId ref = ref{ refId = fromEntities (refId ref) } - - refs <- mapM (\f -> E.catch (CSL.readBiblioFile f) - (\e -> let _ = (e :: E.SomeException) - in err 23 $ "Error reading bibliography `" ++ f ++ - "'" ++ "\n" ++ show e)) - reffiles >>= - return . map unescapeRefId . 
concat - - mbsty <- if citeMethod == Citeproc && not (null refs) - then do - csl <- CSL.parseCSL =<< - case mbCsl of - Nothing -> readDataFileUTF8 datadir - "default.csl" - Just cslfile -> do - exists <- doesFileExist cslfile - if exists - then UTF8.readFile cslfile - else do - csldir <- getAppUserDataDirectory "csl" - readDataFileUTF8 (Just csldir) - (replaceExtension cslfile "csl") - abbrevs <- maybe (return []) CSL.readJsonAbbrevFile cslabbrevs - return $ Just csl { CSL.styleAbbrevs = abbrevs } - else return Nothing - let sourceURL = case sources of [] -> Nothing (x:_) -> case parseURI x of @@ -1054,8 +995,6 @@ main = do , readerColumns = columns , readerTabStop = tabStop , readerOldDashes = oldDashes - , readerReferences = refs - , readerCitationStyle = mbsty , readerIndentedCodeClasses = codeBlockClasses , readerApplyMacros = not laTeXOutput , readerDefaultImageExtension = defaultImageExtension @@ -1069,7 +1008,6 @@ main = do writerHTMLMathMethod = mathMethod, writerIncremental = incremental, writerCiteMethod = citeMethod, - writerBiblioFiles = reffiles, writerIgnoreNotes = False, writerNumberSections = numberSections, writerNumberOffset = numberFrom, diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs deleted file mode 100644 index 1c0975f11..000000000 --- a/src/Text/Pandoc/Biblio.hs +++ /dev/null @@ -1,216 +0,0 @@ -{-# LANGUAGE PatternGuards #-} -{- -Copyright (C) 2008 Andrea Rossato - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Biblio - Copyright : Copyright (C) 2008-2010 Andrea Rossato - License : GNU GPL, version 2 or above - - Maintainer : Andrea Rossato - Stability : alpha - Portability : portable --} - -module Text.Pandoc.Biblio ( processBiblio ) where - -import Data.List -import Data.Char ( isDigit, isPunctuation ) -import qualified Data.Map as M -import Text.CSL hiding ( Cite(..), Citation(..), endWithPunct ) -import qualified Text.CSL as CSL ( Cite(..) ) -import Text.Pandoc.Definition -import Text.Pandoc.Generic -import Text.Pandoc.Walk -import Text.Pandoc.Shared (stringify) -import Text.Parsec hiding (State) -import Control.Monad -import Control.Monad.State - --- | Process a 'Pandoc' document by adding citations formatted --- according to a CSL style, using 'citeproc' from citeproc-hs. -processBiblio :: Maybe Style -> [Reference] -> Pandoc -> Pandoc -processBiblio Nothing _ p = p -processBiblio _ [] p = p -processBiblio (Just style) r p = - let p' = evalState (bottomUpM setHash p) 1 - grps = query getCitation p' - result = citeproc procOpts style r (setNearNote style $ - map (map toCslCite) grps) - cits_map = M.fromList $ zip grps (citations result) - biblioList = map (renderPandoc' style) (bibliography result) - Pandoc m b = bottomUp mvPunct . deNote . topDown (processCite style cits_map) $ p' - (bs, lastb) = case reverse b of - x@(Header _ _ _) : xs -> (reverse xs, [x]) - _ -> (b, []) - in Pandoc m $ bs ++ [Div ("",["references"],[]) (lastb ++ biblioList)] - --- | Substitute 'Cite' elements with formatted citations. 
-processCite :: Style -> M.Map [Citation] [FormattedOutput] -> Inline -> Inline -processCite s cs (Cite t _) = - case M.lookup t cs of - Just (x:xs) - | isTextualCitation t && not (null xs) -> - let xs' = renderPandoc s xs - in if styleClass s == "note" - then Cite t (renderPandoc s [x] ++ [Note [Para xs']]) - else Cite t (renderPandoc s [x] ++ [Space | not (startWithPunct xs')] ++ xs') - | otherwise -> if styleClass s == "note" - then Cite t [Note [Para $ renderPandoc s (x:xs)]] - else Cite t (renderPandoc s (x:xs)) - _ -> Strong [Str "???"] -- TODO raise error instead? -processCite _ _ x = x - -isNote :: Inline -> Bool -isNote (Note _) = True -isNote (Cite _ [Note _]) = True -isNote _ = False - -mvPunct :: [Inline] -> [Inline] -mvPunct (Space : Space : xs) = Space : xs -mvPunct (Space : x : ys) | isNote x, startWithPunct ys = - Str (headInline ys) : x : tailFirstInlineStr ys -mvPunct (Space : x : ys) | isNote x = x : ys -mvPunct xs = xs - --- A replacement for citeproc-hs's endWithPunct, which wrongly treats --- a sentence ending in '.)' as not ending with punctuation, leading --- to an extra period. -endWithPunct :: [Inline] -> Bool -endWithPunct [] = True -endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of - [] -> True - (')':c:_) | isEndPunct c -> True - (c:_) | isEndPunct c -> True - | otherwise -> False - where isEndPunct c = c `elem` ".,;:!?" 
- -deNote :: Pandoc -> Pandoc -deNote = topDown go - where go (Cite (c:cs) [Note xs]) = - Cite (c:cs) [Note $ bottomUp go' $ sanitize c xs] - go (Note xs) = Note $ bottomUp go' xs - go x = x - go' (Note [Para xs]:ys) = - if startWithPunct ys && endWithPunct xs - then initInline xs ++ ys - else xs ++ ys - go' xs = xs - sanitize :: Citation -> [Block] -> [Block] - sanitize Citation{citationPrefix = pref} [Para xs] = - case (null pref, endWithPunct xs) of - (True, False) -> [Para $ xs ++ [Str "."]] - (True, True) -> [Para xs] - (False, False) -> [Para $ toCapital $ xs ++ [Str "."]] - (False, True) -> [Para $ toCapital xs] - sanitize _ bs = bs - -isTextualCitation :: [Citation] -> Bool -isTextualCitation (c:_) = citationMode c == AuthorInText -isTextualCitation _ = False - --- | Retrieve all citations from a 'Pandoc' docuument. To be used with --- 'query'. -getCitation :: Inline -> [[Citation]] -getCitation i | Cite t _ <- i = [t] - | otherwise = [] - -setHash :: Citation -> State Int Citation -setHash c = do - ident <- get - put $ ident + 1 - return c{ citationHash = ident } - -toCslCite :: Citation -> CSL.Cite -toCslCite c - = let (l, s) = locatorWords $ citationSuffix c - (la,lo) = parseLocator l - s' = case (l,s) of - -- treat a bare locator as if it begins with space - -- so @item1 [blah] is like [@item1, blah] - ("",(x:_)) - | not (isPunct x) -> [Space] ++ s - _ -> s - isPunct (Str (x:_)) = isPunctuation x - isPunct _ = False - citMode = case citationMode c of - AuthorInText -> (True, False) - SuppressAuthor -> (False,True ) - NormalCitation -> (False,False) - in emptyCite { CSL.citeId = citationId c - , CSL.citePrefix = PandocText $ citationPrefix c - , CSL.citeSuffix = PandocText s' - , CSL.citeLabel = la - , CSL.citeLocator = lo - , CSL.citeNoteNumber = show $ citationNoteNum c - , CSL.authorInText = fst citMode - , CSL.suppressAuthor = snd citMode - , CSL.citeHash = citationHash c - } - -locatorWords :: [Inline] -> (String, [Inline]) -locatorWords inp = - 
case parse pLocatorWords "suffix" $ breakup inp of - Right r -> r - Left _ -> ("",inp) - where breakup [] = [] - breakup (Str x : xs) = map Str (splitup x) ++ breakup xs - breakup (x : xs) = x : breakup xs - splitup = groupBy (\x y -> x /= '\160' && y /= '\160') - -pLocatorWords :: Parsec [Inline] st (String, [Inline]) -pLocatorWords = do - l <- pLocator - s <- getInput -- rest is suffix - if length l > 0 && last l == ',' - then return (init l, Str "," : s) - else return (l, s) - -pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline -pMatch condition = try $ do - t <- anyToken - guard $ condition t - return t - -pSpace :: Parsec [Inline] st Inline -pSpace = pMatch (\t -> t == Space || t == Str "\160") - -pLocator :: Parsec [Inline] st String -pLocator = try $ do - optional $ pMatch (== Str ",") - optional pSpace - f <- (guardFollowingDigit >> return [Str "p"]) -- "page" the default - <|> many1 (notFollowedBy pSpace >> anyToken) - gs <- many1 pWordWithDigits - return $ stringify f ++ (' ' : unwords gs) - -guardFollowingDigit :: Parsec [Inline] st () -guardFollowingDigit = do - t <- lookAhead anyToken - case t of - Str (d:_) | isDigit d -> return () - _ -> mzero - -pWordWithDigits :: Parsec [Inline] st String -pWordWithDigits = try $ do - optional pSpace - r <- many1 (notFollowedBy pSpace >> anyToken) - let s = stringify r - guard $ any isDigit s - return s - diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index c7c37d6b8..48e418ab2 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -48,7 +48,6 @@ import Data.Set (Set) import qualified Data.Set as Set import Data.Default import Text.Pandoc.Highlighting (Style, pygments) -import qualified Text.CSL as CSL -- | Individually selectable syntax extensions. 
data Extension = @@ -205,8 +204,6 @@ data ReaderOptions = ReaderOptions{ , readerOldDashes :: Bool -- ^ Use pandoc <= 1.8.2.1 behavior -- in parsing dashes; -- is em-dash; -- - before numerial is en-dash - , readerReferences :: [CSL.Reference] -- ^ Bibliographic references - , readerCitationStyle :: Maybe CSL.Style -- ^ Citation style , readerApplyMacros :: Bool -- ^ Apply macros to TeX math , readerIndentedCodeClasses :: [String] -- ^ Default classes for -- indented code blocks @@ -223,8 +220,6 @@ instance Default ReaderOptions , readerColumns = 80 , readerTabStop = 4 , readerOldDashes = False - , readerReferences = [] - , readerCitationStyle = Nothing , readerApplyMacros = True , readerIndentedCodeClasses = [] , readerDefaultImageExtension = "" @@ -289,7 +284,6 @@ data WriterOptions = WriterOptions , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory , writerCiteMethod :: CiteMethod -- ^ How to print cites - , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations , writerHtml5 :: Bool -- ^ Produce HTML5 , writerHtmlQTags :: Bool -- ^ Use @@ tags for quotes in HTML , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show @@ -332,7 +326,6 @@ instance Default WriterOptions where , writerSourceURL = Nothing , writerUserDataDir = Nothing , writerCiteMethod = Citeproc - , writerBiblioFiles = [] , writerHtml5 = False , writerHtmlQTags = False , writerBeamer = False diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index ded57df5a..e558ed1b9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,7 +38,6 @@ import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Shared import Text.Pandoc.Options -import Text.Pandoc.Biblio (processBiblio) import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord 
) @@ -67,9 +66,7 @@ parseLaTeX = do eof st <- getState let meta = stateMeta st - refs <- getOption readerReferences - mbsty <- getOption readerCitationStyle - let (Pandoc _ bs') = processBiblio mbsty refs $ doc bs + let (Pandoc _ bs') = doc bs return $ Pandoc meta bs' type LP = Parser [Char] ParserState diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 05662d9b5..658335202 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -54,7 +54,6 @@ import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) -import Text.Pandoc.Biblio (processBiblio) import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad @@ -327,9 +326,7 @@ parseMarkdown = do st <- getState let meta = runF (stateMeta' st) st let Pandoc _ bs = B.doc $ runF blocks st - mbsty <- getOption readerCitationStyle - refs <- getOption readerReferences - return $ processBiblio mbsty refs $ Pandoc meta bs + return $ Pandoc meta bs addWarning :: Maybe SourcePos -> String -> MarkdownParser () addWarning mbpos msg = diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index ab579a326..6a781ddec 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -43,7 +43,6 @@ import Data.Char ( toLower, isPunctuation ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty -import System.FilePath (dropExtension) import Text.Pandoc.Slides import Text.Pandoc.Highlighting (highlight, styleToLaTeX, formatLaTeXInline, formatLaTeXBlock, @@ -120,7 +119,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader let main = render colwidth $ vsep body st <- get - let biblioFiles = 
intercalate "," $ map dropExtension $ writerBiblioFiles options let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if writerChapters options @@ -152,11 +150,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do $ writerHighlightStyle options ) else id) $ (case writerCiteMethod options of - Natbib -> defField "biblio-files" biblioFiles . - defField "biblio-title" biblioTitle . + Natbib -> defField "biblio-title" biblioTitle . defField "natbib" True - Biblatex -> defField "biblio-files" biblioFiles . - defField "biblio-title" biblioTitle . + Biblatex -> defField "biblio-title" biblioTitle . defField "biblatex" True _ -> id) $ metadata diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 8609781d0..5054559a1 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -63,7 +63,6 @@ tests = [ testGroup "markdown" "markdown-reader-more.txt" "markdown-reader-more.native" , lhsReaderTest "markdown+lhs" ] - , testGroup "citations" markdownCitationTests ] , testGroup "rst" [ testGroup "writer" (writerTests "rst" ++ lhsWriterTests "rst") @@ -190,19 +189,6 @@ fb2WriterTest title opts inputfile normfile = ignoreBinary = unlines . filter (not . startsWith " [String] -- ^ Options to pass to pandoc -- cgit v1.2.3 From 2f156eeb3011936988d6542146e416a21769c831 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 24 Aug 2013 22:42:40 -0700 Subject: Updated travis build. 
--- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1216994db..487e3cb98 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,6 @@ language: haskell before_install: - cabal install cabal-dev - 'git clone https://github.com/jgm/pandoc-types && cabal-dev add-source pandoc-types' - - 'git clone https://github.com/jgm/citeproc-hs && cabal-dev add-source citeproc-hs' install: - cabal-dev install-deps --enable-tests script: -- cgit v1.2.3 From af898dd44f71f1a512fa95d510149e31655817d7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 24 Aug 2013 22:43:22 -0700 Subject: Removed citeproc-hs from osx, windows build process. --- make_osx_package.sh | 1 - windows/make-windows-installer.bat | 1 - 2 files changed, 2 deletions(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index dcf06fad4..8b268f7d7 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -16,7 +16,6 @@ mkdir -p $RESOURCES echo Building pandoc... cabal-dev install-deps -cabal-dev install --reinstall --force-reinstalls --flags="embed_data_files" citeproc-hs cabal-dev configure --prefix=/usr/local --datasubdir=$BASE --docdir=/usr/local/doc/$BASE cabal-dev build cabal-dev copy --destdir=$ROOT diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index 12d873b70..998da6775 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -2,7 +2,6 @@ cd .. rem cabal update cabal-dev clean -cabal-dev install --reinstall --force-reinstall --flags="embed_data_files" citeproc-hs if %errorlevel% neq 0 exit /b %errorlevel% cabal-dev install --reinstall --force-reinstall --flags="embed_data_files" if %errorlevel% neq 0 exit /b %errorlevel% -- cgit v1.2.3 From 152d2919ab804a6b0de313a987f207bc9501d98d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Aug 2013 07:41:37 -0700 Subject: Removed tests relating to citation processing. 
--- pandoc.cabal | 7 - tests/biblio.bib | 26 -- tests/chicago-author-date.csl | 458 ----------------------- tests/ieee.csl | 302 --------------- tests/markdown-citations.chicago-author-date.txt | 51 --- tests/markdown-citations.ieee.txt | 51 --- tests/markdown-citations.mhra.txt | 73 ---- tests/mhra.csl | 399 -------------------- 8 files changed, 1367 deletions(-) delete mode 100644 tests/biblio.bib delete mode 100644 tests/chicago-author-date.csl delete mode 100644 tests/ieee.csl delete mode 100644 tests/markdown-citations.chicago-author-date.txt delete mode 100644 tests/markdown-citations.ieee.txt delete mode 100644 tests/markdown-citations.mhra.txt delete mode 100644 tests/mhra.csl diff --git a/pandoc.cabal b/pandoc.cabal index 0ab990a17..818f36f48 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -123,10 +123,6 @@ Extra-Source-Files: tests/insert, tests/lalune.jpg, tests/movie.jpg, - tests/biblio.bib, - tests/chicago-author-date.csl, - tests/ieee.csl, - tests/mhra.csl, tests/latex-reader.latex, tests/latex-reader.native, tests/textile-reader.textile, @@ -134,9 +130,6 @@ Extra-Source-Files: tests/markdown-reader-more.txt, tests/markdown-reader-more.native, tests/markdown-citations.txt, - tests/markdown-citations.chicago-author-date.txt, - tests/markdown-citations.mhra.txt, - tests/markdown-citations.ieee.txt, tests/textile-reader.textile, tests/mediawiki-reader.wiki, tests/mediawiki-reader.native, diff --git a/tests/biblio.bib b/tests/biblio.bib deleted file mode 100644 index 4eb2ba0d0..000000000 --- a/tests/biblio.bib +++ /dev/null @@ -1,26 +0,0 @@ -@Book{item1, -author="John Doe", -title="First Book", -year="2005", -address="Cambridge", -publisher="Cambridge University Press" -} - -@Article{item2, -author="John Doe", -title="Article", -year="2006", -journal="Journal of Generic Studies", -volume="6", -pages="33-34" -} - -@InCollection{пункт3, -author="John Doe and Jenny Roe", -title="Why Water Is Wet", -booktitle="Third Book", -editor="Sam Smith", 
-publisher="Oxford University Press", -address="Oxford", -year="2007" -} diff --git a/tests/chicago-author-date.csl b/tests/chicago-author-date.csl deleted file mode 100644 index 83a70d0b5..000000000 --- a/tests/chicago-author-date.csl +++ /dev/null @@ -1,458 +0,0 @@ - - diff --git a/tests/ieee.csl b/tests/ieee.csl deleted file mode 100644 index cd7ba4943..000000000 --- a/tests/ieee.csl +++ /dev/null @@ -1,302 +0,0 @@ - - \ No newline at end of file diff --git a/tests/markdown-citations.chicago-author-date.txt b/tests/markdown-citations.chicago-author-date.txt deleted file mode 100644 index 81d7482cb..000000000 --- a/tests/markdown-citations.chicago-author-date.txt +++ /dev/null @@ -1,51 +0,0 @@ -Pandoc with citeproc-hs -======================= - -- ([CSL BIBLIOGRAPHIC DATA ERROR: reference "nonexistent" not found.]) - -- ([CSL BIBLIOGRAPHIC DATA ERROR: reference "nonexistent" not found.]) - -- Doe (2005) says blah. - -- Doe (2005, 30) says blah. - -- Doe (2005, 30, with suffix) says blah. - -- Doe (2005; 2006, 30; see also Doe and Roe 2007) says blah. - -- In a note.[^1] - -- A citation group (see Doe 2005, chap. 3; also Doe and Roe 2007, 34–35). - -- Another one (see Doe 2005, 34–35). - -- And another one in a note.[^2] - -- Citation with a suffix and locator (Doe 2005, 33, 35–37, and nowhere else). - -- Citation with suffix only (Doe 2005 and nowhere else). - -- Now some modifiers.[^3] - -- With some markup (*see* Doe 2005, 32). - -
- -References -========== - -“Nonexistent Not Found!” - -Doe, John. 2005. *First Book*. Cambridge: Cambridge University Press. - -———. 2006. “Article.” *Journal of Generic Studies* 6: 33–34. - -Doe, John, and Jenny Roe. 2007. “Why Water Is Wet.” In *Third Book*, edited by Sam Smith. Oxford: Oxford University Press. - -
- -[^1]: Doe and Roe (2007, 12) and a citation without locators (Doe and Roe 2007). - -[^2]: Some citations (see Doe 2005, chap. 3; Doe and Roe 2007; Doe 2006). - -[^3]: Like a citation without author: (2005), and now Doe with a locator (2006, 44). diff --git a/tests/markdown-citations.ieee.txt b/tests/markdown-citations.ieee.txt deleted file mode 100644 index 4085a7c63..000000000 --- a/tests/markdown-citations.ieee.txt +++ /dev/null @@ -1,51 +0,0 @@ -Pandoc with citeproc-hs -======================= - -- [] - -- - -- Reference 2 says blah. - -- Reference 2 says blah. - -- Reference 2 says blah. - -- Reference 2 [4] says blah. - -- In a note.[^1] - -- A citation group [2], [4]. - -- Another one [2]. - -- And another one in a note.[^2] - -- Citation with a suffix and locator [2]. - -- Citation with suffix only [2]. - -- Now some modifiers.[^3] - -- With some markup [2]. - -
- -References -========== - -[1]“nonexistent not found!” . - -[2] J. Doe, *First Book*. Cambridge: Cambridge University Press, 2005. - -[3] J. Doe, “Article,” *Journal of Generic Studies*, vol. 6, pp. 33–34, 2006. - -[4] J. Doe and J. Roe, “Why Water Is Wet,” in *Third Book*, S. Smith, Ed. Oxford: Oxford University Press, 2007. - -
- -[^1]: Reference 4 and a citation without locators [4]. - -[^2]: Some citations [2–4]. - -[^3]: Like a citation without author: [2], and now Doe with a locator [3]. diff --git a/tests/markdown-citations.mhra.txt b/tests/markdown-citations.mhra.txt deleted file mode 100644 index 01d9c45ca..000000000 --- a/tests/markdown-citations.mhra.txt +++ /dev/null @@ -1,73 +0,0 @@ -Pandoc with citeproc-hs -======================= - -- [^1] - -- [^2] - -- John Doe[^3] says blah. - -- Doe[^4] says blah. - -- Doe[^5] says blah. - -- Doe[^6] says blah. - -- In a note.[^7] - -- A citation group.[^8] - -- Another one.[^9] - -- And another one in a note.[^10] - -- Citation with a suffix and locator.[^11] - -- Citation with suffix only.[^12] - -- Now some modifiers.[^13] - -- With some markup.[^14] - -
- -References -========== - -Doe, John, ‘Article’, *Journal of Generic Studies*, 6 (2006), 33–34. - ----, *First Book* (Cambridge: Cambridge University Press, 2005). - -Doe, John, and Jenny Roe, ‘Why Water Is Wet’, in *Third Book*, ed. by Sam Smith (Oxford: Oxford University Press, 2007). - -‘Nonexistent Not Found!’. - -
- -[^1]: [CSL BIBLIOGRAPHIC DATA ERROR: reference "nonexistent" not found.]. - -[^2]: [CSL STYLE ERROR: reference with no printed form.]. - -[^3]: *First Book* (Cambridge: Cambridge University Press, 2005). - -[^4]: *First Book*, p. 30. - -[^5]: *First Book*, p. 30, with suffix. - -[^6]: *First Book*; ‘Article’, *Journal of Generic Studies*, 6 (2006), 33–34 (p. 30); see also John Doe and Jenny Roe, ‘Why Water Is Wet’, in *Third Book*, ed. by Sam Smith (Oxford: Oxford University Press, 2007). - -[^7]: Doe and Roe, p. 12 and a citation without locators Doe and Roe. - -[^8]: See Doe, *First Book*, chap. 3; also Doe and Roe, pp. 34–35. - -[^9]: See Doe, *First Book*, pp. 34–35. - -[^10]: Some citations see Doe, *First Book*, chap. 3; Doe and Roe; Doe, ‘Article’, 33–34. - -[^11]: Doe, *First Book*, pp. 33, 35–37, and nowhere else. - -[^12]: Doe, *First Book* and nowhere else. - -[^13]: Like a citation without author: *First Book*, and now Doe with a locator ‘Article’, 33–34 (p. 44). - -[^14]: *See* Doe, *First Book*, p. 32. diff --git a/tests/mhra.csl b/tests/mhra.csl deleted file mode 100644 index fe34c8f84..000000000 --- a/tests/mhra.csl +++ /dev/null @@ -1,399 +0,0 @@ - - \ No newline at end of file -- cgit v1.2.3 From a68805bebd172b107e75fe330698cbc7eee7f75a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Aug 2013 07:47:22 -0700 Subject: Added `-F` as shortcut for `--filter`. 
--- pandoc.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.hs b/pandoc.hs index 6ad5694f1..5b0250836 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -280,7 +280,7 @@ options = "STRING") "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" - , Option "" ["filter"] + , Option "F" ["filter"] (ReqArg (\arg opt -> return opt { optPlugins = externalFilter arg : optPlugins opt }) -- cgit v1.2.3 From 4e4c948b417a597f17bd8be12b6e981a87f00506 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 26 Aug 2013 22:30:27 -0700 Subject: Added markdown citation parsing test. --- pandoc.cabal | 1 + tests/Tests/Old.hs | 4 ++++ tests/markdown-citations.native | 17 +++++++++++++++++ 3 files changed, 22 insertions(+) create mode 100644 tests/markdown-citations.native diff --git a/pandoc.cabal b/pandoc.cabal index 818f36f48..51f60f160 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -130,6 +130,7 @@ Extra-Source-Files: tests/markdown-reader-more.txt, tests/markdown-reader-more.native, tests/markdown-citations.txt, + tests/markdown-citations.native, tests/textile-reader.textile, tests/mediawiki-reader.wiki, tests/mediawiki-reader.native, diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 5054559a1..01e4403fb 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -63,6 +63,10 @@ tests = [ testGroup "markdown" "markdown-reader-more.txt" "markdown-reader-more.native" , lhsReaderTest "markdown+lhs" ] + , testGroup "citations" + [ test "citations" ["-r", "markdown", "-w", "native"] + "markdown-citations.txt" "markdown-citations.native" + ] ] , testGroup "rst" [ testGroup "writer" (writerTests "rst" ++ lhsWriterTests "rst") diff --git a/tests/markdown-citations.native b/tests/markdown-citations.native new file mode 100644 index 000000000..1cd4bd035 --- /dev/null +++ b/tests/markdown-citations.native @@ -0,0 +1,17 @@ +[Header 1 ("pandoc-with-citeproc-hs",[],[]) [Str "Pandoc",Space,Str "with",Space,Str "citeproc-hs"] +,BulletList + 
[[Para [Cite [Citation {citationId = "nonexistent", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"]]] + ,[Para [Cite [Citation {citationId = "nonexistent", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "???"]]] + ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "???"],Space,Str "says",Space,Str "blah."]] + ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Str "p.",Space,Str "30"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "???"],Space,Str "says",Space,Str "blah."]] + ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Str "p.",Space,Str "30,",Space,Str "with",Space,Str "suffix"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "???"],Space,Str "says",Space,Str "blah."]] + ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0},Citation {citationId = "item2", citationPrefix = [], citationSuffix = [Space,Str "p.",Space,Str "30"], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0},Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [Str "see",Space,Str "also"], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Space,Str "says",Space,Str "blah."]] + ,[Para [Str "In",Space,Str "a",Space,Str "note.",Note [Para [Cite [Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [], citationSuffix = [Str "p.",Space,Str "12"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "???"],Space,Str "and",Space,Str "a",Space,Str "citation",Space,Str "without",Space,Str 
"locators",Space,Cite [Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]]]] + ,[Para [Str "A",Space,Str "citation",Space,Str "group",Space,Cite [Citation {citationId = "item1", citationPrefix = [Str "see"], citationSuffix = [Space,Str "chap.",Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0},Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [Str "also"], citationSuffix = [Space,Str "p.",Space,Str "34-35"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]] + ,[Para [Str "Another",Space,Str "one",Space,Cite [Citation {citationId = "item1", citationPrefix = [Str "see"], citationSuffix = [Space,Str "p.",Space,Str "34-35"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]] + ,[Para [Str "And",Space,Str "another",Space,Str "one",Space,Str "in",Space,Str "a",Space,Str "note.",Note [Para [Str "Some",Space,Str "citations",Space,Cite [Citation {citationId = "item1", citationPrefix = [Str "see"], citationSuffix = [Space,Str "chap.",Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0},Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0},Citation {citationId = "item2", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]]]] + ,[Para [Str "Citation",Space,Str "with",Space,Str "a",Space,Str "suffix",Space,Str "and",Space,Str "locator",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Space,Str "pp.",Space,Str "33,",Space,Str "35-37,",Space,Str "and",Space,Str "nowhere",Space,Str "else"], citationMode = NormalCitation, citationNoteNum = 0, 
citationHash = 0}] [Str "???"],Str "."]] + ,[Para [Str "Citation",Space,Str "with",Space,Str "suffix",Space,Str "only",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Space,Str "and",Space,Str "nowhere",Space,Str "else"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]] + ,[Para [Str "Now",Space,Str "some",Space,Str "modifiers.",Note [Para [Str "Like",Space,Str "a",Space,Str "citation",Space,Str "without",Space,Str "author:",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str ",",Space,Str "and",Space,Str "now",Space,Str "Doe",Space,Str "with",Space,Str "a",Space,Str "locator",Space,Cite [Citation {citationId = "item2", citationPrefix = [], citationSuffix = [Space,Str "p.",Space,Str "44"], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]]]] + ,[Para [Str "With",Space,Str "some",Space,Str "markup",Space,Cite [Citation {citationId = "item1", citationPrefix = [Emph [Str "see"]], citationSuffix = [Space,Str "p.",Space,Strong [Str "32"]], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]]] +,Header 1 ("references",[],[]) [Str "References"]] -- cgit v1.2.3 From 80148095781b44c7f5af132b48605adaa93a0558 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 Aug 2013 20:12:21 -0700 Subject: LaTeX reader: Allow accents with combining characters. accent now returns [Char], not Char. 
--- src/Text/Pandoc/Readers/LaTeX.hs | 370 +++++++++++++++++++-------------------- 1 file changed, 185 insertions(+), 185 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 5d73134cd..b9ca986fb 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -543,196 +543,196 @@ doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char ' lit :: String -> LP Inlines lit = pure . str -accent :: (Char -> Char) -> Inlines -> LP Inlines +accent :: (Char -> String) -> Inlines -> LP Inlines accent f ils = case toList ils of - (Str (x:xs) : ys) -> return $ fromList $ (Str (f x : xs) : ys) + (Str (x:xs) : ys) -> return $ fromList $ (Str (f x ++ xs) : ys) [] -> mzero _ -> return ils -grave :: Char -> Char -grave 'A' = 'À' -grave 'E' = 'È' -grave 'I' = 'Ì' -grave 'O' = 'Ò' -grave 'U' = 'Ù' -grave 'a' = 'à' -grave 'e' = 'è' -grave 'i' = 'ì' -grave 'o' = 'ò' -grave 'u' = 'ù' -grave c = c - -acute :: Char -> Char -acute 'A' = 'Á' -acute 'E' = 'É' -acute 'I' = 'Í' -acute 'O' = 'Ó' -acute 'U' = 'Ú' -acute 'Y' = 'Ý' -acute 'a' = 'á' -acute 'e' = 'é' -acute 'i' = 'í' -acute 'o' = 'ó' -acute 'u' = 'ú' -acute 'y' = 'ý' -acute 'C' = 'Ć' -acute 'c' = 'ć' -acute 'L' = 'Ĺ' -acute 'l' = 'ĺ' -acute 'N' = 'Ń' -acute 'n' = 'ń' -acute 'R' = 'Ŕ' -acute 'r' = 'ŕ' -acute 'S' = 'Ś' -acute 's' = 'ś' -acute 'Z' = 'Ź' -acute 'z' = 'ź' -acute c = c - -circ :: Char -> Char -circ 'A' = 'Â' -circ 'E' = 'Ê' -circ 'I' = 'Î' -circ 'O' = 'Ô' -circ 'U' = 'Û' -circ 'a' = 'â' -circ 'e' = 'ê' -circ 'i' = 'î' -circ 'o' = 'ô' -circ 'u' = 'û' -circ 'C' = 'Ĉ' -circ 'c' = 'ĉ' -circ 'G' = 'Ĝ' -circ 'g' = 'ĝ' -circ 'H' = 'Ĥ' -circ 'h' = 'ĥ' -circ 'J' = 'Ĵ' -circ 'j' = 'ĵ' -circ 'S' = 'Ŝ' -circ 's' = 'ŝ' -circ 'W' = 'Ŵ' -circ 'w' = 'ŵ' -circ 'Y' = 'Ŷ' -circ 'y' = 'ŷ' -circ c = c - -tilde :: Char -> Char -tilde 'A' = 'Ã' -tilde 'a' = 'ã' -tilde 'O' = 'Õ' -tilde 'o' = 'õ' -tilde 'I' = 'Ĩ' -tilde 'i' = 'ĩ' -tilde 'U' = 'Ũ' -tilde 
'u' = 'ũ' -tilde 'N' = 'Ñ' -tilde 'n' = 'ñ' -tilde c = c - -umlaut :: Char -> Char -umlaut 'A' = 'Ä' -umlaut 'E' = 'Ë' -umlaut 'I' = 'Ï' -umlaut 'O' = 'Ö' -umlaut 'U' = 'Ü' -umlaut 'a' = 'ä' -umlaut 'e' = 'ë' -umlaut 'i' = 'ï' -umlaut 'o' = 'ö' -umlaut 'u' = 'ü' -umlaut c = c - -dot :: Char -> Char -dot 'C' = 'Ċ' -dot 'c' = 'ċ' -dot 'E' = 'Ė' -dot 'e' = 'ė' -dot 'G' = 'Ġ' -dot 'g' = 'ġ' -dot 'I' = 'İ' -dot 'Z' = 'Ż' -dot 'z' = 'ż' -dot c = c - -macron :: Char -> Char -macron 'A' = 'Ā' -macron 'E' = 'Ē' -macron 'I' = 'Ī' -macron 'O' = 'Ō' -macron 'U' = 'Ū' -macron 'a' = 'ā' -macron 'e' = 'ē' -macron 'i' = 'ī' -macron 'o' = 'ō' -macron 'u' = 'ū' -macron c = c - -cedilla :: Char -> Char -cedilla 'c' = 'ç' -cedilla 'C' = 'Ç' -cedilla 's' = 'ş' -cedilla 'S' = 'Ş' -cedilla 't' = 'ţ' -cedilla 'T' = 'Ţ' -cedilla 'e' = 'ȩ' -cedilla 'E' = 'Ȩ' -cedilla 'h' = 'ḩ' -cedilla 'H' = 'Ḩ' -cedilla 'o' = 'o̧' -cedilla 'O' = 'O̧' -cedilla c = c - -hacek :: Char -> Char -hacek 'A' = 'Ǎ' -hacek 'a' = 'ǎ' -hacek 'C' = 'Č' -hacek 'c' = 'č' -hacek 'D' = 'Ď' -hacek 'd' = 'ď' -hacek 'E' = 'Ě' -hacek 'e' = 'ě' -hacek 'G' = 'Ǧ' -hacek 'g' = 'ǧ' -hacek 'H' = 'Ȟ' -hacek 'h' = 'ȟ' -hacek 'I' = 'Ǐ' -hacek 'i' = 'ǐ' -hacek 'j' = 'ǰ' -hacek 'K' = 'Ǩ' -hacek 'k' = 'ǩ' -hacek 'L' = 'Ľ' -hacek 'l' = 'ľ' -hacek 'N' = 'Ň' -hacek 'n' = 'ň' -hacek 'O' = 'Ǒ' -hacek 'o' = 'ǒ' -hacek 'R' = 'Ř' -hacek 'r' = 'ř' -hacek 'S' = 'Š' -hacek 's' = 'š' -hacek 'T' = 'Ť' -hacek 't' = 'ť' -hacek 'U' = 'Ǔ' -hacek 'u' = 'ǔ' -hacek 'Z' = 'Ž' -hacek 'z' = 'ž' -hacek c = c - -breve :: Char -> Char -breve 'A' = 'Ă' -breve 'a' = 'ă' -breve 'E' = 'Ĕ' -breve 'e' = 'ĕ' -breve 'G' = 'Ğ' -breve 'g' = 'ğ' -breve 'I' = 'Ĭ' -breve 'i' = 'ĭ' -breve 'O' = 'Ŏ' -breve 'o' = 'ŏ' -breve 'U' = 'Ŭ' -breve 'u' = 'ŭ' -breve c = c +grave :: Char -> String +grave 'A' = "À" +grave 'E' = "È" +grave 'I' = "Ì" +grave 'O' = "Ò" +grave 'U' = "Ù" +grave 'a' = "à" +grave 'e' = "è" +grave 'i' = "ì" +grave 'o' = "ò" +grave 'u' = "ù" +grave c = [c] + +acute 
:: Char -> String +acute 'A' = "Á" +acute 'E' = "É" +acute 'I' = "Í" +acute 'O' = "Ó" +acute 'U' = "Ú" +acute 'Y' = "Ý" +acute 'a' = "á" +acute 'e' = "é" +acute 'i' = "í" +acute 'o' = "ó" +acute 'u' = "ú" +acute 'y' = "ý" +acute 'C' = "Ć" +acute 'c' = "ć" +acute 'L' = "Ĺ" +acute 'l' = "ĺ" +acute 'N' = "Ń" +acute 'n' = "ń" +acute 'R' = "Ŕ" +acute 'r' = "ŕ" +acute 'S' = "Ś" +acute 's' = "ś" +acute 'Z' = "Ź" +acute 'z' = "ź" +acute c = [c] + +circ :: Char -> String +circ 'A' = "Â" +circ 'E' = "Ê" +circ 'I' = "Î" +circ 'O' = "Ô" +circ 'U' = "Û" +circ 'a' = "â" +circ 'e' = "ê" +circ 'i' = "î" +circ 'o' = "ô" +circ 'u' = "û" +circ 'C' = "Ĉ" +circ 'c' = "ĉ" +circ 'G' = "Ĝ" +circ 'g' = "ĝ" +circ 'H' = "Ĥ" +circ 'h' = "ĥ" +circ 'J' = "Ĵ" +circ 'j' = "ĵ" +circ 'S' = "Ŝ" +circ 's' = "ŝ" +circ 'W' = "Ŵ" +circ 'w' = "ŵ" +circ 'Y' = "Ŷ" +circ 'y' = "ŷ" +circ c = [c] + +tilde :: Char -> String +tilde 'A' = "Ã" +tilde 'a' = "ã" +tilde 'O' = "Õ" +tilde 'o' = "õ" +tilde 'I' = "Ĩ" +tilde 'i' = "ĩ" +tilde 'U' = "Ũ" +tilde 'u' = "ũ" +tilde 'N' = "Ñ" +tilde 'n' = "ñ" +tilde c = [c] + +umlaut :: Char -> String +umlaut 'A' = "Ä" +umlaut 'E' = "Ë" +umlaut 'I' = "Ï" +umlaut 'O' = "Ö" +umlaut 'U' = "Ü" +umlaut 'a' = "ä" +umlaut 'e' = "ë" +umlaut 'i' = "ï" +umlaut 'o' = "ö" +umlaut 'u' = "ü" +umlaut c = [c] + +dot :: Char -> String +dot 'C' = "Ċ" +dot 'c' = "ċ" +dot 'E' = "Ė" +dot 'e' = "ė" +dot 'G' = "Ġ" +dot 'g' = "ġ" +dot 'I' = "İ" +dot 'Z' = "Ż" +dot 'z' = "ż" +dot c = [c] + +macron :: Char -> String +macron 'A' = "Ā" +macron 'E' = "Ē" +macron 'I' = "Ī" +macron 'O' = "Ō" +macron 'U' = "Ū" +macron 'a' = "ā" +macron 'e' = "ē" +macron 'i' = "ī" +macron 'o' = "ō" +macron 'u' = "ū" +macron c = [c] + +cedilla :: Char -> String +cedilla 'c' = "ç" +cedilla 'C' = "Ç" +cedilla 's' = "ş" +cedilla 'S' = "Ş" +cedilla 't' = "ţ" +cedilla 'T' = "Ţ" +cedilla 'e' = "ȩ" +cedilla 'E' = "Ȩ" +cedilla 'h' = "ḩ" +cedilla 'H' = "Ḩ" +cedilla 'o' = "o̧" +cedilla 'O' = "O̧" +cedilla c = [c] + +hacek :: Char -> 
String +hacek 'A' = "Ǎ" +hacek 'a' = "ǎ" +hacek 'C' = "Č" +hacek 'c' = "č" +hacek 'D' = "Ď" +hacek 'd' = "ď" +hacek 'E' = "Ě" +hacek 'e' = "ě" +hacek 'G' = "Ǧ" +hacek 'g' = "ǧ" +hacek 'H' = "Ȟ" +hacek 'h' = "ȟ" +hacek 'I' = "Ǐ" +hacek 'i' = "ǐ" +hacek 'j' = "ǰ" +hacek 'K' = "Ǩ" +hacek 'k' = "ǩ" +hacek 'L' = "Ľ" +hacek 'l' = "ľ" +hacek 'N' = "Ň" +hacek 'n' = "ň" +hacek 'O' = "Ǒ" +hacek 'o' = "ǒ" +hacek 'R' = "Ř" +hacek 'r' = "ř" +hacek 'S' = "Š" +hacek 's' = "š" +hacek 'T' = "Ť" +hacek 't' = "ť" +hacek 'U' = "Ǔ" +hacek 'u' = "ǔ" +hacek 'Z' = "Ž" +hacek 'z' = "ž" +hacek c = [c] + +breve :: Char -> String +breve 'A' = "Ă" +breve 'a' = "ă" +breve 'E' = "Ĕ" +breve 'e' = "ĕ" +breve 'G' = "Ğ" +breve 'g' = "ğ" +breve 'I' = "Ĭ" +breve 'i' = "ĭ" +breve 'O' = "Ŏ" +breve 'o' = "ŏ" +breve 'U' = "Ŭ" +breve 'u' = "ŭ" +breve c = [c] tok :: LP Inlines tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) -- cgit v1.2.3 From dd5cb82348dfb2b8febb01db8bdc98ddeac394dc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 28 Aug 2013 08:43:51 -0700 Subject: Generalized type of stringify. --- src/Text/Pandoc/Shared.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index eef150351..9a9a092fc 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses #-} +{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, + FlexibleContexts #-} {- Copyright (C) 2006-2013 John MacFarlane @@ -383,10 +384,10 @@ consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 = consolidateInlines (x : xs) = x : consolidateInlines xs consolidateInlines [] = [] --- | Convert list of inlines to a string with formatting removed. +-- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link -- labels). 
-stringify :: [Inline] -> String +stringify :: Walkable Inline a => a -> String stringify = query go . walk deNote where go :: Inline -> [Char] go Space = " " -- cgit v1.2.3 From 940515a00ba49b9feb3d736dc071059400f83015 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 28 Aug 2013 16:54:37 -0700 Subject: LaTeX reader: allow spaces in alignment spec in tables. E.g. `{ l r c }`. --- src/Text/Pandoc/Readers/LaTeX.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index b9ca986fb..e91ea1e82 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1116,12 +1116,13 @@ complexNatbibCitation mode = try $ do parseAligns :: LP [Alignment] parseAligns = try $ do char '{' - optional $ char '|' + let maybeBar = try $ spaces >> optional (char '|') + maybeBar let cAlign = AlignCenter <$ char 'c' let lAlign = AlignLeft <$ char 'l' let rAlign = AlignRight <$ char 'r' let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign) - aligns' <- sepEndBy alignChar (optional $ char '|') + aligns' <- sepEndBy alignChar maybeBar spaces char '}' spaces -- cgit v1.2.3 From ebd2eb30d284b816519c4d416e5382bb1bd2b16a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 31 Aug 2013 09:41:48 -0700 Subject: INSTALL: Added instructions for pandoc-citeproc. --- INSTALL | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/INSTALL b/INSTALL index ba3cb8eb6..76eb09e7e 100644 --- a/INSTALL +++ b/INSTALL @@ -53,6 +53,12 @@ Quick install man pandoc +6. If you want to process citations with pandoc, you will also need to + install a separate package, `pandoc-citeproc`. 
This can be installed + using cabal: + + cabal install pandoc-citeproc + [GHC]: http://www.haskell.org/ghc/ [Haskell platform]: http://hackage.haskell.org/platform/ [cabal-install]: http://hackage.haskell.org/trac/hackage/wiki/CabalInstall -- cgit v1.2.3 From c27c0ce0ca025d8c3c8d16d4d88190abef76dc17 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 31 Aug 2013 16:39:05 -0700 Subject: Updated changelog --- changelog | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 125 insertions(+), 4 deletions(-) diff --git a/changelog b/changelog index 42c25f978..57587f3db 100644 --- a/changelog +++ b/changelog @@ -1,5 +1,18 @@ [pending release 1.12] + * New `--filter/-F` option to make it easier to run "filters" + (Pandoc AST transformations that operate on JSON serializations). + Filters are always passed the name of the output format, so their + behavior can be tailored to it. The repository + contains + a python module for writing pandoc filters in python, with + a number of examples. + + * Added `--print-default-data-file` option, which allows printing + of any of pandoc's data files. (For example, + `pandoc --print-default-data-file reference.odt` will print + `reference.odt`.) + * `--toc-level` no longer implies `--toc`. Reason: EPUB users who don't want a visible TOC may still want to set the TOC level for in the book navigation. @@ -11,6 +24,37 @@ which URLs it is fetching, but not giving the full header). In addition, there are better error messages when fetching a URL fails. + * citeproc support has been removed from core pandoc (API changes). + + + The `--bibliography`, `--csl`, and `--citation-abbreviation` + options have been removed. + + Markdown and LaTeX citations are still parsed, but an external + filter, `pandoc-citeproc`, is now needed to process + them against a bibliography and CSL stylesheet.
The bibliography + and stylesheet should be specified in the document's YAML metadata + (`bibliography` and `csl` fields), and the filter called with + `pandoc --filter pandoc-citeproc`. + + The `Text.Pandoc.Biblio` module has been removed, and the Markdown + and LaTeX readers no longer process citations. Users of the + pandoc library who want citation support will need to use + `Text.CSL.Pandoc` from `pandoc-citeproc`. + + All bibliography-related fields have been removed from + `ReaderOptions` and `WriterOptions`: `writerBiblioFiles`, + `readerReferences`, `readerCitationStyle`. + + Note that a Cite element is now created in parsing markdown whether + or not there is a matching reference (indeed, pandoc has no way of + knowing, since the `--bibliography` option has been removed). + By default citations will print as `???`. + + The `pandoc-citeproc` script will put the bibliography at the + end of the document, as before. However, it will be put inside a Div + element with class "references", allowing users some control + over the styling of references. A final header, if any, will + be included in the Div. + * The markdown writer will not print a bibliography if the + `citations` extension is enabled. (If the citations are formatted + as markdown citations, it is redundant to have a bibliography, + since one will be generated automatically.) + * All slide formats: Support incremental slide view for definition lists. * Added syntax for "pauses" in beamer or reaveljs slide shows. @@ -42,6 +86,16 @@ + Variables completely shadow metadata. If many variables with the same name are set, a list is created. + * `Format` is now a newtype, not an alias for String. + Equality comparisons are case-insensitive. + + * New generic block container (`Div`) and inline container + (`Span`) elements have been added. These can take attributes. + They will render in HTML, Textile, MediaWiki, Org, RST + and Markdown (with `markdown_in_html` extension) as HTML `<div>
` + and `<span>` elements; in other formats they will simply pass through + their contents. But they can be targeted by scripts. + * `Text.Pandoc` + Make `toJsonFilter` an alias for `toJSONFilter` from `Text.Pandoc.JSON`. @@ -51,6 +105,36 @@ * Removed the deprecated `jsonFilter` function. + Added `readJSON`, `writeJSON` to the API (#817). + * Most of `Text.Pandoc.Readers.TeXMath` has been moved to the + `texmath` module (0.6.4). (This allows `pandoc-citeproc` to + handle simple math in bibliography fields.) + + [ TODO - systematic documentation of pandoc-types API changes, + including .JSON, .Walk and changes to .Definition, .Builder. + Include the new Format newtype, and the new Span and Div + elements.] + + * Added `Text.Pandoc.Walk` (in `pandoc-types`), which exports + hand-written tree-walking functions that are orders of magnitude + faster than the SYB functions from `Text.Pandoc.Generic`. + These functions are now used where possible in pandoc's code. + Added `Tests.Walk` to verify that `walk` and `query` match + the generic traversals `bottomUp` and `queryWith`. + (API change.) + + * Added `Text.Pandoc.Process`, exporting `pipeProcess`. + This is a souped-up version of `readProcessWithErrorcode` that + uses lazy bytestrings instead of strings and allows setting + environment variables. (Used in `Text.Pandoc.PDF`.) + + * Added `Text.Pandoc.Compat.Monoid`. + This allows pandoc to compile with `base` < 4.5, where `Data.Monoid` + doesn't export `<>`. Thanks to Dirk Ullrich for the patch. + + * Added `Text.Pandoc.Compat.TagSoupEntity`. + This allows pandoc to compile with `tagsoup` 0.13.x. + Thanks to Dirk Ullrich for the patch. + * `Text.Pandoc.Shared` + `openURL` now follows redirects (#701). @@ -60,11 +144,15 @@ This fixes bugs in `--self-contained` on pandoc compiled with `embed_data_files` (#833). + Fixed `readDefaultDataFile` so it works on Windows. + + Better error messages for `readDefaultDataFile`.
Instead of + listing the last path tried, which can confuse people who are + using `--self-contained`, we now just list the data file name. + URL-escape pipe characters. Even though these are legal, `Network.URI` doesn't regard them as legal in URLs. So we escape them first (#535). + `openURL`: Print diagnostic output to stderr, not stdout. + `openURL`: Properly handle `data:` URIs. + + `stringify`: Generalized type. * `Text.Pandoc.Biblio` @@ -122,7 +210,7 @@ will load the script sample.lua and use it as a custom writer. `data/sample.lua` is provided as an example. (This can be printed - with the new `--print-custom-lua-writer` option. + with `pandoc --print-default-data-file sample.lua`.) * Added OPML reader and writer. @@ -154,13 +242,18 @@ might speed things up in some cases.) + Implemented `Ext_ascii_identifiers` (#807). + Allow internal `+` in citation identifiers (#856). - + Added support for YAML metadata block at the beginning of document. + + Added support for YAML metadata blocks, which can come anywhere + in the document (not just at the beginning). A document can contain + multiple YAML metadata blocks. + Improved strong/emph parsing, using the strategy of . The new parsing algorithm requires no backtracking, and no keeping track of nesting levels. It will give different results in some edge cases, but these should not affect normal uses. + Allow `.` or `)` after `#` in ATX headers if no `fancy_lists`. + + Do not generate blank title, author, or date metadata elements. + Leave these out entirely if they aren't present. + + HTML span and div tags are parsed as pandoc Span and Div elements. * RST reader @@ -200,6 +293,9 @@ + Support `\v{}` for hacek (#926). + Don't add spurious ", " to citation suffixes. This is added when needed in `Text.Pandoc.Biblio` anyway. + + Allow spaces in alignment spec in tables, e.g. `{ l r c }`. + + Improved support for accented characters (thanks to Scott Morrison). + + Parse label after section command and set id (#951).
* MediaWiki reader @@ -225,6 +321,10 @@ This caused problems with array environments (#891). + Change `\` to `/` in paths. `/` works even on Windows in LaTeX. `\` will cause major problems if unescaped. + + Write id for code block to label attribute in LaTeX when listings + is used (thanks to Florian Eitel). + + Scale LaTeX tables so they don't exceed columnwidth. + + Avoid problem with footnotes in unnumbered headers (#940). * Beamer writer @@ -274,6 +374,10 @@ + Fixing wrong numbered-list indentation in open document format (Alexander Kondratskiy) (#369). + + `reference.odt`: Added pandoc as "generator" in `meta.xml`. + + Minor changes for ODF 1.2 conformance (#939). We leave the + nonconforming `contextual-spacing` attribute, which is provided by + LibreOffice itself and seems well supported. * Docx writer @@ -347,6 +451,8 @@ to find them, either in the directory containing the first source file, or at an absolute URL, or at a URL relative to the base URL of the first command line argument (#917). + + If compiling with `pdflatex` yields an encoding error, offer + the suggestion to use `--latex-engine=xelatex`. * `Text.Pandoc.UTF8` @@ -363,7 +469,7 @@ + Better error reporting in `readWith`: On error have it print the line in which the error occurred, with a caret pointing to the column. + In `ParserState`, replace `stateTitle`, `stateAuthors`, `stateDate` - with `stateMeta`. + with `stateMeta` and `stateMeta'`. * `Text.Pandoc.XML` @@ -375,6 +481,21 @@ + Added `Ext_ascii_identifiers`. (API change.) This will force `Ext_auto_identifiers` to use ASCII only. Set as default for github markdown. + + Changed `writerSourceDir` to `writerSourceURL` and changed + the type to a `Maybe`. Previously we used to store the directory + of the first input file, even if it was local, and used this as a + base directory for finding images in ODT, EPUB, Docx, and PDF. + This has been confusing to many users. 
It seems better to look for + images relative to the current working directory, even if the first + file argument is in another directory. `writerSourceURL` is set + to 'Just url' when the first command-line argument is an absolute + URL. (So, relative links will be resolved in relation to the first + page.) Otherwise, 'Nothing'. The ODT, EPUB, Docx, and PDF + writers have been modified accordingly. Note that this change may + break some existing workflows. If you have been assuming that + relative links will be interpreted relative to the directory of + the first file argument, you'll need to make that the current + directory before running pandoc. API change (#942). * `Text.Pandoc.Templates` @@ -422,7 +543,7 @@ * Added CONTRIBUTING.md. - * Use latest `chicago-author-date.csl` as `default.csl`. + * Improved INSTALL instructions. * `make-windows-installer.bat`: Removed explicit paths for executables. -- cgit v1.2.3 From 6ed41fdfcc3b57e88cf98b875a75ab5e1629dca6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 08:54:10 -0700 Subject: Factored out registerHeader from markdown reader, added to Parsing. Text.Pandoc.Parsing now exports registerHeader, which can be used in other readers. 
--- src/Text/Pandoc/Parsing.hs | 32 ++++++++++++++++++++++++++++++++ src/Text/Pandoc/Readers/Markdown.hs | 30 ++---------------------------- 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index c16d5bb1d..701b2ef84 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -75,6 +75,7 @@ module Text.Pandoc.Parsing ( (>>~), SubstTable, Key (..), toKey, + registerHeader, smartPunctuation, withQuoteContext, singleQuoteStart, @@ -151,6 +152,7 @@ where import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..)) +import qualified Text.Pandoc.Builder as B import Text.Pandoc.XML (fromEntities) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.Parsec @@ -162,11 +164,13 @@ import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions) import Text.Pandoc.Compat.TagSoupEntity ( lookupEntity ) +import Text.Pandoc.Asciify (toAsciiChar) import Data.Default import qualified Data.Set as Set import Control.Monad.Reader import Control.Applicative ((*>), (<*), (<$), liftA2) import Data.Monoid +import Data.Maybe (catMaybes) type Parser t s = Parsec t s @@ -886,6 +890,34 @@ type KeyTable = M.Map Key Target type SubstTable = M.Map Key Inlines +-- | Add header to the list of headers in state, together +-- with its associated identifier. If the identifier is null +-- and the auto_identifiers extension is set, generate a new +-- unique identifier, and update the list of identifiers +-- in state.
+registerHeader :: Attr -> Inlines -> Parser s ParserState Attr +registerHeader (ident,classes,kvs) header' = do + ids <- stateIdentifiers `fmap` getState + exts <- getOption readerExtensions + let insert' = M.insertWith (\_new old -> old) + if null ident && Ext_auto_identifiers `Set.member` exts + then do + let id' = uniqueIdent (B.toList header') ids + let id'' = if Ext_ascii_identifiers `Set.member` exts + then catMaybes $ map toAsciiChar id' + else id' + updateState $ \st -> st{ + stateIdentifiers = if id' == id'' + then id' : ids + else id' : id'' : ids, + stateHeaders = insert' header' id' $ stateHeaders st } + return (id'',classes,kvs) + else do + unless (null ident) $ + updateState $ \st -> st{ + stateHeaders = insert' header' ident $ stateHeaders st } + return (ident,classes,kvs) + -- | Fail unless we're in "smart typography" mode. failUnlessSmart :: Parser [tok] ParserState () failUnlessSmart = getOption readerSmart >>= guard diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 658335202..267b30032 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -49,7 +49,6 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.XML (fromEntities) -import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, @@ -471,31 +470,6 @@ block = choice [ mempty <$ blanklines header :: MarkdownParser (F Blocks) header = setextHeader <|> atxHeader "header" --- returns unique identifier -addToHeaderList :: Attr -> F Inlines -> MarkdownParser Attr -addToHeaderList (ident,classes,kvs) text = do - let header' = runF text defaultParserState - exts <- getOption readerExtensions - let insert' = M.insertWith (\_new old -> old) - if null ident && 
Ext_auto_identifiers `Set.member` exts - then do - ids <- stateIdentifiers `fmap` getState - let id' = uniqueIdent (B.toList header') ids - let id'' = if Ext_ascii_identifiers `Set.member` exts - then catMaybes $ map toAsciiChar id' - else id' - updateState $ \st -> st{ - stateIdentifiers = if id' == id'' - then id' : ids - else id' : id'' : ids, - stateHeaders = insert' header' id' $ stateHeaders st } - return (id'',classes,kvs) - else do - unless (null ident) $ - updateState $ \st -> st{ - stateHeaders = insert' header' ident $ stateHeaders st } - return (ident,classes,kvs) - atxHeader :: MarkdownParser (F Blocks) atxHeader = try $ do level <- many1 (char '#') >>= return . length @@ -504,7 +478,7 @@ atxHeader = try $ do skipSpaces text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr' <- addToHeaderList attr text + attr' <- registerHeader attr (runF text defaultParserState) return $ B.headerWith attr' level <$> text atxClosing :: MarkdownParser Attr @@ -543,7 +517,7 @@ setextHeader = try $ do many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr' <- addToHeaderList attr text + attr' <- registerHeader attr (runF text defaultParserState) return $ B.headerWith attr' level <$> text -- -- cgit v1.2.3 From 9282f632786e85c7a31f974f20162214c5387c00 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 09:13:31 -0700 Subject: Use registerHeader in RST and LaTeX readers. This will give automatic unique identifiers, unless `-auto_identifiers` is specified. 
--- src/Text/Pandoc/Readers/LaTeX.hs | 6 ++-- src/Text/Pandoc/Readers/RST.hs | 6 ++-- tests/Tests/Readers/LaTeX.hs | 10 +++---- tests/latex-reader.native | 60 ++++++++++++++++++++-------------------- tests/lhs-test.html | 2 +- tests/lhs-test.html+lhs | 2 +- tests/lhs-test.latex | 2 +- tests/lhs-test.latex+lhs | 2 +- tests/lhs-test.native | 2 +- tests/rst-reader.native | 58 +++++++++++++++++++------------------- 10 files changed, 76 insertions(+), 74 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index e91ea1e82..ff5b73348 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -318,9 +318,9 @@ section (ident, classes, kvs) lvl = do let lvl' = if hasChapters then lvl + 1 else lvl skipopts contents <- grouped inline - lab <- option ident $ try $ spaces >> controlSeq "label" >> - spaces >> braced - return $ headerWith (lab, classes, kvs) lvl' contents + lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced) + attr' <- registerHeader (lab, classes, kvs) contents + return $ headerWith attr' lvl' contents inlineCommand :: LP Inlines inlineCommand = try $ do diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index df0a8294d..32893128a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -275,7 +275,8 @@ doubleHeader = try $ do Just ind -> (headerTable, ind + 1) Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) setState (state { stateHeaderTable = headerTable' }) - return $ B.header level txt + attr <- registerHeader nullAttr txt + return $ B.headerWith attr level txt -- a header with line on the bottom only singleHeader :: RSTParser Blocks @@ -295,7 +296,8 @@ singleHeader = try $ do Just ind -> (headerTable, ind + 1) Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) setState (state { stateHeaderTable = headerTable' }) - return $ B.header level txt + attr <- registerHeader 
nullAttr txt + return $ B.headerWith attr level txt -- -- hrule block diff --git a/tests/Tests/Readers/LaTeX.hs b/tests/Tests/Readers/LaTeX.hs index dff6e4537..c1efd1b68 100644 --- a/tests/Tests/Readers/LaTeX.hs +++ b/tests/Tests/Readers/LaTeX.hs @@ -28,17 +28,17 @@ tests = [ testGroup "basic" , testGroup "headers" [ "level 1" =: - "\\section{header}" =?> header 1 "header" + "\\section{header}" =?> headerWith ("header",[],[]) 1 "header" , "level 2" =: - "\\subsection{header}" =?> header 2 "header" + "\\subsection{header}" =?> headerWith ("header",[],[]) 2 "header" , "level 3" =: - "\\subsubsection{header}" =?> header 3 "header" + "\\subsubsection{header}" =?> headerWith ("header",[],[]) 3 "header" , "emph" =: "\\section{text \\emph{emph}}" =?> - header 1 ("text" <> space <> emph "emph") + headerWith ("text-emph",[],[]) 1 ("text" <> space <> emph "emph") , "link" =: "\\section{text \\href{/url}{link}}" =?> - header 1 ("text" <> space <> link "/url" "" "link") + headerWith ("text-link",[],[]) 1 ("text" <> space <> link "/url" "" "link") ] , testGroup "math" diff --git a/tests/latex-reader.native b/tests/latex-reader.native index 23e600000..15b667b2f 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -2,25 +2,25 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [RawBlock (Format "latex") "\\maketitle" ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Headers"] -,Header 2 ("",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embedded",Space,Str "link"] ("/url","")] -,Header 3 ("",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Emph [Str "emphasis"]] 
+,Header 1 ("headers",[],[]) [Str "Headers"] +,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embedded",Space,Str "link"] ("/url","")] +,Header 3 ("level-3-with-emphasis",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Emph [Str "emphasis"]] ,Para [Str "Level",Space,Str "4"] ,Para [Str "Level",Space,Str "5"] -,Header 1 ("",[],[]) [Str "Level",Space,Str "1"] -,Header 2 ("",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Emph [Str "emphasis"]] -,Header 3 ("",[],[]) [Str "Level",Space,Str "3"] +,Header 1 ("level-1",[],[]) [Str "Level",Space,Str "1"] +,Header 2 ("level-2-with-emphasis",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Emph [Str "emphasis"]] +,Header 3 ("level-3",[],[]) [Str "Level",Space,Str "3"] ,Para [Str "with",Space,Str "no",Space,Str "blank",Space,Str "line"] -,Header 2 ("",[],[]) [Str "Level",Space,Str "2"] +,Header 2 ("level-2",[],[]) [Str "Level",Space,Str "2"] ,Para [Str "with",Space,Str "no",Space,Str "blank",Space,Str "line"] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Paragraphs"] +,Header 1 ("paragraphs",[],[]) [Str "Paragraphs"] ,Para [Str "Here\8217s",Space,Str "a",Space,Str "regular",Space,Str "paragraph."] ,Para [Str "In",Space,Str "Markdown",Space,Str "1.0.0",Space,Str "and",Space,Str "earlier.",Space,Str "Version",Space,Str "8.",Space,Str "This",Space,Str "line",Space,Str "turns",Space,Str "into",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str "Because",Space,Str "a",Space,Str "hard-wrapped",Space,Str "line",Space,Str "in",Space,Str "the",Space,Str "middle",Space,Str "of",Space,Str "a",Space,Str "paragraph",Space,Str "looked",Space,Str "like",Space,Str "a",Space,Str "list",Space,Str "item."] ,Para [Str "Here\8217s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet.",Space,Str "*",Space,Str "criminey."] ,Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "hard",Space,Str 
"line",Space,Str "break",LineBreak,Str "here."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Block",Space,Str "Quotes"] +,Header 1 ("block-quotes",[],[]) [Str "Block",Space,Str "Quotes"] ,Para [Str "E-mail",Space,Str "style:"] ,BlockQuote [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote.",Space,Str "It",Space,Str "is",Space,Str "pretty",Space,Str "short."]] @@ -52,15 +52,15 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [Para [Str "Don\8217t",Space,Str "quote",Space,Str "me."]]] ,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Code",Space,Str "Blocks"] +,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"] ,Para [Str "Code:"] ,CodeBlock ("",[],[]) "---- (should be four hyphens)\n\nsub status {\n print \"working\";\n}\n\nthis code block is indented by one tab" ,Para [Str "And:"] ,CodeBlock ("",[],[]) " this code block is indented by two tabs\n\nThese should not be escaped: \\$ \\\\ \\> \\[ \\{" ,Para [Str "this",Space,Str "has",Space,Emph [Str "two",LineBreak,Str "lines"]] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Lists"] -,Header 2 ("",[],[]) [Str "Unordered"] +,Header 1 ("lists",[],[]) [Str "Lists"] +,Header 2 ("unordered",[],[]) [Str "Unordered"] ,Para [Str "Asterisks",Space,Str "tight:"] ,BulletList [[Para [Str "asterisk",Space,Str "1"]] @@ -91,7 +91,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [[Para [Str "Minus",Space,Str "1"]] ,[Para [Str "Minus",Space,Str "2"]] ,[Para [Str "Minus",Space,Str "3"]]] -,Header 2 ("",[],[]) [Str "Ordered"] +,Header 2 ("ordered",[],[]) [Str "Ordered"] ,Para [Str "Tight:"] ,OrderedList (1,Decimal,Period) [[Para [Str "First"]] @@ -118,7 +118,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Item",Space,Str "1.",Space,Str "graf",Space,Str "two.",Space,Str "The",Space,Str "quick",Space,Str 
"brown",Space,Str "fox",Space,Str "jumped",Space,Str "over",Space,Str "the",Space,Str "lazy",Space,Str "dog\8217s",Space,Str "back."]] ,[Para [Str "Item",Space,Str "2."]] ,[Para [Str "Item",Space,Str "3."]]] -,Header 2 ("",[],[]) [Str "Nested"] +,Header 2 ("nested",[],[]) [Str "Nested"] ,BulletList [[Para [Str "Tab"] ,BulletList @@ -143,14 +143,14 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,[Para [Str "Fie"]] ,[Para [Str "Foe"]]]] ,[Para [Str "Third"]]] -,Header 2 ("",[],[]) [Str "Tabs",Space,Str "and",Space,Str "spaces"] +,Header 2 ("tabs-and-spaces",[],[]) [Str "Tabs",Space,Str "and",Space,Str "spaces"] ,BulletList [[Para [Str "this",Space,Str "is",Space,Str "a",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "tabs"]] ,[Para [Str "this",Space,Str "is",Space,Str "a",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "spaces"] ,BulletList [[Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "tabs"]] ,[Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "spaces"]]]]] -,Header 2 ("",[],[]) [Str "Fancy",Space,Str "list",Space,Str "markers"] +,Header 2 ("fancy-list-markers",[],[]) [Str "Fancy",Space,Str "list",Space,Str "markers"] ,OrderedList (2,Decimal,TwoParens) [[Para [Str "begins",Space,Str "with",Space,Str "2"]] ,[Para [Str "and",Space,Str "now",Space,Str "3"] @@ -180,7 +180,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "M.A.",Space,Str "2007"] ,Para [Str "B.",Space,Str "Williams"] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Definition",Space,Str "Lists"] +,Header 1 ("definition-lists",[],[]) [Str "Definition",Space,Str "Lists"] ,Para [Str "Tight",Space,Str "using",Space,Str "spaces:"] ,DefinitionList [([Str "apple"], 
@@ -215,7 +215,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,CodeBlock ("",[],[]) "{ orange code block }" ,BlockQuote [Para [Str "orange",Space,Str "block",Space,Str "quote"]]]])] -,Header 1 ("",[],[]) [Str "HTML",Space,Str "Blocks"] +,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] ,Para [Str "foo",Space,Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] ,Para [Str "foo",Space,Str "bar",Space,Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] @@ -234,7 +234,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,CodeBlock ("",[],[]) "
" ,Para [Str "Hr\8217s:"] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] +,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] ,Para [Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str "."] ,Para [Str "An",Space,Emph [Link [Str "emphasized",Space,Str "link"] ("/url","")],Str "."] @@ -248,7 +248,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Subscripts:",Space,Str "H",Subscript [Str "2"],Str "O,",Space,Str "H",Subscript [Str "23"],Str "O,",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O."] ,Para [Str "These",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "superscripts",Space,Str "or",Space,Str "subscripts,",Space,Str "because",Space,Str "of",Space,Str "the",Space,Str "unescaped",Space,Str "spaces:",Space,Str "a^b",Space,Str "c^d,",Space,Str "a",Math InlineMath "\\sim",Str "b",Space,Str "c",Math InlineMath "\\sim",Str "d."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Smart",Space,Str "quotes,",Space,Str "ellipses,",Space,Str "dashes"] +,Header 1 ("smart-quotes-ellipses-dashes",[],[]) [Str "Smart",Space,Str "quotes,",Space,Str "ellipses,",Space,Str "dashes"] ,Para [Quoted DoubleQuote [Str "Hello,"],Space,Str "said",Space,Str "the",Space,Str "spider.",Space,Quoted DoubleQuote [Quoted SingleQuote [Str "Shelob"],Space,Str "is",Space,Str "my",Space,Str "name."]] ,Para [Quoted SingleQuote [Str "A"],Str ",",Space,Quoted SingleQuote [Str "B"],Str ",",Space,Str "and",Space,Quoted SingleQuote [Str "C"],Space,Str "are",Space,Str "letters."] ,Para [Quoted SingleQuote [Str "Oak,"],Space,Quoted SingleQuote [Str "elm,"],Space,Str "and",Space,Quoted SingleQuote [Str "beech"],Space,Str "are",Space,Str "names",Space,Str 
"of",Space,Str "trees.",Space,Str "So",Space,Str "is",Space,Quoted SingleQuote [Str "pine."]] @@ -258,7 +258,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Dashes",Space,Str "between",Space,Str "numbers:",Space,Str "5\8211\&7,",Space,Str "255\8211\&66,",Space,Str "1987\8211\&1999."] ,Para [Str "Ellipses\8230and\8230and\8230."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "LaTeX"] +,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline (Format "latex") "\\cite[22-23]{smith.1899}"]]] ,[Para [RawInline (Format "latex") "\\doublespacing"]] @@ -288,7 +288,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [[[Plain [Str "Animal"]]] ,[[Plain [Str "Vegetable"]]]] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Special",Space,Str "Characters"] +,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] ,BulletList [[Para [Str "I",Space,Str "hat:",Space,Str "\206"]] @@ -318,8 +318,8 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Plus:",Space,Str "+"] ,Para [Str "Minus:",Space,Str "-"] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Links"] -,Header 2 ("",[],[]) [Str "Explicit"] +,Header 1 ("links",[],[]) [Str "Links"] +,Header 2 ("explicit",[],[]) [Str "Explicit"] ,Para [Str "Just",Space,Str "a",Space,Link [Str "URL"] ("/url/",""),Str "."] ,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/",""),Str "."] ,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/",""),Str "."] @@ -329,7 +329,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Link [Str "with_underscore"] ("/url/with_underscore","")] ,Para [Link [Str 
"Email",Space,Str "link"] ("mailto:nobody@nowhere.net","")] ,Para [Link [Str "Empty"] ("",""),Str "."] -,Header 2 ("",[],[]) [Str "Reference"] +,Header 2 ("reference",[],[]) [Str "Reference"] ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] @@ -342,12 +342,12 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,CodeBlock ("",[],[]) "[not]: /url" ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] ,Para [Str "Foo",Space,Link [Str "biz"] ("/url/",""),Str "."] -,Header 2 ("",[],[]) [Str "With",Space,Str "ampersands"] +,Header 2 ("with-ampersands",[],[]) [Str "With",Space,Str "ampersands"] ,Para [Str "Here\8217s",Space,Str "a",Space,Link [Str "link",Space,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] ("http://example.com/?foo=1&bar=2",""),Str "."] ,Para [Str "Here\8217s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT&T"] ("http://att.com/",""),Str "."] ,Para [Str "Here\8217s",Space,Str "an",Space,Link [Str "inline",Space,Str "link"] ("/script?foo=1&bar=2",""),Str "."] ,Para [Str "Here\8217s",Space,Str "an",Space,Link [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."] -,Header 2 ("",[],[]) [Str "Autolinks"] +,Header 2 ("autolinks",[],[]) [Str "Autolinks"] ,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")] ,BulletList [[Para [Str "In",Space,Str "a",Space,Str "list?"]] @@ -359,17 +359,17 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str 
"here:",Space,Code ("",[],[]) ""] ,CodeBlock ("",[],[]) "or here: " ,HorizontalRule -,Header 1 ("",[],[]) [Str "Images"] +,Header 1 ("images",[],[]) [Str "Images"] ,Para [Str "From",Space,Quoted DoubleQuote [Str "Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune"],Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902):"] ,Para [Image [Str "image"] ("lalune.jpg","")] ,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [Str "image"] ("movie.jpg",""),Space,Str "icon."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Footnotes"] +,Header 1 ("footnotes",[],[]) [Str "Footnotes"] ,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Note [Para [Str "Here",Space,Str "is",Space,Str "the",Space,Str "footnote.",Space,Str "It",Space,Str "can",Space,Str "go",Space,Str "anywhere",Space,Str "after",Space,Str "the",Space,Str "footnote",Space,Str "reference.",Space,Str "It",Space,Str "need",Space,Str "not",Space,Str "be",Space,Str "placed",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]],Space,Str "and",Space,Str "another.",Note [Para [Str "Here\8217s",Space,Str "the",Space,Str "long",Space,Str "note.",Space,Str "This",Space,Str "one",Space,Str "contains",Space,Str "multiple",Space,Str "blocks."],Para [Str "Subsequent",Space,Str "blocks",Space,Str "are",Space,Str "indented",Space,Str "to",Space,Str "show",Space,Str "that",Space,Str "they",Space,Str "belong",Space,Str "to",Space,Str "the",Space,Str "footnote",Space,Str "(as",Space,Str "with",Space,Str "list",Space,Str "items)."],CodeBlock ("",[],[]) " { }",Para [Str "If",Space,Str "you",Space,Str "want,",Space,Str "you",Space,Str "can",Space,Str "indent",Space,Str "every",Space,Str "line,",Space,Str "but",Space,Str "you",Space,Str "can",Space,Str "also",Space,Str "be",Space,Str "lazy",Space,Str "and",Space,Str "just",Space,Str "indent",Space,Str "the",Space,Str "first",Space,Str "line",Space,Str 
"of",Space,Str "each",Space,Str "block."]],Space,Str "This",Space,Str "should",Space,Emph [Str "not"],Space,Str "be",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Space,Str "because",Space,Str "it",Space,Str "contains",Space,Str "a",Space,Str "space.[^my",Space,Str "note]",Space,Str "Here",Space,Str "is",Space,Str "an",Space,Str "inline",Space,Str "note.",Note [Para [Str "This",Space,Str "is",Space,Emph [Str "easier"],Space,Str "to",Space,Str "type.",Space,Str "Inline",Space,Str "notes",Space,Str "may",Space,Str "contain",Space,Link [Str "links"] ("http://google.com",""),Space,Str "and",Space,Code ("",[],[]) "]",Space,Str "verbatim",Space,Str "characters,",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str "[bracketed",Space,Str "text]."]]] ,BlockQuote [Para [Str "Notes",Space,Str "can",Space,Str "go",Space,Str "in",Space,Str "quotes.",Note [Para [Str "In",Space,Str "quote."]]]] ,OrderedList (1,Decimal,Period) [[Para [Str "And",Space,Str "in",Space,Str "list",Space,Str "items.",Note [Para [Str "In",Space,Str "list."]]]]] ,Para [Str "This",Space,Str "paragraph",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "part",Space,Str "of",Space,Str "the",Space,Str "note,",Space,Str "as",Space,Str "it",Space,Str "is",Space,Str "not",Space,Str "indented."] -,Header 1 ("",[],[]) [Str "Escaped",Space,Str "characters"] +,Header 1 ("escaped-characters",[],[]) [Str "Escaped",Space,Str "characters"] ,Para [Str "$",Space,Str "%",Space,Str "&",Space,Str "#",Space,Str "_",Space,Str "{",Space,Str "}"]] diff --git a/tests/lhs-test.html b/tests/lhs-test.html index 6fc51b1e9..bde505a1e 100644 --- a/tests/lhs-test.html +++ b/tests/lhs-test.html @@ -27,7 +27,7 @@ code > span.er { color: #ff0000; font-weight: bold; } -

lhs test

+

lhs test

unsplit is an arrow that takes a pair of values and combines them to return a single value:

unsplit :: (Arrow a) => (b -> c -> d) -> a (b, c) d
 unsplit = arr . uncurry
diff --git a/tests/lhs-test.html+lhs b/tests/lhs-test.html+lhs
index bc0935bd1..fcdcad303 100644
--- a/tests/lhs-test.html+lhs
+++ b/tests/lhs-test.html+lhs
@@ -27,7 +27,7 @@ code > span.er { color: #ff0000; font-weight: bold; }
   
 
 
-

lhs test

+

lhs test

unsplit is an arrow that takes a pair of values and combines them to return a single value:

> unsplit :: (Arrow a) => (b -> c -> d) -> a (b, c) d
 > unsplit = arr . uncurry
diff --git a/tests/lhs-test.latex b/tests/lhs-test.latex
index 0bfdec6a5..51c62f98a 100644
--- a/tests/lhs-test.latex
+++ b/tests/lhs-test.latex
@@ -68,7 +68,7 @@
 
 \begin{document}
 
-\section{lhs test}
+\section{lhs test}\label{lhs-test}
 
 \texttt{unsplit} is an arrow that takes a pair of values and combines them to
 return a single value:
diff --git a/tests/lhs-test.latex+lhs b/tests/lhs-test.latex+lhs
index ce91b37e1..606d49a12 100644
--- a/tests/lhs-test.latex+lhs
+++ b/tests/lhs-test.latex+lhs
@@ -49,7 +49,7 @@
 
 \begin{document}
 
-\section{lhs test}
+\section{lhs test}\label{lhs-test}
 
 \texttt{unsplit} is an arrow that takes a pair of values and combines them to
 return a single value:
diff --git a/tests/lhs-test.native b/tests/lhs-test.native
index 3a22d1f8a..63037d9e3 100644
--- a/tests/lhs-test.native
+++ b/tests/lhs-test.native
@@ -1,4 +1,4 @@
-[Header 1 ("",[],[]) [Str "lhs",Space,Str "test"]
+[Header 1 ("lhs-test",[],[]) [Str "lhs",Space,Str "test"]
 ,Para [Code ("",[],[]) "unsplit",Space,Str "is",Space,Str "an",Space,Str "arrow",Space,Str "that",Space,Str "takes",Space,Str "a",Space,Str "pair",Space,Str "of",Space,Str "values",Space,Str "and",Space,Str "combines",Space,Str "them",Space,Str "to",Space,Str "return",Space,Str "a",Space,Str "single",Space,Str "value:"]
 ,CodeBlock ("",["sourceCode","literate","haskell"],[]) "unsplit :: (Arrow a) => (b -> c -> d) -> a (b, c) d\nunsplit = arr . uncurry\n          -- arr (\\op (x,y) -> x `op` y)"
 ,Para [Code ("",[],[]) "(***)",Space,Str "combines",Space,Str "two",Space,Str "arrows",Space,Str "into",Space,Str "a",Space,Str "new",Space,Str "arrow",Space,Str "by",Space,Str "running",Space,Str "the",Space,Str "two",Space,Str "arrows",Space,Str "on",Space,Str "a",Space,Str "pair",Space,Str "of",Space,Str "values",Space,Str "(one",Space,Str "arrow",Space,Str "on",Space,Str "the",Space,Str "first",Space,Str "item",Space,Str "of",Space,Str "the",Space,Str "pair",Space,Str "and",Space,Str "one",Space,Str "arrow",Space,Str "on",Space,Str "the",Space,Str "second",Space,Str "item",Space,Str "of",Space,Str "the",Space,Str "pair)."]
diff --git a/tests/rst-reader.native b/tests/rst-reader.native
index abceaaab7..497810f39 100644
--- a/tests/rst-reader.native
+++ b/tests/rst-reader.native
@@ -1,11 +1,11 @@
 Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
-[Header 1 ("",[],[]) [Str "Level",Space,Str "one",Space,Str "header"]
+[Header 1 ("level-one-header",[],[]) [Str "Level",Space,Str "one",Space,Str "header"]
 ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
-,Header 2 ("",[],[]) [Str "Level",Space,Str "two",Space,Str "header"]
-,Header 3 ("",[],[]) [Str "Level",Space,Str "three"]
-,Header 4 ("",[],[]) [Str "Level",Space,Str "four",Space,Str "with",Space,Emph [Str "emphasis"]]
-,Header 5 ("",[],[]) [Str "Level",Space,Str "five"]
-,Header 1 ("",[],[]) [Str "Paragraphs"]
+,Header 2 ("level-two-header",[],[]) [Str "Level",Space,Str "two",Space,Str "header"]
+,Header 3 ("level-three",[],[]) [Str "Level",Space,Str "three"]
+,Header 4 ("level-four-with-emphasis",[],[]) [Str "Level",Space,Str "four",Space,Str "with",Space,Emph [Str "emphasis"]]
+,Header 5 ("level-five",[],[]) [Str "Level",Space,Str "five"]
+,Header 1 ("paragraphs",[],[]) [Str "Paragraphs"]
 ,Para [Str "Here\8217s",Space,Str "a",Space,Str "regular",Space,Str "paragraph."]
 ,Para [Str "In",Space,Str "Markdown",Space,Str "1.0.0",Space,Str "and",Space,Str "earlier.",Space,Str "Version",Space,Str "8.",Space,Str "This",Space,Str "line",Space,Str "turns",Space,Str "into",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str "Because",Space,Str "a",Space,Str "hard-wrapped",Space,Str "line",Space,Str "in",Space,Str "the",Space,Str "middle",Space,Str "of",Space,Str "a",Space,Str "paragraph",Space,Str "looked",Space,Str "like",Space,Str "a",Space,Str "list",Space,Str "item."]
 ,Para [Str "Here\8217s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet.",Space,Str "*",Space,Str "criminey."]
@@ -13,7 +13,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
 ,HorizontalRule
 ,Para [Str "Another:"]
 ,HorizontalRule
-,Header 1 ("",[],[]) [Str "Block",Space,Str "Quotes"]
+,Header 1 ("block-quotes",[],[]) [Str "Block",Space,Str "Quotes"]
 ,Para [Str "Here\8217s",Space,Str "a",Space,Str "block",Space,Str "quote:"]
 ,BlockQuote
  [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote.",Space,Str "It",Space,Str "is",Space,Str "pretty",Space,Str "short."]]
@@ -31,7 +31,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
   [Para [Str "nested"]
   ,BlockQuote
    [Para [Str "nested"]]]]
-,Header 1 ("",[],[]) [Str "Code",Space,Str "Blocks"]
+,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"]
 ,Para [Str "Code:"]
 ,CodeBlock ("",[],[]) "---- (should be four hyphens)\n\nsub status {\n    print \"working\";\n}"
 ,CodeBlock ("",[],[]) "this code block is indented by one tab"
@@ -39,8 +39,8 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
 ,CodeBlock ("",[],[]) "this block is indented by two tabs\n\nThese should not be escaped:  \\$ \\\\ \\> \\[ \\{"
 ,Para [Str "And:"]
 ,CodeBlock ("",["sourceCode","python"],[]) "def my_function(x):\n    return x + 1"
-,Header 1 ("",[],[]) [Str "Lists"]
-,Header 2 ("",[],[]) [Str "Unordered"]
+,Header 1 ("lists",[],[]) [Str "Lists"]
+,Header 2 ("unordered",[],[]) [Str "Unordered"]
 ,Para [Str "Asterisks",Space,Str "tight:"]
 ,BulletList
  [[Plain [Str "asterisk",Space,Str "1"]]
@@ -71,7 +71,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
  [[Plain [Str "Minus",Space,Str "1"]]
  ,[Plain [Str "Minus",Space,Str "2"]]
  ,[Plain [Str "Minus",Space,Str "3"]]]
-,Header 2 ("",[],[]) [Str "Ordered"]
+,Header 2 ("ordered",[],[]) [Str "Ordered"]
 ,Para [Str "Tight:"]
 ,OrderedList (1,Decimal,Period)
  [[Plain [Str "First"]]
@@ -115,7 +115,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
     ,[Plain [Str "Fie"]]
     ,[Plain [Str "Foe"]]]]]
  ,[Plain [Str "Third"]]]
-,Header 2 ("",[],[]) [Str "Fancy",Space,Str "list",Space,Str "markers"]
+,Header 2 ("fancy-list-markers",[],[]) [Str "Fancy",Space,Str "list",Space,Str "markers"]
 ,OrderedList (2,Decimal,TwoParens)
  [[Plain [Str "begins",Space,Str "with",Space,Str "2"]]
  ,[Para [Str "and",Space,Str "now",Space,Str "3"]
@@ -145,7 +145,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
 ,OrderedList (4,LowerAlpha,TwoParens)
  [[Plain [Str "item",Space,Str "1"]]
  ,[Plain [Str "item",Space,Str "2"]]]
-,Header 2 ("",[],[]) [Str "Definition"]
+,Header 2 ("definition",[],[]) [Str "Definition"]
 ,DefinitionList
  [([Str "term",Space,Str "1"],
    [[Para [Str "Definition",Space,Str "1."]]])
@@ -154,7 +154,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
     ,Para [Str "Definition",Space,Str "2,",Space,Str "paragraph",Space,Str "2."]]])
  ,([Str "term",Space,Str "with",Space,Emph [Str "emphasis"]],
    [[Para [Str "Definition",Space,Str "3."]]])]
-,Header 1 ("",[],[]) [Str "Field",Space,Str "Lists"]
+,Header 1 ("field-lists",[],[]) [Str "Field",Space,Str "Lists"]
 ,BlockQuote
  [DefinitionList
   [([Str "address"],
@@ -170,18 +170,18 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
    [[Para [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
  ,([Str "phone"],
    [[Para [Str "123-4567"]]])]
-,Header 1 ("",[],[]) [Str "HTML",Space,Str "Blocks"]
+,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"]
 ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"]
 ,RawBlock (Format "html") "
foo
" ,Para [Str "Now,",Space,Str "nested:"] ,RawBlock (Format "html") "
\n
\n
\n foo\n
\n
\n
" -,Header 1 ("",[],[]) [Str "LaTeX",Space,Str "Block"] +,Header 1 ("latex-block",[],[]) [Str "LaTeX",Space,Str "Block"] ,RawBlock (Format "latex") "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" -,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] +,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ".",Space,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str "."] ,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",Space,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",Space,Code ("",[],[]) "",Str "."] ,Para [Str "This",Space,Str "is",Subscript [Str "subscripted"],Space,Str "and",Space,Str "this",Space,Str "is",Space,Superscript [Str "superscripted"],Str "."] -,Header 1 ("",[],[]) [Str "Special",Space,Str "Characters"] +,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] ,BulletList [[Plain [Str "I",Space,Str "hat:",Space,Str "\206"]] @@ -209,7 +209,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Bang:",Space,Str "!"] ,Para [Str "Plus:",Space,Str "+"] ,Para [Str "Minus:",Space,Str "-"] -,Header 1 ("",[],[]) [Str "Links"] +,Header 1 ("links",[],[]) [Str "Links"] ,Para [Str "Explicit:",Space,Str "a",Space,Link [Str "URL"] ("/url/",""),Str "."] ,Para [Str "Two",Space,Str "anonymous",Space,Str "links:",Space,Link [Str "the",Space,Str "first"] ("/url1/",""),Space,Str "and",Space,Link [Str "the",Space,Str "second"] ("/url2/","")] ,Para [Str "Reference",Space,Str "links:",Space,Link [Str "link1"] ("/url1/",""),Space,Str "and",Space,Link [Str "link2"] ("/url2/",""),Space,Str "and",Space,Link [Str "link1"] ("/url1/",""),Space,Str "again."] @@ -218,20 +218,20 @@ Pandoc (Meta {unMeta = 
fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Autolinks:",Space,Link [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2",""),Space,Str "and",Space,Link [Str "nobody@nowhere.net"] ("mailto:nobody@nowhere.net",""),Str "."] ,Para [Str "But",Space,Str "not",Space,Str "here:"] ,CodeBlock ("",[],[]) "http://example.com/" -,Header 1 ("",[],[]) [Str "Images"] +,Header 1 ("images",[],[]) [Str "Images"] ,Para [Str "From",Space,Quoted DoubleQuote [Str "Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune"],Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902):"] ,Para [Image [Str "image"] ("lalune.jpg","")] ,Para [Image [Str "Voyage dans la Lune"] ("lalune.jpg","")] ,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [Str "movie"] ("movie.jpg",""),Space,Str "icon."] ,Para [Str "And",Space,Str "an",Space,Link [Image [Str "A movie"] ("movie.jpg","")] ("/url",""),Str "."] -,Header 1 ("",[],[]) [Str "Comments"] +,Header 1 ("comments",[],[]) [Str "Comments"] ,Para [Str "First",Space,Str "paragraph"] ,Para [Str "Another",Space,Str "paragraph"] ,Para [Str "A",Space,Str "third",Space,Str "paragraph"] -,Header 1 ("",[],[]) [Str "Line",Space,Str "blocks"] +,Header 1 ("line-blocks",[],[]) [Str "Line",Space,Str "blocks"] ,Para [Str "But",Space,Str "can",Space,Str "a",Space,Str "bee",Space,Str "be",Space,Str "said",Space,Str "to",Space,Str "be",LineBreak,Str "\160\160\160\160or",Space,Str "not",Space,Str "to",Space,Str "be",Space,Str "an",Space,Str "entire",Space,Str "bee,",LineBreak,Str "\160\160\160\160\160\160\160\160when",Space,Str "half",Space,Str "the",Space,Str "bee",Space,Str "is",Space,Str "not",Space,Str "a",Space,Str "bee,",LineBreak,Str "\160\160\160\160\160\160\160\160\160\160\160\160due",Space,Str "to",Space,Str "some",Space,Str "ancient",Space,Str "injury?"] ,Para [Str "Continuation",Space,Str "line",LineBreak,Str "\160\160and",Space,Str "another"] -,Header 1 ("",[],[]) [Str 
"Simple",Space,Str "Tables"] +,Header 1 ("simple-tables",[],[]) [Str "Simple",Space,Str "Tables"] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [[Plain [Str "col",Space,Str "1"]] ,[Plain [Str "col",Space,Str "2"]] @@ -253,7 +253,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,[[Plain [Str "r2",Space,Str "d"]] ,[Plain [Str "e"]] ,[Plain [Str "f"]]]] -,Header 1 ("",[],[]) [Str "Grid",Space,Str "Tables"] +,Header 1 ("grid-tables",[],[]) [Str "Grid",Space,Str "Tables"] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.2375,0.15,0.1625] [[Plain [Str "col",Space,Str "1"]] ,[Plain [Str "col",Space,Str "2"]] @@ -298,26 +298,26 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,[Plain [Str "b",Space,Str "2"]] ,[Plain [Str "b",Space,Str "2"]]]] ,[Plain [Str "c",Space,Str "c",Space,Str "2",Space,Str "c",Space,Str "2"]]]] -,Header 1 ("",[],[]) [Str "Footnotes"] +,Header 1 ("footnotes",[],[]) [Str "Footnotes"] ,Para [Note [Para [Str "Note",Space,Str "with",Space,Str "one",Space,Str "line."]]] ,Para [Note [Para [Str "Note",Space,Str "with",Space,Str "continuation",Space,Str "line."]]] ,Para [Note [Para [Str "Note",Space,Str "with"],Para [Str "continuation",Space,Str "block."]]] ,Para [Note [Para [Str "Note",Space,Str "with",Space,Str "continuation",Space,Str "line"],Para [Str "and",Space,Str "a",Space,Str "second",Space,Str "para."]]] ,Para [Str "Not",Space,Str "in",Space,Str "note."] -,Header 1 ("",[],[]) [Str "Math"] +,Header 1 ("math",[],[]) [Str "Math"] ,Para [Str "Some",Space,Str "inline",Space,Str "math",Space,Math InlineMath "E=mc^2",Str ".",Space,Str "Now",Space,Str "some",Space,Str "display",Space,Str "math:"] ,Para [Math DisplayMath "E=mc^2"] ,Para [Math DisplayMath "E = mc^2"] ,Para [Math DisplayMath "E = mc^2",Math DisplayMath "\\alpha = \\beta"] ,Para [Math DisplayMath "E &= mc^2\\\\\nF &= \\pi E",Math DisplayMath "F &= \\gamma \\alpha^2"] ,Para [Str "All",Space,Str 
"done."] -,Header 1 ("",[],[]) [Str "Default-Role"] +,Header 1 ("default-role",[],[]) [Str "Default-Role"] ,Para [Str "Try",Space,Str "changing",Space,Str "the",Space,Str "default",Space,Str "role",Space,Str "to",Space,Str "a",Space,Str "few",Space,Str "different",Space,Str "things."] -,Header 2 ("",[],[]) [Str "Doesn\8217t",Space,Str "Break",Space,Str "Title",Space,Str "Parsing"] +,Header 2 ("doesnt-break-title-parsing",[],[]) [Str "Doesn\8217t",Space,Str "Break",Space,Str "Title",Space,Str "Parsing"] ,Para [Str "Inline",Space,Str "math:",Space,Math InlineMath "E=mc^2",Space,Str "or",Space,Math InlineMath "E=mc^2",Space,Str "or",Space,Math InlineMath "E=mc^2",Str ".",Space,Str "Other",Space,Str "roles:",Space,Superscript [Str "super"],Str ",",Space,Subscript [Str "sub"],Str "."] ,Para [Math DisplayMath "\\alpha = beta",Math DisplayMath "E = mc^2"] ,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."] ,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."] ,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."] -,Header 2 ("",[],[]) [Str "Literal",Space,Str "symbols"] +,Header 2 ("literal-symbols",[],[]) [Str "Literal",Space,Str "symbols"] ,Para [Str "2*2",Space,Str "=",Space,Str "4*1"]] -- cgit v1.2.3 From 90c49b0aaed34ef1efb8e342d80f93cb477512a7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 09:22:55 -0700 Subject: Use registerHeader in Textile reader. This produces automatic header identifiers, unless `auto_identifiers` extension is disabled. Closes #967. 
--- src/Text/Pandoc/Readers/Textile.hs | 4 ++- tests/textile-reader.native | 58 +++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 8ccd1e227..23e07f621 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -52,6 +52,7 @@ TODO : refactor common patterns across readers : module Text.Pandoc.Readers.Textile ( readTextile) where import Text.Pandoc.Definition +import qualified Text.Pandoc.Builder as B import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing @@ -179,7 +180,8 @@ header = try $ do char '.' whitespace name <- normalizeSpaces <$> manyTill inline blockBreak - return $ Header level attr name + attr' <- registerHeader attr (B.fromList name) + return $ Header level attr' name -- | Blockquote of the form "bq. content" blockQuote :: Parser [Char] ParserState Block diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 7e709a505..ebfbc07fd 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -1,13 +1,13 @@ Pandoc (Meta {unMeta = fromList []}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Str "\8217",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Headers"] -,Header 2 ("",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")] -,Header 3 ("",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Strong [Str "emphasis"]] -,Header 4 ("",[],[]) [Str "Level",Space,Str "4"] -,Header 5 ("",[],[]) [Str "Level",Space,Str "5"] -,Header 6 
("",[],[]) [Str "Level",Space,Str "6"] -,Header 1 ("",[],[]) [Str "Paragraphs"] +,Header 1 ("headers",[],[]) [Str "Headers"] +,Header 2 ("level-2-with-an-embeded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")] +,Header 3 ("level-3-with-emphasis",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Strong [Str "emphasis"]] +,Header 4 ("level-4",[],[]) [Str "Level",Space,Str "4"] +,Header 5 ("level-5",[],[]) [Str "Level",Space,Str "5"] +,Header 6 ("level-6",[],[]) [Str "Level",Space,Str "6"] +,Header 1 ("paragraphs",[],[]) [Str "Paragraphs"] ,Para [Str "Here",Str "\8217",Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."] ,Para [Str "Line",Space,Str "breaks",Space,Str "are",Space,Str "preserved",Space,Str "in",Space,Str "textile",Str ",",Space,Str "so",Space,Str "you",Space,Str "can",Space,Str "not",Space,Str "wrap",Space,Str "your",Space,Str "very",LineBreak,Str "long",Space,Str "paragraph",Space,Str "with",Space,Str "your",Space,Str "favourite",Space,Str "text",Space,Str "editor",Space,Str "and",Space,Str "have",Space,Str "it",Space,Str "rendered",LineBreak,Str "with",Space,Str "no",Space,Str "break",Str "."] ,Para [Str "Here",Str "\8217",Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str "."] @@ -16,23 +16,23 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "paragraph",Space,Str "break",Space,Str "between",Space,Str "here"] ,Para [Str "and",Space,Str "here",Str "."] ,Para [Str "pandoc",Space,Str "converts",Space,Str "textile",Str "."] -,Header 1 ("",[],[]) [Str "Block",Space,Str "Quotes"] +,Header 1 ("block-quotes",[],[]) [Str "Block",Space,Str "Quotes"] ,BlockQuote [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "famous",Space,Str "quote",Space,Str "from",Space,Str "somebody",Str ".",Space,Str "He",Space,Str "had",Space,Str "a",Space,Str 
"lot",Space,Str "of",Space,Str "things",Space,Str "to",LineBreak,Str "say",Str ",",Space,Str "so",Space,Str "the",Space,Str "text",Space,Str "is",Space,Str "really",Space,Str "really",Space,Str "long",Space,Str "and",Space,Str "spans",Space,Str "on",Space,Str "multiple",Space,Str "lines",Str "."]] ,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph",Str "."] -,Header 1 ("",[],[]) [Str "Code",Space,Str "Blocks"] +,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"] ,Para [Str "Code",Str ":"] ,CodeBlock ("",[],[]) " ---- (should be four hyphens)\n\n sub status {\n print \"working\";\n }\n\n this code block is indented by one tab" ,Para [Str "And",Str ":"] ,CodeBlock ("",[],[]) " this code block is indented by two tabs\n\n These should not be escaped: \\$ \\\\ \\> \\[ \\{" ,CodeBlock ("",[],[]) "Code block with .bc\n continued\n @",Str ",",Space,Code ("",[],[]) "@",Str "."] -,Header 1 ("",[],[]) [Str "Notextile"] +,Header 1 ("notextile",[],[]) [Str "Notextile"] ,Para [Str "A",Space,Str "block",Space,Str "of",Space,Str "text",Space,Str "can",Space,Str "be",Space,Str "protected",Space,Str "with",Space,Str "notextile",Space,Str ":"] ,Para [Str "\nNo *bold* and\n* no bullet\n"] ,Para [Str "and",Space,Str "inlines",Space,Str "can",Space,Str "be",Space,Str "protected",Space,Str "with",Space,Str "double *equals (=)* markup",Str "."] -,Header 1 ("",[],[]) [Str "Lists"] -,Header 2 ("",[],[]) [Str "Unordered"] +,Header 1 ("lists",[],[]) [Str "Lists"] +,Header 2 ("unordered",[],[]) [Str "Unordered"] ,Para [Str "Asterisks",Space,Str "tight",Str ":"] ,BulletList [[Plain [Str "asterisk",Space,Str "1"]] @@ -42,13 +42,13 @@ Pandoc (Meta {unMeta = fromList []}) ,BulletList [[Plain [Str "asterisk",Space,Str "1",LineBreak,Str "newline"]] ,[Plain [Str "asterisk",Space,Str "2"]]] -,Header 2 ("",[],[]) [Str "Ordered"] +,Header 2 ("ordered",[],[]) [Str "Ordered"] ,Para [Str "Tight",Str ":"] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "First"]] 
,[Plain [Str "Second"]] ,[Plain [Str "Third"]]] -,Header 2 ("",[],[]) [Str "Nested"] +,Header 2 ("nested",[],[]) [Str "Nested"] ,BulletList [[Plain [Str "ui",Space,Str "1"] ,BulletList @@ -63,7 +63,7 @@ Pandoc (Meta {unMeta = fromList []}) ,BulletList [[Plain [Str "ui",Space,Str "2",Str ".",Str "1",Str ".",Str "1"]] ,[Plain [Str "ui",Space,Str "2",Str ".",Str "1",Str ".",Str "2"]]]]]]] -,Header 2 ("",[],[]) [Str "Definition",Space,Str "List"] +,Header 2 ("definition-list",[],[]) [Str "Definition",Space,Str "List"] ,DefinitionList [([Str "coffee"], [[Plain [Str "Hot",Space,Str "and",Space,Str "black"]]]) @@ -74,23 +74,23 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "Cold",Space,Str "drink",Space,Str "that",Space,Str "goes",Space,Str "great",Space,Str "with",Space,Str "cookies",Str "."]]]) ,([Str "beer"], [[Plain [Str "fresh",Space,Str "and",Space,Str "bitter"]]])] -,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] +,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str ".",LineBreak,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str ".",LineBreak,Str "Hyphenated-words-are-ok",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str "strange_underscore_notation",Str ".",LineBreak,Str "A",Space,Link [Strong [Str "strong",Space,Str "link"]] ("http://www.foobar.com",""),Str "."] ,Para [Emph [Strong [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em",Str "."]],LineBreak,Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Space,Str "and",Space,Emph [Strong [Str "that",Space,Str "one"]],Str ".",LineBreak,Strikeout [Str "This",Space,Str "is",Space,Str "strikeout",Space,Str "and",Space,Strong [Str "strong"]]] ,Para [Str "Superscripts",Str ":",Space,Str "a",Superscript 
[Str "bc"],Str "d",Space,Str "a",Superscript [Strong [Str "hello"]],Space,Str "a",Superscript [Str "hello",Space,Str "there"],Str ".",LineBreak,Str "Subscripts",Str ":",Space,Subscript [Str "here"],Space,Str "H",Subscript [Str "2"],Str "O",Str ",",Space,Str "H",Subscript [Str "23"],Str "O",Str ",",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O",Str "."] ,Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,Str "\8212",Space,Str "automatic",Space,Str "dashes",Str "."] ,Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Str "\8230",Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more",Str "."] ,Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Quoted DoubleQuote [Str "I",Str "\8217",Str "d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you"],Space,Str "for",Space,Str "example",Str "."] -,Header 1 ("",[],[]) [Str "Links"] -,Header 2 ("",[],[]) [Str "Explicit"] +,Header 1 ("links",[],[]) [Str "Links"] +,Header 2 ("explicit",[],[]) [Str "Explicit"] ,Para [Str "Just",Space,Str "a",Space,Link [Str "url"] ("http://www.url.com","")] ,Para [Link [Str "Email",Space,Str "link"] ("mailto:nobody@nowhere.net","")] ,Para [Str "Automatic",Space,Str "linking",Space,Str "to",Space,Link [Str "http://www.example.com"] ("http://www.example.com",""),Str "."] ,Para [Link [Str "Example"] ("http://www.example.com/",""),Str ":",Space,Str "Example",Space,Str "of",Space,Str "a",Space,Str "link",Space,Str "followed",Space,Str "by",Space,Str "a",Space,Str "colon",Str "."] ,Para [Str "A",Space,Str "link",Link [Str "with",Space,Str "brackets"] ("http://www.example.com",""),Str "and",Space,Str "no",Space,Str "spaces",Str "."] -,Header 1 ("",[],[]) [Str "Tables"] +,Header 1 ("tables",[],[]) [Str "Tables"] ,Para [Str "Textile",Space,Str "allows",Space,Str "tables",Space,Str "with",Space,Str "and",Space,Str 
"without",Space,Str "headers",Space,Str ":"] -,Header 2 ("",[],[]) [Str "Without",Space,Str "headers"] +,Header 2 ("without-headers",[],[]) [Str "Without",Space,Str "headers"] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [] [[[Plain [Str "name"]] @@ -106,7 +106,7 @@ Pandoc (Meta {unMeta = fromList []}) ,[Plain [Str "45"]] ,[Plain [Str "f"]]]] ,Para [Str "and",Space,Str "some",Space,Str "text",Space,Str "following",Space,Str "\8230"] -,Header 2 ("",[],[]) [Str "With",Space,Str "headers"] +,Header 2 ("with-headers",[],[]) [Str "With",Space,Str "headers"] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [[Plain [Str "name"]] ,[Plain [Str "age"]] @@ -120,9 +120,9 @@ Pandoc (Meta {unMeta = fromList []}) ,[[Plain [Str "bella"]] ,[Plain [Str "45"]] ,[Plain [Str "f"]]]] -,Header 1 ("",[],[]) [Str "Images"] +,Header 1 ("images",[],[]) [Str "Images"] ,Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str "syntax",Str ",",Space,Str "like",Space,LineBreak,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),LineBreak,Str "and",Space,Str "here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] -,Header 1 ("",[],[]) [Str "Attributes"] +,Header 1 ("attributes",[],[]) [Str "Attributes"] ,Header 2 ("ident",["bar","foo"],[("style","color:red"),("lang","en")]) [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str "attributes",Space,Str "are",Space,Str "parsed",Space,Str "in",Space,Str "headers",Str "."] ,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Str "inline",Space,Str "attributes"],Space,Str "of",Space,Str " all kind"] ,Para [Str "and",Space,Str "paragraph",Space,Str "attributes",Str ",",Space,Str "and",Space,Str "table",Space,Str "attributes",Str "."] @@ -134,9 +134,9 @@ Pandoc (Meta {unMeta = fromList []}) ,[[Plain [Str "joan"]] ,[Plain [Str "24"]] ,[Plain [Str "f"]]]] -,Header 1 ("",[],[]) [Str "Entities"] +,Header 1 ("entities",[],[]) [Str "Entities"] ,Para 
[Str "*",LineBreak,Str "&"] -,Header 1 ("",[],[]) [Str "Raw",Space,Str "HTML"] +,Header 1 ("raw-html",[],[]) [Str "Raw",Space,Str "HTML"] ,Para [Str "However",Str ",",Space,RawInline (Format "html") "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline (Format "html") "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] ,RawBlock (Format "html") "
" ,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"] @@ -150,18 +150,18 @@ Pandoc (Meta {unMeta = fromList []}) [[Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "\8217",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"]] ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") ""]]] ,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] -,Header 1 ("",[],[]) [Str "Raw",Space,Str "LaTeX"] +,Header 1 ("raw-latex",[],[]) [Str "Raw",Space,Str "LaTeX"] ,Para [Str "This",Space,Str "Textile",Space,Str "reader",Space,Str "also",Space,Str "accepts",Space,Str "raw",Space,Str "LaTeX",Space,Str "for",Space,Str "blocks",Space,Str ":"] ,RawBlock (Format "latex") "\\begin{itemize}\n \\item one\n \\item two\n\\end{itemize}" ,Para [Str "and",Space,Str "for",Space,RawInline (Format "latex") "\\emph{inlines}",Str "."] -,Header 1 ("",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"] +,Header 1 ("acronyms-and-marks",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"] ,Para [Str "PBS (Public Broadcasting System)"] ,Para [Str "Hi",Str "\8482"] ,Para [Str "Hi",Space,Str "\8482"] ,Para [Str "\174",Space,Str "Hi",Str "\174"] ,Para [Str "Hi",Str "\169",Str "2008",Space,Str "\169",Space,Str "2008"] -,Header 1 ("",[],[]) [Str "Footnotes"] +,Header 1 ("footnotes",[],[]) [Str "Footnotes"] ,Para [Str "A",Space,Str "note",Str ".",Note [Para [Str "The",Space,Str "note",LineBreak,Str "is",Space,Str "here",Str "!"]],Space,Str "Another",Space,Str "note",Note [Para [Str "Other",Space,Str "note",Str "."]],Str "."] -,Header 1 ("",[],[]) [Str "Comment",Space,Str "blocks"] +,Header 1 ("comment-blocks",[],[]) [Str "Comment",Space,Str "blocks"] ,Null ,Para [Str 
"not",Space,Str "a",Space,Str "comment",Str "."]] -- cgit v1.2.3 From 8e61a6214ffda2f76980dbb36542c1d36951150c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 09:45:29 -0700 Subject: Document fact that --toc doesn't do anything for docx or odt output. See #458. --- README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README b/README index f85e62e14..7b75565c1 100644 --- a/README +++ b/README @@ -342,7 +342,7 @@ General writer options : Include an automatically generated table of contents (or, in the case of `latex`, `context`, and `rst`, an instruction to create one) in the output document. This option has no effect on `man`, - `docbook`, `slidy`, `slideous`, or `s5` output. + `docbook`, `slidy`, `slideous`, `s5`, `docx`, or `odt` output. `--toc-depth=`*NUMBER* : Specify the number of section levels to include in the table -- cgit v1.2.3 From 8b0052ba5b0578814a5aca14a0e02874a10cf947 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 15:05:51 -0700 Subject: Mathjax in HTML slide shows: include explicit "Typeset" instruction. This seems to be needed for some formats (e.g. slideous) and won't hurt in others. Closes #966. --- src/Text/Pandoc/Writers/HTML.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 25079574e..63b466af3 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -143,7 +143,8 @@ pandocToHtml opts (Pandoc meta blocks) = do MathJax url -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" - $ mempty + $ preEscapedString + "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" JsMath (Just url) -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" -- cgit v1.2.3 From 9b0b9b6e03c05ca81ff3cf52787a30ea00cb3a76 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 15:18:56 -0700 Subject: Markdown reader: Don't autolink a bare URI that is followed by ``. 
Closes #937. --- src/Text/Pandoc/Readers/Markdown.hs | 1 + tests/Tests/Readers/Markdown.hs | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 267b30032..9b98cbc3e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1662,6 +1662,7 @@ bareURL :: MarkdownParser (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris (orig, src) <- uri <|> emailAddress + notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") return $ return $ B.link src "" (B.str orig) autoLink :: MarkdownParser (F Inlines) diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index 8a9ed9667..ccca147ab 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -24,7 +24,7 @@ infix 4 =: testBareLink :: (String, Inlines) -> Test testBareLink (inp, ils) = test (readMarkdown def{ readerExtensions = - Set.fromList [Ext_autolink_bare_uris] }) + Set.fromList [Ext_autolink_bare_uris, Ext_raw_html] }) inp (inp, doc $ para ils) autolink :: String -> Inlines @@ -34,6 +34,9 @@ bareLinkTests :: [(String, Inlines)] bareLinkTests = [ ("http://google.com is a search engine.", autolink "http://google.com" <> " is a search engine.") + , ("http://foo.bar.baz", + rawInline "html" "" <> + "http://foo.bar.baz" <> rawInline "html" "") , ("Try this query: http://google.com?search=fish&time=hour.", "Try this query: " <> autolink "http://google.com?search=fish&time=hour" <> ".") , ("HTTPS://GOOGLE.COM,", -- cgit v1.2.3 From 53f61019e27dcc14112136609a72b27e17e0eb06 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 15:37:02 -0700 Subject: Added `--metadata/-M` option. This is like `--variable/-V`, but actually adds to metadata, not just variables. 
--- pandoc.cabal | 1 + pandoc.hs | 30 +++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index 51f60f160..e5deb0896 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -334,6 +334,7 @@ Library Executable pandoc Build-Depends: pandoc, + pandoc-types >= 1.12 && < 1.13, base >= 4.2 && <5, directory >= 1 && < 1.3, filepath >= 1.1 && < 1.4, diff --git a/pandoc.hs b/pandoc.hs index 5b0250836..57840c2ef 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -31,6 +31,7 @@ writers. -} module Main where import Text.Pandoc +import Text.Pandoc.Builder (setMeta) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Readers.LaTeX (handleIncludes) import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile, @@ -112,6 +113,7 @@ data Opt = Opt , optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply , optTemplate :: Maybe FilePath -- ^ Custom template , optVariables :: [(String,String)] -- ^ Template variables to set + , optMetadata :: [(String,String)] -- ^ Metadata fields to set , optOutputFile :: String -- ^ Name of output file , optNumberSections :: Bool -- ^ Number sections in LaTeX , optNumberOffset :: [Int] -- ^ Starting number for sections @@ -166,6 +168,7 @@ defaultOpts = Opt , optTransforms = [] , optTemplate = Nothing , optVariables = [] + , optMetadata = [] , optOutputFile = "-" -- "-" means stdout , optNumberSections = False , optNumberOffset = [0,0,0,0,0,0] @@ -321,6 +324,16 @@ options = "FILENAME") "" -- "Use custom template" + , Option "M" ["metadata"] + (ReqArg + (\arg opt -> do + let (key,val) = case break (`elem` ":=") arg of + (k,_:v) -> (k,v) + (k,_) -> (k,"true") + return opt{ optMetadata = (key,val) : optMetadata opt }) + "KEY[:VALUE]") + "" + , Option "V" ["variable"] (ReqArg (\arg opt -> do @@ -329,7 +342,7 @@ options = (k,_) -> (k,"true") return opt{ optVariables = (key,val) : optVariables opt }) "KEY[:VALUE]") - "" -- "Use custom template" + "" , Option "D" ["print-default-template"] 
(ReqArg @@ -844,6 +857,7 @@ main = do , optWriter = writerName , optParseRaw = parseRaw , optVariables = variables + , optMetadata = metadata , optTableOfContents = toc , optTransforms = transforms , optTemplate = templatePath @@ -1062,8 +1076,10 @@ main = do handleIncludes' . convertTabs . intercalate "\n" >>= reader readerOpts - let doc0 = foldr ($) doc transforms - doc1 <- foldrM ($) doc0 $ map ($ [writerName']) plugins + + let doc0 = foldr (\(k,v) -> setMeta k (MetaString v)) doc metadata + let doc1 = foldr ($) doc0 transforms + doc2 <- foldrM ($) doc1 $ map ($ [writerName']) plugins let writeBinary :: B.ByteString -> IO () writeBinary = B.writeFile (UTF8.encodePath outputFile) @@ -1074,15 +1090,15 @@ main = do case getWriter writerName' of Left e -> err 9 e - Right (IOStringWriter f) -> f writerOptions doc1 >>= writerFn outputFile - Right (IOByteStringWriter f) -> f writerOptions doc1 >>= writeBinary + Right (IOStringWriter f) -> f writerOptions doc2 >>= writerFn outputFile + Right (IOByteStringWriter f) -> f writerOptions doc2 >>= writeBinary Right (PureStringWriter f) | pdfOutput -> do - res <- makePDF latexEngine f writerOptions doc1 + res <- makePDF latexEngine f writerOptions doc2 case res of Right pdf -> writeBinary pdf Left err' -> err 43 $ UTF8.toStringLazy err' - | otherwise -> selfcontain (f writerOptions doc1 ++ + | otherwise -> selfcontain (f writerOptions doc2 ++ ['\n' | not standalone']) >>= writerFn outputFile . handleEntities where htmlFormat = writerName' `elem` -- cgit v1.2.3 From 39cdafd50598d58daad5cb6483f19268f7a06316 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 15:54:48 -0700 Subject: Restore --bibliography, --csl, --citation-abbreviations. 
These are now implemented as: --bibliography FILE => --metadata bibliography=FILE --filter pandoc-citeproc --csl FILE => --metadata csl=FILE --citation-abbreviations FILE => --metadata csl-abbreviations=FILE --- pandoc.hs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandoc.hs b/pandoc.hs index 57840c2ef..0d493d403 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -655,6 +655,33 @@ options = "PROGRAM") "" -- "Name of latex program to use in generating PDF" + , Option "" ["bibliography"] + (ReqArg + (\arg opt -> + return opt{ optMetadata = ("bibliography",arg) : + optMetadata opt + , optPlugins = externalFilter "pandoc-citeproc" + : optPlugins opt + }) + "FILE") + "" + + , Option "" ["csl"] + (ReqArg + (\arg opt -> + return opt{ optMetadata = ("csl",arg) : + optMetadata opt }) + "FILE") + "" + + , Option "" ["citation-abbreviations"] + (ReqArg + (\arg opt -> + return opt{ optMetadata = ("csl-abbreviations",arg) : + optMetadata opt }) + "FILE") + "" + , Option "" ["natbib"] (NoArg (\opt -> return opt { optCiteMethod = Natbib })) -- cgit v1.2.3 From 1240edbc3b4f716c58fc3dfe9fc277c45b8529ab Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 16:09:00 -0700 Subject: Change for latest pandoc-citeproc. --- pandoc.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.hs b/pandoc.hs index 0d493d403..3713467ad 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -677,7 +677,7 @@ options = , Option "" ["citation-abbreviations"] (ReqArg (\arg opt -> - return opt{ optMetadata = ("csl-abbreviations",arg) : + return opt{ optMetadata = ("citation-abbreviations",arg) : optMetadata opt }) "FILE") "" -- cgit v1.2.3 From 8977b2aaed096a71e669e1161f6ce10d5286f34e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 16:22:40 -0700 Subject: Changed --metadata to return Boolean True if no value. Also documented in README. 
--- README | 6 ++++++ pandoc.hs | 15 ++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/README b/README index 7b75565c1..236637d10 100644 --- a/README +++ b/README @@ -284,6 +284,12 @@ Reader options if no directory is provided. If you want to run a script in the working directory, preface the filename with `./`. +`-M` *KEY[=VAL]*, `--metadata=`*KEY[:VAL]* +: Set the metadata field *KEY* to the value *VAL* after + parsing. A value specified on the command line overrides a value + specified in the document. Values will be interpreted as raw strings. + If no value is specified, the value will be treated as Boolean true. + `--normalize` : Normalize the document after reading: merge adjacent `Str` or `Emph` elements, for example, and remove repeated `Space`s. diff --git a/pandoc.hs b/pandoc.hs index 3713467ad..0bc2d7359 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -113,7 +113,7 @@ data Opt = Opt , optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply , optTemplate :: Maybe FilePath -- ^ Custom template , optVariables :: [(String,String)] -- ^ Template variables to set - , optMetadata :: [(String,String)] -- ^ Metadata fields to set + , optMetadata :: [(String,MetaValue)] -- ^ Metadata fields to set , optOutputFile :: String -- ^ Name of output file , optNumberSections :: Bool -- ^ Number sections in LaTeX , optNumberOffset :: [Int] -- ^ Starting number for sections @@ -328,8 +328,8 @@ options = (ReqArg (\arg opt -> do let (key,val) = case break (`elem` ":=") arg of - (k,_:v) -> (k,v) - (k,_) -> (k,"true") + (k,_:v) -> (k, MetaString v) + (k,_) -> (k, MetaBool True) return opt{ optMetadata = (key,val) : optMetadata opt }) "KEY[:VALUE]") "" @@ -658,7 +658,7 @@ options = , Option "" ["bibliography"] (ReqArg (\arg opt -> - return opt{ optMetadata = ("bibliography",arg) : + return opt{ optMetadata = ("bibliography",MetaString arg) : optMetadata opt , optPlugins = externalFilter "pandoc-citeproc" : optPlugins opt @@ -669,7 +669,7 @@ 
options = , Option "" ["csl"] (ReqArg (\arg opt -> - return opt{ optMetadata = ("csl",arg) : + return opt{ optMetadata = ("csl", MetaString arg) : optMetadata opt }) "FILE") "" @@ -677,7 +677,8 @@ options = , Option "" ["citation-abbreviations"] (ReqArg (\arg opt -> - return opt{ optMetadata = ("citation-abbreviations",arg) : + return opt{ optMetadata = ("citation-abbreviations", + MetaString arg) : optMetadata opt }) "FILE") "" @@ -1104,7 +1105,7 @@ main = do reader readerOpts - let doc0 = foldr (\(k,v) -> setMeta k (MetaString v)) doc metadata + let doc0 = foldr (\(k,v) -> setMeta k v) doc metadata let doc1 = foldr ($) doc0 transforms doc2 <- foldrM ($) doc1 $ map ($ [writerName']) plugins -- cgit v1.2.3 From 9ca89ec673b3f531927793799f8dfecc9fbf85c1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 16:26:08 -0700 Subject: Documented --bibliography, --csl, --citation-abbreviations. --- README | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README b/README index 236637d10..dc08c374d 100644 --- a/README +++ b/README @@ -604,6 +604,23 @@ Options affecting specific writers Citation rendering ------------------ +`--bibliography=`*FILE* +: Set the `bibliography` field in the document's metadata to *FILE*, + overriding any value set in the metadata, and tell pandoc to + use the `pandoc-citeproc` filter. (This is equivalent to the + combination `--metadata bibliography=FILE --filter pandoc-citeproc`.) + Note that an error will result unless `pandoc-citeproc` is installed. + +`--csl=`*FILE* +: Set the `csl` field in the document's metadata to *FILE*, + overriding any value set in the metadata. (This is equivalent to + `--metadata csl=FILE`.) + +`--citation-abbreviations=`*FILE* +: Set the `citation-abbreviations` field in the document's metadata to + *FILE*, overriding any value set in the metadata. (This is equivalent to + `--metadata citation-abbreviations=FILE`.) + `--natbib` : Use natbib for citations in LaTeX output. 
-- cgit v1.2.3 From b4c449ed5d97d707df5b1f2806e39511871365a5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 2 Sep 2013 09:09:27 -0700 Subject: Improved make_osx_package.sh. New PackageMaker location. New method of installing, to get pandoc-citeproc executables too. Use embed_data_files. --- make_osx_package.sh | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index 8b268f7d7..6f600ccc6 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -1,24 +1,32 @@ #!/bin/sh -e -DIST=osx_package +DIST=`pwd`/osx_package VERSION=$(grep -e '^Version' pandoc.cabal | awk '{print $2}') RESOURCES=$DIST/Resources ROOT=$DIST/pandoc SCRIPTS=osx-resources BASE=pandoc-$VERSION +ICU=/usr/local/Cellar/icu4c/51.1 ME=jgm CODESIGNID="Developer ID Application: John Macfarlane" -PACKAGEMAKER=/Developer/Applications/Utilities/PackageMaker.app/Contents/MacOS/PackageMaker +PACKAGEMAKER=/Applications/PackageMaker.app/Contents/MacOS/PackageMaker echo Removing old files... rm -rf $DIST mkdir -p $RESOURCES +# echo Updating database +# cabal update + +echo Adding source dirs # (TODO - remove when released) +cabal-dev add-source /Users/jgm/src/pandoc-types +cabal-dev add-source /Users/jgm/src/pandoc-citeproc + echo Building pandoc... 
-cabal-dev install-deps -cabal-dev configure --prefix=/usr/local --datasubdir=$BASE --docdir=/usr/local/doc/$BASE -cabal-dev build -cabal-dev copy --destdir=$ROOT +cabal-dev install hsb2hs +cabal-dev install -v1 --prefix $ROOT/usr/local --libdir /usr/local/lib --datadir /usr/local/share --flags="embed_data_files unicode_collation" --extra-lib-dirs=$ICU/lib --extra-include-dirs=$ICU/include pandoc-citeproc +cabal-dev install -v1 --prefix $ROOT/usr/local --libdir /usr/local/lib --datadir /usr/local/share --flags="embed_data_files" + # remove library files rm -r $ROOT/usr/local/lib chown -R $ME:staff $DIST -- cgit v1.2.3 From e97b54b1231f839a3ffebce97d4447d90d9638a5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 2 Sep 2013 10:41:52 -0700 Subject: Fixed make_osx_package.sh. --- make_osx_package.sh | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index 6f600ccc6..fe497ff4d 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -24,14 +24,17 @@ cabal-dev add-source /Users/jgm/src/pandoc-citeproc echo Building pandoc... 
cabal-dev install hsb2hs -cabal-dev install -v1 --prefix $ROOT/usr/local --libdir /usr/local/lib --datadir /usr/local/share --flags="embed_data_files unicode_collation" --extra-lib-dirs=$ICU/lib --extra-include-dirs=$ICU/include pandoc-citeproc -cabal-dev install -v1 --prefix $ROOT/usr/local --libdir /usr/local/lib --datadir /usr/local/share --flags="embed_data_files" -# remove library files -rm -r $ROOT/usr/local/lib -chown -R $ME:staff $DIST +cabal-dev install --reinstall -v1 --prefix $ROOT/tmp --flags="embed_data_files unicode_collation" --extra-lib-dirs=$ICU/lib --extra-include-dirs=$ICU/include pandoc-citeproc +cabal-dev install -v1 --prefix $ROOT/tmp --flags="embed_data_files" + +mkdir -p $ROOT/usr/local/share +cp -r $ROOT/tmp/bin $ROOT/usr/local/ +cp -r $ROOT/tmp/share/man $ROOT/usr/local/share/ +rm -rf $ROOT/tmp -gzip $ROOT/usr/local/share/man/man?/*.* +chown -R $ME:staff $DIST +# gzip $ROOT/usr/local/share/man/man?/*.* # cabal gives man pages the wrong permissions chmod +r $ROOT/usr/local/share/man/man?/*.* @@ -51,10 +54,11 @@ sudo $PACKAGEMAKER \ --id net.johnmacfarlane.pandoc \ --resources $RESOURCES \ --version $VERSION \ - --no-relocate \ --scripts $SCRIPTS \ --out $BASE.pkg + # --no-relocate + echo Signing package... sudo codesign --force --sign "$CODESIGNID" $BASE.pkg -- cgit v1.2.3 From 8f6c27920d69c8a77ec0ff341ece81194e39bb4e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 5 Sep 2013 09:22:37 -0700 Subject: Preliminary changes to windows installer script. --- windows/make-windows-installer.bat | 8 ++++++-- windows/pandoc.wxs | 11 +++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index 998da6775..a948a1a9f 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -1,9 +1,13 @@ @echo off cd .. 
-rem cabal update +cabal update cabal-dev clean if %errorlevel% neq 0 exit /b %errorlevel% -cabal-dev install --reinstall --force-reinstall --flags="embed_data_files" +cabal-dev add-source ../pandoc-types +cabal-dev add-source ../pandoc-citeproc +cabal-dev install hsb2hs +cabal-dev install --reinstall --flags="embed_data_files unicode_collation" --extra-lib-dirs=../icu/lib --extra-include-dirs=../icu/include pandoc-citeproc +cabal-dev install --reinstall --flags="embed_data_files" if %errorlevel% neq 0 exit /b %errorlevel% strip cabal-dev\bin\pandoc.exe cabal-dev\bin\pandoc.exe -s --template data\templates\default.html -S README -o README.html diff --git a/windows/pandoc.wxs b/windows/pandoc.wxs index 85edd29a8..2c87e41e9 100644 --- a/windows/pandoc.wxs +++ b/windows/pandoc.wxs @@ -51,6 +51,17 @@ Source="..\COPYING.rtf" /> + + + + + + + -- cgit v1.2.3 From 6fb9e82281250a4d4b8ddf3eb38c4974b321e5a5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 5 Sep 2013 19:55:19 -0700 Subject: Fixed typo. --- README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README b/README index dc08c374d..0a04fb6ab 100644 --- a/README +++ b/README @@ -277,7 +277,7 @@ Reader options Filters may be written in any language. `Text.Pandoc.JSON` exports `toJSONFilter` to facilitate writing filters in Haskell. - Those who would prefer to write filters in pandoc can use the + Those who would prefer to write filters in python can use the module `pandoc.py`: see for the module and several examples. Note that the *EXECUTABLE* will be sought in the user's `PATH`, and not in the working directory, -- cgit v1.2.3 From 7803c012df1d2e88e5b63c2d044d9b0342a01106 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 5 Sep 2013 20:52:57 -0700 Subject: Windows installer: gave up on unicode collation. 
--- windows/make-windows-installer.bat | 1 - 1 file changed, 1 deletion(-) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index a948a1a9f..0813496b3 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -6,7 +6,6 @@ if %errorlevel% neq 0 exit /b %errorlevel% cabal-dev add-source ../pandoc-types cabal-dev add-source ../pandoc-citeproc cabal-dev install hsb2hs -cabal-dev install --reinstall --flags="embed_data_files unicode_collation" --extra-lib-dirs=../icu/lib --extra-include-dirs=../icu/include pandoc-citeproc cabal-dev install --reinstall --flags="embed_data_files" if %errorlevel% neq 0 exit /b %errorlevel% strip cabal-dev\bin\pandoc.exe -- cgit v1.2.3 From f49749c46de07bd245492c93c4d0d824651cc8bb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 5 Sep 2013 21:01:56 -0700 Subject: More windows package tweaks. Make sure subordinate packages are reinstalled. --- windows/make-windows-installer.bat | 1 + 1 file changed, 1 insertion(+) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index 0813496b3..9b651860a 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -6,6 +6,7 @@ if %errorlevel% neq 0 exit /b %errorlevel% cabal-dev add-source ../pandoc-types cabal-dev add-source ../pandoc-citeproc cabal-dev install hsb2hs +cabal-dev install --force --reinstall --flags="embed_data_files" pandoc-types pandoc-citeproc cabal-dev install --reinstall --flags="embed_data_files" if %errorlevel% neq 0 exit /b %errorlevel% strip cabal-dev\bin\pandoc.exe -- cgit v1.2.3 From 728e47ae15252619444a9ee91f2ceeecd4f3cf98 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Sep 2013 15:40:08 -0700 Subject: MediaWiki reader: Allow Image: for images. Closes #971. 
--- src/Text/Pandoc/Readers/MediaWiki.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 8f1ff2776..2b938cd82 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -523,7 +523,7 @@ endline = () <$ try (newline <* image :: MWParser Inlines image = try $ do sym "[[" - sym "File:" + sym "File:" <|> sym "Image:" fname <- many1 (noneOf "|]") _ <- many (try $ char '|' *> imageOption) caption <- (B.str fname <$ sym "]]") -- cgit v1.2.3 From 8d43e08ce7be8673cc399b948d29386f525e9e1f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Sep 2013 22:26:18 -0700 Subject: Markdown writer: Fixed bugs in YAML header output. --- src/Text/Pandoc/Writers/Markdown.hs | 6 +++--- tests/writer.markdown | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index d617954dd..23e730bf0 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, char, space) import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy ) -import Data.Char ( isSpace ) +import Data.Char ( isSpace, isPunctuation ) import Data.Ord ( comparing ) import Text.Pandoc.Pretty import Control.Monad.State @@ -143,7 +143,7 @@ jsonToYaml (Object hashmap) = | otherwise -> (k' <> ":") $$ x (k', Object _, x) -> (k' <> ":") $$ nest 2 x (_, String "", _) -> empty - (k', _, x) -> k' <> ":" <> space <> x) + (k', _, x) -> k' <> ":" <> space <> hang 2 "" x) $ sortBy (comparing fst) $ H.toList hashmap jsonToYaml (Array vec) = vcat $ map (\v -> hang 2 "- " (jsonToYaml v)) $ V.toList vec @@ -151,7 +151,7 @@ jsonToYaml (String "") = empty jsonToYaml (String s) = case T.unpack s of x | '\n' `elem` x -> hang 2 ("|" <> cr) $ text x - | not 
(any (`elem` x) "\"'#:[]{}?-") -> text x + | not (any isPunctuation x) -> text x | otherwise -> text $ "'" ++ substitute "'" "''" x ++ "'" jsonToYaml (Bool b) = text $ show b jsonToYaml (Number n) = text $ show n diff --git a/tests/writer.markdown b/tests/writer.markdown index 7d67e4e87..9cf153637 100644 --- a/tests/writer.markdown +++ b/tests/writer.markdown @@ -2,7 +2,7 @@ author: - John MacFarlane - Anonymous -date: July 17, 2006 +date: 'July 17, 2006' title: Pandoc Test Suite ... -- cgit v1.2.3 From 43a077e302be21511660b44e53ef51fbd81e91c5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Sep 2013 22:29:47 -0700 Subject: Tweaked windows install script. Assumes that pandoc-types and pandoc-citeproc are in Hackage. --- windows/make-windows-installer.bat | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index 9b651860a..5890479ae 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -3,10 +3,8 @@ cd .. cabal update cabal-dev clean if %errorlevel% neq 0 exit /b %errorlevel% -cabal-dev add-source ../pandoc-types -cabal-dev add-source ../pandoc-citeproc cabal-dev install hsb2hs -cabal-dev install --force --reinstall --flags="embed_data_files" pandoc-types pandoc-citeproc +cabal-dev install --only-dependencies --force --reinstall --flags="embed_data_files" cabal-dev install --reinstall --flags="embed_data_files" if %errorlevel% neq 0 exit /b %errorlevel% strip cabal-dev\bin\pandoc.exe -- cgit v1.2.3 From 91550dd6dbca6655b5f08d7f17fbe99fe58801a2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Sep 2013 22:31:01 -0700 Subject: make_osx_package.sh: Assume pandoc-types, pandoc-citeproc are in Hackage. 
--- make_osx_package.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index fe497ff4d..0c84951c2 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -18,10 +18,6 @@ mkdir -p $RESOURCES # echo Updating database # cabal update -echo Adding source dirs # (TODO - remove when released) -cabal-dev add-source /Users/jgm/src/pandoc-types -cabal-dev add-source /Users/jgm/src/pandoc-citeproc - echo Building pandoc... cabal-dev install hsb2hs -- cgit v1.2.3 From 5afd373ae45f525ff1eff5e54c1850fe2c614b4b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 7 Sep 2013 09:36:37 -0700 Subject: Added `lists_without_preceding_blankline` extension. * Added `Ext_lists_without_preceding_blankline` to `Extension` in `Options`. Added this option to `githubMarkdownExtensions`. * Made markdown reader sensitive to this. * Closes #972. --- README | 4 ++++ src/Text/Pandoc/Options.hs | 2 ++ src/Text/Pandoc/Readers/Markdown.hs | 1 + 3 files changed, 7 insertions(+) diff --git a/README b/README index 0a04fb6ab..7d926216b 100644 --- a/README +++ b/README @@ -2454,6 +2454,10 @@ in pandoc, but may be enabled by adding `+EXTENSION` to the format name, where `EXTENSION` is the name of the extension. Thus, for example, `markdown+hard_line_breaks` is markdown with hard line breaks. +**Extension: `lists_without_preceding_blankline`**\ +Allow a list to occur right after a paragraph, with no intervening +blank space. + **Extension: `hard_line_breaks`**\ Causes all newlines within a paragraph to be interpreted as hard line breaks instead of spaces. 
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 48e418ab2..5f65abdde 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -80,6 +80,7 @@ data Extension = | Ext_link_attributes -- ^ MMD style reference link attributes | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters + | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank | Ext_startnum -- ^ Make start number of ordered list significant | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php | Ext_example_lists -- ^ Markdown-style numbered examples @@ -169,6 +170,7 @@ githubMarkdownExtensions = Set.fromList , Ext_intraword_underscores , Ext_strikeout , Ext_hard_line_breaks + , Ext_lists_without_preceding_blankline ] multimarkdownExtensions :: Set Extension diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 9b98cbc3e..2ca0d312a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1559,6 +1559,7 @@ endline :: MarkdownParser (F Inlines) endline = try $ do newline notFollowedBy blankline + guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header -- parse potential list-starts differently if in a list: -- cgit v1.2.3 From 56f56e5e1594ef5d18326d1eb6de3176db307c6a Mon Sep 17 00:00:00 2001 From: Merijn Verstraaten Date: Sat, 7 Sep 2013 18:58:16 +0100 Subject: Added support for LaTeX style literate Haskell code blocks in rST. 
--- src/Text/Pandoc/Readers/RST.hs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 32893128a..c12a1493a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -347,14 +347,25 @@ lhsCodeBlock = try $ do getPosition >>= guard . (==1) . sourceColumn guardEnabled Ext_literate_haskell optional codeBlockStart - lns <- many1 birdTrackLine - -- if (as is normal) there is always a space after >, drop it - let lns' = if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns - else lns + lns <- latexCodeBlock <|> birdCodeBlock blanklines return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], []) - $ intercalate "\n" lns' + $ intercalate "\n" lns + +latexCodeBlock :: Parser [Char] st [[Char]] +latexCodeBlock = try $ do + try (latexBlockLine "\\begin{code}") + many1Till anyLine (try $ latexBlockLine "\\end{code}") + where + latexBlockLine s = skipMany spaceChar >> string s >> blankline + +birdCodeBlock :: Parser [Char] st [[Char]] +birdCodeBlock = filterSpace <$> many1 birdTrackLine + where filterSpace lns = + -- if (as is normal) there is always a space after >, drop it + if all (\ln -> null ln || take 1 ln == " ") lns + then map (drop 1) lns + else lns birdTrackLine :: Parser [Char] st [Char] birdTrackLine = char '>' >> anyLine -- cgit v1.2.3 From 2c13b6f6dc4f55b76861991dea318e3566cec9a2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 7 Sep 2013 22:43:56 -0700 Subject: MediaWiki reader: Implement some mathjax extensions. * `:` for display math * `\(..\)` for inline math * `\[..\]` for display math We omit the `$` forms as the heuristics are harder.
--- src/Text/Pandoc/Readers/MediaWiki.hs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 2b938cd82..0432915bc 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -91,7 +91,7 @@ nested p = do return res specialChars :: [Char] -specialChars = "'[]<=&*{}|\"" +specialChars = "'[]<=&*{}|\":\\" spaceChars :: [Char] spaceChars = " \n\t" @@ -380,8 +380,9 @@ defListItem = try $ do terms <- mconcat . intersperse B.linebreak <$> many defListTerm -- we allow dd with no dt, or dt with no dd defs <- if B.isNull terms - then many1 $ listItem ':' - else many $ listItem ':' + then notFollowedBy (try $ string ":") *> + many1 (listItem ':') + else many (listItem ':') return (terms, defs) defListTerm :: MWParser Inlines @@ -462,6 +463,7 @@ inline = whitespace <|> image <|> internalLink <|> externalLink + <|> math <|> inlineTag <|> B.singleton <$> charRef <|> inlineHtml @@ -472,6 +474,16 @@ inline = whitespace str :: MWParser Inlines str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) +math :: MWParser Inlines +math = (B.displayMath <$> try (char ':' >> charsInTags "math")) + <|> (B.math <$> charsInTags "math") + <|> (B.displayMath <$> try (dmStart *> manyTill anyChar dmEnd)) + <|> (B.math <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) + where dmStart = string "\\[" + dmEnd = try (string "\\]") + mStart = string "\\(" + mEnd = try (string "\\)") + variable :: MWParser String variable = try $ do string "{{{" @@ -495,7 +507,6 @@ inlineTag = do TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub" TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup" - TagOpen "math" _ -> B.math <$> charsInTags "math" TagOpen "code" _ -> B.code <$> charsInTags "code" TagOpen "tt" _ -> B.code <$> charsInTags "tt" TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> 
charsInTags "hask" -- cgit v1.2.3 From f5726924c5967b95789aec2226b1f0a6f5ce93dc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 7 Sep 2013 22:58:04 -0700 Subject: HTML5 template: Add meta tag to allow user scaling. (Erik Evenson) --- data/templates | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/templates b/data/templates index 0cb55f228..1ccb16bb3 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 0cb55f2289148b106ab78ce8f15efc8d0b8acda0 +Subproject commit 1ccb16bb33e8022c9511284e6718386efa3a0bbf -- cgit v1.2.3 From 4bc9e695b846ff2733a55c4cff5eb373b4f1f0c4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 8 Sep 2013 11:04:47 -0700 Subject: Partial reorganization of changelog. --- changelog | 657 ++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 356 insertions(+), 301 deletions(-) diff --git a/changelog b/changelog index 57587f3db..2e8d3ff89 100644 --- a/changelog +++ b/changelog @@ -1,5 +1,27 @@ [pending release 1.12] + [new features] + + * Added `opml` (OPML) as input and output format. The `_note` attribute, + used in OmniOutliner and supported by multimarkdown, is supported. + We treat the contents as markdown blocks under a section header. + + * Added `haddock` (Haddock markup) as input format (David Lazar). + + * Added `revealjs` output format, for reveal.js HTML 5 slide shows. + (Thanks to Jamie F. Olson for the initial patch.) + Nested vertical stacks are used for hierarchical structure. + Results for more than one level of nesting may be odd. + + * Custom writers can now be written in lua. + + pandoc -t data/sample.lua + + will load the script sample.lua and use it as a custom writer. + (For a sample, do `pandoc --print-default-data-file sample.lua`.) + Note that pandoc embeds a lua interpreter, so lua need not be + installed separately. 
+ * New `--filter/-F` option to make it easier to run "filters" (Pandoc AST transformations that operate on JSON serializations). Filters are always passed the name of the output format, so their @@ -8,11 +30,18 @@ a python module for writing pandoc filters in python, with a number of examples. + * Added `--metadata/-M` option. + This is like `--variable/-V`, but actually adds to metadata, not + just variables. + * Added `--print-default-data-file` option, which allows printing of any of pandoc's data files. (For example, `pandoc --print-default-data-file reference.odt` will print `reference.odt`.) + + [behavior changes] + * `--toc-level` no longer implies `--toc`. Reason: EPUB users who don't want a visible TOC may still want to set the TOC level for in the book navigation. @@ -24,10 +53,328 @@ which URLs it is fetching, but not giving the full header). In addition, there are better error messages when fetching a URL fails. - * citeproc support has been removed from core pandoc (API changes). + * All slide formats now support incremental slide view for definition lists. + + * Parse `\(..\)` and `\[..\]` as math in MediaWiki reader. + Parse `:...` as display math. These notations are used with + the MathJax MediaWiki extension. + + * `Text.Pandoc.ImageSize`: Handle EPS (#903). This change will make + EPS images properly sized on conversion to Word. + + [API changes] + + * `Format` is now a newtype, not an alias for String. + Equality comparisons are case-insensitive. + + * Added `Text.Pandoc.Process`, exporting `pipeProcess`. + This is a souped-up version of `readProcessWithErrorcode` that + uses lazy bytestrings instead of strings and allows setting + environment variables. (Used in `Text.Pandoc.PDF`.) + + * New module `Text.Pandoc.Readers.OPML`. + + * New module `Text.Pandoc.Writers.OPML`. + + * New module `Text.Pandoc.Readers.Haddock` (David Lazar). + This is based on Haddock's own lexer/parser. 
+ + [bug fixes] + + * In markdown, don't autolink a bare URI that is followed by `` + (#937). + + * `Text.Pandoc.Shared` + + + `openURL` now follows redirects (#701), properly handles `data:` + URIs, and prints diagnostic output to stderr rather than stdout. + + `readDefaultDataFile`: normalize the paths. This fixes bugs in + `--self-contained` on pandoc compiled with `embed_data_files` (#833). + + Fixed `readDefaultDataFile` so it works on Windows. + + Better error messages for `readDefaultDataFile`. Instead of + listing the last path tried, which can confuse people who are + using `--self-contained`, so now we just list the data file name. + + URL-escape pipe characters. Even though these are legal, `Network.URI` + doesn't regard them as legal in URLs. So we escape them first (#535). + + * Mathjax in HTML slide shows: include explicit "Typeset" call. + This seems to be needed for some formats (e.g. slideous) and won't + hurt in others (#966). + + * Produce automatic header identifiers in parsing textile, RST, + and LaTeX, unless `auto_identifiers` extension is disabled (#967). + + * `Text.Pandoc.SelfContained`: Strip off fragment, query of relative URL + before treating as a filename. This fixes `--self-contained` when used + with CSS files that include web fonts using the method described here: + + (#739). Handle `src` in `embed`, `audio`, `source`, `input` tags. - + The `--bibliography`, `--csl`, and `--citation-abbreviation` - options have been removed. + * Slidy: Use slidy.js rather than slidy.js.gz. + Reason: some browsers have trouble with the gzipped js file, + at least on the local file system (#795). + + * Textile reader: + + + Correctly handle entities. + + Improved handling of `
` blocks (#927). Remove internal HTML tags
+      in code blocks, rather than printing them verbatim. Parse attributes
+      on `<pre>` tag for code blocks.
+  
+  * HTML reader: Handle non-simple tables (#893).  Column widths are read from
+    `col` tags if present, otherwise divided equally.
+
+  * LaTeX reader
+
+    + Support alltt environment (#892).
+    + Support `\textasciitilde`, `\textasciicircum` (#810).
+    + Treat `\textsl` as emphasized text reader (#850).
+    + Skip positional options after `\begin{figure}`.
+    + Support `\v{}` for hacek (#926).
+    + Don't add spurious ", " to citation suffixes.
+      This is added when needed in `Text.Pandoc.Biblio` anyway.
+    + Allow spaces in alignment spec in tables, e.g. `{ l r c }`.
+    + Improved support for accented characters (thanks to Scott Morrison).
+    + Parse label after section command and set id (#951).
+
+  * RST reader:
+
+    + Don't insert paragraphs where docutils doesn't.
+      `rst2html` doesn't add `

` tags to list items (even when they are + separated by blank lines) unless there are multiple paragraphs in the + list. This commit changes the RST reader to conform more closely to + what docutils does (#880). + + Improved metadata. Treat initial field list as metadata when + standalone specified. Previously ALL fields "title", "author", + "date" in field lists were treated as metadata, even if not at + the beginning. Use `subtitle` metadata field for subtitle. + + Fixed 'authors' metadata parsing in reST. Semicolons separate + different authors. + + * MediaWiki reader + + + Allow space before table rows. + + Fixed regression for `URL`. + `<` is no longer allowed in URLs, according to the uri parser + in `Text.Pandoc.Parsing`. Added a test case. + + Correctly handle indented preformatted text without preceding + or following blank line. + + Fixed `|` links inside table cells. Improved attribute parsing. + + Skip attributes on table rows. Previously we just crashed if + rows had attributes, now we ignore them. + + Ignore attributes on headers. + + Allow `Image:` for images (#971). + + * LaTeX writer + + + Don't use ligatures in escaping inline code. + + Fixed footnote numbers in LaTeX/PDF tables. This fixes a bug + wherein notes were numbered incorrectly in tables (#827). + + Always create labels for sections. Previously the labels were only + created when there were links to the section in the document (#871). + + Stop escaping `|` in LaTeX math. + This caused problems with array environments (#891). + + Change `\` to `/` in paths. `/` works even on Windows in LaTeX. + `\` will cause major problems if unescaped. + + Write id for code block to label attribute in LaTeX when listings + is used (thanks to Florian Eitel). + + Scale LaTeX tables so they don't exceed columnwidth. + + Avoid problem with footnotes in unnumbered headers (#940). + + * Beamer writer: when creating beamer slides, add `allowframebreaks` option + to the slide if it is one of the header classes. 
It is recommended + that your bibliography slide have this attribute: + + # References {.allowframebreaks} + + This causes multiple slides to be created if necessary, depending + on the length of the bibliography. + + * ConTeXt writer: Properly handle tables without captions. The old output + only worked in MkII. This should work in MkIV as well (#837). + + * MediaWiki writer: Use native mediawiki tables instead of HTML (#720). + + * HTML writer: + + + Fixed `--no-highlight` (Alexander Kondratskiy). + + Don't convert to lowercase in email obfuscation (#839). + + * AsciiDoc writer: + + + Support `--atx-headers` (Max Rydahl Andersen). + + Don't print empty identifier blocks `([[]])` on headers (Max + Rydahl Andersen). + + * ODT writer: + + + Fixing wrong numbered-list indentation in open document format + (Alexander Kondratskiy) (#369). + + `reference.odt`: Added pandoc as "generator" in `meta.xml`. + + Minor changes for ODF 1.2 conformance (#939). We leave the + nonconforming `contextual-spacing` attribute, which is provided by + LibreOffice itself and seems well supported. + + * Docx writer: + + + Fixed rendering of display math in lists. + In 1.11 and 1.11.1, display math in lists rendered as a new list + item. Now it always appears centered, just as outside of lists, + and in proper display math style, no matter how far indented the + containing list item is (#784). + + Use `w:br` with `w:type` `textWrapping` for linebreaks. + Previously we used `w:cr` (#873). + + Use Compact style for Plain block elements, to + differentiate between tight and loose lists (#775). + + Ignore most components of `reference.docx`. + We take the `word/styles.xml`, `docProps/app.xml`, + `word/theme/theme1.xml`, and `word/fontTable.xml` from + `reference.docx`, ignoring everything else. This should help + with the corruption problems caused when different versions of + Word resave the reference.docx and reorganize things. + + Made `--no-highlight` work properly. 
+ + * EPUB writer + + + Don't add `dc:creator` tags if present in EPUB metadata. + + Add `id="toc-title"` to `h1` in `nav.xhtml` (#799). + + Don't put blank title page in reading sequence. + Set `linear="no"` if no title block. Addresses #797. + + Download webtex images and include as data URLs. + This allows you to use `--webtex` in creating EPUBs. + Math with `--webtex` is automatically made self-contained. + + In `data/epub.css`, removed highlighting styles (which + are no longer needed, since styles are added by the HTML + writer according to `--highlighting-style`). Simplified + margin fields. + + If resource not found, skip it, as in Docx writer (#916). + + * RTF writer: + + + Properly handle characters above the 0000-FFFF range. + Uses surrogate pairs. Thanks to Hiromi Ishii for the patch. + + Fixed regression with RTF table of contents. + + Only autolink absolute URIs. This fixes a regression, #830. + + * Markdown writer: + + + Only autolink absolute URIs. This fixes a regression, #830. + + Don't wrap attributes in fenced code blocks. + + Write full metadata in MMD style title blocks. + + * `Text.Pandoc.Templates`: Fixed bug retrieving default template + for markdown variants. + + [template changes] + + * DocBook: Use DocBook 4.5 doctype. + + * Org: '#+TITLE:' is inserted before the title. + Previously the writer did this. + + * LaTeX: Changes to make mathfont work with xelatex. + We need the mathspec library, not just fontspec, for this. + We also need to set options for setmathfont (#734). + + * LaTeX: Use `tex-ansi` mapping for `monofont`. + This ensures that straight quotes appear as straight, rather than + being treated as curly. See #889. + + * Made `\includegraphics` more flexible in LaTeX template. + Now it can be used with options, if needed. Thanks to Bernhard Weichel. + + * LaTeX/Beamer: Added `classoption` variable. + This is intended for class options like `oneside`; it may + be repeated with different options. (Thanks to Oliver Matthews.) 
+ + * Beamer: Added `fonttheme` variable. (Thanks to Luis Osa.) + + * LaTeX: Added `biblio-style` variable (#920). + + * DZSlides: title attribute on title section. + + * HTML5: add meta tag to allow scaling by user (Erik Evenson) + + [under-the-hood improvements] + + * Added `Text.Pandoc.Compat.Monoid`. + This allows pandoc to compile with `base` < 4.5, where `Data.Monoid` + doesn't export `<>`. Thanks to Dirk Ullirch for the patch. + + * Added `Text.Pandoc.Compat.TagSoupEntity`. + This allows pandoc to compile with `tagsoup` 0.13.x. + Thanks to Dirk Ullrich for the patch. + + * Most of `Text.Pandoc.Readers.TeXMath` has been moved to the + `texmath` module (0.6.4). (This allows `pandoc-citeproc` to + handle simple math in bibliography fields.) + + * Added `Text.Pandoc.Writers.Shared` for shared functions used + only in writers. `metaToJSON` is used in writers to create a + JSON object for use in the templates from the pandoc metadata + and variables. `getField`, `setField`, and `defField` are + for working with JSON template contexts. + + * Added `Text.Pandoc.Asciify` utility module. + This exports functions to create ASCII-only versions of identifiers. + + * `Text.Pandoc.UTF8`: use strict bytestrings in reading. The use of lazy + bytestrings seemed to cause problems using pandoc on 64-bit Windows + 7/8 (#874). + + * Factored out `registerHeader` from markdown reader, added to + `Text.Pandoc.Parsing`. + + * Removed `blaze_html_0_5` flag, require `blaze-html` >= 0.5. + Reason: < 0.5 does not provide a monoid instance for Attribute, + which is now needed by the HTML writer (#803). + + * Added `http-conduit` flag, which allows fetching https resources. + It also brings in a large number of dependencies (`http-conduit` + and its dependencies), which is why for now it is an optional flag + (#820). + + * Added CONTRIBUTING.md. + + * Improved INSTALL instructions. + + * `make-windows-installer.bat`: Removed explicit paths for executables. 
+ + * `aeson` is now used instead of `json` for JSON. + + * Set default stack size to 16M. This is needed for some large + conversions, esp. if pandoc is compiled with 64-bit ghc. + + * Various small documentation improvements. + Thanks to achalddave and drothlis for patches. + + * Removed comment that chokes recent versions of CPP (#933). + + * Removed support for GHC version < 7.2, since pandoc-types now + requires at least GHC 7.2 for GHC generics. + + +------------------ + + + Added `lists_without_preceding_blankline` extension. + * Added `Ext_lists_without_preceding_blankline` to + `Extension` in `Options`. Added this option to + `githubMarkdownExtensions`. + * Made markdown reader sensitive to this. + * Closes #972. + + + +------------------ + + * citeproc support has been removed from core pandoc (API changes). + + + The `--bibliography`, `--csl`, and `--citation-abbreviation` options are + now implemented as follows: + --bibliography FILE => --metadata bibliography=FILE --filter pandoc-citeproc + --csl FILE => --metadata csl=FILE + --citation-abbreviations FILE => --metadata csl-abbreviations=FILE + Markdown and LaTeX citations as still parsed, but an external filter, `pandoc-citeproc`, is now needed to process them against a bibliography and CSL stylesheet. The bibliography @@ -55,8 +402,6 @@ as markdown citations, it is redundant to have a bibliography, since one will be generated automatically.) - * All slide formats: Support incremental slide view for definition lists. - * Added syntax for "pauses" in beamer or reaveljs slide shows. This gives @@ -68,15 +413,6 @@ [note - no longer seems to work in recente revealjs - perhaps this should be reverted] - * Slidy: Use slidy.js rather than slidy.js.gz. - Reason: some browsers have trouble with the gzipped js file, - at least on the local file system (#795). - - * Added `revealjs` output format, for reveal.js HTML 5 slide shows. - Thanks to Jamie F. Olson for the initial patch. 
- Nested vertical stacks are used for hierarchical structure. - Results for more than one level of nesting may be odd. - * Use new flexible metadata type. + Depend on `pandoc-types` 1.12. This changes the type of @@ -86,9 +422,6 @@ + Variables completely shadow metadata. If many variables with the same name are set, a list is created. - * `Format` is now a newtype, not an alias for String. - Equality comparisons are case-insensitive. - * New generic block container (`Div`) and inline container (`Span`) elements have been added. These can take attributes. They will render in HTML, Textile, MediaWiki, Org, RST and @@ -105,10 +438,6 @@ * Removed the deprecated `jsonFilter` function. + Added `readJSON`, `writeJSON` to the API (#817). - * Most of `Text.Pandoc.Readers.TeXMath` has been moved to the - `texmath` module (0.6.4). (This allows `pandoc-citeproc` to - handle simple math in bibliography fields.) - [ TODO - systematic documentation of pandoc-types API changes, including .JSON, .Walk and changes to .Definition, .Builder. Include the new Format newtype, and the new Span and Div @@ -122,38 +451,15 @@ the generic traversals `bottomUp` and `queryWith`. (API change.) - * Added `Text.Pandoc.Process`, exporting `pipeProcess`. - This is a souped-up version of `readProcessWithErrorcode` that - uses lazy bytestrings instead of strings and allows setting - environment variables. (Used in `Text.Pandoc.PDF`.) - - * Added `Text.Pandoc.Compat.Monoid`. - This allows pandoc to compile with `base` < 4.5, where `Data.Monoid` - doesn't export `<>`. Thanks to Dirk Ullirch for the patch. - - * Added `Text.Pandoc.Compat.TagSoupEntity`. - This allows pandoc to compile with `tagsoup` 0.13.x. - Thanks to Dirk Ullrich for the patch. - * `Text.Pandoc.Shared` - + `openURL` now follows redirects (#701). + `openURL` and `fetchItem` now return an Either, for better error handling. (API change.) - + `readDefaultDataFile`: normalize the paths. 
- This fixes bugs in `--self-contained` on pandoc compiled with - `embed_data_files` (#833). - + Fixed `readDefaultDataFile` so it works on Windows. - + Better error messages for `readDefaultDataFile`. Instead of - listing the last path tried, which can confuse people who are - using `--self-contained`, so now we just list the data file name. - + URL-escape pipe characters. - Even though these are legal, `Network.URI` doesn't regard them - as legal in URLs. So we escape them first (#535). - + `openURL`: Print diagnostic output to stderr, not stdout. - + `openURL`: Properly handle `data:` URIs. - + `stringify`: Generalized type. + + * `Text.Pandoc.Shared` + + `stringify`: Generalized type. + * `Text.Pandoc.Biblio` + Override citeproc-hs's `endWithPunct`. @@ -180,52 +486,10 @@ pandoc types. They use GHC generics and should be faster than the old JSON serialization using `Data.Aeson.Generic`. - * New module `Text.Pandoc.Writers.Shared` for shared functions used - only in writers. - - + Added `metaToJSON`. This is used in writers to create a - JSON object for use in the templates from the pandoc metadata - and variables. - + Added `getField`, `setField`, `defField` for working with JSON - template contexts. - - * Added `Text.Pandoc.Asciify` utility module. - This exports functions to create ASCII-only versions of identifiers. - - * `Text.Pandoc.ImageSize`: Handle EPS (#903). This change will make - EPS images properly sized on conversion to Word. - - * `Text.Pandoc.SelfContained` - - + Strip off fragment, query of relative URL before treating as - a filename. This fixes `--self-contained` when used with CSS - files that include web fonts using the method described here: - - (#739). - + Handle `src` in `embed`, `audio`, `source`, `input` tags. - * Added `Text.Pandoc.Writers.Custom` and custom output formats. - pandoc -t data/sample.lua - - will load the script sample.lua and use it as a custom writer. - `data/sample.lua` is provided as an example. 
(This can be printed - with `pandoc --print-default-data-file sample.lua`.) - * Added OPML reader and writer. - + New module `Text.Pandoc.Readers.OPML`. - The `_note` attribute is supported. This is unofficial, but - used e.g. in OmniOutliner and supported by multimarkdown. - We treat the contents as markdown blocks under a section - header. - + New module `Text.Pandoc.Writers.OPML`. - - * Added Haddock reader. - - + New module `Text.Pandoc.Readers.Haddock` (David Lazar). - This is based on Haddock's own lexer/parser. - * Markdown reader + Added `ignore_line_breaks` markdown extension. @@ -255,102 +519,6 @@ Leave these out entirely if they aren't present. + HTML span and div tags are parsed as pandoc Span and Div elements. - * RST reader - - + Don't insert paragraphs where docutils doesn't. - `rst2html` doesn't add `

` tags to list items (even when they are - separated by blank lines) unless there are multiple paragraphs in the - list. This commit changes the RST reader to conform more closely to - what docutils does (#880). - + Improved metadata. Treat initial field list as metadata when - standalone specified. Previously ALL fields "title", "author", - "date" in field lists were treated as metadata, even if not at - the beginning. Use `subtitle` metadata field for subtitle. - + Fixed 'authors' metadata parsing in reST. Semicolons separate - different authors. - - * Textile reader - - + Correctly handle entities. - + Improved handling of `

` blocks.
-    + Fixed a bug in which `<pre>` in certain contexts was
-      not recognized as a code block (#927).
-    + Remove internal HTML tags in code blocks, rather than printing
-      them verbatim.
-    * Parse attributes on `<pre>` tag for code blocks.
-
-  * HTML reader
-
-    + Handle non-simple tables (#893).  Column widths are read from
-      `col` tags if present, otherwise divided equally.
-
-  * LaTeX reader
-
-    + Support alltt environment (#892).
-    + Support `\textasciitilde`, `\textasciicircum` (#810).
-    + Treat `\textsl` as emphasized text reader (#850).
-    + Skip positional options after `\begin{figure}`.
-    + Support `\v{}` for hacek (#926).
-    + Don't add spurious ", " to citation suffixes.
-      This is added when needed in `Text.Pandoc.Biblio` anyway.
-    + Allow spaces in alignment spec in tables, e.g. `{ l r c }`.
-    + Improved support for accented characters (thanks to Scott Morrison).
-    + Parse label after section command and set id (#951).
-
-  * MediaWiki reader
-
-    + Allow space before table rows.
-    + Fixed regression for `<ref>URL</ref>`.
-      `<` is no longer allowed in URLs, according to the uri parser
-      in `Text.Pandoc.Parsing`.  Added a test case.
-    + Correctly handle indented preformatted text without preceding
-      or following blank line.
-    + Fixed `|` links inside table cells.  Improved attribute parsing.
-    + Skip attributes on table rows.  Previously we just crashed if
-      rows had attributes, now we ignore them.
-    + Ignore attributes on headers.
-
-  * LaTeX writer
-
-    + Don't use ligatures in escaping inline code.
-    + Fixed footnote numbers in LaTeX/PDF tables.  This fixes a bug
-      wherein notes were numbered incorrectly in tables (#827).
-    + Always create labels for sections.  Previously the labels were only
-      created when there were links to the section in the document (#871).
-    + Stop escaping `|` in LaTeX math.
-      This caused problems with array environments (#891).
-    + Change `\` to `/` in paths.  `/` works even on Windows in LaTeX.
-      `\` will cause major problems if unescaped.
-    + Write id for code block to label attribute in LaTeX when listings
-      is used (thanks to Florian Eitel).
-    + Scale LaTeX tables so they don't exceed columnwidth.
-    + Avoid problem with footnotes in unnumbered headers (#940).
-
-  * Beamer writer
-
-    + When creating beamer slides, add `allowframebreaks` option
-      to the slide if it is one of the header classes.  It's recommended
-      that your bibliography slide have this attribute:
-    
-        # References {.allowframebreaks}
-    
-    This causes multiple slides to be created if necessary, depending
-    on the length of the bibliography.
-
-  * ConTeXt writer
-
-    + Properly handle tables without captions.  The old output only
-      worked in MkII. This should work in MkIV as well (#837).
-
-  * MediaWiki writer
-
-    + Use native mediawiki tables instead of HTML (#720).
-
-  * HTML writer
-
-    + Fixed `--no-highlight` (Alexander Kondratskiy).
-    + Don't convert to lowercase in email obfuscation (#839).
-
   * Man writer
 
     + Offer more fine-grained control in template.
@@ -364,55 +532,6 @@
       pandoc will parse it as before into a title, section, header, and
       footer.  But you can also specify these elements explicitly (#885).
 
-  * AsciiDoc writer
-
-    + Support `--atx-headers` (Max Rydahl Andersen).
-    + Don't print empty identifier blocks `([[]])` on headers (Max
-      Rydahl Andersen).
-
-  * ODT writer
-
-    + Fixing wrong numbered-list indentation in open document format
-      (Alexander Kondratskiy) (#369).
-    + `reference.odt`: Added pandoc as "generator" in `meta.xml`.
-    + Minor changes for ODF 1.2 conformance (#939). We leave the
-      nonconforming `contextual-spacing` attribute, which is provided by
-      LibreOffice itself and seems well supported.
-
-  * Docx writer
-
-    + Fixed rendering of display math in lists.
-      In 1.11 and 1.11.1, display math in lists rendered as a new list
-      item.  Now it always appears centered, just as outside of lists,
-      and in proper display math style, no matter how far indented the
-      containing list item is (#784).
-    + Use `w:br` with `w:type` `textWrapping` for linebreaks.
-      Previously we used `w:cr` (#873).
-    + Use Compact style for Plain block elements.
-      This differentiates between tight and loose lists (#775).
-    + Ignore most components of `reference.docx`.
-      We take the `word/styles.xml`, `docProps/app.xml`,
-      `word/theme/theme1.xml`, and `word/fontTable.xml` from
-      `reference.docx`, ignoring everything else.  This should help
-      with the corruption problems caused when different versions of
-      Word resave the reference.docx and reorganize things.
-    +  Made `--no-highlight` work properly.
-
-  * EPUB writer
-
-    + Don't add `dc:creator` tags if present in EPUB metadata.
-    + Add `id="toc-title"` to `h1` in `nav.xhtml` (#799).
-    + Don't put blank title page in reading sequence.
-      Set `linear="no"` if no title block.  Addresses #797.
-    + Download webtex images and include as data URLs.
-      This allows you to use `--webtex` in creating EPUBs.
-      Math with `--webtex` is automatically made self-contained.
-    + In `data/epub.css`, removed highlighting styles (which
-      are no longer needed, since styles are added by the HTML
-      writer according to `--highlighting-style`).  Simplified
-      margin fields.
-    + If resource not found, skip it, as in Docx writer (#916).
-
   * Markdown writer
 
     + Allow simple tables to be printed as grid tables,
@@ -422,19 +541,9 @@
     + Put multiple authors on separate lines in pandoc titleblock.
       Also, don't wrap long author entries, as new lines get treated
       as new authors.
-    + Only autolink absolute URIs.  This fixes a regression, #830.
-    + Don't wrap attributes in fenced code blocks.
     + Support YAML title block (render fields in alphabetical order
       to make output predictable).
-    + Write full metadata in MMD style title blocks.
-
-  * RTF writer
-
-    + Properly handle characters above the 0000-FFFF range.
-      Uses surrogate pairs.  Thanks to Hiromi Ishii for the patch.
-    + Fixed regression with RTF table of contents.
-    + Only autolink absolute URIs.  This fixes a regression, #830.
-
+  
   * `Text.Pandoc.PDF`
 
     + On Windows, create temdir in working directory.
@@ -454,11 +563,6 @@
     + If compiling with `pdflatex` yields an encoding error, offer
       the suggestion to use `--latex-engine=xelatex`.
 
-  * `Text.Pandoc.UTF8`
-
-    + Use strict bytestrings in reading.  The use of lazy bytestrings seemed
-      to cause problems using pandoc on 64-bit Windows 7/8 (#874).
-
   * `Text.Pandoc.Parsing`
 
     + Further improvements to URI parser.
@@ -510,56 +614,7 @@
     + Templates can now contain "record lookups" in variables;
       for example, `author.institution` will retrieve the `institution`
       field of the `author` variable.
-    + Fixed bug retrieving default template for markdown variants.
-
-  * Default template changes
-
-    + DocBook:  Use DocBook 4.5 doctype.
-    + Org: '#+TITLE:' is inserted before the title.
-      Previously the writer did this.
-    + LaTeX:  Changes to make mathfont work with xelatex.
-      We need the mathspec library, not just fontspec, for this.
-      We also need to set options for setmathfont (#734).
-    + LaTeX: Use `tex-ansi` mapping for `monofont`.
-      This ensures that straight quotes appear as straight, rather than
-      being treated as curly.  See #889.
-    + Made `\includegraphics` more flexible in LaTeX template.
-      Now it can be used with options, if needed.  Thanks to Bernhard Weichel.
-    + LaTeX/Beamer: Added `classoption` variable.
-      This is intended for class options like `oneside`; it may
-      be repeated with different options.  (Thanks to Oliver Matthews.)
-    + Beamer: Added `fonttheme` variable.  (Thanks to Luis Osa.)
-    + LaTeX: Added `biblio-style` variable (#920).
-    + DZSlides: title attribute on title section.
-
-  * Removed `blaze_html_0_5` flag, require `blaze-html` >= 0.5.
-    Reason:  < 0.5 does not provide a monoid instance for Attribute,
-    which is now needed by the HTML writer (#803).
-
-  * Added `http-conduit` flag, which allows fetching https resources.
-    It also brings in a large number of dependencies (`http-conduit`
-    and its dependencies), which is why for now it is an optional flag
-    (#820).
-
-  * Added CONTRIBUTING.md.
-
-  * Improved INSTALL instructions.
-
-  * `make-windows-installer.bat`: Removed explicit paths for executables.
-
-  * `aeson` is now used instead of `json` for JSON.
-
-  * Set default stack size to 16M.  This is needed for some large
-    conversions, esp. if pandoc is compiled with 64-bit ghc.
-
-  * Various small documentation improvements.
-    Thanks to achalddave and drothlis for patches.
-    
-  * Removed comment that chokes recent versions of CPP (#933).
-    
-  * Removed support for GHC version < 7.2, since pandoc-types now
-    requires at least GHC 7.2 for GHC generics.
-
+  
 pandoc (1.11.1)
 
   * Markdown reader:
-- 
cgit v1.2.3


From cf2506acdc721ec27ed310cd7bdad8affb28d1e5 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Sun, 8 Sep 2013 11:43:46 -0700
Subject: Markdown: Allow backtick code blocks not to be preceded by blank
 line.

Closes #975.
---
 src/Text/Pandoc/Readers/Markdown.hs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2ca0d312a..4a7789e17 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -872,6 +872,7 @@ para = try $ do
             newline
             (blanklines >> return mempty)
               <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
+              <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead codeBlockFenced)
               <|> (guardDisabled Ext_blank_before_header >> lookAhead header)
             return $ do
               result' <- result
@@ -1562,6 +1563,8 @@ endline = try $ do
   guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
   guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
   guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
+  guardEnabled Ext_backtick_code_blocks >>
+     notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
   -- parse potential list-starts differently if in a list:
   st <- getState
   when (stateParserContext st == ListItemState) $ do
-- 
cgit v1.2.3


From 777226296b04fa37094ecb07eb33f8d3e05af036 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Sun, 8 Sep 2013 11:49:13 -0700
Subject: markdown+list_without_preceding_blankline: Interpret text before list
 as paragraph.

---
 src/Text/Pandoc/Readers/Markdown.hs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4a7789e17..122db17de 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -871,9 +871,11 @@ para = try $ do
     $ try $ do
             newline
             (blanklines >> return mempty)
-              <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
-              <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead codeBlockFenced)
-              <|> (guardDisabled Ext_blank_before_header >> lookAhead header)
+              <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
+              <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
+              <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
+              <|> (guardEnabled Ext_lists_without_preceding_blankline >>
+                       () <$ lookAhead listStart)
             return $ do
               result' <- result
               case B.toList result' of
-- 
cgit v1.2.3


From f8ecda0152990f4e1c7a6b5f45cecdefe5ac54f5 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Sun, 8 Sep 2013 11:50:34 -0700
Subject: More work on changelog.

---
 changelog | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/changelog b/changelog
index 2e8d3ff89..6c7be08cb 100644
--- a/changelog
+++ b/changelog
@@ -364,6 +364,9 @@
     * Made markdown reader sensitive to this.
     * Closes #972.
 
+    Markdown: Allow backtick code blocks not to be preceded by blank line.
+        
+	    Closes #975.
 
 
 ------------------
-- 
cgit v1.2.3


From c78557f3ca333d9ae925fdcb8a7c03199f5e47fd Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Sun, 8 Sep 2013 12:04:47 -0700
Subject: Templates:  more consistent behavior of `$for$`.

When `foo` is not a list, `$for(foo)$...$endfor$` should behave like
`$if(foo)$...$endif$`.  So if `foo` resolves to "", no output should
be produced.

See pandoc-templates#39.
---
 src/Text/Pandoc/Templates.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 22a44e735..7f744c7e1 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -212,7 +212,7 @@ iter var' template sep = Template $ \val -> unTemplate
            Just (Array vec) -> mconcat $ intersperse sep
                                        $ map (setVar template var')
                                        $ toList vec
-           Just x           -> setVar template var' x
+           Just x           -> cond var' (setVar template var' x) mempty
            Nothing          -> mempty) val
 
 setVar :: Template -> Variable -> Value -> Template
-- 
cgit v1.2.3


From 1911b619ee4ce0307f5a78daafc647eb4abe6bd6 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Sun, 8 Sep 2013 12:07:18 -0700
Subject: Added more raw material to changelog.

---
 changelog | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/changelog b/changelog
index 6c7be08cb..b7091460f 100644
--- a/changelog
+++ b/changelog
@@ -368,6 +368,14 @@
         
 	    Closes #975.
 
+    Templates:  more consistent behavior of `$for$`.
+    
+    When `foo` is not a list, `$for(foo)$...$endfor$` should behave like
+    $if(foo)$...$endif$.  So if `foo` resolves to "", no output should
+    be produced.
+    
+    See pandoc-templates#39.
+
 
 ------------------
 
-- 
cgit v1.2.3


From 81e2df32c92ee95771f2613b9ad30aeaf11423e5 Mon Sep 17 00:00:00 2001
From: John MacFarlane 
Date: Sun, 8 Sep 2013 15:47:50 -0700
Subject: Made . . . for pause work in all slide show formats except slideous.

---
 README                          |  5 ++---
 changelog                       |  5 +----
 src/Text/Pandoc/Writers/HTML.hs | 22 +++++++++++++++-------
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/README b/README
index 7d926216b..56ad50b3c 100644
--- a/README
+++ b/README
@@ -2680,9 +2680,8 @@ a single document.
 Inserting pauses
 ----------------
 
-In reveal.js and beamer slide shows, you can add "pauses" within
-a slide by including a paragraph containing three dots, separated
-by spaces:
+You can add "pauses" within a slide by including a paragraph containing
+three dots, separated by spaces:
 
     # Slide with a pause
 
diff --git a/changelog b/changelog
index b7091460f..b3e9d8b0b 100644
--- a/changelog
+++ b/changelog
@@ -413,7 +413,7 @@
       as markdown citations, it is redundant to have a bibliography,
       since one will be generated automatically.)
  
-  * Added syntax for "pauses" in beamer or reaveljs slide shows.
+  * Added syntax for "pauses" in slide shows:
 
         This gives
 
@@ -421,9 +421,6 @@
 
         a pause.
 
-    [note - no longer seems to work in recente revealjs - perhaps
-    this should be reverted]
-
   * Use new flexible metadata type.
 
     + Depend on `pandoc-types` 1.12.  This changes the type of
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 63b466af3..78a3edce8 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -268,11 +268,24 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen
                 else blockToHtml opts (Header level' (id',classes,keyvals) title')
   let isSec (Sec _ _ _ _ _) = True
       isSec (Blk _)         = False
+  let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."]
+      isPause _       = False
+  let fragmentClass = case writerSlideVariant opts of
+                           RevealJsSlides  -> "fragment"
+                           _               -> "incremental"
+  let inDiv xs = Blk (RawBlock (Format "html") ("
")) : + (xs ++ [Blk (RawBlock (Format "html") "
")]) innerContents <- mapM (elementToHtml slideLevel opts) $ if titleSlide -- title slides have no content of their own then filter isSec elements - else elements + else if slide + then case splitBy isPause elements of + [] -> [] + [x] -> x + xs -> concatMap inDiv xs + else elements let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && @@ -401,10 +414,6 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat [nl opts, img, capt, nl opts] --- . . . indicates a pause in a slideshow -blockToHtml opts (Para [Str ".",Space,Str ".",Space,Str "."]) - | writerSlideVariant opts == RevealJsSlides = - blockToHtml opts (RawBlock "html" "
") blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents @@ -580,8 +589,7 @@ toListItem opts item = nl opts >> H.li item blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html blockListToHtml opts lst = - mapM (blockToHtml opts) lst >>= - return . mconcat . intersperse (nl opts) + fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html -- cgit v1.2.3 From 508694c83ab628338c661fad088d159bdeab2c2a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 8 Sep 2013 16:17:16 -0700 Subject: More work on changelog. --- changelog | 232 ++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 113 insertions(+), 119 deletions(-) diff --git a/changelog b/changelog index b3e9d8b0b..99a2950c1 100644 --- a/changelog +++ b/changelog @@ -39,6 +39,25 @@ `pandoc --print-default-data-file reference.odt` will print `reference.odt`.) + * Added syntax for "pauses" in slide shows: + + This gives + + . . . + + a pause. + + * New markdown extensions: + + + `ignore_line_breaks`: causes intra-paragraph line breaks to be ignored, + rather than being treated as hard line breaks or spaces. This is useful + for some East Asian languages, where spaces aren't used between words, + but text is separated into lines for readability. + + `yaml_metadata_block`: Parse YAML metadata blocks. (Default.) + + `ascii_identifiers`: This will force `auto_identifiers` to use ASCII + only. (Default for `markdown_github`.) (#807) + + `lists_without_preceding_blankline`: Allow lists to start without + preceding blank space. (Default for `markdown_github`.) (#972) [behavior changes] @@ -53,14 +72,34 @@ which URLs it is fetching, but not giving the full header). In addition, there are better error messages when fetching a URL fails. 
+ * Better error reporting in some readers, due to changes in `readWith`: + the line in which the error occured is printed, with a caret pointing + to the column. + * All slide formats now support incremental slide view for definition lists. * Parse `\(..\)` and `\[..\]` as math in MediaWiki reader. Parse `:...` as display math. These notations are used with the MathJax MediaWiki extension. - * `Text.Pandoc.ImageSize`: Handle EPS (#903). This change will make - EPS images properly sized on conversion to Word. + * Man writer: The `title`, `section`, `header`, and `footer` can now + all be set individually in metadata. The `description` variable has been + removed. Quotes have been added so that spaces are allowed in the + title. If you have a title that begins + + COMMAND(1) footer here | header here + + pandoc will still parse it into a title, section, header, and + footer. But you can also specify these elements explicitly (#885). + + * Markdown writer + + + Allow simple tables to be printed as grid tables, + if other table options are disabled. This means you can do + `pandoc -t markdown-pipe_tables-simple_tables-multiline_tables` + and all tables will render as grid tables. + + Support YAML title block (render fields in alphabetical order + to make output predictable). [API changes] @@ -79,6 +118,28 @@ * New module `Text.Pandoc.Readers.Haddock` (David Lazar). This is based on Haddock's own lexer/parser. + * New module `Text.Pandoc.Writers.Custom`. + + * In `Text.Pandoc.Shared`, `openURL` and `fetchItem` now return an + Either, for better error handling. + + * Made `stringify` polymorphic in `Text.Pandoc.Shared`. + + * Removed `stripTags` from `Text.Pandoc.XML`. + + * `Text.Pandoc.Templates`: + + + Simplified `Template` type to a newtype. + + Removed `Empty`. + + Changed type of `renderTemplate`: it now takes a JSON context + and a compiled template. + + Export `compileTemplate`. + + Export `renderTemplate'` that takes a string instead of a compiled + template. 
+ + Export `varListToJSON`. + + * `Text.Pandoc.PDF` exports `makePDF` instead of `tex2pdf`. + [bug fixes] * In markdown, don't autolink a bare URI that is followed by `` @@ -101,6 +162,13 @@ This seems to be needed for some formats (e.g. slideous) and won't hurt in others (#966). + * `Text.Pandoc.PDF` + + + On Windows, create temdir in working directory, since the system + temp directory path may contain tildes, which can cause + problems in LaTeX (#777). + + Put temporary output directory in `TEXINPUTS` (see #917). + * Produce automatic header identifiers in parsing textile, RST, and LaTeX, unless `auto_identifiers` extension is disabled (#967). @@ -110,6 +178,12 @@ (#739). Handle `src` in `embed`, `audio`, `source`, `input` tags. + * `Text.Pandoc.Parsing`: `uri` parser no longer treats punctuation before + percent-encoding, or a `+` character, as final punctuation. + + * `Text.Pandoc.ImageSize`: Handle EPS (#903). This change will make + EPS images properly sized on conversion to Word. + * Slidy: Use slidy.js rather than slidy.js.gz. Reason: some browsers have trouble with the gzipped js file, at least on the local file system (#795). @@ -261,12 +335,23 @@ + Only autolink absolute URIs. This fixes a regression, #830. + Don't wrap attributes in fenced code blocks. + Write full metadata in MMD style title blocks. + + Put multiple authors on separate lines in pandoc titleblock. + Also, don't wrap long author entries, as new lines get treated + as new authors. - * `Text.Pandoc.Templates`: Fixed bug retrieving default template - for markdown variants. + * `Text.Pandoc.Templates`: + + + Fixed bug retrieving default template for markdown variants. + + Templates can now contain "record lookups" in variables; + for example, `author.institution` will retrieve the `institution` + field of the `author` variable. + + More consistent behavior of `$for$`. When `foo` is not a list, + `$for(foo)$...$endfor$` should behave like $if(foo)$...$endif$. 
+ So if `foo` resolves to "", no output should be produced. + See pandoc-templates#39. [template changes] - + * DocBook: Use DocBook 4.5 doctype. * Org: '#+TITLE:' is inserted before the title. @@ -297,6 +382,12 @@ [under-the-hood improvements] + * Markdown reader:Improved strong/emph parsing, using the strategy of + . The new parsing algorithm requires + no backtracking, and no keeping track of nesting levels. It will give + different results in some edge cases, but these should not affect normal + uses. + * Added `Text.Pandoc.Compat.Monoid`. This allows pandoc to compile with `base` < 4.5, where `Data.Monoid` doesn't export `<>`. Thanks to Dirk Ullirch for the patch. @@ -354,29 +445,6 @@ requires at least GHC 7.2 for GHC generics. ------------------- - - - Added `lists_without_preceding_blankline` extension. - * Added `Ext_lists_without_preceding_blankline` to - `Extension` in `Options`. Added this option to - `githubMarkdownExtensions`. - * Made markdown reader sensitive to this. - * Closes #972. - - Markdown: Allow backtick code blocks not to be preceded by blank line. - - Closes #975. - - Templates: more consistent behavior of `$for$`. - - When `foo` is not a list, `$for(foo)$...$endfor$` should behave like - $if(foo)$...$endif$. So if `foo` resolves to "", no output should - be produced. - - See pandoc-templates#39. - - ------------------ * citeproc support has been removed from core pandoc (API changes). @@ -413,14 +481,6 @@ as markdown citations, it is redundant to have a bibliography, since one will be generated automatically.) - * Added syntax for "pauses" in slide shows: - - This gives - - . . . - - a pause. - * Use new flexible metadata type. + Depend on `pandoc-types` 1.12. This changes the type of @@ -459,14 +519,7 @@ the generic traversals `bottomUp` and `queryWith`. (API change.) - * `Text.Pandoc.Shared` - - + `openURL` and `fetchItem` now return an Either, for - better error handling. (API change.) 
- - * `Text.Pandoc.Shared` - - + `stringify`: Generalized type. + * `Text.Pandoc.Biblio` @@ -494,75 +547,34 @@ pandoc types. They use GHC generics and should be faster than the old JSON serialization using `Data.Aeson.Generic`. - * Added `Text.Pandoc.Writers.Custom` and custom output formats. - - * Added OPML reader and writer. * Markdown reader - + Added `ignore_line_breaks` markdown extension. - This causes intra-paragraph line breaks to be ignored, - rather than being treated as hard line breaks or spaces. - This is useful for some East Asian languages, where spaces - aren't used between words, but text is separated into lines - for readability. + Properly handle blank line at beginning of input (#882). + Fixed bug in unmatched reference links. The input `[*infile*] [*outfile*]` was getting improperly parsed: "infile" was emphasized, but "*outfile*" was literal (#883). + Check for blank lines first in blocks. (And skip them. This might speed things up in some cases.) - + Implemented `Ext_ascii_identifiers` (#807). + Allow internal `+` in citation identifiers (#856). - + Added support for YAML metadata blocks, which can come anywhere - in the document (not just at the beginning). A document can contain - multiple YAML metadata blocks. - + Improved strong/emph parsing, using the strategy of - . The new parsing algorithm requires - no backtracking, and no keeping track of nesting levels. - It will give different results in some edge cases, but these should - not affect normal uses. + Allow `.` or `)` after `#` in ATX headers if no `fancy_lists`. + Do not generate blank title, author, or date metadata elements. Leave these out entirely if they aren't present. - + HTML span and div tags are parsed as pandoc Span and Div elements. - - * Man writer + + Allow backtick code blocks not to be preceded by blank line (#975). - + Offer more fine-grained control in template. - Now the `title`, `section`, `header`, and `footer` can all be set - individually in metadata. 
The `description` variable has been - removed. Quotes have been added so that spaces are allowed in the title. - If you have a title that begins - - COMMAND(1) footer here | header here + * Markdown reader - pandoc will parse it as before into a title, section, header, and - footer. But you can also specify these elements explicitly (#885). + + Added support for YAML metadata blocks, which can come anywhere + in the document (not just at the beginning). A document can contain + multiple YAML metadata blocks. + + HTML span and div tags are parsed as pandoc Span and Div elements. - * Markdown writer - + Allow simple tables to be printed as grid tables, - if other table options are disabled. This means you can do - `pandoc -t markdown-pipe_tables-simple_tables-multiline_tables` - and all tables will render as grid tables. - + Put multiple authors on separate lines in pandoc titleblock. - Also, don't wrap long author entries, as new lines get treated - as new authors. - + Support YAML title block (render fields in alphabetical order - to make output predictable). - + + + * `Text.Pandoc.PDF` - + On Windows, create temdir in working directory. - Reason: the path to the system temp directory may contain tildes, - which causes problems in LaTeX when the username is more than - eight characters (#777). - + Put temporary output directory in `TEXINPUTS`. - This will help later when we try to download external resources. - We can put them in the temp directory (see #917). - + `Text.Pandoc.PDF` exports `makePDF` instead of `tex2pdf`. - (API change.) + `makePDF` walks the pandoc AST and checks for the existence of images in the local directory. If they are not found, it attempts to find them, either in the directory containing the first source @@ -571,24 +583,20 @@ + If compiling with `pdflatex` yields an encoding error, offer the suggestion to use `--latex-engine=xelatex`. + * `Text.Pandoc.Parsing` - + Further improvements to URI parser. 
- Don't treat punctuation before percent-encoding as final punctuation. - Don't treat '+' as final punctuation. + Generalized state type on `readWith` (API change). - + Specialize readWith to `String` input. - + Better error reporting in `readWith`: On error have it print the line - in which the error occurred, with a caret pointing to the column. + + Specialize readWith to `String` input. (API change). + In `ParserState`, replace `stateTitle`, `stateAuthors`, `stateDate` with `stateMeta` and `stateMeta'`. - * `Text.Pandoc.XML` - - + Removed `stripTags`. (API change.) - * `Text.Pandoc.Options` - + * Added `Ext_lists_without_preceding_blankline` to + `Extension` in `Options`. Added this option to + `githubMarkdownExtensions`. + + Implemented `Ext_ascii_identifiers`. + + Ext_ignore_line_breaks. + Added `Ext_yaml_metadata_block`. (API change.) + Added `Ext_ascii_identifiers`. (API change.) This will force `Ext_auto_identifiers` to use ASCII only. @@ -609,20 +617,6 @@ the first file argument, you'll need to make that the current directory before running pandoc. API change (#942). - * `Text.Pandoc.Templates` - - + Changed type of `renderTemplate`: it now takes a JSON context - and a compiled template. (API change.) - + Export `compileTemplate`. (API change.) - + Export `renderTemplate'` that takes a string instead of a compiled - template. (API change.) - + Export `varListToJSON`. (API change.) - + Removed `Empty`. - + Simplified `Template` type to a newtype. - + Templates can now contain "record lookups" in variables; - for example, `author.institution` will retrieve the `institution` - field of the `author` variable. - pandoc (1.11.1) * Markdown reader: -- cgit v1.2.3 From 5b3df017c04e1d721b406c57d0aa7c706024c293 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 8 Sep 2013 20:36:35 -0700 Subject: More work on changelog. 
--- changelog | 154 +++++++++++++++++++++++++++++--------------------------------- 1 file changed, 73 insertions(+), 81 deletions(-) diff --git a/changelog b/changelog index 99a2950c1..848dcce69 100644 --- a/changelog +++ b/changelog @@ -72,6 +72,16 @@ which URLs it is fetching, but not giving the full header). In addition, there are better error messages when fetching a URL fails. + * Previously we used to store the directory of the first input file, + even if it was local, and used this as a base directory for finding + images in ODT, EPUB, Docx, and PDF. This has been confusing to many + users. So we now look for images relative to the current + working directory, even if the first file argument is in another + directory. Note that this change may break some existing workflows. + If you have been assuming that relative links will be interpreted + relative to the directory of the first file argument, you'll need + to make that the current directory before running pandoc. (#942) + * Better error reporting in some readers, due to changes in `readWith`: the line in which the error occured is printed, with a caret pointing to the column. @@ -92,6 +102,13 @@ pandoc will still parse it into a title, section, header, and footer. But you can also specify these elements explicitly (#885). + * Markdown reader + + + Added support for YAML metadata blocks, which can come anywhere + in the document (not just at the beginning). A document can contain + multiple YAML metadata blocks. + + HTML span and div tags are parsed as pandoc Span and Div elements. + * Markdown writer + Allow simple tables to be printed as grid tables, @@ -139,7 +156,26 @@ + Export `varListToJSON`. * `Text.Pandoc.PDF` exports `makePDF` instead of `tex2pdf`. - + + * `Text.Pandoc`: + + + Made `toJsonFilter` an alias for `toJSONFilter` from `Text.Pandoc.JSON`. + + Removed `ToJsonFilter` typeclass. `ToJSONFilter` from + `Text.Pandoc.JSON` should be used instead. 
(Compiling against + pandoc-types instead of pandoc will also produce smaller executables.) + * Removed the deprecated `jsonFilter` function. + + Added `readJSON`, `writeJSON` to the API (#817). + + * `Text.Pandoc.Options`: + + + Added `Ext_lists_without_preceding_blankline`, + `Ext_ascii_identifiers`, `Ext_ignore_line_breaks`, + `Ext_yaml_metadataBlock` to `Extension`. + + Changed `writerSourceDir` to `writerSourceURL` and changed the type to + a `Maybe`. `writerSourceURL` is set to 'Just url' when the first + command-line argument is an absolute URL. (So, relative links will be + resolved in relation to the first page.) Otherwise, 'Nothing'. + [bug fixes] * In markdown, don't autolink a bare URI that is followed by `` @@ -168,7 +204,11 @@ temp directory path may contain tildes, which can cause problems in LaTeX (#777). + Put temporary output directory in `TEXINPUTS` (see #917). - + + `makePDF` tries to download images that are not found locally, + if the first argument is a URL (#917). + + If compiling with `pdflatex` yields an encoding error, offer + the suggestion to use `--latex-engine=xelatex`. + * Produce automatic header identifiers in parsing textile, RST, and LaTeX, unless `auto_identifiers` extension is disabled (#967). @@ -188,6 +228,18 @@ Reason: some browsers have trouble with the gzipped js file, at least on the local file system (#795). + * Markdown reader + + + Properly handle blank line at beginning of input (#882). + + Fixed bug in unmatched reference links. The input + `[*infile*] [*outfile*]` was getting improperly parsed: + "infile" was emphasized, but "*outfile*" was literal (#883). + + Allow internal `+` in citation identifiers (#856). + + Allow `.` or `)` after `#` in ATX headers if no `fancy_lists`. + + Do not generate blank title, author, or date metadata elements. + Leave these out entirely if they aren't present. + + Allow backtick code blocks not to be preceded by blank line (#975). + * Textile reader: + Correctly handle entities. 
@@ -409,6 +461,13 @@ * Added `Text.Pandoc.Asciify` utility module. This exports functions to create ASCII-only versions of identifiers. + * `Text.Pandoc.Parsing` + + + Generalized state type on `readWith` (API change). + + Specialize readWith to `String` input. (API change). + + In `ParserState`, replace `stateTitle`, `stateAuthors`, `stateDate` + with `stateMeta` and `stateMeta'`. + * `Text.Pandoc.UTF8`: use strict bytestrings in reading. The use of lazy bytestrings seemed to cause problems using pandoc on 64-bit Windows 7/8 (#874). @@ -447,6 +506,18 @@ ------------------ + TODO - metadata changes + + TODO - json module + + TODO - walk module + + TODO - citation changes + removed Biblio + removed integrated citation support + etc. + include pandoc-citeproc changes + * citeproc support has been removed from core pandoc (API changes). + The `--bibliography`, `--csl`, and `--citation-abbreviation` options are @@ -497,15 +568,6 @@ and `` elements; in other formats they will simply pass through their contents. But they can be targeted by scripts. - * `Text.Pandoc` - - + Make `toJsonFilter` an alias for `toJSONFilter` from `Text.Pandoc.JSON`. - + Removed `ToJsonFilter` typeclass. `ToJSONFilter` from - `Text.Pandoc.JSON` should be used instead. (Compiling against - pandoc-types instead of pandoc will also produce smaller executables.) - * Removed the deprecated `jsonFilter` function. - + Added `readJSON`, `writeJSON` to the API (#817). - [ TODO - systematic documentation of pandoc-types API changes, including .JSON, .Walk and changes to .Definition, .Builder. Include the new Format newtype, and the new Span and Div @@ -547,76 +609,6 @@ pandoc types. They use GHC generics and should be faster than the old JSON serialization using `Data.Aeson.Generic`. - - * Markdown reader - - + Properly handle blank line at beginning of input (#882). - + Fixed bug in unmatched reference links. 
The input - `[*infile*] [*outfile*]` was getting improperly parsed: - "infile" was emphasized, but "*outfile*" was literal (#883). - + Check for blank lines first in blocks. (And skip them. This - might speed things up in some cases.) - + Allow internal `+` in citation identifiers (#856). - + Allow `.` or `)` after `#` in ATX headers if no `fancy_lists`. - + Do not generate blank title, author, or date metadata elements. - Leave these out entirely if they aren't present. - + Allow backtick code blocks not to be preceded by blank line (#975). - - * Markdown reader - - + Added support for YAML metadata blocks, which can come anywhere - in the document (not just at the beginning). A document can contain - multiple YAML metadata blocks. - + HTML span and div tags are parsed as pandoc Span and Div elements. - - - - - - * `Text.Pandoc.PDF` - - + `makePDF` walks the pandoc AST and checks for the existence of - images in the local directory. If they are not found, it attempts - to find them, either in the directory containing the first source - file, or at an absolute URL, or at a URL relative to the base URL - of the first command line argument (#917). - + If compiling with `pdflatex` yields an encoding error, offer - the suggestion to use `--latex-engine=xelatex`. - - - * `Text.Pandoc.Parsing` - - + Generalized state type on `readWith` (API change). - + Specialize readWith to `String` input. (API change). - + In `ParserState`, replace `stateTitle`, `stateAuthors`, `stateDate` - with `stateMeta` and `stateMeta'`. - - * `Text.Pandoc.Options` - * Added `Ext_lists_without_preceding_blankline` to - `Extension` in `Options`. Added this option to - `githubMarkdownExtensions`. - + Implemented `Ext_ascii_identifiers`. - + Ext_ignore_line_breaks. - + Added `Ext_yaml_metadata_block`. (API change.) - + Added `Ext_ascii_identifiers`. (API change.) - This will force `Ext_auto_identifiers` to use ASCII only. - Set as default for github markdown. 
- + Changed `writerSourceDir` to `writerSourceURL` and changed - the type to a `Maybe`. Previously we used to store the directory - of the first input file, even if it was local, and used this as a - base directory for finding images in ODT, EPUB, Docx, and PDF. - This has been confusing to many users. It seems better to look for - images relative to the current working directory, even if the first - file argument is in another directory. `writerSourceURL` is set - to 'Just url' when the first command-line argument is an absolute - URL. (So, relative links will be resolved in relation to the first - page.) Otherwise, 'Nothing'. The ODT, EPUB, Docx, and PDF - writers have been modified accordingly. Note that this change may - break some existing workflows. If you have been assuming that - relative links will be interpreted relative to the directory of - the first file argument, you'll need to make that the current - directory before running pandoc. API change (#942). - pandoc (1.11.1) * Markdown reader: -- cgit v1.2.3 From 0702d8d521f1fd41015a67ecf23c9a224b2ea21e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 8 Sep 2013 22:45:03 -0700 Subject: More work on changelog. --- changelog | 76 ++++++++++++++++++++++++--------------------------------------- 1 file changed, 29 insertions(+), 47 deletions(-) diff --git a/changelog b/changelog index 848dcce69..e9e722e5a 100644 --- a/changelog +++ b/changelog @@ -72,6 +72,12 @@ which URLs it is fetching, but not giving the full header). In addition, there are better error messages when fetching a URL fails. + * Citation support is no longer baked in to core pandoc. Users who + need citations will need to install and use a separate filter + (`--filter pandoc-citeproc`). This filter will take `bibliography`, + `csl`, and `citation-abbreviations` from the metadata, though it + may still be specified on the command line as before. 
+ * Previously we used to store the directory of the first input file, even if it was local, and used this as a base directory for finding images in ODT, EPUB, Docx, and PDF. This has been confusing to many @@ -258,7 +264,7 @@ + Skip positional options after `\begin{figure}`. + Support `\v{}` for hacek (#926). + Don't add spurious ", " to citation suffixes. - This is added when needed in `Text.Pandoc.Biblio` anyway. + This is added when needed in pandoc-citeproc. + Allow spaces in alignment spec in tables, e.g. `{ l r c }`. + Improved support for accented characters (thanks to Scott Morrison). + Parse label after section command and set id (#951). @@ -506,51 +512,33 @@ ------------------ - TODO - metadata changes - - TODO - json module - - TODO - walk module - - TODO - citation changes - removed Biblio - removed integrated citation support - etc. - include pandoc-citeproc changes - * citeproc support has been removed from core pandoc (API changes). - + The `--bibliography`, `--csl`, and `--citation-abbreviation` options are - now implemented as follows: - --bibliography FILE => --metadata bibliography=FILE --filter pandoc-citeproc - --csl FILE => --metadata csl=FILE - --citation-abbreviations FILE => --metadata csl-abbreviations=FILE - + Markdown and LaTeX citations as still parsed, but an external - filter, `pandoc-citeproc`, is now needed to process - them against a bibliography and CSL stylesheet. The bibliography - and stylesheet should be specified in the document's YAML metadata - (`bibliography` and `csl` fields), and the filter called with - `pandoc --filter pandoc-citeproc`. - + The `Text.Pandoc.Biblio` module has been removed, and the Markdown + + The `Text.Pandoc.Biblio` module has been removed (API change), + and the Markdown and LaTeX readers no longer process citations. Users of the pandoc library who want citation support will need to use `Text.CSL.Pandoc` from `pandoc-citations`. 
+ + All bibliography-related fields have been removed from `ReaderOptions` and `WriterOptions`: `writerBiblioFiles`, - `readerReferences`, `readerCitationStyle`. + `readerReferences`, `readerCitationStyle`. (API change) + + Note that a Cite element is now created in parsing markdown whether or not there is a matching reference (indeed, pandoc has no way of knowing, since the `--bibliography` option has been removed). - By default citations will print as `???`. + By default citations will print as `???`. (behavior change) + + The `pandoc-citeproc` script will put the bibliography at the end of the document, as before. However, it will be put inside a Div element with class "references", allowing users some control over the styling of references. A final header, if any, will - be included in the Div. + be included in the Div. (behavior change) + * The markdown writer will not print a bibliography if the `citations` extension is enabled. (If the citations are formatted as markdown citations, it is redundant to have a bibliography, - since one will be generated automatically.) + since one will be generated automatically.) (behavior change) * Use new flexible metadata type. @@ -568,11 +556,6 @@ and `` elements; in other formats they will simply pass through their contents. But they can be targeted by scripts. - [ TODO - systematic documentation of pandoc-types API changes, - including .JSON, .Walk and changes to .Definition, .Builder. - Include the new Format newtype, and the new Span and Div - elements.] - * Added `Text.Pandoc.Walk` (in `pandoc-types`), which exports hand-written tree-walking functions that are orders of magnitude faster than the SYB functions from `Text.Pandoc.Generic`. @@ -581,15 +564,20 @@ the generic traversals `bottomUp` and `queryWith`. (API change.) - + * New module `Text.Pandoc.JSON` in pandoc-types. + + + This provides `ToJSON` and `FromJSON` instances for the basic + pandoc types. 
They use GHC generics and should be faster than the + old JSON serialization using `Data.Aeson.Generic`. + - * `Text.Pandoc.Biblio` + * Citation processing improvements (now part of pandoc-citeproc): + (bug fixes) - + Override citeproc-hs's `endWithPunct`. - The new version correctly sees a sentence ending in '.)' as ending - with punctuation. This fixes a bug which led such sentences to receive - an extra period at the end: '.).'. Thanks to Steve Petersen for - reporting. + + Fixed `endWithPunct` The new version correctly sees a sentence + ending in '.)' as ending with punctuation. This fixes a bug which + led such sentences to receive an extra period at the end: '.).'. + Thanks to Steve Petersen for reporting. + Don't interfere with Notes that aren't citation notes. This fixes a bug in which notes not generated from citations were being altered (e.g. first letter capitalized) (#898). @@ -603,12 +591,6 @@ `\citep[23]{item1}` in LaTeX will be interpreted properly, with '23' treated as a locator of type 'page'. - * New module `Text.Pandoc.JSON` in pandoc-types. - - + This provides `ToJSON` and `FromJSON` instances for the basic - pandoc types. They use GHC generics and should be faster than the - old JSON serialization using `Data.Aeson.Generic`. - pandoc (1.11.1) * Markdown reader: -- cgit v1.2.3 From 4381c37b100a4cfd14020458e6b4e340a02b851e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 8 Sep 2013 22:45:11 -0700 Subject: `--bibliography` no longer implies `-F pandoc-citeproc`. This could lead to double filtering if the user specifies `-F` too. --- README | 6 ++---- pandoc.hs | 2 -- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/README b/README index 56ad50b3c..7a827ef7b 100644 --- a/README +++ b/README @@ -606,10 +606,8 @@ Citation rendering `--bibliography=`*FILE* : Set the `bibliography` field in the document's metadata to *FILE*, - overriding any value set in the metadata, and tell pandoc to - use the `pandoc-citeproc` filter. 
(This is equivalent to the - combination `--metadata bibliography=FILE --filter pandoc-citeproc`.) - Note that an error will result unless `pandoc-citeproc` is installed. + overriding any value set in the metadata. (This is equivalent to + `--metadata bibliography=FILE`.) `--csl=`*FILE* : Set the `csl` field in the document's metadata to *FILE*, diff --git a/pandoc.hs b/pandoc.hs index 0bc2d7359..2c20e16b5 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -660,8 +660,6 @@ options = (\arg opt -> return opt{ optMetadata = ("bibliography",MetaString arg) : optMetadata opt - , optPlugins = externalFilter "pandoc-citeproc" - : optPlugins opt }) "FILE") "" -- cgit v1.2.3 From a9f3abc653bc7c0cb320056e31bb569652e03321 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 9 Sep 2013 11:19:37 -0700 Subject: Markdown: don't parse citation right after alphanumeric. An `@` after an alphanumeric is probably an email address. --- src/Text/Pandoc/Readers/Markdown.hs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 122db17de..9f2bc4447 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1823,6 +1823,11 @@ normalCite = try $ do citeKey :: MarkdownParser (Bool, String) citeKey = try $ do + -- make sure we're not right after an alphanumeric, + -- since foo@bar.baz is probably an email address + lastStrPos <- stateLastStrPos <$> getState + pos <- getPosition + guard $ lastStrPos /= Just pos suppress_author <- option False (char '-' >> return True) char '@' first <- letter -- cgit v1.2.3 From 6185ee8685a6a09c9b03a7976010dbf1aa584fd6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 9 Sep 2013 20:23:16 -0700 Subject: Updated changelog. 
--- changelog | 242 ++++++++++++++++++++++++++++++++------------------------------ 1 file changed, 126 insertions(+), 116 deletions(-) diff --git a/changelog b/changelog index e9e722e5a..b9391ae34 100644 --- a/changelog +++ b/changelog @@ -2,6 +2,24 @@ [new features] + * Much more flexible metadata, including arbitrary fields and structured + values. Metadata can be specified flexibly in pandoc markdown using + YAML metadata blocks, which may occur anywhere in the document: + + --- + title: Here is my title. + abstract: | + This is the abstract. + + 1. It can contain + 2. block content + and *inline markup* + + tags: [cat, dog, animal] + ... + + Metadata fields automatically populate template variables. + * Added `opml` (OPML) as input and output format. The `_note` attribute, used in OmniOutliner and supported by multimarkdown, is supported. We treat the contents as markdown blocks under a section header. @@ -45,7 +63,7 @@ . . . - a pause. + me pause. * New markdown extensions: @@ -78,6 +96,20 @@ `csl`, and `citation-abbreviations` from the metadata, though it may still be specified on the command line as before. + * A `Cite` element is now created in parsing markdown whether or not + there is a matching reference. By default citations will print as `???`. + + * The `pandoc-citeproc` script will put the bibliography at the + end of the document, as before. However, it will be put inside a `Div` + element with class "references", allowing users some control + over the styling of references. A final header, if any, will + be included in the `Div`. + + * The markdown writer will not print a bibliography if the + `citations` extension is enabled. (If the citations are formatted + as markdown citations, it is redundant to have a bibliography, + since one will be generated automatically.) + * Previously we used to store the directory of the first input file, even if it was local, and used this as a base directory for finding images in ODT, EPUB, Docx, and PDF. 
This has been confusing to many @@ -98,6 +130,13 @@ Parse `:...` as display math. These notations are used with the MathJax MediaWiki extension. + * All writers: template variables are set automatically from metadata + fields. However, variables specified on the command line with + `--variable` will completely shadow metadata fields. + + * If `--variable` is used to set many variables with the same name, + a list is created. + * Man writer: The `title`, `section`, `header`, and `footer` can now all be set individually in metadata. The `description` variable has been removed. Quotes have been added so that spaces are allowed in the @@ -123,12 +162,38 @@ and all tables will render as grid tables. + Support YAML title block (render fields in alphabetical order to make output predictable). - + [API changes] + * `Meta` in `Text.Pandoc.Definition` has been changed to allow + structured metadata. (Note: existing code that pattern-matches + on `Meta` will have to be revised.) Metadata can now contain + indefinitely many fields, with content that can be a string, + a Boolean, a list of `Inline` elements, a list of `Block` + elements, or a map or list of these. + + * A new generic block container (`Div`) has been added to `Block`, + and a generic inline container (`Span`) has been added to `Inline`. + These can take attributes. They will render in HTML, Textile, + MediaWiki, Org, RST and Markdown (with `markdown_in_html` + extension) as HTML `<div>
` and `<span>` elements; in other formats + they will simply pass through their contents. But they can be + targeted by scripts. + * `Format` is now a newtype, not an alias for String. Equality comparisons are case-insensitive. + * Added `Text.Pandoc.Walk`, which exports hand-written tree-walking + functions that are much faster than the SYB functions from + `Text.Pandoc.Generic`. These functions are now used where possible + in pandoc's code. (`Tests.Walk` verifies that `walk` and `query` + match the generic traversals `bottomUp` and `queryWith`.) + + * Added `Text.Pandoc.JSON`, which provides `ToJSON` and `FromJSON` + instances for the basic pandoc types. They use GHC generics and + should be faster than the old JSON serialization using + `Data.Aeson.Generic`. + * Added `Text.Pandoc.Process`, exporting `pipeProcess`. This is a souped-up version of `readProcessWithErrorcode` that uses lazy bytestrings instead of strings and allows setting @@ -145,9 +210,9 @@ * In `Text.Pandoc.Shared`, `openURL` and `fetchItem` now return an Either, for better error handling. - + * Made `stringify` polymorphic in `Text.Pandoc.Shared`. - + * Removed `stripTags` from `Text.Pandoc.XML`. * `Text.Pandoc.Templates`: @@ -181,6 +246,14 @@ a `Maybe`. `writerSourceURL` is set to 'Just url' when the first command-line argument is an absolute URL. (So, relative links will be resolved in relation to the first page.) Otherwise, 'Nothing'. + + All bibliography-related fields have been removed from + `ReaderOptions` and `WriterOptions`: `writerBiblioFiles`, + `readerReferences`, `readerCitationStyle`. + + * The `Text.Pandoc.Biblio` module has been removed. Users of the + pandoc library who want citation support will need to use + `Text.CSL.Pandoc` from `pandoc-citeproc`. + [bug fixes] @@ -223,7 +296,7 @@ with CSS files that include web fonts using the method described here: (#739). Handle `src` in `embed`, `audio`, `source`, `input` tags.
- + * `Text.Pandoc.Parsing`: `uri` parser no longer treats punctuation before percent-encoding, or a `+` character, as final punctuation. @@ -252,7 +325,7 @@ + Improved handling of `
` blocks (#927). Remove internal HTML tags
       in code blocks, rather than printing them verbatim. Parse attributes
       on `
` tag for code blocks.
-  
+
   * HTML reader: Handle non-simple tables (#893).  Column widths are read from
     `col` tags if present, otherwise divided equally.
 
@@ -316,9 +389,9 @@
   * Beamer writer: when creating beamer slides, add `allowframebreaks` option
       to the slide if it is one of the header classes.  It is recommended
       that your bibliography slide have this attribute:
-    
+
         # References {.allowframebreaks}
-    
+
     This causes multiple slides to be created if necessary, depending
     on the length of the bibliography.
 
@@ -398,7 +471,7 @@
       as new authors.
 
   * `Text.Pandoc.Templates`:
-  
+
     + Fixed bug retrieving default template for markdown variants.
     + Templates can now contain "record lookups" in variables;
       for example, `author.institution` will retrieve the `institution`
@@ -408,8 +481,26 @@
       So if `foo` resolves to "", no output should be produced.
       See pandoc-templates#39.
 
+  * Citation processing improvements (now part of pandoc-citeproc):
+
+    + Fixed `endWithPunct`.  The new version correctly sees a sentence
+      ending in '.)' as ending with punctuation.  This fixes a bug which
+      led such sentences to receive an extra period at the end: '.).'.
+      Thanks to Steve Petersen for reporting.
+    + Don't interfere with Notes that aren't citation notes.
+      This fixes a bug in which notes not generated from citations were
+      being altered (e.g. first letter capitalized) (#898).
+    + Only capitalize footnote citations when they have a prefix.
+    + Changes in suffix parsing.  A suffix beginning with a digit gets 'p'
+      prepended before being passed to citeproc-hs, so that bare numbers
+      are treated as page numbers by default.  A suffix not beginning with
+      punctuation has a space added at the beginning (rather than a comma and
+      space, as was done before for not-author-in-text citations).
+      The result is that `\citep[23]{item1}` in LaTeX will be interpreted
+      properly, with '23' treated as a locator of type 'page'.
+
   [template changes]
-   
+
   * DocBook:  Use DocBook 4.5 doctype.
 
   * Org: '#+TITLE:' is inserted before the title.
@@ -477,7 +568,7 @@
   * `Text.Pandoc.UTF8`: use strict bytestrings in reading.  The use of lazy
      bytestrings seemed to cause problems using pandoc on 64-bit Windows
      7/8 (#874).
-    
+
   * Factored out `registerHeader` from markdown reader, added to
     `Text.Pandoc.Parsing`.
 
@@ -503,94 +594,13 @@
 
   * Various small documentation improvements.
     Thanks to achalddave and drothlis for patches.
-    
+
   * Removed comment that chokes recent versions of CPP (#933).
-    
+
   * Removed support for GHC version < 7.2, since pandoc-types now
     requires at least GHC 7.2 for GHC generics.
 
 
-------------------
-
-  * citeproc support has been removed from core pandoc (API changes).
-
-    + The `Text.Pandoc.Biblio` module has been removed (API change),
-    and the Markdown
-      and LaTeX readers no longer process citations.  Users of the
-      pandoc library who want citation support will need to use
-      `Text.CSL.Pandoc` from `pandoc-citations`.
-
-    + All bibliography-related fields have been removed from
-      `ReaderOptions` and `WriterOptions`: `writerBiblioFiles`,
-      `readerReferences`, `readerCitationStyle`. (API change)
-
-    + Note that a Cite element is now created in parsing markdown whether
-      or not there is a matching reference (indeed, pandoc has no way of
-      knowing, since the `--bibliography` option has been removed).
-      By default citations will print as `???`. (behavior change)
-
-    + The `pandoc-citeproc` script will put the bibliography at the
-      end of the document, as before.  However, it will be put inside a Div
-      element with class "references", allowing users some control
-      over the styling of references.  A final header, if any, will
-      be included in the Div. (behavior change)
-
-    * The markdown writer will not print a bibliography if the
-      `citations` extension is enabled.  (If the citations are formatted
-      as markdown citations, it is redundant to have a bibliography,
-      since one will be generated automatically.) (behavior change)
- 
-  * Use new flexible metadata type.
-
-    + Depend on `pandoc-types` 1.12.  This changes the type of
-      `Meta` to allow structured metadata.  (API change:  existing
-      code that pattern-matches on `Meta` will have to be revised.)
-    + Revised readers and writers to use the new `Meta` type.
-    + Variables completely shadow metadata.
-      If many variables with the same name are set, a list is created.
-
-  * New generic block container (`Div`) and inline container
-    (`Span`) elements have been added. These can take attributes.
-    They will render in HTML, Textile, MediaWiki, Org, RST and
-    and Markdown (with `markdown_in_html` extension) as HTML `
` - and `` elements; in other formats they will simply pass through - their contents. But they can be targeted by scripts. - - * Added `Text.Pandoc.Walk` (in `pandoc-types`), which exports - hand-written tree-walking functions that are orders of magnitude - faster than the SYB functions from `Text.Pandoc.Generic`. - These functions are now used where possible in pandoc's code. - Added `Tests.Walk` to verify that `walk` and `query` match - the generic traversals `bottomUp` and `queryWith`. - (API change.) - - * New module `Text.Pandoc.JSON` in pandoc-types. - - + This provides `ToJSON` and `FromJSON` instances for the basic - pandoc types. They use GHC generics and should be faster than the - old JSON serialization using `Data.Aeson.Generic`. - - - * Citation processing improvements (now part of pandoc-citeproc): - (bug fixes) - - + Fixed `endWithPunct` The new version correctly sees a sentence - ending in '.)' as ending with punctuation. This fixes a bug which - led such sentences to receive an extra period at the end: '.).'. - Thanks to Steve Petersen for reporting. - + Don't interfere with Notes that aren't citation notes. - This fixes a bug in which notes not generated from citations were - being altered (e.g. first letter capitalized) (#898). - + Only capitalize footnote citations when they have a prefix. - + Changes in suffix parsing. A suffix beginning with a digit gets 'p' - inserted before it before passing to citeproc-hs, so that bare numbers - are treated as page numbers by default. A suffix not beginning with - punctuation has a space added at the beginning (rather than a comma and - space, as was done before for not-author-in-text citations). - The result of these changes (and the last commit) is that - `\citep[23]{item1}` in LaTeX will be interpreted properly, - with '23' treated as a locator of type 'page'. - pandoc (1.11.1) * Markdown reader: @@ -603,7 +613,7 @@ pandoc (1.11.1) paragraphs. 
The unmatched quotes now get turned into literal left double quotes. (No `Quoted` inline is generated, however.) Closes #99 (again). - + * HTML writer: Fixed numbering mismatch between TOC and sections. `--number-offset` now affects TOC numbering as well as section numbering, as it should have all along. Closes #789. @@ -638,7 +648,7 @@ pandoc (1.11) * Added `--number-offset` option. (See README for description.) * Added `--default-image-extension` option. (See README for description.) - + * `--number-sections` behavior change: headers with class `unnumbered` will not be numbered. @@ -689,7 +699,7 @@ pandoc (1.11) + Better support for Verbatim and minted environments. Closes #763. * Markdown reader: - + + `-` in an attribute context = `.unnumbered`. The point of this is to provide a way to specify unnumbered headers in non-English documents. @@ -715,7 +725,7 @@ pandoc (1.11) Closes #723. * Textile reader: - + + Handle attributes on headers. * LaTeX reader: @@ -748,7 +758,7 @@ pandoc (1.11) `unnumbered` class. * Textile writer: - + + Support header attributes. * Markdown writer: @@ -762,11 +772,11 @@ pandoc (1.11) use `pandoc -t markdown-citations`. * RST writer: - + + Support `:number-lines:` in code blocks. * Docx writer: - + + Better treatment of display math. Display math inside a paragraph is now put in a separate paragraph, so it will render properly (centered and without extra blank lines around it). @@ -784,7 +794,7 @@ pandoc (1.11) edited. Closes #414. * EPUB writer: - + + Fix section numbering. Previously the numbering restarted from 1 in each chapter (with `--number-sections`), though the numbers in the table of contents were correct. @@ -807,7 +817,7 @@ pandoc (1.11) section number. * `Text.Pandoc.Pretty`: - + + Fixed `chomp` so it works inside `Prefixed` elements. + Changed `Show` instance so it is better for debugging. @@ -877,7 +887,7 @@ pandoc (1.10.1) Thanks to Nick Bart for the suggestion of using @{}. 
* `Text.Pandoc.Parsing`: - + + More efficient version of `anyLine`. + Type of `macro` has changed; the parser now returns `Blocks` instead of `Block`. @@ -900,7 +910,7 @@ pandoc (1.10.0.5) Thanks to Andrew Lee for pointing out the bug. * Markdown reader: Fixed regressions in fenced code blocks. Closes #722. - + + Tilde code fences can again take a bare language string (`~~~ haskell`), not just curly-bracketed attributes (`~~~ {.haskell}`). @@ -931,7 +941,7 @@ pandoc (1.10.0.5) `hsb2hs`. * Changes to `make-windows-installer.bat`. - + + Exit batch file if any of the cabal-dev installs fail. + There's no longer any need to reinstall `highlighting-kate`. + Don't start with a `cabal update`; leave that to the user. @@ -1683,19 +1693,19 @@ pandoc (1.9.4.2) checks for the base version that intelligibly enable encoding/decoding when needed. Fixes a bug with multilingual filenames when pandoc was compiled with ghc 7.4 (#540). - + * Don't generate an empty H1 after hrule slide breaks. We now use a slide-level header with contents `[Str "\0"]` to mark an hrule break. This avoids creation of an empty H1 in these contexts. Closes #484. - + * Docbook reader: Added support for "bold" emphasis. Thanks to mb21. - + * In make_osx_package.sh, ensure citeproc-hs is built with the embed_data_files flag. - + * MediaWiki writer: Avoid extra blank lines after sublists (Gavin Beatty). - + * ConTeXt writer: Don't escape `&`, `^`, `<`, `>`, `_`, simplified escapes for `}` and `{` to `\{` and `\}` (Aditya Mahajan). @@ -1706,7 +1716,7 @@ pandoc (1.9.4.2) * Added some missing exports and tests to the cabal file (Alexander V Vershilov). - + * Compile with `-rtsopts` and `-threaded` by default. pandoc (1.9.4.1) @@ -1724,7 +1734,7 @@ pandoc (1.9.4.1) + Use microtype if available. * Biblio: - + + Add comma to beginning of bare suffix, e.g. `@item1 [50]`. Motivation: `@item1 [50]` should be as close as possible to `[@item1, 50]`. 
@@ -2028,12 +2038,12 @@ pandoc (1.9.1.1) pandoc (1.9.1) * LaTeX reader: - + + Fixed regression in 1.9; properly handle escaped $ in latex math. + Put LaTeX verse environments in blockquotes. * Markdown reader: - + + Limit nesting of strong/emph. This avoids exponential lookahead in parasitic cases, like `a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**`. + Improved attributes syntax (inn code blocks/spans): @@ -2252,7 +2262,7 @@ pandoc (1.9) will not be wrapped in `` tags. - + * The LaTeX parser has been completely rewritten; it is now much more accurate, robust, and extensible. However, there are two important changes in how it treats unknown LaTeX. (1) Previously, unknown -- cgit v1.2.3 From 046d817485bdd849f99446e09faf332c98161035 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 9 Sep 2013 20:28:57 -0700 Subject: Further changelog formatting cleanup. --- changelog | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/changelog b/changelog index b9391ae34..122b5a5bf 100644 --- a/changelog +++ b/changelog @@ -4680,7 +4680,7 @@ pandoc (0.46) unstable; urgency=low HTML reader now uses rawHtmlBlock', which excludes and , since these are handled in parseHtml. (Resolves Issue #38.) - + Fixed bug (emph parser was looking for tag, not ). + + Fixed bug (emph parser was looking for `` tag, not ``). + Don't interpret contents of style tags as markdown. (Resolves Issue #40.) @@ -4765,15 +4765,15 @@ pandoc (0.45) unstable; urgency=low + readTeXMath is now used for default HTML output in HTML, S5, RTF, and Docbook, if no other method for displaying math in HTML is specified. Enclosing $'s are no longer printed by default. - + By default, math is put inside . This way it can be + + By default, math is put inside ``. This way it can be distinguished from the surrounding text, e.g. put in a different font. 
* New --gladtex and --mimetex options for display of math in HTML: - + If --gladtex is specified, math is output between tags, so + + If --gladtex is specified, math is output between `` tags, so it can be processed by gladTeX. - + If --mimetex is specified, math is put in tags with a link + + If --mimetex is specified, math is put in `` tags with a link to the mimetex CGI script (or any other script that takes TeX math as input and outputs an image). The URL of the script may be specified, but defaults to /cgi-bin/mimetex.cgi. @@ -4787,13 +4787,13 @@ pandoc (0.45) unstable; urgency=low + Fixed bug: parser for minimized attributes should not swallow trailing spaces. + Simplified HTML attribute parsing. - + Changed parsing of code blocks in HTML reader: tag is no - longer needed.
<pre> suffices. All HTML tags in the code block
+    + Changed parsing of code blocks in HTML reader:  `<code>` tag is no
+      longer needed. `<pre>` suffices. All HTML tags in the code block
       (e.g. for syntax highlighting) are skipped, because they are not
-      portable to other output formats. A <code>...</code> block not
-      surrounded by <pre> now counts as inline HTML, not a code block.
+      portable to other output formats. A `<code>...</code>` block not
+      surrounded by `<pre>` now counts as inline HTML, not a code block.
     + Remove just one leading and one trailing newline from contents of
-      <pre>...</pre> in codeBlock parser.
+      `<pre>...</pre>
` in codeBlock parser. * Markdown reader: @@ -5115,8 +5115,8 @@ pandoc (0.43) unstable; urgency=low end code block with a nonindented line.) + Changed definition of 'emph': italics with '_' must not be followed by an alphanumeric character. This is to help - prevent interpretation of e.g. [LC_TYPE]: my_type as - '[LCTYPE]:mytype'. + prevent interpretation of e.g. `[LC_TYPE]: my_type` as + `[LCTYPE]:mytype`. + Improved Markdown.pl-compatibility in referenceLink: the two parts of a reference-style link may be separated by one space, but not more... [a] [link], [not] [a link]. @@ -5124,7 +5124,7 @@ pandoc (0.43) unstable; urgency=low Markdown.pl: the marker for the end of the code section is a clump of the same number of `'s with which the section began, followed by a non-` character. So, for example, - ` h ``` i ` -> h ``` i. + ` h ``` i ` -> `h ``` i`. + Split 'title' into 'linkTitle' and 'referenceTitle', since the rules are slightly different. + Rewrote 'para' for greater efficiency. @@ -5231,7 +5231,7 @@ pandoc (0.41) unstable; urgency=low [ John MacFarlane ] * Fixed bugs in HTML reader: - + Skip material at end *only if* is present (previously, + + Skip material at end *only if* `` is present (previously, only part of the document would be parsed if an error was found; now a proper error message is given). + Added new constant eitherBlockOrInline with elements that may @@ -5440,9 +5440,9 @@ pandoc (0.4) unstable; urgency=low may cause documents to be parsed differently. Users should take care in upgrading. + Changed autoLink parsing to conform better to Markdown.pl's - behavior. is not treated as a link, but - , , and - are. + behavior. `` is not treated as a link, but + ``, ``, and + `` are. + Cleaned up handling of embedded quotes in link titles. Now these are stored as a '"' character, not as '"'. 
+ Use lookAhead parser for the 'first pass' (looking for reference keys), @@ -5506,7 +5506,7 @@ pandoc (0.4) unstable; urgency=low been removed, since they are written programatically. This change introduces a new dependency on the xhtml package. + Fixed two bugs in email obfuscation involving improper escaping - of '&' in the
" <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines - return $ contents <> cr + -- escape if para starts with ordered list marker + st <- get + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + let rendered = render colwidth contents + let escapeDelimiter (x:xs) | x `elem` ".()" = '\\':x:xs + | otherwise = x : escapeDelimiter xs + escapeDelimiter [] = [] + let contents' = if isEnabled Ext_all_symbols_escapable opts && + not (stPlain st) && beginsWithOrderedListMarker rendered + then text $ escapeDelimiter rendered + else contents + return $ contents' <> cr -- title beginning with fig: indicates figure blockToMarkdown opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = blockToMarkdown opts (Para [Image alt (src,tit)]) -blockToMarkdown opts (Para inlines) = do - contents <- inlineListToMarkdown opts inlines - -- escape if para starts with ordered list marker - st <- get - let esc = if isEnabled Ext_all_symbols_escapable opts && - not (stPlain st) && - beginsWithOrderedListMarker (render Nothing contents) - then text "\x200B" -- zero-width space, a hack - else empty - return $ esc <> contents <> blankline +blockToMarkdown opts (Para inlines) = + (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) blockToMarkdown opts (RawBlock f str) | f == "html" = do st <- get diff --git a/tests/writer.opml b/tests/writer.opml index 228cad247..34d161fb8 100644 --- a/tests/writer.opml +++ b/tests/writer.opml @@ -18,7 +18,7 @@ - + @@ -55,18 +55,18 @@ - + - + - + - + -- cgit v1.2.3 From cbeb3bb2132908b76e3a83e61ff99418ebdf83b4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 24 Apr 2014 17:37:10 -0700 Subject: EPUB writer: Fixed some idrefs to match changes in ids. 
--- src/Text/Pandoc/Writers/EPUB.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 9f10554a9..c39a7798d 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -440,7 +440,7 @@ writeEPUB opts doc@(Pandoc meta _) = do xs -> [("properties", unwords xs)]) $ () let chapterRefNode ent = unode "itemref" ! - [("idref", takeFileName $ eRelativePath ent)] $ () + [("idref", toId $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! [("id", toId $ eRelativePath ent), ("href", eRelativePath ent), @@ -488,8 +488,8 @@ writeEPUB opts doc@(Pandoc meta _) = do case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! - [("idref", "cover"),("linear","no")] $ () ] - ++ ((unode "itemref" ! [("idref", "title_page") + [("idref", "cover_xhtml"),("linear","no")] $ () ] + ++ ((unode "itemref" ! [("idref", "title_page_xhtml") ,("linear", if null (docTitle meta) then "no" else "yes")] $ ()) : -- cgit v1.2.3 From 2eec20d92fd0f498da5b66ac03cf6f8159392323 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 25 Apr 2014 15:29:28 +0200 Subject: Org reader: Enable internal links Internal links in Org are possible by using an anchor-name as the target of a link: [[some-anchor][This]] is an internal link. It links <> here. 
--- src/Text/Pandoc/Readers/Org.hs | 50 ++++++++++++++++++++++++++++++++---------- tests/Tests/Readers/Org.hs | 25 +++++++++++++++++++++ 2 files changed, 63 insertions(+), 12 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 7f1893936..0e52bff90 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -79,6 +79,7 @@ type OrgBlockAttributes = M.Map String String -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions + , orgStateAnchorIds :: [String] , orgStateBlockAttributes :: OrgBlockAttributes , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int @@ -105,6 +106,7 @@ instance Default OrgParserState where defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState { orgStateOptions = def + , orgStateAnchorIds = [] , orgStateBlockAttributes = M.empty , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing @@ -116,6 +118,10 @@ defaultOrgParserState = OrgParserState , orgStateNotes' = [] } +recordAnchorId :: String -> OrgParser () +recordAnchorId i = updateState $ \s -> + s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + addBlockAttribute :: String -> String -> OrgParser () addBlockAttribute key val = updateState $ \s -> let attrs = orgStateBlockAttributes s @@ -848,17 +854,14 @@ explicitOrImageLink = try $ do title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ B.link src "" <$> - if isImageFilename src && isImageFilename title - then return $ B.image title mempty mempty - else title' + return $ if isImageFilename src && isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - returnF $ if isImageFilename src - then B.image src "" "" - else B.link src "" 
(B.str src) + return $ linkToInlinesF src (B.str src) plainLink :: OrgParser (F Inlines) plainLink = try $ do @@ -878,6 +881,26 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF s@('#':_) = pure . B.link s "" +linkToInlinesF s + | isImageFilename s = const . pure $ B.image s "" "" + | isUri s = pure . B.link s "" + | isRelativeUrl s = pure . B.link s "" +linkToInlinesF s = \title -> do + anchorB <- (s `elem`) <$> asksF orgStateAnchorIds + if anchorB + then pure $ B.link ('#':s) "" title + else pure $ B.emph title + +isRelativeUrl :: String -> Bool +isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s) + +isUri :: String -> Bool +isUri s = let (scheme, path) = break (== ':') s + in all (\c -> isAlphaNum c || c `elem` ".-") scheme + && not (null path) + isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && @@ -894,12 +917,15 @@ isImageFilename filename = -- an anchor. anchor :: OrgParser (F Inlines) -anchor = try $ pure <$> (B.spanWith <$> attributes <*> pure mempty) +anchor = try $ do + anchorId <- parseAnchor + recordAnchorId anchorId + returnF $ B.spanWith (solidify anchorId, [], []) mempty where - name = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") - <* string ">>" - attributes = name >>= \n -> return (solidify n, [], []) + parseAnchor = string "<<" + *> many1 (noneOf "\t\n\r<>\"' ") + <* string ">>" + <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors -- the org function @org-export-solidify-link-text@. diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index ed774f527..96747d148 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -202,6 +202,11 @@ tests = , link "http://moltkeplatz.de" "" "http://moltkeplatz.de" , "for", "fnords." 
]) + + , "Anchor" =: + "<> Link here later." =?> + (para $ spanWith ("anchor", [], []) mempty <> + "Link" <> space <> "here" <> space <> "later.") ] , testGroup "Meta Information" $ @@ -279,6 +284,26 @@ tests = , ":END:" ] =?> para (":FOO:" <> space <> ":END:") + + , "Anchor reference" =: + unlines [ "<> Target." + , "" + , "[[link-here][See here!]]" + ] =?> + (para (spanWith ("link-here", [], []) mempty <> "Target.") <> + para (link "#link-here" "" ("See" <> space <> "here!"))) + + , "Search links are read as emph" =: + "[[Wally][Where's Wally?]]" =?> + (para (emph $ "Where's" <> space <> "Wally?")) + + , "Link to nonexistent anchor" =: + unlines [ "<> Target." + , "" + , "[[link$here][See here!]]" + ] =?> + (para (spanWith ("link-here", [], []) mempty <> "Target.") <> + para (emph ("See" <> space <> "here!"))) ] , testGroup "Basic Blocks" $ -- cgit v1.2.3 From b09412d852880a0c8e18e1cab9b0ce33f0e0e8a2 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 25 Apr 2014 16:14:52 +0200 Subject: LaTeX writer: Mark span contents with label if span has an ID Prepend `\label{span-id}` to span contents iff `span-id` is defined. --- src/Text/Pandoc/Writers/LaTeX.hs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e12c9078f..e52220f01 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -655,16 +655,20 @@ isQuoted _ = False -- | Convert inline element to LaTeX inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc -inlineToLaTeX (Span (_,classes,_) ils) = do +inlineToLaTeX (Span (id',classes,_) ils) = do let noEmph = "csl-no-emph" `elem` classes let noStrong = "csl-no-strong" `elem` classes let noSmallCaps = "csl-no-smallcaps" `elem` classes - ((if noEmph then inCmd "textup" else id) . - (if noStrong then inCmd "textnormal" else id) . - (if noSmallCaps then inCmd "textnormal" else id) . 
- (if not (noEmph || noStrong || noSmallCaps) - then braces - else id)) `fmap` inlineListToLaTeX ils + let label' = if (null id') + then empty + else text "\\label" <> braces (text $ toLabel id') + fmap (label' <>) + ((if noEmph then inCmd "textup" else id) . + (if noStrong then inCmd "textnormal" else id) . + (if noSmallCaps then inCmd "textnormal" else id) . + (if not (noEmph || noStrong || noSmallCaps) + then braces + else id)) `fmap` inlineListToLaTeX ils inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = -- cgit v1.2.3 From a744e3868ea0d0e7c25bcdf7304b3ed598419b1c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 25 Apr 2014 20:14:39 -0700 Subject: Bump version to 1.12.4. --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index e455c82a4..732b7cf50 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,5 +1,5 @@ Name: pandoc -Version: 1.12.3.3 +Version: 1.12.4 Cabal-Version: >= 1.10 Build-Type: Custom License: GPL -- cgit v1.2.3 From 2b3926d63a74b5b9ff9c61181608a6fac30968d8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 25 Apr 2014 21:41:39 -0700 Subject: Updated changelog. --- changelog | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) diff --git a/changelog b/changelog index 90a34c2e4..5a4f001a9 100644 --- a/changelog +++ b/changelog @@ -1,3 +1,245 @@ +pandoc (1.12.4) + + * Made it possible to run filters that aren't executable (#1096). + Pandoc first tries to find the executable (searching the path + if path isn't given). If it fails, but the file exists and has + a `.py`, `.pl`, `.rb`, `.hs`, or `.php` extension, pandoc runs the filter + using the appropriate interpreter. This should make it easier to + use filters on Windows, and make it more convenient for everyone. + + * Added Emacs org-mode reader (Albert Krewinkel). + + * Added InDesign ICML Writer (mb21). 
+ + * MediaWiki reader: + + + Accept image links in more languages (Jaime Marquínez Ferrándiz). + + Fixed bug in certain nested lists (#1213). If a level 2 list was + followed by a level 1 list, the first item of the level 1 list + would be lost. + + Handle table rows containing just an HTML comment (#1230). + + * LaTeX reader: + + + LaTeX reader: Better handling of `table` environment (#1204). + Positioning options no longer rendered verbatim. + + Better handling of figure and table with caption (#1204). + + Handle `@{}` and `p{length}` in tabular. The length is not actually + recorded, but at least we get a table (#1180). + + * Markdown reader: + + + Ensure that whole numbers in YAML metadata are rendered without + decimal points. (This became necessary with changes to aeson + and yaml libraries. aeson >= 0.7 and yaml >= 0.8.8.2 are now required.) + + Fixed regression on line breaks in strict mode (#1203). + + Small efficiency improvements. + + Improved parsing of nested `div`s. Formerly a closing `div` tag + would be missed if it came right after other block-level tags. + + Avoid backtracking when closing `
` not found. + + Fixed bug in reference link parsing in `markdown_mmd`. + + * Textile reader: + + + Better support for attributes. Instead of being ignored, attributes + are now parsed and included in Span inlines. The output will be a bit + different from stock textile: e.g. for `*(foo)hi*`, we'll get + `hi` instead of + `hi`. But at least the data is not lost. + + Improved treatment of HTML spans (%) (#1115). + + Improved link parsing. In particular we now pick up on attributes. + Since pandoc links can't have attributes, we enclose the whole link in + a span if there are attributes (#1008). + + Implemented correct parsing rules for inline markup (#1175, Matthew + Pickering). + + Use Builder (Matthew Pickering). + + * DocBook reader: + + + Better treatment of `formalpara`. We now emit the title (if present) + as a separate paragraph with boldface text (#1215). + + Set metadata `author` not `authors`. + + Added recognition of `authorgroup` and `releaseinfo` elements (#1214, + Matthew Pickering). + + Converted current meta information parsing in DocBook to a more + extensible version which is aware of the more recent meta + representation (Matthew Pickering). + + * HTML reader: + + + Require tagsoup 0.13.1, to fix a bug with parsing of script tags + (#1248). + + Treat processing instructions & declarations as block. Previously + these were treated as inline, and included in paragraph tags in HTML + or DocBook output, which is generally not what is wanted (#1233). + + Updated `closes` with rules from HTML5 spec. + + Use Builder (Matthew Pickering, #1162). + + * RST reader: + + + Remove duplicate `http` in PEP links (Albert Krewinkel). + + Make rst figures true figures (#1168, CasperVector) + + Enhanced Pandoc's support for rST roles (Merijn Verstaaten). + rST parser now supports: all built-in rST roles, new role definition, + role inheritance, though with some limitations. + + * LaTeX writer: + + + Mark span contents with label if span has an ID (Albert Krewinkel). 
+ + Made `--toc-depth` work well with books in latex/pdf output (#1210). + + Handle line breaks in simple table cells (#1217). + + Workaround for level 4-5 headers in quotes. These previously produced + invalid LaTeX: `\paragraph` or `\subparagraph` in a `quote` environment. + This adds an `mbox{}` in these contexts to work around the problem. + See (#1221). + + Use `\/` to avoid en-dash ligature instead of `-{}-` (Vaclav Zeman). + This is to fix LuaLaTeX output. The `-{}-` sequence does not avoid the + ligature with LuaLaTeX but `\/` does. + + * DocBook writer: + + + Improve handling of hard line breaks in Docbook writer + (Neil Mayhew). Use a `` for the entire paragraph, not + just for the newline character. + + Don't let line breaks inside footnotes influence the enclosing + paragraph (Neil Mayhew). + + * EPUB writer: + + + Include extension in epub ids. This fixes a problem with duplicate + extensions for fonts and images with the same base name but different + extensions (#1254). + + Handle files linked in raw `img` tags (#1170). + + Handle media in `audio` source tags (#1170). + Note that we now use a `media` directory rather than `images`. + + Incorporate files linked in `video` tags (#1170). `src` and `poster` + will both be incorporated into `content.opf` and the epub container. + + * HTML writer: + + + Add colgroup around col tags (#877). Also affects EPUB writer. + + Fixed bug with unnumbered section headings. Unnumbered section + headings (with class `unnumbered`) were getting numbers. + + * Man writer: Ensure that terms in definition lists aren't line wrapped + (#1195). + + * Markdown writer: + + + Use proper escapes to avoid unwanted lists (#980). Previously we used + 0-width spaces, an ugly hack. + + Use longer backtick fences if needed (#1206). If the content contains a + backtick fence and there are attributes, make sure longer fences are + used to delimit the code. 
Note: This works well in pandoc, but github + markdown is more limited, and will interpret the first string of three + or more backticks as ending the code block. + + * RST writer: Avoid stack overflow with certain tables (#1197). + + * RTF writer: Fixed table cells containing paragraphs. + + * Custom writer: Correctly handle UTF-8 in custom lua scripts (#1189). + + * `Text.Pandoc.Options`: Added `readerTrace` to `ReaderOptions` + + * `Text.Pandoc.Shared`: + + + Added `compactify'DL` (formerly in markdown reader) (Albert Krewinkel). + + Fixed bug in `toRomanNumeral`: numbers ending with '9' would + be rendered as Roman numerals ending with 'IXIV' (#1249). Thanks to + Jesse Rosenthal. + + `openURL`: set proxy with value of http_proxy env variable (#1211). + Note: proxies with non-root paths are not supported, due to + limitations in `http-conduit`. + + * `Text.Pandoc.PDF`: + + + Ensure that temp directories deleted on Windows (#1192). The PDF is + now read as a strict bytestring, ensuring that process ownership will + be terminated, so the temp directory can be deleted. + + Use `/` as path separators in a few places, even on Windows. + This seems to be necessary for texlive (#1151, thanks to Tim Lin). + + Use `;` for `TEXINPUTS` separator on Windows (#1151). + + Changes to error reporting, to handle non-UTF8 error output. + + * `Text.Pandoc.Templates`: + + + Removed unneeded datatype context (Merijn Verstraaten). + + + YAML objects resolve to "true" in conditionals (#1133). + Note: If `address` is a YAML object and you just have `$address$` + in your template, the word `true` will appear, which may be + unexpected. (Previously nothing would appear.) + + * `Text.Pandoc.SelfContained`: Handle `poster` attribute in `video` + tags (#1188). + + * `Text.Pandoc.Parsing`: + + + Made `F` an instance of Applicative (#1138). + + Added `stateCaption`. + + Added `HasMacros`, simplified other typeclasses. 
+ Removed `updateHeaderMap`, `setHeaderMap`, `getHeaderMap`, + `updateIdentifierList`, `setIdentifierList`, `getIdentifierList`. + + Changed the smart punctuation parser to return `Inlines` + rather than `Inline` (Matthew Pickering). + + Changed `HasReaderOptions`, `HasHeaderMap`, `HasIdentifierList` + from typeclasses of monads to typeclasses of states. This simplifies + the instance definitions and provides more flexibility. Generalized + type of `getOption` and added a default definition. Removed + `askReaderOption`. Added `extractReaderOption`. Added + `extractHeaderMap` and `updateHeaderMap` in `HasHeaderMap`. + Gave default definitions for `getHeaderMap`, `putHeaderMap`, + `modifyHeaderMap`. Added `extractIdentifierList` and + `updateIdentifierList` in `HasIdentifierList`. Gave defaults + for `getIdentifierList`, `putIdentifierList`, and + `modifyIdentifierList`. The ultimate goal here is to allow different + parsers to use their own, tailored parser states (instead of + `ParserState`) while still using shared functions. + + * Template changes: + + + LaTeX template: use `fontenc` package only with `pdflatex` (#1164). + + Beamer template: Consistent styles for figure and table captions + (aaronwolen). + + Beamer template: Adjust widths correctly for oversized images + (Garrick Aden-Buie). + + Beamer template: Added caption package (#1200). + + Beamer template: changes for better unicode handling (KarolS). + + DocBook template: use `authorgroup` if there are authors. + + revealjs template: Move `include-after` to end (certainlyakey). + + revealjs template: Fixed PDF print function (#1220, kevinkenan). + + * Bumped version bounds of dependencies. + + * Added a `--trace` command line option, for debugging backtracking + bugs. So far this only works with the markdown reader. + + * MathMLinHTML: Fixed deprecation warning (#362, gwern, Albert Krewinkel). + + * Updated travis script to test with multiple GHC versions. 
+ + * Force failure of a Travis build if GHC produces warnings (Albert + Krewinkel). + + * Add `.editorconfig` (Albert Krewinkel). + See for details. + + * Give more useful error message if '-t pdf' is specified (#1155). + + * README: + + + Added an explanation of how to use YAML metadata to + force items to appear in the bibliography without citations in + the text (like LaTeX `\nocite`). + + Added note to `--bibtex/--natbib`: not for use in making PDF + (#1194, thanks to nahoj). + + * Moved some doc files from `data-files` to `extra-source-files` (#1123). + They aren't needed at runtime. We keep README and COPYRIGHT in data + to ensure that they'll be available on all systems on which pandoc + is installed. + pandoc (1.12.3.3) * To changes to source; recompiled tarball with latest alex and -- cgit v1.2.3 From 35ea8de3690fcd9bf06532576ced4d82fd51f26d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 26 Apr 2014 12:04:08 -0700 Subject: HTML writer: improved detection of image links. Previously image links with queries were not recognized, leading to use of an embed tag rather than an img tag. --- src/Text/Pandoc/Writers/HTML.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index e0385af25..1de4345f9 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -40,6 +40,7 @@ import Text.Pandoc.Slides import Text.Pandoc.Highlighting ( highlight, styleToCss, formatHtmlInline, formatHtmlBlock ) import Text.Pandoc.XML (fromEntities, escapeStringForXML) +import Network.URI ( parseURIReference, URI(..) 
) import Network.HTTP ( urlEncode ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) @@ -396,7 +397,10 @@ imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", treatAsImage :: FilePath -> Bool treatAsImage fp = - let ext = map toLower $ drop 1 $ takeExtension fp + let path = case uriPath `fmap` parseURIReference fp of + Nothing -> fp + Just up -> up + ext = map toLower $ drop 1 $ takeExtension path in null ext || ext `elem` imageExts -- | Convert Pandoc block element to HTML. -- cgit v1.2.3 From 22e36e104058b0ea93dbda106374f6c02bbf36d6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 26 Apr 2014 12:14:42 -0700 Subject: LaTeX reader: Made `\nocite` work. This adds nocite citations to a metadata field, `nocite`. These will appear in the bibliography but not in the text (unless you use a `$nocite$` variable in your template, of course). --- src/Text/Pandoc/Readers/LaTeX.hs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index fd761dbec..b5d529eb9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -322,7 +322,8 @@ blockCommands = M.fromList $ ] addMeta :: ToMetaValue a => String -> a -> LP () -addMeta field val = updateState $ setMeta field val +addMeta field val = updateState $ \st -> + st{ stateMeta = addMetaField field val $ stateMeta st } setCaption :: Inlines -> LP Blocks setCaption ils = do @@ -341,7 +342,7 @@ authors = try $ do -- skip e.g. 
\vspace{10pt} auths <- sepBy oneAuthor (controlSeq "and") char '}' - addMeta "authors" (map trimInlines auths) + addMeta "author" (map trimInlines auths) section :: Attr -> Int -> LP Blocks section (ident, classes, kvs) lvl = do @@ -525,10 +526,12 @@ inlineCommands = M.fromList $ , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *> complexNatbibCitation AuthorInText) <|> citation "citeauthor" AuthorInText False) + , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= + addMeta "nocite")) ] ++ map ignoreInlines -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks: - [ "noindent", "index", "nocite" ] + [ "noindent", "index" ] mkImage :: String -> LP Inlines mkImage src = do -- cgit v1.2.3 From f81fc6c3bdfcb439892f92b69c02b77342277cc8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 26 Apr 2014 12:32:22 -0700 Subject: Update latex reader test for change in latex reader. We use 'author' for metadata, not 'authors'. 
--- tests/latex-reader.native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/latex-reader.native b/tests/latex-reader.native index fcc3153cf..abc4b05a7 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [RawBlock (Format "latex") "\\maketitle" ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule -- cgit v1.2.3 From fb24888f52f2d6dfe37adb966ae7a9761e77d4cb Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 27 Apr 2014 19:51:14 +0200 Subject: COPYRIGHT: Specifiy the version of the GPL All code in pandoc licensed under the GPL version 2 or later is just marked as being licensed under the GPL. There are multiple versions of the GPL, most notably version 2, version 3 and the Affero GPL. As to avoid possible confusion, licensing info is updated to be more specific about the GPL and its version. --- COPYRIGHT | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/COPYRIGHT b/COPYRIGHT index cfec5a4bf..85dbe1585 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -35,38 +35,38 @@ licenses. 
src/Text/Pandoc/Writers/Texinfo.hs Copyright (C) 2008-2010 John MacFarlane and Peter Wang -Released under the GPL. +Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Writers/OpenDocument.hs Copyright (C) 2008-2010 Andrea Rossato and John MacFarlane -Released under the GPL. +Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Writers/Org.hs Copyright (C) 2010 Puneeth Chaganti -Released under the GPL. +Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Readers/Textile.hs Copyright (C) 2010 Paul Rivier -Released under the GPL. +Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Readers/Org.hs tests/Tests/Readers/Org.hs Copyright (C) 2014 Albert Krewinkel -Released under the GPL. +Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Biblio.hs Copyright (C) 2008-2010 Andrea Rossato -Released under the GPL. +Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- data/LaTeXMathML.js @@ -74,15 +74,13 @@ Adapted by Jeff Knisely and Douglas Woodall from ASCIIMathML.js v. 1.4.7 Copyright (C) 2005 Peter Jipsen -Released under the GPL. +Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- -data/MathMLInHTML.js +data/MathMLinHTML.js Copyright (C) 2004 Peter Jipsen http://www.chapman.edu/~jipsen Released under the GNU General Public License version 2 or later. 
-See the GNU General Public License -(at http://www.gnu.org/copyleft/gpl.html) for more details. ---------------------------------------------------------------------- s5/default @@ -113,7 +111,7 @@ windows/modpath.iss Copyright (c) 2007 Jared Breland http://legroom.net/software -Released under the GPL. +Released under the GNU General Public License version 2 or later. ------------------------------------------------------------------------ The dzslides template contains javascript and CSS from Paul Rouget's -- cgit v1.2.3 From 6cf60f2f8e6aa4e948f7f8140c4f0c7b3156f037 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 27 Apr 2014 21:20:06 +0200 Subject: COPYRIGHT: Remove outdated copyright statements Some files are no longer distributed with pandoc, the respective copyright and licensing information is removed. --- COPYRIGHT | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/COPYRIGHT b/COPYRIGHT index 85dbe1585..cd5adb1be 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -62,12 +62,6 @@ Copyright (C) 2014 Albert Krewinkel Released under the GNU General Public License version 2 or later. ----------------------------------------------------------------------- -src/Text/Pandoc/Biblio.hs -Copyright (C) 2008-2010 Andrea Rossato - -Released under the GNU General Public License version 2 or later. - ---------------------------------------------------------------------- data/LaTeXMathML.js Adapted by Jeff Knisely and Douglas Woodall from @@ -82,37 +76,6 @@ Copyright (C) 2004 Peter Jipsen http://www.chapman.edu/~jipsen Released under the GNU General Public License version 2 or later. ----------------------------------------------------------------------- -s5/default -S5 slides.js and css files -by Eric A. Meyer - Date: Sun, 27 Apr 2014 20:09:54 +0200 Subject: README: Update variable defaults documentation The documented default values of variables now agree with their true default values as defined in Writer/HTML.hs. 
--- README | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README b/README index 18c296fdd..8ca5ccc41 100644 --- a/README +++ b/README @@ -507,7 +507,7 @@ Options affecting specific writers `-c` *URL*, `--css=`*URL* : Link to a CSS style sheet. This option can be be used repeatedly to - include multiple files. They will be included in the order specified. + include multiple files. They will be included in the order specified. `--reference-odt=`*FILE* : Use the specified file as a style reference in producing an ODT. @@ -771,9 +771,9 @@ as `title`, `author`, and `date`) as well as the following: : base URL for Slidy documents (defaults to `http://www.w3.org/Talks/Tools/Slidy2`) `slideous-url` -: base URL for Slideous documents (defaults to `default`) +: base URL for Slideous documents (defaults to `slideous`) `s5-url` -: base URL for S5 documents (defaults to `ui/default`) +: base URL for S5 documents (defaults to `s5/default`) `revealjs-url` : base URL for reveal.js documents (defaults to `reveal.js`) `theme` -- cgit v1.2.3 From f80678df7f003c2e1c4f226fab39cea55f3907a8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 27 Apr 2014 20:38:15 -0700 Subject: A bit of refactoring that shouldn't change any semantics. In preparation for a fix to #1267. 
--- pandoc.hs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index d23fa79f3..d39ed3a59 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -243,13 +243,13 @@ options :: [OptDescr (Opt -> IO Opt)] options = [ Option "fr" ["from","read"] (ReqArg - (\arg opt -> return opt { optReader = map toLower arg }) + (\arg opt -> return opt { optReader = arg }) "FORMAT") "" , Option "tw" ["to","write"] (ReqArg - (\arg opt -> return opt { optWriter = map toLower arg }) + (\arg opt -> return opt { optWriter = arg }) "FORMAT") "" @@ -1008,14 +1008,13 @@ main = do then "html" else "markdown" in defaultReaderName fallback sources - else readerName - - let writerName' = if null writerName - then defaultWriterName outputFile - else case writerName of - "epub2" -> "epub" - "html4" -> "html" - x -> x + else map toLower readerName + + let writerName' = case map toLower writerName of + [] -> defaultWriterName outputFile + "epub2" -> "epub" + "html4" -> "html" + x -> x let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" -- cgit v1.2.3 From c8f97d3d418f929a1f499a9ef37c17d71f282d45 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 27 Apr 2014 20:56:50 -0700 Subject: Fix #1267. We now check the writerName for a lua script in pandoc.hs, so that lowercasing and format parsing aren't done. Note this behavior change: getWriter in Text.Pandoc no longer returns a custom writer on input "foo.lua". --- pandoc.hs | 23 +++++++++++++---------- src/Text/Pandoc.hs | 35 ++++++++++++++++------------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index d39ed3a59..93e1d4a76 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -47,7 +47,7 @@ import System.Exit ( exitWith, ExitCode (..) 
) import System.FilePath import System.Console.GetOpt import Data.Char ( toLower ) -import Data.List ( intercalate, isPrefixOf, sort ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort ) import System.Directory ( getAppUserDataDirectory, findExecutable, doesFileExist ) import System.IO ( stdout, stderr ) @@ -1021,15 +1021,18 @@ main = do let laTeXOutput = "latex" `isPrefixOf` writerName' || "beamer" `isPrefixOf` writerName' - writer <- case getWriter writerName' of - Left e -> err 9 $ - if writerName' == "pdf" - then e ++ "\nTo create a pdf with pandoc, use the " ++ - "latex or beamer writer and specify\n" ++ - "an output file with .pdf extension " ++ - "(pandoc -t latex -o filename.pdf)." - else e - Right w -> return w + writer <- if ".lua" `isSuffixOf` writerName' + -- note: use non-lowercased version writerName + then return $ IOStringWriter $ writeCustom writerName + else case getWriter writerName' of + Left e -> err 9 $ + if writerName' == "pdf" + then e ++ "\nTo create a pdf with pandoc, use " ++ + "the latex or beamer writer and specify\n" ++ + "an output file with .pdf extension " ++ + "(pandoc -t latex -o filename.pdf)." + else e + Right w -> return w reader <- case getReader readerName' of Right r -> return r diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 66b0e49c0..a37c98814 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -152,7 +152,7 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (safeRead, warn) import Data.Aeson import qualified Data.ByteString.Lazy as BL -import Data.List (intercalate, isSuffixOf) +import Data.List (intercalate) import Data.Version (showVersion) import Data.Set (Set) import qualified Data.Set as Set @@ -292,24 +292,21 @@ getReader s = -- | Retrieve writer based on formatSpec (format+extensions). 
getWriter :: String -> Either String Writer -getWriter s = - case parseFormatSpec s of - Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] - Right (writerName, setExts) -> - case lookup writerName writers of - Nothing - | ".lua" `isSuffixOf` s -> - Right $ IOStringWriter $ writeCustom s - | otherwise -> Left $ "Unknown writer: " ++ writerName - Just (PureStringWriter r) -> Right $ PureStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - Just (IOStringWriter r) -> Right $ IOStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } +getWriter s + = case parseFormatSpec s of + Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] + Right (writerName, setExts) -> + case lookup writerName writers of + Nothing -> Left $ "Unknown writer: " ++ writerName + Just (PureStringWriter r) -> Right $ PureStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } + Just (IOStringWriter r) -> Right $ IOStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } + Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } {-# DEPRECATED toJsonFilter "Use 'toJSONFilter' from 'Text.Pandoc.JSON' instead" #-} -- | Deprecated. Use @toJSONFilter@ from @Text.Pandoc.JSON@ instead. -- cgit v1.2.3 From 5dfeb5d52f92083bb37dd56bd0711aa0c93e9361 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 27 Apr 2014 21:01:30 -0700 Subject: Allow html4 as synonym of html as reader. It already worked for writer. 
--- pandoc.hs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index 93e1d4a76..959605625 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1003,12 +1003,13 @@ main = do Just _ -> return mbDataDir -- assign reader and writer based on options and filenames - let readerName' = if null readerName - then let fallback = if any isURI sources - then "html" - else "markdown" - in defaultReaderName fallback sources - else map toLower readerName + let readerName' = case map toLower readerName of + [] -> defaultReaderName + (if any isURI sources + then "html" + else "markdown") sources + "html4" -> "html" + x -> x let writerName' = case map toLower writerName of [] -> defaultWriterName outputFile -- cgit v1.2.3 From b6ae5d5e99a1575fcba3512b3a82ff77773a80ec Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 29 Apr 2014 18:14:39 -0700 Subject: Added SmallCaps to Arbitrary instance. --- tests/Tests/Arbitrary.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Tests/Arbitrary.hs b/tests/Tests/Arbitrary.hs index 31c0cb46a..82346d65f 100644 --- a/tests/Tests/Arbitrary.hs +++ b/tests/Tests/Arbitrary.hs @@ -49,7 +49,7 @@ arbInline n = frequency $ [ (60, liftM Str realString) , (10, liftM Strikeout $ arbInlines (n-1)) , (10, liftM Superscript $ arbInlines (n-1)) , (10, liftM Subscript $ arbInlines (n-1)) --- , (10, liftM SmallCaps $ arbInlines (n-1)) + , (10, liftM SmallCaps $ arbInlines (n-1)) , (10, do x1 <- arbitrary x2 <- arbInlines (n-1) return $ Quoted x1 x2) -- cgit v1.2.3 From f8a34f1694c9fda3319fcface155eed97608ded1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 29 Apr 2014 18:32:42 -0700 Subject: Added Cite to Arbitrary instance. See #1269. This reveals some test failures. 
--- tests/Tests/Arbitrary.hs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/Tests/Arbitrary.hs b/tests/Tests/Arbitrary.hs index 82346d65f..3675d97bf 100644 --- a/tests/Tests/Arbitrary.hs +++ b/tests/Tests/Arbitrary.hs @@ -64,6 +64,7 @@ arbInline n = frequency $ [ (60, liftM Str realString) x3 <- realString x2 <- liftM escapeURI realString return $ Image x1 (x2,x3)) + , (2, liftM2 Cite arbitrary (arbInlines 1)) , (2, liftM Note $ resize 3 $ listOf1 $ arbBlock (n-1)) ] @@ -111,7 +112,6 @@ instance Arbitrary Pandoc where arbitrary = resize 8 $ liftM normalize $ liftM2 Pandoc arbitrary arbitrary -{- instance Arbitrary CitationMode where arbitrary = do x <- choose (0 :: Int, 2) @@ -123,14 +123,13 @@ instance Arbitrary CitationMode where instance Arbitrary Citation where arbitrary - = do x1 <- liftM (filter (`notElem` ",;]@ \t\n")) arbitrary - x2 <- arbitrary - x3 <- arbitrary + = do x1 <- listOf $ elements $ ['a'..'z'] ++ ['0'..'9'] ++ ['_'] + x2 <- arbInlines 1 + x3 <- arbInlines 1 x4 <- arbitrary x5 <- arbitrary x6 <- arbitrary return (Citation x1 x2 x3 x4 x5 x6) --} instance Arbitrary MathType where arbitrary -- cgit v1.2.3 From 7f86f95f3613677d515974f66660dad99a260b8b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 29 Apr 2014 22:25:46 -0700 Subject: Require pandoc-types 1.12.3.3. Fixes #1269. 
--- pandoc.cabal | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index 732b7cf50..002d1671c 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -221,7 +221,7 @@ Library xml >= 1.3.12 && < 1.4, random >= 1 && < 1.1, extensible-exceptions >= 0.1 && < 0.2, - pandoc-types >= 1.12.3 && < 1.13, + pandoc-types >= 1.12.3.3 && < 1.13, aeson >= 0.7 && < 0.8, tagsoup >= 0.13.1 && < 0.14, base64-bytestring >= 0.1 && < 1.1, @@ -319,7 +319,7 @@ Library Executable pandoc Build-Depends: pandoc, - pandoc-types >= 1.12.3 && < 1.13, + pandoc-types >= 1.12.3.3 && < 1.13, base >= 4.2 && <5, directory >= 1 && < 1.3, filepath >= 1.1 && < 1.4, @@ -364,7 +364,7 @@ Test-Suite test-pandoc Build-Depends: base >= 4.2 && < 5, syb >= 0.1 && < 0.5, pandoc, - pandoc-types >= 1.12.3 && < 1.13, + pandoc-types >= 1.12.3.3 && < 1.13, bytestring >= 0.9 && < 0.11, text >= 0.11 && < 1.2, directory >= 1 && < 1.3, -- cgit v1.2.3 From a22b3a218dece2cf9861c8c38ff239cd96ae0a06 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 29 Apr 2014 15:36:13 +0200 Subject: gitignore: Ignore cabal's sandbox files --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 98fe2fd6d..97150be15 100644 --- a/.gitignore +++ b/.gitignore @@ -4,11 +4,13 @@ README.* !README.Debian INSTALL.* .configure-stamp +.cabal-sandbox +cabal.sandbox.config +pandoc.cabal.orig man/man?/*.1 man/man?/*.5 man/man?/*.html *.diff -pandoc.cabal.orig *.o *.hi *.pyc -- cgit v1.2.3 From 093229dc35506bff88f4edc6f2ae5316d621f8ff Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 08:58:10 -0700 Subject: ConTeXt writer: Improved autolinks. Closes #1270. 
--- src/Text/Pandoc/Writers/ConTeXt.hs | 16 +++++----------- tests/writer.context | 10 ++++------ 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 3095cf508..cec420dcf 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Walk (query) import Text.Printf ( printf ) -import Data.List ( intercalate, isPrefixOf ) +import Data.List ( intercalate ) import Control.Monad.State import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate' ) @@ -283,14 +283,6 @@ inlineToConTeXt (RawInline "tex" str) = return $ text str inlineToConTeXt (RawInline _ _) = return empty inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr inlineToConTeXt Space = return space --- autolink -inlineToConTeXt (Link [Str str] (src, tit)) - | if "mailto:" `isPrefixOf` src - then src == escapeURI ("mailto:" ++ str) - else src == escapeURI str = - inlineToConTeXt (Link - [RawInline "context" "\\hyphenatedurl{", Str str, RawInline "context" "}"] - (src, tit)) -- Handle HTML-like internal document references to sections inlineToConTeXt (Link txt (('#' : ref), _)) = do opts <- gets stOptions @@ -305,6 +297,7 @@ inlineToConTeXt (Link txt (('#' : ref), _)) = do <> brackets (text ref) inlineToConTeXt (Link txt (src, _)) = do + let isAutolink = txt == [Str src] st <- get let next = stNextRef st put $ st {stNextRef = next + 1} @@ -313,8 +306,9 @@ inlineToConTeXt (Link txt (src, _)) = do return $ "\\useURL" <> brackets (text ref) <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) - <> brackets empty - <> brackets label + <> (if isAutolink + then empty + else brackets empty <> brackets label) <> "\\from" <> brackets (text ref) inlineToConTeXt (Image _ (src, _)) = do diff --git a/tests/writer.context b/tests/writer.context index 0b031fd76..0c5024d89 100644 --- 
a/tests/writer.context +++ b/tests/writer.context @@ -813,24 +813,22 @@ braces]\from[url26]. \subsection[autolinks]{Autolinks} -With an ampersand: -\useURL[url27][http://example.com/?foo=1&bar=2][][\hyphenatedurl{http://example.com/?foo=1&bar=2}]\from[url27] +With an ampersand: \useURL[url27][http://example.com/?foo=1&bar=2]\from[url27] \startitemize[packed] \item In a list? \item - \useURL[url28][http://example.com/][][\hyphenatedurl{http://example.com/}]\from[url28] + \useURL[url28][http://example.com/]\from[url28] \item It should. \stopitemize An e-mail address: -\useURL[url29][mailto:nobody@nowhere.net][][\hyphenatedurl{nobody@nowhere.net}]\from[url29] +\useURL[url29][mailto:nobody@nowhere.net][][nobody@nowhere.net]\from[url29] \startblockquote -Blockquoted: -\useURL[url30][http://example.com/][][\hyphenatedurl{http://example.com/}]\from[url30] +Blockquoted: \useURL[url30][http://example.com/]\from[url30] \stopblockquote Auto-links should not occur here: \type{} -- cgit v1.2.3 From 81bf82c258f12700d64c8d090f75a90c8a18ec61 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 09:59:36 -0700 Subject: RST reader: Better handling of directives. * We now correctly handle field lists that are indented more than 3 spaces. * We treat an "aafig" directive as a code block with attributes, so it can be processed in a filter. (Closes #1212.) 
--- src/Text/Pandoc/Readers/RST.hs | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7785861cc..4dc1fa006 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -185,22 +185,22 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: String -> RSTParser (String, String) -rawFieldListItem indent = try $ do - string indent +rawFieldListItem :: Int -> RSTParser (String, String) +rawFieldListItem minIndent = try $ do + indent <- length <$> many (char ' ') + guard $ indent >= minIndent char ':' name <- many1Till (noneOf "\n") (char ':') (() <$ lookAhead newline) <|> skipMany1 spaceChar first <- anyLine - rest <- option "" $ try $ do lookAhead (string indent >> spaceChar) + rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) indentedBlock let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" - return (name, raw) + return (name, trimr raw) -fieldListItem :: String - -> RSTParser (Inlines, [Blocks]) -fieldListItem indent = try $ do - (name, raw) <- rawFieldListItem indent +fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) +fieldListItem minIndent = try $ do + (name, raw) <- rawFieldListItem minIndent let term = B.str name contents <- parseFromString parseBlocks raw optional blanklines @@ -208,7 +208,7 @@ fieldListItem indent = try $ do fieldList :: RSTParser Blocks fieldList = try $ do - indent <- lookAhead $ many spaceChar + indent <- length <$> lookAhead (many spaceChar) items <- many1 $ fieldListItem indent case items of [] -> return mempty @@ -521,11 +521,11 @@ directive' = do skipMany spaceChar top <- many $ satisfy (/='\n') <|> try (char '\n' <* - notFollowedBy' (rawFieldListItem " ") <* + notFollowedBy' (rawFieldListItem 3) <* count 3 (char ' ') <* notFollowedBy blankline) newline - fields <- many $ rawFieldListItem " " + fields <- many $ rawFieldListItem 3 body <- 
option "" $ try $ blanklines >> indentedBlock optional blanklines let body' = body ++ "\n\n" @@ -576,6 +576,9 @@ directive' = do role -> role }) "code" -> codeblock (lookup "number-lines" fields) (trim top) body "code-block" -> codeblock (lookup "number-lines" fields) (trim top) body + "aafig" -> do + let attribs = ("", ["aafig"], fields) + return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body "figure" -> do -- cgit v1.2.3 From eaba340b9381264f3706c780182711a8713b3def Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 11:28:18 -0700 Subject: RST reader: Some fixes to last change, and use "author" not "authors". (in metadata) --- src/Text/Pandoc/Readers/RST.hs | 13 +++++++------ tests/rst-reader.native | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 4dc1fa006..54b6fa34a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -113,15 +113,16 @@ titleTransform (bs, meta) = metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta metaFromDefList ds meta = adjustAuthors $ foldr f meta ds where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v) - adjustAuthors (Meta metamap) = Meta $ M.adjust toPlain "author" + adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" - $ M.adjust splitAuthors "authors" + $ M.mapKeys (\k -> if k == "authors" then "author" else k) $ metamap toPlain (MetaBlocks [Para xs]) = MetaInlines xs toPlain x = x - splitAuthors (MetaBlocks [Para xs]) = MetaList $ map MetaInlines - $ splitAuthors' xs + splitAuthors (MetaBlocks [Para xs]) + = MetaList $ map MetaInlines + $ splitAuthors' xs splitAuthors x = x splitAuthors' = map normalizeSpaces . splitOnSemi . 
concatMap factorSemi @@ -196,7 +197,7 @@ rawFieldListItem minIndent = try $ do rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) indentedBlock let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" - return (name, trimr raw) + return (name, raw) fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) fieldListItem minIndent = try $ do @@ -577,7 +578,7 @@ directive' = do "code" -> codeblock (lookup "number-lines" fields) (trim top) body "code-block" -> codeblock (lookup "number-lines" fields) (trim top) body "aafig" -> do - let attribs = ("", ["aafig"], fields) + let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body diff --git a/tests/rst-reader.native b/tests/rst-reader.native index fd48bc60c..c77d15775 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Header 1 ("level-one-header",[],[]) [Str "Level",Space,Str "one",Space,Str "header"] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str 
"are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 2 ("level-two-header",[],[]) [Str "Level",Space,Str "two",Space,Str "header"] -- cgit v1.2.3 From 9ea9b916af66995c878ab89e6122e5d2b58dde84 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 23:25:07 -0700 Subject: Use \setkeys{Gin}{} to set appropriate defaults for \includegraphics. In pandoc and beamer templates. Thanks to Yihui Xie. --- data/templates | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/templates b/data/templates index 2afb0792b..c3a7937a2 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 2afb0792ba411006f51cd078fb7c409f0df19dbb +Subproject commit c3a7937a2852e654da23df978b7abf79955008b7 -- cgit v1.2.3 From ac104c4fdb957937c163a558348934a95fc13727 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 23:29:10 -0700 Subject: Updated tests for new LaTeX template. --- tests/writer.latex | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/tests/writer.latex b/tests/writer.latex index 4cb989fba..a2f973854 100644 --- a/tests/writer.latex +++ b/tests/writer.latex @@ -22,24 +22,14 @@ \IfFileExists{microtype.sty}{\usepackage{microtype}}{} \usepackage{fancyvrb} \usepackage{graphicx} -% Redefine \includegraphics so that, unless explicit options are -% given, the image width will not exceed the width of the page. -% Images get their normal width if they fit onto the page, but -% are scaled down if they would overflow the margins. 
\makeatletter -\def\ScaleIfNeeded{% - \ifdim\Gin@nat@width>\linewidth - \linewidth - \else - \Gin@nat@width - \fi -} +\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} +\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} \makeatother -\let\Oldincludegraphics\includegraphics -{% - \catcode`\@=11\relax% - \gdef\includegraphics{\@ifnextchar[{\Oldincludegraphics}{\Oldincludegraphics[width=\ScaleIfNeeded]}}% -}% +% Scale images if necessary, so that they will not overflow the page +% margins by default, and it is still possible to overwrite the defaults +% using explicit options in \includegraphics[width, height, ...]{} +\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} \ifxetex \usepackage[setpagesize=false, % page size defined by xetex unicode=false, % unicode breaks when used with xetex -- cgit v1.2.3 From 8726eebcd363ccb33ea8c297b004feca7ef37ceb Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 30 Apr 2014 11:16:01 +0200 Subject: Org reader: Add support for custom link types Org allows users to define their own custom link types. E.g., in a document with a lot of links to Wikipedia articles, one can define a custom wikipedia link-type via #+LINK: wp https://en.wikipedia.org/wiki/ This allows one to write [[wp:Org_mode][Org-mode]] instead of the equivalent [[https://en.wikipedia.org/wiki/Org_mode][Org-mode]]. 
--- src/Text/Pandoc/Readers/Org.hs | 68 +++++++++++++++++++++++++++++++++++++----- tests/Tests/Readers/Org.hs | 26 ++++++++++++++++ 2 files changed, 86 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0e52bff90..d68ef45ef 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,7 +43,7 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) -import Control.Monad (foldM, guard, liftM, liftM2, when) +import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) import Data.Default @@ -51,6 +51,7 @@ import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M import Data.Maybe (listToMaybe, fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) +import Network.HTTP (urlEncode) -- | Parse org-mode string and return a Pandoc document. 
readOrg :: ReaderOptions -- ^ Reader options @@ -76,6 +77,8 @@ type OrgNoteTable = [OrgNoteRecord] type OrgBlockAttributes = M.Map String String +type OrgLinkFormatters = M.Map String (String -> String) + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions @@ -86,6 +89,7 @@ data OrgParserState = OrgParserState , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos + , orgStateLinkFormatters :: OrgLinkFormatters , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable @@ -113,6 +117,7 @@ defaultOrgParserState = OrgParserState , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing + , orgStateLinkFormatters = M.empty , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] @@ -175,6 +180,13 @@ resetEmphasisNewlines :: OrgParser () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } +addLinkFormat :: String + -> (String -> String) + -> OrgParser () +addLinkFormat key formatter = updateState $ \s -> + let fs = orgStateLinkFormatters s + in s{ orgStateLinkFormatters = M.insert key formatter fs } + addToNotesTable :: OrgNoteRecord -> OrgParser () addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState @@ -423,7 +435,8 @@ specialLine :: OrgParser (F Blocks) specialLine = fmap return . 
try $ metaLine <|> commentLine metaLine :: OrgParser Blocks -metaLine = try $ metaLineStart *> declarationLine +metaLine = try $ mempty + <$ (metaLineStart *> (optionLine <|> declarationLine)) commentLine :: OrgParser Blocks commentLine = try $ commentLineStart *> anyLine *> pure mempty @@ -436,14 +449,14 @@ metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" commentLineStart :: OrgParser String commentLineStart = try $ mappend <$> many spaceChar <*> string "# " -declarationLine :: OrgParser Blocks +declarationLine :: OrgParser () declarationLine = try $ do key <- metaKey inlinesF <- metaInlines updateState $ \st -> let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta in st { orgStateMeta' = orgStateMeta' st <> meta' } - return mempty + return () metaInlines :: OrgParser (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline @@ -453,6 +466,35 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces +optionLine :: OrgParser () +optionLine = try $ do + key <- metaKey + case key of + "link" -> parseLinkFormat >>= uncurry addLinkFormat + _ -> mzero + +parseLinkFormat :: OrgParser ((String, String -> String)) +parseLinkFormat = try $ do + linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkSubst <- parseFormat + return (linkType, linkSubst) + +-- | An ad-hoc, single-argument-only implementation of a printf-style format +-- parser. +parseFormat :: OrgParser (String -> String) +parseFormat = try $ do + replacePlain <|> replaceUrl <|> justAppend + where + -- inefficient, but who cares + replacePlain = try $ (\x -> concat . flip intersperse x) + <$> sequence [tillSpecifier 's', rest] + replaceUrl = try $ (\x -> concat . flip intersperse x . 
urlEncode) + <$> sequence [tillSpecifier 'h', rest] + justAppend = try $ (++) <$> rest + + rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + -- -- Headers -- @@ -850,13 +892,15 @@ linkOrImage = explicitOrImageLink explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do char '[' - src <- linkTarget + srcF <- applyCustomLinkFormat =<< linkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ if isImageFilename src && isImageFilename title - then pure $ B.link src "" $ B.image title mempty mempty - else linkToInlinesF src =<< title' + return $ do + src <- srcF + if isImageFilename src && isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do @@ -881,6 +925,14 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +applyCustomLinkFormat :: String -> OrgParser (F String) +applyCustomLinkFormat link = do + let (linkType, rest) = break (== ':') link + return $ do + formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters + return $ maybe link ($ drop 1 rest) formatter + + linkToInlinesF :: String -> Inlines -> F Inlines linkToInlinesF s@('#':_) = pure . 
B.link s "" linkToInlinesF s diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 96747d148..78684f0f1 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -304,6 +304,32 @@ tests = ] =?> (para (spanWith ("link-here", [], []) mempty <> "Target.") <> para (emph ("See" <> space <> "here!"))) + + , "Link abbreviation" =: + unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s" + , "[[wp:Org_mode][Wikipedia on Org-mode]]" + ] =?> + (para (link "https://en.wikipedia.org/wiki/Org_mode" "" + ("Wikipedia" <> space <> "on" <> space <> "Org-mode"))) + + , "Link abbreviation, defined after first use" =: + unlines [ "[[zl:non-sense][Non-sense articles]]" + , "#+LINK: zl http://zeitlens.com/tags/%s.html" + ] =?> + (para (link "http://zeitlens.com/tags/non-sense.html" "" + ("Non-sense" <> space <> "articles"))) + + , "Link abbreviation, URL encoded arguments" =: + unlines [ "#+link: expl http://example.com/%h/foo" + , "[[expl:Hello, World!][Moin!]]" + ] =?> + (para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!")) + + , "Link abbreviation, append arguments" =: + unlines [ "#+link: expl http://example.com/" + , "[[expl:foo][bar]]" + ] =?> + (para (link "http://example.com/foo" "" "bar")) ] , testGroup "Basic Blocks" $ -- cgit v1.2.3 From 007eb96e06bc1fff12119addf2e03552ac992b2e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 1 May 2014 09:23:21 -0700 Subject: Markdown reader: Make one-column pipe tables work. Closes #1218. 
--- src/Text/Pandoc/Readers/Markdown.hs | 22 ++++++++++++++++------ tests/pipe-tables.native | 10 +++++++++- tests/pipe-tables.txt | 10 ++++++++++ 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d3ca8d26f..97a3168da 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1221,11 +1221,20 @@ removeOneLeadingSpace xs = gridTableFooter :: MarkdownParser [Char] gridTableFooter = blanklines +pipeBreak :: MarkdownParser [Alignment] +pipeBreak = try $ do + nonindentSpaces + openPipe <- (True <$ char '|') <|> return False + first <- pipeTableHeaderPart + rest <- many $ sepPipe *> pipeTableHeaderPart + -- surrounding pipes needed for a one-column table: + guard $ not (null rest && not openPipe) + optional (char '|') + blankline + return (first:rest) + pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do - let pipeBreak = nonindentSpaces *> optional (char '|') *> - pipeTableHeaderPart `sepBy1` sepPipe <* - optional (char '|') <* blankline (heads,aligns) <- try ( pipeBreak >>= \als -> return (return $ replicate (length als) mempty, als)) <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> @@ -1244,12 +1253,13 @@ sepPipe = try $ do pipeTableRow :: MarkdownParser (F [Blocks]) pipeTableRow = do nonindentSpaces - optional (char '|') + openPipe <- (True <$ char '|') <|> return False let cell = mconcat <$> many (notFollowedBy (blankline <|> char '|') >> inline) first <- cell - sepPipe - rest <- cell `sepBy1` sepPipe + rest <- many $ sepPipe *> cell + -- surrounding pipes needed for a one-column table: + guard $ not (null rest && not openPipe) optional (char '|') blankline let cells = sequence (first:rest) diff --git a/tests/pipe-tables.native b/tests/pipe-tables.native index 5420a7bd3..eafd21d22 100644 --- a/tests/pipe-tables.native +++ b/tests/pipe-tables.native @@ -67,4 +67,12 @@ ,[[Plain [Str 
"orange"]] ,[Plain [Str "17"]]] ,[[Plain [Str "pear"]] - ,[Plain [Str "302"]]]]] + ,[Plain [Str "302"]]]] +,Para [Str "One-column:"] +,Table [] [AlignDefault] [0.0] + [[Plain [Str "hi"]]] + [[[Plain [Str "lo"]]]] +,Para [Str "Header-less",Space,Str "one-column:"] +,Table [] [AlignCenter] [0.0] + [[]] + [[[Plain [Str "hi"]]]]] diff --git a/tests/pipe-tables.txt b/tests/pipe-tables.txt index 79d79200f..ee8d54d9f 100644 --- a/tests/pipe-tables.txt +++ b/tests/pipe-tables.txt @@ -40,3 +40,13 @@ apple | 5 orange| 17 pear | 302 +One-column: + +|hi| +|--| +|lo| + +Header-less one-column: + +|:-:| +|hi| -- cgit v1.2.3 From 4c4382420356928d73026395d4ab2f0f9957df08 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 2 May 2014 22:58:47 -0700 Subject: Fixed empty reference links. Closes #1186. Includes test. --- src/Text/Pandoc/Readers/Markdown.hs | 4 +--- tests/markdown-reader-more.native | 5 ++++- tests/markdown-reader-more.txt | 8 ++++++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 97a3168da..36f73d847 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -349,10 +349,8 @@ referenceKey = try $ do char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') let sourceURL = liftM unwords $ many $ try $ do - notFollowedBy' referenceTitle - skipMany spaceChar - optional $ newline >> notFollowedBy blankline skipMany spaceChar + notFollowedBy' referenceTitle notFollowedBy' (() <$ reference) many1 $ notFollowedBy space >> litChar let betweenAngles = try $ char '<' >> diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 27f09dada..0d74c233d 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -136,4 +136,7 @@ ,Para [Link [Str "link"] ("/hithere)","")] ,Para [Link [Str "linky"] ("hi_(there_(nested))","")] ,Header 2 
("reference-link-fallbacks",[],[]) [Str "Reference",Space,Str "link",Space,Str "fallbacks"] -,Para [Str "[",Emph [Str "not",Space,Str "a",Space,Str "link"],Str "]",Space,Str "[",Emph [Str "nope"],Str "]\8230"]] +,Para [Str "[",Emph [Str "not",Space,Str "a",Space,Str "link"],Str "]",Space,Str "[",Emph [Str "nope"],Str "]\8230"] +,Header 2 ("empty-reference-links",[],[]) [Str "Empty",Space,Str "reference",Space,Str "links"] +,Para [Str "bar"] +,Para [Link [Str "foo2"] ("","")]] diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt index d133b3dbb..739543bfd 100644 --- a/tests/markdown-reader-more.txt +++ b/tests/markdown-reader-more.txt @@ -235,3 +235,11 @@ Empty cells ## Reference link fallbacks [*not a link*] [*nope*]... + +## Empty reference links + +[foo2]: + +bar + +[foo2] -- cgit v1.2.3 From 743dac493fab08abdec59feb7bd57030a3ba5c90 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 15:15:04 -0700 Subject: LaTeX reader: Better error messages with include files. Closes #1274. Rewrote handleIncludes. We now report the actual source file and position where the error occurs, even if it is included. We do this by inserting special commands, `\PandocStartInclude` and `\PandocEndInclude`, that encode this information in the preprocessing phase. Also generalized the types of a couple functions from `Text.Pandoc.Parsing`. --- src/Text/Pandoc/Parsing.hs | 2 +- src/Text/Pandoc/Readers/LaTeX.hs | 172 ++++++++++++++++++++++++++------------- 2 files changed, 118 insertions(+), 56 deletions(-) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index a9009eaa2..4d0a677da 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -504,7 +504,7 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. 
-withRaw :: Parser [Char] st a -> Parser [Char] st (a, [Char]) +withRaw :: Monad m => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char]) withRaw parser = do pos1 <- getPosition inp <- getInput diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index b5d529eb9..d1e0b6f0a 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -40,8 +40,10 @@ import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, mathDisplay, mathInline) +import Text.Parsec.Prim (ParsecT, runParserT) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) +import Control.Monad.Trans (lift) import Control.Monad import Text.Pandoc.Builder import Data.Char (isLetter, isAlphaNum) @@ -303,6 +305,8 @@ blockCommands = M.fromList $ , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) , ("caption", tok >>= setCaption) + , ("PandocStartInclude", startInclude) + , ("PandocEndInclude", endInclude) ] ++ map ignoreBlocks -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks @@ -794,31 +798,107 @@ rawEnv name = do (withRaw (env name blocks) >>= applyMacros' . snd) else env name blocks +---- + +type IncludeParser = ParsecT [Char] [String] IO String + -- | Replace "include" commands with file contents. 
handleIncludes :: String -> IO String -handleIncludes = handleIncludes' [] - --- parents parameter prevents infinite include loops -handleIncludes' :: [FilePath] -> String -> IO String -handleIncludes' _ [] = return [] -handleIncludes' parents ('\\':'%':xs) = - ("\\%"++) `fmap` handleIncludes' parents xs -handleIncludes' parents ('%':xs) = handleIncludes' parents - $ drop 1 $ dropWhile (/='\n') xs -handleIncludes' parents ('\\':xs) = - case runParser include defaultParserState "input" ('\\':xs) of - Right (fs, rest) -> do yss <- mapM (\f -> if f `elem` parents - then "" <$ warn ("Include file loop in '" - ++ f ++ "'.") - else readTeXFile f >>= - handleIncludes' (f:parents)) fs - rest' <- handleIncludes' parents rest - return $ intercalate "\n" yss ++ rest' - _ -> case runParser (verbCmd <|> verbatimEnv) defaultParserState - "input" ('\\':xs) of - Right (r, rest) -> (r ++) `fmap` handleIncludes' parents rest - _ -> ('\\':) `fmap` handleIncludes' parents xs -handleIncludes' parents (x:xs) = (x:) `fmap` handleIncludes' parents xs +handleIncludes s = do + res <- runParserT includeParser' [] "input" s + case res of + Right s' -> return s' + Left e -> error $ show e + +includeParser' :: IncludeParser +includeParser' = + concat <$> many (comment' <|> escaped' <|> blob' <|> include' + <|> startMarker' <|> endMarker' + <|> verbCmd' <|> verbatimEnv' <|> backslash') + +comment' :: IncludeParser +comment' = do + char '%' + xs <- manyTill anyChar newline + return ('%':xs ++ "\n") + +escaped' :: IncludeParser +escaped' = try $ string "\\%" <|> string "\\\\" + +verbCmd' :: IncludeParser +verbCmd' = fmap snd <$> + withRaw $ try $ do + string "\\verb" + c <- anyChar + manyTill anyChar (char c) + +verbatimEnv' :: IncludeParser +verbatimEnv' = fmap snd <$> + withRaw $ try $ do + string "\\begin" + name <- braced' + guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", + "minted", "alltt"] + manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}") + +blob' :: IncludeParser 
+blob' = try $ many1 (noneOf "\\%") + +backslash' :: IncludeParser +backslash' = string "\\" + +braced' :: IncludeParser +braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}') + +include' :: IncludeParser +include' = do + name <- try $ do + char '\\' + try (string "include") + <|> try (string "input") + <|> string "usepackage" + -- skip options + skipMany $ try $ char '[' *> (manyTill anyChar (char ']')) + fs <- (map trim . splitBy (==',')) <$> braced' + pos <- getPosition + let fs' = if name == "usepackage" + then map (flip replaceExtension ".sty") fs + else map (flip replaceExtension ".tex") fs + containers <- getState + let fn = case containers of + (f':_) -> f' + [] -> "input" + -- now process each include file in order... + rest <- getInput + results' <- forM fs' (\f -> do + when (f `elem` containers) $ + fail "Include file loop!" + contents <- lift $ readTeXFile f + return $ "\\PandocStartInclude{" ++ f ++ "}" ++ + contents ++ "\\PandocEndInclude{" ++ + fn ++ "}{" ++ show (sourceLine pos) ++ "}{" + ++ show (sourceColumn pos) ++ "}") + setInput $ concat results' ++ rest + return "" + +startMarker' :: IncludeParser +startMarker' = try $ do + string "\\PandocStartInclude" + fn <- braced' + updateState (fn:) + setPosition $ newPos fn 1 1 + return $ "\\PandocStartInclude{" ++ fn ++ "}" + +endMarker' :: IncludeParser +endMarker' = try $ do + string "\\PandocEndInclude" + fn <- braced' + ln <- braced' + co <- braced' + updateState tail + setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co) + return $ "\\PandocEndInclude{" ++ fn ++ "}{" ++ ln ++ "}{" ++ + co ++ "}" readTeXFile :: FilePath -> IO String readTeXFile f = do @@ -833,27 +913,7 @@ readFileFromDirs (d:ds) f = E.catch (UTF8.readFile $ d f) $ \(_ :: E.SomeException) -> readFileFromDirs ds f -include :: LP ([FilePath], String) -include = do - name <- controlSeq "include" - <|> controlSeq "input" - <|> controlSeq "usepackage" - skipopts - fs <- (splitBy (==',')) <$> braced 
- rest <- getInput - let fs' = if name == "usepackage" - then map (flip replaceExtension ".sty") fs - else map (flip replaceExtension ".tex") fs - return (fs', rest) - -verbCmd :: LP (String, String) -verbCmd = do - (_,r) <- withRaw $ do - controlSeq "verb" - c <- anyChar - manyTill anyChar (char c) - rest <- getInput - return (r, rest) +---- keyval :: LP (String, String) keyval = try $ do @@ -875,17 +935,6 @@ alltt t = walk strToCode <$> parseFromString blocks where strToCode (Str s) = Code nullAttr s strToCode x = x -verbatimEnv :: LP (String, String) -verbatimEnv = do - (_,r) <- withRaw $ do - controlSeq "begin" - name <- braced - guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", - "minted", "alltt"] - verbEnv name - rest <- getInput - return (r,rest) - rawLaTeXBlock :: Parser [Char] ParserState String rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) @@ -1218,3 +1267,16 @@ simpTable = try $ do lookAhead $ controlSeq "end" -- make sure we're at end return $ table mempty (zip aligns (repeat 0)) header'' rows +startInclude :: LP Blocks +startInclude = do + fn <- braced + setPosition $ newPos fn 1 1 + return mempty + +endInclude :: LP Blocks +endInclude = do + fn <- braced + ln <- braced + co <- braced + setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co) + return mempty -- cgit v1.2.3 From 2ba78730861b0947ed26aec00f0520e5affd5f7c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 18:34:23 -0700 Subject: LaTeX reader: Fixed regression introduced with last commit. Tests now pass again. 
--- src/Text/Pandoc/Readers/LaTeX.hs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index d1e0b6f0a..79c66b510 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -852,18 +852,18 @@ braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}') include' :: IncludeParser include' = do - name <- try $ do + fs' <- try $ do char '\\' - try (string "include") - <|> try (string "input") - <|> string "usepackage" - -- skip options - skipMany $ try $ char '[' *> (manyTill anyChar (char ']')) - fs <- (map trim . splitBy (==',')) <$> braced' + name <- try (string "include") + <|> try (string "input") + <|> string "usepackage" + -- skip options + skipMany $ try $ char '[' *> (manyTill anyChar (char ']')) + fs <- (map trim . splitBy (==',')) <$> braced' + return $ if name == "usepackage" + then map (flip replaceExtension ".sty") fs + else map (flip replaceExtension ".tex") fs pos <- getPosition - let fs' = if name == "usepackage" - then map (flip replaceExtension ".sty") fs - else map (flip replaceExtension ".tex") fs containers <- getState let fn = case containers of (f':_) -> f' -- cgit v1.2.3 From ccbf4fc9c20ccdce0f7144845cd022ee8cfca1af Mon Sep 17 00:00:00 2001 From: Neil Mayhew Date: Mon, 14 Apr 2014 19:07:14 -0600 Subject: Distinguish tight and loose lists in Docbook output Determined by the first block of the first item being Plain. 
--- src/Text/Pandoc/Writers/Docbook.hs | 26 +++-- tests/Tests/Writers/Docbook.hs | 199 +++++++++++++++++++++++++++++++++++-- tests/writer.docbook | 60 +++++------ 3 files changed, 236 insertions(+), 49 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 2d6ce3020..e1b62f02d 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -185,10 +185,13 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = - inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst + let attribs = case lst of + ((Plain _:_):_) -> [("spacing", "compact")] + _ -> [] + in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst blockToDocbook _ (OrderedList _ []) = empty blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = - let attribs = case numstyle of + let numeration = case numstyle of DefaultStyle -> [] Decimal -> [("numeration", "arabic")] Example -> [("numeration", "arabic")] @@ -196,14 +199,21 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = LowerAlpha -> [("numeration", "loweralpha")] UpperRoman -> [("numeration", "upperroman")] LowerRoman -> [("numeration", "lowerroman")] - items = if start == 1 - then listItemsToDocbook opts (first:rest) - else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest + spacing = case first of + (Plain _:_) -> [("spacing", "compact")] + _ -> [] + attribs = numeration ++ spacing + items = if start == 1 + then listItemsToDocbook opts (first:rest) + else (inTags True "listitem" [("override",show start)] + (blocksToDocbook opts $ map plainToPara first)) $$ + listItemsToDocbook opts rest in inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = - inTagsIndented "variablelist" $ deflistItemsToDocbook 
opts lst + let attribs = case lst of + ((_, (Plain _:_):_):_) -> [("spacing", "compact")] + _ -> [] + in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst blockToDocbook _ (RawBlock f str) | f == "docbook" = text str -- raw XML block | f == "html" = text str -- allow html for backwards compatibility diff --git a/tests/Tests/Writers/Docbook.hs b/tests/Tests/Writers/Docbook.hs index e815b4f5a..97126b473 100644 --- a/tests/Tests/Writers/Docbook.hs +++ b/tests/Tests/Writers/Docbook.hs @@ -31,22 +31,199 @@ lineblock :: Blocks lineblock = para ("some text" <> linebreak <> "and more lines" <> linebreak <> "and again") -lineblock_out :: String -lineblock_out = "some text\n" ++ - "and more lines\n" ++ - "and again" +lineblock_out :: [String] +lineblock_out = [ "some text" + , "and more lines" + , "and again" + ] tests :: [Test] tests = [ testGroup "line blocks" [ "none" =: para "This is a test" - =?> "\n This is a test\n" + =?> unlines + [ "" + , " This is a test" + , "" + ] , "basic" =: lineblock - =?> lineblock_out + =?> unlines lineblock_out , "blockquote" =: blockQuote lineblock - =?> ("
\n" ++ lineblock_out ++ "\n
") - , "footnote" =: para ("This is a test" <> note lineblock <> " of footnotes") - =?> ("\n This is a test\n" ++ - lineblock_out ++ - "\n of footnotes\n") + =?> unlines + ( [ "
" ] ++ + lineblock_out ++ + [ "
" ] + ) + , "footnote" =: para ("This is a test" <> + note lineblock <> + " of footnotes") + =?> unlines + ( [ "" + , " This is a test" ] ++ + lineblock_out ++ + [ " of footnotes" + , "" ] + ) + ] + , testGroup "compact lists" + [ testGroup "bullet" + [ "compact" =: bulletList [plain "a", plain "b", plain "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + , "loose" =: bulletList [para "a", para "b", para "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + ] + , testGroup "ordered" + [ "compact" =: orderedList [plain "a", plain "b", plain "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + , "loose" =: orderedList [para "a", para "b", para "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + ] + , testGroup "definition" + [ "compact" =: definitionList [ ("an", [plain "apple" ]) + , ("a", [plain "banana"]) + , ("an", [plain "orange"])] + =?> unlines + [ "" + , " " + , " " + , " an" + , " " + , " " + , " " + , " apple" + , " " + , " " + , " " + , " " + , " " + , " a" + , " " + , " " + , " " + , " banana" + , " " + , " " + , " " + , " " + , " " + , " an" + , " " + , " " + , " " + , " orange" + , " " + , " " + , " " + , "" + ] + , "loose" =: definitionList [ ("an", [para "apple" ]) + , ("a", [para "banana"]) + , ("an", [para "orange"])] + =?> unlines + [ "" + , " " + , " " + , " an" + , " " + , " " + , " " + , " apple" + , " " + , " " + , " " + , " " + , " " + , " a" + , " " + , " " + , " " + , " banana" + , " " + , " " + , " " + , " " + , " " + , " an" + , " " + , " " + , " " + , " orange" + , " " + , " " + , " " + , "" + ] + ] ] ] diff 
--git a/tests/writer.docbook b/tests/writer.docbook index 9cb9a5359..26dcbadaa 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -93,7 +93,7 @@ sub status { A list: - + item one @@ -156,7 +156,7 @@ These should not be escaped: \$ \\ \> \[ \{ Asterisks tight: - + asterisk 1 @@ -196,7 +196,7 @@ These should not be escaped: \$ \\ \> \[ \{ Pluses tight: - + Plus 1 @@ -236,7 +236,7 @@ These should not be escaped: \$ \\ \> \[ \{ Minuses tight: - + Minus 1 @@ -279,7 +279,7 @@ These should not be escaped: \$ \\ \> \[ \{ Tight: - + First @@ -299,7 +299,7 @@ These should not be escaped: \$ \\ \> \[ \{ and: - + One @@ -383,17 +383,17 @@ These should not be escaped: \$ \\ \> \[ \{ Nested - + Tab - + Tab - + Tab @@ -407,7 +407,7 @@ These should not be escaped: \$ \\ \> \[ \{ Here’s another: - + First @@ -417,7 +417,7 @@ These should not be escaped: \$ \\ \> \[ \{ Second: - + Fee @@ -454,7 +454,7 @@ These should not be escaped: \$ \\ \> \[ \{ Second: - + Fee @@ -508,7 +508,7 @@ These should not be escaped: \$ \\ \> \[ \{ Fancy list markers - + begins with 2 @@ -521,7 +521,7 @@ These should not be escaped: \$ \\ \> \[ \{ with a continuation - + sublist with roman numerals, starting with 4 @@ -531,7 +531,7 @@ These should not be escaped: \$ \\ \> \[ \{ more items - + a subsublist @@ -550,22 +550,22 @@ These should not be escaped: \$ \\ \> \[ \{ Nesting: - + Upper Alpha - + Upper Roman. - + Decimal start with 6 - + Lower alpha with paren @@ -581,7 +581,7 @@ These should not be escaped: \$ \\ \> \[ \{ Autonumbering: - + Autonumber. @@ -591,7 +591,7 @@ These should not be escaped: \$ \\ \> \[ \{ More. - + Nested. 
@@ -616,7 +616,7 @@ These should not be escaped: \$ \\ \> \[ \{ Tight using spaces: - + apple @@ -651,7 +651,7 @@ These should not be escaped: \$ \\ \> \[ \{ Tight using tabs: - + apple @@ -757,7 +757,7 @@ These should not be escaped: \$ \\ \> \[ \{ Multiple definitions, tight: - + apple @@ -841,7 +841,7 @@ These should not be escaped: \$ \\ \> \[ \{ orange fruit - + sublist @@ -1051,7 +1051,7 @@ These should not be escaped: \$ \\ \> \[ \{ LaTeX - + @@ -1097,7 +1097,7 @@ These should not be escaped: \$ \\ \> \[ \{ These shouldn’t be math: - + To get the famous equation, write $e = mc^2$. @@ -1130,7 +1130,7 @@ These should not be escaped: \$ \\ \> \[ \{ Here is some unicode: - + I hat: Î @@ -1316,7 +1316,7 @@ These should not be escaped: \$ \\ \> \[ \{ With an ampersand: http://example.com/?foo=1&bar=2 - + In a list? @@ -1414,7 +1414,7 @@ or here: <http://example.com/> - + And in list items. -- cgit v1.2.3 From abd3a039b9adcafa8aa1df6e0753a725f90c78fc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 20:45:05 -0700 Subject: DocBook writer: Small tweaks to last commit. * Use isTightList from Shared. * Adjust writer test, since isTightList is a bit different from what was used before. Closes #1250. --- src/Text/Pandoc/Writers/Docbook.hs | 12 +++--------- tests/writer.docbook | 2 +- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index e1b62f02d..1a8e58354 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -185,9 +185,7 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = else languagesByExtension . 
map toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = - let attribs = case lst of - ((Plain _:_):_) -> [("spacing", "compact")] - _ -> [] + let attribs = [("spacing", "compact") | isTightList lst] in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst blockToDocbook _ (OrderedList _ []) = empty blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = @@ -199,9 +197,7 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = LowerAlpha -> [("numeration", "loweralpha")] UpperRoman -> [("numeration", "upperroman")] LowerRoman -> [("numeration", "lowerroman")] - spacing = case first of - (Plain _:_) -> [("spacing", "compact")] - _ -> [] + spacing = [("spacing", "compact") | isTightList (first:rest)] attribs = numeration ++ spacing items = if start == 1 then listItemsToDocbook opts (first:rest) @@ -210,9 +206,7 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = listItemsToDocbook opts rest in inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = - let attribs = case lst of - ((_, (Plain _:_):_):_) -> [("spacing", "compact")] - _ -> [] + let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst blockToDocbook _ (RawBlock f str) | f == "docbook" = text str -- raw XML block diff --git a/tests/writer.docbook b/tests/writer.docbook index 26dcbadaa..01daa2c30 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -508,7 +508,7 @@ These should not be escaped: \$ \\ \> \[ \{ Fancy list markers - + begins with 2 -- cgit v1.2.3 From fde52c25a65c479871afcf8192f56d2918230f5e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 21:08:45 -0700 Subject: AsciiDoc writer: Correctly handle empty table cells. Closes #1245. 
--- src/Text/Pandoc/Writers/AsciiDoc.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 68b525742..e5868172e 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -217,6 +217,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x] return $ text "|" <> chomp d makeCell [Para x] = makeCell [Plain x] + makeCell [] = return $ text "|" makeCell _ = return $ text "|" <> "[multiblock cell omitted]" let makeRow cells = hsep `fmap` mapM makeCell cells rows' <- mapM makeRow rows @@ -227,7 +228,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do else 100000 let maxwidth = maximum $ map offset (head':rows') let body = if maxwidth > colwidth then vsep rows' else vcat rows' - let border = text $ "|" ++ replicate ((min maxwidth colwidth) - 1) '=' + let border = text $ "|" ++ replicate (max 5 (min maxwidth colwidth) - 1) '=' return $ caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline blockToAsciiDoc opts (BulletList items) = do -- cgit v1.2.3 From 41c89d51c7b3d0099e72c0693bfd413bb4498de9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 21:24:20 -0700 Subject: AsciiDoc writer: Added test for empty table cells. 
--- pandoc.cabal | 1 + tests/tables.asciidoc | 11 ++++------- tests/test-pandoc.hs | 2 ++ 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index 002d1671c..3991d2d82 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -392,6 +392,7 @@ Test-Suite test-pandoc Tests.Writers.ConTeXt Tests.Writers.HTML Tests.Writers.Markdown + Tests.Writers.AsciiDoc Tests.Writers.LaTeX Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind Default-Language: Haskell98 diff --git a/tests/tables.asciidoc b/tests/tables.asciidoc index 38daca192..ba647866a 100644 --- a/tests/tables.asciidoc +++ b/tests/tables.asciidoc @@ -52,20 +52,17 @@ Multiline table without caption: Table without column headers: [cols=">,<,^,>",] -|============================================================================= +|================== |12 |12 |12 |12 - |123 |123 |123 |123 - |1 |1 |1 |1 -|============================================================================= +|================== Multiline table without column headers: [width="78%",cols="^21%,<17%,>20%,42%",] -|============================================================================= +|======================================================================= |First |row |12.0 |Example of a row that spans multiple lines. - |Second |row |5.0 |Here's another one. Note the blank line between rows. 
-|============================================================================= +|======================================================================= diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index a7d4fca01..80d672589 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -15,6 +15,7 @@ import qualified Tests.Writers.HTML import qualified Tests.Writers.Docbook import qualified Tests.Writers.Native import qualified Tests.Writers.Markdown +import qualified Tests.Writers.AsciiDoc import qualified Tests.Shared import qualified Tests.Walk import Text.Pandoc.Shared (inDirectory) @@ -30,6 +31,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "HTML" Tests.Writers.HTML.tests , testGroup "Docbook" Tests.Writers.Docbook.tests , testGroup "Markdown" Tests.Writers.Markdown.tests + , testGroup "AsciiDoc" Tests.Writers.AsciiDoc.tests ] , testGroup "Readers" [ testGroup "LaTeX" Tests.Readers.LaTeX.tests -- cgit v1.2.3 From 96c0c950cab8b42d5d6b8b1a6f1fb20f7f4a5aae Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 21:31:53 -0700 Subject: AsciiDoc writer: Handle multiblock table cells. Closes #1246. 
--- src/Text/Pandoc/Writers/AsciiDoc.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e5868172e..15579cba2 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -218,7 +218,8 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do return $ text "|" <> chomp d makeCell [Para x] = makeCell [Plain x] makeCell [] = return $ text "|" - makeCell _ = return $ text "|" <> "[multiblock cell omitted]" + makeCell bs = do d <- blockListToAsciiDoc opts bs + return $ text "a|" $$ d let makeRow cells = hsep `fmap` mapM makeCell cells rows' <- mapM makeRow rows head' <- makeRow headers -- cgit v1.2.3 From 9625cf6f40ddb6e3683a6435f15cd1d3ba74a281 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 22:05:24 -0700 Subject: README: Note about `unnumbered` being added to final 'References' sect. --- README | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README b/README index 8ca5ccc41..3b9b90128 100644 --- a/README +++ b/README @@ -2478,7 +2478,9 @@ document with an appropriate header: # References -The bibliography will be inserted after this header. +The bibliography will be inserted after this header. Note that +the `unnumbered` class will be added to this header, so that the +section will not be numbered. If you want to include items in the bibliography without actually citing them in the body text, you can define a dummy `nocite` metadata -- cgit v1.2.3 From 6b532c2131f13f5c294dcea88a8d041f8be388dd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 22:33:36 -0700 Subject: Added Tests.Writer.AsciiDoc to repository. 
--- tests/Tests/Writers/AsciiDoc.hs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/Tests/Writers/AsciiDoc.hs diff --git a/tests/Tests/Writers/AsciiDoc.hs b/tests/Tests/Writers/AsciiDoc.hs new file mode 100644 index 000000000..118e648d3 --- /dev/null +++ b/tests/Tests/Writers/AsciiDoc.hs @@ -0,0 +1,37 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Writers.AsciiDoc (tests) where + +import Test.Framework +import Text.Pandoc.Builder +import Text.Pandoc +import Tests.Helpers +import Tests.Arbitrary() +import Data.Monoid + +asciidoc :: (ToString a, ToPandoc a) => a -> String +asciidoc = writeAsciiDoc def{ writerWrapText = False } . toPandoc + +tests :: [Test] +tests = [ testGroup "tables" + [ test asciidoc "empty cells" $ + simpleTable [] [[mempty],[mempty]] =?> unlines + [ "[cols=\"\",]" + , "|====" + , "|" + , "|" + , "|====" + ] + , test asciidoc "multiblock cells" $ + simpleTable [] [[para "Para 1" <> para "Para 2"]] + =?> unlines + [ "[cols=\"\",]" + , "|=====" + , "a|" + , "Para 1" + , "" + , "Para 2" + , "" + , "|=====" + ] + ] + ] -- cgit v1.2.3 From 3e42f08e87c4795b260154e9747df29bc1613ccc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 08:07:17 -0700 Subject: Markdown reader: Fixed bug with unwanted code in lists. Closes #1154. When reading a raw list item, we now strip off nonindent spaces. 
--- src/Text/Pandoc/Readers/Markdown.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 36f73d847..69245cf66 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -746,6 +746,7 @@ listLine = try $ do many spaceChar listStart) notFollowedBy' $ htmlTag (~== TagClose "div") + nonindentSpaces chunks <- manyTill ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') <|> liftM snd (htmlTag isCommentTag) -- cgit v1.2.3 From 1e5042489223edc4eb5fa428ee47ed525bc1f83f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 08:19:48 -0700 Subject: Added test for #1154. --- tests/Tests/Readers/Markdown.hs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index 492680a35..5a51fe759 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -216,4 +216,13 @@ tests = [ testGroup "inline code" -- , testGroup "round trip" -- [ property "p_markdown_round_trip" p_markdown_round_trip -- ] + , testGroup "lists" + [ "issue #1154" =: + " -
\n first div breaks\n
\n\n \n\n
\n with this div too.\n
\n" + =?> bulletList [divWith nullAttr (plain $ text "first div breaks") <> + rawBlock "html" "\n" <> + divWith nullAttr (plain $ text "with this div too.")] + ] ] -- cgit v1.2.3 From d72871598174474218ae46dd984632a3753882b1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 10:45:20 -0700 Subject: Docx writer: Added ability to give fallback in parseXml. --- src/Text/Pandoc/Writers/Docx.hs | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 2a834c2da..bb2071455 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -217,7 +217,7 @@ writeDocx opts doc@(Pandoc meta _) = do -- styles let newstyles = styleToOpenXml $ writerHighlightStyle opts let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive stylepath + styledoc <- parseXml refArchive stylepath Nothing let styledoc' = styledoc{ elContent = elContent styledoc ++ [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' @@ -256,19 +256,20 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels - let entryFromArchive path = (toEntry path epochtime . renderXml) `fmap` - parseXml refArchive path - docPropsAppEntry <- entryFromArchive "docProps/app.xml" - themeEntry <- entryFromArchive "word/theme/theme1.xml" - fontTableEntry <- entryFromArchive "word/fontTable.xml" - settingsEntry <- entryFromArchive "word/settings.xml" - webSettingsEntry <- entryFromArchive "word/webSettings.xml" + let entryFromArchive path fallback = + (toEntry path epochtime . 
renderXml) `fmap` + parseXml refArchive path fallback + docPropsAppEntry <- entryFromArchive "docProps/app.xml" Nothing + themeEntry <- entryFromArchive "word/theme/theme1.xml" Nothing + fontTableEntry <- entryFromArchive "word/fontTable.xml" Nothing + settingsEntry <- entryFromArchive "word/settings.xml" Nothing + webSettingsEntry <- entryFromArchive "word/webSettings.xml" Nothing let miscRels = [ f | f <- filesInArchive refArchive , "word/_rels/" `isPrefixOf` f , ".xml.rels" `isSuffixOf` f , f /= "word/_rels/document.xml.rels" , f /= "word/_rels/footnotes.xml.rels" ] - miscRelEntries <- mapM entryFromArchive miscRels + miscRelEntries <- mapM (\f -> entryFromArchive f Nothing) miscRels -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -814,10 +815,10 @@ inlineToOpenXML opts (Image alt (src, tit)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] -parseXml :: Archive -> String -> IO Element -parseXml refArchive relpath = - case findEntryByPath relpath refArchive of - Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of - Just d -> return d - Nothing -> fail $ relpath ++ " corrupt in reference docx" - Nothing -> fail $ relpath ++ " missing in reference docx" +parseXml :: Archive -> String -> Maybe String -> IO Element +parseXml refArchive relpath fallback = + case (findEntryByPath relpath refArchive + >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) `mplus` + (fallback >>= parseXMLDoc) of + Just d -> return d + Nothing -> fail $ relpath ++ " corrupt or missing in reference docx" -- cgit v1.2.3 From 0c7e084342b2a077f83809e6613979adcefb1592 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 10:54:45 -0700 Subject: Docx writer: Fall back on distribution reference.docx. * Undid changes to parseXml in last commit. * Instead of a string fallback, we have parseXml fall back on the reference.docx that comes with pandoc if the user's reference.docx does not contain a needed file. 
* Closes #1185. --- src/Text/Pandoc/Writers/Docx.hs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index bb2071455..fcb73a427 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -116,6 +116,7 @@ writeDocx opts doc@(Pandoc meta _) = do case writerReferenceDocx opts of Just f -> B.readFile f Nothing -> readDataFile datadir "reference.docx" + distArchive <- liftM (toArchive . toLazy) $ readDataFile Nothing "reference.docx" ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState @@ -217,7 +218,7 @@ writeDocx opts doc@(Pandoc meta _) = do -- styles let newstyles = styleToOpenXml $ writerHighlightStyle opts let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive stylepath Nothing + styledoc <- parseXml refArchive distArchive stylepath let styledoc' = styledoc{ elContent = elContent styledoc ++ [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' @@ -256,20 +257,20 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels - let entryFromArchive path fallback = + let entryFromArchive path = (toEntry path epochtime . 
renderXml) `fmap` - parseXml refArchive path fallback - docPropsAppEntry <- entryFromArchive "docProps/app.xml" Nothing - themeEntry <- entryFromArchive "word/theme/theme1.xml" Nothing - fontTableEntry <- entryFromArchive "word/fontTable.xml" Nothing - settingsEntry <- entryFromArchive "word/settings.xml" Nothing - webSettingsEntry <- entryFromArchive "word/webSettings.xml" Nothing + parseXml refArchive distArchive path + docPropsAppEntry <- entryFromArchive "docProps/app.xml" + themeEntry <- entryFromArchive "word/theme/theme1.xml" + fontTableEntry <- entryFromArchive "word/fontTable.xml" + settingsEntry <- entryFromArchive "word/settings.xml" + webSettingsEntry <- entryFromArchive "word/webSettings.xml" let miscRels = [ f | f <- filesInArchive refArchive , "word/_rels/" `isPrefixOf` f , ".xml.rels" `isSuffixOf` f , f /= "word/_rels/document.xml.rels" , f /= "word/_rels/footnotes.xml.rels" ] - miscRelEntries <- mapM (\f -> entryFromArchive f Nothing) miscRels + miscRelEntries <- mapM entryFromArchive miscRels -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -815,10 +816,10 @@ inlineToOpenXML opts (Image alt (src, tit)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] -parseXml :: Archive -> String -> Maybe String -> IO Element -parseXml refArchive relpath fallback = - case (findEntryByPath relpath refArchive - >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) `mplus` - (fallback >>= parseXMLDoc) of +parseXml :: Archive -> Archive -> String -> IO Element +parseXml refArchive distArchive relpath = + case ((findEntryByPath relpath refArchive `mplus` + findEntryByPath relpath distArchive) + >>= parseXMLDoc . UTF8.toStringLazy . 
fromEntry) of Just d -> return d Nothing -> fail $ relpath ++ " corrupt or missing in reference docx" -- cgit v1.2.3 From 26dc79cdd12c68ddd24f2c58eede0785121fedc8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 12:18:44 -0700 Subject: Clarified use of `--natbib` and `--biblatex`. Not for use with pandoc-citeproc. --- README | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README b/README index 3b9b90128..05eccd0ee 100644 --- a/README +++ b/README @@ -634,15 +634,15 @@ Citation rendering `--natbib` : Use natbib for citations in LaTeX output. This option is not for use - with PDF output, since pandoc will not automatically run bibtex in - producing the PDF. It is intended for use in producing a LaTeX file - that can be processed with pdflatex and bibtex. + with the `pandoc-citeproc` filter or with PDF output. It is intended for + use in producing a LaTeX file that can be processed with pdflatex and + bibtex. `--biblatex` : Use biblatex for citations in LaTeX output. This option is not for use - with PDF output, since pandoc will not automatically run bibtex in - producing the PDF. It is intended for use in producing a LaTeX file - that can be processed with pdflatex and bibtex or biber. + with the `pandoc-citeproc` filter or with PDF output. It is intended for + use in producing a LaTeX file that can be processed with pdflatex and + bibtex or biber. Math rendering in HTML ---------------------- -- cgit v1.2.3 From 9fe669976cdd8acee28f565b414143ffe66a24cb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 12:21:21 -0700 Subject: README: Clarified citation key syntax. --- README | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README b/README index 05eccd0ee..f2189ac96 100644 --- a/README +++ b/README @@ -2450,7 +2450,9 @@ can be found at . See also Citations go inside square brackets and are separated by semicolons. 
Each citation must have a key, composed of '@' + the citation identifier from the database, and may optionally have a prefix, -a locator, and a suffix. Here are some examples: +a locator, and a suffix. The citation key must begin with a letter +or `_`, and may contain alphanumerics, `_`, and internal punctuation +characters (`:.#$%&-+?<>~/`). Here are some examples: Blah blah [see @doe99, pp. 33-35; also @smith04, ch. 1]. -- cgit v1.2.3 From 51aa3048347280db6798a84a30af4f6e1ae56b26 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 14:43:05 -0700 Subject: LaTeX writer: Fixed inconsistencies with reference escaping. - toLabel is now monadic, and it does the needed string escaping. - Closes #1130. --- src/Text/Pandoc/Writers/LaTeX.hs | 131 ++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 64 deletions(-) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e52220f01..c17e041b5 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -193,7 +193,7 @@ stringToLaTeX _ [] = return "" stringToLaTeX ctx (x:xs) = do opts <- gets stOptions rest <- stringToLaTeX ctx xs - let ligatures = writerTeXLigatures opts && (ctx /= CodeString) + let ligatures = writerTeXLigatures opts && ctx == TextString let isUrl = ctx == URLString when (x == '€') $ modify $ \st -> st{ stUsesEuro = True } @@ -207,7 +207,8 @@ stringToLaTeX ctx (x:xs) = do '&' -> "\\&" ++ rest '_' | not isUrl -> "\\_" ++ rest '#' -> "\\#" ++ rest - '-' -> case xs of -- prevent adjacent hyphens from forming ligatures + '-' | not isUrl -> case xs of + -- prevent adjacent hyphens from forming ligatures ('-':_) -> "-\\/" ++ rest _ -> '-' : rest '~' | not isUrl -> "\\textasciitilde{}" ++ rest @@ -229,12 +230,13 @@ stringToLaTeX ctx (x:xs) = do '\x2013' | ligatures -> "--" ++ rest _ -> x : rest -toLabel :: String -> String -toLabel [] = "" -toLabel (x:xs) - | (isLetter x || isDigit x) && isAscii x = x:toLabel xs - | elem 
x "-+=:;." = x:toLabel xs - | otherwise = "ux" ++ printf "%x" (ord x) ++ toLabel xs +toLabel :: String -> State WriterState String +toLabel z = go `fmap` stringToLaTeX URLString z + where go [] = "" + go (x:xs) + | (isLetter x || isDigit x) && isAscii x = x:go xs + | elem x "-+=:;." = x:go xs + | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs -- | Puts contents into LaTeX command. inCmd :: String -> Doc -> Doc @@ -340,57 +342,57 @@ blockToLaTeX (BlockQuote lst) = do return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do opts <- gets stOptions + ref <- toLabel identifier + let linkAnchor = if null identifier + then empty + else "\\hyperdef{}" <> braces (text ref) <> + braces ("\\label" <> braces (text ref)) + let lhsCodeBlock = do + modify $ \s -> s{ stLHS = True } + return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ + "\\end{code}") $$ cr + let rawCodeBlock = do + st <- get + env <- if stInNote st + then modify (\s -> s{ stVerbInNote = True }) >> + return "Verbatim" + else return "verbatim" + return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$ + text str $$ text ("\\end{" ++ env ++ "}")) <> cr + let listingsCodeBlock = do + st <- get + let params = if writerListings (stOptions st) + then (case getListingsLanguage classes of + Just l -> [ "language=" ++ l ] + Nothing -> []) ++ + [ "numbers=left" | "numberLines" `elem` classes + || "number" `elem` classes + || "number-lines" `elem` classes ] ++ + [ (if key == "startFrom" + then "firstnumber" + else key) ++ "=" ++ attr | + (key,attr) <- keyvalAttr ] ++ + (if identifier == "" + then [] + else [ "label=" ++ ref ]) + + else [] + printParams + | null params = empty + | otherwise = brackets $ hcat (intersperse ", " (map text params)) + return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ + "\\end{lstlisting}") $$ cr + let highlightedCodeBlock = + case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of + Nothing 
-> rawCodeBlock + Just h -> modify (\st -> st{ stHighlighting = True }) >> + return (flush $ linkAnchor $$ text h) case () of _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes && "literate" `elem` classes -> lhsCodeBlock | writerListings opts -> listingsCodeBlock | writerHighlight opts && not (null classes) -> highlightedCodeBlock | otherwise -> rawCodeBlock - where ref = text $ toLabel identifier - linkAnchor = if null identifier - then empty - else "\\hyperdef{}" <> braces ref <> - braces ("\\label" <> braces ref) - lhsCodeBlock = do - modify $ \s -> s{ stLHS = True } - return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ - "\\end{code}") $$ cr - rawCodeBlock = do - st <- get - env <- if stInNote st - then modify (\s -> s{ stVerbInNote = True }) >> - return "Verbatim" - else return "verbatim" - return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$ - text str $$ text ("\\end{" ++ env ++ "}")) <> cr - listingsCodeBlock = do - st <- get - let params = if writerListings (stOptions st) - then (case getListingsLanguage classes of - Just l -> [ "language=" ++ l ] - Nothing -> []) ++ - [ "numbers=left" | "numberLines" `elem` classes - || "number" `elem` classes - || "number-lines" `elem` classes ] ++ - [ (if key == "startFrom" - then "firstnumber" - else key) ++ "=" ++ attr | - (key,attr) <- keyvalAttr ] ++ - (if identifier == "" - then [] - else [ "label=" ++ toLabel identifier ]) - - else [] - printParams - | null params = empty - | otherwise = brackets $ hcat (intersperse ", " (map text params)) - return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ - "\\end{lstlisting}") $$ cr - highlightedCodeBlock = - case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of - Nothing -> rawCodeBlock - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (flush $ linkAnchor $$ text h) blockToLaTeX (RawBlock f x) | f == Format "latex" || f == Format "tex" = return $ text x @@ -579,6 +581,7 @@ sectionHeader :: Bool 
-- True for unnumbered -> State WriterState Doc sectionHeader unnumbered ref level lst = do txt <- inlineListToLaTeX lst + lab <- text `fmap` toLabel ref let noNote (Note _) = Str "" noNote x = x let lstNoNotes = walk noNote lst @@ -599,13 +602,13 @@ sectionHeader unnumbered ref level lst = do let refLabel x = (if ref `elem` internalLinks then text "\\hyperdef" <> braces empty - <> braces (text $ toLabel ref) + <> braces lab <> braces x else x) - let headerWith x y r = refLabel $ text x <> y <> - if null r + let headerWith x y = refLabel $ text x <> y <> + if null ref then empty - else text "\\label" <> braces (text $ toLabel r) + else text "\\label" <> braces lab let sectionType = case level' of 0 | writerBeamer opts -> "part" | otherwise -> "chapter" @@ -624,7 +627,7 @@ sectionHeader unnumbered ref level lst = do return $ if level' > 5 then txt else prefix $$ - headerWith ('\\':sectionType) stuffing ref + headerWith ('\\':sectionType) stuffing $$ if unnumbered then "\\addcontentsline{toc}" <> braces (text sectionType) <> @@ -659,9 +662,10 @@ inlineToLaTeX (Span (id',classes,_) ils) = do let noEmph = "csl-no-emph" `elem` classes let noStrong = "csl-no-strong" `elem` classes let noSmallCaps = "csl-no-smallcaps" `elem` classes - let label' = if (null id') - then empty - else text "\\label" <> braces (text $ toLabel id') + label' <- if null id' + then return empty + else toLabel id' >>= \x -> + return (text "\\label" <> braces (text x)) fmap (label' <>) ((if noEmph then inCmd "textup" else id) . (if noStrong then inCmd "textnormal" else id) . 
@@ -745,9 +749,8 @@ inlineToLaTeX (LineBreak) = return "\\\\" inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt - ident' <- stringToLaTeX URLString ident - return $ text "\\hyperref" <> brackets (text $ toLabel ident') <> - braces contents + lab <- toLabel ident + return $ text "\\hyperref" <> brackets (text lab) <> braces contents inlineToLaTeX (Link txt (src, _)) = case txt of [Str x] | x == src -> -- autolink -- cgit v1.2.3 From dbd6c1540f9688a3439fceec405ec4d86dc951d5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 16:21:18 -0700 Subject: Fixed the fix to #1154. We need to strip off up to 4 spaces, not up to 3. --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 69245cf66..aac87f363 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -746,7 +746,7 @@ listLine = try $ do many spaceChar listStart) notFollowedBy' $ htmlTag (~== TagClose "div") - nonindentSpaces + optional (() <$ indentSpaces) chunks <- manyTill ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') <|> liftM snd (htmlTag isCommentTag) -- cgit v1.2.3 From 0edbb5597c447a6fc03bf0df4e1f4a7e58a7b7ee Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 16:34:36 -0700 Subject: Updated changelog. --- changelog | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/changelog b/changelog index 5a4f001a9..6874f883e 100644 --- a/changelog +++ b/changelog @@ -21,11 +21,17 @@ pandoc (1.12.4) * LaTeX reader: + + Give better location information on errors, pointing to line + numbers within included files (#1274). + LaTeX reader: Better handling of `table` environment (#1204). Positioning options no longer rendered verbatim. 
+ Better handling of figure and table with caption (#1204). + Handle `@{}` and `p{length}` in tabular. The length is not actually recorded, but at least we get a table (#1180). + + Properly handle `\nocite`. It now adds a `nocite` metadata + field. Citations there will appear in the bibliography but not + in the text (unless you explicitly put a `$nocite$` variable + in your template). * Markdown reader: @@ -38,6 +44,10 @@ pandoc (1.12.4) would be missed if it came right after other block-level tags. + Avoid backtracking when closing `
</div>` not found. + Fixed bug in reference link parsing in `markdown_mmd`. + + Fixed a bug in list parsing (#1154). When reading a raw list + item, we now strip off up to 4 spaces. + + Fixed parsing of empty reference link definitions (#1186). + + Made one-column pipe tables work (#1218). * Textile reader: @@ -82,6 +92,11 @@ pandoc (1.12.4) + Enhanced Pandoc's support for rST roles (Merijn Verstaaten). rST parser now supports: all built-in rST roles, new role definition, role inheritance, though with some limitations. + + Use `author` rather than `authors` in metadata. + + Better handling of directives. We now correctly handle field + lists that are indented more than three spaces. We treat an + `aafig` directive as a code block with attributes, so it can be + processed in a filter (#1212). * LaTeX writer: @@ -95,6 +110,9 @@ pandoc (1.12.4) + Use `\/` to avoid en-dash ligature instead of `-{}-` (Vaclav Zeman). This is to fix LuaLaTeX output. The `-{}-` sequence does not avoid the ligature with LuaLaTeX but `\/` does. + + Fixed string escaping in `hyperref` and `hyperdef` (#1130). + + * ConTeXt writer: Improved autolinks (#1270). * DocBook writer: @@ -103,6 +121,13 @@ pandoc (1.12.4) just for the newline character. + Don't let line breaks inside footnotes influence the enclosing paragraph (Neil Mayhew). + + Distinguish tight and loose lists in DocBook output, using + `spacing="compact"` (Neil Mayhew, #1250). + + * Docx writer: When needed files are not present in the user's + `reference.docx`, fall back on the versions in the `reference.docx` + in pandoc's data files. This fixes a bug that occurs when a + `reference.docx` saved by LibreOffice is used. (#1185) * EPUB writer: @@ -120,6 +145,9 @@ pandoc (1.12.4) + Add colgroup around col tags (#877). Also affects EPUB writer. + Fixed bug with unnumbered section headings. Unnumbered section headings (with class `unnumbered`) were getting numbers. + + Improved detection of image links.
Previously image links with + queries were not recognized, causing `<embed>` to be used instead + of `<img>`. * Man writer: Ensure that terms in definition lists aren't line wrapped (#1195). @@ -138,7 +166,16 @@ pandoc (1.12.4) * RTF writer: Fixed table cells containing paragraphs. - * Custom writer: Correctly handle UTF-8 in custom lua scripts (#1189). + * Custom writer: + + + Correctly handle UTF-8 in custom lua scripts (#1189). + + Fix bugs with lua scripts with mixed-case filenames and + paths containing `+` or `-` (#1267). Note that `getWriter` + in `Text.Pandoc` no longer returns a custom writer on input + `foo.lua`. + + * AsciiDoc writer: Handle multiblock and empty table cells + (#1245, #1246). Added tests. * `Text.Pandoc.Options`: Added `readerTrace` to `ReaderOptions` @@ -202,8 +239,9 @@ pandoc (1.12.4) + LaTeX template: use `fontenc` package only with `pdflatex` (#1164). + Beamer template: Consistent styles for figure and table captions (aaronwolen). - + Beamer template: Adjust widths correctly for oversized images - (Garrick Aden-Buie). + + LaTeX and beamer template: Adjust widths correctly for oversized + images. Use `\setkeys{Gin}{}` to set appropriate defaults for + `\includegraphics` (Yihui Xie, Garrick Aden-Buie). + Beamer template: Added caption package (#1200). + Beamer template: changes for better unicode handling (KarolS). + DocBook template: use `authorgroup` if there are authors. @@ -227,6 +265,11 @@ pandoc (1.12.4) * Give more useful error message if '-t pdf' is specified (#1155). + * Added `Cite`, `SmallCaps` to `Arbitrary` instance (#1269). + + * Allow `html4` as a synonym of `html` as a reader (it already works + as a writer). + * README: + Added an explanation of how to use YAML metadata to @@ -234,6 +277,12 @@ pandoc (1.12.4) the text (like LaTeX `\nocite`). + Added note to `--bibtex/--natbib`: not for use in making PDF (#1194, thanks to nahoj). + + Added explanatory notes about `--natbib` and `--biblatex`.
+ + Added specification of legal syntax for citation keys. + + Fixed variable defaults documentation (Albert Krewinkel). + + * Removed copyright statements for files that have been removed + (Albert Krewinkel). * Moved some doc files from `data-files` to `extra-source-files` (#1123). They aren't needed at runtime. We keep README and COPYRIGHT in data -- cgit v1.2.3 From f31e0027263c500ea41405cddac640312793b12b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 18:48:38 -0700 Subject: make_osx_package: Use cpphs to avoid problems with clang cpp. See https://github.com/jgm/pandoc/issues/1172 --- make_osx_package.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index 6c943c84a..fc3691218 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -26,8 +26,10 @@ cabal update echo Building pandoc... cabal sandbox init cabal clean -cabal install --reinstall --flags="embed_data_files" -cabal install --reinstall --flags="embed_data_files" pandoc-citeproc +# Use cpphs to avoid problems with clang cpp on ghc 7.8 osx: +which cpphs || cabal install cpphs +cabal install --reinstall --flags="embed_data_files" --ghc-options '-pgmPcpphs -optP--cpp' +cabal install --reinstall --flags="embed_data_files" pandoc-citeproc --ghc-options '-pgmPcpphs -optP--cpp' mkdir -p $DEST/bin mkdir -p $DEST/share/man/man1 -- cgit v1.2.3 From 7524e8e1f010868db7d26703f2443f78d4a4ceff Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 5 May 2014 07:08:57 -0700 Subject: Windows package script: use cabal sandbox, not cabal-dev. --- windows/make-windows-installer.bat | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index d27859057..3e16c887f 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -1,21 +1,22 @@ @echo off cd .. 
cabal update -cabal-dev clean -cabal install hsb2hs cabal-dev +cabal sandbox init +cabal clean +cabal install hsb2hs if %errorlevel% neq 0 exit /b %errorlevel% -cabal-dev install -v1 --force --reinstall --flags="embed_data_files" +cabal install -v1 --force --reinstall --flags="embed_data_files" if %errorlevel% neq 0 exit /b %errorlevel% -cabal-dev install -v1 --reinstall --flags="embed_data_files" pandoc-citeproc +cabal install -v1 --reinstall --flags="embed_data_files" pandoc-citeproc if %errorlevel% neq 0 exit /b %errorlevel% -strip cabal-dev\bin\pandoc.exe -strip cabal-dev\bin\pandoc-citeproc.exe -cabal-dev\bin\pandoc.exe -s --template data\templates\default.html -S README -o README.html +strip .\.cabal-sandbox\bin\pandoc.exe +strip .\.cabal-sandbox\bin\pandoc-citeproc.exe +\.cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.html -S README -o README.html if %errorlevel% neq 0 exit /b %errorlevel% -cabal-dev\bin\pandoc.exe -s --template data\templates\default.rtf COPYING -t rtf -S -o COPYING.rtf +.\cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.rtf COPYING -t rtf -S -o COPYING.rtf if %errorlevel% neq 0 exit /b %errorlevel% copy COPYRIGHT COPYRIGHT.txt -for /f "tokens=1-2 delims= " %%a in ('cabal-dev\bin\pandoc --version') do ( +for /f "tokens=1-2 delims= " %%a in ('.\cabal-sandbox\bin\pandoc --version') do ( @set VERSION=%%b goto :next ) -- cgit v1.2.3 From 71bd4fb2b3778d3906a63938625ebcadca40b8c8 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 5 May 2014 14:39:25 +0200 Subject: Org reader: Read inline code blocks Org's inline code blocks take forms like `src_haskell(print "hi")` and are frequently used to include results from computations called from within the document. The blocks are read as inline code and marked with the special class `rundoc-block`. Proper handling and execution of these blocks is the subject of a separate library, rundoc, which is work in progress. This closes #1278. 
--- src/Text/Pandoc/Readers/Org.hs | 43 +++++++++++++++++++++++++++++++++++++++--- tests/Tests/Readers/Org.hs | 18 ++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index d68ef45ef..dba61dfe0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,6 +43,7 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) +import Control.Arrow (first) import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) @@ -721,7 +722,6 @@ bulletList = fmap B.bulletList . fmap compactify' . sequence <$> many1 (listItem bulletListStart) orderedList :: OrgParser (F Blocks) --- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) orderedList = fmap B.orderedList . fmap compactify' . sequence <$> many1 (listItem orderedListStart) @@ -746,11 +746,11 @@ definitionListItem :: OrgParser Int definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") - first <- anyLineNewline + line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString inline term - contents' <- parseFromString parseBlocks $ first ++ blank ++ cont + contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' @@ -789,6 +789,7 @@ inline = , footnote , linkOrImage , anchor + , inlineCodeBlock , str , endline , emph @@ -989,6 +990,42 @@ solidify = map replaceSpecialChar | c `elem` "_.-:" = c | otherwise = '-' +-- | Parses an inline code block and marks it as an babel block. 
+inlineCodeBlock :: OrgParser (F Inlines) +inlineCodeBlock = try $ do + string "src_" + lang <- many1 orgArgWordChar + opts <- option [] $ enclosedByPair '[' ']' blockOption + inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") + let attrClasses = [translateLang lang, rundocBlockClass] + let attrKeyVal = map toRundocAttrib (("language", lang) : opts) + returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode + where enclosedByPair s e p = char s *> many1Till p (char e) + +-- | The class-name used to mark rundoc blocks. +rundocBlockClass :: String +rundocBlockClass = "rundoc-block" + +blockOption :: OrgParser (String, String) +blockOption = try $ (,) <$> orgArgKey <*> orgArgValue + +orgArgKey :: OrgParser String +orgArgKey = try $ + skipSpaces *> char ':' + *> many1 orgArgWordChar + <* many1 spaceChar + +orgArgValue :: OrgParser String +orgArgValue = try $ + skipSpaces *> many1 orgArgWordChar + <* skipSpaces + +orgArgWordChar :: OrgParser Char +orgArgWordChar = alphaNum <|> oneOf "-_" + +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first ("rundoc-" ++) + emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 78684f0f1..949976aba 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -207,6 +207,24 @@ tests = "<> Link here later." 
=?> (para $ spanWith ("anchor", [], []) mempty <> "Link" <> space <> "here" <> space <> "later.") + + , "Inline code block" =: + "src_emacs-lisp{(message \"Hello\")}" =?> + (para $ codeWith ( "" + , [ "commonlisp", "rundoc-block" ] + , [ ("rundoc-language", "emacs-lisp") ]) + "(message \"Hello\")") + + , "Inline code block with arguments" =: + "src_sh[:export both :results output]{echo 'Hello, World'}" =?> + (para $ codeWith ( "" + , [ "bash", "rundoc-block" ] + , [ ("rundoc-language", "sh") + , ("rundoc-export", "both") + , ("rundoc-results", "output") + ] + ) + "echo 'Hello, World'") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 0e46173ae40520a135655885dc11525caaecb1e3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 07:22:40 -0700 Subject: Windows install script: install alex, happy. --- windows/make-windows-installer.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index 3e16c887f..1734e1a27 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -3,7 +3,7 @@ cd .. cabal update cabal sandbox init cabal clean -cabal install hsb2hs +cabal install hsb2hs alex happy if %errorlevel% neq 0 exit /b %errorlevel% cabal install -v1 --force --reinstall --flags="embed_data_files" if %errorlevel% neq 0 exit /b %errorlevel% -- cgit v1.2.3 From 5ff2adc08f5b00dcca2c1f6ecd03958e94bc198c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 07:25:05 -0700 Subject: make_osx_package: install alex, happy, hsb2hs. --- make_osx_package.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index fc3691218..1a12601dc 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -27,7 +27,7 @@ echo Building pandoc... 
cabal sandbox init cabal clean # Use cpphs to avoid problems with clang cpp on ghc 7.8 osx: -which cpphs || cabal install cpphs +cabal install cpphs alex happy hsb2hs cabal install --reinstall --flags="embed_data_files" --ghc-options '-pgmPcpphs -optP--cpp' cabal install --reinstall --flags="embed_data_files" pandoc-citeproc --ghc-options '-pgmPcpphs -optP--cpp' -- cgit v1.2.3 From 7019069a325dae0f2729c29c9e2436f19c2271c2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 08:42:30 -0700 Subject: Windows install script: Fixed typo in path. --- windows/make-windows-installer.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index 1734e1a27..3f3696f12 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -11,7 +11,7 @@ cabal install -v1 --reinstall --flags="embed_data_files" pandoc-citeproc if %errorlevel% neq 0 exit /b %errorlevel% strip .\.cabal-sandbox\bin\pandoc.exe strip .\.cabal-sandbox\bin\pandoc-citeproc.exe -\.cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.html -S README -o README.html +.\.cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.html -S README -o README.html if %errorlevel% neq 0 exit /b %errorlevel% .\cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.rtf COPYING -t rtf -S -o COPYING.rtf if %errorlevel% neq 0 exit /b %errorlevel% -- cgit v1.2.3 From ef8de35505833938c70253fb11659400fc3e18fb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 08:56:52 -0700 Subject: Windows installer: more typos from transition from cabal-dev. 
--- windows/make-windows-installer.bat | 4 ++-- windows/pandoc.wxs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index 3f3696f12..4b17b449f 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -13,10 +13,10 @@ strip .\.cabal-sandbox\bin\pandoc.exe strip .\.cabal-sandbox\bin\pandoc-citeproc.exe .\.cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.html -S README -o README.html if %errorlevel% neq 0 exit /b %errorlevel% -.\cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.rtf COPYING -t rtf -S -o COPYING.rtf +.\.cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.rtf COPYING -t rtf -S -o COPYING.rtf if %errorlevel% neq 0 exit /b %errorlevel% copy COPYRIGHT COPYRIGHT.txt -for /f "tokens=1-2 delims= " %%a in ('.\cabal-sandbox\bin\pandoc --version') do ( +for /f "tokens=1-2 delims= " %%a in ('.\.cabal-sandbox\bin\pandoc --version') do ( @set VERSION=%%b goto :next ) diff --git a/windows/pandoc.wxs b/windows/pandoc.wxs index 7a20effe5..75c316772 100644 --- a/windows/pandoc.wxs +++ b/windows/pandoc.wxs @@ -44,7 +44,7 @@ Name="Version" Type="string" Value="[ProductVersion]" KeyPath="yes"/> + Source="..\.cabal-sandbox\bin\pandoc.exe" /> + Source="..\.cabal-sandbox\bin\pandoc-citeproc.exe" /> Date: Tue, 6 May 2014 09:06:10 -0700 Subject: Update latex/beamer templates: load upquote after fontenc. --- data/templates | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/templates b/data/templates index c3a7937a2..5de411030 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit c3a7937a2852e654da23df978b7abf79955008b7 +Subproject commit 5de411030bfcf0b369d10ff084caaad8b37ccfd6 -- cgit v1.2.3 From 1744798d54919b5cd2fd5efbdce821b9b961185e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 09:07:32 -0700 Subject: Updated changelog. 
--- changelog | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/changelog b/changelog index 6874f883e..1f9e11963 100644 --- a/changelog +++ b/changelog @@ -241,7 +241,8 @@ pandoc (1.12.4) (aaronwolen). + LaTeX and beamer template: Adjust widths correctly for oversized images. Use `\setkeys{Gin}{}` to set appropriate defaults for - `\includegraphics` (Yihui Xie, Garrick Aden-Buie). + `\includegraphics` (Yihui Xie, Garrick Aden-Buie). Load + `upquote` only after `fontenc` (Yihui Xie). + Beamer template: Added caption package (#1200). + Beamer template: changes for better unicode handling (KarolS). + DocBook template: use `authorgroup` if there are authors. @@ -289,6 +290,8 @@ pandoc (1.12.4) to ensure that they'll be available on all systems on which pandoc is installed. + * Use cabal sandboxes in Windows build script. + pandoc (1.12.3.3) * To changes to source; recompiled tarball with latest alex and -- cgit v1.2.3 From 250537532aab29615c2f2061e465bfa5f0374781 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 09:49:13 -0700 Subject: Updated templates: conditionalize author/date in latex template. --- data/templates | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/templates b/data/templates index 5de411030..fc5229df1 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 5de411030bfcf0b369d10ff084caaad8b37ccfd6 +Subproject commit fc5229df167aa61cdbcf14ce24581b36bb0a5b90 -- cgit v1.2.3 From ddb47ef6e82b99c6c5a4c71dac6e92ff48e1eb9e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 09:55:43 -0700 Subject: Added fontfamily variable to latex template. (tlvince) --- README | 2 ++ data/templates | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README b/README index f2189ac96..c5e5aec25 100644 --- a/README +++ b/README @@ -790,6 +790,8 @@ as `title`, `author`, and `date`) as well as the following: `geometry` : options for LaTeX `geometry` class, e.g. 
`margin=1in`; may be repeated for multiple options +`fontfamily` +: font package to use for LaTeX documents (with pdflatex) `mainfont`, `sansfont`, `monofont`, `mathfont` : fonts for LaTeX documents (works only with xelatex and lualatex) diff --git a/data/templates b/data/templates index fc5229df1..04e59d4df 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit fc5229df167aa61cdbcf14ce24581b36bb0a5b90 +Subproject commit 04e59d4df89bedfecea159efdf34cc2e6d56a409 -- cgit v1.2.3 From d508c554f760e1840fd77ceb8e6ed2b504b3fde4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 10:48:31 -0700 Subject: LaTeX template: Added linestretch variable. Documented this and fontfamily. --- README | 9 ++++++++- data/templates | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/README b/README index c5e5aec25..7ea1146f4 100644 --- a/README +++ b/README @@ -790,8 +790,15 @@ as `title`, `author`, and `date`) as well as the following: `geometry` : options for LaTeX `geometry` class, e.g. `margin=1in`; may be repeated for multiple options +`linestretch` +: adjusts line spacing (requires the `setspace` package) `fontfamily` -: font package to use for LaTeX documents (with pdflatex) +: font package to use for LaTeX documents (with pdflatex): + TeXLive has `bookman` (Bookman), `utopia` or `fourier` (Utopia), + `fouriernc` (New Century Schoolbook), `times` or `txfonts` (Times), + `mathpazo` or `pxfonts` or `mathpple` (Palatino), + `libertine` (Linux Libertine), `arev` (Arev Sans), + and the default `lmodern`, among others. 
`mainfont`, `sansfont`, `monofont`, `mathfont` : fonts for LaTeX documents (works only with xelatex and lualatex) diff --git a/data/templates b/data/templates index 04e59d4df..620e8fe5e 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 04e59d4df89bedfecea159efdf34cc2e6d56a409 +Subproject commit 620e8fe5e1ed1a1a4a2243587f3063ccfe745673 -- cgit v1.2.3 From 152d5af751daa822b099d39c5168f96de9b255df Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 19:45:00 -0700 Subject: Updated changelog. --- changelog | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/changelog b/changelog index 1f9e11963..5238e943f 100644 --- a/changelog +++ b/changelog @@ -236,7 +236,9 @@ pandoc (1.12.4) * Template changes: - + LaTeX template: use `fontenc` package only with `pdflatex` (#1164). + + LaTeX template: Use `fontenc` package only with `pdflatex` (#1164). + + LaTeX template: Add `linestretch` and `fontfamily` variables. + + LaTeX template: Conditionalize author and date commands. + Beamer template: Consistent styles for figure and table captions (aaronwolen). + LaTeX and beamer template: Adjust widths correctly for oversized -- cgit v1.2.3 From e7b42947bfa3d59ac59bf2b8d1e17415c24f518f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 20:20:28 -0700 Subject: Updated tests for template changes. 
--- tests/lhs-test.latex | 6 ++---- tests/lhs-test.latex+lhs | 6 ++---- tests/writer.latex | 4 ++-- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/tests/lhs-test.latex b/tests/lhs-test.latex index 78f072600..0ed6640bd 100644 --- a/tests/lhs-test.latex +++ b/tests/lhs-test.latex @@ -3,8 +3,6 @@ \usepackage{amssymb,amsmath} \usepackage{ifxetex,ifluatex} \usepackage{fixltx2e} % provides \textsubscript -% use upquote if available, for straight quotes in verbatim environments -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} @@ -18,6 +16,8 @@ \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase} \newcommand{\euro}{€} \fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} % use microtype if available \IfFileExists{microtype.sty}{\usepackage{microtype}}{} \usepackage{color} @@ -63,8 +63,6 @@ \setlength{\emergencystretch}{3em} % prevent overfull lines \setcounter{secnumdepth}{0} -\author{} -\date{} \begin{document} diff --git a/tests/lhs-test.latex+lhs b/tests/lhs-test.latex+lhs index 50a0e15e1..67841d54b 100644 --- a/tests/lhs-test.latex+lhs +++ b/tests/lhs-test.latex+lhs @@ -3,8 +3,6 @@ \usepackage{amssymb,amsmath} \usepackage{ifxetex,ifluatex} \usepackage{fixltx2e} % provides \textsubscript -% use upquote if available, for straight quotes in verbatim environments -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} @@ -18,6 +16,8 @@ \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase} \newcommand{\euro}{€} \fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} % use microtype if available \IfFileExists{microtype.sty}{\usepackage{microtype}}{} \usepackage{listings} @@ -44,8 +44,6 @@ 
\setlength{\emergencystretch}{3em} % prevent overfull lines \setcounter{secnumdepth}{0} -\author{} -\date{} \begin{document} diff --git a/tests/writer.latex b/tests/writer.latex index a2f973854..1ac79feca 100644 --- a/tests/writer.latex +++ b/tests/writer.latex @@ -3,8 +3,6 @@ \usepackage{amssymb,amsmath} \usepackage{ifxetex,ifluatex} \usepackage{fixltx2e} % provides \textsubscript -% use upquote if available, for straight quotes in verbatim environments -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} @@ -18,6 +16,8 @@ \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase} \newcommand{\euro}{€} \fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} % use microtype if available \IfFileExists{microtype.sty}{\usepackage{microtype}}{} \usepackage{fancyvrb} -- cgit v1.2.3 From 10644607e35369ec3b19b5d02fbe9b936d0ecb85 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 22:13:59 -0700 Subject: Textile reader: Rewrote some inline parsing code for clarity. (It seems clearer to put the whitespace parsing in the grouped parser. This also uses stateLastStrPos to determine when the border is adjacent to an alphanumeric.) 
--- src/Text/Pandoc/Readers/Textile.hs | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index ae9c0cc8e..3c07a4d85 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -596,32 +596,28 @@ ungroupedSimpleInline :: Parser [Char] ParserState t -- ^ surrounding ungroupedSimpleInline border construct = try $ do st <- getState pos <- getPosition - isWhitespace <- option False (whitespace >> return True) - guard $ (stateQuoteContext st /= NoQuote) - || (sourceColumn pos == 1) - || isWhitespace + let afterString = stateLastStrPos st == Just pos + guard $ not afterString border *> notFollowedBy (oneOf " \t\n\r") attr <- attributes body <- trimInlines . mconcat <$> withQuoteContext InSingleQuote (manyTill inline (try border <* notFollowedBy alphaNum)) - let result = construct $ + return $ construct $ if attr == nullAttr then body else B.spanWith attr body - return $ if isWhitespace - then B.space <> result - else result groupedSimpleInline :: Parser [Char] ParserState t -> (Inlines -> Inlines) -> Parser [Char] ParserState Inlines groupedSimpleInline border construct = try $ do char '[' - withQuoteContext InSingleQuote (simpleInline border construct) >>~ char ']' - - - + sp1 <- option mempty $ B.space <$ whitespace + result <- withQuoteContext InSingleQuote (simpleInline border construct) + sp2 <- option mempty $ B.space <$ whitespace + char ']' + return $ sp1 <> result <> sp2 -- | Create a singleton list singleton :: a -> [a] -- cgit v1.2.3 From d6a9ba1cdc4cb10d34c61593d04868da3abb5e40 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 22:28:11 -0700 Subject: Make `--trace` work with textile reader. 
--- src/Text/Pandoc/Readers/Textile.hs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 3c07a4d85..b67e8fbc8 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -61,9 +61,11 @@ import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match import Data.List ( intercalate ) import Data.Char ( digitToInt, isUpper) -import Control.Monad ( guard, liftM ) +import Control.Monad ( guard, liftM, when ) +import Text.Printf import Control.Applicative ((<$>), (*>), (<*), (<$)) import Data.Monoid +import Debug.Trace (trace) -- | Parse a Textile text and return a Pandoc document. readTextile :: ReaderOptions -- ^ Reader options @@ -135,9 +137,17 @@ blockParsers = [ codeBlock endBlock :: Parser [Char] ParserState Blocks endBlock = string "\n\n" >> return mempty + -- | Any block in the order of definition of blockParsers block :: Parser [Char] ParserState Blocks -block = choice blockParsers "block" +block = do + res <- choice blockParsers "block" + pos <- getPosition + tr <- getOption readerTrace + when tr $ + trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res)) (return ()) + return res commentBlock :: Parser [Char] ParserState Blocks commentBlock = try $ do -- cgit v1.2.3 From ea4e947bd0308861dbbbe020d21afe7943db1b98 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 23:16:47 -0700 Subject: Textile reader: Disallow blank lines in inline contexts. @hi there@ should not be a single code span. 
--- src/Text/Pandoc/Readers/Textile.hs | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index b67e8fbc8..622a41168 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -481,7 +481,7 @@ str = do -- followed by parens, parens content is unconditionally word acronym fullStr <- option baseStr $ try $ do guard $ all isUpper baseStr - acro <- enclosed (char '(') (char ')') anyChar + acro <- enclosed (char '(') (char ')') anyChar' return $ concat [baseStr, " (", acro, ")"] updateLastStrPos return $ B.str fullStr @@ -528,8 +528,8 @@ link = try $ do image :: Parser [Char] ParserState Inlines image = try $ do char '!' >> notFollowedBy space - src <- manyTill anyChar (lookAhead $ oneOf "!(") - alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')'))) + src <- manyTill anyChar' (lookAhead $ oneOf "!(") + alt <- option "" (try $ (char '(' >> manyTill anyChar' (char ')'))) char '!' return $ B.image src alt (B.str alt) @@ -537,12 +537,14 @@ escapedInline :: Parser [Char] ParserState Inlines escapedInline = escapedEqs <|> escapedTag escapedEqs :: Parser [Char] ParserState Inlines -escapedEqs = B.str <$> (try $ string "==" *> manyTill anyChar (try $ string "==")) +escapedEqs = B.str <$> + (try $ string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw tags escapedTag :: Parser [Char] ParserState Inlines escapedTag = B.str <$> - (try $ string "" *> manyTill anyChar (try $ string "")) + (try $ string "" *> + manyTill anyChar' (try $ string "")) -- | Any special symbol defined in wordBoundaries symbol :: Parser [Char] ParserState Inlines @@ -552,13 +554,18 @@ symbol = B.str . 
singleton <$> (oneOf wordBoundaries <|> oneOf markupChars) code :: Parser [Char] ParserState Inlines code = code1 <|> code2 +-- any character except a newline before a blank line +anyChar' :: Parser [Char] ParserState Char +anyChar' = + satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline) + code1 :: Parser [Char] ParserState Inlines -code1 = B.code <$> surrounded (char '@') anyChar +code1 = B.code <$> surrounded (char '@') anyChar' code2 :: Parser [Char] ParserState Inlines code2 = do htmlTag (tagOpen (=="tt") null) - B.code <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) + B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes attributes :: Parser [Char] ParserState Attr @@ -570,7 +577,7 @@ attribute = classIdAttr <|> styleAttr <|> langAttr classIdAttr :: Parser [Char] ParserState (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' - ws <- words `fmap` manyTill anyChar (char ')') + ws <- words `fmap` manyTill anyChar' (char ')') case reverse ws of [] -> return $ \(_,_,keyvals) -> ("",[],keyvals) (('#':ident'):classes') -> return $ \(_,_,keyvals) -> @@ -580,7 +587,7 @@ classIdAttr = try $ do -- (class class #id) styleAttr :: Parser [Char] ParserState (Attr -> Attr) styleAttr = do - style <- try $ enclosed (char '{') (char '}') anyChar + style <- try $ enclosed (char '{') (char '}') anyChar' return $ \(id',classes,keyvals) -> (id',classes,("style",style):keyvals) langAttr :: Parser [Char] ParserState (Attr -> Attr) @@ -592,13 +599,15 @@ langAttr = do surrounded :: Parser [Char] st t -- ^ surrounding parser -> Parser [Char] st a -- ^ content parser (to be used repeatedly) -> Parser [Char] st [a] -surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) +surrounded border = + enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor -> Parser 
[Char] ParserState Inlines -- ^ content parser (to be used repeatedly) -simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct +simpleInline border construct = groupedSimpleInline border construct + <|> ungroupedSimpleInline border construct ungroupedSimpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor -- cgit v1.2.3 From 442eecc15c2b805872600e111a510e022d1920f7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 23:27:16 -0700 Subject: Textile reader: Rewrote simpleInline for clarity and efficiency. This way we only look once for the opening `[`. --- src/Text/Pandoc/Readers/Textile.hs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 622a41168..f83298d4c 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -381,6 +381,7 @@ inline = do -- | Inline parsers tried in order inlineParsers :: [Parser [Char] ParserState Inlines] inlineParsers = [ inlineMarkup + , groupedInlineMarkup , str , whitespace , endline @@ -602,17 +603,10 @@ surrounded :: Parser [Char] st t -- ^ surrounding parser surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) - simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser - -> (Inlines -> Inlines) -- ^ Inline constructor - -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) -simpleInline border construct = groupedSimpleInline border construct - <|> ungroupedSimpleInline border construct - -ungroupedSimpleInline :: Parser [Char] ParserState t -- ^ surrounding parser - -> (Inlines -> Inlines) -- ^ Inline constructor - -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) -ungroupedSimpleInline border construct = try $ do + -> (Inlines -> Inlines) -- ^ Inline 
constructor + -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) +simpleInline border construct = try $ do st <- getState pos <- getPosition let afterString = stateLastStrPos st == Just pos @@ -627,13 +621,11 @@ ungroupedSimpleInline border construct = try $ do then body else B.spanWith attr body -groupedSimpleInline :: Parser [Char] ParserState t - -> (Inlines -> Inlines) - -> Parser [Char] ParserState Inlines -groupedSimpleInline border construct = try $ do +groupedInlineMarkup :: Parser [Char] ParserState Inlines +groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace - result <- withQuoteContext InSingleQuote (simpleInline border construct) + result <- withQuoteContext InSingleQuote inlineMarkup sp2 <- option mempty $ B.space <$ whitespace char ']' return $ sp1 <> result <> sp2 -- cgit v1.2.3 From 84f2336a7db04dbc3c36594d1b4aca2e18186617 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 23:41:56 -0700 Subject: Textile reader: Rearranged inline parsers for performance. This is possible because of the rewrite of simpleInline. Also removed a redundant parser for grouped inlines. 
--- src/Text/Pandoc/Readers/Textile.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index f83298d4c..2f1fd30b4 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -380,17 +380,16 @@ inline = do -- | Inline parsers tried in order inlineParsers :: [Parser [Char] ParserState Inlines] -inlineParsers = [ inlineMarkup - , groupedInlineMarkup - , str +inlineParsers = [ str , whitespace , endline , code , escapedInline + , inlineMarkup + , groupedInlineMarkup , rawHtmlInline , rawLaTeXInline' , note - , try $ (char '[' *> inlineMarkup <* char ']') , link , image , mark -- cgit v1.2.3 From 0048b8c68c338048d2c78fc36cceba5fbe6f7110 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 7 May 2014 09:32:02 -0700 Subject: README: Updated Authors. --- README | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README b/README index 7ea1146f4..59243c2df 100644 --- a/README +++ b/README @@ -2966,7 +2966,8 @@ Nathan Gass, Jonathan Daugherty, Jérémy Bobbio, Justin Bogner, qerub, Christopher Sawicki, Kelsey Hightower, Masayoshi Takahashi, Antoine Latter, Ralf Stephan, Eric Seidel, B. Scott Michel, Gavin Beatty, Sergey Astanin, Arlo O'Keeffe, Denis Laxalde, Brent Yorgey, David Lazar, -Jamie F. Olson. +Jamie F. Olson, Matthew Pickering, Albert Krewinkel, mb21, Jesse +Rosenthal. [markdown]: http://daringfireball.net/projects/markdown/ [reStructuredText]: http://docutils.sourceforge.net/docs/ref/rst/introduction.html -- cgit v1.2.3 From 0050b509052ff81ba021b98fdbc573d3475ed74c Mon Sep 17 00:00:00 2001 From: mpickering Date: Wed, 7 May 2014 13:03:45 +0100 Subject: Fix textile reader hanging. Textile reader hung on pandoc -f textile http://johnmacfarlane.net/pandoc/demo/example25.textile The reader no longer hangs. 
--- src/Text/Pandoc/Readers/Textile.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 2f1fd30b4..f7c87ab5a 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -513,7 +513,8 @@ link = try $ do char '"' *> notFollowedBy (oneOf " \t\n\r") attr <- attributes name <- trimInlines . mconcat <$> - withQuoteContext InSingleQuote (manyTill inline (try (string "\":"))) + withQuoteContext InDoubleQuote (many1Till inline (try (char '"'))) + char ':' let stop = if bracketed then char ']' else lookAhead $ space <|> -- cgit v1.2.3 From f0f88111e6597ade7e771457fc1b81bcc9a6d974 Mon Sep 17 00:00:00 2001 From: mpickering Date: Wed, 7 May 2014 13:03:45 +0100 Subject: Small improvement to textile reader fix. Removed 'try'. --- src/Text/Pandoc/Readers/Textile.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index f7c87ab5a..9ee34caa5 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -513,7 +513,7 @@ link = try $ do char '"' *> notFollowedBy (oneOf " \t\n\r") attr <- attributes name <- trimInlines . mconcat <$> - withQuoteContext InDoubleQuote (many1Till inline (try (char '"'))) + withQuoteContext InDoubleQuote (many1Till inline (char '"')) char ':' let stop = if bracketed then char ']' -- cgit v1.2.3 From 985ba0b32bb787f52241a0e16d3e967c6215249f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 7 May 2014 11:09:45 -0700 Subject: Require latest highlighting-kate. 
--- pandoc.cabal | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index 3991d2d82..077a07110 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -226,7 +226,7 @@ Library tagsoup >= 0.13.1 && < 0.14, base64-bytestring >= 0.1 && < 1.1, zlib >= 0.5 && < 0.6, - highlighting-kate >= 0.5.6 && < 0.6, + highlighting-kate >= 0.5.7 && < 0.6, data-default >= 0.4 && < 0.6, temporary >= 1.1 && < 1.3, blaze-html >= 0.5 && < 0.8, @@ -327,7 +327,7 @@ Executable pandoc text >= 0.11 && < 1.2, bytestring >= 0.9 && < 0.11, extensible-exceptions >= 0.1 && < 0.2, - highlighting-kate >= 0.5.6 && < 0.6, + highlighting-kate >= 0.5.7 && < 0.6, aeson >= 0.7 && < 0.8, yaml >= 0.8.8.2 && < 0.9, containers >= 0.1 && < 0.6, @@ -370,7 +370,7 @@ Test-Suite test-pandoc directory >= 1 && < 1.3, filepath >= 1.1 && < 1.4, process >= 1 && < 1.3, - highlighting-kate >= 0.5.6 && < 0.6, + highlighting-kate >= 0.5.7 && < 0.6, Diff >= 0.2 && < 0.4, test-framework >= 0.3 && < 0.9, test-framework-hunit >= 0.2 && < 0.4, -- cgit v1.2.3 From b71810d7e6af369adef042b825bb4572b7c58ca5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 7 May 2014 11:10:25 -0700 Subject: Require latest texmath. --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index 077a07110..d3b0b71f8 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -217,7 +217,7 @@ Library old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5, HTTP >= 4000.0.5 && < 4000.3, - texmath >= 0.6.6 && < 0.7, + texmath >= 0.6.6.1 && < 0.7, xml >= 1.3.12 && < 1.4, random >= 1 && < 1.1, extensible-exceptions >= 0.1 && < 0.2, -- cgit v1.2.3 From 249c3e8c5a1864b2c3e4776d499f2ab55fc84607 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 7 May 2014 11:21:04 -0700 Subject: Bumped version bound for mtl. 
--- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index d3b0b71f8..ea0aa71ba 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -206,7 +206,7 @@ Library unordered-containers >= 0.2 && < 0.3, array >= 0.3 && < 0.6, parsec >= 3.1 && < 3.2, - mtl >= 1.1 && < 2.2, + mtl >= 1.1 && < 2.3, network >= 2 && < 2.6, filepath >= 1.1 && < 1.4, process >= 1 && < 1.3, -- cgit v1.2.3 From 8afbd7e66499c4c9a24b225d3af01d353e0876ca Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 7 May 2014 20:12:46 -0700 Subject: OSX package script: cabal init before update. --- make_osx_package.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index 1a12601dc..3119f140e 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -20,11 +20,11 @@ echo Removing old files... rm -rf $DIST mkdir -p $RESOURCES +cabal sandbox init echo Updating database cabal update echo Building pandoc... -cabal sandbox init cabal clean # Use cpphs to avoid problems with clang cpp on ghc 7.8 osx: cabal install cpphs alex happy hsb2hs -- cgit v1.2.3 From 8fdbef841d0ef77dcc2e30cfa475e92a0f3de6cf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 8 May 2014 21:50:20 +0200 Subject: Update copyright notices for 2014, add missing notices --- COPYRIGHT | 10 +++++----- Setup.hs | 17 +++++++++++++++++ benchmark/benchmark-pandoc.hs | 18 +++++++++++++++++- pandoc.cabal | 2 +- pandoc.hs | 6 +++--- src/Text/Pandoc.hs | 4 ++-- src/Text/Pandoc/Asciify.hs | 4 ++-- src/Text/Pandoc/Highlighting.hs | 4 ++-- src/Text/Pandoc/ImageSize.hs | 4 ++-- src/Text/Pandoc/MIME.hs | 4 ++-- src/Text/Pandoc/Options.hs | 4 ++-- src/Text/Pandoc/PDF.hs | 4 ++-- src/Text/Pandoc/Parsing.hs | 4 ++-- src/Text/Pandoc/Pretty.hs | 4 ++-- src/Text/Pandoc/Process.hs | 4 ++-- src/Text/Pandoc/Readers/HTML.hs | 4 ++-- src/Text/Pandoc/Readers/LaTeX.hs | 4 ++-- src/Text/Pandoc/Readers/Markdown.hs | 4 ++-- 
src/Text/Pandoc/Readers/MediaWiki.hs | 4 ++-- src/Text/Pandoc/Readers/Native.hs | 4 ++-- src/Text/Pandoc/Readers/RST.hs | 4 ++-- src/Text/Pandoc/Readers/TeXMath.hs | 4 ++-- src/Text/Pandoc/Readers/Textile.hs | 5 +++-- src/Text/Pandoc/SelfContained.hs | 4 ++-- src/Text/Pandoc/Shared.hs | 4 ++-- src/Text/Pandoc/Slides.hs | 4 ++-- src/Text/Pandoc/Templates.hs | 4 ++-- src/Text/Pandoc/UTF8.hs | 4 ++-- src/Text/Pandoc/UUID.hs | 4 ++-- src/Text/Pandoc/Writers/AsciiDoc.hs | 4 ++-- src/Text/Pandoc/Writers/ConTeXt.hs | 4 ++-- src/Text/Pandoc/Writers/Custom.hs | 4 ++-- src/Text/Pandoc/Writers/Docbook.hs | 4 ++-- src/Text/Pandoc/Writers/Docx.hs | 4 ++-- src/Text/Pandoc/Writers/EPUB.hs | 4 ++-- src/Text/Pandoc/Writers/HTML.hs | 4 ++-- src/Text/Pandoc/Writers/LaTeX.hs | 4 ++-- src/Text/Pandoc/Writers/Man.hs | 4 ++-- src/Text/Pandoc/Writers/Markdown.hs | 4 ++-- src/Text/Pandoc/Writers/MediaWiki.hs | 4 ++-- src/Text/Pandoc/Writers/Native.hs | 4 ++-- src/Text/Pandoc/Writers/ODT.hs | 4 ++-- src/Text/Pandoc/Writers/OPML.hs | 4 ++-- src/Text/Pandoc/Writers/OpenDocument.hs | 6 +++--- src/Text/Pandoc/Writers/Org.hs | 5 +++-- src/Text/Pandoc/Writers/RST.hs | 4 ++-- src/Text/Pandoc/Writers/RTF.hs | 4 ++-- src/Text/Pandoc/Writers/Shared.hs | 4 ++-- src/Text/Pandoc/Writers/Texinfo.hs | 4 ++-- src/Text/Pandoc/Writers/Textile.hs | 4 ++-- src/Text/Pandoc/XML.hs | 4 ++-- 51 files changed, 138 insertions(+), 103 deletions(-) diff --git a/COPYRIGHT b/COPYRIGHT index cd5adb1be..065090018 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,5 +1,5 @@ Pandoc -Copyright (C) 2006-2013 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This code is released under the [GPL], version 2 or later: @@ -33,25 +33,25 @@ licenses. ---------------------------------------------------------------------- src/Text/Pandoc/Writers/Texinfo.hs -Copyright (C) 2008-2010 John MacFarlane and Peter Wang +Copyright (C) 2008-2014 John MacFarlane and Peter Wang Released under the GNU General Public License version 2 or later. 
---------------------------------------------------------------------- src/Text/Pandoc/Writers/OpenDocument.hs -Copyright (C) 2008-2010 Andrea Rossato and John MacFarlane +Copyright (C) 2008-2014 Andrea Rossato and John MacFarlane Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Writers/Org.hs -Copyright (C) 2010 Puneeth Chaganti +Copyright (C) 2010-2014 Puneeth Chaganti and JohnMacFarlane Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Readers/Textile.hs -Copyright (C) 2010 Paul Rivier +Copyright (C) 2010-2014 Paul Rivier and John MacFarlane Released under the GNU General Public License version 2 or later. diff --git a/Setup.hs b/Setup.hs index 89d03ee7a..f5d18eee4 100644 --- a/Setup.hs +++ b/Setup.hs @@ -1,4 +1,21 @@ {-# LANGUAGE CPP #-} +{- +Copyright (C) 2006-2014 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} import Distribution.Simple import Distribution.Simple.PreProcess diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 2eaaf91a1..9238b09d7 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -1,3 +1,20 @@ +{- +Copyright (C) 2012-2014 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} import Text.Pandoc import Criterion.Main import Criterion.Config @@ -36,4 +53,3 @@ main = do let writers' = [(n,w) | (n, PureStringWriter w) <- writers] defaultMainWith conf (return ()) $ map (writerBench doc) writers' ++ readerBs - diff --git a/pandoc.cabal b/pandoc.cabal index ea0aa71ba..63c748a47 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -4,7 +4,7 @@ Cabal-Version: >= 1.10 Build-Type: Custom License: GPL License-File: COPYING -Copyright: (c) 2006-2013 John MacFarlane +Copyright: (c) 2006-2014 John MacFarlane Author: John MacFarlane Maintainer: John MacFarlane Bug-Reports: https://github.com/jgm/pandoc/issues diff --git a/pandoc.hs b/pandoc.hs index 959605625..5dd0e6899 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE CPP #-} {- -Copyright (C) 2006-2013 
John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Main - Copyright : Copyright (C) 2006-2013 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane @@ -69,7 +69,7 @@ import qualified Data.Yaml as Yaml import qualified Data.Text as T copyrightMessage :: String -copyrightMessage = "\nCopyright (C) 2006-2013 John MacFarlane\n" ++ +copyrightMessage = "\nCopyright (C) 2006-2014 John MacFarlane\n" ++ "Web: http://johnmacfarlane.net/pandoc\n" ++ "This is free software; see the source for copying conditions. There is no\n" ++ "warranty, not even for merchantability or fitness for a particular purpose." diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index a37c98814..dd5bc18f6 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs index 1c177da90..8a5ccec5c 100644 --- a/src/Text/Pandoc/Asciify.hs +++ b/src/Text/Pandoc/Asciify.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013 John MacFarlane +Copyright (C) 2013-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under 
the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2013 John MacFarlane + Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 11d608db6..2e7a9f648 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008 John MacFarlane +Copyright (C) 2008-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Highlighting - Copyright : Copyright (C) 2008 John MacFarlane + Copyright : Copyright (C) 2008-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 3c9623b3c..a6d076fa9 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} {- - Copyright (C) 2011 John MacFarlane + Copyright (C) 2011-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ {- | Module : Text.Pandoc.ImageSize -Copyright : Copyright (C) 2011 John MacFarlane +Copyright : Copyright (C) 2011-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 977cb576b..6e6284b25 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011 John MacFarlane +Copyright (C) 
2011-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.MIME - Copyright : Copyright (C) 2011 John MacFarlane + Copyright : Copyright (C) 2011-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 38220f542..611a6bb06 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2012 John MacFarlane +Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Options - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index abc5c41b7..e4e06e6c9 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, CPP #-} {- -Copyright (C) 2012 John MacFarlane +Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.PDF - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4d0a677da..d1e55cbc4 100644 --- 
a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,7 +1,7 @@ {-# LANGUAGE GeneralizedNewtypeDeriving, TypeSynonymInstances, FlexibleInstances#-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Parsing - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 5331587ce..d25ba725f 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,6 +1,6 @@ {-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-} {- -Copyright (C) 2010 John MacFarlane +Copyright (C) 2010-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA {- | Module : Text.Pandoc.Pretty - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs index 112c5b974..9c8853366 100644 --- a/src/Text/Pandoc/Process.hs +++ b/src/Text/Pandoc/Process.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013 John MacFarlane +Copyright (C) 2013-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Process - Copyright : Copyright (C) 2013 John 
MacFarlane + Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index c94ee3d6b..905e55b22 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 79c66b510..bfafea1f6 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-} {- -Copyright (C) 2006-2012 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-2012 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index aac87f363..d1637b701 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,6 +1,6 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 {- -Copyright (C) 2006-2010 John MacFarlane 
+Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-2013 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index feaedb7c2..e4fabc898 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,7 +1,7 @@ {-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- - Copyright (C) 2012 John MacFarlane + Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.MediaWiki - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index c5d4cb98a..f4dfa62c1 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011 John MacFarlane +Copyright (C) 2011-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Native - Copyright : Copyright (C) 2011 John MacFarlane + Copyright : Copyright 
(C) 2011-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 54b6fa34a..fa8438e70 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 6bd617f7e..f03eae044 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007-2010 John MacFarlane +Copyright (C) 2007-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.TeXMath - Copyright : Copyright (C) 2007-2010 John MacFarlane + Copyright : Copyright (C) 2007-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 9ee34caa5..6d839ec1d 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -1,5 +1,6 @@ {- -Copyright (C) 2010 Paul Rivier | tr '*#' '.@' +Copyright (C) 2010-2014 Paul Rivier | tr '*#' '.@' + and John MacFarlane This program is free software; you can redistribute 
it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Textile - Copyright : Copyright (C) 2010-2012 Paul Rivier and John MacFarlane + Copyright : Copyright (C) 2010-2014 Paul Rivier and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Paul Rivier diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 7fc9c2966..2a2f56281 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2011 John MacFarlane +Copyright (C) 2011-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2011 John MacFarlane + Copyright : Copyright (C) 2011-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 6f0629ea2..31c490af6 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,7 +1,7 @@ {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, FlexibleContexts, ScopedTypeVariables #-} {- -Copyright (C) 2006-2013 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-2013 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git 
a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index 50c46d17f..2b863c780 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2012 John MacFarlane +Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Slides - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 52625abf6..551db6483 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -1,7 +1,7 @@ {-# LANGUAGE TypeSynonymInstances, FlexibleInstances, CPP, OverloadedStrings, GeneralizedNewtypeDeriving #-} {- -Copyright (C) 2009-2013 John MacFarlane +Copyright (C) 2009-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Templates - Copyright : Copyright (C) 2009-2013 John MacFarlane + Copyright : Copyright (C) 2009-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 229442543..33c9ec1c5 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,6 +1,6 @@ {-# LANGUAGE CPP #-} {- -Copyright (C) 2010 John MacFarlane +Copyright (C) 2010-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 
02111-1307 USA {- | Module : Text.Pandoc.UTF8 - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs index 082644eea..eebfe09d2 100644 --- a/src/Text/Pandoc/UUID.hs +++ b/src/Text/Pandoc/UUID.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010 John MacFarlane +Copyright (C) 2010-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UUID - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 15579cba2..19112d8f5 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.AsciiDoc - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index cec420dcf..3b321cc19 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2007-2010 John MacFarlane +Copyright (C) 
2007-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-2010 John MacFarlane + Copyright : Copyright (C) 2007-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 0b30287f5..88f590c43 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -{- Copyright (C) 2012 John MacFarlane +{- Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Custom - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 1a8e58354..ba6a92a08 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : 
Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index fcb73a427..551d97855 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2012 John MacFarlane +Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index c39a7798d..893ec3be9 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,6 +1,6 @@ {-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-} {- -Copyright (C) 2010 John MacFarlane +Copyright (C) 2010-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 1de4345f9..9a26cf2ac 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings, CPP #-} {-# OPTIONS_GHC -fno-warn-deprecations #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 
2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index c17e041b5..c221b318e 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 680bfef44..41eb3e5be 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007-2010 John MacFarlane +Copyright (C) 2007-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007-2010 John MacFarlane + Copyright : Copyright (C) 2007-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git 
a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 95082add6..f42a1b54c 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} {- -Copyright (C) 2006-2013 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2013 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 83fefaa29..3b987ba2b 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2010 John MacFarlane +Copyright (C) 2008-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2008-2010 John MacFarlane + Copyright : Copyright (C) 2008-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 090b97433..cb821e40b 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the 
GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Native - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index c3652d65d..15f7c8be8 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2008-2010 John MacFarlane +Copyright (C) 2008-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ODT - Copyright : Copyright (C) 2008-2010 John MacFarlane + Copyright : Copyright (C) 2008-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index f6926c1dc..dd359f3f5 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013 John MacFarlane +Copyright (C) 2013-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OPML - Copyright : Copyright (C) 2013 John MacFarlane + Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 0029c3296..b6da2694c 100644 --- 
a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,7 +1,7 @@ {-# LANGUAGE PatternGuards, OverloadedStrings #-} {- -Copyright (C) 2008-2010 Andrea Rossato -and John MacFarlane. +Copyright (C) 2008-2014 Andrea Rossato + and John MacFarlane. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OpenDocument - Copyright : Copyright (C) 2008-2010 Andrea Rossato and John MacFarlane + Copyright : Copyright (C) 2008-2014 Andrea Rossato and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Andrea Rossato diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 58a5729e7..87046537c 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 Puneeth Chaganti +Copyright (C) 2010-2014 Puneeth Chaganti + and John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Org - Copyright : Copyright (C) 2010 Puneeth Chaganti + Copyright : Copyright (C) 2010-2014 Puneeth Chaganti and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Puneeth Chaganti diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 1e7596b21..31c97349b 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU 
General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 3e0bd9976..e0428aaa8 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 604aac1c9..800e741a4 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2013 John MacFarlane +Copyright (C) 2013-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Shared - Copyright : Copyright (C) 2013 John MacFarlane + Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index bf3df8035..8ac717bab 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ 
b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2008-2010 John MacFarlane and Peter Wang +Copyright (C) 2008-2014 John MacFarlane and Peter Wang This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008-2010 John MacFarlane and Peter Wang + Copyright : Copyright (C) 2008-2014 John MacFarlane and Peter Wang License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 95aedf780..3a6982a01 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010 John MacFarlane +Copyright (C) 2010-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Textile - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index c11af9a19..8000368aa 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : 
Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane -- cgit v1.2.3 From 7760504bb26f215e7d0c57da843f1f1dcc8c1186 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 8 May 2014 17:01:58 +0200 Subject: Org reader: refactor #+BEGIN..#+END block parsing code --- src/Text/Pandoc/Readers/Org.hs | 122 +++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 42 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index dba61dfe0..9df8ce0b3 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -50,7 +50,7 @@ import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M -import Data.Maybe (listToMaybe, fromMaybe, isJust) +import Data.Maybe (fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) import Network.HTTP (urlEncode) @@ -162,7 +162,8 @@ popInlineCharStack = updateState $ \s -> s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } surroundingEmphasisChar :: OrgParser [Char] -surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState +surroundingEmphasisChar = + take 1 . drop 1 . orgStateEmphasisCharStack <$> getState startEmphasisNewlinesCounting :: Int -> OrgParser () startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> @@ -170,7 +171,7 @@ startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> decEmphasisNewlinesCount :: OrgParser () decEmphasisNewlinesCount = updateState $ \s -> - s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } + s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } newlinesCountWithinLimits :: OrgParser Bool newlinesCountWithinLimits = do @@ -296,41 +297,60 @@ lookupInlinesAttr attr = try $ do -- Org Blocks (#+BEGIN_... / #+END_...) 
-- +type BlockProperties = (Int, String) -- (Indentation, Block-Type) + orgBlock :: OrgParser (F Blocks) orgBlock = try $ do - (indent, blockType, args) <- blockHeader - content <- rawBlockContent indent blockType - contentBlocks <- parseFromString parseBlocks (content ++ "\n") - let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] - case blockType of - "comment" -> return mempty - "html" -> returnF $ B.rawBlock "html" content - "latex" -> returnF $ B.rawBlock "latex" content - "ascii" -> returnF $ B.rawBlock "ascii" content - "example" -> returnF $ exampleCode content - "quote" -> return $ B.blockQuote <$> contentBlocks - "verse" -> parseVerse content - "src" -> codeBlockWithAttr classArgs content - _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks + blockProp@(_, blkType) <- blockHeaderStart + ($ blockProp) $ + case blkType of + "comment" -> withRaw' (const mempty) + "html" -> withRaw' (return . (B.rawBlock blkType)) + "latex" -> withRaw' (return . (B.rawBlock blkType)) + "ascii" -> withRaw' (return . (B.rawBlock blkType)) + "example" -> withRaw' (return . exampleCode) + "quote" -> withParsed (fmap B.blockQuote) + "verse" -> verseBlock + "src" -> codeBlock + _ -> withParsed (fmap $ divWithClass blkType) + +blockHeaderStart :: OrgParser (Int, String) +blockHeaderStart = try $ (,) <$> indent <*> blockType where - parseVerse :: String -> OrgParser (F Blocks) - parseVerse cs = - fmap B.para . mconcat . 
intersperse (pure B.linebreak) - <$> mapM (parseFromString parseInlines) (lines cs) - -blockHeader :: OrgParser (Int, String, [String]) -blockHeader = (,,) <$> blockIndent - <*> blockType - <*> (skipSpaces *> blockArgs) - where blockIndent = length <$> many spaceChar - blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) - blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline - -codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks) -codeBlockWithAttr classArgs content = do - identifier <- fromMaybe "" <$> lookupBlockAttribute "name" - caption <- lookupInlinesAttr "caption" - let codeBlck = B.codeBlockWith (identifier, classArgs, []) content + indent = length <$> many spaceChar + blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar) + +withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) + +withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) + +ignHeaders :: OrgParser () +ignHeaders = (() <$ newline) <|> (() <$ anyLine) + +divWithClass :: String -> Blocks -> Blocks +divWithClass cls = B.divWith ("", [cls], []) + +verseBlock :: BlockProperties -> OrgParser (F Blocks) +verseBlock blkProp = try $ do + ignHeaders + content <- rawBlockContent blkProp + fmap B.para . mconcat . 
intersperse (pure B.linebreak) + <$> mapM (parseFromString parseInlines) (lines content) + +codeBlock :: BlockProperties -> OrgParser (F Blocks) +codeBlock blkProp = do + skipSpaces + language <- optionMaybe orgArgWord + (classes, kv) <- codeHeaderArgs + id' <- fromMaybe "" <$> lookupBlockAttribute "name" + caption <- lookupInlinesAttr "caption" + content <- rawBlockContent blkProp + let attr = ( id' + , maybe id (\l -> (l:)) language $ classes + , kv ) + let codeBlck = B.codeBlockWith attr content return $ maybe (pure codeBlck) (labelDiv codeBlck) caption where labelDiv blk value = @@ -338,14 +358,21 @@ codeBlockWithAttr classArgs content = do <*> pure blk) labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) -rawBlockContent :: Int -> String -> OrgParser String -rawBlockContent indent blockType = +rawBlockContent :: BlockProperties -> OrgParser String +rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where - indentedLine = try $ choice [ blankline *> pure "\n" - , indentWith indent *> anyLine - ] - blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + indentedLine = try $ + choice [ blankline *> pure "\n" + , indentWith indent *> anyLine + ] + blockEnder = try $ + indentWith indent *> stringAnyCase ("#+end_" <> blockType) + +parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) +parsedBlockContent blkProps = try $ do + raw <- rawBlockContent blkProps + parseFromString parseBlocks (raw ++ "\n") -- indent by specified number of spaces (or equiv. 
tabs) indentWith :: Int -> OrgParser String @@ -356,6 +383,13 @@ indentWith num = do else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] +orgArgWord :: OrgParser String +orgArgWord = many1 orgArgWordChar + +codeHeaderArgs :: OrgParser ([String], [(String, String)]) +codeHeaderArgs = + (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline + translateLang :: String -> String translateLang "C" = "c" translateLang "C++" = "cpp" @@ -1002,9 +1036,13 @@ inlineCodeBlock = try $ do returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where enclosedByPair s e p = char s *> many1Till p (char e) +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + -- | The class-name used to mark rundoc blocks. rundocBlockClass :: String -rundocBlockClass = "rundoc-block" +rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) blockOption = try $ (,) <$> orgArgKey <*> orgArgValue -- cgit v1.2.3 From 757c4f68f3f3cab99db9499936e3ae4775ebbddf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 9 May 2014 18:07:37 +0200 Subject: Org reader: Support arguments for code blocks The general form of source block headers (`#+BEGIN_SRC
`) was not recognized by the reader. This patch adds support for the above form, adds header arguments to the block's key-value pairs and marks the block as a rundoc block if header arguments are present. This closes #1286. --- src/Text/Pandoc/Readers/Org.hs | 98 ++++++++++++++++++++++++------------------ tests/Tests/Readers/Org.hs | 14 ++++++ 2 files changed, 70 insertions(+), 42 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 9df8ce0b3..c05ac92d0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -276,7 +276,7 @@ parseBlockAttributes = do where attribute :: OrgParser (String, String) attribute = try $ do - key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':') + key <- metaLineStart *> many1Till nonspaceChar (char ':') val <- skipSpaces *> anyLine return (map toLower key, val) @@ -342,16 +342,11 @@ verseBlock blkProp = try $ do codeBlock :: BlockProperties -> OrgParser (F Blocks) codeBlock blkProp = do skipSpaces - language <- optionMaybe orgArgWord - (classes, kv) <- codeHeaderArgs + (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) id' <- fromMaybe "" <$> lookupBlockAttribute "name" - caption <- lookupInlinesAttr "caption" content <- rawBlockContent blkProp - let attr = ( id' - , maybe id (\l -> (l:)) language $ classes - , kv ) - let codeBlck = B.codeBlockWith attr content - return $ maybe (pure codeBlck) (labelDiv codeBlck) caption + let codeBlck = B.codeBlockWith ( id', classes, kv ) content + maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption" where labelDiv blk value = B.divWith nullAttr <$> (mappend <$> labelledBlock value @@ -383,12 +378,33 @@ indentWith num = do else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] +type SwitchOption = (Char, Maybe String) + orgArgWord :: OrgParser String orgArgWord = many1 orgArgWordChar +-- | Parse code block arguments +-- TODO: We currently don't handle 
switches. codeHeaderArgs :: OrgParser ([String], [(String, String)]) -codeHeaderArgs = - (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline +codeHeaderArgs = try $ do + language <- skipSpaces *> orgArgWord + _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + parameters <- manyTill blockOption newline + let pandocLang = translateLang language + return $ + if hasRundocParameters parameters + then ( [ pandocLang, rundocBlockClass ] + , map toRundocAttrib (("language", language) : parameters) + ) + else ([ pandocLang ], parameters) + where hasRundocParameters = not . null + +switch :: OrgParser SwitchOption +switch = try $ simpleSwitch <|> lineNumbersSwitch + where + simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) + lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> + (string "-l \"" *> many1Till nonspaceChar (char '"')) translateLang :: String -> String translateLang "C" = "c" @@ -401,6 +417,32 @@ translateLang "sh" = "bash" translateLang "sqlite" = "sql" translateLang cs = cs +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + +-- | The class-name used to mark rundoc blocks. 
+rundocBlockClass :: String +rundocBlockClass = rundocPrefix ++ "block" + +blockOption :: OrgParser (String, String) +blockOption = try $ (,) <$> orgArgKey <*> orgArgValue + +orgArgKey :: OrgParser String +orgArgKey = try $ + skipSpaces *> char ':' + *> many1 orgArgWordChar + +orgArgValue :: OrgParser String +orgArgValue = try $ + skipSpaces *> many1 orgArgWordChar <* skipSpaces + +orgArgWordChar :: OrgParser Char +orgArgWordChar = alphaNum <|> oneOf "-_" + +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first ("rundoc-" ++) + commaEscaped :: String -> String commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs @@ -425,7 +467,7 @@ drawer = try $ do drawerStart :: OrgParser String drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline + skipSpaces *> drawerName <* skipSpaces <* P.newline where drawerName = try $ char ':' *> validDrawerName <* char ':' validDrawerName = stringAnyCase "PROPERTIES" <|> stringAnyCase "LOGBOOK" @@ -435,7 +477,7 @@ drawerLine = try anyLine drawerEnd :: OrgParser String drawerEnd = try $ - skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* P.newline -- @@ -446,7 +488,7 @@ drawerEnd = try $ figure :: OrgParser (F Blocks) figure = try $ do (cap, nam) <- nameAndCaption - src <- skipSpaces *> selfTarget <* skipSpaces <* newline + src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline guard (isImageFilename src) return $ do cap' <- cap @@ -1036,34 +1078,6 @@ inlineCodeBlock = try $ do returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where enclosedByPair s e p = char s *> many1Till p (char e) --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. 
-rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - -blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgArgValue - -orgArgKey :: OrgParser String -orgArgKey = try $ - skipSpaces *> char ':' - *> many1 orgArgWordChar - <* many1 spaceChar - -orgArgValue :: OrgParser String -orgArgValue = try $ - skipSpaces *> many1 orgArgWordChar - <* skipSpaces - -orgArgWordChar :: OrgParser Char -orgArgWordChar = alphaNum <|> oneOf "-_" - -toRundocAttrib :: (String, String) -> (String, String) -toRundocAttrib = first ("rundoc-" ++) - emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 949976aba..a78e8861f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -822,6 +822,20 @@ tests = in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] , codeBlockWith attr' code' ] + , "Source block with rundoc/babel arguments" =: + unlines [ "#+BEGIN_SRC emacs-lisp :exports both" + , "(progn (message \"Hello, World!\")" + , " (+ 23 42))" + , "#+END_SRC" ] =?> + let classes = [ "commonlisp" -- as kate doesn't know emacs-lisp syntax + , "rundoc-block" + ] + params = [ ("rundoc-language", "emacs-lisp") + , ("rundoc-exports", "both") + ] + code' = unlines [ "(progn (message \"Hello, World!\")" + , " (+ 23 42))" ] + in codeBlockWith ("", classes, params) code' , "Example block" =: unlines [ "#+begin_example" -- cgit v1.2.3 From 07694b30184bcf2ed0e2998016df394f47a1996f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 9 May 2014 18:23:23 +0200 Subject: Org reader: Fix parsing of blank lines within blocks Blank lines were parsed as two newlines instead of just one. Thanks to Xiao Hanyu (@xiaohanyu) for pointing this out. 
--- src/Text/Pandoc/Readers/Org.hs | 8 ++------ tests/Tests/Readers/Org.hs | 9 +++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c05ac92d0..0f218d43f 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -357,12 +357,8 @@ rawBlockContent :: BlockProperties -> OrgParser String rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where - indentedLine = try $ - choice [ blankline *> pure "\n" - , indentWith indent *> anyLine - ] - blockEnder = try $ - indentWith indent *> stringAnyCase ("#+end_" <> blockType) + indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) + blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) parsedBlockContent blkProps = try $ do diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index a78e8861f..87b0d0c90 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -920,5 +920,14 @@ tests = (unlines [ "fmap id = id" , "fmap (p . q) = (fmap p) . (fmap q)" ]))) + + , "Convert blank lines in blocks to single newlines" =: + unlines [ "#+begin_html" + , "" + , "boring" + , "" + , "#+end_html" + ] =?> + rawBlock "html" "\nboring\n\n" ] ] -- cgit v1.2.3 From c5fd631b550a3b05b60de1684c80387bc46a88cc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 10 May 2014 11:25:20 +0200 Subject: Org reader: Fix block parameter reader, relax constraints The reader produced wrong results for block containing non-letter chars in their parameter arguments. This patch relaxes constraints in that it allows block header arguments to contain any non-space character (except for ']' for inline blocks). Thanks to Xiao Hanyu for noticing this. 
--- src/Text/Pandoc/Readers/Org.hs | 19 +++++++++++++------ tests/Tests/Readers/Org.hs | 12 ++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0f218d43f..2e4a29beb 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -318,7 +318,7 @@ blockHeaderStart :: OrgParser (Int, String) blockHeaderStart = try $ (,) <$> indent <*> blockType where indent = length <$> many spaceChar - blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar) + blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) @@ -422,16 +422,23 @@ rundocBlockClass :: String rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgArgValue +blockOption = try $ (,) <$> orgArgKey <*> orgParamValue + +inlineBlockOption :: OrgParser (String, String) +inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue orgArgKey :: OrgParser String orgArgKey = try $ skipSpaces *> char ':' *> many1 orgArgWordChar -orgArgValue :: OrgParser String -orgArgValue = try $ - skipSpaces *> many1 orgArgWordChar <* skipSpaces +orgParamValue :: OrgParser String +orgParamValue = try $ + skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces + +orgInlineParamValue :: OrgParser String +orgInlineParamValue = try $ + skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces orgArgWordChar :: OrgParser Char orgArgWordChar = alphaNum <|> oneOf "-_" @@ -1067,7 +1074,7 @@ inlineCodeBlock :: OrgParser (F Inlines) inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar - opts <- option [] $ enclosedByPair '[' ']' blockOption + opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") let 
attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 87b0d0c90..4ef7a7731 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -929,5 +929,17 @@ tests = , "#+end_html" ] =?> rawBlock "html" "\nboring\n\n" + + , "Non-letter chars in source block parameters" =: + unlines [ "#+BEGIN_SRC C :tangle xxxx.c :city Zürich" + , "code body" + , "#+END_SRC" + ] =?> + let classes = [ "c", "rundoc-block" ] + params = [ ("rundoc-language", "C") + , ("rundoc-tangle", "xxxx.c") + , ("rundoc-city", "Zürich") + ] + in codeBlockWith ( "", classes, params) "code body\n" ] ] -- cgit v1.2.3 From 009260647612238b5af964afcbeb452001f2ab0c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 May 2014 23:26:32 -0700 Subject: LaTeX reader: Don't error on "%foo" with no newline. --- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index bfafea1f6..8476c8636 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -125,7 +125,7 @@ comment :: LP () comment = do char '%' skipMany (satisfy (/='\n')) - newline + optional newline return () bgroup :: LP () -- cgit v1.2.3 From 113a32daa8a628a1f6f166c6b43d07ac005b8d42 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 11 May 2014 15:02:48 +0200 Subject: Process: Fix minor typo in pipeProcess' docs Replace fullstop with comma, adjust capitalisation. --- src/Text/Pandoc/Process.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs index 9c8853366..19872b405 100644 --- a/src/Text/Pandoc/Process.hs +++ b/src/Text/Pandoc/Process.hs @@ -47,7 +47,7 @@ terminates, and then returns the 'ExitCode' of the process, the standard output, and the standard error. 
If an asynchronous exception is thrown to the thread executing -@readProcessWithExitCode@. The forked process will be terminated and +@readProcessWithExitCode@, the forked process will be terminated and @readProcessWithExitCode@ will wait (block) until the process has been terminated. -} @@ -102,4 +102,3 @@ forkWait a = do res <- newEmptyMVar _ <- mask $ \restore -> forkIO $ try (restore a) >>= putMVar res return (takeMVar res >>= either (\ex -> throwIO (ex :: SomeException)) return) - -- cgit v1.2.3 From a8319d133908f3c39834984e5e11991b166c37b7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 May 2014 22:52:29 -0700 Subject: LaTeX reader: set `bibliography` in metadata from `\bibliography` cmd. --- src/Text/Pandoc/Readers/LaTeX.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 8476c8636..4b9e424d9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -307,6 +307,8 @@ blockCommands = M.fromList $ , ("caption", tok >>= setCaption) , ("PandocStartInclude", startInclude) , ("PandocEndInclude", endInclude) + , ("bibliography", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs)) ] ++ map ignoreBlocks -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks @@ -314,7 +316,7 @@ blockCommands = M.fromList $ -- newcommand, etc. 
should be parsed by macro, but we need this -- here so these aren't parsed as inline commands to ignore , "special", "pdfannot", "pdfstringdef" - , "bibliography", "bibliographystyle" + , "bibliographystyle" , "maketitle", "makeindex", "makeglossary" , "addcontentsline", "addtocontents", "addtocounter" -- \ignore{} is used conventionally in literate haskell for definitions @@ -329,6 +331,9 @@ addMeta :: ToMetaValue a => String -> a -> LP () addMeta field val = updateState $ \st -> st{ stateMeta = addMetaField field val $ stateMeta st } +splitBibs :: String -> [Inlines] +splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') + setCaption :: Inlines -> LP Blocks setCaption ils = do updateState $ \st -> st{ stateCaption = Just ils } -- cgit v1.2.3 From e52196691b1747db661dd4a4e7bd032ba76fcb0e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 09:21:16 -0700 Subject: Require highlighting-kate >= 0.5.8. This fixes a performance regression. --- pandoc.cabal | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index 63c748a47..dc0faa3d1 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -226,7 +226,7 @@ Library tagsoup >= 0.13.1 && < 0.14, base64-bytestring >= 0.1 && < 1.1, zlib >= 0.5 && < 0.6, - highlighting-kate >= 0.5.7 && < 0.6, + highlighting-kate >= 0.5.8 && < 0.6, data-default >= 0.4 && < 0.6, temporary >= 1.1 && < 1.3, blaze-html >= 0.5 && < 0.8, @@ -327,7 +327,7 @@ Executable pandoc text >= 0.11 && < 1.2, bytestring >= 0.9 && < 0.11, extensible-exceptions >= 0.1 && < 0.2, - highlighting-kate >= 0.5.7 && < 0.6, + highlighting-kate >= 0.5.8 && < 0.6, aeson >= 0.7 && < 0.8, yaml >= 0.8.8.2 && < 0.9, containers >= 0.1 && < 0.6, @@ -370,7 +370,7 @@ Test-Suite test-pandoc directory >= 1 && < 1.3, filepath >= 1.1 && < 1.4, process >= 1 && < 1.3, - highlighting-kate >= 0.5.7 && < 0.6, + highlighting-kate >= 0.5.8 && < 0.6, Diff >= 0.2 && < 0.4, test-framework >= 0.3 && < 0.9, 
test-framework-hunit >= 0.2 && < 0.4, -- cgit v1.2.3 From 707c74b8dbc7e57938f0ac5f96f06bd645d038d4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 12:08:24 -0700 Subject: Updated changelog. --- changelog | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/changelog b/changelog index 5238e943f..4339376df 100644 --- a/changelog +++ b/changelog @@ -1,3 +1,22 @@ +pandoc (1.12.4.1) + + * Require highlighting-kate >= 0.5.8. Fixes a performance regression. + + * LaTeX reader: + + + Set `bibliography` in metadata from `\bibliography` command. + + Don't error on `%foo` with no trailing newline. + + * Org reader: + + + Support code block headers (`#+BEGIN_SRC ...`) (Albert Krewinkel). + + Fix parsing of blank lines within blocks (Albert Krewinkel). + + * Updated copyright notices (Albert Krewinkel). + + * Fixed templates. The previous release did not include up-to-date + templates, due to a packaging error. + pandoc (1.12.4) * Made it possible to run filters that aren't executable (#1096). -- cgit v1.2.3 From 02b8608aab21d29316013512bc6cdeb65245a485 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 12:08:36 -0700 Subject: Version bump to 1.12.4.1. --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index dc0faa3d1..6b0952513 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,5 +1,5 @@ Name: pandoc -Version: 1.12.4 +Version: 1.12.4.1 Cabal-Version: >= 1.10 Build-Type: Custom License: GPL -- cgit v1.2.3 From 06edff7402bc88ca99723b718b83a5ea95860b33 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 12:30:30 -0700 Subject: Added default.icml to data-files so it installs with the package. 
--- changelog | 3 +-- pandoc.cabal | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/changelog b/changelog index 4339376df..10dff74fc 100644 --- a/changelog +++ b/changelog @@ -14,8 +14,7 @@ pandoc (1.12.4.1) * Updated copyright notices (Albert Krewinkel). - * Fixed templates. The previous release did not include up-to-date - templates, due to a packaging error. + * Added default.icml to data files so it installs with the package. pandoc (1.12.4) diff --git a/pandoc.cabal b/pandoc.cabal index 6b0952513..f29ee8fb1 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -45,6 +45,7 @@ Data-Files: data/templates/default.docbook, data/templates/default.beamer, data/templates/default.opendocument, + data/templates/default.icml, data/templates/default.opml, data/templates/default.latex, data/templates/default.context, -- cgit v1.2.3 From 2348f07b11700e685cd194c93c5891d2c288dbc0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 13:05:42 -0700 Subject: Shared addMetaField: if old and new values both lists, concatenate. --- src/Text/Pandoc/Shared.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 31c490af6..4f506b5a6 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -564,8 +564,10 @@ addMetaField :: ToMetaValue a -> Meta addMetaField key val (Meta meta) = Meta $ M.insertWith combine key (toMetaValue val) meta - where combine newval (MetaList xs) = MetaList (xs ++ [newval]) + where combine newval (MetaList xs) = MetaList (xs ++ tolist newval) combine newval x = MetaList [x, newval] + tolist (MetaList ys) = ys + tolist y = [y] -- | Create 'Meta' from old-style title, authors, date. This is -- provided to ease the transition from the old API. -- cgit v1.2.3 From aa019448d6eef010922ea778597a2b7a0f3bd58f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 13:06:06 -0700 Subject: LaTeX reader: Support `\addbibresource`. 
--- src/Text/Pandoc/Readers/LaTeX.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 4b9e424d9..6f870318f 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -309,6 +309,8 @@ blockCommands = M.fromList $ , ("PandocEndInclude", endInclude) , ("bibliography", mempty <$ (skipopts *> braced >>= addMeta "bibliography" . splitBibs)) + , ("addbibresource", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs)) ] ++ map ignoreBlocks -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks -- cgit v1.2.3 From 77b2589d3f10f988ddf6804dee401112057f5a17 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 13:07:45 -0700 Subject: Updated changelog. --- changelog | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/changelog b/changelog index 10dff74fc..1c9bea607 100644 --- a/changelog +++ b/changelog @@ -2,9 +2,14 @@ pandoc (1.12.4.1) * Require highlighting-kate >= 0.5.8. Fixes a performance regression. + * Shared: `addMetaValue` now behaves slightly differently: + if both the new and old values are lists, it concatenates their + contents to form a new list. + * LaTeX reader: - + Set `bibliography` in metadata from `\bibliography` command. + + Set `bibliography` in metadata from `\bibliography` or + `\addbibresource` command. + Don't error on `%foo` with no trailing newline. * Org reader: -- cgit v1.2.3 From 1e8cd2c27755bac4086489e9a6eed19bf4d72d45 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 19:59:44 -0700 Subject: Moved osx package stuff to osx directory; added uninstall script. Thanks to Daniel T. Staal for an uninstall script from which this one is modified. 
--- make_osx_package.sh | 86 ---------------------------- osx-resources/InstallationCheck | 14 ----- osx-resources/InstallationCheck.strings | 3 - osx/make_osx_package.sh | 87 +++++++++++++++++++++++++++++ osx/osx-resources/InstallationCheck | 14 +++++ osx/osx-resources/InstallationCheck.strings | 3 + osx/uninstall-pandoc.pl | 81 +++++++++++++++++++++++++++ 7 files changed, 185 insertions(+), 103 deletions(-) delete mode 100755 make_osx_package.sh delete mode 100755 osx-resources/InstallationCheck delete mode 100644 osx-resources/InstallationCheck.strings create mode 100755 osx/make_osx_package.sh create mode 100755 osx/osx-resources/InstallationCheck create mode 100644 osx/osx-resources/InstallationCheck.strings create mode 100755 osx/uninstall-pandoc.pl diff --git a/make_osx_package.sh b/make_osx_package.sh deleted file mode 100755 index 3119f140e..000000000 --- a/make_osx_package.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash -e - -DIST=`pwd`/osx_package -SANDBOX=`pwd`/.cabal-sandbox -VERSION=$(grep -e '^Version' pandoc.cabal | awk '{print $2}') -RESOURCES=$DIST/Resources -ROOT=$DIST/pandoc -DEST=$ROOT/usr/local -SCRIPTS=osx-resources -BASE=pandoc-$VERSION -ME=$(whoami) -CODESIGNID="Developer ID Application: John Macfarlane" -PACKAGEMAKER=/Applications/PackageMaker.app/Contents/MacOS/PackageMaker -EXES="pandoc pandoc-citeproc" - -read -s -p "sudo password: " PASSWORD -echo $PASSWORD | sudo -S echo "Password valid, continuing." - -echo Removing old files... -rm -rf $DIST -mkdir -p $RESOURCES - -cabal sandbox init -echo Updating database -cabal update - -echo Building pandoc... 
-cabal clean -# Use cpphs to avoid problems with clang cpp on ghc 7.8 osx: -cabal install cpphs alex happy hsb2hs -cabal install --reinstall --flags="embed_data_files" --ghc-options '-pgmPcpphs -optP--cpp' -cabal install --reinstall --flags="embed_data_files" pandoc-citeproc --ghc-options '-pgmPcpphs -optP--cpp' - -mkdir -p $DEST/bin -mkdir -p $DEST/share/man/man1 -mkdir -p $DEST/share/man/man5 -for f in $EXES; do - cp $SANDBOX/bin/$f $DEST/bin/; - cp $SANDBOX/share/man/man1/$f.1 $DEST/share/man/man1/ -done -cp $SANDBOX/share/man/man5/pandoc_markdown.5 $DEST/share/man/man5/ - -chown -R $ME:staff $DIST -# gzip $DEST/share/man/man?/*.* -# cabal gives man pages the wrong permissions -chmod +r $DEST/share/man/man?/*.* - -echo Copying license... -$SANDBOX/bin/pandoc --data data -t rtf -s COPYING -o $RESOURCES/License.rtf - -echo Signing pandoc executable... - -codesign --force --sign "$CODESIGNID" $DEST/bin/pandoc -# make sure it's valid... returns nonzero exit code if it isn't: -spctl --assess --type execute $DEST/bin/pandoc - -echo Creating OSX package... -# remove old package first -echo $PASSWORD | sudo -S rm -rf $BASE.pkg $BASE.dmg - -sudo $PACKAGEMAKER \ - --root $ROOT \ - --id net.johnmacfarlane.pandoc \ - --resources $RESOURCES \ - --version $VERSION \ - --scripts $SCRIPTS \ - --out $BASE.pkg - - # --no-relocate - -echo Signing package... - -sudo codesign --force --sign "$CODESIGNID" $BASE.pkg -# make sure it's valid... -spctl --assess --type install $BASE.pkg - -echo Creating zip... -zip -9 -r $BASE.pkg.zip $BASE.pkg - -# echo Creating disk image... 
-# sudo hdiutil create "$BASE.dmg" \ -# -format UDZO -ov \ -# -volname "pandoc $VERSION" \ -# -srcfolder $BASE.pkg -# sudo hdiutil internet-enable "$BASE.dmg" - diff --git a/osx-resources/InstallationCheck b/osx-resources/InstallationCheck deleted file mode 100755 index 2bd691f5c..000000000 --- a/osx-resources/InstallationCheck +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh -cputype=`/usr/sbin/sysctl -n hw.cputype` -sixtyfourbit=`/usr/sbin/sysctl -n hw.cpu64bit_capable` - -if [ "x$cputype" != "x7" ] # x86 -then - exit 112 -fi - -if [ "x$sixtyfourbit" != "x1" ] # 64 bit -then - exit 113 -fi - diff --git a/osx-resources/InstallationCheck.strings b/osx-resources/InstallationCheck.strings deleted file mode 100644 index 6c8efe0d4..000000000 --- a/osx-resources/InstallationCheck.strings +++ /dev/null @@ -1,3 +0,0 @@ -"16" = "This installer works only on Intel Macs."; -"17" = "This installer requires a 64-bit processor."; - diff --git a/osx/make_osx_package.sh b/osx/make_osx_package.sh new file mode 100755 index 000000000..c28f8fe5f --- /dev/null +++ b/osx/make_osx_package.sh @@ -0,0 +1,87 @@ +#!/bin/bash -e + +DIST=`pwd`/osx_package +SANDBOX=`pwd`/.cabal-sandbox +VERSION=$(grep -e '^Version' pandoc.cabal | awk '{print $2}') +RESOURCES=$DIST/Resources +ROOT=$DIST/pandoc +DEST=$ROOT/usr/local +SCRIPTS=osx-resources +BASE=pandoc-$VERSION +ME=$(whoami) +CODESIGNID="Developer ID Application: John Macfarlane" +PACKAGEMAKER=/Applications/PackageMaker.app/Contents/MacOS/PackageMaker +EXES="pandoc pandoc-citeproc" + +read -s -p "sudo password: " PASSWORD +echo $PASSWORD | sudo -S echo "Password valid, continuing." + +echo Removing old files... +rm -rf $DIST +mkdir -p $RESOURCES + +cabal sandbox init +echo Updating database +cabal update + +echo Building pandoc... 
+cabal clean +# Use cpphs to avoid problems with clang cpp on ghc 7.8 osx: +cabal install cpphs alex happy hsb2hs +cabal install --reinstall --flags="embed_data_files" --ghc-options '-pgmPcpphs -optP--cpp' +cabal install --reinstall --flags="embed_data_files" pandoc-citeproc --ghc-options '-pgmPcpphs -optP--cpp' + +mkdir -p $DEST/bin +mkdir -p $DEST/share/man/man1 +mkdir -p $DEST/share/man/man5 +for f in $EXES; do + cp $SANDBOX/bin/$f $DEST/bin/; + cp $SANDBOX/share/man/man1/$f.1 $DEST/share/man/man1/ +done +cp $SANDBOX/share/man/man5/pandoc_markdown.5 $DEST/share/man/man5/ +cp $SCRIPTS/uninstall-pandoc.pl $DEST/bin/ + +chown -R $ME:staff $DIST +# gzip $DEST/share/man/man?/*.* +# cabal gives man pages the wrong permissions +chmod +r $DEST/share/man/man?/*.* + +echo Copying license... +$SANDBOX/bin/pandoc --data data -t rtf -s COPYING -o $RESOURCES/License.rtf + +echo Signing pandoc executable... + +codesign --force --sign "$CODESIGNID" $DEST/bin/pandoc +# make sure it's valid... returns nonzero exit code if it isn't: +spctl --assess --type execute $DEST/bin/pandoc + +echo Creating OSX package... +# remove old package first +echo $PASSWORD | sudo -S rm -rf $BASE.pkg $BASE.dmg + +sudo $PACKAGEMAKER \ + --root $ROOT \ + --id net.johnmacfarlane.pandoc \ + --resources $RESOURCES \ + --version $VERSION \ + --scripts $SCRIPTS \ + --out $BASE.pkg + + # --no-relocate + +echo Signing package... + +sudo codesign --force --sign "$CODESIGNID" $BASE.pkg +# make sure it's valid... +spctl --assess --type install $BASE.pkg + +echo Creating zip... +zip -9 -r $BASE.pkg.zip $BASE.pkg + +# echo Creating disk image... 
+# sudo hdiutil create "$BASE.dmg" \ +# -format UDZO -ov \ +# -volname "pandoc $VERSION" \ +# -srcfolder $BASE.pkg +# sudo hdiutil internet-enable "$BASE.dmg" + diff --git a/osx/osx-resources/InstallationCheck b/osx/osx-resources/InstallationCheck new file mode 100755 index 000000000..2bd691f5c --- /dev/null +++ b/osx/osx-resources/InstallationCheck @@ -0,0 +1,14 @@ +#!/bin/sh +cputype=`/usr/sbin/sysctl -n hw.cputype` +sixtyfourbit=`/usr/sbin/sysctl -n hw.cpu64bit_capable` + +if [ "x$cputype" != "x7" ] # x86 +then + exit 112 +fi + +if [ "x$sixtyfourbit" != "x1" ] # 64 bit +then + exit 113 +fi + diff --git a/osx/osx-resources/InstallationCheck.strings b/osx/osx-resources/InstallationCheck.strings new file mode 100644 index 000000000..6c8efe0d4 --- /dev/null +++ b/osx/osx-resources/InstallationCheck.strings @@ -0,0 +1,3 @@ +"16" = "This installer works only on Intel Macs."; +"17" = "This installer requires a 64-bit processor."; + diff --git a/osx/uninstall-pandoc.pl b/osx/uninstall-pandoc.pl new file mode 100755 index 000000000..292bcfd96 --- /dev/null +++ b/osx/uninstall-pandoc.pl @@ -0,0 +1,81 @@ +#!/usr/bin/perl + +# Script to remove all files installed by the OSX pandoc installer +# and unregister the package. Modified from a script contributed +# by Daniel T. Staal. + +use warnings; +use strict; + +use File::Spec; + +# The main info: this is the list of files to remove and the pkg_id. +my $pkg_id = 'net.johnmacfarlane.pandoc'; + +my @pkg_info; + +# Find which, if any, volume Pandoc is installed on. +my $volume; + +# First check /, then other volumes on the box. +my $cur_test = `pkgutil --pkgs=$pkg_id`; +if ( $cur_test =~ m/$pkg_id/ ) { + $volume = '/'; +} else { + opendir( my $dh, '/Volumes' ) or die "Can't list Volumes: $!\n"; + foreach my $dir ( readdir($dh) ) { + next if $dir =~ m/^\./; # Skip dotfiles. + + my $path = File::Spec->rel2abs( $dir, '/Volumes' ); + next if !( -d $path ); # Skip anything that isn't a directory. 
+ + my $cur_test = `pkgutil --pkgs=$pkg_id --volume '$path'`; + if ( $cur_test =~ m/$pkg_id/ ) { + $volume = $path; + last; + } + } +} + +die "Pandoc not installed.\n" if !( defined($volume) ); + +my @pkg_files = (); +my $f; +for $f (split '\n', `pkgutil --volume '$volume' --only-files --files $pkg_id`) { + push @pkg_files, File::Spec->rel2abs($f, $volume); +}; + +print "The following files will be deleted:\n\n"; +print join("\n", @pkg_files); +print "\n\n"; +print "Do you want to proceed and uninstall pandoc (Y/N)?"; +my $input = ; + +if ($input =~ m/^[Yy]/) { + + # Actually remove the files. + foreach $f (@pkg_files) { + if (system("sudo rm $f") == 0) { + warn "Deleted $f\n"; + } else { + warn "Unable to delete $f: $!\n"; + warn "Aborting uninstall.\n"; + exit 1; + } + } + + # Clean up the install. + if (system("sudo pkgutil --forget $pkg_id --volume '$volume'") != 0) { + warn "Unable to clean up install: $!\n"; + exit 1; + } + +} else { + + print "OK, aborting uninstall.\n"; + exit 0; + +} + +print "Pandoc has been successfully uninstalled.\n"; +exit 0; -- cgit v1.2.3 From dc8de99e598a26acd51768c38ba40b8eff40a10f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 20:02:14 -0700 Subject: Updated changelog. --- changelog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/changelog b/changelog index 1c9bea607..e58f5516d 100644 --- a/changelog +++ b/changelog @@ -21,6 +21,9 @@ pandoc (1.12.4.1) * Added default.icml to data files so it installs with the package. + * Moved OSX package materials to osx directory. Added uninstall + script (thanks to Daniel T. Staal). + pandoc (1.12.4) * Made it possible to run filters that aren't executable (#1096). -- cgit v1.2.3 From dfcc32ac9f1ebe58f99b4fcf34516897b231bacf Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 20:05:16 -0700 Subject: Moved make_osx_package.sh back to root. 
--- make_osx_package.sh | 88 +++++++++++++++++++++++++++++++++++++++++++++++++ osx/make_osx_package.sh | 87 ------------------------------------------------ 2 files changed, 88 insertions(+), 87 deletions(-) create mode 100755 make_osx_package.sh delete mode 100755 osx/make_osx_package.sh diff --git a/make_osx_package.sh b/make_osx_package.sh new file mode 100755 index 000000000..168119356 --- /dev/null +++ b/make_osx_package.sh @@ -0,0 +1,88 @@ +#!/bin/bash -e + +DIST=`pwd`/osx_package +SANDBOX=`pwd`/.cabal-sandbox +VERSION=$(grep -e '^Version' pandoc.cabal | awk '{print $2}') +RESOURCES=$DIST/Resources +ROOT=$DIST/pandoc +DEST=$ROOT/usr/local +OSX=osx +SCRIPTS=$OSX/osx-resources +BASE=pandoc-$VERSION +ME=$(whoami) +CODESIGNID="Developer ID Application: John Macfarlane" +PACKAGEMAKER=/Applications/PackageMaker.app/Contents/MacOS/PackageMaker +EXES="pandoc pandoc-citeproc" + +read -s -p "sudo password: " PASSWORD +echo $PASSWORD | sudo -S echo "Password valid, continuing." + +echo Removing old files... +rm -rf $DIST +mkdir -p $RESOURCES + +cabal sandbox init +echo Updating database +cabal update + +echo Building pandoc... +cabal clean +# Use cpphs to avoid problems with clang cpp on ghc 7.8 osx: +cabal install cpphs alex happy hsb2hs +cabal install --reinstall --flags="embed_data_files" --ghc-options '-pgmPcpphs -optP--cpp' +cabal install --reinstall --flags="embed_data_files" pandoc-citeproc --ghc-options '-pgmPcpphs -optP--cpp' + +mkdir -p $DEST/bin +mkdir -p $DEST/share/man/man1 +mkdir -p $DEST/share/man/man5 +for f in $EXES; do + cp $SANDBOX/bin/$f $DEST/bin/; + cp $SANDBOX/share/man/man1/$f.1 $DEST/share/man/man1/ +done +cp $SANDBOX/share/man/man5/pandoc_markdown.5 $DEST/share/man/man5/ +cp $OSX/uninstall-pandoc.pl $DEST/bin/ + +chown -R $ME:staff $DIST +# gzip $DEST/share/man/man?/*.* +# cabal gives man pages the wrong permissions +chmod +r $DEST/share/man/man?/*.* + +echo Copying license... 
+$SANDBOX/bin/pandoc --data data -t rtf -s COPYING -o $RESOURCES/License.rtf + +echo Signing pandoc executable... + +codesign --force --sign "$CODESIGNID" $DEST/bin/pandoc +# make sure it's valid... returns nonzero exit code if it isn't: +spctl --assess --type execute $DEST/bin/pandoc + +echo Creating OSX package... +# remove old package first +echo $PASSWORD | sudo -S rm -rf $BASE.pkg $BASE.dmg + +sudo $PACKAGEMAKER \ + --root $ROOT \ + --id net.johnmacfarlane.pandoc \ + --resources $RESOURCES \ + --version $VERSION \ + --scripts $SCRIPTS \ + --out $BASE.pkg + + # --no-relocate + +echo Signing package... + +sudo codesign --force --sign "$CODESIGNID" $BASE.pkg +# make sure it's valid... +spctl --assess --type install $BASE.pkg + +echo Creating zip... +zip -9 -r $BASE.pkg.zip $BASE.pkg + +# echo Creating disk image... +# sudo hdiutil create "$BASE.dmg" \ +# -format UDZO -ov \ +# -volname "pandoc $VERSION" \ +# -srcfolder $BASE.pkg +# sudo hdiutil internet-enable "$BASE.dmg" + diff --git a/osx/make_osx_package.sh b/osx/make_osx_package.sh deleted file mode 100755 index c28f8fe5f..000000000 --- a/osx/make_osx_package.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash -e - -DIST=`pwd`/osx_package -SANDBOX=`pwd`/.cabal-sandbox -VERSION=$(grep -e '^Version' pandoc.cabal | awk '{print $2}') -RESOURCES=$DIST/Resources -ROOT=$DIST/pandoc -DEST=$ROOT/usr/local -SCRIPTS=osx-resources -BASE=pandoc-$VERSION -ME=$(whoami) -CODESIGNID="Developer ID Application: John Macfarlane" -PACKAGEMAKER=/Applications/PackageMaker.app/Contents/MacOS/PackageMaker -EXES="pandoc pandoc-citeproc" - -read -s -p "sudo password: " PASSWORD -echo $PASSWORD | sudo -S echo "Password valid, continuing." - -echo Removing old files... -rm -rf $DIST -mkdir -p $RESOURCES - -cabal sandbox init -echo Updating database -cabal update - -echo Building pandoc... 
-cabal clean -# Use cpphs to avoid problems with clang cpp on ghc 7.8 osx: -cabal install cpphs alex happy hsb2hs -cabal install --reinstall --flags="embed_data_files" --ghc-options '-pgmPcpphs -optP--cpp' -cabal install --reinstall --flags="embed_data_files" pandoc-citeproc --ghc-options '-pgmPcpphs -optP--cpp' - -mkdir -p $DEST/bin -mkdir -p $DEST/share/man/man1 -mkdir -p $DEST/share/man/man5 -for f in $EXES; do - cp $SANDBOX/bin/$f $DEST/bin/; - cp $SANDBOX/share/man/man1/$f.1 $DEST/share/man/man1/ -done -cp $SANDBOX/share/man/man5/pandoc_markdown.5 $DEST/share/man/man5/ -cp $SCRIPTS/uninstall-pandoc.pl $DEST/bin/ - -chown -R $ME:staff $DIST -# gzip $DEST/share/man/man?/*.* -# cabal gives man pages the wrong permissions -chmod +r $DEST/share/man/man?/*.* - -echo Copying license... -$SANDBOX/bin/pandoc --data data -t rtf -s COPYING -o $RESOURCES/License.rtf - -echo Signing pandoc executable... - -codesign --force --sign "$CODESIGNID" $DEST/bin/pandoc -# make sure it's valid... returns nonzero exit code if it isn't: -spctl --assess --type execute $DEST/bin/pandoc - -echo Creating OSX package... -# remove old package first -echo $PASSWORD | sudo -S rm -rf $BASE.pkg $BASE.dmg - -sudo $PACKAGEMAKER \ - --root $ROOT \ - --id net.johnmacfarlane.pandoc \ - --resources $RESOURCES \ - --version $VERSION \ - --scripts $SCRIPTS \ - --out $BASE.pkg - - # --no-relocate - -echo Signing package... - -sudo codesign --force --sign "$CODESIGNID" $BASE.pkg -# make sure it's valid... -spctl --assess --type install $BASE.pkg - -echo Creating zip... -zip -9 -r $BASE.pkg.zip $BASE.pkg - -# echo Creating disk image... -# sudo hdiutil create "$BASE.dmg" \ -# -format UDZO -ov \ -# -volname "pandoc $VERSION" \ -# -srcfolder $BASE.pkg -# sudo hdiutil internet-enable "$BASE.dmg" - -- cgit v1.2.3 From 88d8f7428a80e1296ba43226bd49c366623b49ff Mon Sep 17 00:00:00 2001 From: "Daniel T. Staal" Date: Tue, 13 May 2014 19:59:20 -0400 Subject: Code cleanup and debug. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various cleanups: - @pkg_info was never used. Removed. - Simplified getting the list of files slightly. - Used list form of system(). This prevents shell interpretation of command and arguments, preventing bugs. (And solves one.) - Changed $! (OS_ERROR) to $? (CHILD_ERROR) after system() calls to get the error from the external program. (Note that $? is the numeric return code from the child program.) - Allow script to continue after finding some of the files have been removed previously. - Convert 'warn "…"; exit 1;' to 'die "…";', the more common equivalent idiom. - Convert 'exit 0;' to 'exit;', to be more clear we are not exiting abnormally. Signed-off-by: Daniel T. Staal --- osx/uninstall-pandoc.pl | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/osx/uninstall-pandoc.pl b/osx/uninstall-pandoc.pl index 292bcfd96..a5194d9bd 100755 --- a/osx/uninstall-pandoc.pl +++ b/osx/uninstall-pandoc.pl @@ -12,8 +12,6 @@ use File::Spec; # The main info: this is the list of files to remove and the pkg_id. my $pkg_id = 'net.johnmacfarlane.pandoc'; -my @pkg_info; - # Find which, if any, volume Pandoc is installed on. my $volume; @@ -39,12 +37,11 @@ if ( $cur_test =~ m/$pkg_id/ ) { die "Pandoc not installed.\n" if !( defined($volume) ); -my @pkg_files = (); -my $f; -for $f (split '\n', `pkgutil --volume '$volume' --only-files --files $pkg_id`) { - push @pkg_files, File::Spec->rel2abs($f, $volume); -}; +# Get the list of files to remove. +my @pkg_files = `pkgutil --volume '$volume' --only-files --files '$pkg_id'`; +@pkg_files = map { chomp; File::Spec->rel2abs($_, $volume) } @pkg_files; +# Confirm uninistall with the user. print "The following files will be deleted:\n\n"; print join("\n", @pkg_files); print "\n\n"; @@ -54,28 +51,29 @@ my $input = <STDIN>; if ($input =~ m/^[Yy]/) { # Actually remove the files. 
- foreach $f (@pkg_files) { - if (system("sudo rm $f") == 0) { - warn "Deleted $f\n"; - } else { - warn "Unable to delete $f: $!\n"; - warn "Aborting uninstall.\n"; - exit 1; + foreach my $file (@pkg_files) { + if ( -e $file ) { + if ( system( 'sudo', 'rm', $file ) == 0 ) { + warn "Deleted $file\n"; + } else { + warn "Unable to delete $file: $?\n"; + die "Aborting Uninstall.\n"; + } + } else { + warn "File $file does not exist. Skipping.\n"; } } # Clean up the install. - if (system("sudo pkgutil --forget $pkg_id --volume '$volume'") != 0) { - warn "Unable to clean up install: $!\n"; - exit 1; + if (system('sudo', 'pkgutil', '--forget', $pkg_id, '--volume', $volume) != 0) { + die "Unable to clean up install: $?\n"; } } else { print "OK, aborting uninstall.\n"; - exit 0; - + exit; } print "Pandoc has been successfully uninstalled.\n"; -exit 0; +exit; -- cgit v1.2.3 From 222a51bf99bc6b0bfc6be6ebc03159a0ad875e4f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 13 May 2014 23:34:23 -0700 Subject: make_osx_package.sh: Use linker options to ensure OSX 10.6+ compatibility. --- make_osx_package.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index 168119356..86ce784ed 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -29,8 +29,8 @@ echo Building pandoc... 
cabal clean # Use cpphs to avoid problems with clang cpp on ghc 7.8 osx: cabal install cpphs alex happy hsb2hs -cabal install --reinstall --flags="embed_data_files" --ghc-options '-pgmPcpphs -optP--cpp' -cabal install --reinstall --flags="embed_data_files" pandoc-citeproc --ghc-options '-pgmPcpphs -optP--cpp' +cabal install --ghc-options="-optl-mmacosx-version-min=10.6" --reinstall --flags="embed_data_files" --ghc-options '-pgmPcpphs -optP--cpp' +cabal install --ghc-options="-optl-mmacosx-version-min=10.6" --reinstall --flags="embed_data_files" pandoc-citeproc --ghc-options '-pgmPcpphs -optP--cpp' mkdir -p $DEST/bin mkdir -p $DEST/share/man/man1 -- cgit v1.2.3 From 9df589b9c5a4f2dcb19445239dfae41b54625330 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 14 May 2014 14:45:37 +0200 Subject: Introduce class HasLastStrPosition, generalize functions Both `ParserState` and `OrgParserState` keep track of the parser position at which the last string ended. This patch introduces a new class `HasLastStrPosition` and makes the above types instances of that class. This enables the generalization of functions updating the state or checking if one is right after a string. 
--- src/Text/Pandoc/Parsing.hs | 32 +++++++++++++++++++++++--------- src/Text/Pandoc/Readers/Markdown.hs | 11 +++-------- src/Text/Pandoc/Readers/Org.hs | 11 ++++------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index d1e55cbc4..344f6c7ba 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~), withRaw, escaped, characterReference, - updateLastStrPos, anyOrderedListMarker, orderedListMarker, charRef, @@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~), testStringWith, guardEnabled, guardDisabled, + updateLastStrPos, + notAfterString, ParserState (..), HasReaderOptions (..), HasHeaderMap (..), HasIdentifierList (..), HasMacros (..), + HasLastStrPosition (..), defaultParserState, HeaderType (..), ParserContext (..), @@ -904,6 +906,14 @@ instance HasMacros ParserState where extractMacros = stateMacros updateMacros f st = st{ stateMacros = f $ stateMacros st } +class HasLastStrPosition st where + setLastStrPos :: SourcePos -> st -> st + getLastStrPos :: st -> Maybe SourcePos + +instance HasLastStrPosition ParserState where + setLastStrPos pos st = st{ stateLastStrPos = Just pos } + getLastStrPos st = stateLastStrPos st + defaultParserState :: ParserState defaultParserState = ParserState { stateOptions = def, @@ -938,6 +948,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext guardDisabled :: HasReaderOptions st => Extension -> Parser s st () guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext +-- | Update the position on which the last string ended. +updateLastStrPos :: HasLastStrPosition st => Parser s st () +updateLastStrPos = getPosition >>= updateState . setLastStrPos + +-- | Whether we are right after the end of a string. 
+notAfterString :: HasLastStrPosition st => Parser s st Bool +notAfterString = do + pos <- getPosition + st <- getState + return $ getLastStrPos st /= Just pos + data HeaderType = SingleHeader Char -- ^ Single line of characters underneath | DoubleHeader Char -- ^ Lines of characters above and below @@ -1049,17 +1070,11 @@ charOrRef cs = guard (c `elem` cs) return c) -updateLastStrPos :: Parser [Char] ParserState () -updateLastStrPos = getPosition >>= \p -> - updateState $ \s -> s{ stateLastStrPos = Just p } - singleQuoteStart :: Parser [Char] ParserState () singleQuoteStart = do failIfInQuoteContext InSingleQuote - pos <- getPosition - st <- getState -- single quote start can't be right after str - guard $ stateLastStrPos st /= Just pos + guard =<< notAfterString () <$ charOrRef "'\8216\145" singleQuoteEnd :: Parser [Char] st () @@ -1156,4 +1171,3 @@ applyMacros' target = do then do macros <- extractMacros `fmap` getState return $ applyMacros macros target else return target - diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d1637b701..1ac98e94c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1474,9 +1474,7 @@ strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_') where checkIntraword = do exts <- getOption readerExtensions when (Ext_intraword_underscores `Set.member` exts) $ do - pos <- getPosition - lastStrPos <- stateLastStrPos <$> getState - guard $ lastStrPos /= Just pos + guard =<< notAfterString -- | Parses a list of inlines between start and end delimiters. 
inlinesBetween :: (Show b) @@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n') str :: MarkdownParser (F Inlines) str = do result <- many1 alphaNum - pos <- getPosition - updateState $ \s -> s{ stateLastStrPos = Just pos } + updateLastStrPos let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) isSmart <- getOption readerSmart if isSmart @@ -1821,9 +1818,7 @@ citeKey :: MarkdownParser (Bool, String) citeKey = try $ do -- make sure we're not right after an alphanumeric, -- since foo@bar.baz is probably an email address - lastStrPos <- stateLastStrPos <$> getState - pos <- getPosition - guard $ lastStrPos /= Just pos + guard =<< notAfterString suppress_author <- option False (char '-' >> return True) char '@' first <- letter <|> char '_' diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 2e4a29beb..5dbcaee98 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -105,6 +105,10 @@ instance HasMeta OrgParserState where deleteMeta field st = st{ orgStateMeta = deleteMeta field $ orgStateMeta st } +instance HasLastStrPosition OrgParserState where + getLastStrPos = orgStateLastStrPos + setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } + instance Default OrgParserState where def = defaultOrgParserState @@ -1274,13 +1278,6 @@ afterEmphasisPreChar = do lastPrePos <- orgStateLastPreCharPos <$> getState return . 
fromMaybe True $ (== pos) <$> lastPrePos --- | Whether we are right after the end of a string -notAfterString :: OrgParser Bool -notAfterString = do - pos <- getPosition - lastStrPos <- orgStateLastStrPos <$> getState - return $ lastStrPos /= Just pos - -- | Whether the parser is right after a forbidden border char notAfterForbiddenBorderChar :: OrgParser Bool notAfterForbiddenBorderChar = do -- cgit v1.2.3 From 2423f9e6b180bc6b04d222a4b574de995d296f80 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 14 May 2014 14:58:05 +0200 Subject: Move `citeKey` from Readers.Markdown to Parsing The function can be used by other readers, so it is made accessible for all parsers. --- src/Text/Pandoc/Parsing.hs | 13 +++++++++++++ src/Text/Pandoc/Readers/Markdown.hs | 14 -------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 344f6c7ba..4cd6591c0 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -94,6 +94,7 @@ module Text.Pandoc.Parsing ( (>>~), apostrophe, dash, nested, + citeKey, macro, applyMacros', Parser, @@ -1144,6 +1145,18 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res +citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String) +citeKey = try $ do + guard =<< notAfterString + suppress_author <- option False (char '-' *> return True) + char '@' + firstChar <- letter <|> char '_' + let regchar = satisfy (\c -> isAlphaNum c || c == '_') + let internal p = try $ p <* lookAhead regchar + rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") + let key = firstChar:rest + return (suppress_author, key) + -- -- Macros -- diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 1ac98e94c..5129bc2e3 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1814,20 +1814,6 @@ normalCite = try $ do char ']' return citations -citeKey :: 
MarkdownParser (Bool, String) -citeKey = try $ do - -- make sure we're not right after an alphanumeric, - -- since foo@bar.baz is probably an email address - guard =<< notAfterString - suppress_author <- option False (char '-' >> return True) - char '@' - first <- letter <|> char '_' - let regchar = satisfy (\c -> isAlphaNum c || c == '_') - let internal p = try $ p >>~ lookAhead regchar - rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") - let key = first:rest - return (suppress_author, key) - suffix :: MarkdownParser (F Inlines) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) -- cgit v1.2.3 From ceeb701c254c6dc4c054e10dd151d9ef6f751ad7 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 14 May 2014 14:49:30 +0200 Subject: Org reader: support Pandocs citation extension Citations are defined via the "normal citation" syntax used in markdown, with the sole difference that newlines are not allowed between "[...]". This is for consistency, as org-mode generally disallows newlines between square brackets. The extension is turned on by default and can be turned off via the default syntax-extension mechanism, i.e. by specifying "org-citation" as the input format. Move `citeKey` from Readers.Markdown into Parsing The function can be used by other readers, so it is made accessible for all parsers. 
--- src/Text/Pandoc.hs | 2 +- src/Text/Pandoc/Readers/Org.hs | 55 ++++++++++++++++++++++++++++++++++++++++-- tests/Tests/Readers/Org.hs | 22 +++++++++++++++++ 3 files changed, 76 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index dd5bc18f6..130338f0e 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = pandocExtensions +getDefaultExtensions "org" = Set.fromList [Ext_citations] getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex] getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] @@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode - diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5dbcaee98..86dda2732 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -869,6 +869,7 @@ inline :: OrgParser (F Inlines) inline = choice [ whitespace , linebreak + , cite , footnote , linkOrImage , anchor @@ -933,6 +934,51 @@ endline = try $ do updateLastPreCharPos return . 
return $ B.space +cite :: OrgParser (F Inlines) +cite = try $ do + guardEnabled Ext_citations + (cs, raw) <- withRaw normalCite + return $ (flip B.cite (B.text raw)) <$> cs + +normalCite :: OrgParser (F [Citation]) +normalCite = try $ char '[' + *> skipSpaces + *> citeList + <* skipSpaces + <* char ']' + +citeList :: OrgParser (F [Citation]) +citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) + +citation :: OrgParser (F Citation) +citation = try $ do + pref <- prefix + (suppress_author, key) <- citeKey + suff <- suffix + return $ do + x <- pref + y <- suff + return $ Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + where + prefix = trimInlinesF . mconcat <$> + manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) + suffix = try $ do + hasSpace <- option False (notFollowedBy nonspaceChar >> return True) + skipSpaces + rest <- trimInlinesF . 
mconcat <$> + many (notFollowedBy (oneOf ";]") *> inline) + return $ if hasSpace + then (B.space <>) <$> rest + else rest + footnote :: OrgParser (F Inlines) footnote = try $ inlineNote <|> referencedNote @@ -1007,7 +1053,7 @@ selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String -linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") applyCustomLinkFormat :: String -> OrgParser (F String) applyCustomLinkFormat link = do @@ -1083,7 +1129,12 @@ inlineCodeBlock = try $ do let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode - where enclosedByPair s e p = char s *> many1Till p (char e) + +enclosedByPair :: Char -- ^ opening char + -> Char -- ^ closing char + -> OrgParser a -- ^ parser + -> OrgParser [a] +enclosedByPair s e p = char s *> many1Till p (char e) emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4ef7a7731..ca97ba348 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -225,6 +225,28 @@ tests = ] ) "echo 'Hello, World'") + + , "Citation" =: + "[@nonexistent]" =?> + let citation = Citation + { citationId = "nonexistent" + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0} + in (para $ cite [citation] "[@nonexistent]") + + , "Citation containing text" =: + "[see @item1 p. 34-35]" =?> + let citation = Citation + { citationId = "item1" + , citationPrefix = [Str "see"] + , citationSuffix = [Space ,Str "p.",Space,Str "34-35"] + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0} + in (para $ cite [citation] "[see @item1 p. 
34-35]") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 034cd4bb214a30de3739c756eab428d5fbe617cc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 May 2014 13:39:31 -0700 Subject: osx package: Include uninstall script in zip file. Don't put it in $PREFIX/bin as before. --- make_osx_package.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index 86ce784ed..f546396b7 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -40,7 +40,6 @@ for f in $EXES; do cp $SANDBOX/share/man/man1/$f.1 $DEST/share/man/man1/ done cp $SANDBOX/share/man/man5/pandoc_markdown.5 $DEST/share/man/man5/ -cp $OSX/uninstall-pandoc.pl $DEST/bin/ chown -R $ME:staff $DIST # gzip $DEST/share/man/man?/*.* @@ -77,7 +76,7 @@ sudo codesign --force --sign "$CODESIGNID" $BASE.pkg spctl --assess --type install $BASE.pkg echo Creating zip... -zip -9 -r $BASE.pkg.zip $BASE.pkg +zip -9 -j -r $BASE.pkg.zip $BASE.pkg $OSX/uninstall-pandoc.pl # echo Creating disk image... # sudo hdiutil create "$BASE.dmg" \ -- cgit v1.2.3 From cf533a1c2c88607ca57fb20325a9d121f8426a00 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 May 2014 13:46:09 -0700 Subject: Bump version to 1.12.4.2. --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index f29ee8fb1..9a9e6f539 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,5 +1,5 @@ Name: pandoc -Version: 1.12.4.1 +Version: 1.12.4.2 Cabal-Version: >= 1.10 Build-Type: Custom License: GPL -- cgit v1.2.3 From 8b9fafc0e482617c9b0011c8ddb672c6b272f247 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 May 2014 13:46:18 -0700 Subject: Updated changelog. 
--- changelog | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/changelog b/changelog index e58f5516d..e012dabbf 100644 --- a/changelog +++ b/changelog @@ -1,4 +1,4 @@ -pandoc (1.12.4.1) +pandoc (1.12.4.2) * Require highlighting-kate >= 0.5.8. Fixes a performance regression. @@ -16,13 +16,31 @@ pandoc (1.12.4.1) + Support code block headers (`#+BEGIN_SRC ...`) (Albert Krewinkel). + Fix parsing of blank lines within blocks (Albert Krewinkel). + + Support pandoc citation extension (Albert Krewinkel). This can + be turned off by specifying `org-citation` as the input format. + + * Markdown reader: + + + `citeKey` moved to `Text.Pandoc.Parsing` so it can be used by + other readers (Albert Krewinkel). + + * `Text.Pandoc.Parsing`: + + + Added `citeKey` (see above). + + Added `HasLastStrPosition` type class and `updateLastStrPos` + and `notAfterString` functions. * Updated copyright notices (Albert Krewinkel). * Added default.icml to data files so it installs with the package. - * Moved OSX package materials to osx directory. Added uninstall - script (thanks to Daniel T. Staal). + * OSX package: + + + The binary is now built with options to ensure that it can be + used with OSX 10.6+. + + Moved OSX package materials to osx directory. + + Added OSX package uninstall script, included in the zip container + (thanks to Daniel T. Staal). pandoc (1.12.4) -- cgit v1.2.3 From 256390b3f9f535bd29105e73855a6220669a8193 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 May 2014 14:23:17 -0700 Subject: Fixed 034cd4bb214a30de3739c756eab428d5fbe617cc. Preserve directory structure for pkg. --- make_osx_package.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index f546396b7..7e9d10c0e 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -76,7 +76,8 @@ sudo codesign --force --sign "$CODESIGNID" $BASE.pkg spctl --assess --type install $BASE.pkg echo Creating zip... 
-zip -9 -j -r $BASE.pkg.zip $BASE.pkg $OSX/uninstall-pandoc.pl +zip -9 -r $BASE.pkg.zip $BASE.pkg +zip -9 -j -r $BASE.pkg.zip $OSX/uninstall-pandoc.pl # echo Creating disk image... # sudo hdiutil create "$BASE.dmg" \ -- cgit v1.2.3 From c5c9b0d2890699ee8fcdbb660662957f8efad319 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 15 May 2014 10:11:48 -0700 Subject: EPUB writer: Fixed regression on cover image. In 1.12.4 and 1.12.4.2, the cover image would not appear properly, because the metadata id was not correct. This was introduced by the fix to #1254. Now we derive the id from the actual cover image filename, which we preserve rather than using "cover-image." --- src/Text/Pandoc/Writers/EPUB.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 893ec3be9..4d2a39846 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -333,7 +333,7 @@ writeEPUB opts doc@(Pandoc meta _) = do case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do - let coverImage = "cover-image" ++ takeExtension img + let coverImage = "media/" ++ takeFileName img let cpContent = renderHtml $ writeHtml opts' (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) imgContent <- B.readFile img @@ -561,8 +561,8 @@ writeEPUB opts doc@(Pandoc meta _) = do ,("content", "0")] $ () ] ++ case epubCoverImage metadata of Nothing -> [] - Just _ -> [unode "meta" ! [("name","cover"), - ("content","cover-image")] $ ()] + Just img -> [unode "meta" ! [("name","cover"), + ("content", toId img)] $ ()] , unode "docTitle" $ unode "text" $ plainTitle , unode "navMap" $ tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1 @@ -657,8 +657,8 @@ metadataElement version md currentTime = coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md rightsNodes = maybe [] (dcTag' "rights") $ epubRights md coverImageNodes = maybe [] - (const $ [unode "meta" ! [("name","cover"), - ("content","cover-image")] $ ()]) + (\img -> [unode "meta" ! [("name","cover"), + ("content",toId img)] $ ()]) $ epubCoverImage md modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ (showDateTimeISO8601 currentTime) | version == EPUB3 ] -- cgit v1.2.3 From 8de5daed57b2d588fa53d939a9e04864b3e3527a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 15 May 2014 12:58:30 -0700 Subject: Require highlighting-kate 0.5.8.1. 0.5.8 has a serious bug that causes error failure with ocaml and fsharp. 
--- pandoc.cabal | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index 9a9e6f539..ec32c8c35 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -227,7 +227,7 @@ Library tagsoup >= 0.13.1 && < 0.14, base64-bytestring >= 0.1 && < 1.1, zlib >= 0.5 && < 0.6, - highlighting-kate >= 0.5.8 && < 0.6, + highlighting-kate >= 0.5.8.1 && < 0.6, data-default >= 0.4 && < 0.6, temporary >= 1.1 && < 1.3, blaze-html >= 0.5 && < 0.8, @@ -328,7 +328,7 @@ Executable pandoc text >= 0.11 && < 1.2, bytestring >= 0.9 && < 0.11, extensible-exceptions >= 0.1 && < 0.2, - highlighting-kate >= 0.5.8 && < 0.6, + highlighting-kate >= 0.5.8.1 && < 0.6, aeson >= 0.7 && < 0.8, yaml >= 0.8.8.2 && < 0.9, containers >= 0.1 && < 0.6, @@ -371,7 +371,7 @@ Test-Suite test-pandoc directory >= 1 && < 1.3, filepath >= 1.1 && < 1.4, process >= 1 && < 1.3, - highlighting-kate >= 0.5.8 && < 0.6, + highlighting-kate >= 0.5.8.1 && < 0.6, Diff >= 0.2 && < 0.4, test-framework >= 0.3 && < 0.9, test-framework-hunit >= 0.2 && < 0.4, -- cgit v1.2.3 From 46e7bcae69c7161541bf83df608f93bd27595c9c Mon Sep 17 00:00:00 2001 From: Michael Snoyman Date: Fri, 16 May 2014 09:32:24 +0300 Subject: Allow scientific 0.3 --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index ec32c8c35..8ce77c8f0 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -234,7 +234,7 @@ Library blaze-markup >= 0.5.1 && < 0.7, attoparsec >= 0.10 && < 0.12, yaml >= 0.8.8.2 && < 0.9, - scientific >= 0.2 && < 0.3, + scientific >= 0.2 && < 0.4, vector >= 0.10 && < 0.11, hslua >= 0.3 && < 0.4, binary >= 0.5 && < 0.8 -- cgit v1.2.3 From cb6879cd2f8b59b90ff1781432bdf855c7c1d7a5 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Fri, 16 May 2014 17:36:01 +0200 Subject: fixed escape for literal paragraph --- README | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README b/README index 59243c2df..a2169e5ff 100644 --- a/README +++ b/README @@ 
-2498,8 +2498,8 @@ citing them in the body text, you can define a dummy `nocite` metadata field and put the citations there: --- - nocite: - | @item1, @item2 + nocite: | + @item1, @item2 ... @item3 -- cgit v1.2.3 From 650f83194914845104aa20c2a3487ee5d9fdf77f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 May 2014 09:50:30 -0700 Subject: travis: Test with ghc 7.8.2 rather than 7.8.1. --- .travis.yml | 2 +- pandoc.cabal | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9c8f18188..23627b1b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,7 @@ env: # - GHCVER=6.12.3 - GHCVER=7.4.2 - GHCVER=7.6.3 - - GHCVER=7.8.1 # see note about Alex/Happy + - GHCVER=7.8.2 # see note about Alex/Happy # - GHCVER=head # see section about GHC HEAD snapshots # Note: the distinction between `before_install` and `install` is not important. diff --git a/pandoc.cabal b/pandoc.cabal index 8ce77c8f0..6f12ec375 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -11,7 +11,7 @@ Bug-Reports: https://github.com/jgm/pandoc/issues Stability: alpha Homepage: http://johnmacfarlane.net/pandoc Category: Text -Tested-With: GHC == 7.4.2, GHC == 7.6.3, GHC == 7.8.1 +Tested-With: GHC == 7.4.2, GHC == 7.6.3, GHC == 7.8.2 Synopsis: Conversion between markup formats Description: Pandoc is a Haskell library for converting from one markup format to another, and a command-line tool that uses -- cgit v1.2.3 From eabaa625fe91541f67e35feac0d0e31479447101 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 May 2014 10:34:20 -0700 Subject: Fixed travis build to install alex for GHC 7.8.2. 
--- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 23627b1b0..7f24986d2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,7 +18,7 @@ before_install: install: - cabal-1.18 update - | - if [ $GHCVER = "head" ] || [ $GHCVER = "7.8.1" ]; then + if [ $GHCVER = "head" ] || [ $GHCVER = "7.8.2" ]; then cabal-1.18 install happy alex export PATH=$HOME/.cabal/bin:$PATH fi -- cgit v1.2.3 From 06da7be1fdb6dd80238c0131a8d1c903899d5232 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 May 2014 21:51:25 -0700 Subject: make_osx_package: Call zip file pandoc-VERSION-osx.zip. The zip should not be named SOMETHING.pkg.zip, or OSX finder will extract it into a folder named SOMETHING.pkg, which it will interpret as a defective package. Closes #1308. --- make_osx_package.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index 7e9d10c0e..4e93a1a03 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -76,8 +76,8 @@ sudo codesign --force --sign "$CODESIGNID" $BASE.pkg spctl --assess --type install $BASE.pkg echo Creating zip... -zip -9 -r $BASE.pkg.zip $BASE.pkg -zip -9 -j -r $BASE.pkg.zip $OSX/uninstall-pandoc.pl +zip -9 -r $BASE-osx.zip $BASE.pkg +zip -9 -j -r $BASE-osx.zip $OSX/uninstall-pandoc.pl # echo Creating disk image... # sudo hdiutil create "$BASE.dmg" \ -- cgit v1.2.3 From 0915967d840ad453027fb30a00b69401d3d8b50e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 May 2014 21:55:07 -0700 Subject: Windows build script: add -windows to file name. --- windows/make-windows-installer.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index 4b17b449f..f639b9405 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -30,7 +30,7 @@ cd windows echo Creating msi... 
candle -dVERSION=%VERSION% pandoc.wxs if %errorlevel% neq 0 exit /b %errorlevel% -light -sw1076 -ext WixUIExtension -out pandoc-%VERSION%.msi pandoc.wixobj +light -sw1076 -ext WixUIExtension -out pandoc-%VERSION%-windows.msi pandoc.wixobj if %errorlevel% neq 0 exit /b %errorlevel% echo Starting kSign: sign, then quit kSign to complete the build... kSign -- cgit v1.2.3 From 4c6aaec56520fea67a73aaad4f0f71746bffe2f5 Mon Sep 17 00:00:00 2001 From: Michael Thompson Date: Sat, 17 May 2014 13:48:31 -0400 Subject: no need to hide 'catch' This doesn't normally cause a problem because of some ghc workaround special to this case, but I was able to trigger an error with a complicated mixture of sandboxing, directing `cabal` to a locally installed ghc, and something else. `catch` isn't actually used in the file, so it seems it might as well go. --- Setup.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/Setup.hs b/Setup.hs index f5d18eee4..c1c3f6472 100644 --- a/Setup.hs +++ b/Setup.hs @@ -29,7 +29,6 @@ import Distribution.Verbosity ( Verbosity, silent ) import Distribution.Simple.InstallDirs (mandir, CopyDest (NoCopyDest), toPathTemplate) import Distribution.Simple.Utils (installOrdinaryFiles, info) import Distribution.Simple.Test (test) -import Prelude hiding (catch) import System.Process ( rawSystem ) import System.FilePath ( (</>) ) import System.Directory ( findExecutable ) -- cgit v1.2.3 From ee8c8da8ccfc7e3eb33679fd8a3a465766f9d5f7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 May 2014 22:04:39 -0700 Subject: Removed dependency on conduit. * http-conduit flag is now https. * Instead of http-conduit, we depend on http-client and http-client-tls. 
--- INSTALL | 5 ++--- pandoc.cabal | 11 ++++++----- src/Text/Pandoc/Shared.hs | 14 ++++++++------ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/INSTALL b/INSTALL index 9442adbf0..f3366e103 100644 --- a/INSTALL +++ b/INSTALL @@ -116,9 +116,8 @@ assume that the pandoc source directory is your working directory. cabal install hsb2hs - - `http-conduit`: use the `http-conduit` library to fetch external - resources (default yes -- without this, pandoc cannot make SSL - connections) + - `https`: enable support for downloading resources over https + (using the `http-client` and `http-client-tls` libraries). 3. Build: diff --git a/pandoc.cabal b/pandoc.cabal index 6f12ec375..1741c59f6 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -196,8 +196,8 @@ Flag embed_data_files Description: Embed data files in binary for relocatable executable. Default: False -Flag http-conduit - Description: Enable downloading of resources over https. +Flag https + Description: Enable support for downloading of resources over https. 
Default: True Library @@ -239,10 +239,11 @@ Library hslua >= 0.3 && < 0.4, binary >= 0.5 && < 0.8 Build-Tools: alex, happy - if flag(http-conduit) - Build-Depends: http-conduit >= 1.9 && < 2.2, + if flag(https) + Build-Depends: http-client >= 0.3.2 && < 0.4, + http-client-tls >= 0.2 && < 0.3, http-types >= 0.8 && < 0.9 - cpp-options: -DHTTP_CONDUIT + cpp-options: -DHTTP_CLIENT if flag(embed_data_files) cpp-options: -DEMBED_DATA_FILES -- Build-Tools: hsb2hs -- not yet recognized by cabal diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 4f506b5a6..d8cbe46d9 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -118,11 +118,13 @@ import System.FilePath ( joinPath, splitDirectories ) #else import Paths_pandoc (getDataFileName) #endif -#ifdef HTTP_CONDUIT +#ifdef HTTP_CLIENT import Data.ByteString.Lazy (toChunks) -import Network.HTTP.Conduit (httpLbs, parseUrl, withManager, - responseBody, responseHeaders, addProxy, - Request(port,host)) +import Network.HTTP.Client (httpLbs, parseUrl, withManager, + responseBody, responseHeaders, + Request(port,host)) +import Network.HTTP.Client.Internal (addProxy) +import Network.HTTP.Client.TLS (tlsManagerSettings) import System.Environment (getEnv) import Network.HTTP.Types.Header ( hContentType) import Network (withSocketsDo) @@ -665,7 +667,7 @@ openURL u let mime = takeWhile (/=',') $ drop 5 u contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u in return $ Right (decodeLenient contents, Just mime) -#ifdef HTTP_CONDUIT +#ifdef HTTP_CLIENT | otherwise = withSocketsDo $ E.try $ do req <- parseUrl u (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy" @@ -674,7 +676,7 @@ openURL u Right pr -> case parseUrl pr of Just r -> addProxy (host r) (port r) req Nothing -> req - resp <- withManager $ httpLbs req' + resp <- withManager tlsManagerSettings $ httpLbs req' return (BS.concat $ toChunks $ responseBody resp, UTF8.toString `fmap` lookup hContentType 
(responseHeaders resp)) #else -- cgit v1.2.3 From 8d04c821aaa8a96803126140b58b8aef02d85906 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 May 2014 09:45:00 -0700 Subject: Avoid `import Prelude hiding (catch)`. See #1309. --- src/Text/Pandoc/UTF8.hs | 3 +-- src/Text/Pandoc/Writers/EPUB.hs | 10 +++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 33c9ec1c5..cf25de85b 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -51,8 +51,7 @@ import System.IO hiding (readFile, writeFile, getContents, #if MIN_VERSION_base(4,6,0) import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn) #else -import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn, - catch) +import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn) #endif import qualified System.IO as IO import qualified Data.ByteString.Char8 as B diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 4d2a39846..9514e87c9 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -59,11 +59,7 @@ import Text.Pandoc.Writers.Markdown ( writePlain ) import Data.Char ( toLower, isDigit, isAlphaNum ) import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) -#if MIN_VERSION_base(4,6,0) -#else -import Prelude hiding (catch) -#endif -import Control.Exception (catch, SomeException) +import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup @@ -153,10 +149,10 @@ getEPUBMetadata opts meta = do then case lookup "lang" (writerVariables opts) of Just x -> return m{ epubLanguage = x } Nothing -> do - localeLang <- catch (liftM + localeLang <- E.catch (liftM (map (\c -> if c == '_' then '-' else c) . 
takeWhile (/='.')) $ getEnv "LANG") - (\e -> let _ = (e :: SomeException) in return "en-US") + (\e -> let _ = (e :: E.SomeException) in return "en-US") return m{ epubLanguage = localeLang } else return m let fixDate m = -- cgit v1.2.3 From 3c77ab98bf9a055237a69be48001f5c6ef1d64ca Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 May 2014 12:52:25 -0700 Subject: EPUB writer: Handle multiple dates with OPF `event` attributes. Note: in EPUB3 we can have only one dc:date, so only the first one is used. --- src/Text/Pandoc/Writers/EPUB.hs | 47 +++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 9514e87c9..b6687c330 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -72,7 +72,7 @@ data Chapter = Chapter (Maybe [Int]) [Block] data EPUBMetadata = EPUBMetadata{ epubIdentifier :: [Identifier] , epubTitle :: [Title] - , epubDate :: String + , epubDate :: [Date] , epubLanguage :: String , epubCreator :: [Creator] , epubContributor :: [Creator] @@ -93,6 +93,11 @@ data Stylesheet = StylesheetPath FilePath | StylesheetContents String deriving Show +data Date = Date{ + dateText :: String + , dateEvent :: Maybe String + } deriving Show + data Creator = Creator{ creatorText :: String , creatorRole :: Maybe String @@ -159,7 +164,9 @@ getEPUBMetadata opts meta = do if null (epubDate m) then do currentTime <- getCurrentTime - return $ m{ epubDate = showDateTimeISO8601 currentTime } + return $ m{ epubDate = [ Date{ + dateText = showDateTimeISO8601 currentTime + , dateEvent = Nothing } ] } else return m let addAuthor m = if any (\c -> creatorRole c == Just "aut") $ epubCreator m @@ -183,8 +190,10 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md , titleFileAs = getAttr "file-as" , titleType = getAttr "type" } : epubTitle md } - | name == "date" = md{ epubDate = fromMaybe "" $ normalizeDate' - $ 
strContent e } + | name == "date" = md{ epubDate = + Date{ dateText = fromMaybe "" $ normalizeDate' $ strContent e + , dateEvent = getAttr "event" + } : epubDate md } | name == "language" = md{ epubLanguage = strContent e } | name == "creator" = md{ epubCreator = Creator{ creatorText = strContent e @@ -249,6 +258,16 @@ getCreator s meta = getList s meta handleMetaValue , creatorRole = metaValueToString <$> M.lookup "role" m } handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing +getDate :: String -> Meta -> [Date] +getDate s meta = getList s meta handleMetaValue + where handleMetaValue (MetaMap m) = + Date{ dateText = maybe "" id $ + M.lookup "text" m >>= normalizeDate' . metaValueToString + , dateEvent = metaValueToString <$> M.lookup "event" m } + handleMetaValue mv = Date { dateText = maybe "" + id $ normalizeDate' $ metaValueToString mv + , dateEvent = Nothing } + simpleList :: String -> Meta -> [String] simpleList s meta = case lookupMeta s meta of @@ -278,8 +297,7 @@ metadataFromMeta opts meta = EPUBMetadata{ } where identifiers = getIdentifier meta titles = getTitle meta - date = fromMaybe "" $ - (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate' + date = getDate "date" meta language = maybe "" metaValueToString $ lookupMeta "language" meta `mplus` lookupMeta "lang" meta creators = getCreator "creator" meta @@ -637,7 +655,14 @@ metadataElement version md currentTime = identifierNodes = withIds "epub-id" toIdentifierNode $ epubIdentifier md titleNodes = withIds "epub-title" toTitleNode $ epubTitle md - dateNodes = dcTag' "date" $ epubDate md + dateNodes = if version == EPUB2 + then withIds "epub-date" toDateNode $ epubDate md + else -- epub3 allows only one dc:date + -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcdate + case epubDate md of + [] -> [] + (x:_) -> [dcNode "date" ! 
[("id","epub-date")] + $ dateText x] languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ epubCreator md @@ -671,7 +696,7 @@ metadataElement version md currentTime = (schemeToOnix `fmap` scheme) toCreatorNode s id' creator | version == EPUB2 = [dcNode s ! - ([("id",id')] ++ + (("id",id') : maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++ maybe [] (\x -> [("opf:role",x)]) (creatorRole creator >>= toRelator)) $ creatorText creator] @@ -685,7 +710,7 @@ metadataElement version md currentTime = (creatorRole creator >>= toRelator) toTitleNode id' title | version == EPUB2 = [dcNode "title" ! - ([("id",id')] ++ + (("id",id') : maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title) ++ maybe [] (\x -> [("opf:title-type",x)]) (titleType title)) $ titleText title] @@ -697,6 +722,10 @@ metadataElement version md currentTime = maybe [] (\x -> [unode "meta" ! [("refines",'#':id'),("property","title-type")] $ x]) (titleType title) + toDateNode id' date = [dcNode "date" ! + (("id",id') : + maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ + dateText date] schemeToOnix "ISBN-10" = "02" schemeToOnix "GTIN-13" = "03" schemeToOnix "UPC" = "04" -- cgit v1.2.3 From 3238a2f9191b83864abd682261634a603ec89056 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 20 May 2014 22:29:21 +0200 Subject: Org reader: support for inline LaTeX Inline LaTeX is now accepted and parsed by the org-mode reader. Both, math symbols (like \tau) and LaTeX commands (like \cite{Coffee}), can be used without any further escaping. 
--- src/Text/Pandoc/Readers/LaTeX.hs | 1 + src/Text/Pandoc/Readers/Org.hs | 32 +++++++++++++++++++++++++++++++- tests/Tests/Readers/Org.hs | 27 +++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6f870318f..7fc587882 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -31,6 +31,7 @@ Conversion of LaTeX to 'Pandoc' document. module Text.Pandoc.Readers.LaTeX ( readLaTeX, rawLaTeXInline, rawLaTeXBlock, + inlineCommand, handleIncludes ) where diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 86dda2732..c3ea8d7c2 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -39,12 +39,15 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString , updateLastStrPos ) +import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) +import Text.Parsec.Pos (updatePosString) +import Text.TeXMath (texMathToPandoc, DisplayType(..)) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Arrow (first) -import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) +import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) import Data.Default @@ -886,6 +889,7 @@ inline = , verbatim , subscript , superscript + , inlineLaTeX , symbol ] <* (guard =<< newlinesCountWithinLimits) "inline" @@ -1351,3 +1355,29 @@ simpleSubOrSuperString = try $ , mappend <$> option [] ((:[]) <$> oneOf "+-") <*> many1 alphaNum ] + +inlineLaTeX :: OrgParser (F Inlines) +inlineLaTeX = try $ do + cmd <- inlineLaTeXCommand + maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd + where + parseAsMath :: String -> Maybe Inlines + 
parseAsMath cs = maybeRight $ B.fromList <$> texMathToPandoc DisplayInline cs + + parseAsInlineLaTeX :: String -> Maybe Inlines + parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs + + state :: ParserState + state = def{ stateOptions = def{ readerParseRaw = True }} + +maybeRight :: Either a b -> Maybe b +maybeRight = either (const Nothing) Just + +inlineLaTeXCommand :: OrgParser String +inlineLaTeXCommand = try $ do + rest <- getInput + pos <- getPosition + case runParser rawLaTeXInline def "source" rest of + Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest) + <* (setPosition $ updatePosString pos cs) + _ -> mzero diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index ca97ba348..4ed77887f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -247,6 +247,33 @@ tests = , citationNoteNum = 0 , citationHash = 0} in (para $ cite [citation] "[see @item1 p. 34-35]") + + , "Inline LaTeX symbol" =: + "\\dots" =?> + para "…" + + , "Inline LaTeX command" =: + "\\textit{Emphasised}" =?> + para (emph "Emphasised") + + , "Inline LaTeX math symbol" =: + "\\tau" =?> + para (emph "τ") + + , "Unknown inline LaTeX command" =: + "\\notacommand{foo}" =?> + para (rawInline "latex" "\\notacommand{foo}") + + , "LaTeX citation" =: + "\\cite{Coffee}" =?> + let citation = Citation + { citationId = "Coffee" + , citationPrefix = [] + , citationSuffix = [] + , citationMode = AuthorInText + , citationNoteNum = 0 + , citationHash = 0} + in (para . cite [citation] $ rawInline "latex" "\\cite{Coffee}") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From f1cd6af5ec091c7077468c13640e2a20d799566e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 May 2014 10:32:46 -0700 Subject: Updated date on README. 
--- README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README b/README index a2169e5ff..73c937301 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ % Pandoc User's Guide % John MacFarlane -% January 19, 2013 +% May 16, 2014 Synopsis ======== -- cgit v1.2.3 From 265f0e3da12f9d8f7c9d3677a71b387fb5e6ff5f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 May 2014 10:39:13 -0700 Subject: Fixed documentation of attributes. Closes #1315. --- README | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/README b/README index 73c937301..5f82f94b8 100644 --- a/README +++ b/README @@ -971,10 +971,8 @@ of the line containing the header text: {#identifier .class .class key=value key=value} -Although this syntax allows assignment of classes and key/value attributes, -only identifiers currently have any affect in the writers (and only in some -writers: HTML, LaTeX, ConTeXt, Textile, AsciiDoc). Thus, for example, -the following headers will all be assigned the identifier `foo`: +Thus, for example, the following headers will all be assigned the identifier +`foo`: # My header {#foo} @@ -985,6 +983,12 @@ the following headers will all be assigned the identifier `foo`: (This syntax is compatible with [PHP Markdown Extra].) +Note that although this syntax allows assignment of classes and key/value +attributes, writers generally don't use all of this information. Identifiers, +classes, and key/value attributes are used in HTML and HTML-based formats such +as EPUB and slidy. Identifiers are used for labels and link anchors in the +LaTeX, ConTeXt, Textile, and AsciiDoc writers. + Headers with the class `unnumbered` will not be numbered, even if `--number-sections` is specified. 
A single hyphen (`-`) in an attribute context is equivalent to `.unnumbered`, and preferable in non-English -- cgit v1.2.3 From fd11a5a5eb51d54f0d3ee91e859a63854f465971 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 May 2014 11:22:53 -0700 Subject: Require latest zip-archive. This has fixes for unicode path names. Note that compiling pandoc against zip-archive 0.2.3 or 0.2.3.1 will lead to invalid zip containers, causing LibreOffice (e.g.) to regard ODTs as corrupt. --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index 1741c59f6..f7d7d9c7e 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -214,7 +214,7 @@ Library directory >= 1 && < 1.3, bytestring >= 0.9 && < 0.11, text >= 0.11 && < 1.2, - zip-archive >= 0.1.3.3 && < 0.3, + zip-archive >= 0.2.3.2 && < 0.3, old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5, HTTP >= 4000.0.5 && < 4000.3, -- cgit v1.2.3 From 2e80613451651ec8f1945daa7540168a427f0507 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 May 2014 11:59:28 -0700 Subject: Markdown reader: inline math must have nonspace before final `$`. Closes #1313. 
--- src/Text/Pandoc/Parsing.hs | 10 ++++++---- tests/markdown-reader-more.native | 2 ++ tests/markdown-reader-more.txt | 4 ++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4cd6591c0..8bc042e28 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -464,11 +464,13 @@ mathInlineWith :: String -> String -> Parser [Char] st String mathInlineWith op cl = try $ do string op notFollowedBy space - words' <- many1Till (count 1 (noneOf "\n\\") + words' <- many1Till (count 1 (noneOf " \t\n\\") <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) - <|> count 1 newline <* notFollowedBy' blankline - *> return " ") - (try $ string cl) + <|> do (blankline <* notFollowedBy' blankline) <|> + (oneOf " \t" <* skipMany (oneOf " \t")) + notFollowedBy (char '$') + return " " + ) (try $ string cl) notFollowedBy digit -- to prevent capture of $5 return $ concat words' diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 0d74c233d..b4713bc93 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -16,6 +16,8 @@ ,Header 3 ("my-header",[],[]) [Str "my",Space,Str "header"] ,Header 2 ("in-math",[],[]) [Str "$",Space,Str "in",Space,Str "math"] ,Para [Math InlineMath "\\$2 + \\$3"] +,Para [Str "This",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "math:"] +,Para [Str "$PATH",Space,Str "90",Space,Str "$PATH"] ,Header 2 ("commented-out-list-item",[],[]) [Str "Commented-out",Space,Str "list",Space,Str "item"] ,BulletList [[Plain [Str "one",Space,RawInline (Format "html") ""]] diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt index 739543bfd..4cd69c9d8 100644 --- a/tests/markdown-reader-more.txt +++ b/tests/markdown-reader-more.txt @@ -58,6 +58,10 @@ $\$2 + \$3$ +This should not be math: + +$PATH 90 $PATH + ## Commented-out list item - one -- cgit v1.2.3 From 
2d90803b7ce586c4c882b907f00f574e3432f3c1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 May 2014 12:12:02 -0700 Subject: Require latest highlighting-kate. Fixes #1317. --- pandoc.cabal | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index f7d7d9c7e..a2138d6bf 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -227,7 +227,7 @@ Library tagsoup >= 0.13.1 && < 0.14, base64-bytestring >= 0.1 && < 1.1, zlib >= 0.5 && < 0.6, - highlighting-kate >= 0.5.8.1 && < 0.6, + highlighting-kate >= 0.5.8.2 && < 0.6, data-default >= 0.4 && < 0.6, temporary >= 1.1 && < 1.3, blaze-html >= 0.5 && < 0.8, @@ -329,7 +329,7 @@ Executable pandoc text >= 0.11 && < 1.2, bytestring >= 0.9 && < 0.11, extensible-exceptions >= 0.1 && < 0.2, - highlighting-kate >= 0.5.8.1 && < 0.6, + highlighting-kate >= 0.5.8.2 && < 0.6, aeson >= 0.7 && < 0.8, yaml >= 0.8.8.2 && < 0.9, containers >= 0.1 && < 0.6, @@ -372,7 +372,7 @@ Test-Suite test-pandoc directory >= 1 && < 1.3, filepath >= 1.1 && < 1.4, process >= 1 && < 1.3, - highlighting-kate >= 0.5.8.1 && < 0.6, + highlighting-kate >= 0.5.8.2 && < 0.6, Diff >= 0.2 && < 0.4, test-framework >= 0.3 && < 0.9, test-framework-hunit >= 0.2 && < 0.4, -- cgit v1.2.3 From e3ddc371dee9630dae71d61a69088b06cea8e909 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 May 2014 12:44:39 -0700 Subject: Markdown reader: Handle `c++` and `objective-c` as language identifiers in github-style fenced blocks. Closes #1318. Note: This is special-case handling of these two cases. It would be good to do something more systematic. 
--- src/Text/Pandoc/Readers/Markdown.hs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5129bc2e3..caa938ed6 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -618,12 +618,19 @@ codeBlockFenced = try $ do skipMany spaceChar attr <- option ([],[],[]) $ try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[x],[])) <$> identifier) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar) blankline contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines return $ return $ B.codeBlockWith attr $ intercalate "\n" contents +-- correctly handle github language identifiers +toLanguageId :: String -> String +toLanguageId = map toLower . go + where go "c++" = "cpp" + go "objective-c" = "objectivec" + go x = x + codeBlockIndented :: MarkdownParser (F Blocks) codeBlockIndented = do contents <- many1 (indentedLine <|> -- cgit v1.2.3 From 63865e4670c7d4e0a3873244d8a1c3afe12ebb2a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 28 May 2014 10:37:39 -0700 Subject: Windows install script: force install of pandoc-citeproc. 
--- windows/make-windows-installer.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index f639b9405..a0fd44ea4 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -7,7 +7,7 @@ cabal install hsb2hs alex happy if %errorlevel% neq 0 exit /b %errorlevel% cabal install -v1 --force --reinstall --flags="embed_data_files" if %errorlevel% neq 0 exit /b %errorlevel% -cabal install -v1 --reinstall --flags="embed_data_files" pandoc-citeproc +cabal install -v1 --force --reinstall --flags="embed_data_files" pandoc-citeproc if %errorlevel% neq 0 exit /b %errorlevel% strip .\.cabal-sandbox\bin\pandoc.exe strip .\.cabal-sandbox\bin\pandoc-citeproc.exe -- cgit v1.2.3 From 9cf5f74e8fad9c7b898553724a37035bfc46f268 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 28 May 2014 10:40:50 -0700 Subject: PDF writer: Fixed treatment of data uris for images. Closes #1062. --- pandoc.cabal | 3 ++- src/Text/Pandoc/PDF.hs | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index a2138d6bf..c834319a2 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -237,7 +237,8 @@ Library scientific >= 0.2 && < 0.4, vector >= 0.10 && < 0.11, hslua >= 0.3 && < 0.4, - binary >= 0.5 && < 0.8 + binary >= 0.5 && < 0.8, + SHA >= 1.6 && < 1.7 Build-Tools: alex, happy if flag(https) Build-Depends: http-client >= 0.3.2 && < 0.4, diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index e4e06e6c9..bd55c565f 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -38,11 +38,11 @@ import qualified Data.ByteString as BS import System.Exit (ExitCode (..)) import System.FilePath import System.Directory +import Data.Digest.Pure.SHA (showDigest, sha1) import System.Environment import Control.Monad (unless) import Data.List (isInfixOf) import Data.Maybe (fromMaybe) -import qualified Data.ByteString.Base64 as B64 import 
qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition import Text.Pandoc.Walk (walkM) @@ -98,7 +98,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do Right (contents, Just mime) -> do let ext = fromMaybe (takeExtension src) $ extensionFromMimeType mime - let basename = UTF8.toString $ B64.encode $ UTF8.fromString src + let basename = showDigest $ sha1 $ BL.fromChunks [contents] let fname = tmpdir basename <.> ext BS.writeFile fname contents return $ Image ils (fname,tit) -- cgit v1.2.3 From 072411e5228fbdedd196ce1161f4a95027b0ac6b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 28 May 2014 12:14:35 -0700 Subject: Windows installer: Use one install command for pandoc, pandoc-citeproc. --- windows/make-windows-installer.bat | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/windows/make-windows-installer.bat b/windows/make-windows-installer.bat index a0fd44ea4..0bd05a0e3 100644 --- a/windows/make-windows-installer.bat +++ b/windows/make-windows-installer.bat @@ -5,9 +5,7 @@ cabal sandbox init cabal clean cabal install hsb2hs alex happy if %errorlevel% neq 0 exit /b %errorlevel% -cabal install -v1 --force --reinstall --flags="embed_data_files" -if %errorlevel% neq 0 exit /b %errorlevel% -cabal install -v1 --force --reinstall --flags="embed_data_files" pandoc-citeproc +cabal install -v1 --force --reinstall --flags="embed_data_files" . pandoc-citeproc if %errorlevel% neq 0 exit /b %errorlevel% strip .\.cabal-sandbox\bin\pandoc.exe strip .\.cabal-sandbox\bin\pandoc-citeproc.exe -- cgit v1.2.3 From 23a9b800a35d3c17d29a278b6bb218f05642d282 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 31 May 2014 22:02:33 -0700 Subject: Docx writer: Take over document formatting from reference.docx. This includes margins, page size, page orientation. 
--- src/Text/Pandoc/Writers/Docx.hs | 47 ++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 551d97855..6fd76c9c7 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -204,11 +204,35 @@ writeDocx opts doc@(Pandoc meta _) = do let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img let imageEntries = map toImageEntry imgs + -- adjust contents to add sectPr from reference.docx + let docpath = "word/document.xml" + parsedDoc <- parseXml refArchive distArchive docpath + let sectprs = filterElementsName (\qn -> qPrefix qn == Just "w" && + qName qn == "sectPr") + parsedDoc + + let stdAttributes = + [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") + ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") + ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") + ,("xmlns:o","urn:schemas-microsoft-com:office:office") + ,("xmlns:v","urn:schemas-microsoft-com:vml") + ,("xmlns:w10","urn:schemas-microsoft-com:office:word") + ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") + ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") + ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] + + let contents' = contents ++ sectprs + let docContents = mknode "w:document" stdAttributes + $ mknode "w:body" [] $ contents' + -- word/document.xml - let contentEntry = toEntry "word/document.xml" epochtime $ renderXml contents + let contentEntry = toEntry "word/document.xml" epochtime + $ renderXml docContents -- footnotes - let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml footnotes + let notes = mknode "w:footnotes" stdAttributes footnotes + let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml notes -- footnote rels let 
footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime @@ -392,8 +416,9 @@ mkLvl marker lvl = getNumId :: WS Int getNumId = length `fmap` gets stLists --- | Convert Pandoc document to two OpenXML elements (the main document and footnotes). -writeOpenXML :: WriterOptions -> Pandoc -> WS (Element, Element) +-- | Convert Pandoc document to two lists of +-- OpenXML elements (the main document and footnotes). +writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) writeOpenXML opts (Pandoc meta blocks) = do let tit = docTitle meta ++ case lookupMeta "subtitle" meta of Just (MetaBlocks [Plain xs]) -> LineBreak : xs @@ -411,19 +436,7 @@ writeOpenXML opts (Pandoc meta blocks) = do doc' <- blocksToOpenXML opts blocks' notes' <- reverse `fmap` gets stFootnotes let meta' = title ++ authors ++ date - let stdAttributes = - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") - ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") - ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") - ,("xmlns:o","urn:schemas-microsoft-com:office:office") - ,("xmlns:v","urn:schemas-microsoft-com:vml") - ,("xmlns:w10","urn:schemas-microsoft-com:office:word") - ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") - ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") - ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - let doc = mknode "w:document" stdAttributes $ mknode "w:body" [] (meta' ++ doc') - let notes = mknode "w:footnotes" stdAttributes notes' - return (doc, notes) + return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] -- cgit v1.2.3 From c8bc70a6bb12e0fbfadadb9480efafb3be4f11a5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 15:28:10 -0700 Subject: LaTeX/Beamer templates: remove conditional around date. 
Closes #1321. --- data/templates | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/templates b/data/templates index 620e8fe5e..9f1636367 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 620e8fe5e1ed1a1a4a2243587f3063ccfe745673 +Subproject commit 9f1636367e28deaa949c4377eaea2acdc13a148f -- cgit v1.2.3 From 6327ccf523bb5d550d85dd7782079b8f070fe5d1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 15:29:27 -0700 Subject: Minor code reformat. --- src/Text/Pandoc/Writers/Docx.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 6fd76c9c7..026cfcb41 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -60,6 +60,11 @@ import qualified Control.Exception as E import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) import Control.Applicative ((<|>)) +data ListMarker = NoMarker + | BulletMarker + | NumberMarker ListNumberStyle ListNumberDelim Int + deriving (Show, Read, Eq, Ord) + data WriterState = WriterState{ stTextProperties :: [Element] , stParaProperties :: [Element] @@ -73,11 +78,6 @@ data WriterState = WriterState{ , stLists :: [ListMarker] } -data ListMarker = NoMarker - | BulletMarker - | NumberMarker ListNumberStyle ListNumberDelim Int - deriving (Show, Read, Eq, Ord) - defaultWriterState :: WriterState defaultWriterState = WriterState{ stTextProperties = [] -- cgit v1.2.3 From 6848f642e82322c0894c62d3215e98325ab7fd8c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 21:15:03 -0700 Subject: Docx writer: Header and footer are now carried over from reference.docx. 
--- data/reference.docx | Bin 9797 -> 9360 bytes src/Text/Pandoc/Writers/Docx.hs | 54 ++++++++++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/data/reference.docx b/data/reference.docx index a9c268b9f..789237dd8 100644 Binary files a/data/reference.docx and b/data/reference.docx differ diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 026cfcb41..584662be8 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -155,8 +155,11 @@ writeDocx opts doc@(Pandoc meta _) = do ,("/word/document.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") ,("/word/footnotes.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") - ] ++ map mkImageOverride imgs + "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"), + ("/word/header1.xml", + "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"), + ("/word/footer1.xml", + "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml") ] ++ map mkImageOverride imgs let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), mknode "Default" @@ -191,7 +194,14 @@ writeDocx opts doc@(Pandoc meta _) = do "theme/theme1.xml") ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", - "footnotes.xml")] + "footnotes.xml") + ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/header", + "rId8", + "header1.xml") + ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer", + "rId9", + "footer1.xml") + ] let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" 
[("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () @@ -207,9 +217,16 @@ writeDocx opts doc@(Pandoc meta _) = do -- adjust contents to add sectPr from reference.docx let docpath = "word/document.xml" parsedDoc <- parseXml refArchive distArchive docpath - let sectprs = filterElementsName (\qn -> qPrefix qn == Just "w" && - qName qn == "sectPr") - parsedDoc + let mbsectpr = filterElementName (\qn -> qPrefix qn == Just "w" && + qName qn == "sectPr") parsedDoc + let sectPrProps = case mbsectpr of + Nothing -> [] + Just e -> filterElementsName (\qn -> + qPrefix qn == Just "w" && + qName qn `notElem` ["headerReference","footerReference","sectPr"]) e + let headerPr = mknode "w:headerReference" [("w:type","default"),("r:id","rId8")] $ () + let footerPr = mknode "w:footerReference" [("w:type","default"),("r:id","rId9")] $ () + let sectpr = mknode "w:sectPr" [] $ [headerPr, footerPr] ++ sectPrProps let stdAttributes = [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") @@ -222,7 +239,7 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - let contents' = contents ++ sectprs + let contents' = contents ++ [sectpr] let docContents = mknode "w:document" stdAttributes $ mknode "w:body" [] $ contents' @@ -281,20 +298,25 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels - let entryFromArchive path = + let entryFromArchive arch path = (toEntry path epochtime . 
renderXml) `fmap` - parseXml refArchive distArchive path - docPropsAppEntry <- entryFromArchive "docProps/app.xml" - themeEntry <- entryFromArchive "word/theme/theme1.xml" - fontTableEntry <- entryFromArchive "word/fontTable.xml" - settingsEntry <- entryFromArchive "word/settings.xml" - webSettingsEntry <- entryFromArchive "word/webSettings.xml" + parseXml arch distArchive path + docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" + themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" + fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" + -- we take settings.xml from dist archive because the ref archive + -- sometimes references special footnotes and endnotes that may + -- not be defined in footnotes.xml or endnotes.xml. + settingsEntry <- entryFromArchive distArchive "word/settings.xml" + webSettingsEntry <- entryFromArchive distArchive "word/webSettings.xml" + headerEntry <- entryFromArchive refArchive "word/header1.xml" + footerEntry <- entryFromArchive refArchive "word/footer1.xml" let miscRels = [ f | f <- filesInArchive refArchive , "word/_rels/" `isPrefixOf` f , ".xml.rels" `isSuffixOf` f , f /= "word/_rels/document.xml.rels" , f /= "word/_rels/footnotes.xml.rels" ] - miscRelEntries <- mapM entryFromArchive miscRels + miscRelEntries <- mapM (entryFromArchive refArchive) miscRels -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -302,7 +324,7 @@ writeDocx opts doc@(Pandoc meta _) = do footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : - imageEntries ++ miscRelEntries + headerEntry : footerEntry : imageEntries ++ miscRelEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] -- cgit v1.2.3 From 438ccbe2e681871dbff7faa60c3a76f1c89a1245 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 21:32:15 -0700 Subject: Removed header1.xml, footer1.xml from 
reference.docx. --- data/reference.docx | Bin 9360 -> 8473 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/data/reference.docx b/data/reference.docx index 789237dd8..7efc62458 100644 Binary files a/data/reference.docx and b/data/reference.docx differ -- cgit v1.2.3 From 7242165bed21a63b49e1ce7d639400f095800204 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 22:29:13 -0700 Subject: Docx writer: Improved handling of headers/footers. --- src/Text/Pandoc/Writers/Docx.hs | 105 ++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 584662be8..098da119b 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -59,6 +59,7 @@ import Text.Printf (printf) import qualified Control.Exception as E import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) import Control.Applicative ((<|>)) +import Data.Maybe (mapMaybe) data ListMarker = NoMarker | BulletMarker @@ -123,6 +124,40 @@ writeDocx opts doc@(Pandoc meta _) = do epochtime <- floor `fmap` getPOSIXTime let imgs = M.elems $ stImages st + -- create entries for images in word/media/... 
+ let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img + let imageEntries = map toImageEntry imgs + + -- adjust contents to add sectPr from reference.docx + parsedDoc <- parseXml refArchive distArchive "word/document.xml" + let wname f qn = qPrefix qn == Just "w" && f (qName qn) + let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + + let sectpr = maybe (mknode "w:sectPr" [] $ ()) id mbsectpr + + let stdAttributes = + [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") + ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") + ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") + ,("xmlns:o","urn:schemas-microsoft-com:office:office") + ,("xmlns:v","urn:schemas-microsoft-com:vml") + ,("xmlns:w10","urn:schemas-microsoft-com:office:word") + ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") + ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") + ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] + + let contents' = contents ++ [sectpr] + let docContents = mknode "w:document" stdAttributes + $ mknode "w:body" [] $ contents' + + parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels" + let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" + let isFooterNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer" + let headers = filterElements isHeaderNode parsedRels + let footers = filterElements isFooterNode parsedRels + + let extractTarget e = findAttr (QName "Target" Nothing Nothing) e + -- we create [Content_Types].xml and word/_rels/document.xml.rels -- from scratch rather than reading from reference.docx, -- because Word sometimes changes these files when a reference.docx is modified, 
@@ -135,7 +170,7 @@ writeDocx opts doc@(Pandoc meta _) = do let mkImageOverride (_, imgpath, mbMimeType, _, _) = mkOverrideNode ("/word/" ++ imgpath, fromMaybe "application/octet-stream" mbMimeType) - let overrides = map mkOverrideNode + let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") ,("/word/numbering.xml", @@ -155,11 +190,13 @@ writeDocx opts doc@(Pandoc meta _) = do ,("/word/document.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") ,("/word/footnotes.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"), - ("/word/header1.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"), - ("/word/footer1.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml") ] ++ map mkImageOverride imgs + "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") + ] ++ + map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ + map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ + map mkImageOverride imgs let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), mknode "Default" @@ -195,13 +232,8 @@ writeDocx opts doc@(Pandoc meta _) = do ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", "footnotes.xml") - ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/header", - "rId8", - "header1.xml") - ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer", - "rId9", - "footer1.xml") - ] + ] ++ + headers ++ footers let toImgRel (ident,path,_,_,_) = mknode "Relationship" 
[("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () @@ -210,38 +242,6 @@ writeDocx opts doc@(Pandoc meta _) = do let relEntry = toEntry "word/_rels/document.xml.rels" epochtime $ renderXml reldoc - -- create entries for images in word/media/... - let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img - let imageEntries = map toImageEntry imgs - - -- adjust contents to add sectPr from reference.docx - let docpath = "word/document.xml" - parsedDoc <- parseXml refArchive distArchive docpath - let mbsectpr = filterElementName (\qn -> qPrefix qn == Just "w" && - qName qn == "sectPr") parsedDoc - let sectPrProps = case mbsectpr of - Nothing -> [] - Just e -> filterElementsName (\qn -> - qPrefix qn == Just "w" && - qName qn `notElem` ["headerReference","footerReference","sectPr"]) e - let headerPr = mknode "w:headerReference" [("w:type","default"),("r:id","rId8")] $ () - let footerPr = mknode "w:footerReference" [("w:type","default"),("r:id","rId9")] $ () - let sectpr = mknode "w:sectPr" [] $ [headerPr, footerPr] ++ sectPrProps - - let stdAttributes = - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") - ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") - ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") - ,("xmlns:o","urn:schemas-microsoft-com:office:office") - ,("xmlns:v","urn:schemas-microsoft-com:vml") - ,("xmlns:w10","urn:schemas-microsoft-com:office:word") - ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") - ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") - 
,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - - let contents' = contents ++ [sectpr] - let docContents = mknode "w:document" stdAttributes - $ mknode "w:body" [] $ contents' -- word/document.xml let contentEntry = toEntry "word/document.xml" epochtime @@ -304,13 +304,13 @@ writeDocx opts doc@(Pandoc meta _) = do docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" - -- we take settings.xml from dist archive because the ref archive - -- sometimes references special footnotes and endnotes that may - -- not be defined in footnotes.xml or endnotes.xml. + -- we use dist archive for settings.xml, because Word sometimes + -- adds references to footnotes or endnotes we don't have... settingsEntry <- entryFromArchive distArchive "word/settings.xml" - webSettingsEntry <- entryFromArchive distArchive "word/webSettings.xml" - headerEntry <- entryFromArchive refArchive "word/header1.xml" - footerEntry <- entryFromArchive refArchive "word/footer1.xml" + webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" + headerFooterEntries <- mapM (entryFromArchive refArchive) $ + mapMaybe (\e -> fmap ("word/" ++) $ extractTarget e) + (headers ++ footers) let miscRels = [ f | f <- filesInArchive refArchive , "word/_rels/" `isPrefixOf` f , ".xml.rels" `isSuffixOf` f @@ -324,7 +324,8 @@ writeDocx opts doc@(Pandoc meta _) = do footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : - headerEntry : footerEntry : imageEntries ++ miscRelEntries + imageEntries ++ headerFooterEntries ++ + miscRelEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] -- cgit v1.2.3 From 455072bdf266d1d49706e04f94b0063b015cd4ca Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 
2014 23:38:27 -0700 Subject: Updated README on reference.docx. --- README | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README b/README index 5f82f94b8..d5e75b154 100644 --- a/README +++ b/README @@ -523,7 +523,8 @@ Options affecting specific writers : Use the specified file as a style reference in producing a docx file. For best results, the reference docx should be a modified version of a docx file produced using pandoc. The contents of the reference docx - are ignored, but its stylesheets are used in the new docx. If no + are ignored, but its stylesheets and document properties (including + margins, page size, header, and footer) are used in the new docx. If no reference docx is specified on the command line, pandoc will look for a file `reference.docx` in the user data directory (see `--data-dir`). If this is not found either, sensible defaults will be -- cgit v1.2.3 From 0bd8d5f86b4733fdce89deb78471bbd7daa45f9e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 23:39:15 -0700 Subject: Updated tests for c8bc70a6bb12e0fbfadadb9480efafb3be4f11a5. --- tests/lhs-test.latex | 1 + tests/lhs-test.latex+lhs | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/lhs-test.latex b/tests/lhs-test.latex index 0ed6640bd..6600608fe 100644 --- a/tests/lhs-test.latex +++ b/tests/lhs-test.latex @@ -63,6 +63,7 @@ \setlength{\emergencystretch}{3em} % prevent overfull lines \setcounter{secnumdepth}{0} +\date{} \begin{document} diff --git a/tests/lhs-test.latex+lhs b/tests/lhs-test.latex+lhs index 67841d54b..d6cb7c497 100644 --- a/tests/lhs-test.latex+lhs +++ b/tests/lhs-test.latex+lhs @@ -44,6 +44,7 @@ \setlength{\emergencystretch}{3em} % prevent overfull lines \setcounter{secnumdepth}{0} +\date{} \begin{document} -- cgit v1.2.3 From e1cf47efa0029a385c39053c9f441ade29d7a991 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 23:45:05 -0700 Subject: Templates: Fail informatively on template syntax errors. 
With the move from parsec to attoparsec, we lost good error reporting. In fact, since we weren't testing for end of input, malformed templates would fail silently. Here we revert back to Parsec for better error messages. --- pandoc.cabal | 1 - src/Text/Pandoc/Templates.hs | 70 ++++++++++++++++++++++++-------------------- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index c834319a2..01b3c401a 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -232,7 +232,6 @@ Library temporary >= 1.1 && < 1.3, blaze-html >= 0.5 && < 0.8, blaze-markup >= 0.5.1 && < 0.7, - attoparsec >= 0.10 && < 0.12, yaml >= 0.8.8.2 && < 0.9, scientific >= 0.2 && < 0.4, vector >= 0.10 && < 0.11, diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 551db6483..89856a9ee 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -96,8 +96,8 @@ module Text.Pandoc.Templates ( renderTemplate import Data.Char (isAlphaNum) import Control.Monad (guard, when) import Data.Aeson (ToJSON(..), Value(..)) -import qualified Data.Attoparsec.Text as A -import Data.Attoparsec.Text (Parser) +import qualified Text.Parsec as P +import Text.Parsec.Text (Parser) import Control.Applicative import qualified Data.Text as T import Data.Text (Text) @@ -172,7 +172,10 @@ renderTemplate :: (ToJSON a, TemplateTarget b) => Template -> a -> b renderTemplate (Template f) context = toTarget $ f $ toJSON context compileTemplate :: Text -> Either String Template -compileTemplate template = A.parseOnly pTemplate template +compileTemplate template = + case P.parse (pTemplate <* P.eof) "template" template of + Left e -> Left (show e) + Right x -> Right x -- | Like 'renderTemplate', but compiles the template first, -- raising an error if compilation fails. 
@@ -230,7 +233,7 @@ replaceVar _ _ old = old pTemplate :: Parser Template pTemplate = do - sp <- A.option mempty pInitialSpace + sp <- P.option mempty pInitialSpace rest <- mconcat <$> many (pConditional <|> pFor <|> pNewline <|> @@ -239,40 +242,43 @@ pTemplate = do pEscapedDollar) return $ sp <> rest +takeWhile1 :: (Char -> Bool) -> Parser Text +takeWhile1 f = T.pack <$> P.many1 (P.satisfy f) + pLit :: Parser Template -pLit = lit <$> A.takeWhile1 (\x -> x /='$' && x /= '\n') +pLit = lit <$> takeWhile1 (\x -> x /='$' && x /= '\n') pNewline :: Parser Template pNewline = do - A.char '\n' - sp <- A.option mempty pInitialSpace + P.char '\n' + sp <- P.option mempty pInitialSpace return $ lit "\n" <> sp pInitialSpace :: Parser Template pInitialSpace = do - sps <- A.takeWhile1 (==' ') + sps <- takeWhile1 (==' ') let indentVar = if T.null sps then id else indent (T.length sps) - v <- A.option mempty $ indentVar <$> pVar + v <- P.option mempty $ indentVar <$> pVar return $ lit sps <> v pEscapedDollar :: Parser Template -pEscapedDollar = lit "$" <$ A.string "$$" +pEscapedDollar = lit "$" <$ P.try (P.string "$$") pVar :: Parser Template -pVar = var <$> (A.char '$' *> pIdent <* A.char '$') +pVar = var <$> (P.try $ P.char '$' *> pIdent <* P.char '$') pIdent :: Parser [Text] pIdent = do first <- pIdentPart - rest <- many (A.char '.' *> pIdentPart) + rest <- many (P.char '.' 
*> pIdentPart) return (first:rest) pIdentPart :: Parser Text -pIdentPart = do - first <- A.letter - rest <- A.takeWhile (\c -> isAlphaNum c || c == '_' || c == '-') +pIdentPart = P.try $ do + first <- P.letter + rest <- T.pack <$> P.many (P.satisfy (\c -> isAlphaNum c || c == '_' || c == '-')) let id' = T.singleton first <> rest guard $ id' `notElem` reservedWords return id' @@ -281,38 +287,38 @@ reservedWords :: [Text] reservedWords = ["else","endif","for","endfor","sep"] skipEndline :: Parser () -skipEndline = A.skipWhile (`elem` " \t") >> A.char '\n' >> return () +skipEndline = P.try $ P.skipMany (P.satisfy (`elem` " \t")) >> P.char '\n' >> return () pConditional :: Parser Template pConditional = do - A.string "$if(" + P.try $ P.string "$if(" id' <- pIdent - A.string ")$" + P.string ")$" -- if newline after the "if", then a newline after "endif" will be swallowed - multiline <- A.option False (True <$ skipEndline) + multiline <- P.option False (True <$ skipEndline) ifContents <- pTemplate - elseContents <- A.option mempty $ - do A.string "$else$" - when multiline $ A.option () skipEndline + elseContents <- P.option mempty $ P.try $ + do P.string "$else$" + when multiline $ P.option () skipEndline pTemplate - A.string "$endif$" - when multiline $ A.option () skipEndline + P.string "$endif$" + when multiline $ P.option () skipEndline return $ cond id' ifContents elseContents pFor :: Parser Template pFor = do - A.string "$for(" + P.try $ P.string "$for(" id' <- pIdent - A.string ")$" + P.string ")$" -- if newline after the "for", then a newline after "endfor" will be swallowed - multiline <- A.option False $ skipEndline >> return True + multiline <- P.option False $ skipEndline >> return True contents <- pTemplate - sep <- A.option mempty $ - do A.string "$sep$" - when multiline $ A.option () skipEndline + sep <- P.option mempty $ + do P.try $ P.string "$sep$" + when multiline $ P.option () skipEndline pTemplate - A.string "$endfor$" - when multiline $ A.option () 
skipEndline + P.string "$endfor$" + when multiline $ P.option () skipEndline return $ iter id' contents sep indent :: Int -> Template -> Template -- cgit v1.2.3 From 4552555625fd3904189322c81382166a4f4d1de5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 2 Jun 2014 13:50:28 -0700 Subject: Require latest texmath. Closes #1319. This allows `\left` to be used with `]`, `)`, etc. --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index 01b3c401a..0fb00b150 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -218,7 +218,7 @@ Library old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5, HTTP >= 4000.0.5 && < 4000.3, - texmath >= 0.6.6.1 && < 0.7, + texmath >= 0.6.6.2 && < 0.7, xml >= 1.3.12 && < 1.4, random >= 1 && < 1.1, extensible-exceptions >= 0.1 && < 0.2, -- cgit v1.2.3 From bf915da6cd0dc97a231100b784450e334c715969 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 2 Jun 2014 20:07:41 -0700 Subject: Docx writer: Make images work in reference.docx headers/footers. * All media from reference.docx are copied into result. * Added defaults for common image types to [Content Types]. * Avoided redundant XML parse + write for entries taken over from reference.docx, for better performance. 
--- src/Text/Pandoc/Writers/Docx.hs | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 098da119b..8aaf3c1b8 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -197,10 +197,21 @@ writeDocx opts doc@(Pandoc meta _) = do map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ map mkImageOverride imgs + let imageDefaults = map (\(x,y) -> mknode "Default" + [("Extension",x),("ContentType",y)] ()) + [("jpg","image/jpeg") + ,("jpeg","image/jpeg") + ,("png","image/png") + ,("svg","image/svg+xml") + ,("tif","image/tiff") + ,("tiff","image/tiff") + ,("bmp","image/x-ms-bmp") + ,("gif","image/gif") + ] let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), mknode "Default" - [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] + [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] ++ imageDefaults let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides let contentTypesEntry = toEntry "[Content_Types].xml" epochtime $ renderXml contentTypesDoc @@ -311,12 +322,13 @@ writeDocx opts doc@(Pandoc meta _) = do headerFooterEntries <- mapM (entryFromArchive refArchive) $ mapMaybe (\e -> fmap ("word/" ++) $ extractTarget e) (headers ++ footers) - let miscRels = [ f | f <- filesInArchive refArchive - , "word/_rels/" `isPrefixOf` f - , ".xml.rels" `isSuffixOf` f - , f /= "word/_rels/document.xml.rels" - , f /= "word/_rels/footnotes.xml.rels" ] - miscRelEntries <- mapM (entryFromArchive refArchive) miscRels + let miscRelEntries = [ e | e <- zEntries refArchive + , "word/_rels/" `isPrefixOf` (eRelativePath e) + , ".xml.rels" 
`isSuffixOf` (eRelativePath e) + , eRelativePath e /= "word/_rels/document.xml.rels" + , eRelativePath e /= "word/_rels/footnotes.xml.rels" ] + let otherMediaEntries = [ e | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -325,7 +337,7 @@ writeDocx opts doc@(Pandoc meta _) = do docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : imageEntries ++ headerFooterEntries ++ - miscRelEntries + miscRelEntries ++ otherMediaEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] -- cgit v1.2.3 From 326d7fa8f89f9a4b74042bf4cbb04931e26c8d8d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 2 Jun 2014 20:20:16 -0700 Subject: Docx writer: Improved entryFromArchive to avoid parse. No need to parse the XML if we're just going to render it right away! --- src/Text/Pandoc/Writers/Docx.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 8aaf3c1b8..1e37b5515 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -310,8 +310,9 @@ writeDocx opts doc@(Pandoc meta _) = do let relsEntry = toEntry relsPath epochtime $ renderXml rels let entryFromArchive arch path = - (toEntry path epochtime . renderXml) `fmap` - parseXml arch distArchive path + maybe (fail $ path ++ " corrupt or missing in reference docx") + return + (findEntryByPath path arch `mplus` findEntryByPath path distArchive) docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" -- cgit v1.2.3 From cbfde5cb50a461995a8b60d148615b5a72159f3d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 2 Jun 2014 20:39:27 -0700 Subject: Docx writer: Create overrides per-image for media/ in ref docx. 
This should be somewhat more robust and cover more types of images. --- src/Text/Pandoc/Writers/Docx.hs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1e37b5515..e630c5094 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -170,6 +170,9 @@ writeDocx opts doc@(Pandoc meta _) = do let mkImageOverride (_, imgpath, mbMimeType, _, _) = mkOverrideNode ("/word/" ++ imgpath, fromMaybe "application/octet-stream" mbMimeType) + let mkMediaOverride imgpath = mkOverrideNode ('/':imgpath, + fromMaybe "application/octet-stream" + $ getMimeType imgpath) let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -196,22 +199,14 @@ writeDocx opts doc@(Pandoc meta _) = do "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ - map mkImageOverride imgs - let imageDefaults = map (\(x,y) -> mknode "Default" - [("Extension",x),("ContentType",y)] ()) - [("jpg","image/jpeg") - ,("jpeg","image/jpeg") - ,("png","image/png") - ,("svg","image/svg+xml") - ,("tif","image/tiff") - ,("tiff","image/tiff") - ,("bmp","image/x-ms-bmp") - ,("gif","image/gif") - ] + map mkImageOverride imgs ++ + map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] + let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), mknode "Default" - [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] ++ imageDefaults + [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] let contentTypesDoc = mknode "Types" 
[("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides let contentTypesEntry = toEntry "[Content_Types].xml" epochtime $ renderXml contentTypesDoc -- cgit v1.2.3 From 2a627f85fe27d4351e9c612454d18ae701a466a3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 11:00:54 -0700 Subject: Shared: Added ordNub. API change (adds export). --- src/Text/Pandoc/Shared.hs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d8cbe46d9..b0adf55f5 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -35,6 +35,7 @@ module Text.Pandoc.Shared ( splitByIndices, splitStringByIndices, substitute, + ordNub, -- * Text processing backslashEscapes, escapeStringUsing, @@ -94,6 +95,7 @@ import Data.List ( find, isPrefixOf, intercalate ) import qualified Data.Map as M import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, unEscapeString, parseURIReference ) +import qualified Data.Set as Set import System.Directory import Text.Pandoc.MIME (getMimeType) import System.FilePath ( (), takeExtension, dropExtension ) @@ -174,6 +176,13 @@ substitute target replacement lst@(x:xs) = then replacement ++ substitute target replacement (drop (length target) lst) else x : substitute target replacement xs +ordNub :: (Ord a) => [a] -> [a] +ordNub l = go Set.empty l + where + go _ [] = [] + go s (x:xs) = if x `Set.member` s then go s xs + else x : go (Set.insert x s) xs + -- -- Text processing -- -- cgit v1.2.3 From 9b4e772718a868392a51727ff8cc5eba2ce35bcd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 11:01:23 -0700 Subject: Templates: use ordNub instead of nub. Closes #1022.
--- src/Text/Pandoc/Templates.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 89856a9ee..4ae6a6d8a 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -103,7 +103,7 @@ import qualified Data.Text as T import Data.Text (Text) import Data.Text.Encoding (encodeUtf8) import Text.Pandoc.Compat.Monoid ((<>), Monoid(..)) -import Data.List (intersperse, nub) +import Data.List (intersperse) import System.FilePath ((), (<.>)) import qualified Data.Map as M import qualified Data.HashMap.Strict as H @@ -116,7 +116,7 @@ import Text.Blaze.Internal (preEscapedText) import Text.Blaze (preEscapedText, Html) #endif import Data.ByteString.Lazy (ByteString, fromChunks) -import Text.Pandoc.Shared (readDataFileUTF8) +import Text.Pandoc.Shared (readDataFileUTF8, ordNub) import Data.Vector ((!?)) -- | Get default template for the specified writer. @@ -163,7 +163,7 @@ varListToJSON assoc = toJSON $ M.fromList assoc' where assoc' = [(T.pack k, toVal [T.pack z | (y,z) <- assoc, not (null z), y == k]) - | k <- nub $ map fst assoc ] + | k <- ordNub $ map fst assoc ] toVal [x] = toJSON x toVal [] = Null toVal xs = toJSON xs -- cgit v1.2.3 From 356a32e9384ae5c3a871d9790079d89831f6de75 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 11:02:11 -0700 Subject: Version bump to 1.12.5. --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index 0fb00b150..5782938e5 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,5 +1,5 @@ Name: pandoc -Version: 1.12.4.2 +Version: 1.12.5 Cabal-Version: >= 1.10 Build-Type: Custom License: GPL -- cgit v1.2.3 From 05355ac57b7ccadfa4462f3304a7f6364147c8eb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 11:03:40 -0700 Subject: Docx writer: Simplified abstractNumId numbering. Instead of sequential numbering, we assign numbers based on the list marker styles. 
This simplifies some of the code and should make it easier to modify numbering in the future. --- src/Text/Pandoc/Writers/Docx.hs | 49 +++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index e630c5094..572823871 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -66,6 +66,25 @@ data ListMarker = NoMarker | NumberMarker ListNumberStyle ListNumberDelim Int deriving (Show, Read, Eq, Ord) +listMarkerToId :: ListMarker -> String +listMarkerToId NoMarker = "0" +listMarkerToId BulletMarker = "1" +listMarkerToId (NumberMarker sty delim n) = + styNum : delimNum : show n + where styNum = case sty of + DefaultStyle -> '2' + Example -> '3' + Decimal -> '4' + LowerRoman -> '5' + UpperRoman -> '6' + LowerAlpha -> '7' + UpperAlpha -> '8' + delimNum = case delim of + DefaultDelim -> '0' + Period -> '1' + OneParen -> '2' + TwoParens -> '3' + data WriterState = WriterState{ stTextProperties :: [Element] , stParaProperties :: [Element] @@ -75,7 +94,6 @@ data WriterState = WriterState{ , stImages :: M.Map FilePath (String, String, Maybe String, Element, B.ByteString) , stListLevel :: Int , stListNumId :: Int - , stNumStyles :: M.Map ListMarker Int , stLists :: [ListMarker] } @@ -89,7 +107,6 @@ defaultWriterState = WriterState{ , stImages = M.empty , stListLevel = -1 , stListNumId = 1 - , stNumStyles = M.fromList [(NoMarker, 0)] , stLists = [NoMarker] } @@ -273,7 +290,7 @@ writeDocx opts doc@(Pandoc meta _) = do -- construct word/numbering.xml let numpath = "word/numbering.xml" numEntry <- (toEntry numpath epochtime . 
renderXml) - `fmap` mkNumbering (stNumStyles st) (stLists st) + `fmap` mkNumbering (stLists st) let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -371,29 +388,28 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes $ backgroundColor style ) ] -mkNumbering :: M.Map ListMarker Int -> [ListMarker] -> IO Element -mkNumbering markers lists = do - elts <- mapM mkAbstractNum (M.toList markers) +mkNumbering :: [ListMarker] -> IO Element +mkNumbering lists = do + elts <- mapM mkAbstractNum (ordNub lists) return $ mknode "w:numbering" [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")] - $ elts ++ zipWith (mkNum markers) lists [1..(length lists)] + $ elts ++ zipWith mkNum lists [1..(length lists)] -mkNum :: M.Map ListMarker Int -> ListMarker -> Int -> Element -mkNum markers marker numid = +mkNum :: ListMarker -> Int -> Element +mkNum marker numid = mknode "w:num" [("w:numId",show numid)] - $ mknode "w:abstractNumId" [("w:val",show absnumid)] () + $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] () : case marker of NoMarker -> [] BulletMarker -> [] NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] - where absnumid = fromMaybe 0 $ M.lookup marker markers -mkAbstractNum :: (ListMarker,Int) -> IO Element -mkAbstractNum (marker,numid) = do +mkAbstractNum :: ListMarker -> IO Element +mkAbstractNum marker = do nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) - return $ mknode "w:abstractNum" [("w:abstractNumId",show numid)] + return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () : map (mkLvl marker) [0..6] @@ -594,11 +610,6 @@ addList :: ListMarker -> WS () addList marker 
= do lists <- gets stLists modify $ \st -> st{ stLists = lists ++ [marker] } - numStyles <- gets stNumStyles - case M.lookup marker numStyles of - Just _ -> return () - Nothing -> modify $ \st -> - st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles } listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element] listItemToOpenXML _ _ [] = return [] -- cgit v1.2.3 From 2842ad5a978de758d70801b5279f75b9ba679406 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 11:33:09 -0700 Subject: Docx writer: Changed abstractNumId numbering scheme. Now the minimum id used by pandoc is 990. All ids start with "99". This gives some room for a reference.docx to define numbering styles. Note: this is not yet possible, since pandoc generates numbering.xml entirely on its own. --- src/Text/Pandoc/Writers/Docx.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 572823871..ca0892547 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -67,10 +67,10 @@ data ListMarker = NoMarker deriving (Show, Read, Eq, Ord) listMarkerToId :: ListMarker -> String -listMarkerToId NoMarker = "0" -listMarkerToId BulletMarker = "1" +listMarkerToId NoMarker = "990" +listMarkerToId BulletMarker = "991" listMarkerToId (NumberMarker sty delim n) = - styNum : delimNum : show n + '9' : '9' : styNum : delimNum : show n where styNum = case sty of DefaultStyle -> '2' Example -> '3' -- cgit v1.2.3 From ec047aaa8c1c1e9d69b0029a2e4512785fbc15a8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 12:13:31 -0700 Subject: Docx writer: pandoc uses only numIds >= 1000 for lists. This opens up the possibility (with further code changes) of preserving some numbering from the reference.docx (e.g. header numbering.) See #1305.
--- src/Text/Pandoc/Writers/Docx.hs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index ca0892547..3d2f5d4b5 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -388,12 +388,16 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes $ backgroundColor style ) ] +-- this is the lowest number used for a list numId +baseListId :: Int +baseListId = 1000 + mkNumbering :: [ListMarker] -> IO Element mkNumbering lists = do elts <- mapM mkAbstractNum (ordNub lists) return $ mknode "w:numbering" [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")] - $ elts ++ zipWith mkNum lists [1..(length lists)] + $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] mkNum :: ListMarker -> Int -> Element mkNum marker numid = @@ -461,7 +465,7 @@ mkLvl marker lvl = patternFor _ s = s ++ "." getNumId :: WS Int -getNumId = length `fmap` gets stLists +getNumId = ((999 +) . length) `fmap` gets stLists -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). @@ -615,7 +619,8 @@ listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element] listItemToOpenXML _ _ [] = return [] listItemToOpenXML opts numid (first:rest) = do first' <- withNumId numid $ blockToOpenXML opts first - rest' <- withNumId 1 $ blocksToOpenXML opts rest + -- baseListId is the code for no list marker: + rest' <- withNumId baseListId $ blocksToOpenXML opts rest return $ first' ++ rest' alignmentToString :: Alignment -> [Char] -- cgit v1.2.3 From 0ddb4cd2e8883c226ca7ab8a92737dc29f07dfda Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 13:14:32 -0700 Subject: Docx writer: Combine reference.docx numbering with pandoc's. This should have fixed #1305, allowing the reference.docx to define section numbering, but it doesn't. 
Now the headings appear with proper indentation, but the numbers don't appear. Unclear why. styles.xml and numbering.xml basically match the docx which has the expected result. --- src/Text/Pandoc/Writers/Docx.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 3d2f5d4b5..785238d6f 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -289,8 +289,10 @@ writeDocx opts doc@(Pandoc meta _) = do -- construct word/numbering.xml let numpath = "word/numbering.xml" - numEntry <- (toEntry numpath epochtime . renderXml) - `fmap` mkNumbering (stLists st) + numbering <- parseXml refArchive distArchive numpath + newNumElts <- mkNumbering (stLists st) + let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent = + elContent numbering ++ map Elem newNumElts } let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -392,12 +394,10 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes baseListId :: Int baseListId = 1000 -mkNumbering :: [ListMarker] -> IO Element +mkNumbering :: [ListMarker] -> IO [Element] mkNumbering lists = do elts <- mapM mkAbstractNum (ordNub lists) - return $ mknode "w:numbering" - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")] - $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] + return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] mkNum :: ListMarker -> Int -> Element mkNum marker numid = -- cgit v1.2.3 From 45f3851611007f18530b52c9fcc5f0106fbc6816 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 16:46:55 -0700 Subject: Docx writer: Section numbering carries over from reference.docx. Closes #1305. 
--- src/Text/Pandoc/Writers/Docx.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 785238d6f..4e64a79df 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -291,8 +291,13 @@ writeDocx opts doc@(Pandoc meta _) = do let numpath = "word/numbering.xml" numbering <- parseXml refArchive distArchive numpath newNumElts <- mkNumbering (stLists st) + let allElts = onlyElems (elContent numbering) ++ newNumElts let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent = - elContent numbering ++ map Elem newNumElts } + -- we want all the abstractNums first, then the nums, + -- otherwise things break: + [Elem e | e <- allElts + , qName (elName e) == "abstractNum" ] ++ + [Elem e | e <- allElts, qName (elName e) == "num" ] } let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") -- cgit v1.2.3 From aff6ba921b38304944de851ff7f4d6f0df651964 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 17:17:33 -0700 Subject: Require texmath >= 0.6.6.3. Closes #1324. This fixes \tilde{E}, especially in docx. --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index 5782938e5..501a25a45 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -218,7 +218,7 @@ Library old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5, HTTP >= 4000.0.5 && < 4000.3, - texmath >= 0.6.6.2 && < 0.7, + texmath >= 0.6.6.3 && < 0.7, xml >= 1.3.12 && < 1.4, random >= 1 && < 1.1, extensible-exceptions >= 0.1 && < 0.2, -- cgit v1.2.3 From ab5dda7a601ad93b97ee02b3d216cc6d5321a462 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 23:17:03 -0700 Subject: Markdown writer: Prettier pipe tables. Columns are now aligned. Closes #1323. 
--- src/Text/Pandoc/Writers/Markdown.hs | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index f42a1b54c..a67271a5d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -478,16 +478,24 @@ addMarkdownAttribute s = pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> State WriterState Doc pipeTable headless aligns rawHeaders rawRows = do + let sp = text " " + let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor AlignCenter x y = cblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor AlignRight x y = rblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty + let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows) let torow cs = nowrap $ text "|" <> - hcat (intersperse (text "|") $ map chomp cs) <> text "|" - let toborder (a, h) = let wid = max (offset h) 3 - in text $ case a of - AlignLeft -> ':':replicate (wid - 1) '-' - AlignCenter -> ':':replicate (wid - 2) '-' ++ ":" - AlignRight -> replicate (wid - 1) '-' ++ ":" - AlignDefault -> replicate wid '-' + hcat (intersperse (text "|") $ + zipWith3 blockFor aligns widths (map chomp cs)) + <> text "|" + let toborder (a, w) = text $ case a of + AlignLeft -> ':':replicate (w + 1) '-' + AlignCenter -> ':':replicate w '-' ++ ":" + AlignRight -> replicate (w + 1) '-' ++ ":" + AlignDefault -> replicate (w + 2) '-' let header = if headless then empty else torow rawHeaders - let border = torow $ map toborder $ zip aligns rawHeaders + let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ + map toborder $ zip aligns widths) <> text "|" let body = vcat $ map torow rawRows return $ header $$ border $$ body -- cgit v1.2.3 From 96815746611d63fae7be718cce643a52effd6525 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 23:17:42 -0700 Subject: LaTeX 
reader: Handle comments at the end of tables. This resolves the issue illustrated in http://stackoverflow.com/questions/24009489/comments-in-latex-break-pandoc-table. --- src/Text/Pandoc/Readers/LaTeX.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 7fc587882..c3c0ba423 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1269,6 +1269,7 @@ simpTable = try $ do header' <- option [] $ try (parseTableRow cols <* lbreak <* hline) rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline) spaces + skipMany (comment *> spaces) let header'' = if null header' then replicate cols mempty else header' -- cgit v1.2.3 From ff25cf079cd683ee1a2ad3d473e1db9084e0ba35 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 4 Jun 2014 08:18:24 -0700 Subject: Reformatted nocite example in README. --- README | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README b/README index d5e75b154..6030fa2bb 100644 --- a/README +++ b/README @@ -2503,8 +2503,8 @@ citing them in the body text, you can define a dummy `nocite` metadata field and put the citations there: --- - nocite: | - @item1, @item2 + nocite: | + @item1, @item2 ... @item3 -- cgit v1.2.3 From b2d75c67e41862f98163fc76d3151de2005f6e35 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Jun 2014 22:19:59 -0700 Subject: make_osx_package: don't cabal update at beginning. --- make_osx_package.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/make_osx_package.sh b/make_osx_package.sh index 4e93a1a03..3e4317140 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -22,8 +22,8 @@ rm -rf $DIST mkdir -p $RESOURCES cabal sandbox init -echo Updating database -cabal update +# echo Updating database +# cabal update echo Building pandoc... 
cabal clean -- cgit v1.2.3 From f011d24165a0d65b7a34fc86c1dfc002697e29ee Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Jun 2014 22:20:21 -0700 Subject: Require aeson >= 0.7.0.5 to avoid build problems. --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index 501a25a45..be7d4977f 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -330,7 +330,7 @@ Executable pandoc bytestring >= 0.9 && < 0.11, extensible-exceptions >= 0.1 && < 0.2, highlighting-kate >= 0.5.8.2 && < 0.6, - aeson >= 0.7 && < 0.8, + aeson >= 0.7.0.5 && < 0.8, yaml >= 0.8.8.2 && < 0.9, containers >= 0.1 && < 0.6, HTTP >= 4000.0.5 && < 4000.3 -- cgit v1.2.3 From e78db3caf6703fb98f671cd3dbdc2e7840ce406c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 9 Jun 2014 22:33:56 -0700 Subject: Updated default latex template so `\subtitle` works properly. Closes #1327. --- data/templates | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/templates b/data/templates index 9f1636367..2c51fb0c4 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 9f1636367e28deaa949c4377eaea2acdc13a148f +Subproject commit 2c51fb0c4045542dcfaa07aff37778adc9452e9d -- cgit v1.2.3 From abbf33ae7d6ee3358fd74e434ccd897c774bc3d0 Mon Sep 17 00:00:00 2001 From: James Aspnes Date: Thu, 12 Jun 2014 21:19:00 -0400 Subject: allow (and discard) optional argument for \caption --- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index c3c0ba423..6b5958920 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -305,7 +305,7 @@ blockCommands = M.fromList $ , ("item", skipopts *> loose_item) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . 
trimInlines) <$> (skipopts *> tok)) - , ("caption", tok >>= setCaption) + , ("caption", skipopts *> tok >>= setCaption) , ("PandocStartInclude", startInclude) , ("PandocEndInclude", endInclude) , ("bibliography", mempty <$ (skipopts *> braced >>= -- cgit v1.2.3 From 260404542009c477aff4837a8d77bb727c75ee20 Mon Sep 17 00:00:00 2001 From: Douglas Calvert Date: Fri, 13 Jun 2014 21:13:38 -0400 Subject: input fmt is org-citations its plural not singular --- changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog b/changelog index e012dabbf..fe3b1845b 100644 --- a/changelog +++ b/changelog @@ -17,7 +17,7 @@ pandoc (1.12.4.2) + Support code block headers (`#+BEGIN_SRC ...`) (Albert Krewinkel). + Fix parsing of blank lines within blocks (Albert Krewinkel). + Support pandoc citation extension (Albert Krewinkel). This can - be turned off by specifying `org-citation` as the input format. + be turned off by specifying `org-citations` as the input format. * Markdown reader: -- cgit v1.2.3 From 293e4cfdc3028218089115acd6f091b9ea3aa7d6 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 14 Jun 2014 10:02:35 -0400 Subject: Add DocX files to tree. This introduces Text.Pandoc.DocX, and its exported `readDocX` function. 
--- src/Text/Pandoc/Readers/DocX.hs | 479 +++++++++++++++++++++++++++ src/Text/Pandoc/Readers/DocX/Lists.hs | 208 ++++++++++++ src/Text/Pandoc/Readers/DocX/Parse.hs | 604 ++++++++++++++++++++++++++++++++++ 3 files changed, 1291 insertions(+) create mode 100644 src/Text/Pandoc/Readers/DocX.hs create mode 100644 src/Text/Pandoc/Readers/DocX/Lists.hs create mode 100644 src/Text/Pandoc/Readers/DocX/Parse.hs diff --git a/src/Text/Pandoc/Readers/DocX.hs b/src/Text/Pandoc/Readers/DocX.hs new file mode 100644 index 000000000..976e2e271 --- /dev/null +++ b/src/Text/Pandoc/Readers/DocX.hs @@ -0,0 +1,479 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.DocX + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Conversion of DocX type (defined in Text.Pandoc.Readers.DocX.Parse) +to 'Pandoc' document. 
-} + +{- +Current state of implementation of DocX entities ([x] means +implemented, [-] means partially implemented): + +* Blocks + + - [X] Para + - [X] CodeBlock (styled with `SourceCode`) + - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally, + indented) + - [X] OrderedList + - [X] BulletList + - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`) + - [X] Header (styled with `Heading#`) + - [ ] HorizontalRule + - [-] Table (column widths and alignments not yet implemented) + +* Inlines + + - [X] Str + - [X] Emph (From italics. `underline` currently read as span. In + future, it might optionally be emph as well) + - [X] Strong + - [X] Strikeout + - [X] Superscript + - [X] Subscript + - [X] SmallCaps + - [ ] Quoted + - [ ] Cite + - [X] Code (styled with `VerbatimChar`) + - [X] Space + - [X] LineBreak (these are invisible in Word: entered with Shift-Return) + - [ ] Math + - [X] Link (links to an arbitrary bookmark create a span with the target as + id and "anchor" class) + - [-] Image (Links to path in archive. Future option for + data-encoded URI likely.) + - [X] Note (Footnotes and Endnotes are silently combined.) 
+-} + +module Text.Pandoc.Readers.DocX + ( readDocX + ) where + +import Codec.Archive.Zip +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Builder (text, toList) +import Text.Pandoc.Generic (bottomUp) +import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.UTF8 (toString) +import Text.Pandoc.Readers.DocX.Parse +import Text.Pandoc.Readers.DocX.Lists +import Data.Maybe (mapMaybe, isJust, fromJust) +import Data.List (delete, isPrefixOf, (\\), intersect) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Base64 (encode) +import System.FilePath (combine) + +readDocX :: ReaderOptions + -> B.ByteString + -> Pandoc +readDocX opts bytes = + case archiveToDocX (toArchive bytes) of + Just docx -> Pandoc nullMeta (docxToBlocks opts docx) + Nothing -> error $ "couldn't parse docx file" + +runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) +runStyleToSpanAttr rPr = ("", + mapMaybe id [ + if isBold rPr then (Just "strong") else Nothing, + if isItalic rPr then (Just "emph") else Nothing, + if isSmallCaps rPr then (Just "smallcaps") else Nothing, + if isStrike rPr then (Just "strike") else Nothing, + if isSuperScript rPr then (Just "superscript") else Nothing, + if isSubScript rPr then (Just "subscript") else Nothing, + rStyle rPr], + case underline rPr of + Just fmt -> [("underline", fmt)] + _ -> [] + ) + +parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) +parStyleToDivAttr pPr = ("", + pStyle pPr, + case indent pPr of + Just n -> [("indent", (show n))] + Nothing -> [] + ) + +strToInlines :: String -> [Inline] +strToInlines = toList . 
text + +codeSpans :: [String] +codeSpans = ["VerbatimChar"] + +blockQuoteDivs :: [String] +blockQuoteDivs = ["Quote", "BlockQuote"] + +codeDivs :: [String] +codeDivs = ["SourceCode"] + +runElemToInlines :: RunElem -> [Inline] +runElemToInlines (TextRun s) = strToInlines s +runElemToInlines (LnBrk) = [LineBreak] + +runElemToString :: RunElem -> String +runElemToString (TextRun s) = s +runElemToString (LnBrk) = ['\n'] + +runElemsToString :: [RunElem] -> String +runElemsToString = concatMap runElemToString + +strNormalize :: [Inline] -> [Inline] +strNormalize [] = [] +strNormalize (Str "" : ils) = strNormalize ils +strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) +strNormalize (il:ils) = il : (strNormalize ils) + +runToInlines :: ReaderOptions -> DocX -> Run -> [Inline] +runToInlines _ _ (Run rs runElems) + | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = + case runStyleToSpanAttr rs == ("", [], []) of + True -> [Str (runElemsToString runElems)] + False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] + | otherwise = case runStyleToSpanAttr rs == ("", [], []) of + True -> concatMap runElemToInlines runElems + False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] +runToInlines opts docx@(DocX _ notes _ _ _ ) (Footnote fnId) = + case (getFootNote fnId notes) of + Just bodyParts -> + [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + Nothing -> + [Note [Div ("", ["footnote"], []) []]] +runToInlines opts docx@(DocX _ notes _ _ _) (Endnote fnId) = + case (getEndNote fnId notes) of + Just bodyParts -> + [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + Nothing -> + [Note [Div ("", ["endnote"], []) []]] + +parPartToInlines :: ReaderOptions -> DocX -> ParPart -> [Inline] +parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r +parPartToInlines _ _ (BookMark _ anchor) = + [Span (anchor, ["anchor"], []) []] 
+parPartToInlines _ (DocX _ _ _ rels _) (Drawing relid) = + case lookupRelationship relid rels of + Just target -> [Image [] (combine "word" target, "")] + Nothing -> [Image [] ("", "")] +parPartToInlines opts docx (InternalHyperLink anchor runs) = + [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")] +parPartToInlines opts docx@(DocX _ _ _ rels _) (ExternalHyperLink relid runs) = + case lookupRelationship relid rels of + Just target -> + [Link (concatMap (runToInlines opts docx) runs) (target, "")] + Nothing -> + [Link (concatMap (runToInlines opts docx) runs) ("", "")] + +isAnchorSpan :: Inline -> Bool +isAnchorSpan (Span (ident, classes, kvs) ils) = + (not . null) ident && + classes == ["anchor"] && + null kvs && + null ils +isAnchorSpan _ = False + +dummyAnchors :: [String] +dummyAnchors = ["_GoBack"] + +makeHeaderAnchors :: Block -> Block +makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = + case filter isAnchorSpan ils of + [] -> h + (x@(Span (ident, _, _) _) : xs) -> + case ident `elem` dummyAnchors of + True -> h + False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) + _ -> h +makeHeaderAnchors blk = blk + + +parPartsToInlines :: ReaderOptions -> DocX -> [ParPart] -> [Inline] +parPartsToInlines opts docx parparts = + -- + -- We're going to skip data-uri's for now. It should be an option, + -- not mandatory. 
+ -- + --bottomUp (makeImagesSelfContained docx) $ + bottomUp spanCorrect $ + bottomUp spanTrim $ + bottomUp spanReduce $ + concatMap (parPartToInlines opts docx) parparts + +cellToBlocks :: ReaderOptions -> DocX -> Cell -> [Block] +cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps + +rowToBlocksList :: ReaderOptions -> DocX -> Row -> [[Block]] +rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells + +bodyPartToBlock :: ReaderOptions -> DocX -> BodyPart -> Block +bodyPartToBlock opts docx (Paragraph pPr parparts) = + Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] +bodyPartToBlock opts docx@(DocX _ _ numbering _ _) (ListItem pPr numId lvl parparts) = + let + kvs = case lookupLevel numId lvl numbering of + Just (_, fmt, txt, Just start) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + , ("start", (show start)) + ] + + Just (_, fmt, txt, Nothing) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + ] + Nothing -> [] + in + Div + ("", ["list-item"], kvs) + [bodyPartToBlock opts docx (Paragraph pPr parparts)] +bodyPartToBlock _ _ (Tbl _ _ _ []) = + Para [] +bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = + let caption = strToInlines cap + (hdr, rows) = case firstRowFormatting look of + True -> (Just r, rs) + False -> (Nothing, r:rs) + hdrCells = case hdr of + Just r' -> rowToBlocksList opts docx r' + Nothing -> [] + cells = map (rowToBlocksList opts docx) rows + + size = case null hdrCells of + True -> length $ head cells + False -> length $ hdrCells + -- + -- The two following variables (horizontal column alignment and + -- relative column widths) go to the default at the + -- moment. Width information is in the TblGrid field of the Tbl, + -- so should be possible. Alignment might be more difficult, + -- since there doesn't seem to be a column entity in docx. 
+ alignments = take size (repeat AlignDefault) + widths = take size (repeat 0) :: [Double] + in + Table caption alignments widths hdrCells cells + +makeImagesSelfContained :: DocX -> Inline -> Inline +makeImagesSelfContained (DocX _ _ _ _ media) i@(Image alt (uri, title)) = + case lookup uri media of + Just bs -> case getMimeType uri of + Just mime -> let data_uri = + "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs) + in + Image alt (data_uri, title) + Nothing -> i + Nothing -> i +makeImagesSelfContained _ inline = inline + +bodyToBlocks :: ReaderOptions -> DocX -> Body -> [Block] +bodyToBlocks opts docx (Body bps) = + bottomUp removeEmptyPars $ + bottomUp strNormalize $ + bottomUp spanRemove $ + bottomUp divRemove $ + map (makeHeaderAnchors) $ + bottomUp divCorrect $ + bottomUp divReduce $ + bottomUp divCorrectPreReduce $ + bottomUp blocksToDefinitions $ + blocksToBullets $ + map (bodyPartToBlock opts docx) bps + +docxToBlocks :: ReaderOptions -> DocX -> [Block] +docxToBlocks opts d@(DocX (Document _ body) _ _ _ _) = bodyToBlocks opts d body + +spanReduce :: [Inline] -> [Inline] +spanReduce [] = [] +spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) + | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) +spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : + s2@(Span (id2, classes2, kvs2) ils2) : + ils) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> s1 : (spanReduce (s2 : ils)) + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + in + spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : + ils) +spanReduce (il:ils) = il : (spanReduce ils) + +ilToCode :: Inline -> String +ilToCode (Str s) = s +ilToCode _ = "" + +spanRemove' :: Inline -> [Inline] 
+spanRemove' s@(Span (ident, classes, _) []) + -- "_GoBack" is automatically inserted. We don't want to keep it. + | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] +spanRemove' (Span (_, _, kvs) ils) = + case lookup "underline" kvs of + Just val -> [Span ("", [], [("underline", val)]) ils] + Nothing -> ils +spanRemove' il = [il] + +spanRemove :: [Inline] -> [Inline] +spanRemove = concatMap spanRemove' + +spanTrim' :: Inline -> [Inline] +spanTrim' il@(Span _ []) = [il] +spanTrim' il@(Span attr (il':[])) + | il' == Space = [Span attr [], Space] + | otherwise = [il] +spanTrim' (Span attr ils) + | head ils == Space && last ils == Space = + [Space, Span attr (init $ tail ils), Space] + | head ils == Space = [Space, Span attr (tail ils)] + | last ils == Space = [Span attr (init ils), Space] +spanTrim' il = [il] + +spanTrim :: [Inline] -> [Inline] +spanTrim = concatMap spanTrim' + +spanCorrect' :: Inline -> [Inline] +spanCorrect' (Span ("", [], []) ils) = ils +spanCorrect' (Span (ident, classes, kvs) ils) + | "emph" `elem` classes = + [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] + | "strong" `elem` classes = + [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] + | "smallcaps" `elem` classes = + [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils] + | "strike" `elem` classes = + [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] + | "superscript" `elem` classes = + [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] + | "subscript" `elem` classes = + [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] + | (not . 
null) (codeSpans `intersect` classes) = + [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] + | otherwise = + [Span (ident, classes, kvs) ils] +spanCorrect' il = [il] + +spanCorrect :: [Inline] -> [Inline] +spanCorrect = concatMap spanCorrect' + +removeEmptyPars :: [Block] -> [Block] +removeEmptyPars blks = filter (\b -> b /= (Para [])) blks + +divReduce :: [Block] -> [Block] +divReduce [] = [] +divReduce ((Div (id1, classes1, kvs1) blks1) : blks) + | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) +divReduce (d1@(Div (id1, classes1, kvs1) blks1) : + d2@(Div (id2, classes2, kvs2) blks2) : + blks) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> d1 : (divReduce (d2 : blks)) + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + in + divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : + blks) +divReduce (blk:blks) = blk : (divReduce blks) + +isHeaderClass :: String -> Maybe Int +isHeaderClass s | "Heading" `isPrefixOf` s = + case reads (drop (length "Heading") s) :: [(Int, String)] of + [] -> Nothing + ((n, "") : []) -> Just n + _ -> Nothing +isHeaderClass _ = Nothing + +findHeaderClass :: [String] -> Maybe Int +findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of + [] -> Nothing + n : _ -> Just n + +blksToInlines :: [Block] -> [Inline] +blksToInlines (Para ils : _) = ils +blksToInlines (Plain ils : _) = ils +blksToInlines _ = [] + +divCorrectPreReduce' :: Block -> [Block] +divCorrectPreReduce' (Div (ident, classes, kvs) blks) + | isJust $ findHeaderClass classes = + let n = fromJust $ findHeaderClass classes + in + [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] + | otherwise = [Div (ident, classes, kvs) 
blks] +divCorrectPreReduce' blk = [blk] + +divCorrectPreReduce :: [Block] -> [Block] +divCorrectPreReduce = concatMap divCorrectPreReduce' + +blkToCode :: Block -> String +blkToCode (Para []) = "" +blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils)) +blkToCode (Para ((Span (_, classes, _) ils'): ils)) + | (not . null) (codeSpans `intersect` classes) = + (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) +blkToCode _ = "" + +divRemove' :: Block -> [Block] +divRemove' (Div (_, _, kvs) blks) = + case lookup "indent" kvs of + Just val -> [Div ("", [], [("indent", val)]) blks] + Nothing -> blks +divRemove' blk = [blk] + +divRemove :: [Block] -> [Block] +divRemove = concatMap divRemove' + +divCorrect' :: Block -> [Block] +divCorrect' b@(Div (ident, classes, kvs) blks) + | (not . null) (blockQuoteDivs `intersect` classes) = + [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] + | (not . null) (codeDivs `intersect` classes) = + [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] + | otherwise = + case lookup "indent" kvs of + Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] + Just _ -> + [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] + Nothing -> [b] +divCorrect' blk = [blk] + +divCorrect :: [Block] -> [Block] +divCorrect = concatMap divCorrect' diff --git a/src/Text/Pandoc/Readers/DocX/Lists.hs b/src/Text/Pandoc/Readers/DocX/Lists.hs new file mode 100644 index 000000000..b20679261 --- /dev/null +++ b/src/Text/Pandoc/Readers/DocX/Lists.hs @@ -0,0 +1,208 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.DocX.Lists + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Functions for converting flat DocX paragraphs into nested lists. +-} + +module Text.Pandoc.Readers.DocX.Lists ( blocksToBullets + , blocksToDefinitions) where + +import Text.Pandoc.JSON +import Text.Pandoc.Shared (trim) +import Control.Monad +import Data.List +import Data.Maybe + +isListItem :: Block -> Bool +isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True +isListItem _ = False + +getLevel :: Block -> Maybe Integer +getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs +getLevel _ = Nothing + +getLevelN :: Block -> Integer +getLevelN b = case getLevel b of + Just n -> n + Nothing -> -1 + +getNumId :: Block -> Maybe Integer +getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs +getNumId _ = Nothing + +getNumIdN :: Block -> Integer +getNumIdN b = case getNumId b of + Just n -> n + Nothing -> -1 + +getText :: Block -> Maybe String +getText (Div (_, _, kvs) _) = lookup "text" kvs +getText _ = Nothing + +data ListType = Itemized | Enumerated ListAttributes + +listStyleMap :: [(String, ListNumberStyle)] +listStyleMap = [("upperLetter", UpperAlpha), + ("lowerLetter", LowerAlpha), + ("upperRoman", UpperRoman), + ("lowerRoman", LowerRoman), + ("decimal", Decimal)] + +listDelimMap :: [(String, ListNumberDelim)] +listDelimMap = [("%1)", OneParen), + ("(%1)", TwoParens), + ("%1.", 
Period)] + +getListType :: Block -> Maybe ListType +getListType b@(Div (_, _, kvs) _) | isListItem b = + let + start = lookup "start" kvs + frmt = lookup "format" kvs + txt = lookup "text" kvs + in + case frmt of + Just "bullet" -> Just Itemized + Just f -> + case txt of + Just t -> Just $ Enumerated ( + read (fromMaybe "1" start) :: Int, + fromMaybe DefaultStyle (lookup f listStyleMap), + fromMaybe DefaultDelim (lookup t listDelimMap)) + Nothing -> Nothing + _ -> Nothing +getListType _ = Nothing + +listParagraphDivs :: [String] +listParagraphDivs = ["ListParagraph"] + +-- This is a first stab at going through and attaching meaning to list +-- paragraphs, without an item marker, following a list item. We +-- assume that these are paragraphs in the same item. + +handleListParagraphs :: [Block] -> [Block] +handleListParagraphs [] = [] +handleListParagraphs ( + (Div attr1@(_, classes1, _) blks1) : + (Div (ident2, classes2, kvs2) blks2) : + blks + ) | "list-item" `elem` classes1 && + not ("list-item" `elem` classes2) && + (not . null) (listParagraphDivs `intersect` classes2) = + -- We don't want to keep this indent. + let newDiv2 = + (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2) + in + handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) +handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) + +separateBlocks' :: Block -> [[Block]] -> [[Block]] +separateBlocks' blk ([] : []) = [[blk]] +separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]] +-- The following is for the invisible bullet lists. This is how +-- pandoc-generated ooxml does multiparagraph item lists. 
+separateBlocks' b acc | liftM trim (getText b) == Just "" = + (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b acc = acc ++ [[b]] + +separateBlocks :: [Block] -> [[Block]] +separateBlocks blks = foldr separateBlocks' [[]] (reverse blks) + +flatToBullets' :: Integer -> [Block] -> [Block] +flatToBullets' _ [] = [] +flatToBullets' num xs@(b : elems) + | getLevelN b == num = b : (flatToBullets' num elems) + | otherwise = + let bNumId = getNumIdN b + bLevel = getLevelN b + (children, remaining) = + span + (\b' -> + ((getLevelN b') > bLevel || + ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))) + xs + in + case getListType b of + Just (Enumerated attr) -> + (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) : + (flatToBullets' num remaining) + _ -> + (BulletList (separateBlocks $ flatToBullets' bLevel children)) : + (flatToBullets' num remaining) + +flatToBullets :: [Block] -> [Block] +flatToBullets elems = flatToBullets' (-1) elems + +blocksToBullets :: [Block] -> [Block] +blocksToBullets blks = + -- bottomUp removeListItemDivs $ + flatToBullets $ (handleListParagraphs blks) + + +plainParaInlines :: Block -> [Inline] +plainParaInlines (Plain ils) = ils +plainParaInlines (Para ils) = ils +plainParaInlines _ = [] + +blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block] +blocksToDefinitions' [] acc [] = reverse acc +blocksToDefinitions' defAcc acc [] = + reverse $ (DefinitionList (reverse defAcc)) : acc +blocksToDefinitions' defAcc acc + ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks) + | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = + let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) + pair = case remainingAttr2 == ("", [], []) of + True -> (concatMap plainParaInlines blks1, [blks2]) + False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) + in + blocksToDefinitions' (pair : defAcc) acc blks +blocksToDefinitions' defAcc acc + ((Div 
(ident2, classes2, kvs2) blks2) : blks) + | (not . null) defAcc && "Definition" `elem` classes2 = + let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) + defItems2 = case remainingAttr2 == ("", [], []) of + True -> blks2 + False -> [Div remainingAttr2 blks2] + ((defTerm, defItems):defs) = defAcc + defAcc' = case null defItems of + True -> (defTerm, [defItems2]) : defs + False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs + in + blocksToDefinitions' defAcc' acc blks +blocksToDefinitions' [] acc (b:blks) = + blocksToDefinitions' [] (b:acc) blks +blocksToDefinitions' defAcc acc (b:blks) = + blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks + + +blocksToDefinitions :: [Block] -> [Block] +blocksToDefinitions = blocksToDefinitions' [] [] + + + + diff --git a/src/Text/Pandoc/Readers/DocX/Parse.hs b/src/Text/Pandoc/Readers/DocX/Parse.hs new file mode 100644 index 000000000..d7033d9e8 --- /dev/null +++ b/src/Text/Pandoc/Readers/DocX/Parse.hs @@ -0,0 +1,604 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.DocX.Parse + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Conversion of DocX archive into DocX haskell type +-} + + +module Text.Pandoc.Readers.DocX.Parse ( DocX(..) + , Document(..) + , Body(..) + , BodyPart(..) + , TblLook(..) + , ParPart(..) + , Run(..) + , RunElem(..) + , Notes + , Numbering + , Relationship + , Media + , RunStyle(..) + , ParagraphStyle(..) + , Row(..) + , Cell(..) + , getFootNote + , getEndNote + , lookupLevel + , lookupRelationship + , archiveToDocX + ) where +import Codec.Archive.Zip +import Text.XML.Light +import Data.Maybe +import Data.List +import System.FilePath +import Data.Bits ((.|.)) +import qualified Data.ByteString.Lazy as B +import qualified Text.Pandoc.UTF8 as UTF8 + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + + +type NameSpaces = [(String, String)] + +data DocX = DocX Document Notes Numbering [Relationship] Media + deriving Show + +archiveToDocX :: Archive -> Maybe DocX +archiveToDocX archive = do + let notes = archiveToNotes archive + rels = archiveToRelationships archive + media = archiveToMedia archive + doc <- archiveToDocument archive + numbering <- archiveToNumbering archive + return $ DocX doc notes numbering rels media + +data Document = Document NameSpaces Body + deriving Show + +archiveToDocument :: Archive -> Maybe Document +archiveToDocument zf = do + entry <- findEntryByPath "word/document.xml" zf + docElem <- (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem + body <- elemToBody namespaces bodyElem + return $ Document namespaces body + +type Media = [(FilePath, B.ByteString)] + +filePathIsMedia :: FilePath -> Bool +filePathIsMedia fp = + let (dir, _) = splitFileName fp + in + (dir == "word/media/") + +getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString) +getMediaPair zf fp = + case findEntryByPath fp zf of + Just e -> Just (fp, fromEntry e) + Nothing -> Nothing + +archiveToMedia :: Archive -> Media +archiveToMedia zf = + mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf)) + +data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] + deriving Show + +data Numb = Numb String String -- right now, only a key to an abstract num + deriving Show + +data AbstractNumb = AbstractNumb String [Level] + deriving Show + +-- (ilvl, format, string, start) +type Level = (String, String, String, Maybe Integer) + +lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do + absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs + lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs + lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls + return lvl + +numElemToNum :: NameSpaces -> Element -> Maybe Numb +numElemToNum ns element | + qName (elName element) == "num" && + qURI (elName element) == (lookup "w" ns) = do + numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element + absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + return $ Numb numId absNumId +numElemToNum _ _ = Nothing + +absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb +absNumElemToAbsNum ns element | + qName (elName element) == "abstractNum" && + qURI (elName 
element) == (lookup "w" ns) = do + absNumId <- findAttr + (QName "abstractNumId" (lookup "w" ns) (Just "w")) + element + let levelElems = findChildren + (QName "lvl" (lookup "w" ns) (Just "w")) + element + levels = mapMaybe id $ map (levelElemToLevel ns) levelElems + return $ AbstractNumb absNumId levels +absNumElemToAbsNum _ _ = Nothing + +levelElemToLevel :: NameSpaces -> Element -> Maybe Level +levelElemToLevel ns element | + qName (elName element) == "lvl" && + qURI (elName element) == (lookup "w" ns) = do + ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element + fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + return (ilvl, fmt, txt, start) +levelElemToLevel _ _ = Nothing + +archiveToNumbering :: Archive -> Maybe Numbering +archiveToNumbering zf = + case findEntryByPath "word/numbering.xml" zf of + Nothing -> Just $ Numbering [] [] [] + Just entry -> do + numberingElem <- (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + numElems = findChildren + (QName "num" (lookup "w" namespaces) (Just "w")) + numberingElem + absNumElems = findChildren + (QName "abstractNum" (lookup "w" namespaces) (Just "w")) + numberingElem + nums = mapMaybe id $ map (numElemToNum namespaces) numElems + absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems + return $ Numbering namespaces nums absNums + +data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) + deriving Show + +noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart]) +noteElemToNote ns element + | qName (elName element) `elem` ["endnote", "footnote"] && + qURI (elName element) == (lookup "w" ns) = + do + noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + let bps = map fromJust + $ filter isJust + $ map (elemToBodyPart ns) + $ filterChildrenName (isParOrTbl ns) element + return $ (noteId, bps) +noteElemToNote _ _ = Nothing + +getFootNote :: String -> Notes -> Maybe [BodyPart] +getFootNote s (Notes _ fns _) = fns >>= (lookup s) + +getEndNote :: String -> Notes -> Maybe [BodyPart] +getEndNote s (Notes _ _ ens) = ens >>= (lookup s) + +elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] +elemToNotes ns notetype element + | qName (elName element) == (notetype ++ "s") && + qURI (elName element) == (lookup "w" ns) = + Just $ map fromJust + $ filter isJust + $ map (noteElemToNote ns) + $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element +elemToNotes _ _ _ = Nothing + +archiveToNotes :: Archive -> Notes +archiveToNotes zf = + let fnElem = findEntryByPath "word/footnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + enElem = findEntryByPath "word/endnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) + fn_namespaces = case fnElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + en_namespaces = case enElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces + fn = fnElem >>= (elemToNotes ns "footnote") + en = enElem >>= (elemToNotes ns "endnote") + in + Notes ns fn en + + +data Relationship = Relationship (RelId, Target) + deriving Show + +lookupRelationship :: RelId -> [Relationship] -> Maybe Target +lookupRelationship relid rels = + lookup relid (map (\(Relationship pair) -> pair) rels) + +filePathIsRel :: FilePath -> Bool +filePathIsRel fp = + let (dir, name) = splitFileName fp + in + (dir == "word/_rels/") && ((takeExtension name) == ".rels") + +relElemToRelationship :: Element -> Maybe Relationship +relElemToRelationship element | qName (elName element) == "Relationship" = + do + relId <- findAttr (QName "Id" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element + return $ Relationship (relId, target) +relElemToRelationship _ = Nothing + + +archiveToRelationships :: Archive -> [Relationship] +archiveToRelationships archive = + let relPaths = filter filePathIsRel (filesInArchive archive) + entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths + relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entries + rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems + in + rels + +data Body = Body [BodyPart] + deriving Show + +isParOrTbl :: NameSpaces -> QName -> Bool +isParOrTbl ns q = qName q `elem` ["p", "tbl"] && + qURI q == (lookup "w" ns) + +elemToBody :: NameSpaces -> Element -> Maybe Body +elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = + Just $ Body + $ map fromJust + $ filter isJust + $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element +elemToBody _ _ = Nothing + +isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool +isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && + qURI q == (lookup "w" ns) + +elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) +elemToNumInfo ns element + | qName (elName element) == "p" && + qURI (elName element) == (lookup "w" ns) = + do + pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element + numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr + lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + return (numId, lvl) +elemToNumInfo _ _ = Nothing + +-- isBookMarkTag :: NameSpaces -> QName -> Bool +-- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] && +-- qURI q == (lookup "w" ns) + +-- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark +-- parChildrenToBookmark ns (bms : bme : _) +-- | qName (elName bms) == "bookmarkStart" && +-- qURI (elName bms) == (lookup "w" ns) && +-- qName (elName bme) == "bookmarkEnd" && +-- qURI (elName bme) == (lookup "w" ns) = do +-- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms +-- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms +-- return $ (bmId, bmName) 
+-- parChildrenToBookmark _ _ = Nothing + +elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart +elemToBodyPart ns element + | qName (elName element) == "p" && + qURI (elName element) == (lookup "w" ns) = + let parstyle = elemToParagraphStyle ns element + parparts = mapMaybe id + $ map (elemToParPart ns) + $ filterChildrenName (isRunOrLinkOrBookmark ns) element + in + case elemToNumInfo ns element of + Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts + Nothing -> Just $ Paragraph parstyle parparts + | qName (elName element) == "tbl" && + qURI (elName element) == (lookup "w" ns) = + let + caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element + >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w")) + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + grid = case + findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element + of + Just g -> elemToTblGrid ns g + Nothing -> [] + tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element + >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w")) + >>= elemToTblLook ns + in + Just $ Tbl + (fromMaybe "" caption) + grid + (fromMaybe defaultTblLook tblLook) + (mapMaybe (elemToRow ns) (elChildren element)) + | otherwise = Nothing + +elemToTblLook :: NameSpaces -> Element -> Maybe TblLook +elemToTblLook ns element + | qName (elName element) == "tblLook" && + qURI (elName element) == (lookup "w" ns) = + let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element + val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element + firstRowFmt = + case firstRow of + Just "1" -> True + Just _ -> False + Nothing -> case val of + Just bitMask -> testBitMask bitMask 0x020 + Nothing -> False + in + Just $ TblLook{firstRowFormatting = firstRowFmt} +elemToTblLook _ _ = Nothing + +testBitMask :: String -> Int -> Bool +testBitMask bitMaskS n = + case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of + [] -> False + ((n', _) : _) -> ((n' .|. 
n) /= 0) + +data ParagraphStyle = ParagraphStyle { pStyle :: [String] + , indent :: Maybe Integer + } + deriving Show + +defaultParagraphStyle :: ParagraphStyle +defaultParagraphStyle = ParagraphStyle { pStyle = [] + , indent = Nothing + } + +elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle +elemToParagraphStyle ns element = + case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of + Just pPr -> + ParagraphStyle + {pStyle = + mapMaybe id $ + map + (findAttr (QName "val" (lookup "w" ns) (Just "w"))) + (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) + , indent = + findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= + findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= + stringToInteger + } + Nothing -> defaultParagraphStyle + + +data BodyPart = Paragraph ParagraphStyle [ParPart] + | ListItem ParagraphStyle String String [ParPart] + | Tbl String TblGrid TblLook [Row] + + deriving Show + +type TblGrid = [Integer] + +data TblLook = TblLook {firstRowFormatting::Bool} + deriving Show + +defaultTblLook :: TblLook +defaultTblLook = TblLook{firstRowFormatting = False} + +stringToInteger :: String -> Maybe Integer +stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) + +elemToTblGrid :: NameSpaces -> Element -> TblGrid +elemToTblGrid ns element + | qName (elName element) == "tblGrid" && + qURI (elName element) == (lookup "w" ns) = + let + cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element + in + mapMaybe (\e -> + findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e + >>= stringToInteger + ) + cols +elemToTblGrid _ _ = [] + +data Row = Row [Cell] + deriving Show + + +elemToRow :: NameSpaces -> Element -> Maybe Row +elemToRow ns element + | qName (elName element) == "tr" && + qURI (elName element) == (lookup "w" ns) = + let + cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element + in + Just $ Row (mapMaybe (elemToCell ns) cells) +elemToRow _ _ = Nothing + +data Cell = 
Cell [BodyPart] + deriving Show + +elemToCell :: NameSpaces -> Element -> Maybe Cell +elemToCell ns element + | qName (elName element) == "tc" && + qURI (elName element) == (lookup "w" ns) = + Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element)) +elemToCell _ _ = Nothing + +data ParPart = PlainRun Run + | BookMark BookMarkId Anchor + | InternalHyperLink Anchor [Run] + | ExternalHyperLink RelId [Run] + | Drawing String + deriving Show + +data Run = Run RunStyle [RunElem] + | Footnote String + | Endnote String + deriving Show + +data RunElem = TextRun String | LnBrk + deriving Show + +data RunStyle = RunStyle { isBold :: Bool + , isItalic :: Bool + , isSmallCaps :: Bool + , isStrike :: Bool + , isSuperScript :: Bool + , isSubScript :: Bool + , underline :: Maybe String + , rStyle :: Maybe String } + deriving Show + +defaultRunStyle :: RunStyle +defaultRunStyle = RunStyle { isBold = False + , isItalic = False + , isSmallCaps = False + , isStrike = False + , isSuperScript = False + , isSubScript = False + , underline = Nothing + , rStyle = Nothing + } + +elemToRunStyle :: NameSpaces -> Element -> RunStyle +elemToRunStyle ns element = + case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of + Just rPr -> + RunStyle + { + isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr + , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr + , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr + , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr + , isSuperScript = + (Just "superscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , isSubScript = + (Just "subscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , underline = + findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName 
"val" (lookup "w" ns) (Just "w")) + , rStyle = + findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + } + Nothing -> defaultRunStyle + +elemToRun :: NameSpaces -> Element -> Maybe Run +elemToRun ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + case + findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>= + findAttr (QName "id" (lookup "w" ns) (Just "w")) + of + Just s -> Just $ Footnote s + Nothing -> + case + findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>= + findAttr (QName "id" (lookup "w" ns) (Just "w")) + of + Just s -> Just $ Endnote s + Nothing -> Just $ + Run (elemToRunStyle ns element) + (elemToRunElems ns element) +elemToRun _ _ = Nothing + +elemToRunElem :: NameSpaces -> Element -> Maybe RunElem +elemToRunElem ns element + | qName (elName element) == "t" && + qURI (elName element) == (lookup "w" ns) = + Just $ TextRun (strContent element) + | qName (elName element) == "br" && + qURI (elName element) == (lookup "w" ns) = + Just $ LnBrk + | otherwise = Nothing + + +elemToRunElems :: NameSpaces -> Element -> [RunElem] +elemToRunElems ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + mapMaybe (elemToRunElem ns) (elChildren element) + | otherwise = [] + +elemToDrawing :: NameSpaces -> Element -> Maybe ParPart +elemToDrawing ns element + | qName (elName element) == "drawing" && + qURI (elName element) == (lookup "w" ns) = + let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" + in + findElement (QName "blip" (Just a_ns) (Just "a")) element + >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= (\s -> Just $ Drawing s) +elemToDrawing _ _ = Nothing + + +elemToParPart :: NameSpaces -> Element -> Maybe ParPart +elemToParPart ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + case findChild (QName 
"drawing" (lookup "w" ns) (Just "w")) element of + Just drawingElem -> elemToDrawing ns drawingElem + Nothing -> do + r <- elemToRun ns element + return $ PlainRun r +elemToParPart ns element + | qName (elName element) == "bookmarkStart" && + qURI (elName element) == (lookup "w" ns) = do + bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element + return $ BookMark bmId bmName +elemToParPart ns element + | qName (elName element) == "hyperlink" && + qURI (elName element) == (lookup "w" ns) = + let runs = map fromJust $ filter isJust $ map (elemToRun ns) + $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element + in + case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of + Just anchor -> + Just $ InternalHyperLink anchor runs + Nothing -> + case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of + Just relId -> Just $ ExternalHyperLink relId runs + Nothing -> Nothing +elemToParPart _ _ = Nothing + +type Target = String +type Anchor = String +type BookMarkId = String +type RelId = String + -- cgit v1.2.3 From 7f4668d87185929f75e5d3852c13ef2a5430b0d9 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 14 Jun 2014 10:02:52 -0400 Subject: Add files to cabal. Note there is a build warning for unused `makeImagesSelfContained` function, since there isn't yet a command-line-option to make use of it. 
--- pandoc.cabal | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandoc.cabal b/pandoc.cabal index be7d4977f..db76017b6 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -275,6 +275,7 @@ Library Text.Pandoc.Readers.Textile, Text.Pandoc.Readers.Native, Text.Pandoc.Readers.Haddock, + Text.Pandoc.Readers.DocX, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, Text.Pandoc.Writers.OPML, @@ -305,6 +306,8 @@ Library Text.Pandoc.Process Other-Modules: Text.Pandoc.Readers.Haddock.Lex, Text.Pandoc.Readers.Haddock.Parse, + Text.Pandoc.Readers.DocX.Lists, + Text.Pandoc.Readers.DocX.Parse, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, -- cgit v1.2.3 From 3bc818d2d3079f1d31dbb409f839585a32f26f6e Mon Sep 17 00:00:00 2001 From: mpickering Date: Sun, 15 Jun 2014 13:38:16 +0100 Subject: Integrated the docx reader into the main pandoc program. Changes also include generalising the types of reader allowed. The mechanism now mimics the more general output mechanism. --- pandoc.hs | 13 ++++++++++--- src/Text/Pandoc.hs | 56 +++++++++++++++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index 5dd0e6899..0a8070d7c 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -858,6 +858,7 @@ defaultReaderName fallback (x:xs) = ".textile" -> "textile" ".native" -> "native" ".json" -> "json" + ".docx" -> "docx" _ -> defaultReaderName fallback xs -- Returns True if extension of first source is .lhs @@ -1158,15 +1159,21 @@ main = do Left e -> throwIO e Right (bs,_) -> return $ UTF8.toString bs + let readFiles [] = error "Cannot read archive from stdin" + readFiles (x:_) = B.readFile x + let convertTabs = tabFilter (if preserveTabs then 0 else tabStop) let handleIncludes' = if readerName' == "latex" || readerName' == "latex+lhs" then handleIncludes else return - doc <- readSources sources >>= - handleIncludes' . convertTabs . 
intercalate "\n" >>= - reader readerOpts + doc <- case reader of + StringReader r-> + readSources sources >>= + handleIncludes' . convertTabs . intercalate "\n" >>= + r readerOpts + ByteStringReader r -> readFiles sources >>= r readerOpts let doc0 = M.foldWithKey setMeta doc metadata diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 130338f0e..aff471a3c 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -62,6 +62,8 @@ module Text.Pandoc , readers , writers -- * Readers: converting /to/ Pandoc format + , Reader (..) + , readDocX , readMarkdown , readMediaWiki , readRST @@ -125,6 +127,7 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Haddock +import Text.Pandoc.Readers.DocX import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown import Text.Pandoc.Writers.RST @@ -192,24 +195,34 @@ markdown o s = do mapM_ warn warnings return doc +data Reader = StringReader (ReaderOptions -> String -> IO Pandoc) + | ByteStringReader (ReaderOptions -> BL.ByteString -> IO Pandoc) + +mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader +mkStringReader r = StringReader (\o s -> return $ r o s) + +mkBSReader :: (ReaderOptions -> BL.ByteString -> Pandoc) -> Reader +mkBSReader r = ByteStringReader (\o s -> return $ r o s) + -- | Association list of formats and readers. 
-readers :: [(String, ReaderOptions -> String -> IO Pandoc)] -readers = [ ("native" , \_ s -> return $ readNative s) - ,("json" , \o s -> return $ readJSON o s) - ,("markdown" , markdown) - ,("markdown_strict" , markdown) - ,("markdown_phpextra" , markdown) - ,("markdown_github" , markdown) - ,("markdown_mmd", markdown) - ,("rst" , \o s -> return $ readRST o s) - ,("mediawiki" , \o s -> return $ readMediaWiki o s) - ,("docbook" , \o s -> return $ readDocBook o s) - ,("opml" , \o s -> return $ readOPML o s) - ,("org" , \o s -> return $ readOrg o s) - ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs - ,("html" , \o s -> return $ readHtml o s) - ,("latex" , \o s -> return $ readLaTeX o s) - ,("haddock" , \o s -> return $ readHaddock o s) +readers :: [(String, Reader)] +readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) + ,("json" , mkStringReader readJSON ) + ,("markdown" , StringReader markdown) + ,("markdown_strict" , StringReader markdown) + ,("markdown_phpextra" , StringReader markdown) + ,("markdown_github" , StringReader markdown) + ,("markdown_mmd", StringReader markdown) + ,("rst" , mkStringReader readRST ) + ,("mediawiki" , mkStringReader readMediaWiki) + ,("docbook" , mkStringReader readDocBook) + ,("opml" , mkStringReader readOPML) + ,("org" , mkStringReader readOrg) + ,("textile" , mkStringReader readTextile) -- TODO : textile+lhs + ,("html" , mkStringReader readHtml) + ,("latex" , mkStringReader readLaTeX) + ,("haddock" , mkStringReader readHaddock) + ,("docx" , mkBSReader readDocX) ] data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) @@ -280,14 +293,17 @@ getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] -- | Retrieve reader based on formatSpec (format+extensions). 
-getReader :: String -> Either String (ReaderOptions -> String -> IO Pandoc) +getReader :: String -> Either String Reader getReader s = case parseFormatSpec s of Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] - Right (readerName, setExts) -> + Right (readerName, setExts) -> case lookup readerName readers of Nothing -> Left $ "Unknown reader: " ++ readerName - Just r -> Right $ \o -> + Just (StringReader r) -> Right $ StringReader $ \o -> + r o{ readerExtensions = setExts $ + getDefaultExtensions readerName } + Just (ByteStringReader r) -> Right $ ByteStringReader $ \o -> r o{ readerExtensions = setExts $ getDefaultExtensions readerName } -- cgit v1.2.3 From f928e4c8dce694c47e0cda35f82bfadd55c0e4b2 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 15 Jun 2014 14:55:17 -0400 Subject: Add DocX automated tests. Note this makes use of input and output files in the tests/ dir. --- tests/Tests/Readers/DocX.hs | 68 ++++++++++++++++++++++++++++ tests/docx.block_quotes.docx | Bin 0 -> 41855 bytes tests/docx.block_quotes_parse_indent.native | 8 ++++ tests/docx.headers.docx | Bin 0 -> 30298 bytes tests/docx.headers.native | 5 ++ tests/docx.image.docx | Bin 0 -> 109656 bytes tests/docx.image_no_embed.native | 2 + tests/docx.inline_formatting.docx | Bin 0 -> 32322 bytes tests/docx.inline_formatting.native | 5 ++ tests/docx.links.docx | Bin 0 -> 41751 bytes tests/docx.links.native | 6 +++ tests/docx.lists.docx | Bin 0 -> 31775 bytes tests/docx.lists.native | 18 ++++++++ tests/docx.notes.docx | Bin 0 -> 30734 bytes tests/docx.notes.native | 2 + tests/docx.tables.docx | Bin 0 -> 42792 bytes tests/docx.tables.native | 24 ++++++++++ tests/docx.unicode.docx | Bin 0 -> 13098 bytes tests/docx.unicode.native | 1 + 19 files changed, 139 insertions(+) create mode 100644 tests/Tests/Readers/DocX.hs create mode 100644 tests/docx.block_quotes.docx create mode 100644 tests/docx.block_quotes_parse_indent.native create mode 100644 tests/docx.headers.docx create 
mode 100644 tests/docx.headers.native create mode 100644 tests/docx.image.docx create mode 100644 tests/docx.image_no_embed.native create mode 100644 tests/docx.inline_formatting.docx create mode 100644 tests/docx.inline_formatting.native create mode 100644 tests/docx.links.docx create mode 100644 tests/docx.links.native create mode 100644 tests/docx.lists.docx create mode 100644 tests/docx.lists.native create mode 100644 tests/docx.notes.docx create mode 100644 tests/docx.notes.native create mode 100644 tests/docx.tables.docx create mode 100644 tests/docx.tables.native create mode 100644 tests/docx.unicode.docx create mode 100644 tests/docx.unicode.native diff --git a/tests/Tests/Readers/DocX.hs b/tests/Tests/Readers/DocX.hs new file mode 100644 index 000000000..f4564ea1d --- /dev/null +++ b/tests/Tests/Readers/DocX.hs @@ -0,0 +1,68 @@ +module Tests.Readers.DocX (tests) where + +import Text.Pandoc.Options +import Text.Pandoc.Readers.Native +import Text.Pandoc.Definition +import Tests.Helpers +import Test.Framework +import qualified Data.ByteString.Lazy as B +import Text.Pandoc.Readers.DocX + +compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc) +compareOutput docxFile nativeFile = do + df <- B.readFile docxFile + nf <- Prelude.readFile nativeFile + return $ (readDocX def df, readNative nf) + +testCompare' :: String -> FilePath -> FilePath -> IO Test +testCompare' name docxFile nativeFile = do + (dp, np) <- compareOutput docxFile nativeFile + return $ test id name (dp, np) + +testCompare :: String -> FilePath -> FilePath -> Test +testCompare name docxFile nativeFile = + buildTest $ testCompare' name docxFile nativeFile + + +tests :: [Test] +tests = [ testGroup "inlines" + [ testCompare + "font formatting" + "docx.inline_formatting.docx" + "docx.inline_formatting.native" + , testCompare + "hyperlinks" + "docx.links.docx" + "docx.links.native" + , testCompare + "inline image with reference output" + "docx.image.docx" + "docx.image_no_embed.native" + , 
testCompare + "handling unicode input" + "docx.unicode.docx" + "docx.unicode.native"] + , testGroup "blocks" + [ testCompare + "headers" + "docx.headers.docx" + "docx.headers.native" + , testCompare + "lists" + "docx.lists.docx" + "docx.lists.native" + , testCompare + "footnotes and endnotes" + "docx.notes.docx" + "docx.notes.native" + , testCompare + "blockquotes (parsing indent as blockquote)" + "docx.block_quotes.docx" + "docx.block_quotes_parse_indent.native" + , testCompare + "tables" + "docx.tables.docx" + "docx.tables.native" + ] + ] + diff --git a/tests/docx.block_quotes.docx b/tests/docx.block_quotes.docx new file mode 100644 index 000000000..729ae1f43 Binary files /dev/null and b/tests/docx.block_quotes.docx differ diff --git a/tests/docx.block_quotes_parse_indent.native b/tests/docx.block_quotes_parse_indent.native new file mode 100644 index 000000000..da1cef110 --- /dev/null +++ b/tests/docx.block_quotes_parse_indent.native @@ -0,0 +1,8 @@ +[Header 2 ("",[],[]) [Str "Some",Space,Str "block",Space,Str "quotes,",Space,Str "in",Space,Str "different",Space,Str "ways"] +,Para [Str "This",Space,Str "is",Space,Str "the",Space,Str "proper",Space,Str "way,",Space,Str "with",Space,Str "a",Space,Str "style"] +,BlockQuote + [Para [Str "I",Space,Str "don\8217t",Space,Str "know",Space,Str "why",Space,Str "this",Space,Str "would",Space,Str "be",Space,Str "in",Space,Str "italics,",Space,Str "but",Space,Str "so",Space,Str "it",Space,Str "appears",Space,Str "to",Space,Str "be",Space,Str "on",Space,Str "my",Space,Str "screen."]] +,Para [Str "And",Space,Str "this",Space,Str "is",Space,Str "the",Space,Str "way",Space,Str "that",Space,Str "most",Space,Str "people",Space,Str "do",Space,Str "it:"] +,BlockQuote + [Para [Str "I",Space,Str "just",Space,Str "indented",Space,Str "this,",Space,Str "so",Space,Str "it",Space,Str "looks",Space,Str "like",Space,Str "a",Space,Str "block",Space,Str "quote.",Space,Str "I",Space,Str "think",Space,Str "this",Space,Str "is",Space,Str 
"how",Space,Str "most",Space,Str "people",Space,Str "do",Space,Str "block",Space,Str "quotes",Space,Str "in",Space,Str "their",Space,Str "documents."]] +,Para [Str "And",Space,Str "back",Space,Str "to",Space,Str "the",Space,Str "normal",Space,Str "style."]] diff --git a/tests/docx.headers.docx b/tests/docx.headers.docx new file mode 100644 index 000000000..630b6bfc5 Binary files /dev/null and b/tests/docx.headers.docx differ diff --git a/tests/docx.headers.native b/tests/docx.headers.native new file mode 100644 index 000000000..e4d4a4781 --- /dev/null +++ b/tests/docx.headers.native @@ -0,0 +1,5 @@ +[Header 1 ("",[],[]) [Str "A",Space,Str "Test",Space,Str "of",Space,Str "Headers"] +,Header 2 ("",[],[]) [Str "Second",Space,Str "Level"] +,Para [Str "Some",Space,Str "plain",Space,Str "text."] +,Header 3 ("",[],[]) [Str "Third",Space,Str "level"] +,Para [Str "Some",Space,Str "more",Space,Str "plain",Space,Str "text."]] diff --git a/tests/docx.image.docx b/tests/docx.image.docx new file mode 100644 index 000000000..060f2b204 Binary files /dev/null and b/tests/docx.image.docx differ diff --git a/tests/docx.image_no_embed.native b/tests/docx.image_no_embed.native new file mode 100644 index 000000000..18debf135 --- /dev/null +++ b/tests/docx.image_no_embed.native @@ -0,0 +1,2 @@ +[Header 2 ("",[],[]) [Str "An",Space,Str "image"] +,Para [Image [] ("word/media/image1.jpeg","")]] diff --git a/tests/docx.inline_formatting.docx b/tests/docx.inline_formatting.docx new file mode 100644 index 000000000..eccf26425 Binary files /dev/null and b/tests/docx.inline_formatting.docx differ diff --git a/tests/docx.inline_formatting.native b/tests/docx.inline_formatting.native new file mode 100644 index 000000000..dc8a3d19a --- /dev/null +++ b/tests/docx.inline_formatting.native @@ -0,0 +1,5 @@ +[Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."] +,Para [Str "This",Space,Str "is",Space,SmallCaps [Str 
"Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."] +,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Span ("",[],[("underline","single")]) [Str "single",Space,Str "underlines",Space,Str "for",Space,Emph [Str "emphasis"]],Str "."] +,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."] +,Para [Str "A",Space,Str "line",LineBreak,Str "break."]] diff --git a/tests/docx.links.docx b/tests/docx.links.docx new file mode 100644 index 000000000..10ec62fd7 Binary files /dev/null and b/tests/docx.links.docx differ diff --git a/tests/docx.links.native b/tests/docx.links.native new file mode 100644 index 000000000..98768de5a --- /dev/null +++ b/tests/docx.links.native @@ -0,0 +1,6 @@ +[Header 2 ("",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"] +,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://google.com",""),Space,Str "to",Space,Str "a",Space,Str "popular",Space,Str "website."] +,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#_A_section_for",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."] +,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#my_bookmark",""),Space,Str "to",Space,Str "a",Space,Str "bookmark."] +,Header 2 ("_A_section_for",[],[]) [Str "A",Space,Str "section",Space,Str "for",Space,Str "testing",Space,Str "link",Space,Str "targets"] +,Para [Str "A",Space,Str "bookmark",Space,Str "right",Space,Span ("my_bookmark",["anchor"],[]) [],Str "here"]] diff --git a/tests/docx.lists.docx b/tests/docx.lists.docx new file mode 100644 index 000000000..bf7fd8ae4 Binary files /dev/null and b/tests/docx.lists.docx differ diff --git a/tests/docx.lists.native 
b/tests/docx.lists.native new file mode 100644 index 000000000..e46bc140b --- /dev/null +++ b/tests/docx.lists.native @@ -0,0 +1,18 @@ +[Header 2 ("",[],[]) [Str "Some",Space,Str "nested",Space,Str "lists"] +,OrderedList (1,Decimal,Period) + [[Para [Str "one"]] + ,[Para [Str "two"] + ,OrderedList (1,LowerAlpha,DefaultDelim) + [[Para [Str "a"]] + ,[Para [Str "b"]]]]] +,BulletList + [[Para [Str "one"]] + ,[Para [Str "two"] + ,BulletList + [[Para [Str "three"] + ,BulletList + [[Para [Str "four"] + ,Para [Str "Sub",Space,Str "paragraph"]]]]]] + ,[Para [Str "Same",Space,Str "list"]]] +,BulletList + [[Para [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]] diff --git a/tests/docx.notes.docx b/tests/docx.notes.docx new file mode 100644 index 000000000..eb6fa12d4 Binary files /dev/null and b/tests/docx.notes.docx differ diff --git a/tests/docx.notes.native b/tests/docx.notes.native new file mode 100644 index 000000000..1e9b6bba4 --- /dev/null +++ b/tests/docx.notes.native @@ -0,0 +1,2 @@ +[Header 2 ("",[],[]) [Str "A",Space,Str "footnote"] +,Para [Str "Test",Space,Str "footnote.",Note [Para [Space,Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Space,Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]] diff --git a/tests/docx.tables.docx b/tests/docx.tables.docx new file mode 100644 index 000000000..7dcff8d35 Binary files /dev/null and b/tests/docx.tables.docx differ diff --git a/tests/docx.tables.native b/tests/docx.tables.native new file mode 100644 index 000000000..8dbaabda7 --- /dev/null +++ b/tests/docx.tables.native @@ -0,0 +1,24 @@ +[Header 2 ("",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] +,Table [] [AlignDefault,AlignDefault,AlignDefault,AlignDefault] 
[0.0,0.0,0.0,0.0] + [[Para [Str "Name"]] + ,[Para [Str "Game"]] + ,[Para [Str "Fame"]] + ,[Para [Str "Blame"]]] + [[[Para [Str "Lebron",Space,Str "James"]] + ,[Para [Str "Basketball"]] + ,[Para [Str "Very",Space,Str "High"]] + ,[Para [Str "Leaving",Space,Str "Cleveland"]]] + ,[[Para [Str "Ryan",Space,Str "Braun"]] + ,[Para [Str "Baseball"]] + ,[Para [Str "Moderate"]] + ,[Para [Str "Steroids"]]] + ,[[Para [Str "Russell",Space,Str "Wilson"]] + ,[Para [Str "Football"]] + ,[Para [Str "High"]] + ,[Para [Str "Tacky",Space,Str "uniform"]]]] +,Table [] [AlignDefault,AlignDefault] [0.0,0.0] + [] + [[[Para [Str "Sinple"]] + ,[Para [Str "Table"]]] + ,[[Para [Str "Without"]] + ,[Para [Str "Header"]]]]] diff --git a/tests/docx.unicode.docx b/tests/docx.unicode.docx new file mode 100644 index 000000000..78d0107a1 Binary files /dev/null and b/tests/docx.unicode.docx differ diff --git a/tests/docx.unicode.native b/tests/docx.unicode.native new file mode 100644 index 000000000..e636355c7 --- /dev/null +++ b/tests/docx.unicode.native @@ -0,0 +1 @@ +[Para [Str "Hello,",Space,Str "\19990\30028.",Space,Str "This",Space,Str "costs",Space,Str "\8364\&10."]] -- cgit v1.2.3 From cfd5290fc5fb131a763ae3f56b1dd1c4bcef731d Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 15 Jun 2014 14:59:37 -0400 Subject: Reference new DocX tests in cabal file. 
--- pandoc.cabal | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index db76017b6..00fa4e06a 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -184,7 +184,25 @@ Extra-Source-Files: tests/fb2.math.markdown, tests/fb2.math.fb2, tests/fb2.test-small.png, - tests/fb2.test.jpg + tests/fb2.test.jpg, + tests/docx.block_quotes.docx, + tests/docx.block_quotes_parse_indent.native, + tests/docx.headers.docx, + tests/docx.headers.native, + tests/docx.image.docx, + tests/docx.image_no_embed.native, + tests/docx.inline_formatting.docx, + tests/docx.inline_formatting.native, + tests/docx.links.docx, + tests/docx.links.native, + tests/docx.lists.docx, + tests/docx.lists.native, + tests/docx.notes.docx, + tests/docx.notes.native, + tests/docx.tables.docx, + tests/docx.tables.native, + tests/docx.unicode.docx, + tests/docx.unicode.native Extra-Tmp-Files: man/man1/pandoc.1, man/man5/pandoc_markdown.5 @@ -393,6 +411,7 @@ Test-Suite test-pandoc Tests.Readers.Markdown Tests.Readers.Org Tests.Readers.RST + Tests.Readers.DocX Tests.Writers.Native Tests.Writers.ConTeXt Tests.Writers.HTML -- cgit v1.2.3 From 6b939134e72ca736f6d1f95527c1a7cefb2c0837 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 15 Jun 2014 15:00:04 -0400 Subject: Add DocX tests to test-pandoc.hs --- tests/test-pandoc.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index 80d672589..9f9d85147 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -9,6 +9,7 @@ import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown import qualified Tests.Readers.Org import qualified Tests.Readers.RST +import qualified Tests.Readers.DocX import qualified Tests.Writers.ConTeXt import qualified Tests.Writers.LaTeX import qualified Tests.Writers.HTML @@ -38,6 +39,8 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Markdown" Tests.Readers.Markdown.tests , testGroup "Org" 
Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests + , testGroup "DocX" Tests.Readers.DocX.tests + ] ] -- cgit v1.2.3 From c709cec0bdf7a3029a43f0c46d071a7ca1ab6a13 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 15 Jun 2014 15:34:48 -0400 Subject: Updated README to reflect docx reader. --- README | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README b/README index 6030fa2bb..1883ecd57 100644 --- a/README +++ b/README @@ -13,15 +13,15 @@ Description Pandoc is a [Haskell] library for converting from one markup format to another, and a command-line tool that uses this library. It can read [markdown] and (subsets of) [Textile], [reStructuredText], [HTML], -[LaTeX], [MediaWiki markup], [Haddock markup], [OPML], [Emacs Org-mode] -and [DocBook]; and it can write plain text, [markdown], -[reStructuredText], [XHTML], [HTML 5], [LaTeX] (including [beamer] slide -shows), [ConTeXt], [RTF], [OPML], [DocBook], [OpenDocument], [ODT], -[Word docx], [GNU Texinfo], [MediaWiki markup], [EPUB] (v2 or v3), -[FictionBook2], [Textile], [groff man] pages, [Emacs Org-Mode], -[AsciiDoc], [InDesign ICML], and [Slidy], [Slideous], [DZSlides], -[reveal.js] or [S5] HTML slide shows. It can also produce [PDF] output -on systems where LaTeX is installed. +[LaTeX], [MediaWiki markup], [Haddock markup], [OPML], [Emacs +Org-mode], [DocBook], and [Word docx]; and it can write plain text, +[markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX] (including +[beamer] slide shows), [ConTeXt], [RTF], [OPML], [DocBook], +[OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki markup], +[EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, +[Emacs Org-Mode], [AsciiDoc], [InDesign ICML], and [Slidy], +[Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. It can +also produce [PDF] output on systems where LaTeX is installed. 
Pandoc's enhanced version of markdown includes syntax for footnotes, tables, flexible ordered lists, definition lists, fenced code blocks, -- cgit v1.2.3 From 7807564d4493af1462d61138c63d8ec365abc792 Mon Sep 17 00:00:00 2001 From: mpickering Date: Mon, 16 Jun 2014 20:45:54 +0100 Subject: Moved extractSpaces to Shared.hs Generalised and moved the extractSpaces function from `HTML.hs` to `Shared.hs` so that the docx reader can also use it. --- src/Text/Pandoc/Readers/HTML.hs | 17 ++++------------- src/Text/Pandoc/Shared.hs | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 905e55b22..9cdc5a567 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -50,7 +50,6 @@ import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) import Control.Applicative ( (<$>), (<$), (<*) ) import Data.Monoid -import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl) isSpace :: Char -> Bool isSpace ' ' = True @@ -369,9 +368,9 @@ pQ = do then InSingleQuote else InDoubleQuote let constructor = case quoteType of - SingleQuote -> B.singleQuoted + SingleQuote -> B.singleQuoted DoubleQuote -> B.doubleQuoted - withQuoteContext innerQuoteContext $ + withQuoteContext innerQuoteContext $ pInlinesInTags "q" constructor pEmph :: TagParser Inlines @@ -406,7 +405,7 @@ pLink = try $ do let url = fromAttrib "href" tag let title = fromAttrib "title" tag lab <- trimInlines . 
mconcat <$> manyTill inline (pCloses "a") - return $ B.link (escapeURI url) title lab + return $ B.link (escapeURI url) title lab pImage :: TagParser Inlines pImage = do @@ -439,15 +438,7 @@ pRawHtmlInline = do pInlinesInTags :: String -> (Inlines -> Inlines) -> TagParser Inlines -pInlinesInTags tagtype f = do - contents <- B.unMany <$> pInTags tagtype inline - let left = case viewl contents of - (Space :< _) -> B.space - _ -> mempty - let right = case viewr contents of - (_ :> Space) -> B.space - _ -> mempty - return (left <> f (trimInlines . B.Many $ contents) <> right) +pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline pInTags :: (Monoid a) => String -> TagParser a -> TagParser a diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index b0adf55f5..5b0d9b6b4 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -53,6 +53,7 @@ module Text.Pandoc.Shared ( -- * Pandoc block and inline list processing orderedListMarkers, normalizeSpaces, + extractSpaces, normalize, stringify, compactify, @@ -113,6 +114,7 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) +import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -331,6 +333,20 @@ isSpaceOrEmpty Space = True isSpaceOrEmpty (Str "") = True isSpaceOrEmpty _ = False +-- | Extract the leading and trailing spaces from inside an inline element +-- and place them outside the element. + +extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines +extractSpaces f is = + let contents = B.unMany is + left = case viewl contents of + (Space :< _) -> B.space + _ -> mempty + right = case viewr contents of + (_ :> Space) -> B.space + _ -> mempty in + (left <> f (B.trimInlines . 
B.Many $ contents) <> right) + -- | Normalize @Pandoc@ document, consolidating doubled 'Space's, -- combining adjacent 'Str's and 'Emph's, remove 'Null's and -- empty elements, etc. -- cgit v1.2.3 From 01ef573ac2f6620e9f70ae8965e5ccc664e3aec4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 14:18:06 -0700 Subject: Org reader: fixed #1342. This change rewrites `inlineLaTeXCommand` so that parsec will know when input is being consumed. Previously a run-time error would be produced with some input involving raw latex. (I believe this does not affect the last release, as the inline latex reading was added recently.) --- src/Text/Pandoc/Readers/Org.hs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c3ea8d7c2..0e872abf0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -38,10 +38,9 @@ import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString - , updateLastStrPos ) + ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) -import Text.Parsec.Pos (updatePosString) import Text.TeXMath (texMathToPandoc, DisplayType(..)) import Control.Applicative ( Applicative, pure @@ -148,10 +147,6 @@ resetBlockAttributes :: OrgParser () resetBlockAttributes = updateState $ \s -> s{ orgStateBlockAttributes = orgStateBlockAttributes def } -updateLastStrPos :: OrgParser () -updateLastStrPos = getPosition >>= \p -> - updateState $ \s -> s{ orgStateLastStrPos = Just p } - updateLastForbiddenCharPos :: OrgParser () updateLastForbiddenCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} @@ -1376,8 +1371,9 @@ maybeRight = either (const Nothing) Just inlineLaTeXCommand :: OrgParser String inlineLaTeXCommand = try $ do rest <- getInput - pos <- 
getPosition case runParser rawLaTeXInline def "source" rest of - Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest) - <* (setPosition $ updatePosString pos cs) + Right (RawInline _ cs) -> do + let len = length cs + count len anyChar + return cs _ -> mzero -- cgit v1.2.3 From 9090c549aab29552ee29e88ab2d38995a42a421e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 15:12:10 -0700 Subject: Added failing HTML reader test for table. See #1341. --- tests/html-reader.html | 20 +++++++++++++++++++- tests/html-reader.native | 13 ++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/html-reader.html b/tests/html-reader.html index 1e104b00f..d059d7b4b 100644 --- a/tests/html-reader.html +++ b/tests/html-reader.html @@ -431,6 +431,24 @@ An e-mail address: nobody [at] nowhere.net

Trailing space text

text Leading spaces

Trailing spaces text

- +

Tables

+ + + + + + + + + + + + + + + + +
XYZ
123
456
+ diff --git a/tests/html-reader.native b/tests/html-reader.native index 8fbecf34f..85866aef1 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -308,4 +308,15 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,Para [Str "text",Space,Emph [Str "Leading",Space,Str "space"]] ,Para [Emph [Str "Trailing",Space,Str "space"],Space,Str "text"] ,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]] -,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"]] +,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"] +,Header 1 ("",[],[]) [Str "Tables"] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.3333333333333333,0.3333333333333333,0.3333333333333333] + [[Plain [Str "X"]] + ,[Plain [Str "Y"]] + ,[Plain [Str "Z"]]] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]]] -- cgit v1.2.3 From e7d6b2e6ad26d90b1c07c57a6ad6a43064c52a28 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 15:15:35 -0700 Subject: Updated HTML reader table test - should be simple table. 
--- tests/html-reader.native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/html-reader.native b/tests/html-reader.native index 85866aef1..c6ed36910 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -310,7 +310,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]] ,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"] ,Header 1 ("",[],[]) [Str "Tables"] -,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.3333333333333333,0.3333333333333333,0.3333333333333333] +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [[Plain [Str "X"]] ,[Plain [Str "Y"]] ,[Plain [Str "Z"]]] -- cgit v1.2.3 From 31fd843133ee9482b6af353a7d793cae18929425 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 15:41:23 -0700 Subject: HTML reader: Fixed major parsing problem with HTML tables. Table cells were being combined into one cell. Closes #1341. 
--- src/Text/Pandoc/Readers/HTML.hs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 9cdc5a567..d27afc543 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -238,30 +238,26 @@ pTable = try $ do caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank -- TODO actually read these and take width information from them widths' <- pColgroup <|> many pCol - head' <- option mempty $ pOptInTag "thead" $ pInTags "tr" (pCell "th") + head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") skipMany pBlank rows <- pOptInTag "tbody" $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") skipMany pBlank TagClose _ <- pSatisfy (~== TagClose "table") - let isSinglePlain [] = True - isSinglePlain [Plain _] = True - isSinglePlain _ = False - let lHead = B.toList head' - let lRows = map B.toList rows - let isSimple = all isSinglePlain (lHead:lRows) - let cols = length $ if null lHead - then head lRows - else lHead + let isSinglePlain x = case B.toList x of + [Plain _] -> True + _ -> False + let isSimple = all isSinglePlain $ concat (head':rows) + let cols = length $ if null head' then head rows else head' -- fail if there are colspans or rowspans - guard $ all (\r -> length r == cols) lRows - let aligns = replicate cols AlignLeft + guard $ all (\r -> length r == cols) rows + let aligns = replicate cols AlignDefault let widths = if null widths' then if isSimple then replicate cols 0 else replicate cols (1.0 / fromIntegral cols) else widths' - return $ B.table caption (zip aligns widths) [head'] [rows] + return $ B.table caption (zip aligns widths) head' rows pCol :: TagParser Double pCol = try $ do @@ -279,12 +275,12 @@ pColgroup = try $ do skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank -pCell :: String -> TagParser Blocks +pCell :: String -> TagParser [Blocks] pCell 
celltype = try $ do skipMany pBlank res <- pInTags celltype block skipMany pBlank - return res + return [res] pBlockQuote :: TagParser Blocks pBlockQuote = do -- cgit v1.2.3 From 459805de4c4e7129dc624086a654febb161f99ad Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 17:43:56 -0700 Subject: LaTeX reader: don't assume preamble doesn't contain environments. Closes #1338. --- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6b5958920..9e7a38b8f 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1134,7 +1134,7 @@ paragraph = do preamble :: LP Blocks preamble = mempty <$> manyTill preambleBlock beginDoc - where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}" + where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" preambleBlock = (void comment) <|> (void sp) <|> (void blanklines) -- cgit v1.2.3 From 87c08be58f54ae60cfd57be6c17a256cbdb105d6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 19:18:33 -0700 Subject: LaTeX reader: handle leading/trailing spaces in emph better. `\emph{ hi }` gets parsed as `[Space, Emph [Str "hi"], Space]` so that we don't get things like `* hi *` in markdown output. Also applies to textbf and some other constructions. Closes #1146. (`--normalize` isn't touched by this, but normalization should not generally be necessary with the changes to the readers.) 
--- src/Text/Pandoc/Readers/LaTeX.hs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 9e7a38b8f..3c4d4ee52 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -397,18 +397,18 @@ isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands inlineCommands :: M.Map String (LP Inlines) inlineCommands = M.fromList $ - [ ("emph", emph <$> tok) - , ("textit", emph <$> tok) - , ("textsl", emph <$> tok) - , ("textsc", smallcaps <$> tok) - , ("sout", strikeout <$> tok) - , ("textsuperscript", superscript <$> tok) - , ("textsubscript", subscript <$> tok) + [ ("emph", extractSpaces emph <$> tok) + , ("textit", extractSpaces emph <$> tok) + , ("textsl", extractSpaces emph <$> tok) + , ("textsc", extractSpaces smallcaps <$> tok) + , ("sout", extractSpaces strikeout <$> tok) + , ("textsuperscript", extractSpaces superscript <$> tok) + , ("textsubscript", extractSpaces subscript <$> tok) , ("textbackslash", lit "\\") , ("backslash", lit "\\") , ("slash", lit "/") - , ("textbf", strong <$> tok) - , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) + , ("textbf", extractSpaces strong <$> tok) + , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) , ("ldots", lit "…") , ("dots", lit "…") , ("mdots", lit "…") @@ -428,15 +428,15 @@ inlineCommands = M.fromList $ , ("{", lit "{") , ("}", lit "}") -- old TeX commands - , ("em", emph <$> inlines) - , ("it", emph <$> inlines) - , ("sl", emph <$> inlines) - , ("bf", strong <$> inlines) + , ("em", extractSpaces emph <$> inlines) + , ("it", extractSpaces emph <$> inlines) + , ("sl", extractSpaces emph <$> inlines) + , ("bf", extractSpaces strong <$> inlines) , ("rm", inlines) - , ("itshape", emph <$> inlines) - , ("slshape", emph <$> inlines) - , ("scshape", smallcaps <$> inlines) - , ("bfseries", strong <$> inlines) + , ("itshape", extractSpaces emph 
<$> inlines) + , ("slshape", extractSpaces emph <$> inlines) + , ("scshape", extractSpaces smallcaps <$> inlines) + , ("bfseries", extractSpaces strong <$> inlines) , ("/", pure mempty) -- italic correction , ("aa", lit "å") , ("AA", lit "Å") -- cgit v1.2.3 From 9da5d8955ecc090d1582a9d007a0ecace654d229 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 20:48:55 -0700 Subject: Markdown reader: fixed #1333 (table parsing bug). --- src/Text/Pandoc/Readers/Markdown.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index caa938ed6..c20a2a1fc 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1117,13 +1117,14 @@ multilineTable headless = multilineTableHeader :: Bool -- ^ Headerless table -> MarkdownParser (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do - if headless - then return '\n' - else tableSep >>~ notFollowedBy blankline + unless headless $ + tableSep >> notFollowedBy blankline rawContent <- if headless then return $ repeat "" - else many1 - (notFollowedBy tableSep >> many1Till anyChar newline) + else many1 $ do + notFollowedBy blankline + notFollowedBy tableSep + anyLine initSp <- nonindentSpaces dashes <- many1 (dashedLine '-') newline -- cgit v1.2.3 From f9b97e6bfb1d26bd328cdbb1ca83c4558e7f4a0c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 21:26:50 -0700 Subject: Small improvement to fix to #1333. This allows blank lines at end of multiline headers. 
--- src/Text/Pandoc/Readers/Markdown.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index c20a2a1fc..a6720beba 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1121,10 +1121,7 @@ multilineTableHeader headless = try $ do tableSep >> notFollowedBy blankline rawContent <- if headless then return $ repeat "" - else many1 $ do - notFollowedBy blankline - notFollowedBy tableSep - anyLine + else many1 $ notFollowedBy tableSep >> anyLine initSp <- nonindentSpaces dashes <- many1 (dashedLine '-') newline -- cgit v1.2.3 From 78ee2416d105bd25337819a49835623a8a296224 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 22:03:26 -0700 Subject: Org reader: make tildes create inline code. Closes #1345. Also relabeled 'code' and 'verbatim' parsers to accord with the org-mode manual. I'm not sure what the distinction between code and verbatim is supposed to be, but I'm pretty sure both should be represented as Code inlines in pandoc. The previous behavior resulted in the text not appearing in any output format. --- src/Text/Pandoc/Readers/Org.hs | 8 ++++---- tests/Tests/Readers/Org.hs | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0e872abf0..7a35e2ca0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1148,11 +1148,11 @@ strikeout = fmap B.strikeout <$> emphasisBetween '+' underline :: OrgParser (F Inlines) underline = fmap B.strong <$> emphasisBetween '_' -code :: OrgParser (F Inlines) -code = return . B.code <$> verbatimBetween '=' - verbatim :: OrgParser (F Inlines) -verbatim = return . B.rawInline "" <$> verbatimBetween '~' +verbatim = return . B.code <$> verbatimBetween '=' + +code :: OrgParser (F Inlines) +code = return . 
B.code <$> verbatimBetween '~' subscript :: OrgParser (F Inlines) subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4ed77887f..f8240ca3d 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -50,13 +50,13 @@ tests = "+Kill Bill+" =?> para (strikeout . spcSep $ [ "Kill", "Bill" ]) - , "Code" =: + , "Verbatim" =: "=Robot.rock()=" =?> para (code "Robot.rock()") - , "Verbatim" =: + , "Code" =: "~word for word~" =?> - para (rawInline "" "word for word") + para (code "word for word") , "Math $..$" =: "$E=mc^2$" =?> -- cgit v1.2.3 From 7c1d38ac7df151ce353d0d8d601ef17b33faea9b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 22:18:01 -0700 Subject: Bump version to 1.13 --- pandoc.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandoc.cabal b/pandoc.cabal index 00fa4e06a..a6126a331 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,5 +1,5 @@ Name: pandoc -Version: 1.12.5 +Version: 1.13 Cabal-Version: >= 1.10 Build-Type: Custom License: GPL -- cgit v1.2.3 From bbe99003f8d25dc65ab12851907ecd5d9aad746c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 22:44:40 -0700 Subject: Naming: Use Docx instead of DocX. For consistency with the existing writer. 
--- pandoc.cabal | 8 +- src/Text/Pandoc.hs | 6 +- src/Text/Pandoc/Readers/DocX.hs | 479 --------------------------- src/Text/Pandoc/Readers/DocX/Lists.hs | 208 ------------ src/Text/Pandoc/Readers/DocX/Parse.hs | 604 ---------------------------------- src/Text/Pandoc/Readers/Docx.hs | 479 +++++++++++++++++++++++++++ src/Text/Pandoc/Readers/Docx/Lists.hs | 208 ++++++++++++ src/Text/Pandoc/Readers/Docx/Parse.hs | 604 ++++++++++++++++++++++++++++++++++ tests/Tests/Readers/DocX.hs | 68 ---- tests/Tests/Readers/Docx.hs | 68 ++++ tests/test-pandoc.hs | 4 +- 11 files changed, 1368 insertions(+), 1368 deletions(-) delete mode 100644 src/Text/Pandoc/Readers/DocX.hs delete mode 100644 src/Text/Pandoc/Readers/DocX/Lists.hs delete mode 100644 src/Text/Pandoc/Readers/DocX/Parse.hs create mode 100644 src/Text/Pandoc/Readers/Docx.hs create mode 100644 src/Text/Pandoc/Readers/Docx/Lists.hs create mode 100644 src/Text/Pandoc/Readers/Docx/Parse.hs delete mode 100644 tests/Tests/Readers/DocX.hs create mode 100644 tests/Tests/Readers/Docx.hs diff --git a/pandoc.cabal b/pandoc.cabal index a6126a331..5898af5ad 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -293,7 +293,7 @@ Library Text.Pandoc.Readers.Textile, Text.Pandoc.Readers.Native, Text.Pandoc.Readers.Haddock, - Text.Pandoc.Readers.DocX, + Text.Pandoc.Readers.Docx, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, Text.Pandoc.Writers.OPML, @@ -324,8 +324,8 @@ Library Text.Pandoc.Process Other-Modules: Text.Pandoc.Readers.Haddock.Lex, Text.Pandoc.Readers.Haddock.Parse, - Text.Pandoc.Readers.DocX.Lists, - Text.Pandoc.Readers.DocX.Parse, + Text.Pandoc.Readers.Docx.Lists, + Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, @@ -411,7 +411,7 @@ Test-Suite test-pandoc Tests.Readers.Markdown Tests.Readers.Org Tests.Readers.RST - Tests.Readers.DocX + Tests.Readers.Docx Tests.Writers.Native Tests.Writers.ConTeXt Tests.Writers.HTML diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs 
index aff471a3c..45c2f453b 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -63,7 +63,7 @@ module Text.Pandoc , writers -- * Readers: converting /to/ Pandoc format , Reader (..) - , readDocX + , readDocx , readMarkdown , readMediaWiki , readRST @@ -127,7 +127,7 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Haddock -import Text.Pandoc.Readers.DocX +import Text.Pandoc.Readers.Docx import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown import Text.Pandoc.Writers.RST @@ -222,7 +222,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("html" , mkStringReader readHtml) ,("latex" , mkStringReader readLaTeX) ,("haddock" , mkStringReader readHaddock) - ,("docx" , mkBSReader readDocX) + ,("docx" , mkBSReader readDocx) ] data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) diff --git a/src/Text/Pandoc/Readers/DocX.hs b/src/Text/Pandoc/Readers/DocX.hs deleted file mode 100644 index 976e2e271..000000000 --- a/src/Text/Pandoc/Readers/DocX.hs +++ /dev/null @@ -1,479 +0,0 @@ -{- -Copyright (C) 2014 Jesse Rosenthal - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.DocX - Copyright : Copyright (C) 2014 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal - Stability : alpha - Portability : portable - -Conversion of DocX type (defined in Text.Pandoc.Readers.DocX.Parse) -to 'Pandoc' document. -} - -{- -Current state of implementation of DocX entities ([x] means -implemented, [-] means partially implemented): - -* Blocks - - - [X] Para - - [X] CodeBlock (styled with `SourceCode`) - - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally, - indented) - - [X] OrderedList - - [X] BulletList - - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`) - - [X] Header (styled with `Heading#`) - - [ ] HorizontalRule - - [-] Table (column widths and alignments not yet implemented) - -* Inlines - - - [X] Str - - [X] Emph (From italics. `underline` currently read as span. In - future, it might optionally be emph as well) - - [X] Strong - - [X] Strikeout - - [X] Superscript - - [X] Subscript - - [X] SmallCaps - - [ ] Quoted - - [ ] Cite - - [X] Code (styled with `VerbatimChar`) - - [X] Space - - [X] LineBreak (these are invisible in Word: entered with Shift-Return) - - [ ] Math - - [X] Link (links to an arbitrary bookmark create a span with the target as - id and "anchor" class) - - [-] Image (Links to path in archive. Future option for - data-encoded URI likely.) - - [X] Note (Footnotes and Endnotes are silently combined.) 
--} - -module Text.Pandoc.Readers.DocX - ( readDocX - ) where - -import Codec.Archive.Zip -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Builder (text, toList) -import Text.Pandoc.Generic (bottomUp) -import Text.Pandoc.MIME (getMimeType) -import Text.Pandoc.UTF8 (toString) -import Text.Pandoc.Readers.DocX.Parse -import Text.Pandoc.Readers.DocX.Lists -import Data.Maybe (mapMaybe, isJust, fromJust) -import Data.List (delete, isPrefixOf, (\\), intersect) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as B -import Data.ByteString.Base64 (encode) -import System.FilePath (combine) - -readDocX :: ReaderOptions - -> B.ByteString - -> Pandoc -readDocX opts bytes = - case archiveToDocX (toArchive bytes) of - Just docx -> Pandoc nullMeta (docxToBlocks opts docx) - Nothing -> error $ "couldn't parse docx file" - -runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) -runStyleToSpanAttr rPr = ("", - mapMaybe id [ - if isBold rPr then (Just "strong") else Nothing, - if isItalic rPr then (Just "emph") else Nothing, - if isSmallCaps rPr then (Just "smallcaps") else Nothing, - if isStrike rPr then (Just "strike") else Nothing, - if isSuperScript rPr then (Just "superscript") else Nothing, - if isSubScript rPr then (Just "subscript") else Nothing, - rStyle rPr], - case underline rPr of - Just fmt -> [("underline", fmt)] - _ -> [] - ) - -parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) -parStyleToDivAttr pPr = ("", - pStyle pPr, - case indent pPr of - Just n -> [("indent", (show n))] - Nothing -> [] - ) - -strToInlines :: String -> [Inline] -strToInlines = toList . 
text - -codeSpans :: [String] -codeSpans = ["VerbatimChar"] - -blockQuoteDivs :: [String] -blockQuoteDivs = ["Quote", "BlockQuote"] - -codeDivs :: [String] -codeDivs = ["SourceCode"] - -runElemToInlines :: RunElem -> [Inline] -runElemToInlines (TextRun s) = strToInlines s -runElemToInlines (LnBrk) = [LineBreak] - -runElemToString :: RunElem -> String -runElemToString (TextRun s) = s -runElemToString (LnBrk) = ['\n'] - -runElemsToString :: [RunElem] -> String -runElemsToString = concatMap runElemToString - -strNormalize :: [Inline] -> [Inline] -strNormalize [] = [] -strNormalize (Str "" : ils) = strNormalize ils -strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) -strNormalize (il:ils) = il : (strNormalize ils) - -runToInlines :: ReaderOptions -> DocX -> Run -> [Inline] -runToInlines _ _ (Run rs runElems) - | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = - case runStyleToSpanAttr rs == ("", [], []) of - True -> [Str (runElemsToString runElems)] - False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] - | otherwise = case runStyleToSpanAttr rs == ("", [], []) of - True -> concatMap runElemToInlines runElems - False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] -runToInlines opts docx@(DocX _ notes _ _ _ ) (Footnote fnId) = - case (getFootNote fnId notes) of - Just bodyParts -> - [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] - Nothing -> - [Note [Div ("", ["footnote"], []) []]] -runToInlines opts docx@(DocX _ notes _ _ _) (Endnote fnId) = - case (getEndNote fnId notes) of - Just bodyParts -> - [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] - Nothing -> - [Note [Div ("", ["endnote"], []) []]] - -parPartToInlines :: ReaderOptions -> DocX -> ParPart -> [Inline] -parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r -parPartToInlines _ _ (BookMark _ anchor) = - [Span (anchor, ["anchor"], []) []] 
-parPartToInlines _ (DocX _ _ _ rels _) (Drawing relid) = - case lookupRelationship relid rels of - Just target -> [Image [] (combine "word" target, "")] - Nothing -> [Image [] ("", "")] -parPartToInlines opts docx (InternalHyperLink anchor runs) = - [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")] -parPartToInlines opts docx@(DocX _ _ _ rels _) (ExternalHyperLink relid runs) = - case lookupRelationship relid rels of - Just target -> - [Link (concatMap (runToInlines opts docx) runs) (target, "")] - Nothing -> - [Link (concatMap (runToInlines opts docx) runs) ("", "")] - -isAnchorSpan :: Inline -> Bool -isAnchorSpan (Span (ident, classes, kvs) ils) = - (not . null) ident && - classes == ["anchor"] && - null kvs && - null ils -isAnchorSpan _ = False - -dummyAnchors :: [String] -dummyAnchors = ["_GoBack"] - -makeHeaderAnchors :: Block -> Block -makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = - case filter isAnchorSpan ils of - [] -> h - (x@(Span (ident, _, _) _) : xs) -> - case ident `elem` dummyAnchors of - True -> h - False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) - _ -> h -makeHeaderAnchors blk = blk - - -parPartsToInlines :: ReaderOptions -> DocX -> [ParPart] -> [Inline] -parPartsToInlines opts docx parparts = - -- - -- We're going to skip data-uri's for now. It should be an option, - -- not mandatory. 
- -- - --bottomUp (makeImagesSelfContained docx) $ - bottomUp spanCorrect $ - bottomUp spanTrim $ - bottomUp spanReduce $ - concatMap (parPartToInlines opts docx) parparts - -cellToBlocks :: ReaderOptions -> DocX -> Cell -> [Block] -cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps - -rowToBlocksList :: ReaderOptions -> DocX -> Row -> [[Block]] -rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells - -bodyPartToBlock :: ReaderOptions -> DocX -> BodyPart -> Block -bodyPartToBlock opts docx (Paragraph pPr parparts) = - Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] -bodyPartToBlock opts docx@(DocX _ _ numbering _ _) (ListItem pPr numId lvl parparts) = - let - kvs = case lookupLevel numId lvl numbering of - Just (_, fmt, txt, Just start) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - , ("start", (show start)) - ] - - Just (_, fmt, txt, Nothing) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - ] - Nothing -> [] - in - Div - ("", ["list-item"], kvs) - [bodyPartToBlock opts docx (Paragraph pPr parparts)] -bodyPartToBlock _ _ (Tbl _ _ _ []) = - Para [] -bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = - let caption = strToInlines cap - (hdr, rows) = case firstRowFormatting look of - True -> (Just r, rs) - False -> (Nothing, r:rs) - hdrCells = case hdr of - Just r' -> rowToBlocksList opts docx r' - Nothing -> [] - cells = map (rowToBlocksList opts docx) rows - - size = case null hdrCells of - True -> length $ head cells - False -> length $ hdrCells - -- - -- The two following variables (horizontal column alignment and - -- relative column widths) go to the default at the - -- moment. Width information is in the TblGrid field of the Tbl, - -- so should be possible. Alignment might be more difficult, - -- since there doesn't seem to be a column entity in docx. 
- alignments = take size (repeat AlignDefault) - widths = take size (repeat 0) :: [Double] - in - Table caption alignments widths hdrCells cells - -makeImagesSelfContained :: DocX -> Inline -> Inline -makeImagesSelfContained (DocX _ _ _ _ media) i@(Image alt (uri, title)) = - case lookup uri media of - Just bs -> case getMimeType uri of - Just mime -> let data_uri = - "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs) - in - Image alt (data_uri, title) - Nothing -> i - Nothing -> i -makeImagesSelfContained _ inline = inline - -bodyToBlocks :: ReaderOptions -> DocX -> Body -> [Block] -bodyToBlocks opts docx (Body bps) = - bottomUp removeEmptyPars $ - bottomUp strNormalize $ - bottomUp spanRemove $ - bottomUp divRemove $ - map (makeHeaderAnchors) $ - bottomUp divCorrect $ - bottomUp divReduce $ - bottomUp divCorrectPreReduce $ - bottomUp blocksToDefinitions $ - blocksToBullets $ - map (bodyPartToBlock opts docx) bps - -docxToBlocks :: ReaderOptions -> DocX -> [Block] -docxToBlocks opts d@(DocX (Document _ body) _ _ _ _) = bodyToBlocks opts d body - -spanReduce :: [Inline] -> [Inline] -spanReduce [] = [] -spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) - | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) -spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : - s2@(Span (id2, classes2, kvs2) ils2) : - ils) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> s1 : (spanReduce (s2 : ils)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : - ils) -spanReduce (il:ils) = il : (spanReduce ils) - -ilToCode :: Inline -> String -ilToCode (Str s) = s -ilToCode _ = "" - -spanRemove' :: Inline -> [Inline] 
-spanRemove' s@(Span (ident, classes, _) []) - -- "_GoBack" is automatically inserted. We don't want to keep it. - | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] -spanRemove' (Span (_, _, kvs) ils) = - case lookup "underline" kvs of - Just val -> [Span ("", [], [("underline", val)]) ils] - Nothing -> ils -spanRemove' il = [il] - -spanRemove :: [Inline] -> [Inline] -spanRemove = concatMap spanRemove' - -spanTrim' :: Inline -> [Inline] -spanTrim' il@(Span _ []) = [il] -spanTrim' il@(Span attr (il':[])) - | il' == Space = [Span attr [], Space] - | otherwise = [il] -spanTrim' (Span attr ils) - | head ils == Space && last ils == Space = - [Space, Span attr (init $ tail ils), Space] - | head ils == Space = [Space, Span attr (tail ils)] - | last ils == Space = [Span attr (init ils), Space] -spanTrim' il = [il] - -spanTrim :: [Inline] -> [Inline] -spanTrim = concatMap spanTrim' - -spanCorrect' :: Inline -> [Inline] -spanCorrect' (Span ("", [], []) ils) = ils -spanCorrect' (Span (ident, classes, kvs) ils) - | "emph" `elem` classes = - [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] - | "strong" `elem` classes = - [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] - | "smallcaps" `elem` classes = - [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils] - | "strike" `elem` classes = - [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] - | "superscript" `elem` classes = - [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] - | "subscript" `elem` classes = - [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] - | (not . 
null) (codeSpans `intersect` classes) = - [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] - | otherwise = - [Span (ident, classes, kvs) ils] -spanCorrect' il = [il] - -spanCorrect :: [Inline] -> [Inline] -spanCorrect = concatMap spanCorrect' - -removeEmptyPars :: [Block] -> [Block] -removeEmptyPars blks = filter (\b -> b /= (Para [])) blks - -divReduce :: [Block] -> [Block] -divReduce [] = [] -divReduce ((Div (id1, classes1, kvs1) blks1) : blks) - | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) -divReduce (d1@(Div (id1, classes1, kvs1) blks1) : - d2@(Div (id2, classes2, kvs2) blks2) : - blks) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> d1 : (divReduce (d2 : blks)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : - blks) -divReduce (blk:blks) = blk : (divReduce blks) - -isHeaderClass :: String -> Maybe Int -isHeaderClass s | "Heading" `isPrefixOf` s = - case reads (drop (length "Heading") s) :: [(Int, String)] of - [] -> Nothing - ((n, "") : []) -> Just n - _ -> Nothing -isHeaderClass _ = Nothing - -findHeaderClass :: [String] -> Maybe Int -findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of - [] -> Nothing - n : _ -> Just n - -blksToInlines :: [Block] -> [Inline] -blksToInlines (Para ils : _) = ils -blksToInlines (Plain ils : _) = ils -blksToInlines _ = [] - -divCorrectPreReduce' :: Block -> [Block] -divCorrectPreReduce' (Div (ident, classes, kvs) blks) - | isJust $ findHeaderClass classes = - let n = fromJust $ findHeaderClass classes - in - [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] - | otherwise = [Div (ident, classes, kvs) 
blks] -divCorrectPreReduce' blk = [blk] - -divCorrectPreReduce :: [Block] -> [Block] -divCorrectPreReduce = concatMap divCorrectPreReduce' - -blkToCode :: Block -> String -blkToCode (Para []) = "" -blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils)) -blkToCode (Para ((Span (_, classes, _) ils'): ils)) - | (not . null) (codeSpans `intersect` classes) = - (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) -blkToCode _ = "" - -divRemove' :: Block -> [Block] -divRemove' (Div (_, _, kvs) blks) = - case lookup "indent" kvs of - Just val -> [Div ("", [], [("indent", val)]) blks] - Nothing -> blks -divRemove' blk = [blk] - -divRemove :: [Block] -> [Block] -divRemove = concatMap divRemove' - -divCorrect' :: Block -> [Block] -divCorrect' b@(Div (ident, classes, kvs) blks) - | (not . null) (blockQuoteDivs `intersect` classes) = - [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] - | (not . null) (codeDivs `intersect` classes) = - [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] - | otherwise = - case lookup "indent" kvs of - Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] - Just _ -> - [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] - Nothing -> [b] -divCorrect' blk = [blk] - -divCorrect :: [Block] -> [Block] -divCorrect = concatMap divCorrect' diff --git a/src/Text/Pandoc/Readers/DocX/Lists.hs b/src/Text/Pandoc/Readers/DocX/Lists.hs deleted file mode 100644 index b20679261..000000000 --- a/src/Text/Pandoc/Readers/DocX/Lists.hs +++ /dev/null @@ -1,208 +0,0 @@ -{- -Copyright (C) 2014 Jesse Rosenthal - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.DocX.Lists - Copyright : Copyright (C) 2014 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal - Stability : alpha - Portability : portable - -Functions for converting flat DocX paragraphs into nested lists. --} - -module Text.Pandoc.Readers.DocX.Lists ( blocksToBullets - , blocksToDefinitions) where - -import Text.Pandoc.JSON -import Text.Pandoc.Shared (trim) -import Control.Monad -import Data.List -import Data.Maybe - -isListItem :: Block -> Bool -isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True -isListItem _ = False - -getLevel :: Block -> Maybe Integer -getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs -getLevel _ = Nothing - -getLevelN :: Block -> Integer -getLevelN b = case getLevel b of - Just n -> n - Nothing -> -1 - -getNumId :: Block -> Maybe Integer -getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs -getNumId _ = Nothing - -getNumIdN :: Block -> Integer -getNumIdN b = case getNumId b of - Just n -> n - Nothing -> -1 - -getText :: Block -> Maybe String -getText (Div (_, _, kvs) _) = lookup "text" kvs -getText _ = Nothing - -data ListType = Itemized | Enumerated ListAttributes - -listStyleMap :: [(String, ListNumberStyle)] -listStyleMap = [("upperLetter", UpperAlpha), - ("lowerLetter", LowerAlpha), - ("upperRoman", UpperRoman), - ("lowerRoman", LowerRoman), - ("decimal", Decimal)] - -listDelimMap :: [(String, ListNumberDelim)] -listDelimMap = [("%1)", OneParen), - ("(%1)", TwoParens), - ("%1.", 
Period)] - -getListType :: Block -> Maybe ListType -getListType b@(Div (_, _, kvs) _) | isListItem b = - let - start = lookup "start" kvs - frmt = lookup "format" kvs - txt = lookup "text" kvs - in - case frmt of - Just "bullet" -> Just Itemized - Just f -> - case txt of - Just t -> Just $ Enumerated ( - read (fromMaybe "1" start) :: Int, - fromMaybe DefaultStyle (lookup f listStyleMap), - fromMaybe DefaultDelim (lookup t listDelimMap)) - Nothing -> Nothing - _ -> Nothing -getListType _ = Nothing - -listParagraphDivs :: [String] -listParagraphDivs = ["ListParagraph"] - --- This is a first stab at going through and attaching meaning to list --- paragraphs, without an item marker, following a list item. We --- assume that these are paragraphs in the same item. - -handleListParagraphs :: [Block] -> [Block] -handleListParagraphs [] = [] -handleListParagraphs ( - (Div attr1@(_, classes1, _) blks1) : - (Div (ident2, classes2, kvs2) blks2) : - blks - ) | "list-item" `elem` classes1 && - not ("list-item" `elem` classes2) && - (not . null) (listParagraphDivs `intersect` classes2) = - -- We don't want to keep this indent. - let newDiv2 = - (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2) - in - handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) -handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) - -separateBlocks' :: Block -> [[Block]] -> [[Block]] -separateBlocks' blk ([] : []) = [[blk]] -separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] -separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]] --- The following is for the invisible bullet lists. This is how --- pandoc-generated ooxml does multiparagraph item lists. 
-separateBlocks' b acc | liftM trim (getText b) == Just "" = - (init acc) ++ [(last acc) ++ [b]] -separateBlocks' b acc = acc ++ [[b]] - -separateBlocks :: [Block] -> [[Block]] -separateBlocks blks = foldr separateBlocks' [[]] (reverse blks) - -flatToBullets' :: Integer -> [Block] -> [Block] -flatToBullets' _ [] = [] -flatToBullets' num xs@(b : elems) - | getLevelN b == num = b : (flatToBullets' num elems) - | otherwise = - let bNumId = getNumIdN b - bLevel = getLevelN b - (children, remaining) = - span - (\b' -> - ((getLevelN b') > bLevel || - ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))) - xs - in - case getListType b of - Just (Enumerated attr) -> - (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) : - (flatToBullets' num remaining) - _ -> - (BulletList (separateBlocks $ flatToBullets' bLevel children)) : - (flatToBullets' num remaining) - -flatToBullets :: [Block] -> [Block] -flatToBullets elems = flatToBullets' (-1) elems - -blocksToBullets :: [Block] -> [Block] -blocksToBullets blks = - -- bottomUp removeListItemDivs $ - flatToBullets $ (handleListParagraphs blks) - - -plainParaInlines :: Block -> [Inline] -plainParaInlines (Plain ils) = ils -plainParaInlines (Para ils) = ils -plainParaInlines _ = [] - -blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block] -blocksToDefinitions' [] acc [] = reverse acc -blocksToDefinitions' defAcc acc [] = - reverse $ (DefinitionList (reverse defAcc)) : acc -blocksToDefinitions' defAcc acc - ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks) - | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = - let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - pair = case remainingAttr2 == ("", [], []) of - True -> (concatMap plainParaInlines blks1, [blks2]) - False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) - in - blocksToDefinitions' (pair : defAcc) acc blks -blocksToDefinitions' defAcc acc - ((Div 
(ident2, classes2, kvs2) blks2) : blks) - | (not . null) defAcc && "Definition" `elem` classes2 = - let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - defItems2 = case remainingAttr2 == ("", [], []) of - True -> blks2 - False -> [Div remainingAttr2 blks2] - ((defTerm, defItems):defs) = defAcc - defAcc' = case null defItems of - True -> (defTerm, [defItems2]) : defs - False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs - in - blocksToDefinitions' defAcc' acc blks -blocksToDefinitions' [] acc (b:blks) = - blocksToDefinitions' [] (b:acc) blks -blocksToDefinitions' defAcc acc (b:blks) = - blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks - - -blocksToDefinitions :: [Block] -> [Block] -blocksToDefinitions = blocksToDefinitions' [] [] - - - - diff --git a/src/Text/Pandoc/Readers/DocX/Parse.hs b/src/Text/Pandoc/Readers/DocX/Parse.hs deleted file mode 100644 index d7033d9e8..000000000 --- a/src/Text/Pandoc/Readers/DocX/Parse.hs +++ /dev/null @@ -1,604 +0,0 @@ -{- -Copyright (C) 2014 Jesse Rosenthal - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.DocX.Parse - Copyright : Copyright (C) 2014 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal - Stability : alpha - Portability : portable - -Conversion of DocX archive into DocX haskell type --} - - -module Text.Pandoc.Readers.DocX.Parse ( DocX(..) - , Document(..) - , Body(..) - , BodyPart(..) - , TblLook(..) - , ParPart(..) - , Run(..) - , RunElem(..) - , Notes - , Numbering - , Relationship - , Media - , RunStyle(..) - , ParagraphStyle(..) - , Row(..) - , Cell(..) - , getFootNote - , getEndNote - , lookupLevel - , lookupRelationship - , archiveToDocX - ) where -import Codec.Archive.Zip -import Text.XML.Light -import Data.Maybe -import Data.List -import System.FilePath -import Data.Bits ((.|.)) -import qualified Data.ByteString.Lazy as B -import qualified Text.Pandoc.UTF8 as UTF8 - -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - - -type NameSpaces = [(String, String)] - -data DocX = DocX Document Notes Numbering [Relationship] Media - deriving Show - -archiveToDocX :: Archive -> Maybe DocX -archiveToDocX archive = do - let notes = archiveToNotes archive - rels = archiveToRelationships archive - media = archiveToMedia archive - doc <- archiveToDocument archive - numbering <- archiveToNumbering archive - return $ DocX doc notes numbering rels media - -data Document = Document NameSpaces Body - deriving Show - -archiveToDocument :: Archive -> Maybe Document -archiveToDocument zf = do - entry <- findEntryByPath "word/document.xml" zf - docElem <- (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) - bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem - body <- elemToBody namespaces bodyElem - return $ Document namespaces body - -type Media = [(FilePath, B.ByteString)] - -filePathIsMedia :: FilePath -> Bool -filePathIsMedia fp = - let (dir, _) = splitFileName fp - in - (dir == "word/media/") - -getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString) -getMediaPair zf fp = - case findEntryByPath fp zf of - Just e -> Just (fp, fromEntry e) - Nothing -> Nothing - -archiveToMedia :: Archive -> Media -archiveToMedia zf = - mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf)) - -data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] - deriving Show - -data Numb = Numb String String -- right now, only a key to an abstract num - deriving Show - -data AbstractNumb = AbstractNumb String [Level] - deriving Show - --- (ilvl, format, string, start) -type Level = (String, String, String, Maybe Integer) - -lookupLevel :: String -> String -> Numbering -> Maybe Level -lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do - absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs - lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs - lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls - return lvl - -numElemToNum :: NameSpaces -> Element -> Maybe Numb -numElemToNum ns element | - qName (elName element) == "num" && - qURI (elName element) == (lookup "w" ns) = do - numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element - absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - return $ Numb numId absNumId -numElemToNum _ _ = Nothing - -absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb -absNumElemToAbsNum ns element | - qName (elName element) == "abstractNum" && - qURI (elName 
element) == (lookup "w" ns) = do - absNumId <- findAttr - (QName "abstractNumId" (lookup "w" ns) (Just "w")) - element - let levelElems = findChildren - (QName "lvl" (lookup "w" ns) (Just "w")) - element - levels = mapMaybe id $ map (levelElemToLevel ns) levelElems - return $ AbstractNumb absNumId levels -absNumElemToAbsNum _ _ = Nothing - -levelElemToLevel :: NameSpaces -> Element -> Maybe Level -levelElemToLevel ns element | - qName (elName element) == "lvl" && - qURI (elName element) == (lookup "w" ns) = do - ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element - fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) - return (ilvl, fmt, txt, start) -levelElemToLevel _ _ = Nothing - -archiveToNumbering :: Archive -> Maybe Numbering -archiveToNumbering zf = - case findEntryByPath "word/numbering.xml" zf of - Nothing -> Just $ Numbering [] [] [] - Just entry -> do - numberingElem <- (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) - numElems = findChildren - (QName "num" (lookup "w" namespaces) (Just "w")) - numberingElem - absNumElems = findChildren - (QName "abstractNum" (lookup "w" namespaces) (Just "w")) - numberingElem - nums = mapMaybe id $ map (numElemToNum namespaces) numElems - absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems - return $ Numbering namespaces nums absNums - -data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) - deriving Show - -noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart]) -noteElemToNote ns element - | qName (elName element) `elem` ["endnote", "footnote"] && - qURI (elName element) == (lookup "w" ns) = - do - noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - let bps = map fromJust - $ filter isJust - $ map (elemToBodyPart ns) - $ filterChildrenName (isParOrTbl ns) element - return $ (noteId, bps) -noteElemToNote _ _ = Nothing - -getFootNote :: String -> Notes -> Maybe [BodyPart] -getFootNote s (Notes _ fns _) = fns >>= (lookup s) - -getEndNote :: String -> Notes -> Maybe [BodyPart] -getEndNote s (Notes _ _ ens) = ens >>= (lookup s) - -elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] -elemToNotes ns notetype element - | qName (elName element) == (notetype ++ "s") && - qURI (elName element) == (lookup "w" ns) = - Just $ map fromJust - $ filter isJust - $ map (noteElemToNote ns) - $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element -elemToNotes _ _ _ = Nothing - -archiveToNotes :: Archive -> Notes -archiveToNotes zf = - let fnElem = findEntryByPath "word/footnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) - enElem = findEntryByPath "word/endnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) - fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) - Nothing -> [] - en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) - Nothing -> [] - ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces - fn = fnElem >>= (elemToNotes ns "footnote") - en = enElem >>= (elemToNotes ns "endnote") - in - Notes ns fn en - - -data Relationship = Relationship (RelId, Target) - deriving Show - -lookupRelationship :: RelId -> [Relationship] -> Maybe Target -lookupRelationship relid rels = - lookup relid (map (\(Relationship pair) -> pair) rels) - -filePathIsRel :: FilePath -> Bool -filePathIsRel fp = - let (dir, name) = splitFileName fp - in - (dir == "word/_rels/") && ((takeExtension name) == ".rels") - -relElemToRelationship :: Element -> Maybe Relationship -relElemToRelationship element | qName (elName element) == "Relationship" = - do - relId <- findAttr (QName "Id" Nothing Nothing) element - target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship (relId, target) -relElemToRelationship _ = Nothing - - -archiveToRelationships :: Archive -> [Relationship] -archiveToRelationships archive = - let relPaths = filter filePathIsRel (filesInArchive archive) - entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths - relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entries - rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems - in - rels - -data Body = Body [BodyPart] - deriving Show - -isParOrTbl :: NameSpaces -> QName -> Bool -isParOrTbl ns q = qName q `elem` ["p", "tbl"] && - qURI q == (lookup "w" ns) - -elemToBody :: NameSpaces -> Element -> Maybe Body -elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = - Just $ Body - $ map fromJust - $ filter isJust - $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element -elemToBody _ _ = Nothing - -isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool -isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && - qURI q == (lookup "w" ns) - -elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) -elemToNumInfo ns element - | qName (elName element) == "p" && - qURI (elName element) == (lookup "w" ns) = - do - pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element - numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr - lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - return (numId, lvl) -elemToNumInfo _ _ = Nothing - --- isBookMarkTag :: NameSpaces -> QName -> Bool --- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] && --- qURI q == (lookup "w" ns) - --- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark --- parChildrenToBookmark ns (bms : bme : _) --- | qName (elName bms) == "bookmarkStart" && --- qURI (elName bms) == (lookup "w" ns) && --- qName (elName bme) == "bookmarkEnd" && --- qURI (elName bme) == (lookup "w" ns) = do --- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms --- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms --- return $ (bmId, bmName) 
--- parChildrenToBookmark _ _ = Nothing - -elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart -elemToBodyPart ns element - | qName (elName element) == "p" && - qURI (elName element) == (lookup "w" ns) = - let parstyle = elemToParagraphStyle ns element - parparts = mapMaybe id - $ map (elemToParPart ns) - $ filterChildrenName (isRunOrLinkOrBookmark ns) element - in - case elemToNumInfo ns element of - Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts - Nothing -> Just $ Paragraph parstyle parparts - | qName (elName element) == "tbl" && - qURI (elName element) == (lookup "w" ns) = - let - caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element - >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w")) - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - grid = case - findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element - of - Just g -> elemToTblGrid ns g - Nothing -> [] - tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element - >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w")) - >>= elemToTblLook ns - in - Just $ Tbl - (fromMaybe "" caption) - grid - (fromMaybe defaultTblLook tblLook) - (mapMaybe (elemToRow ns) (elChildren element)) - | otherwise = Nothing - -elemToTblLook :: NameSpaces -> Element -> Maybe TblLook -elemToTblLook ns element - | qName (elName element) == "tblLook" && - qURI (elName element) == (lookup "w" ns) = - let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element - val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element - firstRowFmt = - case firstRow of - Just "1" -> True - Just _ -> False - Nothing -> case val of - Just bitMask -> testBitMask bitMask 0x020 - Nothing -> False - in - Just $ TblLook{firstRowFormatting = firstRowFmt} -elemToTblLook _ _ = Nothing - -testBitMask :: String -> Int -> Bool -testBitMask bitMaskS n = - case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of - [] -> False - ((n', _) : _) -> ((n' .|. 
n) /= 0) - -data ParagraphStyle = ParagraphStyle { pStyle :: [String] - , indent :: Maybe Integer - } - deriving Show - -defaultParagraphStyle :: ParagraphStyle -defaultParagraphStyle = ParagraphStyle { pStyle = [] - , indent = Nothing - } - -elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle -elemToParagraphStyle ns element = - case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of - Just pPr -> - ParagraphStyle - {pStyle = - mapMaybe id $ - map - (findAttr (QName "val" (lookup "w" ns) (Just "w"))) - (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) - , indent = - findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= - findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= - stringToInteger - } - Nothing -> defaultParagraphStyle - - -data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String [ParPart] - | Tbl String TblGrid TblLook [Row] - - deriving Show - -type TblGrid = [Integer] - -data TblLook = TblLook {firstRowFormatting::Bool} - deriving Show - -defaultTblLook :: TblLook -defaultTblLook = TblLook{firstRowFormatting = False} - -stringToInteger :: String -> Maybe Integer -stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) - -elemToTblGrid :: NameSpaces -> Element -> TblGrid -elemToTblGrid ns element - | qName (elName element) == "tblGrid" && - qURI (elName element) == (lookup "w" ns) = - let - cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element - in - mapMaybe (\e -> - findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e - >>= stringToInteger - ) - cols -elemToTblGrid _ _ = [] - -data Row = Row [Cell] - deriving Show - - -elemToRow :: NameSpaces -> Element -> Maybe Row -elemToRow ns element - | qName (elName element) == "tr" && - qURI (elName element) == (lookup "w" ns) = - let - cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element - in - Just $ Row (mapMaybe (elemToCell ns) cells) -elemToRow _ _ = Nothing - -data Cell = 
Cell [BodyPart] - deriving Show - -elemToCell :: NameSpaces -> Element -> Maybe Cell -elemToCell ns element - | qName (elName element) == "tc" && - qURI (elName element) == (lookup "w" ns) = - Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element)) -elemToCell _ _ = Nothing - -data ParPart = PlainRun Run - | BookMark BookMarkId Anchor - | InternalHyperLink Anchor [Run] - | ExternalHyperLink RelId [Run] - | Drawing String - deriving Show - -data Run = Run RunStyle [RunElem] - | Footnote String - | Endnote String - deriving Show - -data RunElem = TextRun String | LnBrk - deriving Show - -data RunStyle = RunStyle { isBold :: Bool - , isItalic :: Bool - , isSmallCaps :: Bool - , isStrike :: Bool - , isSuperScript :: Bool - , isSubScript :: Bool - , underline :: Maybe String - , rStyle :: Maybe String } - deriving Show - -defaultRunStyle :: RunStyle -defaultRunStyle = RunStyle { isBold = False - , isItalic = False - , isSmallCaps = False - , isStrike = False - , isSuperScript = False - , isSubScript = False - , underline = Nothing - , rStyle = Nothing - } - -elemToRunStyle :: NameSpaces -> Element -> RunStyle -elemToRunStyle ns element = - case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of - Just rPr -> - RunStyle - { - isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr - , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr - , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr - , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr - , isSuperScript = - (Just "superscript" == - (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")))) - , isSubScript = - (Just "subscript" == - (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")))) - , underline = - findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName 
"val" (lookup "w" ns) (Just "w")) - , rStyle = - findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - } - Nothing -> defaultRunStyle - -elemToRun :: NameSpaces -> Element -> Maybe Run -elemToRun ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - case - findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>= - findAttr (QName "id" (lookup "w" ns) (Just "w")) - of - Just s -> Just $ Footnote s - Nothing -> - case - findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>= - findAttr (QName "id" (lookup "w" ns) (Just "w")) - of - Just s -> Just $ Endnote s - Nothing -> Just $ - Run (elemToRunStyle ns element) - (elemToRunElems ns element) -elemToRun _ _ = Nothing - -elemToRunElem :: NameSpaces -> Element -> Maybe RunElem -elemToRunElem ns element - | qName (elName element) == "t" && - qURI (elName element) == (lookup "w" ns) = - Just $ TextRun (strContent element) - | qName (elName element) == "br" && - qURI (elName element) == (lookup "w" ns) = - Just $ LnBrk - | otherwise = Nothing - - -elemToRunElems :: NameSpaces -> Element -> [RunElem] -elemToRunElems ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - mapMaybe (elemToRunElem ns) (elChildren element) - | otherwise = [] - -elemToDrawing :: NameSpaces -> Element -> Maybe ParPart -elemToDrawing ns element - | qName (elName element) == "drawing" && - qURI (elName element) == (lookup "w" ns) = - let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" - in - findElement (QName "blip" (Just a_ns) (Just "a")) element - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) - >>= (\s -> Just $ Drawing s) -elemToDrawing _ _ = Nothing - - -elemToParPart :: NameSpaces -> Element -> Maybe ParPart -elemToParPart ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - case findChild (QName 
"drawing" (lookup "w" ns) (Just "w")) element of - Just drawingElem -> elemToDrawing ns drawingElem - Nothing -> do - r <- elemToRun ns element - return $ PlainRun r -elemToParPart ns element - | qName (elName element) == "bookmarkStart" && - qURI (elName element) == (lookup "w" ns) = do - bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element - return $ BookMark bmId bmName -elemToParPart ns element - | qName (elName element) == "hyperlink" && - qURI (elName element) == (lookup "w" ns) = - let runs = map fromJust $ filter isJust $ map (elemToRun ns) - $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element - in - case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of - Just anchor -> - Just $ InternalHyperLink anchor runs - Nothing -> - case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of - Just relId -> Just $ ExternalHyperLink relId runs - Nothing -> Nothing -elemToParPart _ _ = Nothing - -type Target = String -type Anchor = String -type BookMarkId = String -type RelId = String - diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs new file mode 100644 index 000000000..df4be41ff --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -0,0 +1,479 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Conversion of Docx type (defined in Text.Pandoc.Readers.Docx.Parse) +to 'Pandoc' document. -} + +{- +Current state of implementation of Docx entities ([x] means +implemented, [-] means partially implemented): + +* Blocks + + - [X] Para + - [X] CodeBlock (styled with `SourceCode`) + - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally, + indented) + - [X] OrderedList + - [X] BulletList + - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`) + - [X] Header (styled with `Heading#`) + - [ ] HorizontalRule + - [-] Table (column widths and alignments not yet implemented) + +* Inlines + + - [X] Str + - [X] Emph (From italics. `underline` currently read as span. In + future, it might optionally be emph as well) + - [X] Strong + - [X] Strikeout + - [X] Superscript + - [X] Subscript + - [X] SmallCaps + - [ ] Quoted + - [ ] Cite + - [X] Code (styled with `VerbatimChar`) + - [X] Space + - [X] LineBreak (these are invisible in Word: entered with Shift-Return) + - [ ] Math + - [X] Link (links to an arbitrary bookmark create a span with the target as + id and "anchor" class) + - [-] Image (Links to path in archive. Future option for + data-encoded URI likely.) + - [X] Note (Footnotes and Endnotes are silently combined.) 
+-} + +module Text.Pandoc.Readers.Docx + ( readDocx + ) where + +import Codec.Archive.Zip +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Builder (text, toList) +import Text.Pandoc.Generic (bottomUp) +import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.UTF8 (toString) +import Text.Pandoc.Readers.Docx.Parse +import Text.Pandoc.Readers.Docx.Lists +import Data.Maybe (mapMaybe, isJust, fromJust) +import Data.List (delete, isPrefixOf, (\\), intersect) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Base64 (encode) +import System.FilePath (combine) + +readDocx :: ReaderOptions + -> B.ByteString + -> Pandoc +readDocx opts bytes = + case archiveToDocx (toArchive bytes) of + Just docx -> Pandoc nullMeta (docxToBlocks opts docx) + Nothing -> error $ "couldn't parse docx file" + +runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) +runStyleToSpanAttr rPr = ("", + mapMaybe id [ + if isBold rPr then (Just "strong") else Nothing, + if isItalic rPr then (Just "emph") else Nothing, + if isSmallCaps rPr then (Just "smallcaps") else Nothing, + if isStrike rPr then (Just "strike") else Nothing, + if isSuperScript rPr then (Just "superscript") else Nothing, + if isSubScript rPr then (Just "subscript") else Nothing, + rStyle rPr], + case underline rPr of + Just fmt -> [("underline", fmt)] + _ -> [] + ) + +parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) +parStyleToDivAttr pPr = ("", + pStyle pPr, + case indent pPr of + Just n -> [("indent", (show n))] + Nothing -> [] + ) + +strToInlines :: String -> [Inline] +strToInlines = toList . 
text + +codeSpans :: [String] +codeSpans = ["VerbatimChar"] + +blockQuoteDivs :: [String] +blockQuoteDivs = ["Quote", "BlockQuote"] + +codeDivs :: [String] +codeDivs = ["SourceCode"] + +runElemToInlines :: RunElem -> [Inline] +runElemToInlines (TextRun s) = strToInlines s +runElemToInlines (LnBrk) = [LineBreak] + +runElemToString :: RunElem -> String +runElemToString (TextRun s) = s +runElemToString (LnBrk) = ['\n'] + +runElemsToString :: [RunElem] -> String +runElemsToString = concatMap runElemToString + +strNormalize :: [Inline] -> [Inline] +strNormalize [] = [] +strNormalize (Str "" : ils) = strNormalize ils +strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) +strNormalize (il:ils) = il : (strNormalize ils) + +runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] +runToInlines _ _ (Run rs runElems) + | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = + case runStyleToSpanAttr rs == ("", [], []) of + True -> [Str (runElemsToString runElems)] + False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] + | otherwise = case runStyleToSpanAttr rs == ("", [], []) of + True -> concatMap runElemToInlines runElems + False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] +runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) = + case (getFootNote fnId notes) of + Just bodyParts -> + [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + Nothing -> + [Note [Div ("", ["footnote"], []) []]] +runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = + case (getEndNote fnId notes) of + Just bodyParts -> + [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + Nothing -> + [Note [Div ("", ["endnote"], []) []]] + +parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] +parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r +parPartToInlines _ _ (BookMark _ anchor) = + [Span (anchor, ["anchor"], []) []] 
+parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = + case lookupRelationship relid rels of + Just target -> [Image [] (combine "word" target, "")] + Nothing -> [Image [] ("", "")] +parPartToInlines opts docx (InternalHyperLink anchor runs) = + [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")] +parPartToInlines opts docx@(Docx _ _ _ rels _) (ExternalHyperLink relid runs) = + case lookupRelationship relid rels of + Just target -> + [Link (concatMap (runToInlines opts docx) runs) (target, "")] + Nothing -> + [Link (concatMap (runToInlines opts docx) runs) ("", "")] + +isAnchorSpan :: Inline -> Bool +isAnchorSpan (Span (ident, classes, kvs) ils) = + (not . null) ident && + classes == ["anchor"] && + null kvs && + null ils +isAnchorSpan _ = False + +dummyAnchors :: [String] +dummyAnchors = ["_GoBack"] + +makeHeaderAnchors :: Block -> Block +makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = + case filter isAnchorSpan ils of + [] -> h + (x@(Span (ident, _, _) _) : xs) -> + case ident `elem` dummyAnchors of + True -> h + False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) + _ -> h +makeHeaderAnchors blk = blk + + +parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline] +parPartsToInlines opts docx parparts = + -- + -- We're going to skip data-uri's for now. It should be an option, + -- not mandatory. 
+ -- + --bottomUp (makeImagesSelfContained docx) $ + bottomUp spanCorrect $ + bottomUp spanTrim $ + bottomUp spanReduce $ + concatMap (parPartToInlines opts docx) parparts + +cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block] +cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps + +rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]] +rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells + +bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block +bodyPartToBlock opts docx (Paragraph pPr parparts) = + Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] +bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) = + let + kvs = case lookupLevel numId lvl numbering of + Just (_, fmt, txt, Just start) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + , ("start", (show start)) + ] + + Just (_, fmt, txt, Nothing) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + ] + Nothing -> [] + in + Div + ("", ["list-item"], kvs) + [bodyPartToBlock opts docx (Paragraph pPr parparts)] +bodyPartToBlock _ _ (Tbl _ _ _ []) = + Para [] +bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = + let caption = strToInlines cap + (hdr, rows) = case firstRowFormatting look of + True -> (Just r, rs) + False -> (Nothing, r:rs) + hdrCells = case hdr of + Just r' -> rowToBlocksList opts docx r' + Nothing -> [] + cells = map (rowToBlocksList opts docx) rows + + size = case null hdrCells of + True -> length $ head cells + False -> length $ hdrCells + -- + -- The two following variables (horizontal column alignment and + -- relative column widths) go to the default at the + -- moment. Width information is in the TblGrid field of the Tbl, + -- so should be possible. Alignment might be more difficult, + -- since there doesn't seem to be a column entity in docx. 
+ alignments = take size (repeat AlignDefault) + widths = take size (repeat 0) :: [Double] + in + Table caption alignments widths hdrCells cells + +makeImagesSelfContained :: Docx -> Inline -> Inline +makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) = + case lookup uri media of + Just bs -> case getMimeType uri of + Just mime -> let data_uri = + "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs) + in + Image alt (data_uri, title) + Nothing -> i + Nothing -> i +makeImagesSelfContained _ inline = inline + +bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] +bodyToBlocks opts docx (Body bps) = + bottomUp removeEmptyPars $ + bottomUp strNormalize $ + bottomUp spanRemove $ + bottomUp divRemove $ + map (makeHeaderAnchors) $ + bottomUp divCorrect $ + bottomUp divReduce $ + bottomUp divCorrectPreReduce $ + bottomUp blocksToDefinitions $ + blocksToBullets $ + map (bodyPartToBlock opts docx) bps + +docxToBlocks :: ReaderOptions -> Docx -> [Block] +docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body + +spanReduce :: [Inline] -> [Inline] +spanReduce [] = [] +spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) + | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) +spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : + s2@(Span (id2, classes2, kvs2) ils2) : + ils) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> s1 : (spanReduce (s2 : ils)) + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + in + spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : + ils) +spanReduce (il:ils) = il : (spanReduce ils) + +ilToCode :: Inline -> String +ilToCode (Str s) = s +ilToCode _ = "" + +spanRemove' :: Inline -> [Inline] 
+spanRemove' s@(Span (ident, classes, _) []) + -- "_GoBack" is automatically inserted. We don't want to keep it. + | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] +spanRemove' (Span (_, _, kvs) ils) = + case lookup "underline" kvs of + Just val -> [Span ("", [], [("underline", val)]) ils] + Nothing -> ils +spanRemove' il = [il] + +spanRemove :: [Inline] -> [Inline] +spanRemove = concatMap spanRemove' + +spanTrim' :: Inline -> [Inline] +spanTrim' il@(Span _ []) = [il] +spanTrim' il@(Span attr (il':[])) + | il' == Space = [Span attr [], Space] + | otherwise = [il] +spanTrim' (Span attr ils) + | head ils == Space && last ils == Space = + [Space, Span attr (init $ tail ils), Space] + | head ils == Space = [Space, Span attr (tail ils)] + | last ils == Space = [Span attr (init ils), Space] +spanTrim' il = [il] + +spanTrim :: [Inline] -> [Inline] +spanTrim = concatMap spanTrim' + +spanCorrect' :: Inline -> [Inline] +spanCorrect' (Span ("", [], []) ils) = ils +spanCorrect' (Span (ident, classes, kvs) ils) + | "emph" `elem` classes = + [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] + | "strong" `elem` classes = + [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] + | "smallcaps" `elem` classes = + [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils] + | "strike" `elem` classes = + [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] + | "superscript" `elem` classes = + [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] + | "subscript" `elem` classes = + [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] + | (not . 
null) (codeSpans `intersect` classes) = + [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] + | otherwise = + [Span (ident, classes, kvs) ils] +spanCorrect' il = [il] + +spanCorrect :: [Inline] -> [Inline] +spanCorrect = concatMap spanCorrect' + +removeEmptyPars :: [Block] -> [Block] +removeEmptyPars blks = filter (\b -> b /= (Para [])) blks + +divReduce :: [Block] -> [Block] +divReduce [] = [] +divReduce ((Div (id1, classes1, kvs1) blks1) : blks) + | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) +divReduce (d1@(Div (id1, classes1, kvs1) blks1) : + d2@(Div (id2, classes2, kvs2) blks2) : + blks) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> d1 : (divReduce (d2 : blks)) + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + in + divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : + blks) +divReduce (blk:blks) = blk : (divReduce blks) + +isHeaderClass :: String -> Maybe Int +isHeaderClass s | "Heading" `isPrefixOf` s = + case reads (drop (length "Heading") s) :: [(Int, String)] of + [] -> Nothing + ((n, "") : []) -> Just n + _ -> Nothing +isHeaderClass _ = Nothing + +findHeaderClass :: [String] -> Maybe Int +findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of + [] -> Nothing + n : _ -> Just n + +blksToInlines :: [Block] -> [Inline] +blksToInlines (Para ils : _) = ils +blksToInlines (Plain ils : _) = ils +blksToInlines _ = [] + +divCorrectPreReduce' :: Block -> [Block] +divCorrectPreReduce' (Div (ident, classes, kvs) blks) + | isJust $ findHeaderClass classes = + let n = fromJust $ findHeaderClass classes + in + [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] + | otherwise = [Div (ident, classes, kvs) 
blks] +divCorrectPreReduce' blk = [blk] + +divCorrectPreReduce :: [Block] -> [Block] +divCorrectPreReduce = concatMap divCorrectPreReduce' + +blkToCode :: Block -> String +blkToCode (Para []) = "" +blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils)) +blkToCode (Para ((Span (_, classes, _) ils'): ils)) + | (not . null) (codeSpans `intersect` classes) = + (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) +blkToCode _ = "" + +divRemove' :: Block -> [Block] +divRemove' (Div (_, _, kvs) blks) = + case lookup "indent" kvs of + Just val -> [Div ("", [], [("indent", val)]) blks] + Nothing -> blks +divRemove' blk = [blk] + +divRemove :: [Block] -> [Block] +divRemove = concatMap divRemove' + +divCorrect' :: Block -> [Block] +divCorrect' b@(Div (ident, classes, kvs) blks) + | (not . null) (blockQuoteDivs `intersect` classes) = + [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] + | (not . null) (codeDivs `intersect` classes) = + [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] + | otherwise = + case lookup "indent" kvs of + Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] + Just _ -> + [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] + Nothing -> [b] +divCorrect' blk = [blk] + +divCorrect :: [Block] -> [Block] +divCorrect = concatMap divCorrect' diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs new file mode 100644 index 000000000..68559d98b --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -0,0 +1,208 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Lists + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Functions for converting flat docx paragraphs into nested lists. +-} + +module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets + , blocksToDefinitions) where + +import Text.Pandoc.JSON +import Text.Pandoc.Shared (trim) +import Control.Monad +import Data.List +import Data.Maybe + +isListItem :: Block -> Bool +isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True +isListItem _ = False + +getLevel :: Block -> Maybe Integer +getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs +getLevel _ = Nothing + +getLevelN :: Block -> Integer +getLevelN b = case getLevel b of + Just n -> n + Nothing -> -1 + +getNumId :: Block -> Maybe Integer +getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs +getNumId _ = Nothing + +getNumIdN :: Block -> Integer +getNumIdN b = case getNumId b of + Just n -> n + Nothing -> -1 + +getText :: Block -> Maybe String +getText (Div (_, _, kvs) _) = lookup "text" kvs +getText _ = Nothing + +data ListType = Itemized | Enumerated ListAttributes + +listStyleMap :: [(String, ListNumberStyle)] +listStyleMap = [("upperLetter", UpperAlpha), + ("lowerLetter", LowerAlpha), + ("upperRoman", UpperRoman), + ("lowerRoman", LowerRoman), + ("decimal", Decimal)] + +listDelimMap :: [(String, ListNumberDelim)] +listDelimMap = [("%1)", OneParen), + ("(%1)", TwoParens), + ("%1.", 
Period)] + +getListType :: Block -> Maybe ListType +getListType b@(Div (_, _, kvs) _) | isListItem b = + let + start = lookup "start" kvs + frmt = lookup "format" kvs + txt = lookup "text" kvs + in + case frmt of + Just "bullet" -> Just Itemized + Just f -> + case txt of + Just t -> Just $ Enumerated ( + read (fromMaybe "1" start) :: Int, + fromMaybe DefaultStyle (lookup f listStyleMap), + fromMaybe DefaultDelim (lookup t listDelimMap)) + Nothing -> Nothing + _ -> Nothing +getListType _ = Nothing + +listParagraphDivs :: [String] +listParagraphDivs = ["ListParagraph"] + +-- This is a first stab at going through and attaching meaning to list +-- paragraphs, without an item marker, following a list item. We +-- assume that these are paragraphs in the same item. + +handleListParagraphs :: [Block] -> [Block] +handleListParagraphs [] = [] +handleListParagraphs ( + (Div attr1@(_, classes1, _) blks1) : + (Div (ident2, classes2, kvs2) blks2) : + blks + ) | "list-item" `elem` classes1 && + not ("list-item" `elem` classes2) && + (not . null) (listParagraphDivs `intersect` classes2) = + -- We don't want to keep this indent. + let newDiv2 = + (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2) + in + handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) +handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) + +separateBlocks' :: Block -> [[Block]] -> [[Block]] +separateBlocks' blk ([] : []) = [[blk]] +separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]] +-- The following is for the invisible bullet lists. This is how +-- pandoc-generated ooxml does multiparagraph item lists. 
+separateBlocks' b acc | liftM trim (getText b) == Just "" = + (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b acc = acc ++ [[b]] + +separateBlocks :: [Block] -> [[Block]] +separateBlocks blks = foldr separateBlocks' [[]] (reverse blks) + +flatToBullets' :: Integer -> [Block] -> [Block] +flatToBullets' _ [] = [] +flatToBullets' num xs@(b : elems) + | getLevelN b == num = b : (flatToBullets' num elems) + | otherwise = + let bNumId = getNumIdN b + bLevel = getLevelN b + (children, remaining) = + span + (\b' -> + ((getLevelN b') > bLevel || + ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))) + xs + in + case getListType b of + Just (Enumerated attr) -> + (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) : + (flatToBullets' num remaining) + _ -> + (BulletList (separateBlocks $ flatToBullets' bLevel children)) : + (flatToBullets' num remaining) + +flatToBullets :: [Block] -> [Block] +flatToBullets elems = flatToBullets' (-1) elems + +blocksToBullets :: [Block] -> [Block] +blocksToBullets blks = + -- bottomUp removeListItemDivs $ + flatToBullets $ (handleListParagraphs blks) + + +plainParaInlines :: Block -> [Inline] +plainParaInlines (Plain ils) = ils +plainParaInlines (Para ils) = ils +plainParaInlines _ = [] + +blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block] +blocksToDefinitions' [] acc [] = reverse acc +blocksToDefinitions' defAcc acc [] = + reverse $ (DefinitionList (reverse defAcc)) : acc +blocksToDefinitions' defAcc acc + ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks) + | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = + let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) + pair = case remainingAttr2 == ("", [], []) of + True -> (concatMap plainParaInlines blks1, [blks2]) + False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) + in + blocksToDefinitions' (pair : defAcc) acc blks +blocksToDefinitions' defAcc acc + ((Div 
(ident2, classes2, kvs2) blks2) : blks) + | (not . null) defAcc && "Definition" `elem` classes2 = + let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) + defItems2 = case remainingAttr2 == ("", [], []) of + True -> blks2 + False -> [Div remainingAttr2 blks2] + ((defTerm, defItems):defs) = defAcc + defAcc' = case null defItems of + True -> (defTerm, [defItems2]) : defs + False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs + in + blocksToDefinitions' defAcc' acc blks +blocksToDefinitions' [] acc (b:blks) = + blocksToDefinitions' [] (b:acc) blks +blocksToDefinitions' defAcc acc (b:blks) = + blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks + + +blocksToDefinitions :: [Block] -> [Block] +blocksToDefinitions = blocksToDefinitions' [] [] + + + + diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs new file mode 100644 index 000000000..22e9dd909 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -0,0 +1,604 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Parse + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Conversion of docx archive into Docx haskell type +-} + + +module Text.Pandoc.Readers.Docx.Parse ( Docx(..) + , Document(..) + , Body(..) + , BodyPart(..) + , TblLook(..) + , ParPart(..) + , Run(..) + , RunElem(..) + , Notes + , Numbering + , Relationship + , Media + , RunStyle(..) + , ParagraphStyle(..) + , Row(..) + , Cell(..) + , getFootNote + , getEndNote + , lookupLevel + , lookupRelationship + , archiveToDocx + ) where +import Codec.Archive.Zip +import Text.XML.Light +import Data.Maybe +import Data.List +import System.FilePath +import Data.Bits ((.|.)) +import qualified Data.ByteString.Lazy as B +import qualified Text.Pandoc.UTF8 as UTF8 + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + + +type NameSpaces = [(String, String)] + +data Docx = Docx Document Notes Numbering [Relationship] Media + deriving Show + +archiveToDocx :: Archive -> Maybe Docx +archiveToDocx archive = do + let notes = archiveToNotes archive + rels = archiveToRelationships archive + media = archiveToMedia archive + doc <- archiveToDocument archive + numbering <- archiveToNumbering archive + return $ Docx doc notes numbering rels media + +data Document = Document NameSpaces Body + deriving Show + +archiveToDocument :: Archive -> Maybe Document +archiveToDocument zf = do + entry <- findEntryByPath "word/document.xml" zf + docElem <- (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem + body <- elemToBody namespaces bodyElem + return $ Document namespaces body + +type Media = [(FilePath, B.ByteString)] + +filePathIsMedia :: FilePath -> Bool +filePathIsMedia fp = + let (dir, _) = splitFileName fp + in + (dir == "word/media/") + +getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString) +getMediaPair zf fp = + case findEntryByPath fp zf of + Just e -> Just (fp, fromEntry e) + Nothing -> Nothing + +archiveToMedia :: Archive -> Media +archiveToMedia zf = + mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf)) + +data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] + deriving Show + +data Numb = Numb String String -- right now, only a key to an abstract num + deriving Show + +data AbstractNumb = AbstractNumb String [Level] + deriving Show + +-- (ilvl, format, string, start) +type Level = (String, String, String, Maybe Integer) + +lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do + absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs + lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs + lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls + return lvl + +numElemToNum :: NameSpaces -> Element -> Maybe Numb +numElemToNum ns element | + qName (elName element) == "num" && + qURI (elName element) == (lookup "w" ns) = do + numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element + absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + return $ Numb numId absNumId +numElemToNum _ _ = Nothing + +absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb +absNumElemToAbsNum ns element | + qName (elName element) == "abstractNum" && + qURI (elName 
element) == (lookup "w" ns) = do + absNumId <- findAttr + (QName "abstractNumId" (lookup "w" ns) (Just "w")) + element + let levelElems = findChildren + (QName "lvl" (lookup "w" ns) (Just "w")) + element + levels = mapMaybe id $ map (levelElemToLevel ns) levelElems + return $ AbstractNumb absNumId levels +absNumElemToAbsNum _ _ = Nothing + +levelElemToLevel :: NameSpaces -> Element -> Maybe Level +levelElemToLevel ns element | + qName (elName element) == "lvl" && + qURI (elName element) == (lookup "w" ns) = do + ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element + fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + return (ilvl, fmt, txt, start) +levelElemToLevel _ _ = Nothing + +archiveToNumbering :: Archive -> Maybe Numbering +archiveToNumbering zf = + case findEntryByPath "word/numbering.xml" zf of + Nothing -> Just $ Numbering [] [] [] + Just entry -> do + numberingElem <- (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + numElems = findChildren + (QName "num" (lookup "w" namespaces) (Just "w")) + numberingElem + absNumElems = findChildren + (QName "abstractNum" (lookup "w" namespaces) (Just "w")) + numberingElem + nums = mapMaybe id $ map (numElemToNum namespaces) numElems + absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems + return $ Numbering namespaces nums absNums + +data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) + deriving Show + +noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart]) +noteElemToNote ns element + | qName (elName element) `elem` ["endnote", "footnote"] && + qURI (elName element) == (lookup "w" ns) = + do + noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + let bps = map fromJust + $ filter isJust + $ map (elemToBodyPart ns) + $ filterChildrenName (isParOrTbl ns) element + return $ (noteId, bps) +noteElemToNote _ _ = Nothing + +getFootNote :: String -> Notes -> Maybe [BodyPart] +getFootNote s (Notes _ fns _) = fns >>= (lookup s) + +getEndNote :: String -> Notes -> Maybe [BodyPart] +getEndNote s (Notes _ _ ens) = ens >>= (lookup s) + +elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] +elemToNotes ns notetype element + | qName (elName element) == (notetype ++ "s") && + qURI (elName element) == (lookup "w" ns) = + Just $ map fromJust + $ filter isJust + $ map (noteElemToNote ns) + $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element +elemToNotes _ _ _ = Nothing + +archiveToNotes :: Archive -> Notes +archiveToNotes zf = + let fnElem = findEntryByPath "word/footnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + enElem = findEntryByPath "word/endnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) + fn_namespaces = case fnElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + en_namespaces = case enElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces + fn = fnElem >>= (elemToNotes ns "footnote") + en = enElem >>= (elemToNotes ns "endnote") + in + Notes ns fn en + + +data Relationship = Relationship (RelId, Target) + deriving Show + +lookupRelationship :: RelId -> [Relationship] -> Maybe Target +lookupRelationship relid rels = + lookup relid (map (\(Relationship pair) -> pair) rels) + +filePathIsRel :: FilePath -> Bool +filePathIsRel fp = + let (dir, name) = splitFileName fp + in + (dir == "word/_rels/") && ((takeExtension name) == ".rels") + +relElemToRelationship :: Element -> Maybe Relationship +relElemToRelationship element | qName (elName element) == "Relationship" = + do + relId <- findAttr (QName "Id" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element + return $ Relationship (relId, target) +relElemToRelationship _ = Nothing + + +archiveToRelationships :: Archive -> [Relationship] +archiveToRelationships archive = + let relPaths = filter filePathIsRel (filesInArchive archive) + entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths + relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entries + rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems + in + rels + +data Body = Body [BodyPart] + deriving Show + +isParOrTbl :: NameSpaces -> QName -> Bool +isParOrTbl ns q = qName q `elem` ["p", "tbl"] && + qURI q == (lookup "w" ns) + +elemToBody :: NameSpaces -> Element -> Maybe Body +elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = + Just $ Body + $ map fromJust + $ filter isJust + $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element +elemToBody _ _ = Nothing + +isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool +isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && + qURI q == (lookup "w" ns) + +elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) +elemToNumInfo ns element + | qName (elName element) == "p" && + qURI (elName element) == (lookup "w" ns) = + do + pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element + numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr + lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + return (numId, lvl) +elemToNumInfo _ _ = Nothing + +-- isBookMarkTag :: NameSpaces -> QName -> Bool +-- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] && +-- qURI q == (lookup "w" ns) + +-- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark +-- parChildrenToBookmark ns (bms : bme : _) +-- | qName (elName bms) == "bookmarkStart" && +-- qURI (elName bms) == (lookup "w" ns) && +-- qName (elName bme) == "bookmarkEnd" && +-- qURI (elName bme) == (lookup "w" ns) = do +-- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms +-- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms +-- return $ (bmId, bmName) 
+-- parChildrenToBookmark _ _ = Nothing + +elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart +elemToBodyPart ns element + | qName (elName element) == "p" && + qURI (elName element) == (lookup "w" ns) = + let parstyle = elemToParagraphStyle ns element + parparts = mapMaybe id + $ map (elemToParPart ns) + $ filterChildrenName (isRunOrLinkOrBookmark ns) element + in + case elemToNumInfo ns element of + Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts + Nothing -> Just $ Paragraph parstyle parparts + | qName (elName element) == "tbl" && + qURI (elName element) == (lookup "w" ns) = + let + caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element + >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w")) + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + grid = case + findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element + of + Just g -> elemToTblGrid ns g + Nothing -> [] + tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element + >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w")) + >>= elemToTblLook ns + in + Just $ Tbl + (fromMaybe "" caption) + grid + (fromMaybe defaultTblLook tblLook) + (mapMaybe (elemToRow ns) (elChildren element)) + | otherwise = Nothing + +elemToTblLook :: NameSpaces -> Element -> Maybe TblLook +elemToTblLook ns element + | qName (elName element) == "tblLook" && + qURI (elName element) == (lookup "w" ns) = + let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element + val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element + firstRowFmt = + case firstRow of + Just "1" -> True + Just _ -> False + Nothing -> case val of + Just bitMask -> testBitMask bitMask 0x020 + Nothing -> False + in + Just $ TblLook{firstRowFormatting = firstRowFmt} +elemToTblLook _ _ = Nothing + +testBitMask :: String -> Int -> Bool +testBitMask bitMaskS n = + case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of + [] -> False + ((n', _) : _) -> ((n' .|. 
n) == n') -- True iff every bit of mask n is set in n' + +data ParagraphStyle = ParagraphStyle { pStyle :: [String] + , indent :: Maybe Integer + } + deriving Show + +defaultParagraphStyle :: ParagraphStyle +defaultParagraphStyle = ParagraphStyle { pStyle = [] + , indent = Nothing + } + +elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle +elemToParagraphStyle ns element = + case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of + Just pPr -> + ParagraphStyle + {pStyle = + mapMaybe id $ + map + (findAttr (QName "val" (lookup "w" ns) (Just "w"))) + (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) + , indent = + findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= + findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= + stringToInteger + } + Nothing -> defaultParagraphStyle + + +data BodyPart = Paragraph ParagraphStyle [ParPart] + | ListItem ParagraphStyle String String [ParPart] + | Tbl String TblGrid TblLook [Row] + + deriving Show + +type TblGrid = [Integer] + +data TblLook = TblLook {firstRowFormatting::Bool} + deriving Show + +defaultTblLook :: TblLook +defaultTblLook = TblLook{firstRowFormatting = False} + +stringToInteger :: String -> Maybe Integer +stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) + +elemToTblGrid :: NameSpaces -> Element -> TblGrid +elemToTblGrid ns element + | qName (elName element) == "tblGrid" && + qURI (elName element) == (lookup "w" ns) = + let + cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element + in + mapMaybe (\e -> + findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e + >>= stringToInteger + ) + cols +elemToTblGrid _ _ = [] + +data Row = Row [Cell] + deriving Show + + +elemToRow :: NameSpaces -> Element -> Maybe Row +elemToRow ns element + | qName (elName element) == "tr" && + qURI (elName element) == (lookup "w" ns) = + let + cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element + in + Just $ Row (mapMaybe (elemToCell ns) cells) +elemToRow _ _ = Nothing + +data Cell = 
Cell [BodyPart] + deriving Show + +elemToCell :: NameSpaces -> Element -> Maybe Cell +elemToCell ns element + | qName (elName element) == "tc" && + qURI (elName element) == (lookup "w" ns) = + Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element)) +elemToCell _ _ = Nothing + +data ParPart = PlainRun Run + | BookMark BookMarkId Anchor + | InternalHyperLink Anchor [Run] + | ExternalHyperLink RelId [Run] + | Drawing String + deriving Show + +data Run = Run RunStyle [RunElem] + | Footnote String + | Endnote String + deriving Show + +data RunElem = TextRun String | LnBrk + deriving Show + +data RunStyle = RunStyle { isBold :: Bool + , isItalic :: Bool + , isSmallCaps :: Bool + , isStrike :: Bool + , isSuperScript :: Bool + , isSubScript :: Bool + , underline :: Maybe String + , rStyle :: Maybe String } + deriving Show + +defaultRunStyle :: RunStyle +defaultRunStyle = RunStyle { isBold = False + , isItalic = False + , isSmallCaps = False + , isStrike = False + , isSuperScript = False + , isSubScript = False + , underline = Nothing + , rStyle = Nothing + } + +elemToRunStyle :: NameSpaces -> Element -> RunStyle +elemToRunStyle ns element = + case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of + Just rPr -> + RunStyle + { + isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr + , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr + , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr + , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr + , isSuperScript = + (Just "superscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , isSubScript = + (Just "subscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , underline = + findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName 
"val" (lookup "w" ns) (Just "w")) + , rStyle = + findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + } + Nothing -> defaultRunStyle + +elemToRun :: NameSpaces -> Element -> Maybe Run +elemToRun ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + case + findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>= + findAttr (QName "id" (lookup "w" ns) (Just "w")) + of + Just s -> Just $ Footnote s + Nothing -> + case + findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>= + findAttr (QName "id" (lookup "w" ns) (Just "w")) + of + Just s -> Just $ Endnote s + Nothing -> Just $ + Run (elemToRunStyle ns element) + (elemToRunElems ns element) +elemToRun _ _ = Nothing + +elemToRunElem :: NameSpaces -> Element -> Maybe RunElem +elemToRunElem ns element + | qName (elName element) == "t" && + qURI (elName element) == (lookup "w" ns) = + Just $ TextRun (strContent element) + | qName (elName element) == "br" && + qURI (elName element) == (lookup "w" ns) = + Just $ LnBrk + | otherwise = Nothing + + +elemToRunElems :: NameSpaces -> Element -> [RunElem] +elemToRunElems ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + mapMaybe (elemToRunElem ns) (elChildren element) + | otherwise = [] + +elemToDrawing :: NameSpaces -> Element -> Maybe ParPart +elemToDrawing ns element + | qName (elName element) == "drawing" && + qURI (elName element) == (lookup "w" ns) = + let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" + in + findElement (QName "blip" (Just a_ns) (Just "a")) element + >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= (\s -> Just $ Drawing s) +elemToDrawing _ _ = Nothing + + +elemToParPart :: NameSpaces -> Element -> Maybe ParPart +elemToParPart ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + case findChild (QName 
"drawing" (lookup "w" ns) (Just "w")) element of + Just drawingElem -> elemToDrawing ns drawingElem + Nothing -> do + r <- elemToRun ns element + return $ PlainRun r +elemToParPart ns element + | qName (elName element) == "bookmarkStart" && + qURI (elName element) == (lookup "w" ns) = do + bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element + return $ BookMark bmId bmName +elemToParPart ns element + | qName (elName element) == "hyperlink" && + qURI (elName element) == (lookup "w" ns) = + let runs = map fromJust $ filter isJust $ map (elemToRun ns) + $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element + in + case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of + Just anchor -> + Just $ InternalHyperLink anchor runs + Nothing -> + case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of + Just relId -> Just $ ExternalHyperLink relId runs + Nothing -> Nothing +elemToParPart _ _ = Nothing + +type Target = String +type Anchor = String +type BookMarkId = String +type RelId = String + diff --git a/tests/Tests/Readers/DocX.hs b/tests/Tests/Readers/DocX.hs deleted file mode 100644 index f4564ea1d..000000000 --- a/tests/Tests/Readers/DocX.hs +++ /dev/null @@ -1,68 +0,0 @@ -module Tests.Readers.DocX (tests) where - -import Text.Pandoc.Options -import Text.Pandoc.Readers.Native -import Text.Pandoc.Definition -import Tests.Helpers -import Test.Framework -import qualified Data.ByteString.Lazy as B -import Text.Pandoc.Readers.DocX - -compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc) -compareOutput docxFile nativeFile = do - df <- B.readFile docxFile - nf <- Prelude.readFile nativeFile - return $ (readDocX def df, readNative nf) - -testCompare' :: String -> FilePath -> FilePath -> IO Test -testCompare' name docxFile nativeFile = do - (dp, np) <- compareOutput docxFile nativeFile - return $ test id name (dp, np) - -testCompare :: String -> FilePath -> 
FilePath -> Test -testCompare name docxFile nativeFile = - buildTest $ testCompare' name docxFile nativeFile - - -tests :: [Test] -tests = [ testGroup "inlines" - [ testCompare - "font formatting" - "docx.inline_formatting.docx" - "docx.inline_formatting.native" - , testCompare - "hyperlinks" - "docx.links.docx" - "docx.links.native" - , testCompare - "inline image with reference output" - "docx.image.docx" - "docx.image_no_embed.native" - , testCompare - "handling unicode input" - "docx.unicode.docx" - "docx.unicode.native"] - , testGroup "blocks" - [ testCompare - "headers" - "docx.headers.docx" - "docx.headers.native" - , testCompare - "lists" - "docx.lists.docx" - "docx.lists.native" - , testCompare - "footnotes and endnotes" - "docx.notes.docx" - "docx.notes.native" - , testCompare - "blockquotes (parsing indent as blockquote)" - "docx.block_quotes.docx" - "docx.block_quotes_parse_indent.native" - , testCompare - "tables" - "docx.tables.docx" - "docx.tables.native" - ] - ] - diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs new file mode 100644 index 000000000..0a963ddc6 --- /dev/null +++ b/tests/Tests/Readers/Docx.hs @@ -0,0 +1,68 @@ +module Tests.Readers.Docx (tests) where + +import Text.Pandoc.Options +import Text.Pandoc.Readers.Native +import Text.Pandoc.Definition +import Tests.Helpers +import Test.Framework +import qualified Data.ByteString.Lazy as B +import Text.Pandoc.Readers.Docx + +compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc) +compareOutput docxFile nativeFile = do + df <- B.readFile docxFile + nf <- Prelude.readFile nativeFile + return $ (readDocx def df, readNative nf) + +testCompare' :: String -> FilePath -> FilePath -> IO Test +testCompare' name docxFile nativeFile = do + (dp, np) <- compareOutput docxFile nativeFile + return $ test id name (dp, np) + +testCompare :: String -> FilePath -> FilePath -> Test +testCompare name docxFile nativeFile = + buildTest $ testCompare' name docxFile nativeFile + + +tests :: 
[Test] +tests = [ testGroup "inlines" + [ testCompare + "font formatting" + "docx.inline_formatting.docx" + "docx.inline_formatting.native" + , testCompare + "hyperlinks" + "docx.links.docx" + "docx.links.native" + , testCompare + "inline image with reference output" + "docx.image.docx" + "docx.image_no_embed.native" + , testCompare + "handling unicode input" + "docx.unicode.docx" + "docx.unicode.native"] + , testGroup "blocks" + [ testCompare + "headers" + "docx.headers.docx" + "docx.headers.native" + , testCompare + "lists" + "docx.lists.docx" + "docx.lists.native" + , testCompare + "footnotes and endnotes" + "docx.notes.docx" + "docx.notes.native" + , testCompare + "blockquotes (parsing indent as blockquote)" + "docx.block_quotes.docx" + "docx.block_quotes_parse_indent.native" + , testCompare + "tables" + "docx.tables.docx" + "docx.tables.native" + ] + ] + diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index 9f9d85147..c07a51ec5 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -9,7 +9,7 @@ import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown import qualified Tests.Readers.Org import qualified Tests.Readers.RST -import qualified Tests.Readers.DocX +import qualified Tests.Readers.Docx import qualified Tests.Writers.ConTeXt import qualified Tests.Writers.LaTeX import qualified Tests.Writers.HTML @@ -39,7 +39,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Markdown" Tests.Readers.Markdown.tests , testGroup "Org" Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests - , testGroup "DocX" Tests.Readers.DocX.tests + , testGroup "Docx" Tests.Readers.Docx.tests ] ] -- cgit v1.2.3 From 7d60c798bf12a93ca4d7f4d973c917ba0d5a96ff Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 23:02:20 -0700 Subject: Fixed compiler warning. 
--- src/Text/Pandoc/Readers/Docx.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index df4be41ff..4035cde99 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -216,7 +216,9 @@ parPartsToInlines opts docx parparts = -- We're going to skip data-uri's for now. It should be an option, -- not mandatory. -- - --bottomUp (makeImagesSelfContained docx) $ + (if False -- TODO depend on option + then bottomUp (makeImagesSelfContained docx) + else id) $ bottomUp spanCorrect $ bottomUp spanTrim $ bottomUp spanReduce $ -- cgit v1.2.3 From fc291efad3430be0645e979e0279c93195012075 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 17 Jun 2014 00:38:55 -0700 Subject: LaTeX reader: Correctly handle table rows with too few cells. LaTeX seems to treat them as if they have empty cells at the end. Closes #241. --- src/Text/Pandoc/Readers/LaTeX.hs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 3c4d4ee52..97bfaa455 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1255,10 +1255,14 @@ parseTableRow :: Int -- ^ number of columns parseTableRow cols = try $ do let tableCellInline = notFollowedBy (amp <|> lbreak) >> inline let tableCell = (plain . trimInlines . 
mconcat) <$> many tableCellInline - cells' <- sepBy tableCell amp - guard $ length cells' == cols + cells' <- sepBy1 tableCell amp + let numcells = length cells' + guard $ numcells <= cols && numcells >= 1 + guard $ cells' /= [mempty] + -- note: a & b in a three-column table leaves an empty 3rd cell: + let cells'' = cells' ++ replicate (cols - numcells) mempty spaces - return cells' + return cells'' simpTable :: LP Blocks simpTable = try $ do -- cgit v1.2.3 From 59272e4d99668ddc48f07eb761979c2f49cf76d5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 17 Jun 2014 12:14:02 -0700 Subject: DocBook reader: Support `<?asciidoc-br?>`. Closes #1236. Note, this is a bit of a kludge, to work around the fact that xml-light doesn't parse `<?asciidoc-br?>` correctly. We preprocess the input, replacing that instruction with `<br/>`, and then parse that as a line break. Other XML instructions are simply removed from the input stream. --- src/Text/Pandoc/Readers/DocBook.hs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index d58f8b3c5..cf1d5132e 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -492,7 +492,7 @@ List of all DocBook tags, with [x] indicating implemented, anything else [ ] xref - A cross reference to another part of the document [ ] year - The year of publication of a document - +[x] ?asciidoc-br? - line break from asciidoc docbook output -} type DB = State DBState @@ -507,7 +507,7 @@ data DBState = DBState{ dbSectionLevel :: Int readDocBook :: ReaderOptions -> String -> Pandoc readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp) + where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp') DBState{ dbSectionLevel = 0 , dbQuoteType = DoubleQuote , dbMeta = mempty @@ -515,6 +515,17 @@ readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) , dbBook = False , dbFigureTitle = mempty } + inp' = handleInstructions inp + +-- We treat <?asciidoc-br?> specially (issue #1236), converting it +-- to
<br/>, since xml-light doesn't parse the instruction correctly. +-- Other xml instructions are simply removed from the input stream. +handleInstructions :: String -> String +handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs +handleInstructions xs = case break (=='<') xs of + (ys, []) -> ys + ([], '<':zs) -> '<' : handleInstructions zs + (ys, zs) -> ys ++ handleInstructions zs getFigure :: Element -> DB Blocks getFigure e = do @@ -920,6 +931,10 @@ parseInline (Elem e) = "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e) "title" -> return mempty "affiliation" -> return mempty + -- Note: this isn't a real docbook tag; it's what we convert + -- <?asciidoc-br?> to in handleInstructions, above. A kludge to + -- work around xml-light's inability to parse an instruction. + "br" -> return linebreak _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e) -- cgit v1.2.3 From b371e83d7362c6ffc6e4dd98b7288ddd7f23f46a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 17 Jun 2014 15:15:56 -0700 Subject: Highlighting: Let .numberLines work even if no language given. Closes #1287, jgm/highlighting-kate#40. 
--- src/Text/Pandoc/Highlighting.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 2e7a9f648..7f975d4c6 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -74,7 +74,12 @@ highlight formatter (_, classes, keyvals) rawCode = ["number","numberLines", "number-lines"]) classes } lcclasses = map (map toLower) classes in case find (`elem` lcLanguages) lcclasses of - Nothing -> Nothing + Nothing + | numberLines fmtOpts -> Just + $ formatter fmtOpts{ codeClasses = [], + containerClasses = classes } + $ map (\ln -> [(NormalTok, ln)]) $ lines rawCode + | otherwise -> Nothing Just language -> Just $ formatter fmtOpts{ codeClasses = [language], containerClasses = classes } -- cgit v1.2.3 From ab390a10ec3bc42c71d8746152acbf3ee7b1595b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 11:33:09 -0700 Subject: Removed old haddock reader code. Add dependency on haddock-library. This also removes the dependency on alex and happy. --- INSTALL | 6 +- pandoc.cabal | 8 +- src/Text/Pandoc/Readers/Haddock.hs | 32 ++++-- src/Text/Pandoc/Readers/Haddock/Lex.x | 171 ------------------------------ src/Text/Pandoc/Readers/Haddock/Parse.y | 178 -------------------------------- 5 files changed, 25 insertions(+), 370 deletions(-) delete mode 100644 src/Text/Pandoc/Readers/Haddock/Lex.x delete mode 100644 src/Text/Pandoc/Readers/Haddock/Parse.y diff --git a/INSTALL b/INSTALL index f3366e103..eb9b2b030 100644 --- a/INSTALL +++ b/INSTALL @@ -12,11 +12,7 @@ Quick install ------------- 1. Install the [Haskell platform]. This will give you [GHC] and - the [cabal-install] build tool, as well as `alex` and `happy`. - If you do not use the Haskell platform, you'll need to install - `alex` and `happy` separately: - - cabal install alex happy + the [cabal-install] build tool. 2. 
Update your package database: diff --git a/pandoc.cabal b/pandoc.cabal index 5898af5ad..f9938bede 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -255,8 +255,8 @@ Library vector >= 0.10 && < 0.11, hslua >= 0.3 && < 0.4, binary >= 0.5 && < 0.8, - SHA >= 1.6 && < 1.7 - Build-Tools: alex, happy + SHA >= 1.6 && < 1.7, + haddock-library >= 1.0 && < 1.1 if flag(https) Build-Depends: http-client >= 0.3.2 && < 0.4, http-client-tls >= 0.2 && < 0.3, @@ -322,9 +322,7 @@ Library Text.Pandoc.XML, Text.Pandoc.SelfContained, Text.Pandoc.Process - Other-Modules: Text.Pandoc.Readers.Haddock.Lex, - Text.Pandoc.Readers.Haddock.Parse, - Text.Pandoc.Readers.Docx.Lists, + Other-Modules: Text.Pandoc.Readers.Docx.Lists, Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 0e74406ef..65d8de98f 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -3,7 +3,8 @@ Copyright : Copyright (C) 2013 David Lazar License : GNU GPL, version 2 or above - Maintainer : David Lazar + Maintainer : David Lazar , + John MacFarlane Stability : alpha Conversion of Haddock markup to 'Pandoc' document. @@ -12,22 +13,31 @@ module Text.Pandoc.Readers.Haddock ( readHaddock ) where -import Text.Pandoc.Builder +import Text.Pandoc.Builder (Blocks, Inlines) +import qualified Text.Pandoc.Builder as B +import Data.Monoid +import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Readers.Haddock.Lex -import Text.Pandoc.Readers.Haddock.Parse +import Documentation.Haddock.Parser (parseParas, Identifier) +import Documentation.Haddock.Types -- | Parse Haddock markup and return a 'Pandoc' document. 
readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse -> Pandoc -readHaddock _ s = Pandoc nullMeta blocks - where - blocks = case parseParas (tokenise s (0,0)) of - Left [] -> error "parse failure" - Left (tok:_) -> error $ "parse failure " ++ pos (tokenPos tok) - where pos (l, c) = "(line " ++ show l ++ ", column " ++ show c ++ ")" - Right x -> mergeLists (toList x) +readHaddock _ = B.doc . docHToBlocks . parseParas + +docHToBlocks :: DocH mod Identifier -> Blocks +docHToBlocks d = + case d of + DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) + DocParagraph ils -> B.para $ docHToInlines ils + +docHToInlines :: DocH mod Identifier -> Inlines +docHToInlines d = + case d of + DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2) + DocString s -> B.text s -- similar to 'docAppend' in Haddock.Doc mergeLists :: [Block] -> [Block] diff --git a/src/Text/Pandoc/Readers/Haddock/Lex.x b/src/Text/Pandoc/Readers/Haddock/Lex.x deleted file mode 100644 index 120e96ebf..000000000 --- a/src/Text/Pandoc/Readers/Haddock/Lex.x +++ /dev/null @@ -1,171 +0,0 @@ --- --- Haddock - A Haskell Documentation Tool --- --- (c) Simon Marlow 2002 --- --- This file was modified and integrated into GHC by David Waern 2006. --- Then moved back into Haddock by Isaac Dupree in 2009 :-) --- Then copied into Pandoc by David Lazar in 2013 :-D - -{ -{-# LANGUAGE BangPatterns #-} -- Generated by Alex -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. 
See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Text.Pandoc.Readers.Haddock.Lex ( - Token(..), - LToken, - tokenise, - tokenPos - ) where - -import Data.Char -import Numeric (readHex) -} - -%wrapper "posn" - -$ws = $white # \n -$digit = [0-9] -$hexdigit = [0-9a-fA-F] -$special = [\"\@] -$alphanum = [A-Za-z0-9] -$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:] - -:- - --- beginning of a paragraph -<0,para> { - $ws* \n ; - $ws* \> { begin birdtrack } - $ws* prop \> .* \n { strtoken TokProperty `andBegin` property} - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - $ws* [\*\-] { token TokBullet `andBegin` string } - $ws* \[ { token TokDefStart `andBegin` def } - $ws* \( $digit+ \) { token TokNumber `andBegin` string } - $ws* $digit+ \. { token TokNumber `andBegin` string } - $ws* { begin string } -} - --- beginning of a line - { - $ws* \> { begin birdtrack } - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - - $ws* \n { token TokPara `andBegin` para } - -- ^ Here, we really want to be able to say - -- $ws* (\n | ) { token TokPara `andBegin` para} - -- because otherwise a trailing line of whitespace will result in - -- a spurious TokString at the end of a docstring. We don't have , - -- though (NOW I realise what it was for :-). To get around this, we always - -- append \n to the end of a docstring. - - () { begin string } -} - - .* \n? 
{ strtokenNL TokBirdTrack `andBegin` line } - - () { token TokPara `andBegin` para } - - { - $ws* \n { token TokPara `andBegin` para } - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - () { begin exampleresult } -} - - .* \n { strtokenNL TokExampleExpression `andBegin` example } - - .* \n { strtokenNL TokExampleResult `andBegin` example } - - { - $special { strtoken $ \s -> TokSpecial (head s) } - \<\< [^\>]* \>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) } - \< [^\>]* \> { strtoken $ \s -> TokURL (init (tail s)) } - \# [^\#]* \# { strtoken $ \s -> TokAName (init (tail s)) } - \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) } - [\'\`] $ident+ [\'\`] { strtoken $ \s -> TokIdent (init (tail s)) } - \\ . { strtoken (TokString . tail) } - "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] } - "&#" [xX] $hexdigit+ \; - { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] } - -- allow special characters through if they don't fit one of the previous - -- patterns. - [\/\'\`\<\#\&\\] { strtoken TokString } - [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line } - [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString } -} - - { - \] { token TokDefEnd `andBegin` string } -} - --- ']' doesn't have any special meaning outside of the [...] at the beginning --- of a definition paragraph. 
- { - \] { strtoken TokString } -} - -{ --- | A located token -type LToken = (Token, AlexPosn) - -data Token - = TokPara - | TokNumber - | TokBullet - | TokDefStart - | TokDefEnd - | TokSpecial Char - | TokIdent String - | TokString String - | TokURL String - | TokPic String - | TokEmphasis String - | TokAName String - | TokBirdTrack String - | TokProperty String - | TokExamplePrompt String - | TokExampleExpression String - | TokExampleResult String - deriving Show - -tokenPos :: LToken -> (Int, Int) -tokenPos t = let AlexPn _ line col = snd t in (line, col) - -type StartCode = Int -type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> [LToken] - -tokenise :: String -> (Int, Int) -> [LToken] -tokenise str (line, col) = go (posn,'\n',[],eofHack str) para - where posn = AlexPn 0 line col - go inp@(pos,_,_,str) sc = - case alexScan inp sc of - AlexEOF -> [] - AlexError _ -> [] - AlexSkip inp' len -> go inp' sc - AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc) - --- NB. we add a final \n to the string, (see comment in the beginning of line --- production above). -eofHack str = str++"\n" - -andBegin :: Action -> StartCode -> Action -andBegin act new_sc = \pos str _ cont -> act pos str new_sc cont - -token :: Token -> Action -token t = \pos _ sc cont -> (t, pos) : cont sc - -strtoken, strtokenNL :: (String -> Token) -> Action -strtoken t = \pos str sc cont -> (t str, pos) : cont sc -strtokenNL t = \pos str sc cont -> (t (filter (/= '\r') str), pos) : cont sc --- ^ We only want LF line endings in our internal doc string format, so we --- filter out all CRs. 
- -begin :: StartCode -> Action -begin sc = \_ _ _ cont -> cont sc - -} diff --git a/src/Text/Pandoc/Readers/Haddock/Parse.y b/src/Text/Pandoc/Readers/Haddock/Parse.y deleted file mode 100644 index 9c2bbc8a9..000000000 --- a/src/Text/Pandoc/Readers/Haddock/Parse.y +++ /dev/null @@ -1,178 +0,0 @@ --- This code was copied from the 'haddock' package, modified, and integrated --- into Pandoc by David Lazar. -{ -{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6 --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Text.Pandoc.Readers.Haddock.Parse (parseString, parseParas) where - -import Text.Pandoc.Readers.Haddock.Lex -import Text.Pandoc.Builder -import Text.Pandoc.Shared (trim, trimr) -import Data.Generics (everywhere, mkT) -import Data.Char (isSpace) -import Data.Maybe (fromMaybe) -import Data.List (stripPrefix, intersperse) -import Data.Monoid (mempty, mconcat) -} - -%expect 0 - -%tokentype { LToken } - -%token - '/' { (TokSpecial '/',_) } - '@' { (TokSpecial '@',_) } - '[' { (TokDefStart,_) } - ']' { (TokDefEnd,_) } - DQUO { (TokSpecial '\"',_) } - URL { (TokURL $$,_) } - PIC { (TokPic $$,_) } - ANAME { (TokAName $$,_) } - '/../' { (TokEmphasis $$,_) } - '-' { (TokBullet,_) } - '(n)' { (TokNumber,_) } - '>..' 
{ (TokBirdTrack $$,_) } - PROP { (TokProperty $$,_) } - PROMPT { (TokExamplePrompt $$,_) } - RESULT { (TokExampleResult $$,_) } - EXP { (TokExampleExpression $$,_) } - IDENT { (TokIdent $$,_) } - PARA { (TokPara,_) } - STRING { (TokString $$,_) } - -%monad { Either [LToken] } - -%name parseParas doc -%name parseString seq - -%% - -doc :: { Blocks } - : apara PARA doc { $1 <> $3 } - | PARA doc { $2 } - | apara { $1 } - | {- empty -} { mempty } - -apara :: { Blocks } - : ulpara { bulletList [$1] } - | olpara { orderedList [$1] } - | defpara { definitionList [$1] } - | para { $1 } - -ulpara :: { Blocks } - : '-' para { $2 } - -olpara :: { Blocks } - : '(n)' para { $2 } - -defpara :: { (Inlines, [Blocks]) } - : '[' seq ']' seq { (trimInlines $2, [plain $ trimInlines $4]) } - -para :: { Blocks } - : seq { para' $1 } - | codepara { codeBlockWith ([], ["haskell"], []) $1 } - | property { $1 } - | examples { $1 } - -codepara :: { String } - : '>..' codepara { $1 ++ $2 } - | '>..' { $1 } - -property :: { Blocks } - : PROP { makeProperty $1 } - -examples :: { Blocks } - : example examples { $1 <> $2 } - | example { $1 } - -example :: { Blocks } - : PROMPT EXP result { makeExample $1 $2 (lines $3) } - | PROMPT EXP { makeExample $1 $2 [] } - -result :: { String } - : RESULT result { $1 ++ $2 } - | RESULT { $1 } - -seq :: { Inlines } - : elem seq { $1 <> $2 } - | elem { $1 } - -elem :: { Inlines } - : elem1 { $1 } - | '@' seq1 '@' { monospace $2 } - -seq1 :: { Inlines } - : PARA seq1 { linebreak <> $2 } - | elem1 seq1 { $1 <> $2 } - | elem1 { $1 } - -elem1 :: { Inlines } - : STRING { text $1 } - | '/../' { emph (str $1) } - | URL { makeHyperlink $1 } - | PIC { image $1 $1 mempty } - | ANAME { mempty } -- TODO - | IDENT { codeWith ([], ["haskell"], []) $1 } - | DQUO strings DQUO { codeWith ([], ["haskell"], []) $2 } - -strings :: { String } - : STRING { $1 } - | STRING strings { $1 ++ $2 } - -{ -happyError :: [LToken] -> Either [LToken] a -happyError toks = Left toks - -para' :: 
Inlines -> Blocks -para' = para . trimInlines - -monospace :: Inlines -> Inlines -monospace = everywhere (mkT go) - where - go (Str s) = Code nullAttr s - go x = x - --- | Create a `Hyperlink` from given string. --- --- A hyperlink consists of a URL and an optional label. The label is separated --- from the url by one or more whitespace characters. -makeHyperlink :: String -> Inlines -makeHyperlink input = case break isSpace $ trim input of - (url, "") -> link url url (str url) - (url, lb) -> link url url (trimInlines $ text lb) - -makeProperty :: String -> Blocks -makeProperty s = case trim s of - 'p':'r':'o':'p':'>':xs -> - codeBlockWith ([], ["property"], []) (dropWhile isSpace xs) - xs -> - error $ "makeProperty: invalid input " ++ show xs - --- | Create an 'Example', stripping superfluous characters as appropriate -makeExample :: String -> String -> [String] -> Blocks -makeExample prompt expression result = - para $ codeWith ([], ["haskell","expr"], []) (trim expression) - <> linebreak - <> (mconcat $ intersperse linebreak $ map coder result') - where - -- 1. drop trailing whitespace from the prompt, remember the prefix - prefix = takeWhile isSpace prompt - - -- 2. drop, if possible, the exact same sequence of whitespace - -- characters from each result line - -- - -- 3. interpret lines that only contain the string "<BLANKLINE>" as an - -- empty line - result' = map (substituteBlankLine . tryStripPrefix prefix) result - where - tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys - - substituteBlankLine "<BLANKLINE>" = "" - substituteBlankLine line = line - coder = codeWith ([], ["result"], []) -} -- cgit v1.2.3 From a78d8b84ca16910bb0e2f5a0ffe5334d642943b2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 11:34:06 -0700 Subject: Travis: don't need alex, happy. 
--- .travis.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7f24986d2..1ca7a1228 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,16 +12,11 @@ env: before_install: - travis_retry sudo add-apt-repository -y ppa:hvr/ghc - travis_retry sudo apt-get update - - travis_retry sudo apt-get install cabal-install-1.18 ghc-$GHCVER alex happy + - travis_retry sudo apt-get install cabal-install-1.18 ghc-$GHCVER - export PATH=/opt/ghc/$GHCVER/bin:$PATH install: - cabal-1.18 update - - | - if [ $GHCVER = "head" ] || [ $GHCVER = "7.8.2" ]; then - cabal-1.18 install happy alex - export PATH=$HOME/.cabal/bin:$PATH - fi # - git clone https://github.com/jgm/pandoc-types && cd pandoc-types && cabal-1.18 install && cd .. - cabal-1.18 install --only-dependencies --enable-tests -- cgit v1.2.3 From 9fc5c8d7af31a47d8e3e8ea6dbb541178ec9ca66 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 12:27:27 -0700 Subject: Rewrote haddock reader to use haddock-library. This brings pandoc's rendering of haddock markup in line with the new haddock. Note that we preserve line breaks in `@` code blocks, unlike the earlier version. Modified tests pass. More tests would be good. 
--- src/Text/Pandoc/Readers/Haddock.hs | 124 ++++++++++++++++++++++++++++++------- tests/haddock-reader.haddock | 20 +++--- tests/haddock-reader.native | 6 +- 3 files changed, 115 insertions(+), 35 deletions(-) diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 65d8de98f..a512f969d 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -15,10 +15,13 @@ module Text.Pandoc.Readers.Haddock import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Shared (trim, splitBy) import Data.Monoid +import Data.List (intersperse, stripPrefix) +import Data.Maybe (fromMaybe) import Text.Pandoc.Definition import Text.Pandoc.Options -import Documentation.Haddock.Parser (parseParas, Identifier) +import Documentation.Haddock.Parser import Documentation.Haddock.Types -- | Parse Haddock markup and return a 'Pandoc' document. @@ -27,25 +30,102 @@ readHaddock :: ReaderOptions -- ^ Reader options -> Pandoc readHaddock _ = B.doc . docHToBlocks . 
parseParas -docHToBlocks :: DocH mod Identifier -> Blocks -docHToBlocks d = - case d of +docHToBlocks :: DocH String Identifier -> Blocks +docHToBlocks d' = + case d' of + DocEmpty -> mempty DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) - DocParagraph ils -> B.para $ docHToInlines ils - -docHToInlines :: DocH mod Identifier -> Inlines -docHToInlines d = - case d of - DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2) - DocString s -> B.text s - --- similar to 'docAppend' in Haddock.Doc -mergeLists :: [Block] -> [Block] -mergeLists (BulletList xs : BulletList ys : blocks) - = mergeLists (BulletList (xs ++ ys) : blocks) -mergeLists (OrderedList _ xs : OrderedList a ys : blocks) - = mergeLists (OrderedList a (xs ++ ys) : blocks) -mergeLists (DefinitionList xs : DefinitionList ys : blocks) - = mergeLists (DefinitionList (xs ++ ys) : blocks) -mergeLists (x : blocks) = x : mergeLists blocks -mergeLists [] = [] + DocString _ -> inlineFallback + DocParagraph ils -> B.para $ docHToInlines False ils + DocIdentifier _ -> inlineFallback + DocIdentifierUnchecked _ -> inlineFallback + DocModule s -> B.plain $ docHToInlines False $ DocModule s + DocWarning _ -> mempty -- TODO + DocEmphasis _ -> inlineFallback + DocMonospaced _ -> inlineFallback + DocBold _ -> inlineFallback + DocHeader h -> B.header (headerLevel h) + (docHToInlines False $ headerTitle h) + DocUnorderedList items -> B.bulletList (map docHToBlocks items) + DocOrderedList items -> B.orderedList (map docHToBlocks items) + DocDefList items -> B.definitionList (map (\(d,t) -> + (docHToInlines False d, + [consolidatePlains $ docHToBlocks t])) items) + DocCodeBlock (DocString s) -> B.codeBlockWith ("",["haskell"],[]) s + DocCodeBlock d -> B.para $ docHToInlines True d + DocHyperlink _ -> inlineFallback + DocPic _ -> inlineFallback + DocAName _ -> inlineFallback + DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s) + DocExamples es -> mconcat $ map (\e -> + makeExample 
">>>" (exampleExpression e) (exampleResult e)) es + + where inlineFallback = B.plain $ docHToInlines False d' + consolidatePlains = B.fromList . consolidatePlains' . B.toList + consolidatePlains' zs@(Plain _ : _) = + let (xs, ys) = span isPlain zs in + Plain (concatMap extractContents xs) : consolidatePlains' ys + consolidatePlains' (x : xs) = x : consolidatePlains' xs + consolidatePlains' [] = [] + isPlain (Plain _) = True + isPlain _ = False + extractContents (Plain xs) = xs + extractContents _ = [] + +docHToInlines :: Bool -> DocH String Identifier -> Inlines +docHToInlines isCode d' = + case d' of + DocEmpty -> mempty + DocAppend d1 d2 -> mappend (docHToInlines isCode d1) + (docHToInlines isCode d2) + DocString s + | isCode -> mconcat $ intersperse B.linebreak + $ map B.code $ splitBy (=='\n') s + | otherwise -> B.text s + DocParagraph _ -> mempty + DocIdentifier (_,s,_) -> B.codeWith ("",["haskell"],[]) s + DocIdentifierUnchecked s -> B.codeWith ("",["haskell"],[]) s + DocModule s -> B.codeWith ("",["haskell"],[]) s + DocWarning _ -> mempty -- TODO + DocEmphasis d -> B.emph (docHToInlines isCode d) + DocMonospaced (DocString s) -> B.code s + DocMonospaced d -> docHToInlines True d + DocBold d -> B.strong (docHToInlines isCode d) + DocHeader _ -> mempty + DocUnorderedList _ -> mempty + DocOrderedList _ -> mempty + DocDefList _ -> mempty + DocCodeBlock _ -> mempty + DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h) + (maybe (B.text $ hyperlinkUrl h) B.text $ hyperlinkLabel h) + DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p) + (maybe mempty B.text $ pictureTitle p) + DocAName s -> B.spanWith (s,["anchor"],[]) mempty + DocProperty _ -> mempty + DocExamples _ -> mempty + +-- | Create an 'Example', stripping superfluous characters as appropriate +makeExample :: String -> String -> [String] -> Blocks +makeExample prompt expression result = + B.para $ B.codeWith ("",["prompt"],[]) prompt + <> B.space + <> B.codeWith ([], 
["haskell","expr"], []) (trim expression) + <> B.linebreak + <> (mconcat $ intersperse B.linebreak $ map coder result') + where + -- 1. drop trailing whitespace from the prompt, remember the prefix + prefix = takeWhile (`elem` " \t") prompt + + -- 2. drop, if possible, the exact same sequence of whitespace + -- characters from each result line + -- + -- 3. interpret lines that only contain the string "<BLANKLINE>" as an + -- empty line + result' = map (substituteBlankLine . tryStripPrefix prefix) result + where + tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys + + substituteBlankLine "<BLANKLINE>" = "" + substituteBlankLine line = line + coder = B.codeWith ([], ["result"], []) + diff --git a/tests/haddock-reader.haddock b/tests/haddock-reader.haddock index c4f6d6c36..c3ef0c9fc 100644 --- a/tests/haddock-reader.haddock +++ b/tests/haddock-reader.haddock @@ -18,10 +18,10 @@ This is a code block: This is another code block: @ - f x = x + x. - The \@...\@ code block /interprets markup normally/. - "Module.Foo" - \"Hello World\" +f x = x + x. +The \@...\@ code block /interprets markup normally/. +"Module.Foo" +\"Hello World\" @ Haddock supports REPL examples: @@ -42,21 +42,21 @@ This is a reference to the "Foo" module. This is a bulleted list: - * first item + * first item - * second item + * second item This is an enumerated list: - (1) first item + (1) first item - 2. second item + 2. second item This is a definition list: - [@foo@] The description of @foo@. + [@foo@] The description of @foo@. - [@bar@] The description of @bar@. + [@bar@] The description of @bar@. 
Here is a link: diff --git a/tests/haddock-reader.native b/tests/haddock-reader.native index 877719b50..8edb0b29a 100644 --- a/tests/haddock-reader.native +++ b/tests/haddock-reader.native @@ -4,13 +4,13 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "*",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph,",Space,Str "not",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str ">",Space,Str "This",Space,Str "sentence",Space,Str "is",Space,Str "not",Space,Str "code.",Space,Str ">>>",Space,Str "This",Space,Str "is",Space,Str "not",Space,Str "an",Space,Str "example."] ,Para [Str "The",Space,Str "references",Space,Str "\955,",Space,Str "\955",Space,Str "and",Space,Str "\955",Space,Str "all",Space,Str "represent",Space,Str "the",Space,Str "lower-case",Space,Str "letter",Space,Str "lambda."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "code",Space,Str "block:"] -,CodeBlock ("",["haskell"],[]) " map :: (a -> b) -> [a] -> [b]\n map _ [] = []\n map f (x:xs) = f x : map f xs\n" +,CodeBlock ("",["haskell"],[]) "map :: (a -> b) -> [a] -> [b]\nmap _ [] = []\nmap f (x:xs) = f x : map f xs" ,Para [Str "This",Space,Str "is",Space,Str "another",Space,Str "code",Space,Str "block:"] -,Para [Code ("",[],[]) "f",Space,Code ("",[],[]) "x",Space,Code ("",[],[]) "=",Space,Code ("",[],[]) "x",Space,Code ("",[],[]) "+",Space,Code ("",[],[]) "x.",Space,Code ("",[],[]) "The",Space,Code ("",[],[]) "@...@",Space,Code ("",[],[]) "code",Space,Code ("",[],[]) "block",Space,Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Space,Code ("",["haskell"],[]) "Module.Foo",Space,Code ("",[],[]) "\"Hello",Space,Code ("",[],[]) "World\""] +,Para [Code ("",[],[]) "f x = x + x.",LineBreak,Code ("",[],[]) "The @...@ code block ",Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Code ("",["haskell"],[]) "Module.Foo",Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\"Hello World\""] ,Para [Str "Haddock",Space,Str "supports",Space,Str 
"REPL",Space,Str "examples:"] ,Para [Code ("",["haskell","expr"],[]) "fib 10",LineBreak,Code ("",["result"],[]) "55"] ,Para [Code ("",["haskell","expr"],[]) "putStrLn \"foo\\nbar\"",LineBreak,Code ("",["result"],[]) "foo",LineBreak,Code ("",["result"],[]) "bar"] -,Para [Str "That",Space,Str "was",Space,Emph [Str "really cool"],Str "!",Space,Str "I",Space,Str "had",Space,Str "no",Space,Str "idea",Space,Code ("",[],[]) "fib",Space,Code ("",[],[]) "10",Space,Code ("",[],[]) "=",Space,Code ("",[],[]) "55",Str "."] +,Para [Str "That",Space,Str "was",Space,Emph [Str "really",Space,Str "cool"],Str "!",Space,Str "I",Space,Str "had",Space,Str "no",Space,Str "idea",Space,Code ("",[],[]) "fib 10 = 55",Str "."] ,Para [Str "This",Space,Str "module",Space,Str "defines",Space,Str "the",Space,Str "type",Space,Code ("",["haskell"],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",["haskell"],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",["haskell"],[]) "Foo",Space,Str "module."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "bulleted",Space,Str "list:"] ,BulletList -- cgit v1.2.3 From 35e57db5c292957e74c24eb2cee63928c7865cc6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 15:32:13 -0700 Subject: Finished first draft of Haddock writer. 
--- README | 12 +- pandoc.cabal | 7 +- src/Text/Pandoc.hs | 3 + src/Text/Pandoc/Readers/Haddock.hs | 13 +- src/Text/Pandoc/Writers/Haddock.hs | 357 +++++++++++++++++++++++++++++++++++++ 5 files changed, 381 insertions(+), 11 deletions(-) create mode 100644 src/Text/Pandoc/Writers/Haddock.hs diff --git a/README b/README index 1883ecd57..122db23ec 100644 --- a/README +++ b/README @@ -18,10 +18,10 @@ Org-mode], [DocBook], and [Word docx]; and it can write plain text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX] (including [beamer] slide shows), [ConTeXt], [RTF], [OPML], [DocBook], [OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki markup], -[EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, -[Emacs Org-Mode], [AsciiDoc], [InDesign ICML], and [Slidy], -[Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. It can -also produce [PDF] output on systems where LaTeX is installed. +[Haddock markup], [EPUB] (v2 or v3), [FictionBook2], [Textile], +[groff man] pages, [Emacs Org-Mode], [AsciiDoc], [InDesign ICML], +and [Slidy], [Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. +It can also produce [PDF] output on systems where LaTeX is installed. 
Pandoc's enhanced version of markdown includes syntax for footnotes, tables, flexible ordered lists, definition lists, fenced code blocks, @@ -169,8 +169,8 @@ General options `context` (ConTeXt), `man` (groff man), `mediawiki` (MediaWiki markup), `textile` (Textile), `org` (Emacs Org-Mode), `texinfo` (GNU Texinfo), `opml` (OPML), `docbook` (DocBook), `opendocument` (OpenDocument), `odt` - (OpenOffice text document), `docx` (Word docx), - `rtf` (rich text format), `epub` (EPUB v2 book), `epub3` + (OpenOffice text document), `docx` (Word docx), `haddock` (Haddock + markup), `rtf` (rich text format), `epub` (EPUB v2 book), `epub3` (EPUB v3), `fb2` (FictionBook2 e-book), `asciidoc` (AsciiDoc), `icml` (InDesign ICML), `slidy` (Slidy HTML and javascript slide show), `slideous` (Slideous HTML and javascript slide show), `dzslides` diff --git a/pandoc.cabal b/pandoc.cabal index f9938bede..058e82a7f 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -21,9 +21,9 @@ Description: Pandoc is a Haskell library for converting from one markup markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook, OPML, OpenDocument, ODT, Word docx, RTF, MediaWiki, Textile, groff man pages, plain text, Emacs Org-Mode, AsciiDoc, - EPUB (v2 and v3), FictionBook2, InDesign ICML, and several kinds - of HTML/javascript slide shows (S5, Slidy, Slideous, DZSlides, - reveal.js). + Haddock markup, EPUB (v2 and v3), FictionBook2, + InDesign ICML, and several kinds of HTML/javascript + slide shows (S5, Slidy, Slideous, DZSlides, reveal.js). . 
Pandoc extends standard markdown syntax with footnotes, embedded LaTeX, definition lists, tables, and other @@ -305,6 +305,7 @@ Library Text.Pandoc.Writers.Texinfo, Text.Pandoc.Writers.Man, Text.Pandoc.Writers.Markdown, + Text.Pandoc.Writers.Haddock, Text.Pandoc.Writers.RST, Text.Pandoc.Writers.Org, Text.Pandoc.Writers.AsciiDoc, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 45c2f453b..d2e7887b5 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -102,6 +102,7 @@ module Text.Pandoc , writeFB2 , writeOrg , writeAsciiDoc + , writeHaddock , writeCustom -- * Rendering templates and default templates , module Text.Pandoc.Templates @@ -149,6 +150,7 @@ import Text.Pandoc.Writers.MediaWiki import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.Org import Text.Pandoc.Writers.AsciiDoc +import Text.Pandoc.Writers.Haddock import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options @@ -279,6 +281,7 @@ writers = [ ,("rtf" , IOStringWriter writeRTFWithEmbeddedImages) ,("org" , PureStringWriter writeOrg) ,("asciidoc" , PureStringWriter writeAsciiDoc) + ,("haddock" , PureStringWriter writeHaddock) ] getDefaultExtensions :: String -> Set Extension diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index a512f969d..f184eabdb 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -23,19 +23,28 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Documentation.Haddock.Parser import Documentation.Haddock.Types +import Debug.Trace (trace) -- | Parse Haddock markup and return a 'Pandoc' document. readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse -> Pandoc -readHaddock _ = B.doc . docHToBlocks . parseParas +readHaddock opts = B.doc . docHToBlocks . trace' . 
parseParas + where trace' x = if readerTrace opts + then trace (show x) x + else x docHToBlocks :: DocH String Identifier -> Blocks docHToBlocks d' = case d' of DocEmpty -> mempty + DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) -> + B.headerWith (ident,[],[]) (headerLevel h) + (docHToInlines False $ headerTitle h) DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) DocString _ -> inlineFallback + DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h) + DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h DocParagraph ils -> B.para $ docHToInlines False ils DocIdentifier _ -> inlineFallback DocIdentifierUnchecked _ -> inlineFallback @@ -64,7 +73,7 @@ docHToBlocks d' = consolidatePlains = B.fromList . consolidatePlains' . B.toList consolidatePlains' zs@(Plain _ : _) = let (xs, ys) = span isPlain zs in - Plain (concatMap extractContents xs) : consolidatePlains' ys + Para (concatMap extractContents xs) : consolidatePlains' ys consolidatePlains' (x : xs) = x : consolidatePlains' xs consolidatePlains' [] = [] isPlain (Plain _) = True diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs new file mode 100644 index 000000000..4d6b8e69f --- /dev/null +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -0,0 +1,357 @@ +{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} +{- +Copyright (C) 2014 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Haddock + Copyright : Copyright (C) 2014 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to haddock markup. + +Haddock: +-} +module Text.Pandoc.Writers.Haddock (writeHaddock) where +import Text.Pandoc.Definition +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Shared +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Options +import Data.List ( intersperse, transpose ) +import Text.Pandoc.Pretty +import Control.Monad.State +import Text.Pandoc.Readers.TeXMath (readTeXMath') +import Network.URI (isURI) +import Data.Default + +type Notes = [[Block]] +data WriterState = WriterState { stNotes :: Notes } +instance Default WriterState + where def = WriterState{ stNotes = [] } + +-- | Convert Pandoc to Haddock. +writeHaddock :: WriterOptions -> Pandoc -> String +writeHaddock opts document = + evalState (pandocToHaddock opts{ + writerWrapText = writerWrapText opts } document) def + +-- | Return haddock representation of document. +pandocToHaddock :: WriterOptions -> Pandoc -> State WriterState String +pandocToHaddock opts (Pandoc meta blocks) = do + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + body <- blockListToHaddock opts blocks + st <- get + notes' <- notesToHaddock opts (reverse $ stNotes st) + let render' :: Doc -> String + render' = render colwidth + let main = render' $ body <> + (if isEmpty notes' then empty else blankline <> notes') + metadata <- metaToJSON opts + (fmap (render colwidth) . blockListToHaddock opts) + (fmap (render colwidth) . 
inlineListToHaddock opts) + meta + let context = defField "body" main + $ metadata + if writerStandalone opts + then return $ renderTemplate' (writerTemplate opts) context + else return main + +-- | Return haddock representation of notes. +notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToHaddock opts notes = + mapM (\(num, note) -> noteToHaddock opts num note) (zip [1..] notes) >>= + return . vsep + +-- | Return haddock representation of a note. +noteToHaddock :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToHaddock opts num blocks = do + contents <- blockListToHaddock opts blocks + let num' = text $ writerIdentifierPrefix opts ++ show num + let marker = text "[" <> num' <> text "]" + let markerSize = 4 + offset num' + let spacer = case writerTabStop opts - markerSize of + n | n > 0 -> text $ replicate n ' ' + _ -> text " " + return $ if isEnabled Ext_footnotes opts + then hang (writerTabStop opts) (marker <> spacer) contents + else marker <> spacer <> contents + +-- | Escape special characters for Haddock. +escapeString :: String -> String +escapeString = escapeStringUsing haddockEscapes + where haddockEscapes = backslashEscapes "\\/'`\"@<" + +-- | Convert Pandoc block element to haddock. 
+blockToHaddock :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToHaddock _ Null = return empty +blockToHaddock opts (Div _ ils) = do + contents <- blockListToHaddock opts ils + return $ contents <> blankline +blockToHaddock opts (Plain inlines) = do + contents <- inlineListToHaddock opts inlines + return $ contents <> cr +-- title beginning with fig: indicates figure +blockToHaddock opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = + blockToHaddock opts (Para [Image alt (src,tit)]) +blockToHaddock opts (Para inlines) = + -- TODO: if it contains linebreaks, we need to use a @...@ block + (<> blankline) `fmap` blockToHaddock opts (Plain inlines) +blockToHaddock _ (RawBlock f str) + | f == "haddock" = do + return $ text str <> text "\n" + | otherwise = return empty +blockToHaddock _ HorizontalRule = + return $ blankline <> text "--------------" <> blankline +blockToHaddock opts (Header level (ident,_,_) inlines) = do + contents <- inlineListToHaddock opts inlines + let attr' = if null ident + then empty + else cr <> text "#" <> text ident <> text "#" + return $ nowrap (text (replicate level '=') <> space <> contents) + <> attr' <> blankline +blockToHaddock _ (CodeBlock (_,_,_) str) = + return $ prefixed "> " (text str) <> blankline +-- Nothing in haddock corresponds to block quotes: +blockToHaddock opts (BlockQuote blocks) = + blockListToHaddock opts blocks +-- Haddock doesn't have tables. Use haddock tables in code. +blockToHaddock opts (Table caption aligns widths headers rows) = do + caption' <- inlineListToHaddock opts caption + let caption'' = if null caption || not (isEnabled Ext_table_captions opts) + then empty + else blankline <> caption' <> blankline + rawHeaders <- mapM (blockListToHaddock opts) headers + rawRows <- mapM (mapM (blockListToHaddock opts)) rows + let isSimple = all (==0) widths + let isPlainBlock (Plain _) = True + isPlainBlock _ = False + let hasBlocks = not (all isPlainBlock $ concat . 
concat $ headers:rows) + (nst,tbl) <- case True of + _ | isSimple -> fmap (nest 2,) $ + pandocTable opts (all null headers) aligns widths + rawHeaders rawRows + | not hasBlocks -> fmap (nest 2,) $ + pandocTable opts (all null headers) aligns widths + rawHeaders rawRows + | otherwise -> fmap (id,) $ + gridTable opts (all null headers) aligns widths + rawHeaders rawRows + return $ prefixed "> " $ nst $ tbl $$ blankline $$ caption'' $$ blankline +blockToHaddock opts (BulletList items) = do + contents <- mapM (bulletListItemToHaddock opts) items + return $ cat contents <> blankline +blockToHaddock opts (OrderedList (start,sty,delim) items) = do + let attribs = (start, sty, delim) + let markers = orderedListMarkers attribs + let markers' = map (\m -> if length m < 3 + then m ++ replicate (3 - length m) ' ' + else m) markers + contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $ + zip markers' items + return $ cat contents <> blankline +blockToHaddock opts (DefinitionList items) = do + contents <- mapM (definitionListItemToHaddock opts) items + return $ cat contents <> blankline + +pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> State WriterState Doc +pandocTable opts headless aligns widths rawHeaders rawRows = do + let isSimple = all (==0) widths + let alignHeader alignment = case alignment of + AlignLeft -> lblock + AlignCenter -> cblock + AlignRight -> rblock + AlignDefault -> lblock + let numChars = maximum . map offset + let widthsInChars = if isSimple + then map ((+2) . numChars) + $ transpose (rawHeaders : rawRows) + else map + (floor . (fromIntegral (writerColumns opts) *)) + widths + let makeRow = hcat . intersperse (lblock 1 (text " ")) . 
+ (zipWith3 alignHeader aligns widthsInChars) + let rows' = map makeRow rawRows + let head' = makeRow rawHeaders + let maxRowHeight = maximum $ map height (head':rows') + let underline = cat $ intersperse (text " ") $ + map (\width -> text (replicate width '-')) widthsInChars + let border = if maxRowHeight > 1 + then text (replicate (sum widthsInChars + + length widthsInChars - 1) '-') + else if headless + then underline + else empty + let head'' = if headless + then empty + else border <> cr <> head' + let body = if maxRowHeight > 1 + then vsep rows' + else vcat rows' + let bottom = if headless + then underline + else border + return $ head'' $$ underline $$ body $$ bottom + +gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> State WriterState Doc +gridTable opts headless _aligns widths headers' rawRows = do + let numcols = length headers' + let widths' = if all (==0) widths + then replicate numcols (1.0 / fromIntegral numcols) + else widths + let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths' + let hpipeBlocks blocks = hcat [beg, middle, end] + where h = maximum (map height blocks) + sep' = lblock 3 $ vcat (map text $ replicate h " | ") + beg = lblock 2 $ vcat (map text $ replicate h "| ") + end = lblock 2 $ vcat (map text $ replicate h " |") + middle = chomp $ hcat $ intersperse sep' blocks + let makeRow = hpipeBlocks . zipWith lblock widthsInChars + let head' = makeRow headers' + let rows' = map (makeRow . 
map chomp) rawRows + let border ch = char '+' <> char ch <> + (hcat $ intersperse (char ch <> char '+' <> char ch) $ + map (\l -> text $ replicate l ch) widthsInChars) <> + char ch <> char '+' + let body = vcat $ intersperse (border '-') rows' + let head'' = if headless + then empty + else head' $$ border '=' + return $ border '-' $$ head'' $$ body $$ border '-' + +-- | Convert bullet list item (list of blocks) to haddock +bulletListItemToHaddock :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToHaddock opts items = do + contents <- blockListToHaddock opts items + let sps = replicate (writerTabStop opts - 2) ' ' + let start = text ('-' : ' ' : sps) + -- remove trailing blank line if it is a tight list + let contents' = case reverse items of + (BulletList xs:_) | isTightList xs -> + chomp contents <> cr + (OrderedList _ xs:_) | isTightList xs -> + chomp contents <> cr + _ -> contents + return $ hang (writerTabStop opts) start $ contents' <> cr + +-- | Convert ordered list item (a list of blocks) to haddock +orderedListItemToHaddock :: WriterOptions -- ^ options + -> String -- ^ list item marker + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToHaddock opts marker items = do + contents <- blockListToHaddock opts items + let sps = case length marker - writerTabStop opts of + n | n > 0 -> text $ replicate n ' ' + _ -> text " " + let start = text marker <> sps + return $ hang (writerTabStop opts) start $ contents <> cr + +-- | Convert definition list item (label, list of blocks) to haddock +definitionListItemToHaddock :: WriterOptions + -> ([Inline],[[Block]]) + -> State WriterState Doc +definitionListItemToHaddock opts (label, defs) = do + labelText <- inlineListToHaddock opts label + defs' <- mapM (mapM (blockToHaddock opts)) defs + let contents = vcat $ map (\d -> hang 4 empty $ vcat d <> cr) defs' + return $ nowrap (brackets labelText) <> cr <> contents <> cr + +-- | Convert list of Pandoc block elements to 
haddock +blockListToHaddock :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToHaddock opts blocks = + mapM (blockToHaddock opts) blocks >>= return . cat + +-- | Convert list of Pandoc inline elements to haddock. +inlineListToHaddock :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToHaddock opts lst = + mapM (inlineToHaddock opts) lst >>= return . cat + +-- | Convert Pandoc inline element to haddock. +inlineToHaddock :: WriterOptions -> Inline -> State WriterState Doc +inlineToHaddock opts (Span (ident,_,_) ils) = do + contents <- inlineListToHaddock opts ils + if not (null ident) && null ils + then return $ "#" <> text ident <> "#" + else return contents +inlineToHaddock opts (Emph lst) = do + contents <- inlineListToHaddock opts lst + return $ "/" <> contents <> "/" +inlineToHaddock opts (Strong lst) = do + contents <- inlineListToHaddock opts lst + return $ "__" <> contents <> "__" +inlineToHaddock opts (Strikeout lst) = do + contents <- inlineListToHaddock opts lst + -- not supported in haddock, but we fake it: + return $ "~~" <> contents <> "~~" +-- not supported in haddock: +inlineToHaddock opts (Superscript lst) = inlineListToHaddock opts lst +-- not supported in haddock: +inlineToHaddock opts (Subscript lst) = inlineListToHaddock opts lst +-- not supported in haddock: +inlineToHaddock opts (SmallCaps lst) = inlineListToHaddock opts lst +inlineToHaddock opts (Quoted SingleQuote lst) = do + contents <- inlineListToHaddock opts lst + return $ "‘" <> contents <> "’" +inlineToHaddock opts (Quoted DoubleQuote lst) = do + contents <- inlineListToHaddock opts lst + return $ "“" <> contents <> "”" +inlineToHaddock _ (Code _ str) = + return $ "@" <> text (escapeString str) <> "@" +inlineToHaddock _ (Str str) = do + return $ text $ escapeString str +inlineToHaddock opts (Math mt str) = do + let adjust x = case mt of + DisplayMath -> cr <> x <> cr + InlineMath -> x + adjust `fmap` 
(inlineListToHaddock opts $ readTeXMath' mt str) +inlineToHaddock _ (RawInline f str) + | f == "haddock" = return $ text str + | otherwise = return empty +-- no line break in haddock (see above on CodeBlock) +inlineToHaddock _ (LineBreak) = return cr +inlineToHaddock _ Space = return space +inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst +inlineToHaddock opts (Link txt (src, _)) = do + linktext <- inlineListToHaddock opts txt + let useAuto = isURI src && + case txt of + [Str s] | escapeURI s == src -> True + _ -> False + return $ nowrap $ "<" <> text src <> + (if useAuto then empty else space <> linktext) <> ">" +inlineToHaddock opts (Image alternate (source, tit)) = do + linkhaddock <- inlineToHaddock opts (Link alternate (source, tit)) + return $ "<" <> linkhaddock <> ">" +-- haddock doesn't have notes, but we can fake it: +inlineToHaddock opts (Note contents) = do + modify (\st -> st{ stNotes = contents : stNotes st }) + st <- get + let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st) + return $ "[" <> ref <> "]" -- cgit v1.2.3 From 0d364a284d9a43a419abff94d227ad341ed82c2c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 17:49:32 -0700 Subject: Added haddock template. 
--- data/templates | 2 +- pandoc.cabal | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/data/templates b/data/templates index 2c51fb0c4..f3b3f2dda 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 2c51fb0c4045542dcfaa07aff37778adc9452e9d +Subproject commit f3b3f2dda6737b9c6af03e1f4701861394032778 diff --git a/pandoc.cabal b/pandoc.cabal index 058e82a7f..b064f39a7 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -62,6 +62,7 @@ Data-Files: data/templates/default.revealjs, data/templates/default.dzslides, data/templates/default.asciidoc, + data/templates/default.haddock, data/templates/default.textile, data/templates/default.org, data/templates/default.epub, -- cgit v1.2.3 From ff6a2baeb9940276fbbaf486a5711378d13cc1e1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 17:49:59 -0700 Subject: More polish on Haddock reader/writer. --- src/Text/Pandoc/Readers/Haddock.hs | 46 +++++++++++++++++++++++++++++++++----- src/Text/Pandoc/Writers/Haddock.hs | 23 +++++-------------- 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index f184eabdb..a3dfb7c3c 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -45,7 +45,9 @@ docHToBlocks d' = DocString _ -> inlineFallback DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h) DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h - DocParagraph ils -> B.para $ docHToInlines False ils + DocParagraph x -> let (ils, rest) = getInlines x + in (B.para $ docHToInlines False ils) + <> docHToBlocks rest DocIdentifier _ -> inlineFallback DocIdentifierUnchecked _ -> inlineFallback DocModule s -> B.plain $ docHToInlines False $ DocModule s @@ -60,7 +62,7 @@ docHToBlocks d' = DocDefList items -> B.definitionList (map (\(d,t) -> (docHToInlines False d, [consolidatePlains $ docHToBlocks t])) items) - DocCodeBlock (DocString s) -> 
B.codeBlockWith ("",["haskell"],[]) s + DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s DocCodeBlock d -> B.para $ docHToInlines True d DocHyperlink _ -> inlineFallback DocPic _ -> inlineFallback @@ -92,9 +94,9 @@ docHToInlines isCode d' = $ map B.code $ splitBy (=='\n') s | otherwise -> B.text s DocParagraph _ -> mempty - DocIdentifier (_,s,_) -> B.codeWith ("",["haskell"],[]) s - DocIdentifierUnchecked s -> B.codeWith ("",["haskell"],[]) s - DocModule s -> B.codeWith ("",["haskell"],[]) s + DocIdentifier (_,s,_) -> B.codeWith ("",["haskell","identifier"],[]) s + DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s + DocModule s -> B.codeWith ("",["haskell","module"],[]) s DocWarning _ -> mempty -- TODO DocEmphasis d -> B.emph (docHToInlines isCode d) DocMonospaced (DocString s) -> B.code s @@ -113,6 +115,40 @@ docHToInlines isCode d' = DocProperty _ -> mempty DocExamples _ -> mempty +getInlines :: DocH String Identifier -> (DocH String Identifier, DocH String Identifier) +getInlines (DocAppend x y) = if isInline x + then let (a, b) = getInlines y + in (DocAppend x a, b) + else (DocEmpty, DocAppend x y) +getInlines x = if isInline x + then (x, DocEmpty) + else (DocEmpty, x) + +isInline :: DocH String Identifier -> Bool +isInline d' = + case d' of + DocEmpty -> True + DocAppend d1 _ -> isInline d1 + DocString _ -> True + DocParagraph _ -> False + DocIdentifier _ -> True + DocIdentifierUnchecked _ -> True + DocModule _ -> True + DocWarning _ -> True + DocEmphasis _ -> True + DocMonospaced _ -> True + DocBold _ -> True + DocHeader _ -> False + DocUnorderedList _ -> False + DocOrderedList _ -> False + DocDefList _ -> False + DocCodeBlock _ -> False + DocHyperlink _ -> True + DocPic _ -> True + DocAName _ -> True + DocProperty _ -> False + DocExamples _ -> False + -- | Create an 'Example', stripping superfluous characters as appropriate makeExample :: String -> String -> [String] -> Blocks makeExample prompt expression result = diff 
--git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 4d6b8e69f..36f57c2b7 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -80,22 +80,11 @@ pandocToHaddock opts (Pandoc meta blocks) = do -- | Return haddock representation of notes. notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc notesToHaddock opts notes = - mapM (\(num, note) -> noteToHaddock opts num note) (zip [1..] notes) >>= - return . vsep - --- | Return haddock representation of a note. -noteToHaddock :: WriterOptions -> Int -> [Block] -> State WriterState Doc -noteToHaddock opts num blocks = do - contents <- blockListToHaddock opts blocks - let num' = text $ writerIdentifierPrefix opts ++ show num - let marker = text "[" <> num' <> text "]" - let markerSize = 4 + offset num' - let spacer = case writerTabStop opts - markerSize of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " - return $ if isEnabled Ext_footnotes opts - then hang (writerTabStop opts) (marker <> spacer) contents - else marker <> spacer <> contents + if null notes + then return empty + else do + contents <- blockToHaddock opts $ OrderedList (1,DefaultStyle,DefaultDelim) notes + return $ text "#notes#" <> blankline <> contents -- | Escape special characters for Haddock. escapeString :: String -> String @@ -354,4 +343,4 @@ inlineToHaddock opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st) - return $ "[" <> ref <> "]" + return $ "<#notes [" <> ref <> "]>" -- cgit v1.2.3 From cf15b929f833ea31b35bafb40f782e113546caa0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 17:55:21 -0700 Subject: Added haddock writer tests. 
--- pandoc.cabal | 2 + tests/Tests/Old.hs | 3 +- tests/haddock-reader.native | 4 +- tests/tables.haddock | 72 +++++ tests/writer.haddock | 660 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 738 insertions(+), 3 deletions(-) create mode 100644 tests/tables.haddock create mode 100644 tests/writer.haddock diff --git a/pandoc.cabal b/pandoc.cabal index b064f39a7..a0312e95b 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -135,6 +135,7 @@ Extra-Source-Files: tests/tables.opendocument, tests/tables.org, tests/tables.asciidoc, + tests/tables.haddock, tests/tables.texinfo, tests/tables.rst, tests/tables.rtf, @@ -156,6 +157,7 @@ Extra-Source-Files: tests/writer.opendocument, tests/writer.org, tests/writer.asciidoc, + tests/writer.haddock, tests/writer.rst, tests/writer.rtf, tests/writer.texinfo, diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 424e1b7c5..fa01b1358 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -131,7 +131,8 @@ tests = [ testGroup "markdown" "opml-reader.opml" "opml-reader.native" ] , testGroup "haddock" - [ test "reader" ["-r", "haddock", "-w", "native", "-s"] + [ testGroup "writer" $ writerTests "haddock" + , test "reader" ["-r", "haddock", "-w", "native", "-s"] "haddock-reader.haddock" "haddock-reader.native" ] , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f) diff --git a/tests/haddock-reader.native b/tests/haddock-reader.native index 8edb0b29a..f50fae4ec 100644 --- a/tests/haddock-reader.native +++ b/tests/haddock-reader.native @@ -4,14 +4,14 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "*",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph,",Space,Str "not",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str ">",Space,Str "This",Space,Str "sentence",Space,Str "is",Space,Str "not",Space,Str "code.",Space,Str ">>>",Space,Str "This",Space,Str "is",Space,Str "not",Space,Str "an",Space,Str "example."] ,Para [Str "The",Space,Str "references",Space,Str "\955,",Space,Str 
"\955",Space,Str "and",Space,Str "\955",Space,Str "all",Space,Str "represent",Space,Str "the",Space,Str "lower-case",Space,Str "letter",Space,Str "lambda."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "code",Space,Str "block:"] -,CodeBlock ("",["haskell"],[]) "map :: (a -> b) -> [a] -> [b]\nmap _ [] = []\nmap f (x:xs) = f x : map f xs" +,CodeBlock ("",[],[]) "map :: (a -> b) -> [a] -> [b]\nmap _ [] = []\nmap f (x:xs) = f x : map f xs" ,Para [Str "This",Space,Str "is",Space,Str "another",Space,Str "code",Space,Str "block:"] ,Para [Code ("",[],[]) "f x = x + x.",LineBreak,Code ("",[],[]) "The @...@ code block ",Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Code ("",["haskell"],[]) "Module.Foo",Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\"Hello World\""] ,Para [Str "Haddock",Space,Str "supports",Space,Str "REPL",Space,Str "examples:"] ,Para [Code ("",["haskell","expr"],[]) "fib 10",LineBreak,Code ("",["result"],[]) "55"] ,Para [Code ("",["haskell","expr"],[]) "putStrLn \"foo\\nbar\"",LineBreak,Code ("",["result"],[]) "foo",LineBreak,Code ("",["result"],[]) "bar"] ,Para [Str "That",Space,Str "was",Space,Emph [Str "really",Space,Str "cool"],Str "!",Space,Str "I",Space,Str "had",Space,Str "no",Space,Str "idea",Space,Code ("",[],[]) "fib 10 = 55",Str "."] -,Para [Str "This",Space,Str "module",Space,Str "defines",Space,Str "the",Space,Str "type",Space,Code ("",["haskell"],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",["haskell"],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",["haskell"],[]) "Foo",Space,Str "module."] +,Para [Str "This",Space,Str "module",Space,Str "defines",Space,Str 
"the",Space,Str "type",Space,Code ("",[],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",[],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",[],[]) "Foo",Space,Str "module."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "bulleted",Space,Str "list:"] ,BulletList [[Para [Str "first",Space,Str "item"]] diff --git a/tests/tables.haddock b/tests/tables.haddock new file mode 100644 index 000000000..e054dc072 --- /dev/null +++ b/tests/tables.haddock @@ -0,0 +1,72 @@ +Simple table with caption: + +> Right Left Center Default +> ------- ------ -------- --------- +> 12 12 12 12 +> 123 123 123 123 +> 1 1 1 1 +> +Simple table without caption: + +> Right Left Center Default +> ------- ------ -------- --------- +> 12 12 12 12 +> 123 123 123 123 +> 1 1 1 1 +> +Simple table indented two spaces: + +> Right Left Center Default +> ------- ------ -------- --------- +> 12 12 12 12 +> 123 123 123 123 +> 1 1 1 1 +> +Multiline table with caption: + +> -------------------------------------------------------------- +> Centered Left Right Default aligned +> Header Aligned Aligned +> ----------- ---------- ------------ -------------------------- +> First row 12.0 Example of a row that +> spans multiple lines. +> +> Second row 5.0 Here\'s another one. Note +> the blank line between +> rows. +> -------------------------------------------------------------- +> +Multiline table without caption: + +> -------------------------------------------------------------- +> Centered Left Right Default aligned +> Header Aligned Aligned +> ----------- ---------- ------------ -------------------------- +> First row 12.0 Example of a row that +> spans multiple lines. 
+> +> Second row 5.0 Here\'s another one. Note +> the blank line between +> rows. +> -------------------------------------------------------------- +> +Table without column headers: + +> ----- ----- ----- ----- +> 12 12 12 12 +> 123 123 123 123 +> 1 1 1 1 +> ----- ----- ----- ----- +> +Multiline table without column headers: + +> ----------- ---------- ------------ -------------------------- +> First row 12.0 Example of a row that +> spans multiple lines. +> +> Second row 5.0 Here\'s another one. Note +> the blank line between +> rows. +> ----------- ---------- ------------ -------------------------- +> + diff --git a/tests/writer.haddock b/tests/writer.haddock new file mode 100644 index 000000000..7b0811220 --- /dev/null +++ b/tests/writer.haddock @@ -0,0 +1,660 @@ +This is a set of tests for pandoc. Most of them are adapted from John Gruber’s +markdown test suite. + +-------------- + += Headers +#headers# + +== Level 2 with an +#level-2-with-an-embedded-link# + +=== Level 3 with /emphasis/ +#level-3-with-emphasis# + +==== Level 4 +#level-4# + +===== Level 5 +#level-5# + += Level 1 +#level-1# + +== Level 2 with /emphasis/ +#level-2-with-emphasis# + +=== Level 3 +#level-3# + +with no blank line + +== Level 2 +#level-2# + +with no blank line + +-------------- + += Paragraphs +#paragraphs# + +Here’s a regular paragraph. + +In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. +Because a hard-wrapped line in the middle of a paragraph looked like a list +item. + +Here’s one with a bullet. * criminey. + +There should be a hard line break +here. + +-------------- + += Block Quotes +#block-quotes# + +E-mail style: + +This is a block quote. It is pretty short. + +Code in a block quote: + +> sub status { +> print "working"; +> } + +A list: + +1. item one +2. item two + +Nested block quotes: + +nested + +nested + +This should not be a block quote: 2 > 1. + +And a following paragraph. 
+ +-------------- + += Code Blocks +#code-blocks# + +Code: + +> ---- (should be four hyphens) +> +> sub status { +> print "working"; +> } +> +> this code block is indented by one tab + +And: + +> this code block is indented by two tabs +> +> These should not be escaped: \$ \\ \> \[ \{ + +-------------- + += Lists +#lists# + +== Unordered +#unordered# + +Asterisks tight: + +- asterisk 1 +- asterisk 2 +- asterisk 3 + +Asterisks loose: + +- asterisk 1 + +- asterisk 2 + +- asterisk 3 + +Pluses tight: + +- Plus 1 +- Plus 2 +- Plus 3 + +Pluses loose: + +- Plus 1 + +- Plus 2 + +- Plus 3 + +Minuses tight: + +- Minus 1 +- Minus 2 +- Minus 3 + +Minuses loose: + +- Minus 1 + +- Minus 2 + +- Minus 3 + +== Ordered +#ordered# + +Tight: + +1. First +2. Second +3. Third + +and: + +1. One +2. Two +3. Three + +Loose using tabs: + +1. First + +2. Second + +3. Third + +and using spaces: + +1. One + +2. Two + +3. Three + +Multiple paragraphs: + +1. Item 1, graf one. + + Item 1. graf two. The quick brown fox jumped over the lazy dog’s back. + +2. Item 2. + +3. Item 3. + +== Nested +#nested# + +- Tab + - Tab + - Tab + +Here’s another: + +1. First +2. Second: + - Fee + - Fie + - Foe + +3. Third + +Same thing but with paragraphs: + +1. First + +2. Second: + + - Fee + - Fie + - Foe + +3. Third + +== Tabs and spaces +#tabs-and-spaces# + +- this is a list item indented with tabs + +- this is a list item indented with spaces + + - this is an example list item indented with tabs + + - this is an example list item indented with spaces + +== Fancy list markers +#fancy-list-markers# + +(2) begins with 2 +(3) and now 3 + + with a continuation + + iv. sublist with roman numerals, starting with 4 + v. more items + (A) a subsublist + (B) a subsublist + +Nesting: + +A. Upper Alpha + I. Upper Roman. + (6) Decimal start with 6 + c) Lower alpha with paren + +Autonumbering: + +1. Autonumber. +2. More. + 1. Nested. + +Should not be a list item: + +M.A. 2007 + +B. 
Williams + +-------------- + += Definition Lists +#definition-lists# + +Tight using spaces: + +[apple] + red fruit +[orange] + orange fruit +[banana] + yellow fruit + +Tight using tabs: + +[apple] + red fruit +[orange] + orange fruit +[banana] + yellow fruit + +Loose: + +[apple] + red fruit + +[orange] + orange fruit + +[banana] + yellow fruit + +Multiple blocks with italics: + +[/apple/] + red fruit + + contains seeds, crisp, pleasant to taste + +[/orange/] + orange fruit + + > { orange code block } + + orange block quote + +Multiple definitions, tight: + +[apple] + red fruit + computer +[orange] + orange fruit + bank + +Multiple definitions, loose: + +[apple] + red fruit + + computer + +[orange] + orange fruit + + bank + +Blank line after term, indented marker, alternate markers: + +[apple] + red fruit + + computer + +[orange] + orange fruit + + 1. sublist + 2. sublist + += HTML Blocks +#html-blocks# + +Simple block on one line: + +foo + +And nested without indentation: + +foo + +bar + +Interpreted markdown in a table: + +This is /emphasized/ +And this is __strong__ +Here’s a simple block: + +foo + +This should be a code block, though: + +>
+> foo +>
+ +As should this: + +>
foo
+ +Now, nested: + +foo + +This should just be an HTML comment: + +Multiline: + +Code block: + +> + +Just plain comment, with trailing spaces on the line: + +Code: + +>
+ +Hr’s: + +-------------- + += Inline Markup +#inline-markup# + +This is /emphasized/, and so /is this/. + +This is __strong__, and so __is this__. + +An //. + +__/This is strong and em./__ + +So is __/this/__ word. + +__/This is strong and em./__ + +So is __/this/__ word. + +This is code: @>@, @$@, @\\@, @\\$@, @\@. + +~~This is /strikeout/.~~ + +Superscripts: abcd a/hello/ ahello there. + +Subscripts: H2O, H23O, Hmany of themO. + +These should not be superscripts or subscripts, because of the unescaped +spaces: a^b c^d, a~b c~d. + +-------------- + += Smart quotes, ellipses, dashes +#smart-quotes-ellipses-dashes# + +“Hello,” said the spider. “‘Shelob’ is my name.” + +‘A’, ‘B’, and ‘C’ are letters. + +‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’ + +‘He said, “I want to go.”’ Were you alive in the 70’s? + +Here is some quoted ‘@code@’ and a +“”. + +Some dashes: one—two — three—four — five. + +Dashes between numbers: 5–7, 255–66, 1987–1999. + +Ellipses…and…and…. + +-------------- + += LaTeX +#latex# + +- +- 2 + 2 = 4 +- /x/ ∈ /y/ +- /α/ ∧ /ω/ +- 223 +- /p/-Tree +- Here’s some display math: + $$\\frac{d}{dx}f(x)=\\lim_{h\\to 0}\\frac{f(x+h)-f(x)}{h}$$ +- Here’s one that has a line break in it: /α/ + /ω/ × /x/2. + +These shouldn’t be math: + +- To get the famous equation, write @$e = mc^2$@. +- $22,000 is a /lot/ of money. So is $34,000. (It worked if “lot” is + emphasized.) +- Shoes ($20) and socks ($5). +- Escaped @$@: $73 /this should be emphasized/ 23$. + +Here’s a LaTeX table: + +-------------- + += Special Characters +#special-characters# + +Here is some unicode: + +- I hat: Î +- o umlaut: ö +- section: § +- set membership: ∈ +- copyright: © + +AT&T has an ampersand in their name. + +AT&T is another way to write it. + +This & that. + +4 \< 5. + +6 > 5. 
+ +Backslash: \\ + +Backtick: \` + +Asterisk: * + +Underscore: _ + +Left brace: { + +Right brace: } + +Left bracket: [ + +Right bracket: ] + +Left paren: ( + +Right paren: ) + +Greater-than: > + +Hash: # + +Period: . + +Bang: ! + +Plus: + + +Minus: - + +-------------- + += Links +#links# + +== Explicit +#explicit# + +Just a . + +. + +. + +. + + + + + + + + + +< Empty>. + +== Reference +#reference# + +Foo . + +Foo . + +Foo . + +With . + + by itself should be a link. + +Indented . + +Indented . + +Indented . + +This should [not][] be a link. + +> [not]: /url + +Foo . + +Foo . + +== With ampersands +#with-ampersands# + +Here’s a . + +Here’s a link with an amersand in the link text: . + +Here’s an . + +Here’s an . + +== Autolinks +#autolinks# + +With an ampersand: + +- In a list? +- +- It should. + +An e-mail address: + +Blockquoted: + +Auto-links should not occur here: @\@ + +> or here: + +-------------- + += Images +#images# + +From “Voyage dans la Lune” by Georges Melies (1902): + +<> + +Here is a movie <> icon. + +-------------- + += Footnotes +#footnotes# + +Here is a footnote reference,<#notes [1]> and another.<#notes [2]> This should +/not/ be a footnote reference, because it contains a space.[^my note] Here is +an inline note.<#notes [3]> + +Notes can go in quotes.<#notes [4]> + +1. And in list items.<#notes [5]> + +This paragraph should not be part of the note, as it is not indented. + +#notes# + +1. Here is the footnote. It can go anywhere after the footnote reference. It + need not be placed at the end of the document. + +2. Here’s the long note. This one contains multiple blocks. + + Subsequent blocks are indented to show that they belong to the footnote + (as with list items). + + > { } + + If you want, you can indent every line, but you can also be lazy and just + indent the first line of each block. + +3. This is /easier/ to type. Inline notes may contain + and @]@ verbatim characters, as well as + [bracketed text]. + +4. In quote. + +5. In list. 
-- cgit v1.2.3 From bc037b69a4e4868c9a5eab5b0ee12a41118da02a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 18:04:08 -0700 Subject: Revised haddock reader tests for changes in reader. --- tests/haddock-reader.native | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/haddock-reader.native b/tests/haddock-reader.native index f50fae4ec..b62189046 100644 --- a/tests/haddock-reader.native +++ b/tests/haddock-reader.native @@ -6,12 +6,12 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "code",Space,Str "block:"] ,CodeBlock ("",[],[]) "map :: (a -> b) -> [a] -> [b]\nmap _ [] = []\nmap f (x:xs) = f x : map f xs" ,Para [Str "This",Space,Str "is",Space,Str "another",Space,Str "code",Space,Str "block:"] -,Para [Code ("",[],[]) "f x = x + x.",LineBreak,Code ("",[],[]) "The @...@ code block ",Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Code ("",["haskell"],[]) "Module.Foo",Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\"Hello World\""] +,Para [Code ("",[],[]) "f x = x + x.",LineBreak,Code ("",[],[]) "The @...@ code block ",Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Code ("",["haskell","module"],[]) "Module.Foo",Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\"Hello World\""] ,Para [Str "Haddock",Space,Str "supports",Space,Str "REPL",Space,Str "examples:"] -,Para [Code ("",["haskell","expr"],[]) "fib 10",LineBreak,Code ("",["result"],[]) "55"] -,Para [Code ("",["haskell","expr"],[]) "putStrLn \"foo\\nbar\"",LineBreak,Code ("",["result"],[]) "foo",LineBreak,Code ("",["result"],[]) "bar"] +,Para [Code ("",["prompt"],[]) ">>>",Space,Code ("",["haskell","expr"],[]) "fib 10",LineBreak,Code ("",["result"],[]) "55"] +,Para [Code ("",["prompt"],[]) ">>>",Space,Code ("",["haskell","expr"],[]) "putStrLn \"foo\\nbar\"",LineBreak,Code ("",["result"],[]) "foo",LineBreak,Code ("",["result"],[]) "bar"] ,Para [Str "That",Space,Str 
"was",Space,Emph [Str "really",Space,Str "cool"],Str "!",Space,Str "I",Space,Str "had",Space,Str "no",Space,Str "idea",Space,Code ("",[],[]) "fib 10 = 55",Str "."] -,Para [Str "This",Space,Str "module",Space,Str "defines",Space,Str "the",Space,Str "type",Space,Code ("",[],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",[],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",[],[]) "Foo",Space,Str "module."] +,Para [Str "This",Space,Str "module",Space,Str "defines",Space,Str "the",Space,Str "type",Space,Code ("",["haskell","identifier"],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",["haskell","identifier"],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",["haskell","module"],[]) "Foo",Space,Str "module."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "bulleted",Space,Str "list:"] ,BulletList [[Para [Str "first",Space,Str "item"]] @@ -23,9 +23,9 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "definition",Space,Str "list:"] ,DefinitionList [([Code ("",[],[]) "foo"], - [[Plain [Str "The",Space,Str "description",Space,Str "of",Space,Code ("",[],[]) "foo",Str "."]]]) + [[Para [Str "The",Space,Str "description",Space,Str "of",Space,Code ("",[],[]) "foo",Str "."]]]) ,([Code ("",[],[]) "bar"], - [[Plain [Str "The",Space,Str "description",Space,Str 
"of",Space,Code ("",[],[]) "bar",Str "."]]])] + [[Para [Str "The",Space,Str "description",Space,Str "of",Space,Code ("",[],[]) "bar",Str "."]]])] ,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "link:",Space,Link [Str "http://haskell.org"] ("http://haskell.org","http://haskell.org")] ,Para [Link [Str "Haskell"] ("http://haskell.org","http://haskell.org"),Space,Str "is",Space,Str "a",Space,Str "fun",Space,Str "language!"] ,Para [Link [Str "Click",Space,Str "Here!"] ("http://example.com","http://example.com")]] -- cgit v1.2.3 From c4182b39ca009f02fc4e0768056d37d64b93df7c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 18:08:41 -0700 Subject: Small fix to haddock "tables". --- src/Text/Pandoc/Writers/Haddock.hs | 4 ++-- tests/tables.haddock | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 36f57c2b7..1939d3a6d 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -129,7 +129,7 @@ blockToHaddock opts (BlockQuote blocks) = -- Haddock doesn't have tables. Use haddock tables in code. 
blockToHaddock opts (Table caption aligns widths headers rows) = do caption' <- inlineListToHaddock opts caption - let caption'' = if null caption || not (isEnabled Ext_table_captions opts) + let caption'' = if null caption then empty else blankline <> caption' <> blankline rawHeaders <- mapM (blockListToHaddock opts) headers @@ -148,7 +148,7 @@ blockToHaddock opts (Table caption aligns widths headers rows) = do | otherwise -> fmap (id,) $ gridTable opts (all null headers) aligns widths rawHeaders rawRows - return $ prefixed "> " $ nst $ tbl $$ blankline $$ caption'' $$ blankline + return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline blockToHaddock opts (BulletList items) = do contents <- mapM (bulletListItemToHaddock opts) items return $ cat contents <> blankline diff --git a/tests/tables.haddock b/tests/tables.haddock index e054dc072..413ec97ad 100644 --- a/tests/tables.haddock +++ b/tests/tables.haddock @@ -6,6 +6,8 @@ Simple table with caption: > 123 123 123 123 > 1 1 1 1 > +> Demonstration of simple table syntax. + Simple table without caption: > Right Left Center Default @@ -13,7 +15,7 @@ Simple table without caption: > 12 12 12 12 > 123 123 123 123 > 1 1 1 1 -> + Simple table indented two spaces: > Right Left Center Default @@ -22,6 +24,8 @@ Simple table indented two spaces: > 123 123 123 123 > 1 1 1 1 > +> Demonstration of simple table syntax. + Multiline table with caption: > -------------------------------------------------------------- @@ -36,6 +40,8 @@ Multiline table with caption: > rows. > -------------------------------------------------------------- > +> Here\'s the caption. It may span multiple lines. + Multiline table without caption: > -------------------------------------------------------------- @@ -49,7 +55,7 @@ Multiline table without caption: > the blank line between > rows. 
> -------------------------------------------------------------- -> + Table without column headers: > ----- ----- ----- ----- @@ -57,7 +63,7 @@ Table without column headers: > 123 123 123 123 > 1 1 1 1 > ----- ----- ----- ----- -> + Multiline table without column headers: > ----------- ---------- ------------ -------------------------- @@ -68,5 +74,4 @@ Multiline table without column headers: > the blank line between > rows. > ----------- ---------- ------------ -------------------------- -> -- cgit v1.2.3 From de7b3a3d08264d6aa755436583d53f9a61252fa2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 18:11:01 -0700 Subject: Haddock writer: Only use Decimal list style. --- src/Text/Pandoc/Writers/Haddock.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 1939d3a6d..59d979ea8 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -152,8 +152,8 @@ blockToHaddock opts (Table caption aligns widths headers rows) = do blockToHaddock opts (BulletList items) = do contents <- mapM (bulletListItemToHaddock opts) items return $ cat contents <> blankline -blockToHaddock opts (OrderedList (start,sty,delim) items) = do - let attribs = (start, sty, delim) +blockToHaddock opts (OrderedList (start,_,delim) items) = do + let attribs = (start, Decimal, delim) let markers = orderedListMarkers attribs let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' -- cgit v1.2.3 From 95b6ffcef6428318dd9ca25be6ce6e113ef3c499 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 18:11:08 -0700 Subject: Updated haddock writer tests. 
--- tests/writer.haddock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/writer.haddock b/tests/writer.haddock index 7b0811220..129242153 100644 --- a/tests/writer.haddock +++ b/tests/writer.haddock @@ -240,17 +240,17 @@ Same thing but with paragraphs: with a continuation - iv. sublist with roman numerals, starting with 4 - v. more items - (A) a subsublist - (B) a subsublist + 4. sublist with roman numerals, starting with 4 + 5. more items + (1) a subsublist + (2) a subsublist Nesting: -A. Upper Alpha - I. Upper Roman. +1. Upper Alpha + 1. Upper Roman. (6) Decimal start with 6 - c) Lower alpha with paren + 3) Lower alpha with paren Autonumbering: -- cgit v1.2.3 From 00281559bf9c955ece6b18d48ef487fdc5f4406e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Jun 2014 00:28:23 -0700 Subject: Haddock writer: Use _____ for hrule. Avoids interpretation as list. --- src/Text/Pandoc/Writers/Haddock.hs | 4 ++-- tests/writer.haddock | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 59d979ea8..1c82839d0 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -112,8 +112,8 @@ blockToHaddock _ (RawBlock f str) | f == "haddock" = do return $ text str <> text "\n" | otherwise = return empty -blockToHaddock _ HorizontalRule = - return $ blankline <> text "--------------" <> blankline +blockToHaddock opts HorizontalRule = + return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline blockToHaddock opts (Header level (ident,_,_) inlines) = do contents <- inlineListToHaddock opts inlines let attr' = if null ident diff --git a/tests/writer.haddock b/tests/writer.haddock index 129242153..0772331e3 100644 --- a/tests/writer.haddock +++ b/tests/writer.haddock @@ -1,7 +1,7 @@ This is a set of tests for pandoc. 
Most of them are adapted from John Gruber’s markdown test suite. --------------- +______________________________________________________________________________ = Headers #headers# @@ -34,7 +34,7 @@ with no blank line with no blank line --------------- +______________________________________________________________________________ = Paragraphs #paragraphs# @@ -50,7 +50,7 @@ Here’s one with a bullet. * criminey. There should be a hard line break here. --------------- +______________________________________________________________________________ = Block Quotes #block-quotes# @@ -80,7 +80,7 @@ This should not be a block quote: 2 > 1. And a following paragraph. --------------- +______________________________________________________________________________ = Code Blocks #code-blocks# @@ -101,7 +101,7 @@ And: > > These should not be escaped: \$ \\ \> \[ \{ --------------- +______________________________________________________________________________ = Lists #lists# @@ -264,7 +264,7 @@ M.A. 2007 B. Williams --------------- +______________________________________________________________________________ = Definition Lists #definition-lists# @@ -397,7 +397,7 @@ Code: Hr’s: --------------- +______________________________________________________________________________ = Inline Markup #inline-markup# @@ -427,7 +427,7 @@ Subscripts: H2O, H23O, Hmany of themO. These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d. --------------- +______________________________________________________________________________ = Smart quotes, ellipses, dashes #smart-quotes-ellipses-dashes# @@ -449,7 +449,7 @@ Dashes between numbers: 5–7, 255–66, 1987–1999. Ellipses…and…and…. 
--------------- +______________________________________________________________________________ = LaTeX #latex# @@ -474,7 +474,7 @@ These shouldn’t be math: Here’s a LaTeX table: --------------- +______________________________________________________________________________ = Special Characters #special-characters# @@ -529,7 +529,7 @@ Plus: + Minus: - --------------- +______________________________________________________________________________ = Links #links# @@ -610,7 +610,7 @@ Auto-links should not occur here: @\@ > or here: --------------- +______________________________________________________________________________ = Images #images# @@ -621,7 +621,7 @@ From “Voyage dans la Lune” by Georges Melies (1902): Here is a movie <> icon. --------------- +______________________________________________________________________________ = Footnotes #footnotes# -- cgit v1.2.3 From ceb742b1246f975437d7a0083139d248c94036b5 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 12:05:16 -0400 Subject: Add ReaderOptions to the docx tests This will allow for testing different media embedding (in addition to any other applicable options.) 
--- tests/Tests/Readers/Docx.hs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 0a963ddc6..273f03f4d 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -8,20 +8,23 @@ import Test.Framework import qualified Data.ByteString.Lazy as B import Text.Pandoc.Readers.Docx -compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc) -compareOutput docxFile nativeFile = do +compareOutput :: ReaderOptions -> FilePath -> FilePath -> IO (Pandoc, Pandoc) +compareOutput opts docxFile nativeFile = do df <- B.readFile docxFile nf <- Prelude.readFile nativeFile - return $ (readDocx def df, readNative nf) + return $ (readDocx opts df, readNative nf) -testCompare' :: String -> FilePath -> FilePath -> IO Test -testCompare' name docxFile nativeFile = do - (dp, np) <- compareOutput docxFile nativeFile +testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO Test +testCompareWithOptsIO opts name docxFile nativeFile = do + (dp, np) <- compareOutput opts docxFile nativeFile return $ test id name (dp, np) +testCompareWithOpts :: ReaderOptions -> String -> FilePath -> FilePath -> Test +testCompareWithOpts opts name docxFile nativeFile = + buildTest $ testCompareWithOptsIO opts name docxFile nativeFile + testCompare :: String -> FilePath -> FilePath -> Test -testCompare name docxFile nativeFile = - buildTest $ testCompare' name docxFile nativeFile +testCompare = testCompareWithOpts def tests :: [Test] -- cgit v1.2.3 From 84391a887dc3fae00bc20419ad163ef4af445f0e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Jun 2014 11:51:35 -0700 Subject: Require highlighting-kate >= 0.5.8.3. 
This change to highlighting-kate means that PHP fragments no longer need to start with `= 0.13.1 && < 0.14, base64-bytestring >= 0.1 && < 1.1, zlib >= 0.5 && < 0.6, - highlighting-kate >= 0.5.8.2 && < 0.6, + highlighting-kate >= 0.5.8.3 && < 0.6, data-default >= 0.4 && < 0.6, temporary >= 1.1 && < 1.3, blaze-html >= 0.5 && < 0.8, @@ -352,7 +352,7 @@ Executable pandoc text >= 0.11 && < 1.2, bytestring >= 0.9 && < 0.11, extensible-exceptions >= 0.1 && < 0.2, - highlighting-kate >= 0.5.8.2 && < 0.6, + highlighting-kate >= 0.5.8.3 && < 0.6, aeson >= 0.7.0.5 && < 0.8, yaml >= 0.8.8.2 && < 0.9, containers >= 0.1 && < 0.6, @@ -395,7 +395,7 @@ Test-Suite test-pandoc directory >= 1 && < 1.3, filepath >= 1.1 && < 1.4, process >= 1 && < 1.3, - highlighting-kate >= 0.5.8.2 && < 0.6, + highlighting-kate >= 0.5.8.3 && < 0.6, Diff >= 0.2 && < 0.4, test-framework >= 0.3 && < 0.9, test-framework-hunit >= 0.2 && < 0.4, -- cgit v1.2.3 From 5cb53a48d541b97b5f60968715a5969133196d70 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Jun 2014 14:30:03 -0700 Subject: ImageSize: ignore unknown exif header tag rather than crashing. Some images seem to have tag type of 256, which was causing a runtime error. 
--- src/Text/Pandoc/ImageSize.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index a6d076fa9..9e6b457c0 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -217,7 +217,7 @@ exifHeader hdr = do numentries <- getWord16 let ifdEntry = do tag <- getWord16 >>= \t -> - maybe (fail $ "Unknown tag type " ++ show t) return + maybe (return UnknownTagType) return (M.lookup t tagTypeTable) dataFormat <- getWord16 numComponents <- getWord32 @@ -337,6 +337,7 @@ data TagType = ImageDescription | SensingMethod | FileSource | SceneType + | UnknownTagType deriving (Show, Eq, Ord) tagTypeTable :: M.Map Word16 TagType -- cgit v1.2.3 From 86fc44d6b3f82a2b274d4b592d1dd6152bd1eaf5 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 17:53:52 -0400 Subject: Add literal tabs to parser. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 22e9dd909..18200bcf9 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -466,7 +466,7 @@ data Run = Run RunStyle [RunElem] | Endnote String deriving Show -data RunElem = TextRun String | LnBrk +data RunElem = TextRun String | LnBrk | Tab deriving Show data RunStyle = RunStyle { isBold :: Bool @@ -545,6 +545,9 @@ elemToRunElem ns element | qName (elName element) == "br" && qURI (elName element) == (lookup "w" ns) = Just $ LnBrk + | qName (elName element) == "tab" && + qURI (elName element) == (lookup "w" ns) = + Just $ Tab | otherwise = Nothing -- cgit v1.2.3 From 0e7d2dbd4304902cb6c6d4e9618592b5148dc598 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 17:55:02 -0400 Subject: Have Docx reader properly interpret tabs. 
--- src/Text/Pandoc/Readers/Docx.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 4035cde99..c43879ed9 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -134,10 +134,12 @@ codeDivs = ["SourceCode"] runElemToInlines :: RunElem -> [Inline] runElemToInlines (TextRun s) = strToInlines s runElemToInlines (LnBrk) = [LineBreak] +runElemToInlines (Tab) = [Space] runElemToString :: RunElem -> String runElemToString (TextRun s) = s runElemToString (LnBrk) = ['\n'] +runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString -- cgit v1.2.3 From a934db9a320ec76e15e62954b75d0e8d2d972244 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 19:28:55 -0400 Subject: Introduce blockNormalize This will help take care of spaces introduced at the beginning of strings. --- src/Text/Pandoc/Readers/Docx.hs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index c43879ed9..9c1d0c5e6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -150,6 +150,19 @@ strNormalize (Str "" : ils) = strNormalize ils strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) strNormalize (il:ils) = il : (strNormalize ils) +blockNormalize :: Block -> Block +blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils) +blockNormalize (Plain ils) = Plain $ strNormalize ils +blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) +blockNormalize (Para ils) = Para $ strNormalize ils +blockNormalize (Header n attr (Space : ils)) = + blockNormalize $ Header n attr ils +blockNormalize (Table (Space : ils) align width hdr cells) = + blockNormalize $ Table ils align width hdr cells +blockNormalize (Table ils align width hdr cells) = + Table (strNormalize ils) align width hdr 
cells +blockNormalize blk = blk + runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] runToInlines _ _ (Run rs runElems) | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = @@ -296,7 +309,7 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = bottomUp removeEmptyPars $ - bottomUp strNormalize $ + bottomUp blockNormalize $ bottomUp spanRemove $ bottomUp divRemove $ map (makeHeaderAnchors) $ -- cgit v1.2.3 From d19996d7438fbd2ce56cf3ce46b99cd71437cacb Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 19:29:59 -0400 Subject: Fix notes test. This previously allowed spaces at the beginning of a paragraph. --- tests/docx.notes.native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docx.notes.native b/tests/docx.notes.native index 1e9b6bba4..5a94b1999 100644 --- a/tests/docx.notes.native +++ b/tests/docx.notes.native @@ -1,2 +1,2 @@ [Header 2 ("",[],[]) [Str "A",Space,Str "footnote"] -,Para [Str "Test",Space,Str "footnote.",Note [Para [Space,Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Space,Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]] +,Para [Str "Test",Space,Str "footnote.",Note [Para [Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]] -- cgit v1.2.3 From da0d1d27ac98ca28e66bc2df3de2bce738068fb8 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 19:33:22 -0400 Subject: Add tabs tests. 
--- tests/Tests/Readers/Docx.hs | 7 ++++++- tests/docx.tabs.docx | Bin 0 -> 12919 bytes tests/docx.tabs.native | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/docx.tabs.docx create mode 100644 tests/docx.tabs.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 273f03f4d..3a13641a9 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -44,7 +44,12 @@ tests = [ testGroup "inlines" , testCompare "handling unicode input" "docx.unicode.docx" - "docx.unicode.native"] + "docx.unicode.native" + , testCompare + "literal tabs" + "docx.tabs.docx" + "docx.tabs.native" + ] , testGroup "blocks" [ testCompare "headers" diff --git a/tests/docx.tabs.docx b/tests/docx.tabs.docx new file mode 100644 index 000000000..6ff5f4bb1 Binary files /dev/null and b/tests/docx.tabs.docx differ diff --git a/tests/docx.tabs.native b/tests/docx.tabs.native new file mode 100644 index 000000000..05461f20b --- /dev/null +++ b/tests/docx.tabs.native @@ -0,0 +1,2 @@ +[Para [Str "Some",Space,Str "text",Space,Str "separated",Space,Str "by",Space,Str "a",Space,Str "tab."] +,Para [Str "Tab-indented",Space,Str "text."]] -- cgit v1.2.3 From 3c059dbe600608f4166b02c63d7153ace3156665 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Jun 2014 23:24:28 -0700 Subject: HTML reader: Allow space between `` and ``. Test case: ```
X Y
1 2
``` --- src/Text/Pandoc/Readers/HTML.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index d27afc543..204239923 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -262,6 +262,7 @@ pTable = try $ do pCol :: TagParser Double pCol = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "col" []) + skipMany pBlank optional $ pSatisfy (~== TagClose "col") skipMany pBlank return $ case lookup "width" attribs of -- cgit v1.2.3 From 557b302731411057cf12e62c87d98752f713d5d0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Jun 2014 23:31:17 -0700 Subject: Docx writer: Use Compact style for empty table cells. Otherwise we get overly tall lines when there are empty table cells and the other cells are compact. Closes #1353. --- src/Text/Pandoc/Writers/Docx.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 4e64a79df..31e64f14e 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -571,10 +571,12 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] + let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] $ + [mknode "w:pStyle" [("w:val","Compact")] ()]]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents - then [mknode "w:p" [] ()] + then emptyCell else contents let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt -- cgit v1.2.3 From 7fd48b30e0a6e1e3c02a2b66c76118d10c02636f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 09:30:30 -0400 Subject: Docx reader: Fix hdr handling in block norm `blockNormalize` previously forgot to account for the case in which a Header's inlines did not start 
with a space. --- src/Text/Pandoc/Readers/Docx.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 9c1d0c5e6..84d50a396 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -157,6 +157,8 @@ blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) blockNormalize (Para ils) = Para $ strNormalize ils blockNormalize (Header n attr (Space : ils)) = blockNormalize $ Header n attr ils +blockNormalize (Header n attr ils) = + Header n attr $ strNormalize ils blockNormalize (Table (Space : ils) align width hdr cells) = blockNormalize $ Table ils align width hdr cells blockNormalize (Table ils align width hdr cells) = -- cgit v1.2.3 From 3da515bdb005cf16589b88d80aa4a8a71760e366 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 10:12:28 -0400 Subject: Docx reader: simplify blockNormalize Use a function `stripSpaces`, instead of recursion. Makes it a bit easier to read and maintain, and simplifies normalizing DefinitionList, which was left out the first time.
--- src/Text/Pandoc/Readers/Docx.hs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 84d50a396..08afc94e6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -150,19 +150,17 @@ strNormalize (Str "" : ils) = strNormalize ils strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) strNormalize (il:ils) = il : (strNormalize ils) +stripSpaces :: [Inline] -> [Inline] +stripSpaces ils = + reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils + blockNormalize :: Block -> Block -blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils) -blockNormalize (Plain ils) = Plain $ strNormalize ils -blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) -blockNormalize (Para ils) = Para $ strNormalize ils -blockNormalize (Header n attr (Space : ils)) = - blockNormalize $ Header n attr ils +blockNormalize (Plain ils) = Plain $ strNormalize $ stripSpaces ils +blockNormalize (Para ils) = Para $ strNormalize $ stripSpaces ils blockNormalize (Header n attr ils) = - Header n attr $ strNormalize ils -blockNormalize (Table (Space : ils) align width hdr cells) = - blockNormalize $ Table ils align width hdr cells + Header n attr $ strNormalize $ stripSpaces ils blockNormalize (Table ils align width hdr cells) = - Table (strNormalize ils) align width hdr cells + Table (strNormalize $ stripSpaces ils) align width hdr cells blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] -- cgit v1.2.3 From 03af19a7e12ff3a7f0a396ebed73c6c17f12ad07 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 10:16:32 -0400 Subject: Docx Reader: Normalize DefinitionLists Previously DefinitionList had been left out of `blockNormalize`. Now it is included. 
--- src/Text/Pandoc/Readers/Docx.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 08afc94e6..e62cf6f0e 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -161,6 +161,8 @@ blockNormalize (Header n attr ils) = Header n attr $ strNormalize $ stripSpaces ils blockNormalize (Table ils align width hdr cells) = Table (strNormalize $ stripSpaces ils) align width hdr cells +blockNormalize (DefinitionList pairs) = + DefinitionList $ map (\(ils, blklsts) -> (strNormalize (stripSpaces ils), blklsts)) pairs blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] -- cgit v1.2.3 From 2aa5f58c5b82dd5750e1bf5f30e1936d132104ac Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 10:27:18 -0400 Subject: Docx reader: Add a comment explaining strNormalize `normalize` from Text.Pandoc.Shared is more general. In tests, though, it more than doubles the run time. `strNormalize` does less, but it does what we need. This comment is added for future maintainability. --- src/Text/Pandoc/Readers/Docx.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index e62cf6f0e..8a8bc46a6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -144,6 +144,10 @@ runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString +--- We use this instead of the more general +--- Text.Pandoc.Shared.normalize for reasons of efficiency. For +--- whatever reason, `normalize` makes a run take almost twice as +--- long. 
(It does more, but this does what we need) strNormalize :: [Inline] -> [Inline] strNormalize [] = [] strNormalize (Str "" : ils) = strNormalize ils -- cgit v1.2.3 From f6ae644831aa40f7199ecfc9a1631880639b6312 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 12:31:36 -0400 Subject: Make strNormalize go bottomUp. This was how it used to be before it was folded into blockNormalize. --- src/Text/Pandoc/Readers/Docx.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 8a8bc46a6..479a88161 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -159,14 +159,14 @@ stripSpaces ils = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils blockNormalize :: Block -> Block -blockNormalize (Plain ils) = Plain $ strNormalize $ stripSpaces ils -blockNormalize (Para ils) = Para $ strNormalize $ stripSpaces ils +blockNormalize (Plain ils) = Plain $ bottomUp strNormalize $ stripSpaces ils +blockNormalize (Para ils) = Para $ bottomUp strNormalize $ stripSpaces ils blockNormalize (Header n attr ils) = - Header n attr $ strNormalize $ stripSpaces ils + Header n attr $ bottomUp strNormalize $ stripSpaces ils blockNormalize (Table ils align width hdr cells) = - Table (strNormalize $ stripSpaces ils) align width hdr cells + Table (bottomUp strNormalize $ stripSpaces ils) align width hdr cells blockNormalize (DefinitionList pairs) = - DefinitionList $ map (\(ils, blklsts) -> (strNormalize (stripSpaces ils), blklsts)) pairs + DefinitionList $ map (\(ils, blklsts) -> (bottomUp strNormalize (stripSpaces ils), blklsts)) pairs blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] -- cgit v1.2.3 From 12efffa85a257dbe81137f97334b2c6a7e072777 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 10:24:30 -0700 Subject: LaTeX writer: Fixed strikeout + highlighted code. Closes #1294. 
Previously strikeout highlighted code caused an error. --- src/Text/Pandoc/Writers/LaTeX.hs | 11 ++++++++++- tests/Tests/Writers/LaTeX.hs | 12 +++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index c221b318e..ed735242f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -678,7 +678,9 @@ inlineToLaTeX (Emph lst) = inlineToLaTeX (Strong lst) = inlineListToLaTeX lst >>= return . inCmd "textbf" inlineToLaTeX (Strikeout lst) = do - contents <- inlineListToLaTeX lst + -- we need to protect VERB in an mbox or we get an error + -- see #1294 + contents <- inlineListToLaTeX $ protectCode lst modify $ \s -> s{ stStrikeout = True } return $ inCmd "sout" contents inlineToLaTeX (Superscript lst) = @@ -784,6 +786,13 @@ inlineToLaTeX (Note contents) = do -- note: a \n before } needed when note ends with a Verbatim environment else "\\footnote" <> braces noteContents +protectCode :: [Inline] -> [Inline] +protectCode [] = [] +protectCode (x@(Code ("",[],[]) _) : xs) = x : protectCode xs +protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs + where ltx = RawInline (Format "latex") +protectCode (x : xs) = x : protectCode xs + citationsToNatbib :: [Citation] -> State WriterState Doc citationsToNatbib (one:[]) = citeCommand c p s k diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index 8a9519e2e..6db6542a0 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -8,7 +8,7 @@ import Tests.Helpers import Tests.Arbitrary() latex :: (ToString a, ToPandoc a) => a -> String -latex = writeLaTeX def . toPandoc +latex = writeLaTeX def{ writerHighlight = True } . toPandoc latexListing :: (ToString a, ToPandoc a) => a -> String latexListing = writeLaTeX def{ writerListings = True } . 
toPandoc @@ -54,4 +54,14 @@ tests = [ testGroup "code blocks" (text "Header 1" <> note (plain $ text "note")) =?> "\\section*{Header 1\\footnote{note}}\\label{foo}\n\\addcontentsline{toc}{section}{Header 1}\n" ] + , testGroup "inline code" + [ "struck out and highlighted" =: + strikeout (codeWith ("",["haskell"],[]) "foo" <> space + <> str "bar") =?> + "\\sout{\\mbox{\\VERB|\\NormalTok{foo}|} bar}" + , "struck out and not highlighted" =: + strikeout (code "foo" <> space + <> str "bar") =?> + "\\sout{\\texttt{foo} bar}" + ] ] -- cgit v1.2.3 From cab4b829b3683cec1def11d7189b5a850f23b016 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 10:39:24 -0700 Subject: Support --trace in HTML reader. --- src/Text/Pandoc/Readers/HTML.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 204239923..7bbad4257 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -50,6 +50,8 @@ import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) import Control.Applicative ( (<$>), (<$), (<*) ) import Data.Monoid +import Text.Printf (printf) +import Debug.Trace (trace) isSpace :: Char -> Bool isSpace ' ' = True @@ -92,7 +94,10 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) return mempty block :: TagParser Blocks -block = choice +block = do + tr <- getOption readerTrace + pos <- getPosition + res <- choice [ pPara , pHeader , pBlockQuote @@ -106,6 +111,10 @@ block = choice , pDiv , pRawHtmlBlock ] + when tr $ trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res)) (return ()) + return res + pList :: TagParser Blocks pList = pBulletList <|> pOrderedList <|> pDefinitionList -- cgit v1.2.3 From b3b40546cb5ad00ee6fadcd83bcc38854fb137ae Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 10:47:29 -0700 Subject: HTML reader: Fix performance issue with malformed 
HTML tables. We let a `</table>` tag close an open `<tr>` or `<td>`. Closes #1167. --- src/Text/Pandoc/Readers/HTML.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 7bbad4257..552e8a251 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -471,6 +471,8 @@ pCloses tagtype = try $ do (TagClose "ul") | tagtype == "li" -> return () (TagClose "ol") | tagtype == "li" -> return () (TagClose "dl") | tagtype == "li" -> return () + (TagClose "table") | tagtype == "td" -> return () + (TagClose "table") | tagtype == "tr" -> return () _ -> mzero pTagText :: TagParser Inlines -- cgit v1.2.3 From 2eadc7805392c165f7286bd9a337b310b41c897d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 10:58:26 -0700 Subject: ImageSize: Use default instead of failing if image size not found in exif header. Closes #1358. --- src/Text/Pandoc/ImageSize.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 9e6b457c0..68b34dcf3 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -76,6 +76,9 @@ imageSize img = do Eps -> epsSize img Pdf -> Nothing -- TODO +defaultSize :: (Integer, Integer) +defaultSize = (72, 72) + sizeInPixels :: ImageSize -> (Integer, Integer) sizeInPixels s = (pxX s, pxY s) @@ -260,7 +263,9 @@ exifHeader hdr = do lookup ExifImageHeight allentries) of (Just (UnsignedLong w), Just (UnsignedLong h)) -> return (fromIntegral w, fromIntegral h) - _ -> fail "Could not determine image width, height" + _ -> return defaultSize + -- we return a default width and height when + -- the exif header doesn't contain these let resfactor = case lookup ResolutionUnit allentries of Just (UnsignedShort 1) -> (100 / 254) _ -> 1 -- cgit v1.2.3 From 56c410ef6a3b7c3dd4054b18ea667cb4ab5a5ee0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 11:10:35 -0700 Subject:
Markdown reader: Prevent spurious line breaks after list items. When the `hard_line_breaks` option was specified, pandoc would produce a spurious line break after a tight list item. This patch solves the problem. Closes #1137. --- src/Text/Pandoc/Readers/Markdown.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a6720beba..2c9064994 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1560,7 +1560,8 @@ endline = try $ do guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) - (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) + (eof >> return mempty) + <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) <|> (return $ return B.space) -- cgit v1.2.3 From d81b4358ea8d0da615c01a468196fd0592934af6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 11:26:38 -0700 Subject: LaTeX writer: Correctly handle figures in notes. Notes can't contain figures in LaTeX, so we fake it to avoid an error. Closes #1053. 
--- src/Text/Pandoc/Writers/LaTeX.hs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index ed735242f..f2f7438c4 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -311,12 +311,14 @@ blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure blockToLaTeX (Para [Image txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return empty - else (\c -> "\\caption" <> braces c) `fmap` inlineListToLaTeX txt + inNote <- gets stInNote + capt <- inlineListToLaTeX txt img <- inlineToLaTeX (Image txt (src,tit)) - return $ "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ - capt $$ "\\end{figure}" + return $ if inNote + -- can't have figures in notes + then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" + else "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ + ("\\caption" <> braces capt) $$ "\\end{figure}" -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do beamer <- writerBeamer `fmap` gets stOptions -- cgit v1.2.3 From 8f20ac3da3b0d1111f94161d5b3528dfa94d1069 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 11:39:24 -0700 Subject: MediaWiki reader: Support --trace. --- src/Text/Pandoc/Readers/MediaWiki.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index e4fabc898..eb9dd5b8a 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -55,6 +55,8 @@ import qualified Data.Foldable as F import qualified Data.Map as M import Data.Char (isDigit, isSpace) import Data.Maybe (fromMaybe) +import Text.Printf (printf) +import Debug.Trace (trace) -- | Read mediawiki from an input string and return a Pandoc document. 
readMediaWiki :: ReaderOptions -- ^ Reader options @@ -187,7 +189,10 @@ parseMediaWiki = do -- block :: MWParser Blocks -block = mempty <$ skipMany1 blankline +block = do + tr <- getOption readerTrace + pos <- getPosition + res <- mempty <$ skipMany1 blankline <|> table <|> header <|> hrule @@ -199,6 +204,10 @@ block = mempty <$ skipMany1 blankline <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para + when tr $ + trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res)) (return ()) + return res para :: MWParser Blocks para = do -- cgit v1.2.3 From d397a66107a932e702d0f9cbba5df3ce09be25fd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 12:00:26 -0700 Subject: MediaWiki reader: Tightened up template parsing. The opening "{{" must be followed by an alphanumeric or ':'. This prevents the exponential slowdown in #1033. Closes #1033. --- src/Text/Pandoc/Readers/MediaWiki.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index eb9dd5b8a..f1dcce8f7 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -317,6 +317,7 @@ template :: MWParser String template = try $ do string "{{" notFollowedBy (char '{') + lookAhead $ letter <|> digit <|> char ':' let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar contents <- manyTill chunk (try $ string "}}") return $ "{{" ++ concat contents ++ "}}" -- cgit v1.2.3 From 5d0103606f469a6336beb2f7a90f210dd29485bd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 15:24:24 -0700 Subject: Markdown reader: Support smallcaps through span. `foo` will be parsed as a `SmallCaps` inline, and will work in all output formats that support small caps. Closes #1360. 
--- README | 8 ++++++++ src/Text/Pandoc/Readers/Markdown.hs | 7 ++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/README b/README index 122db23ec..012b75e2a 100644 --- a/README +++ b/README @@ -2020,6 +2020,14 @@ Attributes can be attached to verbatim text, just as with `<$>`{.haskell} +### Small caps ### + +To write small caps, you can use an HTML span tag: + + <span style="font-variant:small-caps;">Small caps</span> + +This will work in all output formats that support small caps. + Math ---- diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2c9064994..6c710c8ff 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1736,7 +1736,12 @@ spanHtml = try $ do let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.spanWith (ident, classes, keyvals) <$> contents + case lookup "style" keyvals of + Just s | null ident && null classes && + map toLower (filter (`notElem` " \t;") s) == + "font-variant:small-caps" + -> return $ B.smallcaps <$> contents + _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents divHtml :: MarkdownParser (F Blocks) divHtml = try $ do -- cgit v1.2.3 From a4508d7fcff0fc80af7b9d03177679860f4d00e6 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 18:26:15 -0400 Subject: Docx reader tests: Introduce NoNormPandoc type. This is just a wrapper around Pandoc that doesn't normalize with `toString`. We want to make sure that our own normalization process works. If, in the future, we are able to hook into the builder's normalization, this will be removed.
--- tests/Tests/Readers/Docx.hs | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 3a13641a9..e8fa33241 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -7,12 +7,37 @@ import Tests.Helpers import Test.Framework import qualified Data.ByteString.Lazy as B import Text.Pandoc.Readers.Docx +import Text.Pandoc.Writers.Native (writeNative) +import qualified Data.Map as M -compareOutput :: ReaderOptions -> FilePath -> FilePath -> IO (Pandoc, Pandoc) +-- We define a wrapper around pandoc that doesn't normalize in the +-- tests. Since we do our own normalization, we want to make sure +-- we're doing it right. + +data NoNormPandoc = NoNormPandoc {unNoNorm :: Pandoc} + deriving Show + +noNorm :: Pandoc -> NoNormPandoc +noNorm = NoNormPandoc + +instance ToString NoNormPandoc where + toString d = writeNative def{ writerStandalone = s } $ toPandoc d + where s = case d of + NoNormPandoc (Pandoc (Meta m) _) + | M.null m -> False + | otherwise -> True + +instance ToPandoc NoNormPandoc where + toPandoc = unNoNorm + +compareOutput :: ReaderOptions + -> FilePath + -> FilePath + -> IO (NoNormPandoc, NoNormPandoc) compareOutput opts docxFile nativeFile = do df <- B.readFile docxFile nf <- Prelude.readFile nativeFile - return $ (readDocx opts df, readNative nf) + return $ (noNorm (readDocx opts df), noNorm (readNative nf)) testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO Test testCompareWithOptsIO opts name docxFile nativeFile = do -- cgit v1.2.3 From f5c8b05a1a82639ac8746183674dcaf019c75050 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 23:08:00 -0700 Subject: Filters: don't print misleading error message. Previously pandoc would say that a filter was not found, even in a case where the filter had a syntax error. 
--- pandoc.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index 0a8070d7c..60ea3cb03 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -121,10 +121,7 @@ externalFilter f args' d = do ExitFailure _ -> err 83 $ "Error running filter " ++ f where filterException :: E.SomeException -> IO a filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++ - if ioeGetErrorType `fmap` E.fromException e == - Just ResourceVanished - then f ++ " not found in path" - else show e + show e -- | Data structure for command line options. data Opt = Opt -- cgit v1.2.3 From 08fe16e9720a3a191caf095d48e0a6c454039bf9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 21 Jun 2014 11:26:43 -0700 Subject: Fixed compiler warnings. --- pandoc.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index 60ea3cb03..10673132b 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -62,8 +62,6 @@ import qualified Data.ByteString.Lazy as B import qualified Data.ByteString as BS import Data.Aeson (eitherDecode', encode) import qualified Data.Map as M -import System.IO.Error(ioeGetErrorType) -import GHC.IO.Exception (IOErrorType(ResourceVanished)) import Data.Yaml (decode) import qualified Data.Yaml as Yaml import qualified Data.Text as T -- cgit v1.2.3 From 9c7e0dc84b2384347099827999f0e2f7be4f7e51 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 21 Jun 2014 17:53:45 -0400 Subject: Implement new normalization. There were some problems with the old str normalization. This fixes those problems. Also, since it drills down on its own, it only needs to be mapped over the blocks, not walked over the tree. 
--- src/Text/Pandoc/Readers/Docx.hs | 68 ++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 479a88161..299adf5a8 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -148,25 +148,71 @@ runElemsToString = concatMap runElemToString --- Text.Pandoc.Shared.normalize for reasons of efficiency. For --- whatever reason, `normalize` makes a run take almost twice as --- long. (It does more, but this does what we need) -strNormalize :: [Inline] -> [Inline] -strNormalize [] = [] -strNormalize (Str "" : ils) = strNormalize ils -strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) -strNormalize (il:ils) = il : (strNormalize ils) +inlineNormalize :: [Inline] -> [Inline] +inlineNormalize [] = [] +inlineNormalize (Str "" : ils) = inlineNormalize ils +inlineNormalize ((Str s) : (Str s') : l) = + inlineNormalize (Str (s++s') : l) +inlineNormalize ((Emph ils) : (Emph ils') : l) = + inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l +inlineNormalize ((Emph ils) : l) = + Emph (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Strong ils) : (Strong ils') : l) = + inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l +inlineNormalize ((Strong ils) : l) = + Strong (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) = + inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l +inlineNormalize ((Strikeout ils) : l) = + Strikeout (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Superscript ils) : (Superscript ils') : l) = + inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l +inlineNormalize ((Superscript ils) : l) = + Superscript (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Subscript ils) : (Subscript ils') : l) = + inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : 
l +inlineNormalize ((Subscript ils) : l) = + Subscript (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Space : Space : l)) = + inlineNormalize $ (Space : l) +inlineNormalize ((Quoted qt ils) : l) = + Quoted qt (inlineNormalize ils) : inlineNormalize l +inlineNormalize ((Cite cits ils) : l) = + let + f :: Citation -> Citation + f (Citation s pref suff mode num hash) = + Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash + in + Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Link ils s) : l) = + Link (inlineNormalize ils) s : (inlineNormalize l) +inlineNormalize ((Image ils s) : l) = + Image (inlineNormalize ils) s : (inlineNormalize l) +inlineNormalize ((Note blks) : l) = + Note (map blockNormalize blks) : (inlineNormalize l) +inlineNormalize ((Span attr ils) : l) = + Span attr (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize (il : l) = il : (inlineNormalize l) stripSpaces :: [Inline] -> [Inline] stripSpaces ils = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils blockNormalize :: Block -> Block -blockNormalize (Plain ils) = Plain $ bottomUp strNormalize $ stripSpaces ils -blockNormalize (Para ils) = Para $ bottomUp strNormalize $ stripSpaces ils +blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils +blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils blockNormalize (Header n attr ils) = - Header n attr $ bottomUp strNormalize $ stripSpaces ils + Header n attr $ stripSpaces $ inlineNormalize ils blockNormalize (Table ils align width hdr cells) = - Table (bottomUp strNormalize $ stripSpaces ils) align width hdr cells + Table (stripSpaces $ inlineNormalize ils) align width hdr cells blockNormalize (DefinitionList pairs) = - DefinitionList $ map (\(ils, blklsts) -> (bottomUp strNormalize (stripSpaces ils), blklsts)) pairs + DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) 
pairs +blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks) +blockNormalize (OrderedList attr blkslst) = + OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst +blockNormalize (BulletList blkslst) = + BulletList $ map (\blks -> map blockNormalize blks) blkslst +blockNormalize (Div attr blks) = Div attr (map blockNormalize blks) blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] @@ -315,7 +361,7 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = bottomUp removeEmptyPars $ - bottomUp blockNormalize $ + map blockNormalize $ bottomUp spanRemove $ bottomUp divRemove $ map (makeHeaderAnchors) $ -- cgit v1.2.3 From ca4add679ce6dd438cc3f6d58f82d04a9ad6305e Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 21 Jun 2014 17:58:32 -0400 Subject: Add normalization test. Add torture-test for new normalization functions. One problem that this test demonstrates is that word has a tendency to turn off formatting at a space, and then turn it back on after. I'm not sure yet whether this is something we should fix. 
--- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx.normalize.docx | Bin 0 -> 25994 bytes tests/docx.normalize.native | 2 ++ 3 files changed, 6 insertions(+) create mode 100644 tests/docx.normalize.docx create mode 100644 tests/docx.normalize.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index e8fa33241..74184efc6 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -74,6 +74,10 @@ tests = [ testGroup "inlines" "literal tabs" "docx.tabs.docx" "docx.tabs.native" + , testCompare + "normalizing inlines" + "docx.normalize.docx" + "docx.normalize.native" ] , testGroup "blocks" [ testCompare diff --git a/tests/docx.normalize.docx b/tests/docx.normalize.docx new file mode 100644 index 000000000..5e4370a47 Binary files /dev/null and b/tests/docx.normalize.docx differ diff --git a/tests/docx.normalize.native b/tests/docx.normalize.native new file mode 100644 index 000000000..fa34d0581 --- /dev/null +++ b/tests/docx.normalize.native @@ -0,0 +1,2 @@ +[Para [Str "These",Space,Str "are",Space,Str "different",Space,Str "fonts."] +,Para [Strong [Str "These",Space,Emph [Str "are"]],Space,Strong [Emph [Strikeout [Str "different"]],Space,Str "fonts."]]] -- cgit v1.2.3 From 8e5bd9d851aa0f60462015f61e3980b134e3c131 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 22 Jun 2014 01:47:11 -0400 Subject: Docx reader: Fix spacing in formatting. The normalizing tests revealed a problem with unformatted spaces, brought about by `spanTrim`. This fixes by not trimming the spaces out of spans until they are in their final form. 
--- src/Text/Pandoc/Readers/Docx.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 299adf5a8..09c2330fb 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -286,8 +286,8 @@ parPartsToInlines opts docx parparts = (if False -- TODO depend on option then bottomUp (makeImagesSelfContained docx) else id) $ - bottomUp spanCorrect $ bottomUp spanTrim $ + bottomUp spanCorrect $ bottomUp spanReduce $ concatMap (parPartToInlines opts docx) parparts -- cgit v1.2.3 From b3df3a38611fe4fd03fa2d4e38ba45ae7cf8fe08 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 22 Jun 2014 01:56:33 -0400 Subject: Docx reader tests: Correct normalize test. --- tests/docx.normalize.native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docx.normalize.native b/tests/docx.normalize.native index fa34d0581..aeba672c4 100644 --- a/tests/docx.normalize.native +++ b/tests/docx.normalize.native @@ -1,2 +1,2 @@ [Para [Str "These",Space,Str "are",Space,Str "different",Space,Str "fonts."] -,Para [Strong [Str "These",Space,Emph [Str "are"]],Space,Strong [Emph [Strikeout [Str "different"]],Space,Str "fonts."]]] +,Para [Strong [Str "These",Space,Emph [Str "are",Space,Strikeout [Str "different"]],Space,Str "fonts."]]] -- cgit v1.2.3 From ed43513087b514a5240fde04784dbf8709182513 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 22 Jun 2014 01:58:41 -0400 Subject: Docx reader tests: add tests for normalization deep in blocks. 
--- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx.deep_normalize.docx | Bin 0 -> 29246 bytes tests/docx.deep_normalize.native | 6 ++++++ 3 files changed, 10 insertions(+) create mode 100644 tests/docx.deep_normalize.docx create mode 100644 tests/docx.deep_normalize.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 74184efc6..ffb079eee 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -78,6 +78,10 @@ tests = [ testGroup "inlines" "normalizing inlines" "docx.normalize.docx" "docx.normalize.native" + , testCompare + "normalizing inlines deep inside blocks" + "docx.deep_normalize.docx" + "docx.deep_normalize.native" ] , testGroup "blocks" [ testCompare diff --git a/tests/docx.deep_normalize.docx b/tests/docx.deep_normalize.docx new file mode 100644 index 000000000..7626d59ce Binary files /dev/null and b/tests/docx.deep_normalize.docx differ diff --git a/tests/docx.deep_normalize.native b/tests/docx.deep_normalize.native new file mode 100644 index 000000000..9b2089ec8 --- /dev/null +++ b/tests/docx.deep_normalize.native @@ -0,0 +1,6 @@ +[OrderedList (1,Decimal,OneParen) + [[Para [Str "This",Space,Str "is",Space,Str "at",Space,Str "the",Space,Str "first",Space,Str "level"] + ,OrderedList (1,LowerAlpha,DefaultDelim) + [[Para [Str "This",Space,Str "is",Space,Str "at",Space,Str "the",Space,Str "second",Space,Str "level"] + ,OrderedList (1,LowerRoman,DefaultDelim) + [[Para [Str "This",Space,Str "is",Space,Emph [Str "at",Space,Strong [Str "the",Space,Str "third",Space,Str "level"],Str ",",Space,Str "and",Space,Str "I",Space,Str "want",Space,Str "to"],Space,Str "test",Space,Str "normalization",Space,Str "here."]]]]]]]] -- cgit v1.2.3 From e03ed7377cfd3d64d65f186aa76b17417a4e4fde Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 23 Jun 2014 12:41:47 -0700 Subject: Markdown reader: Combine consecutive latex environments. This helps when you have two minipages which can't have blank lines between them. 
See #690, #1196. --- src/Text/Pandoc/Readers/Markdown.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 6c710c8ff..690256224 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -939,8 +939,10 @@ rawVerbatimBlock = try $ do rawTeXBlock :: MarkdownParser (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "latex" <$> rawLaTeXBlock) - <|> (B.rawBlock "context" <$> rawConTeXtEnvironment) + result <- (B.rawBlock "latex" . concat <$> + rawLaTeXBlock `sepEndBy1` blankline) + <|> (B.rawBlock "context" . concat <$> + rawConTeXtEnvironment `sepEndBy1` blankline) spaces return $ return result -- cgit v1.2.3 From 87ab01637e1dc0f583277828bc458567a72e38ce Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 23 Jun 2014 12:51:10 -0700 Subject: LaTeX writer: Use `\textquotesingle` for `'` in inline code. Otherwise we get curly quotes in the PDF output. Closes #1364. 
--- src/Text/Pandoc/Writers/LaTeX.hs | 1 + tests/Tests/Writers/LaTeX.hs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f2f7438c4..100bf900d 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -220,6 +220,7 @@ stringToLaTeX ctx (x:xs) = do '>' -> "\\textgreater{}" ++ rest '[' -> "{[}" ++ rest -- to avoid interpretation as ']' -> "{]}" ++ rest -- optional arguments + '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest '\160' -> "~" ++ rest '\x2026' -> "\\ldots{}" ++ rest '\x2018' | ligatures -> "`" ++ rest diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index 6db6542a0..c32ded36d 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -63,5 +63,7 @@ tests = [ testGroup "code blocks" strikeout (code "foo" <> space <> str "bar") =?> "\\sout{\\texttt{foo} bar}" + , "single quotes" =: + code "dog's" =?> "\\texttt{dog\\textquotesingle{}s}" ] ] -- cgit v1.2.3 From ef5fad2698f3d4c1fe528f138264cc8abb3b2943 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 15:25:46 -0400 Subject: Add new typeclass, Reducible This defines a typeclass `Reducible` which allows us to "reduce" pandoc Inlines and Blocks, like so Emph [Strong [Str "foo", Space]] <++> Strong [Emph [Str "bar"]], Str "baz"] = [Strong [Emph [Str "foo", Space, Str "bar"], Space, Str "baz"]] So adjacent formattings and strings are appropriately grouped. Another set of operators for `(Reducible a) => (Many a)` are also included. 
--- src/Text/Pandoc/Readers/Docx/Reducible.hs | 150 ++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 src/Text/Pandoc/Readers/Docx/Reducible.hs diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs new file mode 100644 index 000000000..1ed31ebd0 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs @@ -0,0 +1,150 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Pandoc.Readers.Docx.Reducible ((<++>), + (<+++>), + Reducible, + Container(..), + container, + innards, + reduceList, + reduceListB, + rebuild) + where + +import Text.Pandoc.Builder +import Data.List ((\\), intersect) + +data Container a = Container ([a] -> a) | NullContainer + +instance (Eq a) => Eq (Container a) where + (Container x) == (Container y) = ((x []) == (y [])) + NullContainer == NullContainer = True + _ == _ = False + +instance (Show a) => Show (Container a) where + show (Container x) = "Container {" ++ + (reverse $ drop 3 $ reverse $ show $ x []) ++ + "}" + show (NullContainer) = "NullContainer" + +class Reducible a where + (<++>) :: a -> a -> [a] + container :: a -> Container a + innards :: a -> [a] + isSpace :: a -> Bool + +(<+++>) :: (Reducible a) => Many a -> Many a -> Many a +mr <+++> ms = fromList $ reduceList $ toList mr ++ toList ms + +reduceListB :: (Reducible a) => Many a -> Many a +reduceListB = fromList . reduceList . 
toList + +reduceList' :: (Reducible a) => [a] -> [a] -> [a] +reduceList' acc [] = acc +reduceList' [] (x:xs) = reduceList' [x] xs +reduceList' as (x:xs) = reduceList' (init as ++ (last as <++> x) ) xs + +reduceList :: (Reducible a) => [a] -> [a] +reduceList = reduceList' [] + +combineReducibles :: (Reducible a, Eq a) => a -> a -> [a] +combineReducibles r s = + let (conts, rs) = topLevelContainers r + (conts', ss) = topLevelContainers s + shared = conts `intersect` conts' + remaining = conts \\ shared + remaining' = conts' \\ shared + in + case null shared of + True -> case (not . null) rs && isSpace (last rs) of + True -> rebuild conts (init rs) ++ [last rs, s] + False -> [r,s] + False -> rebuild + shared $ + reduceList $ + (rebuild remaining rs) ++ (rebuild remaining' ss) + +instance Reducible Inline where + s1@(Span (id1, classes1, kvs1) ils1) <++> s2@(Span (id2, classes2, kvs2) ils2) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> [s1,s2] + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + s1' = case null classes1' && null kvs1' of + True -> ils1 + False -> [Span attr1' ils1] + s2' = case null classes2' && null kvs2' of + True -> ils2 + False -> [Span attr2' ils2] + in + [Span attr' $ reduceList $ s1' ++ s2'] + + (Str x) <++> (Str y) = [Str (x++y)] + il <++> il' = combineReducibles il il' + + container (Emph _) = Container Emph + container (Strong _) = Container Strong + container (Strikeout _) = Container Strikeout + container (Subscript _) = Container Subscript + container (Superscript _) = Container Superscript + container (Quoted qt _) = Container $ Quoted qt + container (Cite cs _) = Container $ Cite cs + container (Span attr _) = Container $ Span attr + container _ = NullContainer + + innards 
(Emph ils) = ils + innards (Strong ils) = ils + innards (Strikeout ils) = ils + innards (Subscript ils) = ils + innards (Superscript ils) = ils + innards (Quoted _ ils) = ils + innards (Cite _ ils) = ils + innards (Span _ ils) = ils + innards _ = [] + + isSpace Space = True + isSpace _ = False + +instance Reducible Block where + (Div (ident, classes, kvs) blks) <++> blk | "list-item" `elem` classes = + [Div (ident, classes, kvs) (reduceList blks), blk] + + blk <++> blk' = combineReducibles blk blk' + + container (BlockQuote _) = Container BlockQuote + container (Div attr _) = Container $ Div attr + container _ = NullContainer + + innards (BlockQuote bs) = bs + innards (Div _ bs) = bs + innards _ = [] + + isSpace _ = False + + +topLevelContainers' :: (Reducible a) => [a] -> ([Container a], [a]) +topLevelContainers' (r : []) = case container r of + NullContainer -> ([], [r]) + _ -> + let (conts, inns) = topLevelContainers' (innards r) + in + ((container r) : conts, inns) +topLevelContainers' rs = ([], rs) + +topLevelContainers :: (Reducible a) => a -> ([Container a], [a]) +topLevelContainers il = topLevelContainers' [il] + +rebuild :: [Container a] -> [a] -> [a] +rebuild [] xs = xs +rebuild ((Container f) : cs) xs = rebuild cs $ [f xs] +rebuild (NullContainer : cs) xs = rebuild cs $ xs + + -- cgit v1.2.3 From 94d0fb15382a4855938c540c9e521642bccc00e3 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 15:27:01 -0400 Subject: Move some of the clean-up logic into List module. This will allow us to get rid of more general functions we no longer need in the main reader. 
--- src/Text/Pandoc/Readers/Docx/Lists.hs | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 68559d98b..1e37d0076 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -29,9 +29,12 @@ Functions for converting flat docx paragraphs into nested lists. -} module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets - , blocksToDefinitions) where + , blocksToDefinitions + , listParagraphDivs + ) where import Text.Pandoc.JSON +import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Shared (trim) import Control.Monad import Data.List @@ -159,10 +162,9 @@ flatToBullets elems = flatToBullets' (-1) elems blocksToBullets :: [Block] -> [Block] blocksToBullets blks = - -- bottomUp removeListItemDivs $ + bottomUp removeListDivs $ flatToBullets $ (handleListParagraphs blks) - plainParaInlines :: Block -> [Inline] plainParaInlines (Plain ils) = ils plainParaInlines (Para ils) = ils @@ -199,6 +201,23 @@ blocksToDefinitions' [] acc (b:blks) = blocksToDefinitions' defAcc acc (b:blks) = blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks +removeListDivs' :: Block -> [Block] +removeListDivs' (Div (ident, classes, kvs) blks) + | "list-item" `elem` classes = + case delete "list-item" classes of + [] -> blks + classes' -> [Div (ident, classes', kvs) $ blks] +removeListDivs' (Div (ident, classes, kvs) blks) + | not $ null $ listParagraphDivs `intersect` classes = + case classes \\ listParagraphDivs of + [] -> blks + classes' -> [Div (ident, classes', kvs) blks] +removeListDivs' blk = [blk] + +removeListDivs :: [Block] -> [Block] +removeListDivs = concatMap removeListDivs' + + blocksToDefinitions :: [Block] -> [Block] blocksToDefinitions = blocksToDefinitions' [] [] -- cgit v1.2.3 From 11b0778744d0eeb61e2502e452d010631fab979b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 15:27:55 
-0400 Subject: Use Reducible in docx reader. This cleans up the implementation, and cuts down on tree-walking. Anecdotally, I've seen about a 3-fold speedup. --- src/Text/Pandoc/Readers/Docx.hs | 384 ++++++++++++---------------------------- 1 file changed, 111 insertions(+), 273 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 09c2330fb..ffe7f5a92 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -79,8 +79,10 @@ import Text.Pandoc.Builder (text, toList) import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.UTF8 (toString) +import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists +import Text.Pandoc.Readers.Docx.Reducible import Data.Maybe (mapMaybe, isJust, fromJust) import Data.List (delete, isPrefixOf, (\\), intersect) import qualified Data.ByteString as BS @@ -96,28 +98,65 @@ readDocx opts bytes = Just docx -> Pandoc nullMeta (docxToBlocks opts docx) Nothing -> error $ "couldn't parse docx file" -runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) -runStyleToSpanAttr rPr = ("", - mapMaybe id [ - if isBold rPr then (Just "strong") else Nothing, - if isItalic rPr then (Just "emph") else Nothing, - if isSmallCaps rPr then (Just "smallcaps") else Nothing, - if isStrike rPr then (Just "strike") else Nothing, - if isSuperScript rPr then (Just "superscript") else Nothing, - if isSubScript rPr then (Just "subscript") else Nothing, - rStyle rPr], - case underline rPr of - Just fmt -> [("underline", fmt)] - _ -> [] - ) - -parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) -parStyleToDivAttr pPr = ("", - pStyle pPr, - case indent pPr of - Just n -> [("indent", (show n))] - Nothing -> [] - ) +spansToKeep :: [String] +spansToKeep = ["list-item", "Definition", "DefinitionTerm"] ++ codeSpans + + +-- This is empty, but we put it in for future-proofing.
+divsToKeep :: [String] +divsToKeep = [] + +runStyleToContainers :: RunStyle -> [Container Inline] +runStyleToContainers rPr = + let formatters = mapMaybe id + [ if isBold rPr then (Just Strong) else Nothing + , if isItalic rPr then (Just Emph) else Nothing + , if isSmallCaps rPr then (Just SmallCaps) else Nothing + , if isStrike rPr then (Just Strikeout) else Nothing + , if isSuperScript rPr then (Just Superscript) else Nothing + , if isSubScript rPr then (Just Subscript) else Nothing + , rStyle rPr >>= + (\s -> if s `elem` spansToKeep then Just s else Nothing) >>= + (\s -> Just $ Span ("", [s], [])) + , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) + ] + in + map Container formatters + + +divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] +divAttrToContainers [] [] = [] +divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = + let n = fromJust (isHeaderClass c) + in + [(Container $ \blks -> + Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] +divAttrToContainers (c:_) _ | c `elem` codeDivs = + [Container $ \blks -> CodeBlock ("", [], []) (concatMap blkToCode blks)] +divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = + let kvs' = filter (\(k,_) -> k /= "indent") kvs + in + (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs') +divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = + (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) +divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = + (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) +divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs +divAttrToContainers [] (kv:kvs) | fst kv == "indent" = + (Container BlockQuote) : divAttrToContainers [] kvs +divAttrToContainers [] (_:kvs) = + divAttrToContainers [] kvs + + +parStyleToContainers :: ParagraphStyle -> [Container Block] +parStyleToContainers pPr = + let classes = pStyle pPr + kvs = case indent pPr of + Just n -> 
[("indent", show n)] + Nothing -> [] + in + divAttrToContainers classes kvs + strToInlines :: String -> [Inline] strToInlines = toList . text @@ -144,103 +183,42 @@ runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString ---- We use this instead of the more general ---- Text.Pandoc.Shared.normalize for reasons of efficiency. For ---- whatever reason, `normalize` makes a run take almost twice as ---- long. (It does more, but this does what we need) -inlineNormalize :: [Inline] -> [Inline] -inlineNormalize [] = [] -inlineNormalize (Str "" : ils) = inlineNormalize ils -inlineNormalize ((Str s) : (Str s') : l) = - inlineNormalize (Str (s++s') : l) -inlineNormalize ((Emph ils) : (Emph ils') : l) = - inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Emph ils) : l) = - Emph (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Strong ils) : (Strong ils') : l) = - inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Strong ils) : l) = - Strong (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) = - inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Strikeout ils) : l) = - Strikeout (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Superscript ils) : (Superscript ils') : l) = - inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Superscript ils) : l) = - Superscript (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Subscript ils) : (Subscript ils') : l) = - inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Subscript ils) : l) = - Subscript (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Space : Space : l)) = - inlineNormalize $ (Space : l) -inlineNormalize ((Quoted qt ils) : l) = - Quoted qt (inlineNormalize ils) : inlineNormalize l -inlineNormalize ((Cite 
cits ils) : l) = - let - f :: Citation -> Citation - f (Citation s pref suff mode num hash) = - Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash - in - Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Link ils s) : l) = - Link (inlineNormalize ils) s : (inlineNormalize l) -inlineNormalize ((Image ils s) : l) = - Image (inlineNormalize ils) s : (inlineNormalize l) -inlineNormalize ((Note blks) : l) = - Note (map blockNormalize blks) : (inlineNormalize l) -inlineNormalize ((Span attr ils) : l) = - Span attr (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize (il : l) = il : (inlineNormalize l) - -stripSpaces :: [Inline] -> [Inline] -stripSpaces ils = - reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils - -blockNormalize :: Block -> Block -blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils -blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils -blockNormalize (Header n attr ils) = - Header n attr $ stripSpaces $ inlineNormalize ils -blockNormalize (Table ils align width hdr cells) = - Table (stripSpaces $ inlineNormalize ils) align width hdr cells -blockNormalize (DefinitionList pairs) = - DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) pairs -blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks) -blockNormalize (OrderedList attr blkslst) = - OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst -blockNormalize (BulletList blkslst) = - BulletList $ map (\blks -> map blockNormalize blks) blkslst -blockNormalize (Div attr blks) = Div attr (map blockNormalize blks) -blockNormalize blk = blk + +inlineCodeContainer :: Container Inline -> Bool +inlineCodeContainer (Container f) = case f [] of + Span (_, classes, _) _ -> (not . 
null) (classes `intersect` codeSpans) + _ -> False +inlineCodeContainer _ = False + +-- blockCodeContainer :: Container Block -> Bool +-- blockCodeContainer (Container f) = case f [] of +-- Div (ident, classes, kvs) _ -> (not . null) (classes `intersect` codeDivs) +-- _ -> False +-- blockCodeContainer _ = False runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] runToInlines _ _ (Run rs runElems) - | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = - case runStyleToSpanAttr rs == ("", [], []) of - True -> [Str (runElemsToString runElems)] - False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] - | otherwise = case runStyleToSpanAttr rs == ("", [], []) of - True -> concatMap runElemToInlines runElems - False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] + | any inlineCodeContainer (runStyleToContainers rs) = + rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems] + | otherwise = + rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems) runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) = case (getFootNote fnId notes) of Just bodyParts -> - [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)] Nothing -> - [Note [Div ("", ["footnote"], []) []]] + [Note []] runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = case (getEndNote fnId notes) of Just bodyParts -> - [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)] Nothing -> - [Note [Div ("", ["endnote"], []) []]] + [Note []] parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r -parPartToInlines _ _ (BookMark _ anchor) = - [Span (anchor, ["anchor"], []) []] +parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = [] +parPartToInlines _ _ 
(BookMark _ anchor) = [Span (anchor, ["anchor"], []) []] parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = case lookupRelationship relid rels of Just target -> [Image [] (combine "word" target, "")] @@ -276,7 +254,6 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = _ -> h makeHeaderAnchors blk = blk - parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline] parPartsToInlines opts docx parparts = -- @@ -284,23 +261,32 @@ parPartsToInlines opts docx parparts = -- not mandatory. -- (if False -- TODO depend on option - then bottomUp (makeImagesSelfContained docx) + then walk (makeImagesSelfContained docx) else id) $ - bottomUp spanTrim $ - bottomUp spanCorrect $ - bottomUp spanReduce $ - concatMap (parPartToInlines opts docx) parparts + -- bottomUp spanTrim $ + -- bottomUp spanCorrect $ + -- bottomUp spanReduce $ + reduceList $ concatMap (parPartToInlines opts docx) parparts cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block] -cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps +cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]] rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells -bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block -bodyPartToBlock opts docx (Paragraph pPr parparts) = - Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] -bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) = +bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block] +bodyPartToBlocks opts docx (Paragraph pPr parparts) = + case parPartsToInlines opts docx parparts of + [] -> + [] + _ -> + let parContents = parPartsToInlines opts docx parparts + trimmedContents = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) parContents + in + rebuild + (parStyleToContainers pPr) + [Para trimmedContents] +bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) 
(ListItem pPr numId lvl parparts) = let kvs = case lookupLevel numId lvl numbering of Just (_, fmt, txt, Just start) -> [ ("level", lvl) @@ -317,12 +303,12 @@ bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parpa ] Nothing -> [] in - Div - ("", ["list-item"], kvs) - [bodyPartToBlock opts docx (Paragraph pPr parparts)] -bodyPartToBlock _ _ (Tbl _ _ _ []) = - Para [] -bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = + [Div + ("", ["list-item"], kvs) + (bodyPartToBlocks opts docx (Paragraph pPr parparts))] +bodyPartToBlocks _ _ (Tbl _ _ _ []) = + [Para []] +bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) = let caption = strToInlines cap (hdr, rows) = case firstRowFormatting look of True -> (Just r, rs) @@ -344,7 +330,8 @@ bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = alignments = take size (repeat AlignDefault) widths = take size (repeat 0) :: [Double] in - Table caption alignments widths hdrCells cells + [Table caption alignments widths hdrCells cells] + makeImagesSelfContained :: Docx -> Inline -> Inline makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) = @@ -360,127 +347,19 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = - bottomUp removeEmptyPars $ - map blockNormalize $ - bottomUp spanRemove $ - bottomUp divRemove $ map (makeHeaderAnchors) $ - bottomUp divCorrect $ - bottomUp divReduce $ - bottomUp divCorrectPreReduce $ bottomUp blocksToDefinitions $ blocksToBullets $ - map (bodyPartToBlock opts docx) bps + concatMap (bodyPartToBlocks opts docx) bps docxToBlocks :: ReaderOptions -> Docx -> [Block] docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body -spanReduce :: [Inline] -> [Inline] -spanReduce [] = [] -spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) - | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) -spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : - s2@(Span 
(id2, classes2, kvs2) ils2) : - ils) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> s1 : (spanReduce (s2 : ils)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : - ils) -spanReduce (il:ils) = il : (spanReduce ils) ilToCode :: Inline -> String ilToCode (Str s) = s ilToCode _ = "" -spanRemove' :: Inline -> [Inline] -spanRemove' s@(Span (ident, classes, _) []) - -- "_GoBack" is automatically inserted. We don't want to keep it. - | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] -spanRemove' (Span (_, _, kvs) ils) = - case lookup "underline" kvs of - Just val -> [Span ("", [], [("underline", val)]) ils] - Nothing -> ils -spanRemove' il = [il] - -spanRemove :: [Inline] -> [Inline] -spanRemove = concatMap spanRemove' - -spanTrim' :: Inline -> [Inline] -spanTrim' il@(Span _ []) = [il] -spanTrim' il@(Span attr (il':[])) - | il' == Space = [Span attr [], Space] - | otherwise = [il] -spanTrim' (Span attr ils) - | head ils == Space && last ils == Space = - [Space, Span attr (init $ tail ils), Space] - | head ils == Space = [Space, Span attr (tail ils)] - | last ils == Space = [Span attr (init ils), Space] -spanTrim' il = [il] - -spanTrim :: [Inline] -> [Inline] -spanTrim = concatMap spanTrim' - -spanCorrect' :: Inline -> [Inline] -spanCorrect' (Span ("", [], []) ils) = ils -spanCorrect' (Span (ident, classes, kvs) ils) - | "emph" `elem` classes = - [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] - | "strong" `elem` classes = - [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] - | "smallcaps" `elem` classes = - [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), 
kvs) ils] - | "strike" `elem` classes = - [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] - | "superscript" `elem` classes = - [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] - | "subscript" `elem` classes = - [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] - | (not . null) (codeSpans `intersect` classes) = - [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] - | otherwise = - [Span (ident, classes, kvs) ils] -spanCorrect' il = [il] - -spanCorrect :: [Inline] -> [Inline] -spanCorrect = concatMap spanCorrect' - -removeEmptyPars :: [Block] -> [Block] -removeEmptyPars blks = filter (\b -> b /= (Para [])) blks - -divReduce :: [Block] -> [Block] -divReduce [] = [] -divReduce ((Div (id1, classes1, kvs1) blks1) : blks) - | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) -divReduce (d1@(Div (id1, classes1, kvs1) blks1) : - d2@(Div (id2, classes2, kvs2) blks2) : - blks) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> d1 : (divReduce (d2 : blks)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : - blks) -divReduce (blk:blks) = blk : (divReduce blks) isHeaderClass :: String -> Maybe Int isHeaderClass s | "Heading" `isPrefixOf` s = @@ -490,27 +369,12 @@ isHeaderClass s | "Heading" `isPrefixOf` s = _ -> Nothing isHeaderClass _ = Nothing -findHeaderClass :: [String] -> Maybe Int -findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of - [] -> Nothing - n : _ -> Just n blksToInlines :: [Block] -> [Inline] blksToInlines (Para ils : _) = ils blksToInlines (Plain ils : _) = ils blksToInlines _ = [] 
-divCorrectPreReduce' :: Block -> [Block] -divCorrectPreReduce' (Div (ident, classes, kvs) blks) - | isJust $ findHeaderClass classes = - let n = fromJust $ findHeaderClass classes - in - [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] - | otherwise = [Div (ident, classes, kvs) blks] -divCorrectPreReduce' blk = [blk] - -divCorrectPreReduce :: [Block] -> [Block] -divCorrectPreReduce = concatMap divCorrectPreReduce' blkToCode :: Block -> String blkToCode (Para []) = "" @@ -520,29 +384,3 @@ blkToCode (Para ((Span (_, classes, _) ils'): ils)) (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) blkToCode _ = "" -divRemove' :: Block -> [Block] -divRemove' (Div (_, _, kvs) blks) = - case lookup "indent" kvs of - Just val -> [Div ("", [], [("indent", val)]) blks] - Nothing -> blks -divRemove' blk = [blk] - -divRemove :: [Block] -> [Block] -divRemove = concatMap divRemove' - -divCorrect' :: Block -> [Block] -divCorrect' b@(Div (ident, classes, kvs) blks) - | (not . null) (blockQuoteDivs `intersect` classes) = - [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] - | (not . null) (codeDivs `intersect` classes) = - [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] - | otherwise = - case lookup "indent" kvs of - Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] - Just _ -> - [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] - Nothing -> [b] -divCorrect' blk = [blk] - -divCorrect :: [Block] -> [Block] -divCorrect = concatMap divCorrect' -- cgit v1.2.3 From 8517a4f2e59dc2ecf20a96eedfa5b2cc47f0aeba Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 15:29:04 -0400 Subject: Add Reducible to cabal file. 
--- pandoc.cabal | 1 + 1 file changed, 1 insertion(+) diff --git a/pandoc.cabal b/pandoc.cabal index e35a76caf..634d249fe 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -327,6 +327,7 @@ Library Text.Pandoc.SelfContained, Text.Pandoc.Process Other-Modules: Text.Pandoc.Readers.Docx.Lists, + Text.Pandoc.Readers.Docx.Reducible, Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, -- cgit v1.2.3 From 9b954fa855158d99b4ddba7c3ffe7f2fed7ce25f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 15:40:34 -0400 Subject: Add test for correctly trimming spaces in formatting. This used to be fixed in the tree-walking. We need to make sure we're doing it right now. --- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx.trailing_spaces_in_formatting.docx | Bin 0 -> 12916 bytes tests/docx.trailing_spaces_in_formatting.native | 1 + 3 files changed, 5 insertions(+) create mode 100644 tests/docx.trailing_spaces_in_formatting.docx create mode 100644 tests/docx.trailing_spaces_in_formatting.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index ffb079eee..a42dc31e9 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -82,6 +82,10 @@ tests = [ testGroup "inlines" "normalizing inlines deep inside blocks" "docx.deep_normalize.docx" "docx.deep_normalize.native" + , testCompare + "move trailing spaces outside of formatting" + "docx.trailing_spaces_in_formatting.docx" + "docx.trailing_spaces_in_formatting.native" ] , testGroup "blocks" [ testCompare diff --git a/tests/docx.trailing_spaces_in_formatting.docx b/tests/docx.trailing_spaces_in_formatting.docx new file mode 100644 index 000000000..ebe7404a9 Binary files /dev/null and b/tests/docx.trailing_spaces_in_formatting.docx differ diff --git a/tests/docx.trailing_spaces_in_formatting.native b/tests/docx.trailing_spaces_in_formatting.native new file mode 100644 index 000000000..46ea9bca8 --- /dev/null +++ 
b/tests/docx.trailing_spaces_in_formatting.native @@ -0,0 +1 @@ +[Para [Str "Turn",Space,Str "my",Space,Emph [Str "formatting"],Space,Str "off",Space,Str "after",Space,Str "the",Space,Str "spaces."]] -- cgit v1.2.3 From 08633fad332fe9acfb884a2ba0ee9f8543ab23ed Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 20:26:08 -0400 Subject: Add copyright block to T.P.R.Docx.Reducible. --- src/Text/Pandoc/Readers/Docx/Reducible.hs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs index 1ed31ebd0..8c105d1f1 100644 --- a/src/Text/Pandoc/Readers/Docx/Reducible.hs +++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs @@ -1,5 +1,36 @@ {-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Reducible + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Typeclass for combining adjacent blocks and inlines correctly. 
+-} + + module Text.Pandoc.Readers.Docx.Reducible ((<++>), (<+++>), Reducible, -- cgit v1.2.3 From 21295c5ab5567126c3112b9417f68c76b4f6debf Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 10:33:49 -0400 Subject: Docx reader: add failing tests for inline code and code blocks. --- tests/Tests/Readers/Docx.hs | 9 +++++++++ tests/docx.codeblock.docx | Bin 0 -> 8465 bytes tests/docx.codeblock.native | 3 +++ tests/docx.inline_code.docx | Bin 0 -> 8379 bytes tests/docx.inline_code.native | 1 + 5 files changed, 13 insertions(+) create mode 100644 tests/docx.codeblock.docx create mode 100644 tests/docx.codeblock.native create mode 100644 tests/docx.inline_code.docx create mode 100644 tests/docx.inline_code.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index a42dc31e9..c49bee14e 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -86,6 +86,10 @@ tests = [ testGroup "inlines" "move trailing spaces outside of formatting" "docx.trailing_spaces_in_formatting.docx" "docx.trailing_spaces_in_formatting.native" + , testCompare + "inline code (with VerbatimChar style)" + "docx.inline_code.docx" + "docx.inline_code.native" ] , testGroup "blocks" [ testCompare @@ -108,6 +112,11 @@ tests = [ testGroup "inlines" "tables" "docx.tables.docx" "docx.tables.native" + , testCompare + "code block" + "docx.codeblock.docx" + "docx.codeblock.native" + ] ] diff --git a/tests/docx.codeblock.docx b/tests/docx.codeblock.docx new file mode 100644 index 000000000..8ec00953c Binary files /dev/null and b/tests/docx.codeblock.docx differ diff --git a/tests/docx.codeblock.native b/tests/docx.codeblock.native new file mode 100644 index 000000000..441e33511 --- /dev/null +++ b/tests/docx.codeblock.native @@ -0,0 +1,3 @@ +[Para [Str "This",Space,Str "is",Space,Str "some",Space,Str "code:"] +,CodeBlock ("",[],[]) "readDocx :: ReaderOptions\n -> B.ByteString\n -> Pandoc" +,Para [Str "from",Space,Str "the",Space,Str 
"beginning",Space,Str "of",Space,Str "the",Space,Str "docx",Space,Str "reader."]] diff --git a/tests/docx.inline_code.docx b/tests/docx.inline_code.docx new file mode 100644 index 000000000..75c5ea3cb Binary files /dev/null and b/tests/docx.inline_code.docx differ diff --git a/tests/docx.inline_code.native b/tests/docx.inline_code.native new file mode 100644 index 000000000..11cf2777c --- /dev/null +++ b/tests/docx.inline_code.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "of",Space,Code ("",[],[]) "inline code",Space,Str "with",Space,Str "three",Space,Str "spaces."]] -- cgit v1.2.3 From bebea5e936d6c3c90b977a1b8f69bb3a290d30dc Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 10:34:07 -0400 Subject: Docx reader: pass code tests. --- src/Text/Pandoc/Readers/Docx.hs | 80 ++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index ffe7f5a92..5f62d0b21 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -84,7 +84,7 @@ import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible import Data.Maybe (mapMaybe, isJust, fromJust) -import Data.List (delete, isPrefixOf, (\\), intersect) +import Data.List (delete, isPrefixOf, (\\)) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.ByteString.Base64 (encode) @@ -99,7 +99,7 @@ readDocx opts bytes = Nothing -> error $ "couldn't parse docx file" spansToKeep :: [String] -spansToKeep = ["list-item", "Definition", "DefinitionTerm"] ++ codeSpans +spansToKeep = ["list-item", "Definition", "DefinitionTerm"] -- This is empty, but we put it in for future-proofing. 
@@ -108,20 +108,28 @@ divsToKeep = [] runStyleToContainers :: RunStyle -> [Container Inline] runStyleToContainers rPr = - let formatters = mapMaybe id - [ if isBold rPr then (Just Strong) else Nothing - , if isItalic rPr then (Just Emph) else Nothing - , if isSmallCaps rPr then (Just SmallCaps) else Nothing - , if isStrike rPr then (Just Strikeout) else Nothing - , if isSuperScript rPr then (Just Superscript) else Nothing - , if isSubScript rPr then (Just Subscript) else Nothing - , rStyle rPr >>= - (\s -> if s `elem` spansToKeep then Just s else Nothing) >>= - (\s -> Just $ Span ("", [s], [])) - , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) + let spanClassToContainers :: String -> [Container Inline] + spanClassToContainers s | s `elem` codeSpans = + [Container $ (\ils -> Code ("", [], []) (concatMap ilToCode ils))] + spanClassToContainers s | s `elem` spansToKeep = + [Container $ Span ("", [s], [])] + spanClassToContainers _ = [] + + classContainers = case rStyle rPr of + Nothing -> [] + Just s -> spanClassToContainers s + + formatters = map Container $ mapMaybe id + [ if isBold rPr then (Just Strong) else Nothing + , if isItalic rPr then (Just Emph) else Nothing + , if isSmallCaps rPr then (Just SmallCaps) else Nothing + , if isStrike rPr then (Just Strikeout) else Nothing + , if isSuperScript rPr then (Just Superscript) else Nothing + , if isSubScript rPr then (Just Subscript) else Nothing + , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) ] in - map Container formatters + classContainers ++ formatters divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] @@ -132,7 +140,9 @@ divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = [(Container $ \blks -> Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] divAttrToContainers (c:_) _ | c `elem` codeDivs = - [Container $ \blks -> CodeBlock ("", [], []) (concatMap blkToCode blks)] + -- This is a bit of a cludge. 
We make the codeblock from the raw + -- parparts in bodyPartToBlocks. But we need something to match against. + [Container $ \_ -> CodeBlock ("", [], []) ""] divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = let kvs' = filter (\(k,_) -> k /= "indent") kvs in @@ -183,18 +193,23 @@ runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString +runToString :: Run -> String +runToString (Run _ runElems) = runElemsToString runElems +runToString _ = "" + +parPartToString :: ParPart -> String +parPartToString (PlainRun run) = runToString run +parPartToString (InternalHyperLink _ runs) = concatMap runToString runs +parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs +parPartToString _ = "" + inlineCodeContainer :: Container Inline -> Bool inlineCodeContainer (Container f) = case f [] of - Span (_, classes, _) _ -> (not . null) (classes `intersect` codeSpans) + Code _ "" -> True _ -> False inlineCodeContainer _ = False --- blockCodeContainer :: Container Block -> Bool --- blockCodeContainer (Container f) = case f [] of --- Div (ident, classes, kvs) _ -> (not . 
null) (classes `intersect` codeDivs) --- _ -> False --- blockCodeContainer _ = False runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] runToInlines _ _ (Run rs runElems) @@ -274,7 +289,16 @@ cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]] rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells +blockCodeContainer :: Container Block -> Bool +blockCodeContainer (Container f) = case f [] of + CodeBlock _ _ -> True + _ -> False +blockCodeContainer _ = False + bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block] +bodyPartToBlocks _ _ (Paragraph pPr parparts) + | any blockCodeContainer (parStyleToContainers pPr) = + [CodeBlock ("", [], []) (concatMap parPartToString parparts)] bodyPartToBlocks opts docx (Paragraph pPr parparts) = case parPartsToInlines opts docx parparts of [] -> @@ -358,7 +382,8 @@ docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body ilToCode :: Inline -> String ilToCode (Str s) = s -ilToCode _ = "" +ilToCode Space = " " +ilToCode _ = "" isHeaderClass :: String -> Maybe Int @@ -369,18 +394,7 @@ isHeaderClass s | "Heading" `isPrefixOf` s = _ -> Nothing isHeaderClass _ = Nothing - blksToInlines :: [Block] -> [Inline] blksToInlines (Para ils : _) = ils blksToInlines (Plain ils : _) = ils blksToInlines _ = [] - - -blkToCode :: Block -> String -blkToCode (Para []) = "" -blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils)) -blkToCode (Para ((Span (_, classes, _) ils'): ils)) - | (not . null) (codeSpans `intersect` classes) = - (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) -blkToCode _ = "" - -- cgit v1.2.3 From 2621482d69d96a7f069133e57f6df8a479ad5111 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 11:48:23 -0400 Subject: Docx Reader: add failing defintion list tests. 
--- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx.definition_list.docx | Bin 0 -> 8455 bytes tests/docx.definition_list.native | 7 +++++++ 3 files changed, 11 insertions(+) create mode 100644 tests/docx.definition_list.docx create mode 100644 tests/docx.definition_list.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index c49bee14e..4d062bbc0 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -100,6 +100,10 @@ tests = [ testGroup "inlines" "lists" "docx.lists.docx" "docx.lists.native" + , testCompare + "definition lists" + "docx.definition_list.docx" + "docx.definition_list.native" , testCompare "footnotes and endnotes" "docx.notes.docx" diff --git a/tests/docx.definition_list.docx b/tests/docx.definition_list.docx new file mode 100644 index 000000000..a19edda45 Binary files /dev/null and b/tests/docx.definition_list.docx differ diff --git a/tests/docx.definition_list.native b/tests/docx.definition_list.native new file mode 100644 index 000000000..2e08ff1ac --- /dev/null +++ b/tests/docx.definition_list.native @@ -0,0 +1,7 @@ +[DefinitionList + [([Str "Term",Space,Str "1"], + [[Para [Str "Definition",Space,Str "1"]]]) + ,([Str "Term",Space,Str "2",Space,Str "with",Space,Emph [Str "inline",Space,Str "markup"]], + [[Para [Str "Definition",Space,Str "2"] + ,CodeBlock ("",[],[]) "{ some code, part of Definition 2 }" + ,Para [Str "Third",Space,Str "paragraph",Space,Str "of",Space,Str "definition",Space,Str "2."]]])]] -- cgit v1.2.3 From 5ae6b8c6f1ba29fa8b6eec065c6d463e5e03a9aa Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 12:10:49 -0400 Subject: Docx reader: pass definition test. This commit also fixes a problem with the previous code pushes, which wouldn't allow code blocks to share a div. 
--- src/Text/Pandoc/Readers/Docx.hs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 5f62d0b21..67cf519dc 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -99,12 +99,12 @@ readDocx opts bytes = Nothing -> error $ "couldn't parse docx file" spansToKeep :: [String] -spansToKeep = ["list-item", "Definition", "DefinitionTerm"] +spansToKeep = [] -- This is empty, but we put it in for future-proofing. divsToKeep :: [String] -divsToKeep = [] +divsToKeep = ["list-item", "Definition", "DefinitionTerm"] runStyleToContainers :: RunStyle -> [Container Inline] runStyleToContainers rPr = @@ -139,18 +139,18 @@ divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = in [(Container $ \blks -> Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] -divAttrToContainers (c:_) _ | c `elem` codeDivs = +divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = + (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) +divAttrToContainers (c:cs) kvs | c `elem` codeDivs = -- This is a bit of a cludge. We make the codeblock from the raw -- parparts in bodyPartToBlocks. But we need something to match against. 
- [Container $ \_ -> CodeBlock ("", [], []) ""] + (Container $ \_ -> CodeBlock ("", [], []) "") : (divAttrToContainers cs kvs) divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = let kvs' = filter (\(k,_) -> k /= "indent") kvs in (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs') divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) -divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = - (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs divAttrToContainers [] (kv:kvs) | fst kv == "indent" = (Container BlockQuote) : divAttrToContainers [] kvs @@ -298,7 +298,12 @@ blockCodeContainer _ = False bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block] bodyPartToBlocks _ _ (Paragraph pPr parparts) | any blockCodeContainer (parStyleToContainers pPr) = - [CodeBlock ("", [], []) (concatMap parPartToString parparts)] + let + otherConts = filter (not . blockCodeContainer) (parStyleToContainers pPr) + in + rebuild + otherConts + [CodeBlock ("", [], []) (concatMap parPartToString parparts)] bodyPartToBlocks opts docx (Paragraph pPr parparts) = case parPartsToInlines opts docx parparts of [] -> @@ -372,7 +377,7 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = map (makeHeaderAnchors) $ - bottomUp blocksToDefinitions $ + blocksToDefinitions $ blocksToBullets $ concatMap (bodyPartToBlocks opts docx) bps -- cgit v1.2.3 From a8866bc1215a4e4c6582dedc940c86cdaeb02d9f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 12:15:26 -0400 Subject: Docx reader: remove T.P.Generic import. This marks the removal of the final tree-walk in the code. (Though there is still one in the Lists module.) 
--- src/Text/Pandoc/Readers/Docx.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 67cf519dc..59fb7b37f 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -76,7 +76,6 @@ import Codec.Archive.Zip import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Builder (text, toList) -import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.UTF8 (toString) import Text.Pandoc.Walk -- cgit v1.2.3 From 69743cd5981d7e910c5d83da18fc698c8d522e69 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 14:24:38 -0400 Subject: Docx reader: Ignore zero (or negative) indent If a block has an indentation less than or equal to zero, it should not be treated as a block quote. --- src/Text/Pandoc/Readers/Docx.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 59fb7b37f..b787ca9fb 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -132,7 +132,6 @@ runStyleToContainers rPr = divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] -divAttrToContainers [] [] = [] divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = let n = fromJust (isHeaderClass c) in @@ -151,10 +150,14 @@ divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs -divAttrToContainers [] (kv:kvs) | fst kv == "indent" = - (Container BlockQuote) : divAttrToContainers [] kvs -divAttrToContainers [] (_:kvs) = - divAttrToContainers [] kvs +divAttrToContainers [] kvs | isJust (lookup "indent" kvs) = + let kvs' = filter (\(k,_) -> k /= "indent") kvs + in + case fromJust (lookup "indent" kvs) of + "0" -> 
divAttrToContainers [] kvs' + ('-' : _) -> divAttrToContainers [] kvs' + _ -> (Container BlockQuote) : divAttrToContainers [] kvs' +divAttrToContainers _ _ = [] parStyleToContainers :: ParagraphStyle -> [Container Block] -- cgit v1.2.3 From c343f1a90bc35d745de673de5ff771ddbe60be54 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 08:10:19 -0400 Subject: Docx Reader: Add change types Insertion and deletion. Dates are just strings for now. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 18200bcf9..1cb5fe2e3 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -455,6 +455,8 @@ elemToCell ns element elemToCell _ _ = Nothing data ParPart = PlainRun Run + | Insertion ChangeId Author ChangeDate [Run] + | Deletion ChangeId Author ChangeDate [Run] | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink RelId [Run] @@ -604,4 +606,6 @@ type Target = String type Anchor = String type BookMarkId = String type RelId = String - +type ChangeId = String +type Author = String +type ChangeDate = String -- cgit v1.2.3 From 38e1d3e95b8240eeb35db0a1a56e308cfb4835e4 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 10:32:48 -0400 Subject: Docx reader: Parse Insertions and Deletions. This is just for the Parse module, reading it into the Docx format. It still has to be translated into pandoc. 
--- src/Text/Pandoc/Readers/Docx/Parse.hs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 1cb5fe2e3..c76ef7511 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -541,7 +541,7 @@ elemToRun _ _ = Nothing elemToRunElem :: NameSpaces -> Element -> Maybe RunElem elemToRunElem ns element - | qName (elName element) == "t" && + | (qName (elName element) == "t" || qName (elName element) == "delText") && qURI (elName element) == (lookup "w" ns) = Just $ TextRun (strContent element) | qName (elName element) == "br" && @@ -581,6 +581,22 @@ elemToParPart ns element Nothing -> do r <- elemToRun ns element return $ PlainRun r +elemToParPart ns element + | qName (elName element) == "ins" && + qURI (elName element) == (lookup "w" ns) = do + cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element + cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element + let runs = mapMaybe (elemToRun ns) (elChildren element) + return $ Insertion cId cAuthor cDate runs +elemToParPart ns element + | qName (elName element) == "del" && + qURI (elName element) == (lookup "w" ns) = do + cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element + cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element + let runs = mapMaybe (elemToRun ns) (elChildren element) + return $ Deletion cId cAuthor cDate runs elemToParPart ns element | qName (elName element) == "bookmarkStart" && qURI (elName element) == (lookup "w" ns) = do -- cgit v1.2.3 From ed44e4ca8c8f3d3c4c7ac65b98f16732c8173b88 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 10:38:01 -0400 Subject: Docx reader: Add rudimentary track changes support. 
This will only read the insertions, and ignore the deletions. --- src/Text/Pandoc/Readers/Docx.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index b787ca9fb..130e2a1e2 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -234,6 +234,9 @@ runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r +parPartToInlines opts docx (Insertion _ _ _ runs) = + concatMap (runToInlines opts docx) runs +parPartToInlines _ _ (Deletion _ _ _ _) = [] parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = [] parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []] parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = -- cgit v1.2.3 From 9614ddfedc18cccbf9fbe1a23fae200c7e67d72d Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 11:00:15 -0400 Subject: Docx reader: Remove unnecessary filter in Parse. mapMaybe does the filtering for us. 
--- src/Text/Pandoc/Readers/Docx/Parse.hs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index c76ef7511..dbbd65681 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -281,10 +281,6 @@ elemToBody ns element | qName (elName element) == "body" && qURI (elName element $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element elemToBody _ _ = Nothing -isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool -isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && - qURI q == (lookup "w" ns) - elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) elemToNumInfo ns element | qName (elName element) == "p" && @@ -319,9 +315,8 @@ elemToBodyPart ns element | qName (elName element) == "p" && qURI (elName element) == (lookup "w" ns) = let parstyle = elemToParagraphStyle ns element - parparts = mapMaybe id - $ map (elemToParPart ns) - $ filterChildrenName (isRunOrLinkOrBookmark ns) element + parparts = mapMaybe (elemToParPart ns) + $ elChildren element in case elemToNumInfo ns element of Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts -- cgit v1.2.3 From a2b6ab847cb1c997c6ae7b8ed36f543a7ed90ecd Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 11:09:28 -0400 Subject: Docx reader: Add tests for basic track changes This is what seems like the sensible default: read in insertions, and ignore deletions. In the future, it would be good if options were available for either taking in deletions or keeping both in some scriptable format. 
--- tests/Tests/Readers/Docx.hs | 10 ++++++++++ tests/docx.track_changes_deletion.docx | Bin 0 -> 13350 bytes tests/docx.track_changes_deletion_only_ins.native | 1 + tests/docx.track_changes_insertion.docx | Bin 0 -> 12956 bytes tests/docx.track_changes_insertion_only_ins.native | 1 + 5 files changed, 12 insertions(+) create mode 100644 tests/docx.track_changes_deletion.docx create mode 100644 tests/docx.track_changes_deletion_only_ins.native create mode 100644 tests/docx.track_changes_insertion.docx create mode 100644 tests/docx.track_changes_insertion_only_ins.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 4d062bbc0..f34e123ed 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -122,5 +122,15 @@ tests = [ testGroup "inlines" "docx.codeblock.native" ] + , testGroup "track changes" + [ testCompare + "insert insertion (insertions only)" + "docx.track_changes_insertion.docx" + "docx.track_changes_insertion_only_ins.native" + , testCompare + "skip deletion (insertions only)" + "docx.track_changes_deletion.docx" + "docx.track_changes_deletion_only_ins.native" + ] ] diff --git a/tests/docx.track_changes_deletion.docx b/tests/docx.track_changes_deletion.docx new file mode 100644 index 000000000..5cfdbeed8 Binary files /dev/null and b/tests/docx.track_changes_deletion.docx differ diff --git a/tests/docx.track_changes_deletion_only_ins.native b/tests/docx.track_changes_deletion_only_ins.native new file mode 100644 index 000000000..205c67810 --- /dev/null +++ b/tests/docx.track_changes_deletion_only_ins.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "a",Space,Str "deletion."]] diff --git a/tests/docx.track_changes_insertion.docx b/tests/docx.track_changes_insertion.docx new file mode 100644 index 000000000..fbdc9003e Binary files /dev/null and b/tests/docx.track_changes_insertion.docx differ diff --git 
a/tests/docx.track_changes_insertion_only_ins.native b/tests/docx.track_changes_insertion_only_ins.native new file mode 100644 index 000000000..ca2e46df0 --- /dev/null +++ b/tests/docx.track_changes_insertion_only_ins.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "two",Space,Str "exciting",Space,Str "insertions."]] -- cgit v1.2.3 From 3ec62d006483d369bb896b283db82e4437b66d05 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 13:50:08 -0400 Subject: Add TrackChanges type to options. --- src/Text/Pandoc/Options.hs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 611a6bb06..e0ad866ad 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -264,6 +264,12 @@ data HTMLSlideVariant = S5Slides | NoSlides deriving (Show, Read, Eq) +-- | Options for accepting or rejecting MS Word track-changes. +data TrackChanges = AcceptChanges + | RejectChanges + | AllChanges + deriving (Show, Read, Eq) + -- | Options for writers data WriterOptions = WriterOptions { writerStandalone :: Bool -- ^ Include header and footer -- cgit v1.2.3 From 6ff84b5e8da47ff7f4b77bd6cd017beae81fed97 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 13:57:56 -0400 Subject: Add reader option for track changes. 
--- src/Text/Pandoc/Options.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index e0ad866ad..d0a76a001 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -211,6 +211,7 @@ data ReaderOptions = ReaderOptions{ -- indented code blocks , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrace :: Bool -- ^ Print debugging info + , readerTrackChanges :: TrackChanges } deriving (Show, Read) instance Default ReaderOptions @@ -227,6 +228,7 @@ instance Default ReaderOptions , readerIndentedCodeClasses = [] , readerDefaultImageExtension = "" , readerTrace = False + , readerTrackChanges = AcceptChanges } -- -- cgit v1.2.3 From d824f89fb3996fd27e156da1141808fbf468819d Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 14:05:21 -0400 Subject: Add TrackChanges to Options export. --- src/Text/Pandoc/Options.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index d0a76a001..b7a3a4b7b 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -41,6 +41,7 @@ module Text.Pandoc.Options ( Extension(..) , HTMLSlideVariant (..) , EPUBVersion (..) , WriterOptions (..) + , TrackChanges (..) , def , isEnabled ) where -- cgit v1.2.3 From aa194d387c103d001fc911f37d4cfc26be98d93c Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 14:09:01 -0400 Subject: Add track changes option to command line. --- pandoc.hs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandoc.hs b/pandoc.hs index 10673132b..588723322 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -174,6 +174,7 @@ data Opt = Opt , optTeXLigatures :: Bool -- ^ Use TeX ligatures for quotes/dashes , optDefaultImageExtension :: String -- ^ Default image extension , optTrace :: Bool -- ^ Print debug information + , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. 
} -- | Defaults for command-line options. @@ -230,6 +231,7 @@ defaultOpts = Opt , optTeXLigatures = True , optDefaultImageExtension = "" , optTrace = False + , optTrackChanges = AcceptChanges } -- | A list of functions, each transforming the options data structure @@ -776,6 +778,19 @@ options = (\opt -> return opt { optTrace = True })) "" -- "Turn on diagnostic tracing in readers." + , Option "" ["track-changes"] + (ReqArg + (\arg opt -> do + action <- case arg of + "accept" -> return AcceptChanges + "reject" -> return RejectChanges + "all" -> return AllChanges + _ -> err 6 + ("Unknown option for track-changes: " ++ arg) + return opt { optTrackChanges = action }) + "accept|reject|all") + "" -- "Accepting or reject MS Word track-changes."" + , Option "" ["dump-args"] (NoArg (\opt -> return opt { optDumpArgs = True })) @@ -973,6 +988,7 @@ main = do , optTeXLigatures = texLigatures , optDefaultImageExtension = defaultImageExtension , optTrace = trace + , optTrackChanges = trackChanges } = opts when dumpArgs $ @@ -1097,6 +1113,7 @@ main = do , readerApplyMacros = not laTeXOutput , readerDefaultImageExtension = defaultImageExtension , readerTrace = trace + , readerTrackChanges = trackChanges } let writerOptions = def { writerStandalone = standalone', -- cgit v1.2.3 From 0e9bf37f64be0a121a0d682570fc8f0cf2b27c51 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 14:17:20 -0400 Subject: Docx reader: Make use of track-changes option. 
--- src/Text/Pandoc/Readers/Docx.hs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 130e2a1e2..cb0735e31 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -234,9 +234,22 @@ runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r -parPartToInlines opts docx (Insertion _ _ _ runs) = - concatMap (runToInlines opts docx) runs -parPartToInlines _ _ (Deletion _ _ _ _) = [] +parPartToInlines opts docx (Insertion _ author date runs) = + case readerTrackChanges opts of + AcceptChanges -> concatMap (runToInlines opts docx) runs + RejectChanges -> [] + AllChanges -> + [Span + ("", ["insertion"], [("author", author), ("date", date)]) + (concatMap (runToInlines opts docx) runs)] +parPartToInlines opts docx (Deletion _ author date runs) = + case readerTrackChanges opts of + AcceptChanges -> [] + RejectChanges -> concatMap (runToInlines opts docx) runs + AllChanges -> + [Span + ("", ["deletion"], [("author", author), ("date", date)]) + (concatMap (runToInlines opts docx) runs)] parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = [] parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []] parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = -- cgit v1.2.3 From 58da1cd3bd986716d3f4441c540e96b1fcc3ea4b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 16:01:52 -0400 Subject: Updated README to include "--track-changes" option. --- README | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README b/README index 012b75e2a..88eb0d8e2 100644 --- a/README +++ b/README @@ -308,6 +308,17 @@ Reader options `--tab-stop=`*NUMBER* : Specify the number of spaces per tab (default is 4). 
+`--track-changes=`*accept|reject|all* +: Specifies what to do with insertions and deltions produced by the MS + Word "track-changes" feature. *accept* (the default), inserts all + insertions, and ignores all deletions. *reject* inserts all + deletions and ignores insertions. *all* puts in both insertions + and deletions, wrapped in spans with `insertion` and `deletion` + classes, respectively. The author and time of change is + specified. *all* is useful for scripting: only accepting changes + from a certain reviewer, say, or before a certain date. This + option only affects the Docx reader. + General writer options ---------------------- -- cgit v1.2.3 From afdc0af779d245e781ec5de4cf7b3afcef47190b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 16:13:59 -0400 Subject: Track changes tests. --- tests/Tests/Readers/Docx.hs | 32 +++++++++++++++++++--- tests/docx.track_changes_deletion_accept.native | 1 + tests/docx.track_changes_deletion_all.native | 1 + tests/docx.track_changes_deletion_only_ins.native | 1 - tests/docx.track_changes_deletion_reject.native | 1 + tests/docx.track_changes_insertion_accept.native | 1 + tests/docx.track_changes_insertion_all.native | 1 + tests/docx.track_changes_insertion_only_ins.native | 1 - tests/docx.track_changes_insertion_reject.native | 1 + 9 files changed, 34 insertions(+), 6 deletions(-) create mode 100644 tests/docx.track_changes_deletion_accept.native create mode 100644 tests/docx.track_changes_deletion_all.native delete mode 100644 tests/docx.track_changes_deletion_only_ins.native create mode 100644 tests/docx.track_changes_deletion_reject.native create mode 100644 tests/docx.track_changes_insertion_accept.native create mode 100644 tests/docx.track_changes_insertion_all.native delete mode 100644 tests/docx.track_changes_insertion_only_ins.native create mode 100644 tests/docx.track_changes_insertion_reject.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 
f34e123ed..8c51217cf 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -124,13 +124,37 @@ tests = [ testGroup "inlines" ] , testGroup "track changes" [ testCompare - "insert insertion (insertions only)" + "insertion (default)" "docx.track_changes_insertion.docx" - "docx.track_changes_insertion_only_ins.native" + "docx.track_changes_insertion_accept.native" + , testCompareWithOpts def{readerTrackChanges=AcceptChanges} + "insert insertion (accept)" + "docx.track_changes_insertion.docx" + "docx.track_changes_insertion_accept.native" + , testCompareWithOpts def{readerTrackChanges=RejectChanges} + "remove insertion (reject)" + "docx.track_changes_insertion.docx" + "docx.track_changes_insertion_reject.native" , testCompare - "skip deletion (insertions only)" + "deletion (default)" + "docx.track_changes_deletion.docx" + "docx.track_changes_deletion_accept.native" + , testCompareWithOpts def{readerTrackChanges=AcceptChanges} + "remove deletion (accept)" + "docx.track_changes_deletion.docx" + "docx.track_changes_deletion_accept.native" + , testCompareWithOpts def{readerTrackChanges=RejectChanges} + "insert deletion (reject)" + "docx.track_changes_deletion.docx" + "docx.track_changes_deletion_reject.native" + , testCompareWithOpts def{readerTrackChanges=AllChanges} + "keep insertion (all)" + "docx.track_changes_deletion.docx" + "docx.track_changes_deletion_all.native" + , testCompareWithOpts def{readerTrackChanges=AllChanges} + "keep deletion (all)" "docx.track_changes_deletion.docx" - "docx.track_changes_deletion_only_ins.native" + "docx.track_changes_deletion_all.native" ] ] diff --git a/tests/docx.track_changes_deletion_accept.native b/tests/docx.track_changes_deletion_accept.native new file mode 100644 index 000000000..205c67810 --- /dev/null +++ b/tests/docx.track_changes_deletion_accept.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "a",Space,Str "deletion."]] diff --git 
a/tests/docx.track_changes_deletion_all.native b/tests/docx.track_changes_deletion_all.native new file mode 100644 index 000000000..7f4ed2a90 --- /dev/null +++ b/tests/docx.track_changes_deletion_all.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "a",Span ("",["deletion"],[("author","eng-dept"),("date","2014-06-25T10:42:00Z")]) [Str "n",Space,Str "excessively",Space,Str "modified"],Space,Str "deletion."]] diff --git a/tests/docx.track_changes_deletion_only_ins.native b/tests/docx.track_changes_deletion_only_ins.native deleted file mode 100644 index 205c67810..000000000 --- a/tests/docx.track_changes_deletion_only_ins.native +++ /dev/null @@ -1 +0,0 @@ -[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "a",Space,Str "deletion."]] diff --git a/tests/docx.track_changes_deletion_reject.native b/tests/docx.track_changes_deletion_reject.native new file mode 100644 index 000000000..04283bee5 --- /dev/null +++ b/tests/docx.track_changes_deletion_reject.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "an",Space,Str "excessively",Space,Str "modified",Space,Str "deletion."]] diff --git a/tests/docx.track_changes_insertion_accept.native b/tests/docx.track_changes_insertion_accept.native new file mode 100644 index 000000000..ca2e46df0 --- /dev/null +++ b/tests/docx.track_changes_insertion_accept.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "two",Space,Str "exciting",Space,Str "insertions."]] diff --git a/tests/docx.track_changes_insertion_all.native b/tests/docx.track_changes_insertion_all.native new file mode 100644 index 000000000..12664e425 --- /dev/null +++ b/tests/docx.track_changes_insertion_all.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Span 
("",["insertion"],[("author","eng-dept"),("date","2014-06-25T10:40:00Z")]) [Str "two",Space,Str "exciting"],Space,Str "insertions."]] diff --git a/tests/docx.track_changes_insertion_only_ins.native b/tests/docx.track_changes_insertion_only_ins.native deleted file mode 100644 index ca2e46df0..000000000 --- a/tests/docx.track_changes_insertion_only_ins.native +++ /dev/null @@ -1 +0,0 @@ -[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "two",Space,Str "exciting",Space,Str "insertions."]] diff --git a/tests/docx.track_changes_insertion_reject.native b/tests/docx.track_changes_insertion_reject.native new file mode 100644 index 000000000..def000abd --- /dev/null +++ b/tests/docx.track_changes_insertion_reject.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "insertions."]] -- cgit v1.2.3 From 74676df75f0ab109aae2b1843c26058dfff04297 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 16:19:27 -0400 Subject: Fix typo in README. --- README | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README b/README index 88eb0d8e2..80371e6f7 100644 --- a/README +++ b/README @@ -309,13 +309,13 @@ Reader options : Specify the number of spaces per tab (default is 4). `--track-changes=`*accept|reject|all* -: Specifies what to do with insertions and deltions produced by the MS +: Specifies what to do with insertions and deletions produced by the MS Word "track-changes" feature. *accept* (the default), inserts all insertions, and ignores all deletions. *reject* inserts all deletions and ignores insertions. *all* puts in both insertions and deletions, wrapped in spans with `insertion` and `deletion` classes, respectively. The author and time of change is - specified. *all* is useful for scripting: only accepting changes + included. *all* is useful for scripting: only accepting changes from a certain reviewer, say, or before a certain date. 
This option only affects the Docx reader. -- cgit v1.2.3 From 2396be6f5777ac04067264d489fb84fbec72d164 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 17:12:03 -0400 Subject: Docx reader: Code cleanup in parse. Remove some redundant ways of dealing with Maybe. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 50 +++++++++-------------------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index dbbd65681..07f34450d 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -148,7 +148,7 @@ absNumElemToAbsNum ns element | let levelElems = findChildren (QName "lvl" (lookup "w" ns) (Just "w")) element - levels = mapMaybe id $ map (levelElemToLevel ns) levelElems + levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels absNumElemToAbsNum _ _ = Nothing @@ -180,8 +180,8 @@ archiveToNumbering zf = absNumElems = findChildren (QName "abstractNum" (lookup "w" namespaces) (Just "w")) numberingElem - nums = mapMaybe id $ map (numElemToNum namespaces) numElems - absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems + nums = mapMaybe (numElemToNum namespaces) numElems + absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems return $ Numbering namespaces nums absNums data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) @@ -193,10 +193,8 @@ noteElemToNote ns element qURI (elName element) == (lookup "w" ns) = do noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - let bps = map fromJust - $ filter isJust - $ map (elemToBodyPart ns) - $ filterChildrenName (isParOrTbl ns) element + let bps = mapMaybe (elemToBodyPart ns) + $ elChildren element return $ (noteId, bps) noteElemToNote _ _ = Nothing @@ -210,9 +208,7 @@ elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] elemToNotes ns notetype 
element | qName (elName element) == (notetype ++ "s") && qURI (elName element) == (lookup "w" ns) = - Just $ map fromJust - $ filter isJust - $ map (noteElemToNote ns) + Just $ mapMaybe (noteElemToNote ns) $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element elemToNotes _ _ _ = Nothing @@ -260,25 +256,19 @@ relElemToRelationship _ = Nothing archiveToRelationships :: Archive -> [Relationship] archiveToRelationships archive = let relPaths = filter filePathIsRel (filesInArchive archive) - entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths - relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries - rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems + entries = mapMaybe (\f -> findEntryByPath f archive) relPaths + relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries + rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems in rels data Body = Body [BodyPart] deriving Show -isParOrTbl :: NameSpaces -> QName -> Bool -isParOrTbl ns q = qName q `elem` ["p", "tbl"] && - qURI q == (lookup "w" ns) - elemToBody :: NameSpaces -> Element -> Maybe Body elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = Just $ Body - $ map fromJust - $ filter isJust - $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element + $ mapMaybe (elemToBodyPart ns) $ elChildren element elemToBody _ _ = Nothing elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) @@ -295,21 +285,6 @@ elemToNumInfo ns element return (numId, lvl) elemToNumInfo _ _ = Nothing --- isBookMarkTag :: NameSpaces -> QName -> Bool --- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] && --- qURI q == (lookup "w" ns) - --- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark --- parChildrenToBookmark ns (bms : bme : _) --- | qName (elName bms) == "bookmarkStart" 
&& --- qURI (elName bms) == (lookup "w" ns) && --- qName (elName bme) == "bookmarkEnd" && --- qURI (elName bme) == (lookup "w" ns) = do --- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms --- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms --- return $ (bmId, bmName) --- parChildrenToBookmark _ _ = Nothing - elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart elemToBodyPart ns element | qName (elName element) == "p" && @@ -382,8 +357,7 @@ elemToParagraphStyle ns element = Just pPr -> ParagraphStyle {pStyle = - mapMaybe id $ - map + mapMaybe (findAttr (QName "val" (lookup "w" ns) (Just "w"))) (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) , indent = @@ -601,7 +575,7 @@ elemToParPart ns element elemToParPart ns element | qName (elName element) == "hyperlink" && qURI (elName element) == (lookup "w" ns) = - let runs = map fromJust $ filter isJust $ map (elemToRun ns) + let runs = mapMaybe (elemToRun ns) $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element in case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of -- cgit v1.2.3 From b2127311cb360479dbea59264ada0112a94d7819 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 26 Jun 2014 12:34:41 -0700 Subject: Require haddock-library >= 1.1 and simplify haddock reader code. See #1346. 
--- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/Haddock.hs | 40 +------------------------------------- 2 files changed, 2 insertions(+), 40 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index 634d249fe..eeb233d3d 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -259,7 +259,7 @@ Library hslua >= 0.3 && < 0.4, binary >= 0.5 && < 0.8, SHA >= 1.6 && < 1.7, - haddock-library >= 1.0 && < 1.1 + haddock-library >= 1.1 && < 1.2 if flag(https) Build-Depends: http-client >= 0.3.2 && < 0.4, http-client-tls >= 0.2 && < 0.3, diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index a3dfb7c3c..4b46c869d 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -43,11 +43,8 @@ docHToBlocks d' = (docHToInlines False $ headerTitle h) DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) DocString _ -> inlineFallback - DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h) DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h - DocParagraph x -> let (ils, rest) = getInlines x - in (B.para $ docHToInlines False ils) - <> docHToBlocks rest + DocParagraph x -> B.para $ docHToInlines False x DocIdentifier _ -> inlineFallback DocIdentifierUnchecked _ -> inlineFallback DocModule s -> B.plain $ docHToInlines False $ DocModule s @@ -115,40 +112,6 @@ docHToInlines isCode d' = DocProperty _ -> mempty DocExamples _ -> mempty -getInlines :: DocH String Identifier -> (DocH String Identifier, DocH String Identifier) -getInlines (DocAppend x y) = if isInline x - then let (a, b) = getInlines y - in (DocAppend x a, b) - else (DocEmpty, DocAppend x y) -getInlines x = if isInline x - then (x, DocEmpty) - else (DocEmpty, x) - -isInline :: DocH String Identifier -> Bool -isInline d' = - case d' of - DocEmpty -> True - DocAppend d1 _ -> isInline d1 - DocString _ -> True - DocParagraph _ -> False - DocIdentifier _ -> True - DocIdentifierUnchecked _ -> True - DocModule _ -> True - DocWarning _ -> True - 
DocEmphasis _ -> True - DocMonospaced _ -> True - DocBold _ -> True - DocHeader _ -> False - DocUnorderedList _ -> False - DocOrderedList _ -> False - DocDefList _ -> False - DocCodeBlock _ -> False - DocHyperlink _ -> True - DocPic _ -> True - DocAName _ -> True - DocProperty _ -> False - DocExamples _ -> False - -- | Create an 'Example', stripping superfluous characters as appropriate makeExample :: String -> String -> [String] -> Blocks makeExample prompt expression result = @@ -173,4 +136,3 @@ makeExample prompt expression result = substituteBlankLine "" = "" substituteBlankLine line = line coder = B.codeWith ([], ["result"], []) - -- cgit v1.2.3 From b1a8f1fa1ad8a6083f0e00cf786eaeff5f10c3be Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 27 Jun 2014 18:30:57 -0700 Subject: Fixed `--filter` so it doesn't search PATH for a filter with a path. This fixed a bug wherein `--filter ./caps.py` would run `caps.py` from the system path, even if there was a `caps.py` in the working directory. --- pandoc.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandoc.hs b/pandoc.hs index 588723322..6281113cb 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -96,7 +96,9 @@ isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","ep externalFilter :: FilePath -> [String] -> Pandoc -> IO Pandoc externalFilter f args' d = do - mbexe <- findExecutable f + mbexe <- if '/' `elem` f -- don't check PATH if filter name it has a path + then return Nothing + else findExecutable f (f', args'') <- case mbexe of Just x -> return (x, args') Nothing -> do -- cgit v1.2.3 From 4248f25152d5715ad99f9d8dda8bf83f33f650ff Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 26 Jun 2014 16:48:41 -0400 Subject: Move Docx reader to DocxContext monad This is a ReaderT State stack, which keeps track of some environment info, such as the options and the docx doc. 
The state will come in handy in the future, for a couple of planned features (rewriting the section anchors as auto_idents, and hopefully smart-quoting). --- src/Text/Pandoc/Readers/Docx.hs | 247 +++++++++++++++++++++++----------------- 1 file changed, 140 insertions(+), 107 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index cb0735e31..5773027f2 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -88,6 +88,9 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.ByteString.Base64 (encode) import System.FilePath (combine) +import qualified Data.Map as M +import Control.Monad.Reader +import Control.Monad.State readDocx :: ReaderOptions -> B.ByteString @@ -97,11 +100,24 @@ readDocx opts bytes = Just docx -> Pandoc nullMeta (docxToBlocks opts docx) Nothing -> error $ "couldn't parse docx file" -spansToKeep :: [String] -spansToKeep = [] +data DState = DState { docxHdrLinks :: M.Map String String } + +data DEnv = DEnv { docxOptions :: ReaderOptions + , docxDocument :: Docx} + +type DocxContext = ReaderT DEnv (State DState) + +evalDocxContext :: DocxContext a -> DEnv -> DState -> a +evalDocxContext ctx env st = evalState (runReaderT ctx env) st + +concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] +concatMapM f xs = liftM concat (mapM f xs) -- This is empty, but we put it in for future-proofing. 
+spansToKeep :: [String] +spansToKeep = [] + divsToKeep :: [String] divsToKeep = ["list-item", "Definition", "DefinitionTerm"] @@ -213,57 +229,69 @@ inlineCodeContainer (Container f) = case f [] of inlineCodeContainer _ = False -runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] -runToInlines _ _ (Run rs runElems) +runToInlines :: Run -> DocxContext [Inline] +runToInlines (Run rs runElems) | any inlineCodeContainer (runStyleToContainers rs) = + return $ rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems] | otherwise = + return $ rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems) -runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) = +runToInlines (Footnote fnId) = do + (Docx _ notes _ _ _ ) <- asks docxDocument case (getFootNote fnId notes) of - Just bodyParts -> - [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)] - Nothing -> - [Note []] -runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = + Just bodyParts -> do + blks <- concatMapM bodyPartToBlocks bodyParts + return $ [Note blks] + Nothing -> return [Note []] +runToInlines (Endnote fnId) = do + (Docx _ notes _ _ _ ) <- asks docxDocument case (getEndNote fnId notes) of - Just bodyParts -> - [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)] - Nothing -> - [Note []] - -parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] -parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r -parPartToInlines opts docx (Insertion _ author date runs) = + Just bodyParts -> do + blks <- concatMapM bodyPartToBlocks bodyParts + return $ [Note blks] + Nothing -> return [Note []] + +parPartToInlines :: ParPart -> DocxContext [Inline] +parPartToInlines (PlainRun r) = runToInlines r +parPartToInlines (Insertion _ author date runs) = do + opts <- asks docxOptions case readerTrackChanges opts of - AcceptChanges -> concatMap (runToInlines opts docx) runs - RejectChanges -> [] - AllChanges -> - [Span - ("", ["insertion"], [("author", author), 
("date", date)]) - (concatMap (runToInlines opts docx) runs)] -parPartToInlines opts docx (Deletion _ author date runs) = + AcceptChanges -> concatMapM runToInlines runs >>= return + RejectChanges -> return [] + AllChanges -> do + ils <- (concatMapM runToInlines runs) + return [Span + ("", ["insertion"], [("author", author), ("date", date)]) + ils] +parPartToInlines (Deletion _ author date runs) = do + opts <- asks docxOptions case readerTrackChanges opts of - AcceptChanges -> [] - RejectChanges -> concatMap (runToInlines opts docx) runs - AllChanges -> - [Span - ("", ["deletion"], [("author", author), ("date", date)]) - (concatMap (runToInlines opts docx) runs)] -parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = [] -parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []] -parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = - case lookupRelationship relid rels of + AcceptChanges -> return [] + RejectChanges -> concatMapM runToInlines runs >>= return + AllChanges -> do + ils <- concatMapM runToInlines runs + return [Span + ("", ["deletion"], [("author", author), ("date", date)]) + ils] +parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = return [] +parPartToInlines (BookMark _ anchor) = return [Span (anchor, ["anchor"], []) []] +parPartToInlines (Drawing relid) = do + (Docx _ _ _ rels _) <- asks docxDocument + return $ case lookupRelationship relid rels of Just target -> [Image [] (combine "word" target, "")] Nothing -> [Image [] ("", "")] -parPartToInlines opts docx (InternalHyperLink anchor runs) = - [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")] -parPartToInlines opts docx@(Docx _ _ _ rels _) (ExternalHyperLink relid runs) = - case lookupRelationship relid rels of +parPartToInlines (InternalHyperLink anchor runs) = do + ils <- concatMapM runToInlines runs + return [Link ils ('#' : anchor, "")] +parPartToInlines (ExternalHyperLink relid runs) = do + (Docx _ _ _ rels _) <- asks 
docxDocument + rs <- concatMapM runToInlines runs + return $ case lookupRelationship relid rels of Just target -> - [Link (concatMap (runToInlines opts docx) runs) (target, "")] + [Link rs (target, "")] Nothing -> - [Link (concatMap (runToInlines opts docx) runs) ("", "")] + [Link rs ("", "")] isAnchorSpan :: Inline -> Bool isAnchorSpan (Span (ident, classes, kvs) ils) = @@ -287,25 +315,18 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = _ -> h makeHeaderAnchors blk = blk -parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline] -parPartsToInlines opts docx parparts = - -- - -- We're going to skip data-uri's for now. It should be an option, - -- not mandatory. - -- - (if False -- TODO depend on option - then walk (makeImagesSelfContained docx) - else id) $ - -- bottomUp spanTrim $ - -- bottomUp spanCorrect $ - -- bottomUp spanReduce $ - reduceList $ concatMap (parPartToInlines opts docx) parparts - -cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block] -cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps - -rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]] -rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells +parPartsToInlines :: [ParPart] -> DocxContext [Inline] +parPartsToInlines parparts = do + ils <- concatMapM parPartToInlines parparts >>= + -- TODO: Option for self-containted images + (if False then (walkM makeImagesSelfContained) else return) + return $ reduceList $ ils + +cellToBlocks :: Cell -> DocxContext [Block] +cellToBlocks (Cell bps) = concatMapM bodyPartToBlocks bps + +rowToBlocksList :: Row -> DocxContext [[Block]] +rowToBlocksList (Row cells) = mapM cellToBlocks cells blockCodeContainer :: Container Block -> Bool blockCodeContainer (Container f) = case f [] of @@ -313,27 +334,32 @@ blockCodeContainer (Container f) = case f [] of _ -> False blockCodeContainer _ = False -bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block] -bodyPartToBlocks _ _ (Paragraph 
pPr parparts) +bodyPartToBlocks :: BodyPart -> DocxContext [Block] +bodyPartToBlocks (Paragraph pPr parparts) | any blockCodeContainer (parStyleToContainers pPr) = let otherConts = filter (not . blockCodeContainer) (parStyleToContainers pPr) in + return $ rebuild otherConts [CodeBlock ("", [], []) (concatMap parPartToString parparts)] -bodyPartToBlocks opts docx (Paragraph pPr parparts) = - case parPartsToInlines opts docx parparts of - [] -> - [] - _ -> - let parContents = parPartsToInlines opts docx parparts - trimmedContents = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) parContents - in +bodyPartToBlocks (Paragraph pPr parparts) = do + ils <- parPartsToInlines parparts + case ils of + [] -> return [] + _ -> do + parContents <- parPartsToInlines parparts + let trimmedContents = reverse $ + dropWhile (Space ==) $ + reverse $ + dropWhile (Space ==) parContents + return $ rebuild (parStyleToContainers pPr) [Para trimmedContents] -bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) = +bodyPartToBlocks (ListItem pPr numId lvl parparts) = do + (Docx _ _ numbering _ _) <- asks docxDocument let kvs = case lookupLevel numId lvl numbering of Just (_, fmt, txt, Just start) -> [ ("level", lvl) @@ -349,23 +375,22 @@ bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parp , ("text", txt) ] Nothing -> [] - in - [Div - ("", ["list-item"], kvs) - (bodyPartToBlocks opts docx (Paragraph pPr parparts))] -bodyPartToBlocks _ _ (Tbl _ _ _ []) = - [Para []] -bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) = + blks <- bodyPartToBlocks (Paragraph pPr parparts) + return $ [Div ("", ["list-item"], kvs) blks] +bodyPartToBlocks (Tbl _ _ _ []) = + return [Para []] +bodyPartToBlocks (Tbl cap _ look (r:rs)) = do let caption = strToInlines cap (hdr, rows) = case firstRowFormatting look of True -> (Just r, rs) False -> (Nothing, r:rs) - hdrCells = case hdr of - Just r' -> rowToBlocksList opts docx r' - Nothing -> [] 
- cells = map (rowToBlocksList opts docx) rows + hdrCells <- case hdr of + Just r' -> rowToBlocksList r' + Nothing -> return [] + + cells <- mapM rowToBlocksList rows - size = case null hdrCells of + let size = case null hdrCells of True -> length $ head cells False -> length $ hdrCells -- @@ -374,34 +399,42 @@ bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) = -- moment. Width information is in the TblGrid field of the Tbl, -- so should be possible. Alignment might be more difficult, -- since there doesn't seem to be a column entity in docx. - alignments = take size (repeat AlignDefault) - widths = take size (repeat 0) :: [Double] - in - [Table caption alignments widths hdrCells cells] - - -makeImagesSelfContained :: Docx -> Inline -> Inline -makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) = - case lookup uri media of - Just bs -> case getMimeType uri of - Just mime -> let data_uri = - "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs) - in - Image alt (data_uri, title) - Nothing -> i + alignments = replicate size AlignDefault + widths = replicate size 0 :: [Double] + + return [Table caption alignments widths hdrCells cells] + + +makeImagesSelfContained :: Inline -> DocxContext Inline +makeImagesSelfContained i@(Image alt (uri, title)) = do + (Docx _ _ _ _ media) <- asks docxDocument + return $ case lookup uri media of + Just bs -> + case getMimeType uri of + Just mime -> + let data_uri = "data:" ++ mime ++ ";base64," ++ + toString (encode $ BS.concat $ B.toChunks bs) + in + Image alt (data_uri, title) + Nothing -> i Nothing -> i -makeImagesSelfContained _ inline = inline +makeImagesSelfContained inline = return inline -bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] -bodyToBlocks opts docx (Body bps) = - map (makeHeaderAnchors) $ - blocksToDefinitions $ - blocksToBullets $ - concatMap (bodyPartToBlocks opts docx) bps +bodyToBlocks :: Body -> DocxContext [Block] +bodyToBlocks (Body bps) = do + blks <- 
concatMapM bodyPartToBlocks bps + return $ + map (makeHeaderAnchors) $ + blocksToDefinitions $ + blocksToBullets $ blks docxToBlocks :: ReaderOptions -> Docx -> [Block] -docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body - +docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = + let dState = DState { docxHdrLinks = M.empty } + dEnv = DEnv { docxOptions = opts + , docxDocument = d} + in + evalDocxContext (bodyToBlocks body) dEnv dState ilToCode :: Inline -> String ilToCode (Str s) = s -- cgit v1.2.3 From db187348cd8bb17ce66d2d4c1db6a5ff46a1ffbc Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 26 Jun 2014 23:10:11 -0400 Subject: Docx rdr: Avoid mapping makeHeaderAnchors globally It only applies to headers, so we can just apply it when we make a header. --- src/Text/Pandoc/Readers/Docx.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 5773027f2..42352a845 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -152,6 +152,7 @@ divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = let n = fromJust (isHeaderClass c) in [(Container $ \blks -> + makeHeaderAnchor $ Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) @@ -304,8 +305,8 @@ isAnchorSpan _ = False dummyAnchors :: [String] dummyAnchors = ["_GoBack"] -makeHeaderAnchors :: Block -> Block -makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = +makeHeaderAnchor :: Block -> Block +makeHeaderAnchor h@(Header n (_, classes, kvs) ils) = case filter isAnchorSpan ils of [] -> h (x@(Span (ident, _, _) _) : xs) -> @@ -313,7 +314,7 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = True -> h False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) _ -> h -makeHeaderAnchors blk = blk +makeHeaderAnchor blk = blk 
parPartsToInlines :: [ParPart] -> DocxContext [Inline] parPartsToInlines parparts = do @@ -424,7 +425,6 @@ bodyToBlocks :: Body -> DocxContext [Block] bodyToBlocks (Body bps) = do blks <- concatMapM bodyPartToBlocks bps return $ - map (makeHeaderAnchors) $ blocksToDefinitions $ blocksToBullets $ blks -- cgit v1.2.3 From ab76bbebbe7afd3acdf3218b88f02482c885cc87 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 27 Jun 2014 11:35:50 -0400 Subject: Docx Reader: Clean up guards Use PatternGuards to get rid of need for `isJust`, `fromJust` altogether. --- src/Text/Pandoc/Readers/Docx.hs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 42352a845..0c52b1acb 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE PatternGuards #-} + {- Copyright (C) 2014 Jesse Rosenthal @@ -82,7 +84,7 @@ import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible -import Data.Maybe (mapMaybe, isJust, fromJust) +import Data.Maybe (mapMaybe) import Data.List (delete, isPrefixOf, (\\)) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B @@ -148,12 +150,10 @@ runStyleToContainers rPr = divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] -divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = - let n = fromJust (isHeaderClass c) - in - [(Container $ \blks -> - makeHeaderAnchor $ - Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] +divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c = + [(Container $ \blks -> + makeHeaderAnchor $ + Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) divAttrToContainers (c:cs) kvs | c `elem` 
codeDivs = @@ -167,10 +167,10 @@ divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs -divAttrToContainers [] kvs | isJust (lookup "indent" kvs) = +divAttrToContainers [] kvs | Just numString <- lookup "indent" kvs = let kvs' = filter (\(k,_) -> k /= "indent") kvs in - case fromJust (lookup "indent" kvs) of + case numString of "0" -> divAttrToContainers [] kvs' ('-' : _) -> divAttrToContainers [] kvs' _ -> (Container BlockQuote) : divAttrToContainers [] kvs' -- cgit v1.2.3 From 1de8d4d08788ef24f69f9f90266604854996080e Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 27 Jun 2014 11:45:06 -0400 Subject: Docx Reader: Simplify makeHeaderAnchors Using pattern guard, in preparation for doing some more complicated stuff with it (recording header anchors, so we can change them to auto ids.) 
--- src/Text/Pandoc/Readers/Docx.hs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 0c52b1acb..9aaf1d340 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -306,14 +306,11 @@ dummyAnchors :: [String] dummyAnchors = ["_GoBack"] makeHeaderAnchor :: Block -> Block -makeHeaderAnchor h@(Header n (_, classes, kvs) ils) = - case filter isAnchorSpan ils of - [] -> h - (x@(Span (ident, _, _) _) : xs) -> - case ident `elem` dummyAnchors of - True -> h - False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) - _ -> h +makeHeaderAnchor (Header n (_, classes, kvs) ils) + | (x : xs) <- filter isAnchorSpan ils + , (Span (ident, _, _) _) <- x + , notElem ident dummyAnchors = + Header n (ident, classes, kvs) (ils \\ (x:xs)) makeHeaderAnchor blk = blk parPartsToInlines :: [ParPart] -> DocxContext [Inline] -- cgit v1.2.3 From 5969baf5b97c0926384b1619be3c4be6d92b277b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 02:47:40 -0400 Subject: Rewrote header generation. In preparation for auto ids. 
--- src/Text/Pandoc/Readers/Docx.hs | 50 ++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 9aaf1d340..bbe770f6e 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -84,6 +84,7 @@ import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible +import Text.Pandoc.Shared import Data.Maybe (mapMaybe) import Data.List (delete, isPrefixOf, (\\)) import qualified Data.ByteString as BS @@ -151,9 +152,8 @@ runStyleToContainers rPr = divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c = - [(Container $ \blks -> - makeHeaderAnchor $ - Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] + [Container $ \_ -> + Header n ("", delete ("Heading" ++ show n) cs, []) []] divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) divAttrToContainers (c:cs) kvs | c `elem` codeDivs = @@ -305,13 +305,14 @@ isAnchorSpan _ = False dummyAnchors :: [String] dummyAnchors = ["_GoBack"] -makeHeaderAnchor :: Block -> Block +makeHeaderAnchor :: Block -> DocxContext Block makeHeaderAnchor (Header n (_, classes, kvs) ils) | (x : xs) <- filter isAnchorSpan ils , (Span (ident, _, _) _) <- x , notElem ident dummyAnchors = - Header n (ident, classes, kvs) (ils \\ (x:xs)) -makeHeaderAnchor blk = blk + return $ Header n (ident, classes, kvs) (ils \\ (x:xs)) +makeHeaderAnchor blk = return blk + parPartsToInlines :: [ParPart] -> DocxContext [Inline] parPartsToInlines parparts = do @@ -326,36 +327,40 @@ cellToBlocks (Cell bps) = concatMapM bodyPartToBlocks bps rowToBlocksList :: Row -> DocxContext [[Block]] rowToBlocksList (Row cells) = mapM cellToBlocks cells -blockCodeContainer :: Container Block -> Bool 
-blockCodeContainer (Container f) = case f [] of - CodeBlock _ _ -> True - _ -> False -blockCodeContainer _ = False +isBlockCodeContainer :: Container Block -> Bool +isBlockCodeContainer (Container f) | CodeBlock _ _ <- f [] = True +isBlockCodeContainer _ = False + +isHeaderContainer :: Container Block -> Bool +isHeaderContainer (Container f) | Header _ _ _ <- f [] = True +isHeaderContainer _ = False bodyPartToBlocks :: BodyPart -> DocxContext [Block] bodyPartToBlocks (Paragraph pPr parparts) - | any blockCodeContainer (parStyleToContainers pPr) = + | any isBlockCodeContainer (parStyleToContainers pPr) = let - otherConts = filter (not . blockCodeContainer) (parStyleToContainers pPr) + otherConts = filter (not . isBlockCodeContainer) (parStyleToContainers pPr) in return $ rebuild otherConts [CodeBlock ("", [], []) (concatMap parPartToString parparts)] +bodyPartToBlocks (Paragraph pPr parparts) + | any isHeaderContainer (parStyleToContainers pPr) = do + ils <- parPartsToInlines parparts >>= (return . normalizeSpaces) + let (Container hdrFun) = head $ filter isHeaderContainer (parStyleToContainers pPr) + Header n attr _ = hdrFun [] + hdr <- makeHeaderAnchor $ Header n attr ils + return [hdr] bodyPartToBlocks (Paragraph pPr parparts) = do - ils <- parPartsToInlines parparts + ils <- parPartsToInlines parparts >>= (return . 
normalizeSpaces) case ils of [] -> return [] _ -> do - parContents <- parPartsToInlines parparts - let trimmedContents = reverse $ - dropWhile (Space ==) $ - reverse $ - dropWhile (Space ==) parContents return $ rebuild (parStyleToContainers pPr) - [Para trimmedContents] + [Para ils] bodyPartToBlocks (ListItem pPr numId lvl parparts) = do (Docx _ _ numbering _ _) <- asks docxDocument let @@ -446,8 +451,3 @@ isHeaderClass s | "Heading" `isPrefixOf` s = ((n, "") : []) -> Just n _ -> Nothing isHeaderClass _ = Nothing - -blksToInlines :: [Block] -> [Inline] -blksToInlines (Para ils : _) = ils -blksToInlines (Plain ils : _) = ils -blksToInlines _ = [] -- cgit v1.2.3 From b89a3ba2b1069205a308ad0f444457d595e5a77f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 03:04:34 -0400 Subject: make makeHeaderAnchors make an auto id Record relationship between original id and auto id, so we can fix links after. --- src/Text/Pandoc/Readers/Docx.hs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index bbe770f6e..a3053b72a 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -104,7 +104,7 @@ readDocx opts bytes = Nothing -> error $ "couldn't parse docx file" -data DState = DState { docxHdrLinks :: M.Map String String } +data DState = DState { docxHeaderAnchors :: M.Map String String } data DEnv = DEnv { docxOptions :: ReaderOptions , docxDocument :: Docx} @@ -310,7 +310,11 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) | (x : xs) <- filter isAnchorSpan ils , (Span (ident, _, _) _) <- x , notElem ident dummyAnchors = - return $ Header n (ident, classes, kvs) (ils \\ (x:xs)) + do + hdrIDMap <- gets docxHeaderAnchors + let newIdent = uniqueIdent ils (M.elems hdrIDMap) + put DState{docxHeaderAnchors = M.insert ident newIdent hdrIDMap} + return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs)) makeHeaderAnchor blk = return blk @@ 
-432,7 +436,7 @@ bodyToBlocks (Body bps) = do docxToBlocks :: ReaderOptions -> Docx -> [Block] docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = - let dState = DState { docxHdrLinks = M.empty } + let dState = DState { docxHeaderAnchors = M.empty } dEnv = DEnv { docxOptions = opts , docxDocument = d} in -- cgit v1.2.3 From dce360e1e6fee089e849c07785d8e21961fefb9b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 03:54:58 -0400 Subject: Docx Reader: Introduce link rewriting. --- src/Text/Pandoc/Readers/Docx.hs | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index a3053b72a..0607aac7f 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -104,7 +104,7 @@ readDocx opts bytes = Nothing -> error $ "couldn't parse docx file" -data DState = DState { docxHeaderAnchors :: M.Map String String } +data DState = DState { docxAnchorMap :: M.Map String String } data DEnv = DEnv { docxOptions :: ReaderOptions , docxDocument :: Docx} @@ -276,7 +276,22 @@ parPartToInlines (Deletion _ author date runs) = do ("", ["deletion"], [("author", author), ("date", date)]) ils] parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = return [] -parPartToInlines (BookMark _ anchor) = return [Span (anchor, ["anchor"], []) []] +parPartToInlines (BookMark _ anchor) = + -- We record these, so we can make sure not to overwrite + -- user-defined anchor links with header auto ids. + do + -- Get the anchor map. + anchorMap <- gets docxAnchorMap + -- Check to see if the id is already in there. Rewrite if + -- necessary. This will have the possible effect of rewriting + -- user-defined anchor links. However, since these are not defined + -- in pandoc, it seems like a necessary evil to avoid an extra + -- pass. 
+ let newAnchor = case anchor `elem` (M.elems anchorMap) of + True -> uniqueIdent [Str anchor] (M.elems anchorMap) + False -> anchor + put DState{ docxAnchorMap = M.insert anchor newAnchor anchorMap} + return [Span (anchor, ["anchor"], []) []] parPartToInlines (Drawing relid) = do (Docx _ _ _ rels _) <- asks docxDocument return $ case lookupRelationship relid rels of @@ -311,9 +326,9 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) , (Span (ident, _, _) _) <- x , notElem ident dummyAnchors = do - hdrIDMap <- gets docxHeaderAnchors + hdrIDMap <- gets docxAnchorMap let newIdent = uniqueIdent ils (M.elems hdrIDMap) - put DState{docxHeaderAnchors = M.insert ident newIdent hdrIDMap} + put DState{docxAnchorMap = M.insert ident newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs)) makeHeaderAnchor blk = return blk @@ -411,6 +426,14 @@ bodyPartToBlocks (Tbl cap _ look (r:rs)) = do return [Table caption alignments widths hdrCells cells] +-- replace targets with generated anchors. 
+rewriteLink :: Inline -> DocxContext Inline +rewriteLink l@(Link ils ('#':target, title)) = do + anchorMap <- gets docxAnchorMap + return $ case M.lookup target anchorMap of + Just newTarget -> (Link ils ('#':newTarget, title)) + Nothing -> l +rewriteLink il = return il makeImagesSelfContained :: Inline -> DocxContext Inline makeImagesSelfContained i@(Image alt (uri, title)) = do @@ -429,14 +452,15 @@ makeImagesSelfContained inline = return inline bodyToBlocks :: Body -> DocxContext [Block] bodyToBlocks (Body bps) = do - blks <- concatMapM bodyPartToBlocks bps + blks <- concatMapM bodyPartToBlocks bps >>= + walkM rewriteLink return $ blocksToDefinitions $ blocksToBullets $ blks docxToBlocks :: ReaderOptions -> Docx -> [Block] docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = - let dState = DState { docxHeaderAnchors = M.empty } + let dState = DState { docxAnchorMap = M.empty } dEnv = DEnv { docxOptions = opts , docxDocument = d} in @@ -447,7 +471,6 @@ ilToCode (Str s) = s ilToCode Space = " " ilToCode _ = "" - isHeaderClass :: String -> Maybe Int isHeaderClass s | "Heading" `isPrefixOf` s = case reads (drop (length "Heading") s) :: [(Int, String)] of -- cgit v1.2.3 From b152145d6d4154a59f9ce36d5fc6f1c60aa0928c Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 03:57:38 -0400 Subject: Change test result to match new behavior. 
--- tests/docx.links.native | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/docx.links.native b/tests/docx.links.native index 98768de5a..fb95863a4 100644 --- a/tests/docx.links.native +++ b/tests/docx.links.native @@ -1,6 +1,6 @@ [Header 2 ("",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"] ,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://google.com",""),Space,Str "to",Space,Str "a",Space,Str "popular",Space,Str "website."] -,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#_A_section_for",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."] +,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#a-section-for-testing-link-targets",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."] ,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#my_bookmark",""),Space,Str "to",Space,Str "a",Space,Str "bookmark."] -,Header 2 ("_A_section_for",[],[]) [Str "A",Space,Str "section",Space,Str "for",Space,Str "testing",Space,Str "link",Space,Str "targets"] +,Header 2 ("a-section-for-testing-link-targets",[],[]) [Str "A",Space,Str "section",Space,Str "for",Space,Str "testing",Space,Str "link",Space,Str "targets"] ,Para [Str "A",Space,Str "bookmark",Space,Str "right",Space,Span ("my_bookmark",["anchor"],[]) [],Str "here"]] -- cgit v1.2.3 From c0a8d5ac7213ac01b5f12dd7dfca66e6d8301f5f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 08:40:59 -0400 Subject: Docx Reader: All headers get auto id. Previously, only those with an anchor got an auto id. Now, all do, which puts it in line with pandoc's markdown extension. 
--- src/Text/Pandoc/Readers/Docx.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 0607aac7f..71baa5dde 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -103,7 +103,6 @@ readDocx opts bytes = Just docx -> Pandoc nullMeta (docxToBlocks opts docx) Nothing -> error $ "couldn't parse docx file" - data DState = DState { docxAnchorMap :: M.Map String String } data DEnv = DEnv { docxOptions :: ReaderOptions @@ -321,6 +320,8 @@ dummyAnchors :: [String] dummyAnchors = ["_GoBack"] makeHeaderAnchor :: Block -> DocxContext Block +-- If there is an anchor already there (an anchor span in the header, +-- to be exact), we rename and associate the new id with the old one. makeHeaderAnchor (Header n (_, classes, kvs) ils) | (x : xs) <- filter isAnchorSpan ils , (Span (ident, _, _) _) <- x @@ -330,6 +331,14 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) let newIdent = uniqueIdent ils (M.elems hdrIDMap) put DState{docxAnchorMap = M.insert ident newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs)) +-- Otherwise we just give it a name, and register that name (associate +-- it with itself.) +makeHeaderAnchor (Header n (_, classes, kvs) ils) = + do + hdrIDMap <- gets docxAnchorMap + let newIdent = uniqueIdent ils (M.elems hdrIDMap) + put DState{docxAnchorMap = M.insert newIdent newIdent hdrIDMap} + return $ Header n (newIdent, classes, kvs) ils makeHeaderAnchor blk = return blk -- cgit v1.2.3 From 7fb74d88fb3a55a91340c00b5f1e3c006314769d Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 09:35:17 -0400 Subject: Update docx tests to reflect new expected behavior Now doing auto ids for all headers, so tests should reflect that. 
--- tests/docx.block_quotes_parse_indent.native | 2 +- tests/docx.headers.native | 6 +++--- tests/docx.image_no_embed.native | 2 +- tests/docx.links.native | 2 +- tests/docx.lists.native | 2 +- tests/docx.notes.native | 2 +- tests/docx.tables.native | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/docx.block_quotes_parse_indent.native b/tests/docx.block_quotes_parse_indent.native index da1cef110..842b3606a 100644 --- a/tests/docx.block_quotes_parse_indent.native +++ b/tests/docx.block_quotes_parse_indent.native @@ -1,4 +1,4 @@ -[Header 2 ("",[],[]) [Str "Some",Space,Str "block",Space,Str "quotes,",Space,Str "in",Space,Str "different",Space,Str "ways"] +[Header 2 ("some-block-quotes-in-different-ways",[],[]) [Str "Some",Space,Str "block",Space,Str "quotes,",Space,Str "in",Space,Str "different",Space,Str "ways"] ,Para [Str "This",Space,Str "is",Space,Str "the",Space,Str "proper",Space,Str "way,",Space,Str "with",Space,Str "a",Space,Str "style"] ,BlockQuote [Para [Str "I",Space,Str "don\8217t",Space,Str "know",Space,Str "why",Space,Str "this",Space,Str "would",Space,Str "be",Space,Str "in",Space,Str "italics,",Space,Str "but",Space,Str "so",Space,Str "it",Space,Str "appears",Space,Str "to",Space,Str "be",Space,Str "on",Space,Str "my",Space,Str "screen."]] diff --git a/tests/docx.headers.native b/tests/docx.headers.native index e4d4a4781..03f967728 100644 --- a/tests/docx.headers.native +++ b/tests/docx.headers.native @@ -1,5 +1,5 @@ -[Header 1 ("",[],[]) [Str "A",Space,Str "Test",Space,Str "of",Space,Str "Headers"] -,Header 2 ("",[],[]) [Str "Second",Space,Str "Level"] +[Header 1 ("a-test-of-headers",[],[]) [Str "A",Space,Str "Test",Space,Str "of",Space,Str "Headers"] +,Header 2 ("second-level",[],[]) [Str "Second",Space,Str "Level"] ,Para [Str "Some",Space,Str "plain",Space,Str "text."] -,Header 3 ("",[],[]) [Str "Third",Space,Str "level"] +,Header 3 ("third-level",[],[]) [Str "Third",Space,Str "level"] ,Para [Str "Some",Space,Str 
"more",Space,Str "plain",Space,Str "text."]] diff --git a/tests/docx.image_no_embed.native b/tests/docx.image_no_embed.native index 18debf135..063958bc7 100644 --- a/tests/docx.image_no_embed.native +++ b/tests/docx.image_no_embed.native @@ -1,2 +1,2 @@ -[Header 2 ("",[],[]) [Str "An",Space,Str "image"] +[Header 2 ("an-image",[],[]) [Str "An",Space,Str "image"] ,Para [Image [] ("word/media/image1.jpeg","")]] diff --git a/tests/docx.links.native b/tests/docx.links.native index fb95863a4..c741fe875 100644 --- a/tests/docx.links.native +++ b/tests/docx.links.native @@ -1,4 +1,4 @@ -[Header 2 ("",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"] +[Header 2 ("an-internal-link-and-an-external-link",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"] ,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://google.com",""),Space,Str "to",Space,Str "a",Space,Str "popular",Space,Str "website."] ,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#a-section-for-testing-link-targets",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."] ,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#my_bookmark",""),Space,Str "to",Space,Str "a",Space,Str "bookmark."] diff --git a/tests/docx.lists.native b/tests/docx.lists.native index e46bc140b..af922b335 100644 --- a/tests/docx.lists.native +++ b/tests/docx.lists.native @@ -1,4 +1,4 @@ -[Header 2 ("",[],[]) [Str "Some",Space,Str "nested",Space,Str "lists"] +[Header 2 ("some-nested-lists",[],[]) [Str "Some",Space,Str "nested",Space,Str "lists"] ,OrderedList (1,Decimal,Period) [[Para [Str "one"]] ,[Para [Str "two"] diff --git a/tests/docx.notes.native b/tests/docx.notes.native index 5a94b1999..ec1b414b6 100644 --- a/tests/docx.notes.native +++ b/tests/docx.notes.native @@ -1,2 +1,2 @@ -[Header 2 ("",[],[]) [Str "A",Space,Str "footnote"] 
+[Header 2 ("a-footnote",[],[]) [Str "A",Space,Str "footnote"] ,Para [Str "Test",Space,Str "footnote.",Note [Para [Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]] diff --git a/tests/docx.tables.native b/tests/docx.tables.native index 8dbaabda7..2564afcec 100644 --- a/tests/docx.tables.native +++ b/tests/docx.tables.native @@ -1,4 +1,4 @@ -[Header 2 ("",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] +[Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] ,Table [] [AlignDefault,AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0,0.0] [[Para [Str "Name"]] ,[Para [Str "Game"]] -- cgit v1.2.3