diff options
-rw-r--r-- | README | 226 | ||||
-rw-r--r-- | changelog | 517 | ||||
-rw-r--r-- | data/epub.css | 1 | ||||
m--------- | data/templates | 15 | ||||
-rw-r--r-- | pandoc.cabal | 7 | ||||
-rw-r--r-- | pandoc.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/PDF.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 39 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 37 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 82 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 5 | ||||
-rw-r--r-- | tests/Tests/Readers/HTML.hs | 27 | ||||
-rw-r--r-- | tests/lhs-test.html | 41 | ||||
-rw-r--r-- | tests/lhs-test.html+lhs | 41 | ||||
-rw-r--r-- | tests/lhs-test.latex | 19 | ||||
-rw-r--r-- | tests/markdown-reader-more.native | 3 | ||||
-rw-r--r-- | tests/markdown-reader-more.txt | 6 | ||||
-rw-r--r-- | tests/test-pandoc.hs | 2 |
21 files changed, 965 insertions, 132 deletions
@@ -449,7 +449,8 @@ General writer options : Specifies the coloring style to be used in highlighted source code. Options are `pygments` (the default), `kate`, `monochrome`, - `espresso`, `zenburn`, `haddock`, and `tango`. + `espresso`, `zenburn`, `haddock`, and `tango`. For more information + on syntax highlighting in pandoc, see [Syntax highlighting], below. `-H` *FILE*, `--include-in-header=`*FILE* @@ -743,18 +744,24 @@ Citation rendering overriding any value set in the metadata, and process citations using `pandoc-citeproc`. (This is equivalent to `--metadata bibliography=FILE --filter pandoc-citeproc`.) + If `--natbib` or `--biblatex` is also supplied, `pandoc-citeproc` is not + used, making this equivalent to `--metadata bibliography=FILE`. + If you supply this argument multiple times, each *FILE* will be added + to bibliography. `--csl=`*FILE* : Set the `csl` field in the document's metadata to *FILE*, overriding any value set in the metadata. (This is equivalent to `--metadata csl=FILE`.) + This option is only relevant with `pandoc-citeproc`. `--citation-abbreviations=`*FILE* : Set the `citation-abbreviations` field in the document's metadata to *FILE*, overriding any value set in the metadata. (This is equivalent to `--metadata citation-abbreviations=FILE`.) + This option is only relevant with `pandoc-citeproc`. `--natbib` @@ -1002,12 +1009,12 @@ as `title`, `author`, and `date`) as well as the following: `lot` : include list of tables in LaTeX documents +`bibliography` +: bibliography to use for resolving references + `biblio-style` : bibliography style in LaTeX, when used with `--natbib` -`biblio-files` -: bibliography files to use in LaTeX, with `--natbib` or `--biblatex` - `section` : section number in man pages @@ -1263,10 +1270,17 @@ If there are multiple headers with identical text, the corresponding reference will link to the first one only, and you will need to use explicit links to link to the others, as described above. 
-Unlike regular reference links, these references are case-sensitive. +Like regular reference links, these references are case-insensitive. + +Explicit link reference definitions always take priority over +implicit header references. So, in the following example, the +link will point to `bar`, not to `#foo`: + + # Foo -Note: if you have defined an explicit identifier for a header, -then implicit references to it will not work. + [foo]: bar + + See [foo] Block quotations ---------------- @@ -1409,6 +1423,8 @@ word. To prevent all highlighting, use the `--no-highlight` flag. To set the highlighting style, use `--highlight-style`. +For more information on highlighting, see [Syntax highlighting], +below. Line blocks ----------- @@ -2386,19 +2402,19 @@ by default, pandoc interprets material between HTML block tags as markdown. Thus, for example, Pandoc will turn <table> - <tr> - <td>*one*</td> - <td>[a link](http://google.com)</td> - </tr> + <tr> + <td>*one*</td> + <td>[a link](http://google.com)</td> + </tr> </table> into <table> - <tr> - <td><em>one</em></td> - <td><a href="http://google.com">a link</a></td> - </tr> + <tr> + <td><em>one</em></td> + <td><a href="http://google.com">a link</a></td> + </tr> </table> whereas `Markdown.pl` will preserve it as is. @@ -2670,7 +2686,10 @@ citations and a bibliography in a number of styles. Basic usage is pandoc --filter pandoc-citeproc myinput.txt In order to use this feature, you will need to specify a bibliography file -using the `bibliography` metadata field in a YAML metadata section. +using the `bibliography` metadata field in a YAML metadata section, or +`--bibliography` command line argument. You can supply multiple `--bibliography` +arguments or set `bibliography` metadata field to YAML array, if you want to +use multiple bibliography files. 
The bibliography may have any of these formats: Format File extension @@ -2793,6 +2812,12 @@ In this example, the document will contain a citation for `item3` only, but the bibliography will contain entries for `item1`, `item2`, and `item3`. +For LaTeX or PDF output, you can also use NatBib or BibLaTeX +to render bibliography. In order to do so, specify bibliography files as +outlined above, and add `--natbib` or `--biblatex` argument to `pandoc` +invocation. Bear in mind that bibliography files have to be in respective +format (either BibTeX or BibLaTeX). + Non-pandoc extensions --------------------- @@ -2922,20 +2947,22 @@ variants are supported: `markdown_phpextra` (PHP Markdown Extra) : `footnotes`, `pipe_tables`, `raw_html`, `markdown_attribute`, `fenced_code_blocks`, `definition_lists`, `intraword_underscores`, - `header_attributes`, `abbreviations`. + `header_attributes`, `abbreviations`, `shortcut_reference_links`. `markdown_github` (GitHub-flavored Markdown) : `pipe_tables`, `raw_html`, `tex_math_single_backslash`, `fenced_code_blocks`, `auto_identifiers`, `ascii_identifiers`, `backtick_code_blocks`, `autolink_bare_uris`, - `intraword_underscores`, `strikeout`, `hard_line_breaks` + `intraword_underscores`, `strikeout`, `hard_line_breaks`, + `shortcut_reference_links`. `markdown_mmd` (MultiMarkdown) : `pipe_tables` `raw_html`, `markdown_attribute`, `link_attributes`, `raw_tex`, `tex_math_double_backslash`, `intraword_underscores`, `mmd_title_block`, `footnotes`, `definition_lists`, `all_symbols_escapable`, `implicit_header_references`, - `auto_identifiers`, `mmd_header_identifiers` + `auto_identifiers`, `mmd_header_identifiers`, + `shortcut_reference_links`. `markdown_strict` (Markdown.pl) : `raw_html` @@ -3144,6 +3171,16 @@ To show the notes window, press `s` while viewing the presentation. Notes are not yet supported for other slide formats, but the notes will not appear on the slides themselves. 
+Marking frames "fragile" in beamer +---------------------------------- + +Sometimes it is necessary to add the LaTeX `[fragile]` option to +a frame in beamer (for example, when using the `minted` environment). +This can be forced by adding the `fragile` class to the header +introducing the slide: + + # Fragile slide {.fragile} + EPUB Metadata ============= @@ -3278,6 +3315,24 @@ ordinary HTML (without bird tracks). writes HTML with the Haskell code in bird tracks, so it can be copied and pasted as literate Haskell source. +Syntax highlighting +=================== + +Pandoc will automatically highlight syntax in fenced code blocks that +are marked with a language name. (See [Extension: +`inline_code_attributes`] and [Extension: `fenced_code_attributes`], +above.) The Haskell library [highlighting-kate] is used for +highlighting, which works in HTML, Docx, and LaTeX/PDF output. +The color scheme can be selected using the `--highlight-style` option. +The default color scheme is `pygments`, which imitates the default color +scheme used by the Python library pygments, but pygments is not actually +used to do the highlighting. + +To see a list of language names that pandoc will recognize, type +`pandoc --version`. + +To disable highlighting, use the `--no-highlight` option. + Custom writers ============== @@ -3298,18 +3353,130 @@ which you can modify according to your needs, do Authors ======= -© 2006-2013 John MacFarlane (jgm at berkeley dot edu). Released under the +© 2006-2015 John MacFarlane (jgm@berkeley.edu). Released under the [GPL], version 2 or greater. This software carries no warranty of any kind. (See COPYRIGHT for full copyright and warranty notices.) 
-Other contributors include Recai Oktaş, Paulo Tanimoto, Peter Wang, -Andrea Rossato, Eric Kow, infinity0x, Luke Plant, shreevatsa.public, -Puneeth Chaganti, Paul Rivier, rodja.trappe, Bradley Kuhn, thsutton, -Nathan Gass, Jonathan Daugherty, Jérémy Bobbio, Justin Bogner, qerub, -Christopher Sawicki, Kelsey Hightower, Masayoshi Takahashi, Antoine -Latter, Ralf Stephan, Eric Seidel, B. Scott Michel, Gavin Beatty, -Sergey Astanin, Arlo O'Keeffe, Denis Laxalde, Brent Yorgey, David Lazar, -Jamie F. Olson, Matthew Pickering, Albert Krewinkel, mb21, Jesse -Rosenthal. + +Contributors include +Aaron Wolen, +Albert Krewinkel, +Alexander Kondratskiy, +Alexander Sulfrian, +Alexander V Vershilov, +Alfred Wechselberger, +Andreas Lööw, +Antoine Latter, +Arlo O'Keeffe, +Artyom Kazak, +Ben Gamari, +Beni Cherniavsky-Paskin, +Bjorn Buckwalter, +Bradley Kuhn, +Brent Yorgey, +Bryan O'Sullivan, +B. Scott Michel, +Caleb McDaniel, +Calvin Beck, +Christoffer Ackelman, +Christoffer Sawicki, +Clare Macrae, +Clint Adams, +Conal Elliott, +Craig S. Bosma, +Daniel Bergey, +Daniel T. Staal, +David Lazar, +David Röthlisberger, +Denis Laxalde, +Douglas Calvert, +Douglas F. Calvert, +Eric Kow, +Eric Seidel, +Florian Eitel, +François Gannaz, +Freiric Barral, +Fyodor Sheremetyev, +Gabor Pali, +Gavin Beatty, +Greg Maslov, +Grégory Bataille, +Greg Rundlett, +gwern, +Gwern Branwen, +Hans-Peter Deifel, +Henry de Valence, +Ilya V. Portnov, +infinity0x, +Jaime Marquínez Ferrándiz, +James Aspnes, +Jamie F. Olson, +Jason Ronallo, +Jeff Arnold, +Jeff Runningen, +Jens Petersen, +Jérémy Bobbio, +Jesse Rosenthal, +J. Lewis Muir, +Joe Hillenbrand, +John MacFarlane, +Jonas Smedegaard, +Jonathan Daugherty, +Josef Svenningsson, +Jose Luis Duran, +Julien Cretel, +Justin Bogner, +Kelsey Hightower, +Konstantin Zudov, +Luke Plant, +Mark Szepieniec, +Mark Wright, +Masayoshi Takahashi, +Matej Kollar, +Mathias Schenner, +Matthew Pickering, +Matthias C. M. 
Troffaes, +Max Bolingbroke, +Max Rydahl Andersen, +mb21, +Merijn Verstraaten, +Michael Snoyman, +Michael Thompson, +MinRK, +Nathan Gass, +Neil Mayhew, +Nick Bart, +Nicolas Kaiser, +Nikolay Yakimov, +Paulo Tanimoto, +Paul Rivier, +Peter Wang, +Philippe Ombredanne, +Phillip Alday, +Puneeth Chaganti, +qerub, +Ralf Stephan, +Recai Oktaş, +rodja.trappe, +RyanGlScott, +Scott Morrison, +Sergei Trofimovich, +Sergey Astanin, +Shahbaz Youssefi, +Shaun Attfield, +shreevatsa.public, +Simon Hengel, +Sumit Sahrawat, +takahashim, +thsutton, +Tim Lin, +Timothy Humphries, +Todd Sifleet, +Tom Leese, +Uli Köhler, +Václav Zeman, +Viktor Kronvall, +Vincent, and +Wikiwide. [markdown]: http://daringfireball.net/projects/markdown/ [reStructuredText]: http://docutils.sourceforge.net/docs/ref/rst/introduction.html @@ -3353,3 +3520,4 @@ Rosenthal. [EPUBspine]: http://www.idpf.org/epub/301/spec/epub-publications.html#sec-spine-elem [KaTeX]: https://github.com/Khan/KaTeX [CommonMark]: http://commonmark.org +[highlighting-kate]: http://github.com/jgm/highlighting-kate @@ -1,3 +1,520 @@ +pandoc (1.14) + + [new features] + + * Added `commonmark` as input and output format. + + * Added `--verbose` flag for debugging output in PDF production (#1840, + #1653). + + * Allow wildcards in `--epub-embed-font` arguments (#1939). + + * Added `--latex-engine-opt` option (#969, #1779, Sumit Sahrawat). + + * Added `shortcut_reference_links` extension (Konstantin Zudov, #1977). + This is enabled by default for those markdown flavors that + support reading shortcut reference links, namely: `markdown`, + `markdown_strict`, `markdown_github`, `markdown_php`. + If the extension is enabled, the reader parses shortcut reference + links like `[foo]`, and the writer creates such links unless doing + so would cause problems. Users of markdown flavors that support + shortcut reference links should not notice a difference in reading + markdown, but the markdown pandoc produces may differ. 
+ If shortcut links are not desired, the extension can be disabled + in the normal way. + + [behavior changes] + + * `--toc` is now supported for `docx` output (#458, Nikolay Yakimov). + A "dirty" TOC is created at the beginning of the document. + It can be regenerated after the document has been opened. + + * An implicit `--filter pandoc-citeproc` is now triggered only when the + `--bibliography` option is used, and not when the `bibliography` + field in metadata is specified (#1849). + + * Markdown reader: + + + Reference links with `implicit_header_references` are no longer + case-sensitive (#1606). + + Definition lists no longer require indentation for first line (#2087). + Previously the body of the definition (after the `:` or `~` marker) + needed to be in column 4. This commit relaxes that requirement, + to better match the behavior of PHP Markdown Extra. So, now + this is a valid definition list: + + foo + : bar + + Resolve a potential ambiguity with table captions: + + foo + + : bar + + ----- + table + ----- + + Is "bar" a definition, or the caption for the table? We'll count + it as a caption for the table. + + Disallow headerless pipe tables (#1996), to conform to GFM and PHP + Markdown Extra. Note: If you have been using headerless pipe tables, + this change may cause existing tables to break. + + Allow pipe tables with header but no body (#2017). + + Allow a digit as first character of a citation key (Matthias Troffaes). + See https://github.com/jgm/pandoc-citeproc/issues/97 + + * LaTeX reader: + + + Don't limit includes to `.tex` extension (#1882). + If the extension is not `.tex`, it must be given explicitly in + the `\input` or `\include`. + + * Docx reader: + + + Allow numbering in the style file. This allows inherited styles + with numbering (lists) (Jesse Rosenthal). + + * Org reader: + + + Support smart punctuation (Craig Bosma). 
+ + * Append newline to the LineBreak in Dokuwiki, HTML, EPUB, + LaTeX, MediaWiki, OpenDocument, Texinfo writers (#1924, Tim Lin). + + * HTML writer: + + + Add "inline" or "display" class to math spans (#1914). + This allows inline and display math to be styled differently. + + Include raw latex blocks if `--mathjax` specified (#1938). + + Require highlighting-kate >= 0.5.14 (#1903). + This ensures that all code blocks will be wrapped in a `div` + with class `sourceCode`. Also, the default highlighting CSS + now adds `div.sourceCode { x-overflow: auto; }`, which means + that code blocks (even with line numbers) will acquire a scroll + bar on screens too small to display them (e.g. mobile phones). + See also jgm/highlighting-kate#65. + + * LaTeX writer: + + + Use a declaration for tight lists (Jose Luis Duran, Joseph + Harriott). Previously, pandoc hard-coded some commands to make + tight lists in LaTeX. Now we use a custom command instead, + allowing the styling to be changed in a macro in the header. + (Note: existing templates may need to be modified to include + the definition of this macro. See the current template.) + + Beamer output: if the header introducing a slide has the + class `fragile`, add the `[fragile]` option to the slide (#2119). + + * MediaWiki writer: + + + Use `File:` instead of the deprecated `Image:` for images and + other media files (Greg Rundlett). + + * DocBook writer: + + + Render a `Div (id,_,_) [Para _]` element as a `para` element + with an `id` attribute. This makes links to citations work in + DocBook with pandoc-citeproc. + + * RST writer: + + + Normalize headings to sequential levels (Nikolay Yakimov). + This is pretty much required by docutils. + + Treat headings in block quotes, etc as rubrics (Nikolay Yakimov). + + Better handling of raw latex inline (#1961). We use + `` :raw-latex:`...` `` and add a definition for this role to + the template. + + * EPUB writer: + + + Remove `linear=no` from cover `itemref` (#1609). 
+ + Don't use `sup` element for epub footnotes (#1995). + Instead, just use an `a` element with class `footnoteRef`. + This allows more styling options, and provides better results + in some readers (e.g. iBooks, where anything inside the `a` + tag breaks popup footnotes). + + Take TOC title from `toc-title` metadata field. + + * Docx writer: + + + Implemented `FirstParagraph` style (Jesse Rosenthal). + Following the ODT writer, we add the `FirstParagraph` style to the + first text paragraph following an image, blockquote, table, heading, + or beginning of document. This allows it to be styled differently. + The default is for it to be the same as `Normal`. + + Added `BodyText` style (Jesse Rosenthal). + We apply a `BodyText` style to all unstyled paragraphs. This is, + essentially, the same as `Normal`, except that since not everything + inherits from `BodyText` (the metadata won't, for example, or + the headers or footnote numbers), we can change the text in the body + without having to make exceptions for everything. If we do want to + change *everything*, we can still do it through `Normal`. + + Altered `Blockquote` style slightly (Jesse Rosenthal). + Since `BlockQuote` derives from `BodyText`, we just want to specify + by default that it won't indent, regardless of what `BodyText` does. + Note that this will not produce any visible difference in the default + configuration. + + Take TOC title from `toc-title` metadata field (Nikolay Yakimov). + + Added a style to figure images (Nikolay Yakimov). + Figures with empty captions use style `Figure`. + Figures with nonempty captions use style `Figure with Caption`, which + is based on `Figure`, and additionally has `keepNext` set. + + * ODT writer: + + + Added figure captions (Nikolay Yakimov). The following styles are + used for figures: + `Figure` -- for figure with empty caption, + `FigureWithCaption` (based on `Figure`) -- for figure with caption, + `FigureCaption` (based on `Caption`) -- for figure captions. 
+ Also, `TableCaption` (based on `Caption`) is used for table captions. + + [API changes] + + * New `Text.Pandoc.Error` module with `PandocError` type + (Matthew Pickering). + + * All readers now return `Either PandocError Pandoc` instead of `Pandoc` + (Matthew Pickering). This allows better handling of errors. + + * Added `Text.Pandoc.Writers.CommonMark`, exporting `writeCommonMark`. + + * Added `Text.Pandoc.Readers.CommonMark`, exporting `readCommonMark`. + + * Derive `Data` and `Typeable` instances for `MediaBag`, `Extension`, + `ReaderOptions`, `EPUBVersion`, `CiteMethod`, `ObfuscationMethod`, + `HTMLSlideVariant`, `TrackChanges`, `WriterOptions` (Shahbaz + Youssefi). + + * New `Ext_shortcut_reference_links` constructor for `Extension` + (Konstantin Zudov). + + [bug fixes] + + * Markdown reader: + + + Allow smart `'` after inline math (#1909, Nikolay Yakimov). + + Check for tex macros after indented code (#1973). + + Rewrote `charsInBalancedBrackets` for efficiency. + + Make sure a closing `</div>` doesn't get included in a + definition list item (#2127). + + Don't parse bracketed text as citation if it might be a link, + image, or footnote (Nikolay Yakimov). + + Require space after key in mmd title block (#2026, Nikolay + Yakimov). Require space after key-value delimiter colon in mmd title + block. + + Require nonempty value in mmd title block (Nikolay Yakimov). + + Disable all metadata block extensions when parsing + metadata field values (#2026, Nikolay Yakimov). Otherwise we + could get a mmd title block inside YAML metadata, for example. + + * HTML reader: + + + Improve self-closing tag detection in `htmlInBalanced` (#2146). + + Handle tables with `<th>` in body rows (#1859, mb21). + + Fixed `htmlTag` (#1820). If the tag parses as a comment, we check + to see if the input starts with `<!--`. If not, it's bogus comment + mode and we fail `htmlTag`. + + Handle `base` tag; if it has an `href` value, this is added to + all relative URLs in links and images. 
+ + * DocBook reader: + + + Look inside "info" elements for section titles (#1931). + + * Docx reader: + + + Parse images in deprecated vml format (Jesse Rosenthal). + + Allow sub/superscript verbatims (Jesse Rosenthal). + Verbatim usually shuts off all other run styles, but we don't want it + to shut off sub/superscript. + + * LaTeX reader: + + + Handle `tabular*` environment (#1850). + Note that the table width is not actually parsed or taken into + account, but pandoc no longer chokes on it. + + Ignore options in `\lstinline` rather than raising error (#1997). + + Add some test cases for simple tables (Mathias Schenner). + + Handle valign argument in tables (Mathias Schenner) (currently + we just ignore this). + + Allow non-empty colsep in tables (Mathias Schenner). + The `tabular` environment allows non-empty column separators + with the "@{...}" syntax. Previously, pandoc would fail to + parse tables if a non-empty colsep was present. With this + commit, these separators are still ignored, but the table gets + parsed. A test case is included. + + Recognize `\newpage` as a block command. + + Allow block content in \title{} (#2001). + + Check for block-level newcommand aliases in blockCommand (Nikolay + Yakimov). + + Guard against paragraph starting with inline macro (Nikolay Yakimov). + + Properly gobble spaces after `\\` (#2007). + + * Textile reader: + + + Handle newlines in table cells, and empty cells (#1919). + + * Org reader: + + + Allow image links with non-image targets (Hans-Peter Deifel). + This matches behavior of Org-Mode for links like + `[[http://example.com][https://www.haskell.org/static/img/logo.png]]`. + + * Docbook writer: + + + Don't print empty id attributes (thanks to Steve Horne). + + * HTML writer: + + + Fixed list-style-type for numbered example lists. + Should be "decimal," not "example" (#1902). + + Do not omit missing `alt` attribute on `img` tag (#1131, + Konstantin Zudov). + + Allow multiple colgroups in table (#2122). 
+ + In revealjs, ensure that lists in speaker notes don't add "fragment" + classes, which can cause additional keypresses to be needed to + advance a slide (#1394). + + * LaTeX writer: + + + Don't escape `$` in URL (#1913). + + Don't use listings in headers (Matthew Pickering, #1963). + + Recognize book documentclass if set in metadata (#1971). + This sets `--chapters` implicitly if the documentclass in metadata + is a book documentclass. Previously this was done only if a book + documentclass was set in a variable. + + Add a `\label` in `\hyperdef` for Div, Span (or links don't work). + + * Texinfo writer: + + + Fix wrapping by using breakable spaces (Tim Lin). + + * RST writer: + + + Fixed toc depth in RST writer. Previously the depth was being + rendered as a floating point number with a decimal point. + + * Markdown writer: + + + Improved escaping (#2086). `<` should not be escaped as `\<`, for + compatibility with original Markdown. We now escape `<` and `>` + with entities. Also, we now backslash-escape square brackets. + + Avoid introducing spurious list items through wrapping (#1946). + + Don't emit span tags if plain or raw HTML disabled. + + * MediaWiki writer: + + + Convert spaces to underscores in wikilink URL (#1982), like MediaWiki. + + * AsciiDoc writer: + + + Insert some needed blank lines (#1860). + + Avoid wrapping after list marker (#1858). + + * EPUB writer: + + + Properly handle internal links to IDs in spans, divs (#1884). + + Use plain writer for metadata dc: fields (#2121). + This gives better results when we have, e.g. multiple paragraphs. + Note that tags aren't allowed in these fields. + + Properly handle image links without an extension (#1855). + + * ICML writer: + + + Better handling of raw blocks and inlines (#1951). + Previously these were always escaped and printed verbatim. + Now they are ignored unless the format is `icml`, in which + case they are passed through unescaped. 
+ + * Custom writer: + + + Raise error if loadstring returns an error status. + + Raise `PandocLuaException` instead of using 'error'. + Eventually we'll change the return type so that no exception + is involved, but at least this can be trapped. + + Use UTF-8 aware bytestring conversion. + + Set foreign encoding to UTF-8 (Nikolay Yakimov, #2101, #1634). + Also factored out ByteString, since it's only used as an intermediate + representation. + + * Docx writer: + + + Copy hyphenation settings from reference.docx (Nikolay Yakimov). + + Filter out illegal XML characters (#1992, Matthew Pickering). + + Added `noProof` to docx syntax highlighting `SourceCode` style. + + Added footnotes id -1 and 0 (Jesse Rosenthal). + Word uses, by default, footnotes with id -1 and 0 for separators. If a + user modifies `reference.docx`, they will end up with a `settings.xml` + file that references these footnotes, but no such footnotes in the + document. This will produce a corruption error. Here we add these to the + document and `settings.xml` file, so future modifications won't break + the file. + + Handle lists correctly inside table cells (Jesse Rosenthal). + Previously we didn't transform lists inside table cells. + + Set firstRow information in tables (Nikolay Yakimov). + + Don't replace `SourceCode` style in `reference.docx` if it is defined + there (Nikolay Yakimov, #1872). If `--no-highlight` specified, remove + any `SourceCode` and `*Tok` styles in `reference.docx`. + + Attempt to match international style names (#1607, Nikolay Yakimov). + + Set these styles as custom (Nikolay Yakimov): `Author`, `Abstract`, + `Compact`, `Image Caption`, `Table Caption`, `Definition Term`, + `Definition`, `First Paragraph`. + + Rename these styles to correspond with Word `Normal.dotm` (Nikolay + Yakimov): `Block Quote -> Block Text`, `Link -> Hyperlink`, + `Footnote Ref -> Footnote Reference`. + + Added `Caption` style (Nikolay Yakimov). 
+ + Changed these styles' inheritance (Nikolay Yakimov): + `Image Caption <- Caption`, `Table Caption <- Caption`. + + Remove `SourceCode` style from `reference.docx` (#1872). + This is added automatically by the docx writer. + + Added toc heading style to `reference.docx` (Nikolay Yakimov). + + * `Text.Pandoc.PDF`: + + + Don't suggest "Try xelatex" if xelatex already in use (mb21, #1832). + + More comprehensible errors on image conversion (#2067). + EPS can't be supported without shelling out to something like + ImageMagick, but at least we can avoid mysterious error messages. + + * `Text.Pandoc.Shared`: + + + Make safeRead safe (#1801, Matthew Pickering). + + Added `mapLeft`, `hush` (Matthew Pickering). + + * `Text.Pandoc.Pretty`: + + + Remove partial function (Matthew Pickering). + + * `Text.Pandoc.SelfContained`: + + + Add `;charset=utf-8` to script mime type if missing (#1842). + + Improved building of data URIs (#1940). Now base64 is used except + for `text/*` mime types. + + `cssURLs` no longer tries to fetch fragment URLs (#2121). + + Properly handle data URIs in css urls (#2129). + Use a proper CSS parser (adds dependency on `text-css`). + + * `Text.Pandoc.UTF8`: + + + Better handling of bare CRs in input files (#2132). + Previously we just stripped them out; now we convert + other line ending styles to LF line endings. + + * `Text.Pandoc.ImageSize`: + + + Fixed some exif header parsing bugs (#1834). + + Make imageSize return an Either, not a Maybe (#1834). + Use `runGetOrFail` (with `binary >= 0.7`) to return `Left` on + parse failure (rather than `error`). + + Improved warnings when image size can't be determined. + + Removed error landmines (Matthew Pickering). + + * Added woff2 to MIME types (Alfred Wechselberger). + + * pandoc: When a binary input format is used, warn that file + arguments past the first one are being ignored (Matthew Pickering). + + [template changes] + + * LaTeX template: + + + Define `\tightlist` macro if not defined. 
+ + Use `\providecommand` for `tightlist`. This avoids a conflict + when memoir class is used (thanks to Joseph Harriott). + + Degrade gracefully if `\paragraph` not defined. + + Include `grffile` together with `graphicx` (#2074). + This properly handles filenames containing spaces and dots. + + Redefine `\paragraph`, `\subparagraph`... to behave more + like section headers (#1658). + + * LaTeX, Beamer templates: + + + Use `bibliography` instead of `biblio-files` + (#1661). Also use `\addbibresource` instead of `\bibliography` for + biblatex. + + * EPUB templates: + + + Use `div`, not `p`, for "rights" on title page. + + Added header-includes, include-before, include-after (#1987). + + * OpenDocument template: + + + Use `text:p` instead of `text:h` for title. + Using `text:h` causes problems with numbering. Closes #2059. + Thanks to @nkalvi for diagnosing this. + + * reveal.js template: + + + Link to non-minified css, js. The minified versions no longer + ship with the library. + + Correctly include style CSS (#1949). + + New configurable options: `center`, `maxScale`, `slideNumber` + (Dmitry Smirnov, pandoc-templates#89). + + Moved custom CSS after theme. This allows custom CSS to modify + themes, instead of being replaced by them. + + [under the hood improvements] + + * Removed pre-built `reference.docx` and `reference.odt` (Nikolay + Yakimov). Instead the repository now includes the component text files, + and the zipped binaries are built from these using a helper + program, `make-reference-files`. This should make maintenance of + these components easier going forward. + + * `Text.Pandoc.Parsing`: + + + Added new `<+?>` combinator (Nikolay Yakimov). + + Added `stateHeaderKeys` to `ParserState`. + + * `make_deb.sh` fixes: + + + Detect architecture. + + Add Installed-Size to debian package control file (#1900). + + Use `fakeroot` to get permissions right. + + Use `mkdir` and `cp` instead of `install`. + + Set permissions of directories to 755. 
+ + Install in `/usr` rather than `/usr/local`. + + Compress man pages. + + Combine copyright files for `pandoc`, `pandoc-citeproc`. + + * Added `Text.Pandoc.Compat.Locale` and `old-locale` flag + to assist with transition to `time` 1.5. + + * Updated CONTRIBUTING.md with information about issue tags (Matthew + Pickering). + + * Updated travis installs to the new sudo-less syntax (Tim Lin). + + * Updated dependency version bounds. + + * EPUB tests: don't use `joinPath`, which varies across platforms. + Instead, use a forward-slash to join paths, regardless of the + platform. This matches the way `MediaBag` now works. + + * Clarify JSON input and output in usage message (Caleb McDaniel). + + * Improved INSTALL instructions. + + * Always build man pages. Removed make-pandoc-man-pages flag. + + * Makefile: removed man target, now that we generate man pages by default. + + * README: + + + Fixed typos (J. Lewis Muir). + + Added documentation on backtick_code_blocks (#2135, Nikolay Yakimov). + + Added note on in-field markup in biblio databases (Nick Bart). + + Fixed misleading example of raw HTML block. + + Various minor formatting and consistency fixes for the program + options (Andreas Lööw). + + Made definition lists for options all "loose" for consistency. + + Added YAML biblio format to table, and note on `pandoc-citeproc`'s + `--bib2json` and `--bib2yaml` options (Nick Bart). + + Removed obsolete reference to `mods2yaml` (Nick Bart). + pandoc (1.13.2.1) * Updated to build with ghc 7.10.1. diff --git a/data/epub.css b/data/epub.css index 93153d62a..1ea24680d 100644 --- a/data/epub.css +++ b/data/epub.css @@ -12,3 +12,4 @@ h2.author { } h3.date { } ol.toc { padding: 0; margin-left: 1em; } ol.toc li { list-style-type: none; margin: 0; padding: 0; } +a.footnoteRef { vertical-align: super; }
\ No newline at end of file diff --git a/data/templates b/data/templates -Subproject 1b789219e50db1ac52d6fe6e471641a880cd7a3 +Subproject da35eb3c4ad33d39df535b2a045a132f85f77ab diff --git a/pandoc.cabal b/pandoc.cabal index df3ddd92c..dc6af6b63 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -263,7 +263,7 @@ Library tagsoup >= 0.13.1 && < 0.14, base64-bytestring >= 0.1 && < 1.1, zlib >= 0.5 && < 0.7, - highlighting-kate >= 0.5.14 && < 0.6, + highlighting-kate >= 0.6 && < 0.7, data-default >= 0.4 && < 0.6, temporary >= 1.1 && < 1.3, blaze-html >= 0.5 && < 0.9, @@ -396,7 +396,7 @@ Executable pandoc text >= 0.11 && < 1.3, bytestring >= 0.9 && < 0.11, extensible-exceptions >= 0.1 && < 0.2, - highlighting-kate >= 0.5.14 && < 0.6, + highlighting-kate >= 0.6 && < 0.7, aeson >= 0.7.0.5 && < 0.9, yaml >= 0.8.8.2 && < 0.9, containers >= 0.1 && < 0.6, @@ -464,7 +464,7 @@ Test-Suite test-pandoc directory >= 1 && < 1.3, filepath >= 1.1 && < 1.5, process >= 1 && < 1.3, - highlighting-kate >= 0.5.14 && < 0.6, + highlighting-kate >= 0.6 && < 0.7, Diff >= 0.2 && < 0.4, test-framework >= 0.3 && < 0.9, test-framework-hunit >= 0.2 && < 0.4, @@ -481,6 +481,7 @@ Test-Suite test-pandoc Tests.Shared Tests.Walk Tests.Readers.LaTeX + Tests.Readers.HTML Tests.Readers.Markdown Tests.Readers.Org Tests.Readers.RST @@ -58,7 +58,7 @@ import qualified Control.Exception as E import Control.Exception.Extensible ( throwIO ) import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad (when, unless, (>=>)) -import Data.Maybe (isJust, fromMaybe) +import Data.Maybe (fromMaybe) import Data.Foldable (foldrM) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B @@ -751,9 +751,6 @@ options = (\arg opt -> return opt{ optMetadata = addMetadata "bibliography" (readMetaValue arg) $ optMetadata opt - , optVariables = - ("biblio-files", dropExtension arg) : - optVariables opt }) "FILE") "" @@ -1120,7 +1117,7 @@ main = do -- --bibliography implies -F pandoc-citeproc for 
backwards compatibility: - let needsCiteproc = isJust (M.lookup "bibliography" metadata) && + let needsCiteproc = any ("--bibliography" `isPrefixOf`) rawArgs && optCiteMethod opts `notElem` [Natbib, Biblatex] && "pandoc-citeproc" `notElem` map takeBaseName filters let filters' = if needsCiteproc then "pandoc-citeproc" : filters diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 0a09d3222..1711c0f36 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -198,6 +198,8 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] when (verbose && runNumber == 1) $ do + putStrLn $ "[makePDF] temp dir:" + putStrLn tmpDir' putStrLn $ "[makePDF] Command line:" putStrLn $ program ++ " " ++ unwords (map show programArgs) putStr "\n" diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index d8beb1810..82e7e2c33 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -903,7 +903,8 @@ data ParserState = ParserState stateAllowLinks :: Bool, -- ^ Allow parsing of links stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed - stateKeys :: KeyTable, -- ^ List of reference keys (with fallbacks) + stateKeys :: KeyTable, -- ^ List of reference keys + stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys stateSubstitutions :: SubstTable, -- ^ List of substitution references stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) @@ -1001,6 +1002,7 @@ defaultParserState = stateMaxNestingLevel = 6, stateLastStrPos = Nothing, stateKeys = M.empty, + stateHeaderKeys = M.empty, stateSubstitutions = M.empty, stateNotes = [], stateNotes' = [], @@ -1206,7 +1208,7 @@ citeKey = try $ do guard =<< notAfterString suppress_author <- option False (char '-' *> return True) char '@' - 
firstChar <- alphaNum <|> char '_' + firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite let regchar = satisfy (\c -> isAlphaNum c || c == '_') let internal p = try $ p <* lookAhead regchar rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 5b3c907aa..c766bb4ee 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -51,7 +51,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import Data.Maybe ( fromMaybe, isJust) -import Data.List ( intercalate, isInfixOf, isPrefixOf ) +import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero, void, unless ) import Control.Arrow ((***)) @@ -62,7 +62,7 @@ import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) - +import Network.URI (isURI) import Text.Pandoc.Error import Text.Parsec.Error @@ -74,7 +74,8 @@ readHtml :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readHtml opts inp = mapLeft (ParseFailure . getError) . flip runReader def $ - runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags + runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing) + "source" tags where tags = stripPrefixes . 
canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -98,7 +99,8 @@ replaceNotes' x = return x data HTMLState = HTMLState { parserState :: ParserState, - noteTable :: [(String, Blocks)] + noteTable :: [(String, Blocks)], + baseHref :: Maybe String } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -120,7 +122,7 @@ pBody :: TagParser Blocks pBody = pInTags "body" block pHead :: TagParser Blocks -pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . trimInlines setTitle t = mempty <$ (updateState $ B.setMeta "title" t) pMetaTag = do @@ -132,6 +134,17 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) let content = fromAttrib "content" mt updateState $ B.setMeta name (B.text content) return mempty + pBaseTag = do + bt <- pSatisfy (~== TagOpen "base" []) + let baseH = fromAttrib "href" bt + if null baseH + then return mempty + else do + let baseH' = case reverse baseH of + '/':_ -> baseH + _ -> baseH ++ "/" + updateState $ \st -> st{ baseHref = Just baseH' } + return mempty block :: TagParser Blocks block = do @@ -566,7 +579,11 @@ pAnchor = try $ do pRelLink :: TagParser Inlines pRelLink = try $ do tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href")) - let url = fromAttrib "href" tag + mbBaseHref <- baseHref <$> getState + let url' = fromAttrib "href" tag + let url = case (isURI url', mbBaseHref) of + (False, Just h) -> h ++ url' + _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag let spanC = case uid of @@ -578,7 +595,11 @@ pRelLink = try $ do pImage :: TagParser Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . 
lookup "src") - let url = fromAttrib "src" tag + mbBaseHref <- baseHref <$> getState + let url' = fromAttrib "src" tag + let url = case (isURI url', mbBaseHref) of + (False, Just h) -> h ++ url' + _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag return $ B.image (escapeURI url) title (B.text alt) @@ -874,7 +895,7 @@ htmlInBalanced :: (Monad m) -> ParserT String st m String htmlInBalanced f = try $ do (TagOpen t _, tag) <- htmlTag f - guard $ '/' `notElem` tag -- not a self-closing tag + guard $ not $ "/>" `isSuffixOf` tag -- not a self-closing tag let stopper = htmlTag (~== TagClose t) let anytag = snd <$> htmlTag (const True) contents <- many $ notFollowedBy' stopper >> @@ -945,7 +966,7 @@ instance HasReaderOptions HTMLState where extractReaderOptions = extractReaderOptions . parserState instance Default HTMLState where - def = HTMLState def [] + def = HTMLState def [] Nothing instance HasMeta HTMLState where setMeta s b st = st {parserState = setMeta s b $ parserState st} diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index cc5521a62..3b5ae0978 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -177,9 +177,10 @@ charsInBalancedBrackets openBrackets = (char '[' >> charsInBalancedBrackets (openBrackets + 1)) <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1)) <|> (( (() <$ code) - <|> (() <$ escapedChar') + <|> (() <$ (escapedChar')) <|> (newline >> notFollowedBy blankline) <|> skipMany1 (noneOf "[]`\n\\") + <|> (() <$ count 1 (oneOf "`\\")) ) >> charsInBalancedBrackets openBrackets) -- @@ -508,9 +509,12 @@ atxHeader = try $ do notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces - text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) + (text, raw) <- withRaw $ + trimInlinesF . 
mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr' <- registerHeader attr (runF text defaultParserState) + attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + guardDisabled Ext_implicit_header_references + <|> registerImplicitHeader raw ident return $ B.headerWith attr' level <$> text atxClosing :: MarkdownParser Attr @@ -543,15 +547,24 @@ setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) + (text, raw) <- withRaw $ + trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) attr <- setextHeaderEnd underlineChar <- oneOf setextHChars many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr' <- registerHeader attr (runF text defaultParserState) + attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + guardDisabled Ext_implicit_header_references + <|> registerImplicitHeader raw ident return $ B.headerWith attr' level <$> text +registerImplicitHeader :: String -> String -> MarkdownParser () +registerImplicitHeader raw ident = do + let key = toKey $ "[" ++ raw ++ "]" + updateState (\s -> s { stateHeaderKeys = + M.insert key ('#':ident,"") (stateHeaderKeys s) }) + -- -- hrule block -- @@ -1699,7 +1712,7 @@ referenceLink :: (String -> String -> Inlines -> Inlines) -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False - (ref,raw') <- option (mempty, "") $ + (_,raw') <- option (mempty, "") $ lookAhead (try (spnl >> normalCite >> return (mempty, ""))) <|> try (spnl >> reference) @@ -1719,13 +1732,13 @@ referenceLink constructor (lab, raw) = do return $ do keys <- asksF stateKeys case 
M.lookup key keys of - Nothing -> do - headers <- asksF stateHeaders - ref' <- if labIsRef then lab else ref + Nothing -> if implicitHeaderRefs - then case M.lookup ref' headers of - Just ident -> constructor ('#':ident) "" <$> lab - Nothing -> makeFallback + then do + headerKeys <- asksF stateHeaderKeys + case M.lookup key headerKeys of + Just (src, tit) -> constructor src tit <$> lab + Nothing -> makeFallback else makeFallback Just (src,tit) -> constructor src tit <$> lab diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 08a83c85e..f3b99e141 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -154,6 +154,14 @@ listItemToDocbook opts item = -- | Convert a Pandoc block element to Docbook. blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty +-- Add ids to paragraphs in divs with ids - this is needed for +-- pandoc-citeproc to get link anchors in bibliographies: +blockToDocbook opts (Div (ident,_,_) [Para lst]) = + let attribs = [("id", ident) | not (null ident)] in + if hasLineBreaks lst + then flush $ nowrap $ inTags False "literallayout" attribs + $ inlinesToDocbook opts lst + else inTags True "para" attribs $ inlinesToDocbook opts lst blockToDocbook opts (Div _ bs) = blocksToDocbook opts $ map plainToPara bs blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 37c285dc2..4ce7857ac 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to EPUB. 
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) import qualified Data.Map as M -import Data.Maybe ( fromMaybe ) +import Data.Maybe ( fromMaybe, catMaybes ) import Data.List ( isPrefixOf, isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) @@ -60,14 +60,14 @@ import Text.Pandoc.Walk (walk, walkM) import Data.Default import Text.Pandoc.Writers.Markdown (writePlain) import Control.Monad.State (modify, get, execState, State, put, evalState) -import Control.Monad (foldM, mplus, liftM, when) +import Control.Monad (mplus, liftM, when) import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs , strContent, lookupAttr, Node(..), QName(..), parseXML , onlyElems, node, ppElement) import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) -import Text.Pandoc.MIME (MimeType, getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) @@ -378,17 +378,7 @@ writeEPUB opts doc@(Pandoc meta _) = do mediaRef <- newIORef [] Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= walkM (transformBlock opts' mediaRef) - pics <- readIORef mediaRef - let readPicEntry entries (oldsrc, newsrc) = do - res <- fetchItem' (writerMediaBag opts') - (writerSourceURL opts') oldsrc - case res of - Left _ -> do - warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." - return entries - Right (img,_) -> return $ - (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries - picEntries <- foldM readPicEntry [] pics + picEntries <- (catMaybes . map (snd . 
snd)) <$> readIORef mediaRef -- handle fonts let matchingGlob f = do @@ -425,10 +415,14 @@ writeEPUB opts doc@(Pandoc meta _) = do let blocks'' = replaceRefs reftable blocks' let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel + isChapterHeader (Div ("",["references"],[]) (Header n _ _:_)) = + n <= chapterHeaderLevel isChapterHeader _ = False let toChapters :: [Block] -> State [Int] [Chapter] toChapters [] = return [] + toChapters (Div ("",["references"],[]) bs@(Header 1 _ _:_) : rest) = + toChapters (bs ++ rest) toChapters (Header n attr@(_,classes,_) ils : bs) = do nums <- get mbnum <- if "unnumbered" `elem` classes @@ -794,59 +788,75 @@ metadataElement version md currentTime = showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media +transformTag :: WriterOptions + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media -> Tag String -> IO (Tag String) -transformTag mediaRef tag@(TagOpen name attr) +transformTag opts mediaRef tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef mediaRef src - newposter <- modifyMediaRef mediaRef poster + newsrc <- modifyMediaRef opts mediaRef src + newposter <- modifyMediaRef opts mediaRef poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ [("src", newsrc) | not (null newsrc)] ++ [("poster", newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ tag = return tag - -modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath -modifyMediaRef _ "" = return "" -modifyMediaRef mediaRef oldsrc = do +transformTag _ _ tag = return tag + +modifyMediaRef :: WriterOptions + -> IORef [(FilePath, (FilePath, Maybe Entry))] + -> FilePath + -> IO FilePath +modifyMediaRef _ _ "" = return "" +modifyMediaRef opts 
mediaRef oldsrc = do media <- readIORef mediaRef case lookup oldsrc media of - Just n -> return n - Nothing -> do - let new = "media/file" ++ show (length media) ++ - takeExtension (takeWhile (/='?') oldsrc) -- remove query - modifyIORef mediaRef ( (oldsrc, new): ) + Just (n,_) -> return n + Nothing -> do + res <- fetchItem' (writerMediaBag opts) + (writerSourceURL opts) oldsrc + (new, mbEntry) <- + case res of + Left _ -> do + warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." + return (oldsrc, Nothing) + Right (img,mbMime) -> do + let new = "media/file" ++ show (length media) ++ + fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) + (('.':) <$> (mbMime >>= extensionFromMimeType)) + epochtime <- floor `fmap` getPOSIXTime + let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img + return (new, Just entry) + modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): ) return new transformBlock :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media -> Block -> IO Block -transformBlock _ mediaRef (RawBlock fmt raw) +transformBlock opts mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag mediaRef) tags + tags' <- mapM (transformTag opts mediaRef) tags return $ RawBlock fmt (renderTags' tags') transformBlock _ _ b = return b transformInline :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media -> Inline -> IO Inline -transformInline _ mediaRef (Image lab (src,tit)) = do - newsrc <- modifyMediaRef mediaRef src +transformInline opts mediaRef (Image lab (src,tit)) = do + newsrc <- modifyMediaRef opts mediaRef src return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained opts $ writeHtmlInline opts x return $ 
RawInline (Format "html") raw -transformInline _ mediaRef (RawInline fmt raw) +transformInline opts mediaRef (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag mediaRef) tags + tags' <- mapM (transformTag opts mediaRef) tags return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 3a89b226b..022a0e17f 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -46,7 +46,7 @@ import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.String ( fromString ) -import Data.Maybe ( catMaybes, fromMaybe ) +import Data.Maybe ( catMaybes, fromMaybe, isJust ) import Control.Monad.State import Text.Blaze.Html hiding(contents) #if MIN_VERSION_blaze_markup(0,6,3) @@ -825,7 +825,9 @@ inlineToHtml opts inline = writerIdentifierPrefix opts ++ "fn" ++ ref) ! A.class_ "footnoteRef" ! prefixedId opts ("fnref" ++ ref) - $ H.sup + $ (if isJust (writerEpubVersion opts) + then id + else H.sup) $ toHtml ref return $ case writerEpubVersion opts of Just EPUB3 -> link ! 
customAttribute "epub:type" "noteref" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 70280aaec..a785e1edc 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -274,10 +274,11 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) let hasCode (Code _ _) = [True] hasCode _ = [] opts <- gets stOptions - let fragile = not $ null $ query hasCodeBlock elts ++ + let fragile = "fragile" `elem` classes || + not (null $ query hasCodeBlock elts ++ if writerListings opts then query hasCode elts - else [] + else []) let allowframebreaks = "allowframebreaks" `elem` classes let optionslist = ["fragile" | fragile] ++ ["allowframebreaks" | allowframebreaks] diff --git a/tests/Tests/Readers/HTML.hs b/tests/Tests/Readers/HTML.hs new file mode 100644 index 000000000..2eb87a2f3 --- /dev/null +++ b/tests/Tests/Readers/HTML.hs @@ -0,0 +1,27 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Readers.HTML (tests) where + +import Text.Pandoc.Definition +import Test.Framework +import Tests.Helpers +import Tests.Arbitrary() +import Text.Pandoc.Builder +import Text.Pandoc +import Text.Pandoc.Error + +html :: String -> Pandoc +html = handleError . 
readHtml def + +tests :: [Test] +tests = [ testGroup "base tag" + [ test html "simple" $ + "<head><base href=\"http://www.w3schools.com/images\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?> + plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman")) + , test html "slash at end of base" $ + "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?> + plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman")) + , test html "absolute URL" $ + "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"http://example.com/stickman.gif\" alt=\"Stickman\"></head>" =?> + plain (image "http://example.com/stickman.gif" "" (text "Stickman")) + ] + ] diff --git a/tests/lhs-test.html b/tests/lhs-test.html index 362c93c04..5b5f9ccc2 100644 --- a/tests/lhs-test.html +++ b/tests/lhs-test.html @@ -13,18 +13,35 @@ table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode { table.sourceCode { width: 100%; line-height: 100%; } td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; } td.sourceCode { padding-left: 5px; } -code > span.kw { color: #007020; font-weight: bold; } -code > span.dt { color: #902000; } -code > span.dv { color: #40a070; } -code > span.bn { color: #40a070; } -code > span.fl { color: #40a070; } -code > span.ch { color: #4070a0; } -code > span.st { color: #4070a0; } -code > span.co { color: #60a0b0; font-style: italic; } -code > span.ot { color: #007020; } -code > span.al { color: #ff0000; font-weight: bold; } -code > span.fu { color: #06287e; } -code > span.er { color: #ff0000; font-weight: bold; } +code > span.kw { color: #007020; font-weight: bold; } /* Keyword */ +code > span.dt { color: #902000; } /* DataType */ +code > span.dv { color: #40a070; } /* DecVal */ +code > span.bn { color: #40a070; } /* BaseN */ +code > span.fl { color: 
#40a070; } /* Float */ +code > span.ch { color: #4070a0; } /* Char */ +code > span.st { color: #4070a0; } /* String */ +code > span.co { color: #60a0b0; font-style: italic; } /* Comment */ +code > span.ot { color: #007020; } /* Other */ +code > span.al { color: #ff0000; font-weight: bold; } /* Alert */ +code > span.fu { color: #06287e; } /* Function */ +code > span.er { color: #ff0000; font-weight: bold; } /* Error */ +code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */ +code > span.cn { color: #880000; } /* Constant */ +code > span.sc { color: #4070a0; } /* SpecialChar */ +code > span.vs { color: #4070a0; } /* VerbatimString */ +code > span.ss { color: #bb6688; } /* SpecialString */ +code > span.im { } /* Import */ +code > span.va { color: #19177c; } /* Variable */ +code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */ +code > span.op { color: #666666; } /* Operator */ +code > span.bu { } /* BuiltIn */ +code > span.ex { } /* Extension */ +code > span.pp { color: #bc7a00; } /* Preprocessor */ +code > span.at { color: #7d9029; } /* Attribute */ +code > span.do { color: #ba2121; font-style: italic; } /* Documentation */ +code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */ +code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */ +code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */ </style> </head> <body> diff --git a/tests/lhs-test.html+lhs b/tests/lhs-test.html+lhs index 492d9c718..0124b84d5 100644 --- a/tests/lhs-test.html+lhs +++ b/tests/lhs-test.html+lhs @@ -13,18 +13,35 @@ table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode { table.sourceCode { width: 100%; line-height: 100%; } td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; } td.sourceCode { padding-left: 5px; } -code > span.kw { color: #007020; font-weight: 
bold; } -code > span.dt { color: #902000; } -code > span.dv { color: #40a070; } -code > span.bn { color: #40a070; } -code > span.fl { color: #40a070; } -code > span.ch { color: #4070a0; } -code > span.st { color: #4070a0; } -code > span.co { color: #60a0b0; font-style: italic; } -code > span.ot { color: #007020; } -code > span.al { color: #ff0000; font-weight: bold; } -code > span.fu { color: #06287e; } -code > span.er { color: #ff0000; font-weight: bold; } +code > span.kw { color: #007020; font-weight: bold; } /* Keyword */ +code > span.dt { color: #902000; } /* DataType */ +code > span.dv { color: #40a070; } /* DecVal */ +code > span.bn { color: #40a070; } /* BaseN */ +code > span.fl { color: #40a070; } /* Float */ +code > span.ch { color: #4070a0; } /* Char */ +code > span.st { color: #4070a0; } /* String */ +code > span.co { color: #60a0b0; font-style: italic; } /* Comment */ +code > span.ot { color: #007020; } /* Other */ +code > span.al { color: #ff0000; font-weight: bold; } /* Alert */ +code > span.fu { color: #06287e; } /* Function */ +code > span.er { color: #ff0000; font-weight: bold; } /* Error */ +code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */ +code > span.cn { color: #880000; } /* Constant */ +code > span.sc { color: #4070a0; } /* SpecialChar */ +code > span.vs { color: #4070a0; } /* VerbatimString */ +code > span.ss { color: #bb6688; } /* SpecialString */ +code > span.im { } /* Import */ +code > span.va { color: #19177c; } /* Variable */ +code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */ +code > span.op { color: #666666; } /* Operator */ +code > span.bu { } /* BuiltIn */ +code > span.ex { } /* Extension */ +code > span.pp { color: #bc7a00; } /* Preprocessor */ +code > span.at { color: #7d9029; } /* Attribute */ +code > span.do { color: #ba2121; font-style: italic; } /* Documentation */ +code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */ +code > 
span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */ +code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */ </style> </head> <body> diff --git a/tests/lhs-test.latex b/tests/lhs-test.latex index 0c8c901f1..a210d926a 100644 --- a/tests/lhs-test.latex +++ b/tests/lhs-test.latex @@ -35,13 +35,30 @@ \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} +\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}} \newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} +\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} \newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} +\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} +\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}} +\newcommand{\ImportTok}[1]{{#1}} \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}} +\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}} +\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} +\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}} -\newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}} \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}} +\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}} +\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}} +\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}} +\newcommand{\BuiltInTok}[1]{{#1}} +\newcommand{\ExtensionTok}[1]{{#1}} +\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}} +\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}} 
\newcommand{\RegionMarkerTok}[1]{{#1}} +\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} +\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} +\newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}} \newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}} \newcommand{\NormalTok}[1]{{#1}} \ifxetex diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 96204898e..b046d44d5 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -76,6 +76,7 @@ ,Header 3 ("my-other-header",[],[]) [Str "My",Space,Str "other",Space,Str "header"] ,Para [Str "A",Space,Str "link",Space,Str "to",Space,Link [Str "My",Space,Str "header"] ("#my-header-1",""),Str "."] ,Para [Str "Another",Space,Str "link",Space,Str "to",Space,Link [Str "it"] ("#my-header-1",""),Str "."] +,Para [Str "Should",Space,Str "be",Space,Link [Str "case",Space,Str "insensitive"] ("#my-header-1",""),Str "."] ,Para [Str "Link",Space,Str "to",Space,Link [Str "Explicit",Space,Str "header",Space,Str "attributes"] ("#foobar",""),Str "."] ,Para [Str "But",Space,Str "this",Space,Str "is",Space,Str "not",Space,Str "a",Space,Str "link",Space,Str "to",Space,Link [Str "My",Space,Str "other",Space,Str "header"] ("/foo",""),Str ",",Space,Str "since",Space,Str "the",Space,Str "reference",Space,Str "is",Space,Str "defined."] ,Header 2 ("foobar",["baz"],[("key","val")]) [Str "Explicit",Space,Str "header",Space,Str "attributes"] @@ -150,6 +151,8 @@ ,Para [Link [Str "link"] ("/hi(there)","")] ,Para [Link [Str "link"] ("/hithere)","")] ,Para [Link [Str "linky"] ("hi_(there_(nested))","")] +,Header 2 ("backslashes-in-link-references",[],[]) [Str "Backslashes",Space,Str "in",Space,Str "link",Space,Str "references"] +,Para [Link [Str "*",RawInline (Format "tex") "\\a"] ("b","")] ,Header 2 ("reference-link-fallbacks",[],[]) [Str "Reference",Space,Str "link",Space,Str 
"fallbacks"] ,Para [Str "[",Emph [Str "not",Space,Str "a",Space,Str "link"],Str "]",Space,Str "[",Emph [Str "nope"],Str "]\8230"] ,Header 2 ("reference-link-followed-by-a-citation",[],[]) [Str "Reference",Space,Str "link",Space,Str "followed",Space,Str "by",Space,Str "a",Space,Str "citation"] diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt index 99e9ec7e8..4906a2eea 100644 --- a/tests/markdown-reader-more.txt +++ b/tests/markdown-reader-more.txt @@ -168,6 +168,8 @@ A link to [My header]. Another link to [it][My header]. +Should be [case insensitive][my header]. + Link to [Explicit header attributes]. [my other header]: /foo @@ -258,6 +260,10 @@ Empty cells [linky]: hi_(there_(nested)) +## Backslashes in link references + +[\*\a](b) + ## Reference link fallbacks [*not a link*] [*nope*]... diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index 805bad414..9bc26416f 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -8,6 +8,7 @@ import qualified Tests.Old import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown import qualified Tests.Readers.Org +import qualified Tests.Readers.HTML import qualified Tests.Readers.RST import qualified Tests.Readers.Docx import qualified Tests.Readers.Txt2Tags @@ -46,6 +47,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Readers" [ testGroup "LaTeX" Tests.Readers.LaTeX.tests , testGroup "Markdown" Tests.Readers.Markdown.tests + , testGroup "HTML" Tests.Readers.HTML.tests , testGroup "Org" Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests , testGroup "Docx" Tests.Readers.Docx.tests |