diff options
68 files changed, 2486 insertions, 665 deletions
diff --git a/.travis.yml b/.travis.yml index 2901288e9..9c031216f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,87 +1,125 @@ -# This file has been generated -- see https://github.com/hvr/multi-ghc-travis -language: c +# Copy these contents into the root directory of your Github project in a file +# named .travis.yml + +# Use new container infrastructure to enable caching sudo: false +# Choose a lightweight base image; we provide our own build tools. +language: c + +# Caching so the next build will be fast too. cache: directories: - - $HOME/.cabsnap - - $HOME/.cabal/packages - -before_cache: - - rm -fv $HOME/.cabal/packages/hackage.haskell.org/build-reports.log - - rm -fv $HOME/.cabal/packages/hackage.haskell.org/00-index.tar + - $HOME/.ghc + - $HOME/.cabal + - $HOME/.stack +# The different configurations we want to test. We have BUILD=cabal which uses +# cabal-install, and BUILD=stack which uses Stack. More documentation on each +# of those below. +# +# We set the compiler values here to tell Travis to use a different +# cache file per set of arguments. +# +# If you need to have different apt packages for each combination in the +# matrix, you can use a line such as: +# addons: {apt: {packages: [libfcgi-dev,libgmp-dev]}} matrix: include: - - env: CABALVER=1.16 GHCVER=7.4.2 GHCOPTS=-Werror - compiler: ": #GHC 7.4.2" - addons: {apt: {packages: [cabal-install-1.16,ghc-7.4.2], sources: [hvr-ghc]}} - - env: CABALVER=1.16 GHCVER=7.6.3 GHCOPTS=-Werror - compiler: ": #GHC 7.6.3" - addons: {apt: {packages: [cabal-install-1.16,ghc-7.6.3], sources: [hvr-ghc]}} - - env: CABALVER=1.18 GHCVER=7.8.4 GHCOPTS=-Werror - compiler: ": #GHC 7.8.4" - addons: {apt: {packages: [cabal-install-1.18,ghc-7.8.4], sources: [hvr-ghc]}} - - env: CABALVER=1.22 GHCVER=7.10.2 GHCOPTS=-Werror - compiler: ": #GHC 7.10.2" - addons: {apt: {packages: [cabal-install-1.22,ghc-7.10.2], sources: [hvr-ghc]}} + # We grab the appropriate GHC and cabal-install versions from hvr's PPA. 
See: + # https://github.com/hvr/multi-ghc-travis + - env: BUILD=cabal GHCVER=7.4.2 CABALVER=1.16 + compiler: ": #GHC 7.4.2" + addons: {apt: {packages: [cabal-install-1.16,ghc-7.4.2], sources: [hvr-ghc]}} + - env: BUILD=cabal GHCVER=7.6.3 CABALVER=1.16 + compiler: ": #GHC 7.6.3" + addons: {apt: {packages: [cabal-install-1.16,ghc-7.6.3], sources: [hvr-ghc]}} + - env: BUILD=cabal GHCVER=7.8.4 CABALVER=1.18 + compiler: ": #GHC 7.8.4" + addons: {apt: {packages: [cabal-install-1.18,ghc-7.8.4], sources: [hvr-ghc]}} + - env: BUILD=cabal GHCVER=7.10.3 CABALVER=1.22 + compiler: ": #GHC 7.10.3" + addons: {apt: {packages: [cabal-install-1.22,ghc-7.10.3], sources: [hvr-ghc]}} -before_install: - - unset CC - - export PATH=/opt/ghc/$GHCVER/bin:/opt/cabal/$CABALVER/bin:$PATH + # Build with the newest GHC and cabal-install. This is an accepted failure, + # see below. + - env: BUILD=cabal GHCVER=head CABALVER=head + compiler: ": #GHC HEAD" + addons: {apt: {packages: [cabal-install-head,ghc-head], sources: [hvr-ghc]}} -install: - - cabal --version - - echo "$(ghc --version) [$(ghc --print-project-git-commit-id 2> /dev/null || echo '?')]" - - if [ -f $HOME/.cabal/packages/hackage.haskell.org/00-index.tar.gz ]; - then - zcat $HOME/.cabal/packages/hackage.haskell.org/00-index.tar.gz > - $HOME/.cabal/packages/hackage.haskell.org/00-index.tar; - fi - - travis_retry cabal update -v - - sed -i 's/^jobs:/-- jobs:/' ${HOME}/.cabal/config - - cabal install --only-dependencies --enable-tests --enable-benchmarks --dry -v > installplan.txt - - sed -i -e '1,/^Resolving /d' installplan.txt; cat installplan.txt + # The Stack builds. We can pass in arbitrary Stack arguments via the ARGS + # variable, such as using --stack-yaml to point to a different file. 
+ - env: BUILD=stack ARGS="--resolver lts-5" + compiler: ": #stack 7.10.3" + addons: {apt: {packages: [ghc-7.10.3], sources: [hvr-ghc]}} -# check whether current requested install-plan matches cached package-db snapshot - - if diff -u installplan.txt $HOME/.cabsnap/installplan.txt; - then - echo "cabal build-cache HIT"; - rm -rfv .ghc; - cp -a $HOME/.cabsnap/ghc $HOME/.ghc; - cp -a $HOME/.cabsnap/lib $HOME/.cabsnap/share $HOME/.cabsnap/bin $HOME/.cabal/; - else - echo "cabal build-cache MISS"; - rm -rf $HOME/.cabsnap; - mkdir -p $HOME/.ghc $HOME/.cabal/lib $HOME/.cabal/share $HOME/.cabal/bin; - cabal install --only-dependencies --enable-tests --enable-benchmarks; - fi - -# snapshot package-db on cache miss - - if [ ! -d $HOME/.cabsnap ]; - then - echo "snapshotting package-db to build-cache"; - mkdir $HOME/.cabsnap; - cp -a $HOME/.ghc $HOME/.cabsnap/ghc; - cp -a $HOME/.cabal/lib $HOME/.cabal/share $HOME/.cabal/bin installplan.txt $HOME/.cabsnap/; - fi + # Nightly builds are allowed to fail + - env: BUILD=stack ARGS="--resolver nightly" + compiler: ": #stack nightly" + addons: {apt: {packages: [libgmp-dev]}} -# Here starts the actual work to be performed for the package under test; -# any command which exits with a non-zero exit code causes the build to fail. -script: - - if [ -f configure.ac ]; then autoreconf -i; fi - - cabal configure --enable-tests --enable-benchmarks -v2 # -v2 provides useful information for debugging - - cabal build --ghc-options=$GHCOPTS # this builds all libraries and executables (including tests/benchmarks) - - cabal test - - cabal check -# Test that a source-distribution can be generated -# (with cabal >= 1.18 'cabal sdist' would work too): - - ./dist/setup/setup sdist + - env: BUILD=stack ARGS="--resolver lts-5" + compiler: ": #stack 7.10.3 osx" + os: osx -# Check that the resulting source distribution can be built & installed. 
-# If there are no other `.tar.gz` files in `dist`, this can be even simpler: -# `cabal install --force-reinstalls dist/*-*.tar.gz` - - SRC_TGZ=$(cabal info . | awk '{print $2;exit}').tar.gz && - (cd dist && cabal install --force-reinstalls "$SRC_TGZ") + - env: BUILD=stack ARGS="--resolver nightly" + compiler: ": #stack nightly osx" + os: osx -# EOF + allow_failures: + - env: BUILD=cabal GHCVER=head CABALVER=head + - env: BUILD=stack ARGS="--resolver nightly" + +before_install: +# Using compiler above sets CC to an invalid value, so unset it +- unset CC + +# We want to always allow newer versions of packages when building on GHC HEAD +- CABALARGS="" +- if [ "x$GHCVER" = "xhead" ]; then CABALARGS=--allow-newer; fi + +# Download and unpack the stack executable +- export PATH=/opt/ghc/$GHCVER/bin:/opt/cabal/$CABALVER/bin:$HOME/.local/bin:$PATH +- mkdir -p ~/.local/bin +- | + if [ `uname` = "Darwin" ] + then + curl --insecure -L https://www.stackage.org/stack/osx-x86_64 | tar xz --strip-components=1 --include '*/stack' -C ~/.local/bin + else + curl -L https://www.stackage.org/stack/linux-x86_64 | tar xz --wildcards --strip-components=1 -C ~/.local/bin '*/stack' + fi + +install: +- echo "$(ghc --version) [$(ghc --print-project-git-commit-id 2> /dev/null || echo '?')]" +- if [ -f configure.ac ]; then autoreconf -i; fi +- | + case "$BUILD" in + stack) + stack --no-terminal --install-ghc $ARGS test --only-dependencies + ;; + cabal) + cabal --version + travis_retry cabal update + cabal install --only-dependencies --enable-tests --enable-benchmarks --force-reinstalls --ghc-options=-O0 --reorder-goals --max-backjumps=-1 $CABALARGS + ;; + esac + +script: +- | + case "$BUILD" in + stack) + stack --no-terminal $ARGS test --haddock --no-haddock-deps + ;; + cabal) + cabal configure --enable-tests --enable-benchmarks -v2 --ghc-options="-O0 -Werror" + cabal build + cabal check || [ "$CABALVER" == "1.16" ] + cabal test + cabal copy + # cabal sdist fails on cabal 1.16: + cabal sdist 
|| [ "$CABALVER" == "1.16" ] + SRC_TGZ=$(cabal info . | awk '{print $2;exit}').tar.gz && \ + (cd dist && cabal install --force-reinstalls "$SRC_TGZ" || \ + [ "$CABALVER" == "1.16" ]) + ;; + esac diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eff033057..f39acab10 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -145,21 +145,40 @@ Tests Tests can be run as follows: + cabal install --only-dependencies --enable-tests cabal configure --enable-tests cabal build cabal test +or, if you're using [stack], + + stack init + stack test + The test program is `tests/test-pandoc.hs`. -Benchmarks can be enabled by passing the `--enable-benchmarks` flag -to `cabal configure`, and run using `cabal bench`. +Benchmarks +---------- + +To run benchmarks with cabal: + + cabal configure --enable-benchmarks + cabal build + cabal bench + +With stack: + + stack bench Using the REPL -------------- With a recent version of cabal, you can do `cabal repl` and get -a ghci REPL for working with pandoc. We recommend using the following -`.ghci` file (which can be placed in the source directory): +a ghci REPL for working with pandoc. With [stack], use +`stack ghci`. + +We recommend using the following `.ghci` file (which can be +placed in the source directory): ``` :set -fobject-code @@ -266,3 +285,5 @@ The library is structured as follows: [inprogress]: https://github.com/jgm/pandoc/labels/inprogress [more discussion needed]: https://github.com/jgm/pandoc/labels/More%20discussion%20needed [more info needed]: https://github.com/jgm/pandoc/labels/More%20info%20needed +[stack]: https://github.com/commercialhaskell/stack + @@ -1,5 +1,5 @@ Pandoc -Copyright (C) 2006-2015 John MacFarlane <jgm at berkeley dot edu> +Copyright (C) 2006-2016 John MacFarlane <jgm at berkeley dot edu> This code is released under the [GPL], version 2 or later: @@ -11,7 +11,7 @@ https://github.com/jgm/pandoc/wiki/Installing-the-development-version-of-pandoc Quick install with stack ------------------------ -1. 
Install [stack](https://github.com/commercialhaskell/stack/wiki/Downloads). +1. Install [stack](http://docs.haskellstack.org/en/stable/install_and_upgrade.html). 2. If you used git to get the pandoc source (as opposed to unpacking a release tarball), do @@ -180,6 +180,10 @@ can be run from any directory and is completely self-contained. You can find the pandoc executable in `dist/build/pandoc`. Copy this wherever you please. +Or alternatively with `stack`: + + stack install pandoc --flag pandoc:embed_data_files + [zip-archive]: http://hackage.haskell.org/package/zip-archive [highlighting-kate]: http://hackage.haskell.org/package/highlighting-kate [blaze-html]: http://hackage.haskell.org/package/blaze-html @@ -1,6 +1,6 @@ % Pandoc User's Guide % John MacFarlane -% November 12, 2015 +% January 12, 2016 Synopsis ======== @@ -22,7 +22,7 @@ markup], [Haddock markup], [OPML], [Emacs Org mode], [DocBook], [OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki markup], [DokuWiki markup], [Haddock markup], [EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, [Emacs Org mode], -[AsciiDoc], [InDesign ICML], and [Slidy], [Slideous], [DZSlides], +[AsciiDoc], [InDesign ICML], [TEI Simple], and [Slidy], [Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. It can also produce [PDF] output on systems where LaTeX, ConTeXt, or `wkhtmltopdf` is installed. @@ -89,6 +89,7 @@ Markdown can be expected to be lossy. 
[reveal.js]: http://lab.hakim.se/reveal-js/ [FictionBook2]: http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1 [InDesign ICML]: https://www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf +[TEI Simple]: https://github.com/TEIC/TEI-Simple Using `pandoc` -------------- @@ -277,11 +278,11 @@ General options `docx` (Word docx), `haddock` (Haddock markup), `rtf` (rich text format), `epub` (EPUB v2 book), `epub3` (EPUB v3), `fb2` (FictionBook2 e-book), `asciidoc` (AsciiDoc), `icml` (InDesign - ICML), `slidy` (Slidy HTML and javascript slide show), `slideous` - (Slideous HTML and javascript slide show), `dzslides` (DZSlides - HTML5 + javascript slide show), `revealjs` (reveal.js HTML5 + - javascript slide show), `s5` (S5 HTML and javascript slide show), - or the path of a custom lua writer (see [Custom + ICML), `tei` (TEI Simple), `slidy` (Slidy HTML and javascript slide + show), `slideous` (Slideous HTML and javascript slide show), + `dzslides` (DZSlides HTML5 + javascript slide show), `revealjs` + (reveal.js HTML5 + javascript slide show), `s5` (S5 HTML and javascript + slide show), or the path of a custom lua writer (see [Custom writers], below). Note that `odt`, `epub`, and `epub3` output will not be directed to *stdout*; an output filename must be specified using the `-o/--output` option. If @@ -387,6 +388,14 @@ Reader options require different kinds of images. Currently this option only affects the Markdown and LaTeX readers. +`--file-scope` + +: Parse each file individually before combining for multifile + documents. This will allow footnotes in different files with the + same identifiers to work as expected. If this option is set, + footnotes and links will not work across files. Reading binary + files (docx, odt, epub) implies `--file-scope`. 
+ `--filter=`*EXECUTABLE* : Specify an executable to be used as a filter transforming the @@ -471,7 +480,7 @@ General writer options `-s`, `--standalone` : Produce output with an appropriate header and footer (e.g. a - standalone HTML, LaTeX, or RTF file, not a fragment). This option + standalone HTML, LaTeX, TEI, or RTF file, not a fragment). This option is set automatically for `pdf`, `epub`, `epub3`, `fb2`, `docx`, and `odt` output. @@ -536,9 +545,9 @@ General writer options `--toc`, `--table-of-contents` : Include an automatically generated table of contents (or, in - the case of `latex`, `context`, and `rst`, an instruction to create + the case of `latex`, `context`, `docx`, and `rst`, an instruction to create one) in the output document. This option has no effect on `man`, - `docbook`, `slidy`, `slideous`, `s5`, `docx`, or `odt` output. + `docbook`, `slidy`, `slideous`, `s5`, or `odt` output. `--toc-depth=`*NUMBER* @@ -579,7 +588,7 @@ General writer options : Include contents of *FILE*, verbatim, at the end of the document body (before the `</body>` tag in HTML, or the - `\end{document}` command in LaTeX). This option can be be used + `\end{document}` command in LaTeX). This option can be used repeatedly to include multiple files. They will be included in the order specified. Implies `--standalone`. @@ -627,9 +636,10 @@ Options affecting specific writers `--chapters` : Treat top-level headers as chapters in LaTeX, ConTeXt, and DocBook - output. When the LaTeX document class is set to `report`, `book`, or - `memoir`, this option is implied. If `beamer` is the output - format, top-level headers will become `\part{..}`. + output. When the LaTeX document class is set to `report`, `book`, + or `memoir` (unless the `article` option is specified), this + option is implied. If `beamer` is the output format, top-level + headers will become `\part{..}`. 
`-N`, `--number-sections` @@ -715,7 +725,7 @@ Options affecting specific writers `-c` *URL*, `--css=`*URL* -: Link to a CSS style sheet. This option can be be used repeatedly to +: Link to a CSS style sheet. This option can be used repeatedly to include multiple files. They will be included in the order specified. `--reference-odt=`*FILE* @@ -1031,6 +1041,10 @@ as the following: `\subtitle`, such as `beamer` or the [KOMA-Script] series (`scrartcl`, `scrreprt`, `scrbook`).[^subtitle] +`institute` +: author affiliations (in LaTeX and Beamer only). Can be a + list, when there are multiple authors. + `abstract` : document summary, included in LaTeX, ConTeXt, AsciiDoc, and Word docx @@ -1090,7 +1104,7 @@ Language variables in the YAML metadata, according to [BCP 47]. For example: `otherlangs: [en-GB, fr]`. This is automatically generated from the `lang` attributes - in all `span`s and `div`s but can be overriden. + in all `span`s and `div`s but can be overridden. Currently only used by LaTeX through the generated `babel-otherlangs` and `polyglossia-otherlangs` variables. The LaTeX writer outputs polyglossia commands in the text but @@ -3377,14 +3391,13 @@ variants are supported: `shortcut_reference_links`. `markdown_github` (GitHub-Flavored Markdown) -: `pipe_tables`, `raw_html`, `tex_math_single_backslash`, - `fenced_code_blocks`, `auto_identifiers`, +: `pipe_tables`, `raw_html`, `fenced_code_blocks`, `auto_identifiers`, `ascii_identifiers`, `backtick_code_blocks`, `autolink_bare_uris`, `intraword_underscores`, `strikeout`, `hard_line_breaks`, `emoji`, `shortcut_reference_links`. 
`markdown_mmd` (MultiMarkdown) -: `pipe_tables` `raw_html`, `markdown_attribute`, `mmd_link_attributes`, +: `pipe_tables`, `raw_html`, `markdown_attribute`, `mmd_link_attributes`, `raw_tex`, `tex_math_double_backslash`, `intraword_underscores`, `mmd_title_block`, `footnotes`, `definition_lists`, `all_symbols_escapable`, `implicit_header_references`, diff --git a/RELEASE-CHECKLIST b/RELEASE-CHECKLIST index 7ae936d96..2c68a8e7b 100644 --- a/RELEASE-CHECKLIST +++ b/RELEASE-CHECKLIST @@ -20,6 +20,9 @@ _ Generate Ubuntu/Debian deb package (cd deb; make package). _ Upload to HackageDB +_ if docs don't build on Hackage: + 'cabal install neil && neil docs --username=MYUSERNAME' + _ Update website, including short description of changes ('make changes') _ Announce on pandoc-announce, pandoc-discuss diff --git a/appveyor.yml b/appveyor.yml index 00a1aab34..f2fe828fa 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,38 +1,19 @@ -install: -- cmd: 'git submodule update --init' -- ps: | - choco install haskellplatform -version 2014.2.0.0 -y - # Haskell Platfrom package doesn't update PATH for the current shell instance +cache: +- "c:\\sr" # stack root, short paths == fewer problems - $env:Path += ";${env:ProgramFiles}\Haskell Platform\2014.2.0.0\bin" - $env:Path += ";${env:ProgramFiles}\Haskell Platform\2014.2.0.0\lib\extralibs\bin" - $env:Path += ";${env:ProgramFiles}\Haskell Platform\2014.2.0.0\mingw\bin" - # choco install wixtoolset - cabal sandbox init - $env:Path += ";.\.cabal-sandbox\bin" - cabal update - cabal install --force hsb2hs +build: off -build_script: -- cmd: | - cabal install --force --enable-tests -fembed_data_files +before_test: +- curl -ostack.zip -L --insecure http://www.stackage.org/stack/windows-i386 +- 7z x stack.zip stack.exe -# after_build: -# - cmd: | -# cabal install -fembed_data_files pandoc-citeproc -# strip .\.cabal-sandbox\bin\pandoc.exe -# strip .\.cabal-sandbox\bin\pandoc-citeproc.exe -# .\.cabal-sandbox\bin\pandoc.exe -s --template 
data\templates\default.html -S README -o README.html -# .\.cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.rtf COPYING -t rtf -S -o COPYING.rtf -# copy COPYRIGHT COPYRIGHT.txt -# for /f "tokens=2 delims= " %%a in ('.\.cabal-sandbox\bin\pandoc --version') do ( set "VERSION=%%a" && exit ) -# if "%VERSION%" == "" ( echo "Error: could not determine version number." && exit /b 1 ) -# cd windows -# echo Creating msi... -# candle -dVERSION=%VERSION% pandoc.wxs -# if %errorlevel% neq 0 exit /b %errorlevel% -# light -sw1076 -ext WixUIExtension -ext WixUtilExtension -out pandoc-%VERSION%-windows.msi pandoc.wixobj +clone_folder: "c:\\stack" +environment: + global: + STACK_ROOT: "c:\\sr" test_script: -- cmd: | - cabal test +- stack setup > nul +# The ugly echo "" hack is to avoid complaints about 0 being an invalid file +# descriptor +- echo "" | stack --no-terminal test @@ -1,3 +1,157 @@ +pandoc (1.17) + + * Added `--file-scope` option (Jesse Rosenthal). + Traditionally pandoc operates on multiple files by first concatenating + them (around extra line breaks) and then processing the joined file. So + it only parses a multi-file document at the document scope. This has the + benefit that footnotes and links can be in different files, but it also + introduces some difficulties: (a) it is difficult to join files with + footnotes without some sort of preprocessing, which makes it difficult + to write academic documents in small pieces; (b) it makes it impossible + to process multiple binary input files, which can't be catted; (c) it + makes it impossible to process files from different input formats. + The `--file-scope` option causes pandoc to parse the files first, + and then combine the parsed output, instead of combining before + parsing. This makes it impossible to have links across multiple files, + and auto-identified headers won't work correctly if headers in multiple + files have the same name. 
On the other hand, footnotes across multiple + files will work correctly and will allow more freedom for input formats. + `--file-scope` is selected automatically for binary input files (which + cannot be concatenated anyway) and for pandoc json. + + * Add TEI Writer (csforste) and `tei` output format. + + * Added a general `ByteStringReader` with warnings, used by the docx + reader (API change, Jesse Rosenthal). + + * Add `readDocxWithWarnings` (API change, Jesse Rosenthal). + + * Changed type of Shared.uniqueIdent argument from [String] to Set String. + This avoids performance problems in documents with many identically + named headers (API change, #2671). + + * Removed `tex_math_single_backslash` from `markdown_github` options + (#2707). + + * Make language extensions as well as full language names + trigger syntax highlighting. For example, `py` will now work as + well as `python` (jgm/highlighting-kate#83). + + * Docx reader (Jesse Rosenthal): + + + Handle alternate content. Some word functions (especially graphics) + give various choices for content so there can be backwards compatibility. + + Don't turn numbered headers into lists. + + Docx Reader: Add state to the parser, for warnings + + Update feature checklist in source code. + + Get rid of `Modifiable` typeclass. + + Add tests for adjacent hyperlinks. + + Add a "Link" modifier to `Reducible`. We want to make sure that + links have their spaces removed, and are appropriately smushed + together (#2689). + + * HTML reader: Fixed behavior of base tag (#2777). + + + If the base path does not end with slash, the last component + will be replaced. E.g. base = `http://example.com/foo` + combines with `bar.html` to give `http://example.com/bar.html`. + + If the href begins with a slash, the whole path of the base + is replaced. E.g. base = `http://example.com/foo/` combines + with `/bar.html` to give `http://example.com/bar.html`. + + Rewrote `htmlInBalanced`. 
This version avoids an exponential + performance problem with `<script>` tags, and it should be faster + in general (#2730). + + Properly handle an empty cell in a simple table (#2718). + + Handle multiple `<meta>` tags with same name. Put them in a list + in the metadata so they are all preserved, rather than (as before) + throwing out all but one. + + * Markdown reader: + + + Improved pipe table parsing (#2765). + + Allow `+` separators in pipe table cells. We already allowed + them in the header, but not in the body rows, for some reason. + This gives compatibility with org-mode tables. + + Don't cross line boundary parsing pipe table row. + Previously an Emph element could be parsed across the newline + at the end of the pipe table row. + + Use `htmlInBalanced` for `rawVerbatimBlock`, for better + performance (#2730). + + Fixed bug with smart quotes around tex math. + + * LaTeX reader: + + + Handle interior `$` characters in math (#2743). For example, + `$$\hbox{$i$}$$`. + + `inlineCommand` now gobbles an empty `{}` after any command (#2687). + This gives better results when people write e.g. `\TeX{}` in Markdown. + + Properly handle LaTeX "math" environment as inline math (#2171). + + * Textile reader: Support `>`, `<`, `=`, `<>` text alignment attributes. + Closes #2674. + + * Org reader (Albert Krewinkel): + + + Prefix even empty figure names with "fig:" (#2643). The + convention used by pandoc for figures is to mark them by prefixing + the name with `fig:`. The org reader failed to do this if a figure + had no name. + + Refactor link-target processing (#2684). + + * ConTeXt writer: Fix whitespace at line beginning in line blocks (#2744). + Thanks to @c-foster. + + * HTML writer: Don't include alignment attribute for default table columns. + Previously these were given "left" alignment. Better to leave off + alignment attributes altogether (#2694). 
+ + * Markdown writer: Use hyphens for YAML metadata block bottom line, for + better compatibility with other Markdown flavors (Henrik Tramberend). + + * LaTeX writer: + + + Avoid double toprule in headerless table with caption (#2742). + + Clean up options parser (Jesse Rosenthal). + + Treat `memoir` template with `article` option as article, instead + of treating all `memoir` templates as books. + + Allow more flexible table alignment (Henrik Tramberend, #2665). + New default is not to include `[c]` option (which is the default + anyway if no positioning is specified). Now LaTeX templates can + control the overall table alignment in a document by setting the + longtable length variables `LTleft` and `LTright`. For example, + + \setlength\LTleft\parindent + \setlength\LTright\fill + + will create left-aligned tables that respect paragraph indentation. + + * Docx writer: Handle image alt text (#2754, Mauro Bieg). + + * Org writer - pass through RawInline with format "org". + + * DokuWiki writer: use `$$` for display math. + + * Custom writer: Pass attributes parameter to CaptionedImage (#2697). + + * Make protocol-relative URIs work again (#2737). + + * make_osx_package.sh: Use env variable for developer id certs. + + * Raise `tagsoup` lower bound to 0.13.7 to fix entity-related + problems (#2734). + + * Allow `zip-archive` 0.3. + + * Allow `aeson` 0.11. + +pandoc (1.16.0.2) + + * Depend on deepseq rather than deepseq-generics (fpco/stackage#1096). + + * Fixed regression in latex smart quote parsing (#2645). + In cases where a match was not found for a quote, everything + from the open quote to the end of the paragraph was being dropped. + pandoc (1.16.0.1) * Fixed regression with `--latex-engine` (#2618). 
In 1.16 `--latex-engine` diff --git a/data/sample.lua b/data/sample.lua index 0cb492392..574f378cc 100644 --- a/data/sample.lua +++ b/data/sample.lua @@ -255,7 +255,7 @@ function html_align(align) end end -function CaptionedImage(src, tit, caption) +function CaptionedImage(src, tit, caption, attr) return '<div class="figure">\n<img src="' .. escape(src,true) .. '" title="' .. escape(tit,true) .. '"/>\n' .. '<p class="caption">' .. caption .. '</p>\n</div>' diff --git a/data/templates b/data/templates -Subproject 8660958b2add3dba83f84d3f8845a029b672bbe +Subproject d39b2207f98e8a6c6f91b0498c183069a0aa7c9 diff --git a/deb/stack.yaml b/deb/stack.yaml index 3f84be738..9986ead40 100644 --- a/deb/stack.yaml +++ b/deb/stack.yaml @@ -15,8 +15,4 @@ packages: - '..' - 'https://hackage.haskell.org/package/pandoc-citeproc-0.9/pandoc-citeproc-0.9.tar.gz' extra-deps: -- 'cmark-0.5.0' -- 'pandoc-types-1.16.0.1' -# Use older aeson to avoid excessive memory use in compilation: -- 'aeson-0.8.0.2' -resolver: lts-4.0 +resolver: lts-5.2 diff --git a/make_osx_package.sh b/make_osx_package.sh index 320c3fead..89f97025b 100755 --- a/make_osx_package.sh +++ b/make_osx_package.sh @@ -11,6 +11,8 @@ SCRIPTS=$OSX/osx-resources BASE=pandoc-$VERSION ME=$(whoami) PACKAGEMAKER=/Applications/PackageMaker.app/Contents/MacOS/PackageMaker +DEVELOPER_ID_APPLICATION=${DEVELOPER_ID_APPLICATION:-Developer ID Application: John Macfarlane} +DEVELOPER_ID_INSTALLER=${DEVELOPER_ID_INSTALLER:-Developer ID Installer: John Macfarlane} # We need this for hsb2hs: PATH=$LOCALBIN:$PATH @@ -51,7 +53,7 @@ $LOCALBIN/pandoc --data data -t html5 -s COPYING -o $RESOURCES/license.html echo Signing pandoc executable... -codesign --force --sign "Developer ID Application: John Macfarlane" $DEST/bin/pandoc +codesign --force --sign ${DEVELOPER_ID_APPLICATION} $DEST/bin/pandoc # make sure it's valid... 
returns nonzero exit code if it isn't: spctl --assess --type execute $DEST/bin/pandoc @@ -60,7 +62,7 @@ echo Creating OSX package... rm -rf $BASE.pkg pkgbuild --root $DIST/pandoc --identifier net.johnmacfarlane.pandoc --version 1.13 --ownership recommended $DIST/pandoc.pkg -productbuild --distribution osx/distribution.xml --resources $DIST/Resources --package-path $DIST --version 1.13 --sign "Developer ID Installer: John Macfarlane" $BASE-osx.pkg +productbuild --distribution osx/distribution.xml --resources $DIST/Resources --package-path $DIST --version $VERSION --sign ${DEVELOPER_ID_INSTALLER} $BASE-osx.pkg # verify signature spctl --assess --type install $BASE-osx.pkg diff --git a/man/pandoc.1 b/man/pandoc.1 index 6d000f775..24fe70584 100644 --- a/man/pandoc.1 +++ b/man/pandoc.1 @@ -1,5 +1,5 @@ .\"t -.TH PANDOC 1 "November 12, 2015" "pandoc 1.16" +.TH PANDOC 1 "January 12, 2016" "pandoc 1.17" .SH NAME pandoc - general markup converter .SH SYNOPSIS @@ -18,8 +18,8 @@ reStructuredText, XHTML, HTML5, LaTeX (including \f[C]beamer\f[] slide shows), ConTeXt, RTF, OPML, DocBook, OpenDocument, ODT, Word docx, GNU Texinfo, MediaWiki markup, DokuWiki markup, Haddock markup, EPUB (v2 or v3), FictionBook2, Textile, groff man pages, Emacs Org mode, AsciiDoc, -InDesign ICML, and Slidy, Slideous, DZSlides, reveal.js or S5 HTML slide -shows. +InDesign ICML, TEI Simple, and Slidy, Slideous, DZSlides, reveal.js or S5 +HTML slide shows. It can also produce PDF output on systems where LaTeX, ConTeXt, or \f[C]wkhtmltopdf\f[] is installed. .PP @@ -253,11 +253,12 @@ Specify output format. 
(Haddock markup), \f[C]rtf\f[] (rich text format), \f[C]epub\f[] (EPUB v2 book), \f[C]epub3\f[] (EPUB v3), \f[C]fb2\f[] (FictionBook2 e\-book), \f[C]asciidoc\f[] (AsciiDoc), \f[C]icml\f[] (InDesign ICML), -\f[C]slidy\f[] (Slidy HTML and javascript slide show), \f[C]slideous\f[] -(Slideous HTML and javascript slide show), \f[C]dzslides\f[] (DZSlides -HTML5 + javascript slide show), \f[C]revealjs\f[] (reveal.js HTML5 + -javascript slide show), \f[C]s5\f[] (S5 HTML and javascript slide show), -or the path of a custom lua writer (see Custom writers, below). +\f[C]tei\f[] (TEI Simple), \f[C]slidy\f[] (Slidy HTML and javascript +slide show), \f[C]slideous\f[] (Slideous HTML and javascript slide +show), \f[C]dzslides\f[] (DZSlides HTML5 + javascript slide show), +\f[C]revealjs\f[] (reveal.js HTML5 + javascript slide show), \f[C]s5\f[] +(S5 HTML and javascript slide show), or the path of a custom lua writer +(see Custom writers, below). Note that \f[C]odt\f[], \f[C]epub\f[], and \f[C]epub3\f[] output will not be directed to \f[I]stdout\f[]; an output filename must be specified using the \f[C]\-o/\-\-output\f[] option. @@ -401,6 +402,15 @@ Currently this option only affects the Markdown and LaTeX readers. .RS .RE .TP +.B \f[C]\-\-file\-scope\f[] +Parse each file individually before combining for multifile documents. +This will allow footnotes in different files with the same identifiers +to work as expected. +If this option is set, footnotes and links will not work across files. +Reading binary files (docx, odt, epub) implies \f[C]\-\-file\-scope\f[]. +.RS +.RE +.TP .B \f[C]\-\-filter=\f[]\f[I]EXECUTABLE\f[] Specify an executable to be used as a filter transforming the pandoc AST after the input is parsed and before the output is written. @@ -501,7 +511,7 @@ This option only affects the docx and epub readers. .TP .B \f[C]\-s\f[], \f[C]\-\-standalone\f[] Produce output with an appropriate header and footer (e.g. -a standalone HTML, LaTeX, or RTF file, not a fragment). 
+a standalone HTML, LaTeX, TEI, or RTF file, not a fragment). This option is set automatically for \f[C]pdf\f[], \f[C]epub\f[], \f[C]epub3\f[], \f[C]fb2\f[], \f[C]docx\f[], and \f[C]odt\f[] output. .RS @@ -582,11 +592,10 @@ rendered page. .TP .B \f[C]\-\-toc\f[], \f[C]\-\-table\-of\-contents\f[] Include an automatically generated table of contents (or, in the case of -\f[C]latex\f[], \f[C]context\f[], and \f[C]rst\f[], an instruction to -create one) in the output document. +\f[C]latex\f[], \f[C]context\f[], \f[C]docx\f[], and \f[C]rst\f[], an +instruction to create one) in the output document. This option has no effect on \f[C]man\f[], \f[C]docbook\f[], -\f[C]slidy\f[], \f[C]slideous\f[], \f[C]s5\f[], \f[C]docx\f[], or -\f[C]odt\f[] output. +\f[C]slidy\f[], \f[C]slideous\f[], \f[C]s5\f[], or \f[C]odt\f[] output. .RS .RE .TP @@ -642,7 +651,7 @@ Implies \f[C]\-\-standalone\f[]. Include contents of \f[I]FILE\f[], verbatim, at the end of the document body (before the \f[C]</body>\f[] tag in HTML, or the \f[C]\\end{document}\f[] command in LaTeX). -This option can be be used repeatedly to include multiple files. +This option can be used repeatedly to include multiple files. They will be included in the order specified. Implies \f[C]\-\-standalone\f[]. .RS @@ -702,7 +711,8 @@ ATX headers. Treat top\-level headers as chapters in LaTeX, ConTeXt, and DocBook output. When the LaTeX document class is set to \f[C]report\f[], \f[C]book\f[], -or \f[C]memoir\f[], this option is implied. +or \f[C]memoir\f[] (unless the \f[C]article\f[] option is specified), +this option is implied. If \f[C]beamer\f[] is the output format, top\-level headers will become \f[C]\\part{..}\f[]. .RS @@ -809,7 +819,7 @@ Implies \f[C]\-\-standalone\f[]. .TP .B \f[C]\-c\f[] \f[I]URL\f[], \f[C]\-\-css=\f[]\f[I]URL\f[] Link to a CSS style sheet. -This option can be be used repeatedly to include multiple files. +This option can be used repeatedly to include multiple files. 
They will be included in the order specified. .RS .RE @@ -1260,7 +1270,7 @@ a list of other languages used in the document in the YAML metadata, according to BCP 47. For example: \f[C]otherlangs:\ [en\-GB,\ fr]\f[]. This is automatically generated from the \f[C]lang\f[] attributes in all -\f[C]span\f[]s and \f[C]div\f[]s but can be overriden. +\f[C]span\f[]s and \f[C]div\f[]s but can be overridden. Currently only used by LaTeX through the generated \f[C]babel\-otherlangs\f[] and \f[C]polyglossia\-otherlangs\f[] variables. @@ -4214,8 +4224,7 @@ variants are supported: .RE .TP .B \f[C]markdown_github\f[] (GitHub\-Flavored Markdown) -\f[C]pipe_tables\f[], \f[C]raw_html\f[], -\f[C]tex_math_single_backslash\f[], \f[C]fenced_code_blocks\f[], +\f[C]pipe_tables\f[], \f[C]raw_html\f[], \f[C]fenced_code_blocks\f[], \f[C]auto_identifiers\f[], \f[C]ascii_identifiers\f[], \f[C]backtick_code_blocks\f[], \f[C]autolink_bare_uris\f[], \f[C]intraword_underscores\f[], \f[C]strikeout\f[], @@ -4225,7 +4234,7 @@ variants are supported: .RE .TP .B \f[C]markdown_mmd\f[] (MultiMarkdown) -\f[C]pipe_tables\f[] \f[C]raw_html\f[], \f[C]markdown_attribute\f[], +\f[C]pipe_tables\f[], \f[C]raw_html\f[], \f[C]markdown_attribute\f[], \f[C]mmd_link_attributes\f[], \f[C]raw_tex\f[], \f[C]tex_math_double_backslash\f[], \f[C]intraword_underscores\f[], \f[C]mmd_title_block\f[], \f[C]footnotes\f[], \f[C]definition_lists\f[], diff --git a/osx/distribution.xml b/osx/distribution.xml index 024a25bd1..346fe06fe 100644 --- a/osx/distribution.xml +++ b/osx/distribution.xml @@ -2,8 +2,8 @@ <installer-gui-script minSpecVersion="1"> <title>pandoc</title> <organization>net.johnmacfarlane.pandoc</organization> - <domains enable_localSystem="true"/> - <options customize="never" require-scripts="true" rootVolumeOnly="true" /> + <domains enable_localSystem="true" enable_anywhere="true" /> + <options customize="allow" require-scripts="false" rootVolumeOnly="false" /> <!-- Define documents displayed at various steps --> 
<!-- <welcome file="welcome.html" mime-type="text/html" /> --> <license file="license.html" mime-type="text/html" /> @@ -11,7 +11,7 @@ <options hostArchitectures="x86_64" /> <!-- List all component packages --> <pkg-ref id="net.johnmacfarlane.pandoc" - version="1.13" + version="1.16.0.2" auth="root">pandoc.pkg</pkg-ref> <!-- List them again here. They can now be organized as a hierarchy if you want. --> diff --git a/osx/stack.yaml b/osx/stack.yaml index d9decaba1..50a90f9d6 100644 --- a/osx/stack.yaml +++ b/osx/stack.yaml @@ -18,8 +18,4 @@ packages: - '..' - 'https://hackage.haskell.org/package/pandoc-citeproc-0.9/pandoc-citeproc-0.9.tar.gz' extra-deps: -- 'cmark-0.5.0' -- 'pandoc-types-1.16.0.1' -# Use older aeson to avoid excessive memory use in compilation: -- 'aeson-0.8.0.2' -resolver: lts-4.0 +resolver: lts-5.2 diff --git a/pandoc.cabal b/pandoc.cabal index e46cb3a95..b76d5d6f4 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,5 +1,5 @@ Name: pandoc -Version: 1.16.0.1 +Version: 1.17 Cabal-Version: >= 1.10 Build-Type: Custom License: GPL @@ -20,7 +20,7 @@ Description: Pandoc is a Haskell library for converting from one markup MediaWiki markup, TWiki markup, Haddock markup, OPML, Emacs Org-Mode, txt2tags, Word Docx, ODT, and Textile, and it can write Markdown, reStructuredText, XHTML, HTML 5, - LaTeX, ConTeXt, DocBook, OPML, OpenDocument, ODT, + LaTeX, ConTeXt, DocBook, OPML, TEI, OpenDocument, ODT, Word docx, RTF, MediaWiki, DokuWiki, Textile, groff man pages, plain text, Emacs Org-Mode, AsciiDoc, Haddock markup, EPUB (v2 and v3), FictionBook2, InDesign ICML, and several @@ -39,6 +39,7 @@ Data-Files: data/templates/default.html data/templates/default.html5 data/templates/default.docbook + data/templates/default.tei data/templates/default.beamer data/templates/default.opendocument data/templates/default.icml @@ -252,15 +253,15 @@ Library directory >= 1 && < 1.3, bytestring >= 0.9 && < 0.11, text >= 0.11 && < 1.3, - zip-archive >= 0.2.3.4 && < 0.3, + zip-archive 
>= 0.2.3.4 && < 0.4, HTTP >= 4000.0.5 && < 4000.4, texmath >= 0.8.4.1 && < 0.9, xml >= 1.3.12 && < 1.4, random >= 1 && < 1.2, extensible-exceptions >= 0.1 && < 0.2, pandoc-types >= 1.16 && < 1.17, - aeson >= 0.7 && < 0.11, - tagsoup >= 0.13.1 && < 0.14, + aeson >= 0.7 && < 0.12, + tagsoup >= 0.13.7 && < 0.14, base64-bytestring >= 0.1 && < 1.1, zlib >= 0.5 && < 0.7, highlighting-kate >= 0.6.1 && < 0.7, @@ -276,7 +277,7 @@ Library SHA >= 1.6 && < 1.7, haddock-library >= 1.1 && < 1.3, old-time, - deepseq-generics >= 0.1 && < 0.2, + deepseq >= 1.3 && < 1.5, JuicyPixels >= 3.1.6.1 && < 3.3, filemanip >= 0.3 && < 0.4, cmark >= 0.5 && < 0.6, @@ -362,6 +363,7 @@ Library Text.Pandoc.Writers.Docx, Text.Pandoc.Writers.EPUB, Text.Pandoc.Writers.FB2, + Text.Pandoc.Writers.TEI, Text.Pandoc.PDF, Text.Pandoc.UTF8, Text.Pandoc.Templates, @@ -370,7 +372,7 @@ Library Text.Pandoc.Process, Text.Pandoc.CSS Other-Modules: Text.Pandoc.Readers.Docx.Lists, - Text.Pandoc.Readers.Docx.Reducible, + Text.Pandoc.Readers.Docx.Combine, Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Readers.Docx.Fonts, Text.Pandoc.Readers.Docx.Util, @@ -414,7 +416,7 @@ Executable pandoc bytestring >= 0.9 && < 0.11, extensible-exceptions >= 0.1 && < 0.2, highlighting-kate >= 0.6.1 && < 0.7, - aeson >= 0.7.0.5 && < 0.11, + aeson >= 0.7.0.5 && < 0.12, yaml >= 0.8.8.2 && < 0.9, containers >= 0.1 && < 0.6, HTTP >= 4000.0.5 && < 4000.4 @@ -479,7 +481,7 @@ Test-Suite test-pandoc containers >= 0.1 && < 0.6, ansi-terminal >= 0.5 && < 0.7, executable-path >= 0.0 && < 0.1, - zip-archive >= 0.2.3.4 && < 0.3 + zip-archive >= 0.2.3.4 && < 0.4 Other-Modules: Tests.Old Tests.Helpers Tests.Arbitrary @@ -504,6 +506,7 @@ Test-Suite test-pandoc Tests.Writers.LaTeX Tests.Writers.Docx Tests.Writers.RST + Tests.Writers.TEI Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind -threaded Default-Language: Haskell98 @@ -215,6 +215,7 @@ data Opt = Opt , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media , optTrace :: Bool -- ^ 
Print debug information , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. + , optFileScope :: Bool -- ^ Parse input files before combining , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX } @@ -278,6 +279,7 @@ defaultOpts = Opt , optExtractMedia = Nothing , optTrace = False , optTrackChanges = AcceptChanges + , optFileScope = False , optKaTeXStylesheet = Nothing , optKaTeXJS = Nothing } @@ -387,6 +389,11 @@ options = "accept|reject|all") "" -- "Accepting or reject MS Word track-changes."" + , Option "" ["file-scope"] + (NoArg + (\opt -> return opt { optFileScope = True })) + "" -- "Parse input files before combining" + , Option "" ["extract-media"] (ReqArg (\arg opt -> @@ -1009,6 +1016,8 @@ defaultWriterName x = ".fb2" -> "fb2" ".opml" -> "opml" ".icml" -> "icml" + ".tei.xml" -> "tei" + ".tei" -> "tei" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" @@ -1115,6 +1124,7 @@ convertWithOpts opts args = do , optExtractMedia = mbExtractMedia , optTrace = trace , optTrackChanges = trackChanges + , optFileScope = fileScope , optKaTeXStylesheet = katexStylesheet , optKaTeXJS = katexJS } = opts @@ -1267,6 +1277,7 @@ convertWithOpts opts args = do , readerDefaultImageExtension = defaultImageExtension , readerTrace = trace , readerTrackChanges = trackChanges + , readerFileScope = fileScope } when (not (isTextFormat format) && outputFile == "-") $ @@ -1299,13 +1310,25 @@ convertWithOpts opts args = do then handleIncludes else return . Right - (doc, media) <- fmap handleError $ - case reader of + let sourceToDoc :: [FilePath] -> IO (Pandoc, MediaBag) + sourceToDoc sources' = fmap handleError $ + case reader of StringReader r-> do - srcs <- convertTabs . intercalate "\n" <$> readSources sources + srcs <- convertTabs . intercalate "\n" <$> readSources sources' doc <- handleIncludes' srcs either (return . 
Left) (\s -> fmap (,mempty) <$> r readerOpts s) doc - ByteStringReader r -> readFiles sources >>= r readerOpts + ByteStringReader r -> readFiles sources' >>= r readerOpts + + -- We parse first if (1) fileScope is set, (2) it's a binary + -- reader, or (3) we're reading JSON. This is easier to express as an + -- AND of negatives than as an OR of positives, so we do default + -- parsing if it's a StringReader AND (fileScope is not set AND it's + -- not a JSON reader). + (doc, media) <- case reader of + (StringReader _) | not fileScope && readerName' /= "json" -> + sourceToDoc sources + _ -> do pairs <- mapM (\s -> sourceToDoc [s]) sources + return (mconcat $ map fst pairs, mconcat $ map snd pairs) let writerOptions = def { writerStandalone = standalone', writerTemplate = templ, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 3f46648a2..d59ee7846 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -115,6 +115,7 @@ module Text.Pandoc , writeHaddock , writeCommonMark , writeCustom + , writeTEI -- * Rendering templates and default templates , module Text.Pandoc.Templates -- * Miscellaneous @@ -169,6 +170,7 @@ import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Writers.Haddock import Text.Pandoc.Writers.CommonMark import Text.Pandoc.Writers.Custom +import Text.Pandoc.Writers.TEI import Text.Pandoc.Templates import Text.Pandoc.Options import Text.Pandoc.Shared (safeRead, warn, mapLeft, pandocVersion) @@ -221,6 +223,14 @@ mkStringReaderWithWarnings r = StringReader $ \o s -> mkBSReader :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)) -> Reader mkBSReader r = ByteStringReader (\o s -> return $ r o s) +mkBSReaderWithWarnings :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag, [String])) -> Reader +mkBSReaderWithWarnings r = ByteStringReader $ \o s -> + case r o s of + Left err -> return $ Left err + Right (doc, mediaBag, warnings) -> do + mapM_ warn warnings + return $ Right (doc, mediaBag) + -- | 
Association list of formats and readers. readers :: [(String, Reader)] readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) @@ -241,7 +251,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("latex" , mkStringReader readLaTeX) ,("haddock" , mkStringReader readHaddock) ,("twiki" , mkStringReader readTWiki) - ,("docx" , mkBSReader readDocx) + ,("docx" , mkBSReaderWithWarnings readDocxWithWarnings) ,("odt" , mkBSReader readOdt) ,("t2t" , mkStringReader readTxt2TagsNoMacros) ,("epub" , mkBSReader readEPUB) @@ -304,6 +314,7 @@ writers = [ ,("asciidoc" , PureStringWriter writeAsciiDoc) ,("haddock" , PureStringWriter writeHaddock) ,("commonmark" , PureStringWriter writeCommonMark) + ,("tei" , PureStringWriter writeTEI) ] getDefaultExtensions :: String -> Set Extension diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index d7a14c129..ecfef1832 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -70,7 +70,8 @@ highlight formatter (_, classes, keyvals) rawCode = startNumber = firstNum, numberLines = any (`elem` ["number","numberLines", "number-lines"]) classes } - lcclasses = map (map toLower) classes + lcclasses = map (map toLower) + (classes ++ concatMap languagesByExtension classes) in case find (`elem` lcLanguages) lcclasses of Nothing | numberLines fmtOpts -> Just diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 7dd47cd59..b5736c63d 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -202,7 +202,6 @@ githubMarkdownExtensions :: Set Extension githubMarkdownExtensions = Set.fromList [ Ext_pipe_tables , Ext_raw_html - , Ext_tex_math_single_backslash , Ext_fenced_code_blocks , Ext_auto_identifiers , Ext_ascii_identifiers @@ -265,6 +264,7 @@ data ReaderOptions = ReaderOptions{ , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrace :: Bool -- ^ Print debugging info , readerTrackChanges :: 
TrackChanges + , readerFileScope :: Bool -- ^ Parse before combining } deriving (Show, Read, Data, Typeable, Generic) instance Default ReaderOptions @@ -281,6 +281,7 @@ instance Default ReaderOptions , readerDefaultImageExtension = "" , readerTrace = False , readerTrackChanges = AcceptChanges + , readerFileScope = False } -- diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 16fe75ed5..325231846 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -915,7 +915,7 @@ data ParserState = ParserState stateMeta' :: F Meta, -- ^ Document metadata stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links) - stateIdentifiers :: [String], -- ^ List of header identifiers used + stateIdentifiers :: Set.Set String, -- ^ Header identifiers used stateNextExample :: Int, -- ^ Number of next example stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers stateHasChapters :: Bool, -- ^ True if \chapter encountered @@ -973,8 +973,8 @@ instance HasHeaderMap ParserState where updateHeaderMap f st = st{ stateHeaders = f $ stateHeaders st } class HasIdentifierList st where - extractIdentifierList :: st -> [String] - updateIdentifierList :: ([String] -> [String]) -> st -> st + extractIdentifierList :: st -> Set.Set String + updateIdentifierList :: (Set.Set String -> Set.Set String) -> st -> st instance HasIdentifierList ParserState where extractIdentifierList = stateIdentifiers @@ -1013,7 +1013,7 @@ defaultParserState = stateMeta' = return nullMeta, stateHeaderTable = [], stateHeaders = M.empty, - stateIdentifiers = [], + stateIdentifiers = Set.empty, stateNextExample = 1, stateExamples = M.empty, stateHasChapters = False, @@ -1092,8 +1092,8 @@ registerHeader (ident,classes,kvs) header' = do let id'' = if Ext_ascii_identifiers `Set.member` exts then catMaybes $ map toAsciiChar id' else id' - updateState $ 
updateIdentifierList $ - if id' == id'' then (id' :) else ([id', id''] ++) + updateState $ updateIdentifierList $ Set.insert id' + updateState $ updateIdentifierList $ Set.insert id'' updateState $ updateHeaderMap $ insert' header' id' return (id'',classes,kvs) else do diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 44f67ce75..604bc20de 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -50,8 +50,7 @@ implemented, [-] means partially implemented): * Inlines - [X] Str - - [X] Emph (From italics. `underline` currently read as span. In - future, it might optionally be emph as well) + - [X] Emph (italics and underline both read as Emph) - [X] Strong - [X] Strikeout - [X] Superscript @@ -62,16 +61,16 @@ implemented, [-] means partially implemented): - [X] Code (styled with `VerbatimChar`) - [X] Space - [X] LineBreak (these are invisible in Word: entered with Shift-Return) - - [ ] Math + - [X] Math - [X] Link (links to an arbitrary bookmark create a span with the target as id and "anchor" class) - - [-] Image (Links to path in archive. Future option for - data-encoded URI likely.) + - [X] Image - [X] Note (Footnotes and Endnotes are silently combined.) 
-} module Text.Pandoc.Readers.Docx - ( readDocx + ( readDocxWithWarnings + , readDocx ) where import Codec.Archive.Zip @@ -81,7 +80,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists -import Text.Pandoc.Readers.Docx.Reducible +import Text.Pandoc.Readers.Docx.Combine import Text.Pandoc.Shared import Text.Pandoc.MediaBag (insertMedia, MediaBag) import Data.List (delete, (\\), intersect) @@ -89,6 +88,7 @@ import Text.TeXMath (writeTeX) import Data.Default (Default) import qualified Data.ByteString.Lazy as B import qualified Data.Map as M +import qualified Data.Set as Set import Control.Monad.Reader import Control.Monad.State import Data.Sequence (ViewL(..), viewl) @@ -97,14 +97,22 @@ import qualified Data.Sequence as Seq (null) import Text.Pandoc.Error import Text.Pandoc.Compat.Except +readDocxWithWarnings :: ReaderOptions + -> B.ByteString + -> Either PandocError (Pandoc, MediaBag, [String]) +readDocxWithWarnings opts bytes = + case archiveToDocxWithWarnings (toArchive bytes) of + Right (docx, warnings) -> do + (meta, blks, mediaBag) <- docxToOutput opts docx + return (Pandoc meta blks, mediaBag, warnings) + Left _ -> Left (ParseFailure "couldn't parse docx file") + readDocx :: ReaderOptions -> B.ByteString -> Either PandocError (Pandoc, MediaBag) -readDocx opts bytes = - case archiveToDocx (toArchive bytes) of - Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag)) - <$> (docxToOutput opts docx) - Left _ -> Left (ParseFailure "couldn't parse docx file") +readDocx opts bytes = do + (pandoc, mediaBag, _) <- readDocxWithWarnings opts bytes + return (pandoc, mediaBag) data DState = DState { docxAnchorMap :: M.Map String String , docxMediaBag :: MediaBag @@ -166,7 +174,7 @@ bodyPartsToMeta' (bp : bps) | (Paragraph pPr parParts) <- bp , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles) , (Just metaField) <- M.lookup c metaStyles = do - inlines <- concatReduce <$> mapM 
parPartToInlines parParts + inlines <- smushInlines <$> mapM parPartToInlines parParts remaining <- bodyPartsToMeta' bps let f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils'] @@ -290,13 +298,13 @@ runToInlines (Run rs runElems) Just SubScrpt -> subscript codeString _ -> codeString | otherwise = do - let ils = concatReduce (map runElemToInlines runElems) + let ils = smushInlines (map runElemToInlines runElems) return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils runToInlines (Footnote bps) = do - blksList <- concatReduce <$> (mapM bodyPartToBlocks bps) + blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps) return $ note blksList runToInlines (Endnote bps) = do - blksList <- concatReduce <$> (mapM bodyPartToBlocks bps) + blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps) return $ note blksList runToInlines (InlineDrawing fp bs ext) = do mediaBag <- gets docxMediaBag @@ -315,19 +323,19 @@ parPartToInlines (PlainRun r) = runToInlines r parPartToInlines (Insertion _ author date runs) = do opts <- asks docxOptions case readerTrackChanges opts of - AcceptChanges -> concatReduce <$> mapM runToInlines runs + AcceptChanges -> smushInlines <$> mapM runToInlines runs RejectChanges -> return mempty AllChanges -> do - ils <- concatReduce <$> mapM runToInlines runs + ils <- smushInlines <$> mapM runToInlines runs let attr = ("", ["insertion"], [("author", author), ("date", date)]) return $ spanWith attr ils parPartToInlines (Deletion _ author date runs) = do opts <- asks docxOptions case readerTrackChanges opts of AcceptChanges -> return mempty - RejectChanges -> concatReduce <$> mapM runToInlines runs + RejectChanges -> smushInlines <$> mapM runToInlines runs AllChanges -> do - ils <- concatReduce <$> mapM runToInlines runs + ils <- smushInlines <$> mapM runToInlines runs let attr = ("", ["deletion"], [("author", author), ("date", date)]) return $ spanWith attr ils parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = @@ 
-350,7 +358,7 @@ parPartToInlines (BookMark _ anchor) = -- avoid an extra pass. let newAnchor = if not inHdrBool && anchor `elem` (M.elems anchorMap) - then uniqueIdent [Str anchor] (M.elems anchorMap) + then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap) else anchor unless inHdrBool (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) @@ -360,10 +368,10 @@ parPartToInlines (Drawing fp bs ext) = do modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } return $ imageWith (extentToAttr ext) fp "" "" parPartToInlines (InternalHyperLink anchor runs) = do - ils <- concatReduce <$> mapM runToInlines runs + ils <- smushInlines <$> mapM runToInlines runs return $ link ('#' : anchor) "" ils parPartToInlines (ExternalHyperLink target runs) = do - ils <- concatReduce <$> mapM runToInlines runs + ils <- smushInlines <$> mapM runToInlines runs return $ link target "" ils parPartToInlines (PlainOMath exps) = do return $ math $ writeTeX exps @@ -393,7 +401,7 @@ makeHeaderAnchor' (Header n (_, classes, kvs) ils) | (c:cs) <- filter isAnchorSpan ils , (Span (ident, ["anchor"], _) _) <- c = do hdrIDMap <- gets docxAnchorMap - let newIdent = uniqueIdent ils (M.elems hdrIDMap) + let newIdent = uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) modify $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) (ils \\ (c:cs)) -- Otherwise we just give it a name, and register that name (associate @@ -401,7 +409,7 @@ makeHeaderAnchor' (Header n (_, classes, kvs) ils) makeHeaderAnchor' (Header n (_, classes, kvs) ils) = do hdrIDMap <- gets docxAnchorMap - let newIdent = uniqueIdent ils (M.elems hdrIDMap) + let newIdent = uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) ils makeHeaderAnchor' blk = return blk @@ -416,7 +424,7 @@ singleParaToPlain blks = blks cellToBlocks :: Cell -> 
DocxContext Blocks cellToBlocks (Cell bps) = do - blks <- concatReduce <$> mapM bodyPartToBlocks bps + blks <- smushBlocks <$> mapM bodyPartToBlocks bps return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks rowToBlocksList :: Row -> DocxContext [Blocks] @@ -478,11 +486,11 @@ bodyPartToBlocks (Paragraph pPr parparts) $ concatMap parPartToString parparts | Just (style, n) <- pHeading pPr = do ils <- local (\s-> s{docxInHeaderBlock=True}) $ - (concatReduce <$> mapM parPartToInlines parparts) + (smushInlines <$> mapM parPartToInlines parparts) makeHeaderAnchor $ headerWith ("", delete style (pStyle pPr), []) n ils | otherwise = do - ils <- concatReduce <$> mapM parPartToInlines parparts >>= + ils <- smushInlines <$> mapM parPartToInlines parparts >>= (return . fromList . trimLineBreaks . normalizeSpaces . toList) dropIls <- gets docxDropCap let ils' = dropIls <> ils @@ -560,7 +568,7 @@ bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag) bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps - blks <- concatReduce <$> mapM bodyPartToBlocks blkbps + blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks mediaBag <- gets docxMediaBag return $ (meta, diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs new file mode 100644 index 000000000..39e0df825 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -0,0 +1,154 @@ +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, + PatternGuards #-} + +module Text.Pandoc.Readers.Docx.Combine ( smushInlines + , smushBlocks + ) + where + +import Text.Pandoc.Builder +import Data.List +import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>)) +import qualified Data.Sequence as Seq (null) + +data Modifier a = Modifier (a -> a) + | AttrModifier (Attr -> a -> a) Attr + | NullModifier + +spaceOutInlinesL :: Inlines -> (Inlines, 
Inlines) +spaceOutInlinesL ms = (l, stackInlines fs (m' <> r)) + where (l, m, r) = spaceOutInlines ms + (fs, m') = unstackInlines m + +spaceOutInlinesR :: Inlines -> (Inlines, Inlines) +spaceOutInlinesR ms = (stackInlines fs (l <> m'), r) + where (l, m, r) = spaceOutInlines ms + (fs, m') = unstackInlines m + +spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines) +spaceOutInlines ils = + let (fs, ils') = unstackInlines ils + contents = unMany ils' + left = case viewl contents of + (Space :< _) -> space + _ -> mempty + right = case viewr contents of + (_ :> Space) -> space + _ -> mempty in + (left, (stackInlines fs $ trimInlines . Many $ contents), right) + +stackInlines :: [Modifier Inlines] -> Inlines -> Inlines +stackInlines [] ms = ms +stackInlines (NullModifier : fs) ms = stackInlines fs ms +stackInlines ((Modifier f) : fs) ms = + if isEmpty ms + then stackInlines fs ms + else f $ stackInlines fs ms +stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms + +unstackInlines :: Inlines -> ([Modifier Inlines], Inlines) +unstackInlines ms = case ilModifier ms of + NullModifier -> ([], ms) + _ -> (f : fs, ms') where + f = ilModifier ms + (fs, ms') = unstackInlines $ ilInnards ms + +ilModifier :: Inlines -> Modifier Inlines +ilModifier ils = case viewl (unMany ils) of + (x :< xs) | Seq.null xs -> case x of + (Emph _) -> Modifier emph + (Strong _) -> Modifier strong + (SmallCaps _) -> Modifier smallcaps + (Strikeout _) -> Modifier strikeout + (Superscript _) -> Modifier superscript + (Subscript _) -> Modifier subscript + (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt) + (Span attr _) -> AttrModifier spanWith attr + _ -> NullModifier + _ -> NullModifier + +ilInnards :: Inlines -> Inlines +ilInnards ils = case viewl (unMany ils) of + (x :< xs) | Seq.null xs -> case x of + (Emph lst) -> fromList lst + (Strong lst) -> fromList lst + (SmallCaps lst) -> fromList lst + (Strikeout lst) -> fromList lst + (Superscript lst) -> fromList lst 
+ (Subscript lst) -> fromList lst + (Link _ lst _) -> fromList lst + (Span _ lst) -> fromList lst + _ -> ils + _ -> ils + +inlinesL :: Inlines -> (Inlines, Inlines) +inlinesL ils = case viewl $ unMany ils of + (s :< sq) -> (singleton s, Many sq) + _ -> (mempty, ils) + +inlinesR :: Inlines -> (Inlines, Inlines) +inlinesR ils = case viewr $ unMany ils of + (sq :> s) -> (Many sq, singleton s) + _ -> (ils, mempty) + +combineInlines :: Inlines -> Inlines -> Inlines +combineInlines x y = + let (xs', x') = inlinesR x + (y', ys') = inlinesL y + in + xs' <> (combineSingletonInlines x' y') <> ys' + +combineSingletonInlines :: Inlines -> Inlines -> Inlines +combineSingletonInlines x y = + let (xfs, xs) = unstackInlines x + (yfs, ys) = unstackInlines y + shared = xfs `intersect` yfs + x_remaining = xfs \\ shared + y_remaining = yfs \\ shared + x_rem_attr = filter isAttrModifier x_remaining + y_rem_attr = filter isAttrModifier y_remaining + in + case null shared of + True | isEmpty xs && isEmpty ys -> + stackInlines (x_rem_attr ++ y_rem_attr) mempty + | isEmpty xs -> + let (sp, y') = spaceOutInlinesL y in + (stackInlines x_rem_attr mempty) <> sp <> y' + | isEmpty ys -> + let (x', sp) = spaceOutInlinesR x in + x' <> sp <> (stackInlines y_rem_attr mempty) + | otherwise -> + let (x', xsp) = spaceOutInlinesR x + (ysp, y') = spaceOutInlinesL y + in + x' <> xsp <> ysp <> y' + False -> stackInlines shared $ + combineInlines + (stackInlines x_remaining xs) + (stackInlines y_remaining ys) + +combineBlocks :: Blocks -> Blocks -> Blocks +combineBlocks bs cs + | bs' :> (BlockQuote bs'') <- viewr (unMany bs) + , (BlockQuote cs'') :< cs' <- viewl (unMany cs) = + Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs' +combineBlocks bs cs = bs <> cs + +instance (Monoid a, Eq a) => Eq (Modifier a) where + (Modifier f) == (Modifier g) = (f mempty == g mempty) + (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty) + (NullModifier) == (NullModifier) = True + _ == _ = 
False + +isEmpty :: (Monoid a, Eq a) => a -> Bool +isEmpty x = x == mempty + +isAttrModifier :: Modifier a -> Bool +isAttrModifier (AttrModifier _ _) = True +isAttrModifier _ = False + +smushInlines :: [Inlines] -> Inlines +smushInlines xs = foldl combineInlines mempty xs + +smushBlocks :: [Blocks] -> Blocks +smushBlocks xs = foldl combineBlocks mempty xs diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index eec8b12c9..364483929 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -50,6 +50,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , Row(..) , Cell(..) , archiveToDocx + , archiveToDocxWithWarnings ) where import Codec.Archive.Zip import Text.XML.Light @@ -60,6 +61,7 @@ import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Reader +import Control.Monad.State import Control.Applicative ((<|>)) import qualified Data.Map as M import Text.Pandoc.Compat.Except @@ -81,16 +83,20 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show +data ReaderState = ReaderState { stateWarnings :: [String] } + deriving Show + + data DocxError = DocxError | WrongElem deriving Show instance Error DocxError where noMsg = WrongElem -type D = ExceptT DocxError (Reader ReaderEnv) +type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState)) -runD :: D a -> ReaderEnv -> Either DocxError a -runD dx re = runReader (runExceptT dx) re +runD :: D a -> ReaderEnv -> ReaderState -> (Either DocxError a, ReaderState) +runD dx re rs = runState (runReaderT (runExceptT dx) re) rs maybeToD :: Maybe a -> D a maybeToD (Just a) = return a @@ -257,7 +263,10 @@ type Author = String type ChangeDate = String archiveToDocx :: Archive -> Either DocxError Docx -archiveToDocx archive = do +archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive + +archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) 
+archiveToDocxWithWarnings archive = do let notes = archiveToNotes archive numbering = archiveToNumbering archive rels = archiveToRelationships archive @@ -265,8 +274,12 @@ archiveToDocx archive = do (styles, parstyles) = archiveToStyles archive rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles InDocument - doc <- runD (archiveToDocument archive) rEnv - return $ Docx doc + rState = ReaderState { stateWarnings = [] } + (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState + case eitherDoc of + Right doc -> Right (Docx doc, stateWarnings st) + Left e -> Left e + archiveToDocument :: Archive -> D Document @@ -576,12 +589,14 @@ elemToBodyPart ns element sty <- asks envParStyles let parstyle = elemToParagraphStyle ns element sty parparts <- mapD (elemToParPart ns) (elChildren element) - case pNumInfo parstyle of - Just (numId, lvl) -> do - num <- asks envNumbering - let levelInfo = lookupLevel numId lvl num - return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> return $ Paragraph parstyle parparts + -- Word uses list enumeration for numbered headings, so we only + -- want to infer a list from the styles if it is NOT a heading. 
+ case pHeading parstyle of + Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do + num <- asks envNumbering + let levelInfo = lookupLevel numId lvl num + return $ ListItem parstyle numId lvl levelInfo parparts + _ -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChild (elemName ns "w" "tblPr") element @@ -702,36 +717,58 @@ elemToExtent drawingElem = getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem >>= findAttr (QName at Nothing Nothing) >>= safeRead -elemToRun :: NameSpaces -> Element -> D Run -elemToRun ns element - | isElem ns "w" "r" element - , Just drawingElem <- findChild (elemName ns "w" "drawing") element = + +childElemToRun :: NameSpaces -> Element -> D Run +childElemToRun ns element + | isElem ns "w" "drawing" element = let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" - drawing = findElement (QName "blip" (Just a_ns) (Just "a")) drawingElem + drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= - (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent drawingElem) + (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent element) Nothing -> throwError WrongElem -elemToRun ns element - | isElem ns "w" "r" element - , Just ref <- findChild (elemName ns "w" "footnoteReference") element - , Just fnId <- findAttr (elemName ns "w" "id") ref = do +childElemToRun ns element + | isElem ns "w" "footnoteReference" element + , Just fnId <- findAttr (elemName ns "w" "id") element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Footnote bps Nothing -> return $ Footnote [] -elemToRun ns element - | isElem ns "w" "r" element - , Just ref <- findChild (elemName ns "w" "endnoteReference") element - , Just 
enId <- findAttr (elemName ns "w" "id") ref = do +childElemToRun ns element + | isElem ns "w" "endnoteReference" element + , Just enId <- findAttr (elemName ns "w" "id") element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Endnote bps Nothing -> return $ Endnote [] +childElemToRun _ _ = throwError WrongElem + +elemToRun :: NameSpaces -> Element -> D Run +elemToRun ns element + | isElem ns "w" "r" element + , Just altCont <- findChild (elemName ns "mc" "AlternateContent") element = + do let choices = findChildren (elemName ns "mc" "Choice") altCont + choiceChildren = map head $ filter (not . null) $ map elChildren choices + outputs <- mapD (childElemToRun ns) choiceChildren + case outputs of + r : _ -> return r + [] -> throwError WrongElem +elemToRun ns element + | isElem ns "w" "r" element + , Just drawingElem <- findChild (elemName ns "w" "drawing") element = + childElemToRun ns drawingElem +elemToRun ns element + | isElem ns "w" "r" element + , Just ref <- findChild (elemName ns "w" "footnoteReference") element = + childElemToRun ns ref +elemToRun ns element + | isElem ns "w" "r" element + , Just ref <- findChild (elemName ns "w" "endnoteReference") element = + childElemToRun ns ref elemToRun ns element | isElem ns "w" "r" element = do runElems <- elemToRunElems ns element @@ -940,3 +977,4 @@ elemToRunElems _ _ = throwError WrongElem setFont :: Maybe Font -> ReaderEnv -> ReaderEnv setFont f s = s{envFont = f} + diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs deleted file mode 100644 index c93b40119..000000000 --- a/src/Text/Pandoc/Readers/Docx/Reducible.hs +++ /dev/null @@ -1,181 +0,0 @@ -{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, - PatternGuards #-} - -module Text.Pandoc.Readers.Docx.Reducible ( concatReduce - , (<+>) - ) - where - - -import Text.Pandoc.Builder -import 
Data.List -import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) -import qualified Data.Sequence as Seq (null) - -data Modifier a = Modifier (a -> a) - | AttrModifier (Attr -> a -> a) Attr - | NullModifier - -class (Eq a) => Modifiable a where - modifier :: a -> Modifier a - innards :: a -> a - getL :: a -> (a, a) - getR :: a -> (a, a) - spaceOut :: a -> (a, a, a) - -spaceOutL :: (Monoid a, Modifiable a) => a -> (a, a) -spaceOutL ms = (l, stack fs (m' <> r)) - where (l, m, r) = spaceOut ms - (fs, m') = unstack m - -spaceOutR :: (Monoid a, Modifiable a) => a -> (a, a) -spaceOutR ms = (stack fs (l <> m'), r) - where (l, m, r) = spaceOut ms - (fs, m') = unstack m - -instance (Monoid a, Show a) => Show (Modifier a) where - show (Modifier f) = show $ f mempty - show (AttrModifier f attr) = show $ f attr mempty - show (NullModifier) = "NullModifier" - -instance (Monoid a, Eq a) => Eq (Modifier a) where - (Modifier f) == (Modifier g) = (f mempty == g mempty) - (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty) - (NullModifier) == (NullModifier) = True - _ == _ = False - -instance Modifiable Inlines where - modifier ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph _) -> Modifier emph - (Strong _) -> Modifier strong - (SmallCaps _) -> Modifier smallcaps - (Strikeout _) -> Modifier strikeout - (Superscript _) -> Modifier superscript - (Subscript _) -> Modifier subscript - (Span attr _) -> AttrModifier spanWith attr - _ -> NullModifier - _ -> NullModifier - - innards ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph lst) -> fromList lst - (Strong lst) -> fromList lst - (SmallCaps lst) -> fromList lst - (Strikeout lst) -> fromList lst - (Superscript lst) -> fromList lst - (Subscript lst) -> fromList lst - (Span _ lst) -> fromList lst - _ -> ils - _ -> ils - - getL ils = case viewl $ unMany ils of - (s :< sq) -> (singleton s, Many sq) - _ -> (mempty, ils) - - getR ils = case viewr 
$ unMany ils of - (sq :> s) -> (Many sq, singleton s) - _ -> (ils, mempty) - - spaceOut ils = - let (fs, ils') = unstack ils - contents = unMany ils' - left = case viewl contents of - (Space :< _) -> space - _ -> mempty - right = case viewr contents of - (_ :> Space) -> space - _ -> mempty in - (left, (stack fs $ trimInlines .Many $ contents), right) - -instance Modifiable Blocks where - modifier blks = case viewl (unMany blks) of - (x :< xs) | Seq.null xs -> case x of - (BlockQuote _) -> Modifier blockQuote - -- (Div attr _) -> AttrModifier divWith attr - _ -> NullModifier - _ -> NullModifier - - innards blks = case viewl (unMany blks) of - (x :< xs) | Seq.null xs -> case x of - (BlockQuote lst) -> fromList lst - -- (Div attr lst) -> fromList lst - _ -> blks - _ -> blks - - spaceOut blks = (mempty, blks, mempty) - - getL ils = case viewl $ unMany ils of - (s :< sq) -> (singleton s, Many sq) - _ -> (mempty, ils) - - getR ils = case viewr $ unMany ils of - (sq :> s) -> (Many sq, singleton s) - _ -> (ils, mempty) - - -unstack :: (Modifiable a) => a -> ([Modifier a], a) -unstack ms = case modifier ms of - NullModifier -> ([], ms) - _ -> (f : fs, ms') where - f = modifier ms - (fs, ms') = unstack $ innards ms - -stack :: (Monoid a, Modifiable a) => [Modifier a] -> a -> a -stack [] ms = ms -stack (NullModifier : fs) ms = stack fs ms -stack ((Modifier f) : fs) ms = - if isEmpty ms - then stack fs ms - else f $ stack fs ms -stack ((AttrModifier f attr) : fs) ms = f attr $ stack fs ms - -isEmpty :: (Monoid a, Eq a) => a -> Bool -isEmpty x = x == mempty - - -combine :: (Monoid a, Modifiable a, Eq a) => a -> a -> a -combine x y = - let (xs', x') = getR x - (y', ys') = getL y - in - xs' <> (combineSingleton x' y') <> ys' - -isAttrModifier :: Modifier a -> Bool -isAttrModifier (AttrModifier _ _) = True -isAttrModifier _ = False - -combineSingleton :: (Monoid a, Modifiable a, Eq a) => a -> a -> a -combineSingleton x y = - let (xfs, xs) = unstack x - (yfs, ys) = unstack y - 
shared = xfs `intersect` yfs - x_remaining = xfs \\ shared - y_remaining = yfs \\ shared - x_rem_attr = filter isAttrModifier x_remaining - y_rem_attr = filter isAttrModifier y_remaining - in - case null shared of - True | isEmpty xs && isEmpty ys -> - stack (x_rem_attr ++ y_rem_attr) mempty - | isEmpty xs -> - let (sp, y') = spaceOutL y in - (stack x_rem_attr mempty) <> sp <> y' - | isEmpty ys -> - let (x', sp) = spaceOutR x in - x' <> sp <> (stack y_rem_attr mempty) - | otherwise -> - let (x', xsp) = spaceOutR x - (ysp, y') = spaceOutL y - in - x' <> xsp <> ysp <> y' - False -> stack shared $ - combine - (stack x_remaining xs) - (stack y_remaining ys) - -(<+>) :: (Monoid a, Modifiable a, Eq a) => a -> a -> a -x <+> y = combine x y - -concatReduce :: (Monoid a, Modifiable a) => [a] -> a -concatReduce xs = foldl combine mempty xs diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 79aa540f6..07d282708 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -30,7 +30,7 @@ import Control.Monad (guard, liftM, when) import Data.List (isPrefixOf, isInfixOf) import Data.Maybe (mapMaybe, fromMaybe) import qualified Data.Map as M (Map, lookup, fromList, elems) -import Control.DeepSeq.Generics (deepseq, NFData) +import Control.DeepSeq (deepseq, NFData) import Debug.Trace (trace) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index a34e2fb5c..959a2d16f 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -43,7 +43,7 @@ import Text.HTML.TagSoup.Match import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) -import Text.Pandoc.Shared ( extractSpaces, renderTags' +import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField , escapeURI, safeRead, mapLeft ) import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) , Extension (Ext_epub_html_exts, 
@@ -52,9 +52,9 @@ import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import qualified Data.Map as M import Data.Maybe ( fromMaybe, isJust) -import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf ) +import Data.List ( intercalate, isInfixOf, isPrefixOf ) import Data.Char ( isDigit ) -import Control.Monad ( liftM, guard, when, mzero, void, unless ) +import Control.Monad ( guard, when, mzero, void, unless ) import Control.Arrow ((***)) import Control.Applicative ( (<|>) ) import Data.Monoid (First (..)) @@ -63,12 +63,12 @@ import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) -import Network.URI (isURI) +import Network.URI (URI, parseURIReference, nonStrictRelativeTo) import Text.Pandoc.Error import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Pandoc.Compat.Monoid ((<>)) import Text.Parsec.Error - +import qualified Data.Set as Set -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ReaderOptions -- ^ Reader options @@ -77,7 +77,7 @@ readHtml :: ReaderOptions -- ^ Reader options readHtml opts inp = mapLeft (ParseFailure . getError) . flip runReader def $ runParserT parseDoc - (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty) + (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty) "source" tags where tags = stripPrefixes . 
canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp @@ -103,8 +103,8 @@ data HTMLState = HTMLState { parserState :: ParserState, noteTable :: [(String, Blocks)], - baseHref :: Maybe String, - identifiers :: [String], + baseHref :: Maybe URI, + identifiers :: Set.Set String, headerMap :: M.Map Inlines String } @@ -137,19 +137,17 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag then return mempty else do let content = fromAttrib "content" mt - updateState $ B.setMeta name (B.text content) + updateState $ \s -> + let ps = parserState s in + s{ parserState = ps{ + stateMeta = addMetaField name (B.text content) + (stateMeta ps) } } return mempty pBaseTag = do bt <- pSatisfy (~== TagOpen "base" []) - let baseH = fromAttrib "href" bt - if null baseH - then return mempty - else do - let baseH' = case reverse baseH of - '/':_ -> baseH - _ -> baseH ++ "/" - updateState $ \st -> st{ baseHref = Just baseH' } - return mempty + updateState $ \st -> st{ baseHref = + parseURIReference $ fromAttrib "href" bt } + return mempty block :: TagParser Blocks block = do @@ -441,6 +439,7 @@ pTable = try $ do -- fail on empty table guard $ not $ null head' && null rows let isSinglePlain x = case B.toList x of + [] -> True [Plain _] -> True _ -> False let isSimple = all isSinglePlain $ concat (head':rows) @@ -605,9 +604,9 @@ pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) mbBaseHref <- baseHref <$> getState let url' = fromAttrib "href" tag - let url = case (isURI url', mbBaseHref) of - (False, Just h) -> h ++ url' - _ -> url' + let url = case (parseURIReference url', mbBaseHref) of + (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag let cls = words $ fromAttrib "class" tag @@ -619,9 +618,9 @@ pImage = do tag <- pSelfClosing (=="img") (isJust . 
lookup "src") mbBaseHref <- baseHref <$> getState let url' = fromAttrib "src" tag - let url = case (isURI url', mbBaseHref) of - (False, Just h) -> h ++ url' - _ -> url' + let url = case (parseURIReference url', mbBaseHref) of + (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag let uid = fromAttrib "id" tag @@ -925,14 +924,45 @@ htmlInBalanced :: (Monad m) => (Tag String -> Bool) -> ParserT String st m String htmlInBalanced f = try $ do - (TagOpen t _, tag) <- htmlTag f - guard $ not $ "/>" `isSuffixOf` tag -- not a self-closing tag - let stopper = htmlTag (~== TagClose t) - let anytag = snd <$> htmlTag (const True) - contents <- many $ notFollowedBy' stopper >> - (htmlInBalanced f <|> anytag <|> count 1 anyChar) - endtag <- liftM snd stopper - return $ tag ++ concat contents ++ endtag + lookAhead (char '<') + inp <- getInput + let ts = canonicalizeTags $ + parseTagsOptions parseOptions{ optTagWarning = True, + optTagPosition = True } inp + case ts of + (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do + guard $ f t + guard $ not $ hasTagWarning (t : take 1 rest) + case htmlInBalanced' tn (t:rest) of + [] -> mzero + xs -> case reverse xs of + (TagClose _ : TagPosition er ec : _) -> do + let ls = er - sr + let cs = ec - sc + lscontents <- concat <$> count ls anyLine + cscontents <- count cs anyChar + (_,closetag) <- htmlTag (~== TagClose tn) + return (lscontents ++ cscontents ++ closetag) + _ -> mzero + _ -> mzero + +htmlInBalanced' :: String + -> [Tag String] + -> [Tag String] +htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts + where go :: Int -> [Tag String] -> Maybe [Tag String] + go n (t@(TagOpen tn' _):rest) | tn' == tagname = + (t :) <$> go (n + 1) rest + go 1 (t@(TagClose tn'):_) | tn' == tagname = + return [t] + go n (t@(TagClose tn'):rest) | tn' == tagname = + (t :) <$> go (n - 1) rest + go n (t:ts') = (t :) <$> go n ts' + go _ [] = mzero + +hasTagWarning :: [Tag 
String] -> Bool +hasTagWarning (TagWarning _:_) = True +hasTagWarning _ = False -- | Matches a tag meeting a certain condition. htmlTag :: Monad m @@ -941,8 +971,6 @@ htmlTag :: Monad m htmlTag f = try $ do lookAhead (char '<') inp <- getInput - let hasTagWarning (TagWarning _:_) = True - hasTagWarning _ = False let (next : rest) = canonicalizeTags $ parseTagsOptions parseOptions{ optTagWarning = True } inp guard $ f next diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index d2e8d9d17..2be55c9da 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -166,10 +166,18 @@ mathInline :: LP String -> LP Inlines mathInline p = math <$> (try p >>= applyMacros') mathChars :: LP String -mathChars = (concat <$>) $ - many $ - many1 (satisfy (\c -> c /= '$' && c /='\\')) - <|> (\c -> ['\\',c]) <$> try (char '\\' *> anyChar) +mathChars = + concat <$> many (escapedChar + <|> (snd <$> withRaw braced) + <|> many1 (satisfy isOrdChar)) + where escapedChar = try $ do char '\\' + c <- anyChar + return ['\\',c] + isOrdChar '$' = False + isOrdChar '{' = False + isOrdChar '}' = False + isOrdChar '\\' = False + isOrdChar _ = True quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines quoted' f starter ender = do @@ -179,10 +187,11 @@ quoted' f starter ender = do then do ils <- many (notFollowedBy ender >> inline) (ender >> return (f (mconcat ils))) <|> - lit (case startchs of - "``" -> "“" - "`" -> "‘" - _ -> startchs) + (<> mconcat ils) <$> + lit (case startchs of + "``" -> "“" + "`" -> "‘" + _ -> startchs) else lit startchs doubleQuote :: LP Inlines @@ -421,7 +430,8 @@ inlineCommand = try $ do else if parseRaw then return $ rawInline "latex" rawcommand else return mempty - lookupListDefault mzero [name',name] inlineCommands + (lookupListDefault mzero [name',name] inlineCommands <* + optional (try (string "{}"))) <|> raw unlessParseRaw :: LP () @@ -434,6 +444,7 @@ isBlockCommand s = s `M.member` blockCommands 
inlineEnvironments :: M.Map String (LP Inlines) inlineEnvironments = M.fromList [ ("displaymath", mathEnv id Nothing "displaymath") + , ("math", math <$> verbEnv "math") , ("equation", mathEnv id Nothing "equation") , ("equation*", mathEnv id Nothing "equation*") , ("gather", mathEnv id (Just "gathered") "gather") diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 77c3a1016..b5d175453 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -36,7 +36,7 @@ import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate ) import qualified Data.Map as M import Data.Scientific (coefficient, base10Exponent) import Data.Ord ( comparing ) -import Data.Char ( isSpace, isAlphaNum, toLower ) +import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) @@ -61,7 +61,6 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockT import Control.Monad import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup -import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) @@ -1052,12 +1051,11 @@ strictHtmlBlock :: MarkdownParser String strictHtmlBlock = htmlInBalanced (not . 
isInlineTag) rawVerbatimBlock :: MarkdownParser String -rawVerbatimBlock = try $ do - (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem - ["pre", "style", "script"]) - (const True)) - contents <- manyTill anyChar (htmlTag (~== TagClose tag)) - return $ open ++ contents ++ renderTags' [TagClose tag] +rawVerbatimBlock = htmlInBalanced isVerbTag + where isVerbTag (TagOpen "pre" _) = True + isVerbTag (TagOpen "style" _) = True + isVerbTag (TagOpen "script" _) = True + isVerbTag _ = False rawTeXBlock :: MarkdownParser (F Blocks) rawTeXBlock = do @@ -1356,16 +1354,18 @@ pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak + let heads' = take (length aligns) <$> heads lines' <- many pipeTableRow + let lines'' = map (take (length aligns) <$>) lines' let maxlength = maximum $ - map (\x -> length . stringify $ runF x def) (heads : lines') + map (\x -> length . stringify $ runF x def) (heads' : lines'') numColumns <- getOption readerColumns let widths = if maxlength > numColumns then map (\len -> fromIntegral (len + 1) / fromIntegral numColumns) seplengths else replicate (length aligns) 0.0 - return $ (aligns, widths, heads, sequence lines') + return $ (aligns, widths, heads', sequence lines'') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1374,25 +1374,27 @@ sepPipe = try $ do -- parse a row, also returning probable alignments for org-table cells pipeTableRow :: MarkdownParser (F [Blocks]) -pipeTableRow = do +pipeTableRow = try $ do + scanForPipe skipMany spaceChar openPipe <- (True <$ char '|') <|> return False - let cell = mconcat <$> - many (notFollowedBy (blankline <|> char '|') >> inline) - first <- cell - rest <- many $ sepPipe *> cell + -- split into cells + let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') + <|> void (noneOf "|\n\r") + let cellContents = ((trim . 
snd) <$> withRaw (many chunk)) >>= + parseFromString pipeTableCell + cells <- cellContents `sepEndBy1` (char '|') -- surrounding pipes needed for a one-column table: - guard $ not (null rest && not openPipe) - optional (char '|') + guard $ not (length cells == 1 && not openPipe) blankline - let cells = sequence (first:rest) - return $ do - cells' <- cells - return $ map - (\ils -> - case trimInlines ils of - ils' | B.isNull ils' -> mempty - | otherwise -> B.plain $ ils') cells' + return $ sequence cells + +pipeTableCell :: MarkdownParser (F Blocks) +pipeTableCell = do + result <- many inline + if null result + then return mempty + else return $ B.plain . mconcat <$> sequence result pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) pipeTableHeaderPart = try $ do @@ -1554,7 +1556,7 @@ math :: MarkdownParser (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> ((getOption readerSmart >>= guard) *> (return <$> apostrophe) - <* notFollowedBy space) + <* notFollowedBy (space <|> satisfy isPunctuation)) -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. 
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index d29ec50e7..950497992 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -52,6 +52,7 @@ import Text.HTML.TagSoup import Data.Sequence (viewl, ViewL(..), (<|)) import qualified Data.Foldable as F import qualified Data.Map as M +import qualified Data.Set as Set import Data.Char (isDigit, isSpace) import Data.Maybe (fromMaybe) import Text.Printf (printf) @@ -69,7 +70,7 @@ readMediaWiki opts s = , mwNextLinkNumber = 1 , mwCategoryLinks = [] , mwHeaderMap = M.empty - , mwIdentifierList = [] + , mwIdentifierList = Set.empty } (s ++ "\n") @@ -78,7 +79,7 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwNextLinkNumber :: Int , mwCategoryLinks :: [Inlines] , mwHeaderMap :: M.Map Inlines String - , mwIdentifierList :: [String] + , mwIdentifierList :: Set.Set String } type MWParser = Parser [Char] MWState diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 1f1c57646..8c475eefc 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -61,6 +61,7 @@ import Text.Pandoc.Readers.Odt.Generic.XMLConverter import Text.Pandoc.Readers.Odt.Generic.Fallible import Text.Pandoc.Readers.Odt.Generic.Utils +import qualified Data.Set as Set -------------------------------------------------------------------------------- -- State @@ -221,7 +222,7 @@ getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do getHeaderAnchor :: OdtReaderSafe Inlines Anchor getHeaderAnchor = proc title -> do state <- getExtraState -< () - let anchor = uniqueIdent (toList title) (usedAnchors state) + let anchor = uniqueIdent (toList title) (Set.fromList $ usedAnchors state) modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c7906618c..7dd611be3 100644 --- 
a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -2,7 +2,7 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-} {- -Copyright (C) 2014-2015 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Org - Copyright : Copyright (C) 2014-2015 Albert Krewinkel + Copyright : Copyright (C) 2014-2016 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -53,6 +53,7 @@ import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M +import qualified Data.Set as Set import Data.Maybe (fromMaybe, isJust) import Network.HTTP (urlEncode) @@ -144,7 +145,7 @@ data OrgParserState = OrgParserState , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable , orgStateParserContext :: ParserContext - , orgStateIdentifiers :: [String] + , orgStateIdentifiers :: Set.Set String , orgStateHeaderMap :: M.Map Inlines String } @@ -186,7 +187,7 @@ defaultOrgParserState = OrgParserState , orgStateMeta' = return nullMeta , orgStateNotes' = [] , orgStateParserContext = NullState - , orgStateIdentifiers = [] + , orgStateIdentifiers = Set.empty , orgStateHeaderMap = M.empty } @@ -628,7 +629,7 @@ figure = try $ do maybeNam <- lookupBlockAttribute "name" guard $ isJust maybeCap || isJust maybeNam return ( fromMaybe mempty maybeCap - , maybe mempty withFigPrefix maybeNam ) + , withFigPrefix $ fromMaybe mempty maybeNam ) withFigPrefix cs = if "fig:" `isPrefixOf` cs then cs @@ -1238,37 +1239,37 @@ applyCustomLinkFormat link = do formatter <- 
M.lookup linkType <$> asksF orgStateLinkFormatters return $ maybe link ($ drop 1 rest) formatter --- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind --- of parsing. +-- | Take a link and return a function which produces new inlines when given +-- description inlines. linkToInlinesF :: String -> Inlines -> F Inlines -linkToInlinesF s = +linkToInlinesF linkStr = + case linkStr of + "" -> pure . B.link mempty "" -- wiki link (empty by convention) + ('#':_) -> pure . B.link linkStr "" -- document-local fraction + _ -> case cleanLinkString linkStr of + (Just cleanedLink) -> if isImageFilename cleanedLink + then const . pure $ B.image cleanedLink "" "" + else pure . B.link cleanedLink "" + Nothing -> internalLink linkStr -- other internal link + +-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if +-- the string does not appear to be a link. +cleanLinkString :: String -> Maybe String +cleanLinkString s = case s of - "" -> pure . B.link "" "" - ('#':_) -> pure . B.link s "" - _ | isImageFilename s -> const . pure $ B.image s "" "" - _ | isFileLink s -> pure . B.link (dropLinkType s) "" - _ | isUri s -> pure . B.link s "" - _ | isAbsoluteFilePath s -> pure . B.link ("file://" ++ s) "" - _ | isRelativeFilePath s -> pure . B.link s "" - _ -> internalLink s - -isFileLink :: String -> Bool -isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s) - -dropLinkType :: String -> String -dropLinkType = tail . snd . break (== ':') - -isRelativeFilePath :: String -> Bool -isRelativeFilePath s = (("./" `isPrefixOf` s) || ("../" `isPrefixOf` s)) && - (':' `notElem` s) - -isUri :: String -> Bool -isUri s = let (scheme, path) = break (== ':') s - in all (\c -> isAlphaNum c || c `elem` (".-" :: String)) scheme - && not (null path) - -isAbsoluteFilePath :: String -> Bool -isAbsoluteFilePath = ('/' ==) . 
head + '/':_ -> Just $ "file://" ++ s -- absolute path + '.':'/':_ -> Just s -- relative path + '.':'.':'/':_ -> Just s -- relative path + -- Relative path or URL (file schema) + 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' + _ | isUrl s -> Just s -- URL + _ -> Nothing + where + isUrl :: String -> Bool + isUrl cs = + let (scheme, path) = break (== ':') cs + in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme + && not (null path) isImageFilename :: String -> Bool isImageFilename filename = diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index dd1d289a3..6f64540f8 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -583,7 +583,18 @@ code2 = do -- | Html / CSS attributes attributes :: Parser [Char] ParserState Attr -attributes = (foldl (flip ($)) ("",[],[])) `fmap` many attribute +attributes = (foldl (flip ($)) ("",[],[])) <$> + try (do special <- option id specialAttribute + attrs <- many attribute + return (special : attrs)) + +specialAttribute :: Parser [Char] ParserState (Attr -> Attr) +specialAttribute = do + alignStr <- ("center" <$ char '=') <|> + ("justify" <$ try (string "<>")) <|> + ("right" <$ char '>') <|> + ("left" <$ char '<') + return $ addStyle ("text-align:" ++ alignStr) attribute :: Parser [Char] ParserState (Attr -> Attr) attribute = classIdAttr <|> styleAttr <|> langAttr @@ -602,7 +613,13 @@ classIdAttr = try $ do -- (class class #id) styleAttr :: Parser [Char] ParserState (Attr -> Attr) styleAttr = do style <- try $ enclosed (char '{') (char '}') anyChar' - return $ \(id',classes,keyvals) -> (id',classes,("style",style):keyvals) + return $ addStyle style + +addStyle :: String -> Attr -> Attr +addStyle style (id',classes,keyvals) = + (id',classes,keyvals') + where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] + style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals] langAttr :: Parser [Char] ParserState 
(Attr -> Attr) langAttr = do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index aa07c81e1..075d76847 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -706,14 +706,14 @@ headerLtEq _ _ = False -- | Generate a unique identifier from a list of inlines. -- Second argument is a list of already used identifiers. -uniqueIdent :: [Inline] -> [String] -> String +uniqueIdent :: [Inline] -> Set.Set String -> String uniqueIdent title' usedIdents = let baseIdent = case inlineListToIdentifier title' of "" -> "section" x -> x numIdent n = baseIdent ++ "-" ++ show n - in if baseIdent `elem` usedIdents - then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of + in if baseIdent `Set.member` usedIdents + then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of Just x -> numIdent x Nothing -> baseIdent -- if we have more than 60,000, allow repeats else baseIdent @@ -892,8 +892,10 @@ readDataFileUTF8 userDir fname = parseURIReference' :: String -> Maybe URI parseURIReference' s = case parseURIReference s of - Just u | length (uriScheme u) > 2 -> Just u - _ -> Nothing + Just u + | length (uriScheme u) > 2 -> Just u + | null (uriScheme u) -> Just u -- protocol-relative + _ -> Nothing -- | Fetch an image or other item from the local filesystem or the net. -- Returns raw content and maybe mime type. diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 498e2d10f..8d54d62bd 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -279,7 +279,17 @@ blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst -- | Convert list of inline elements to ConTeXt. 
inlineListToConTeXt :: [Inline] -- ^ Inlines to convert -> State WriterState Doc -inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt lst +inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst + -- We add a \strut after a line break that precedes a space, + -- or the space gets swallowed + where addStruts (LineBreak : s : xs) | isSpacey s = + LineBreak : RawInline (Format "context") "\\strut " : s : + addStruts xs + addStruts (x:xs) = x : addStruts xs + addStruts [] = [] + isSpacey Space = True + isSpacey (Str ('\160':_)) = True + isSpacey _ = False -- | Convert inline element to ConTeXt inlineToConTeXt :: Inline -- ^ Inline to convert diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 9671fc05b..d69eaaa64 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -222,8 +222,8 @@ blockToCustom _ Null = return "" blockToCustom lua (Plain inlines) = callfunc lua "Plain" inlines -blockToCustom lua (Para [Image _ txt (src,tit)]) = - callfunc lua "CaptionedImage" src tit txt +blockToCustom lua (Para [Image attr txt (src,tit)]) = + callfunc lua "CaptionedImage" src tit txt (attrToMap attr) blockToCustom lua (Para inlines) = callfunc lua "Para" inlines diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 827d32620..a841e1b66 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -34,6 +34,7 @@ import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 import qualified Data.Map as M +import qualified Data.Set as Set import qualified Text.Pandoc.UTF8 as UTF8 import Codec.Archive.Zip import Data.Time.Clock.POSIX @@ -95,7 +96,7 @@ data WriterState = WriterState{ stTextProperties :: [Element] , stParaProperties :: [Element] , stFootnotes :: [Element] - , stSectionIds :: [String] + , stSectionIds :: Set.Set String , stExternalLinks :: M.Map 
String String , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString) , stListLevel :: Int @@ -117,7 +118,7 @@ defaultWriterState = WriterState{ stTextProperties = [] , stParaProperties = [] , stFootnotes = defaultFootnotes - , stSectionIds = [] + , stSectionIds = Set.empty , stExternalLinks = M.empty , stImages = M.empty , stListLevel = -1 @@ -742,7 +743,7 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do let bookmarkName = if null ident then uniqueIdent lst usedIdents else ident - modify $ \s -> s{ stSectionIds = bookmarkName : stSectionIds s } + modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s } id' <- getUniqueId let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') ,("w:name",bookmarkName)] () @@ -1102,7 +1103,7 @@ inlineToOpenXML opts (Link _ txt (src,_)) = do M.insert src i extlinks } return i return [ mknode "w:hyperlink" [("r:id",id')] contents ] -inlineToOpenXML opts (Image attr alt (src, tit)) = do +inlineToOpenXML opts (Image attr alt (src, _)) = do -- first, check to see if we've already done this image pageWidth <- gets stPrintWidth imgs <- gets stImages @@ -1153,7 +1154,7 @@ inlineToOpenXML opts (Image attr alt (src, tit)) = do mknode "wp:inline" [] [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () - , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] () + , mknode "wp:docPr" [("descr",stringify alt),("id","1"),("name","Picture")] () , graphic ] let imgext = case mt >>= extensionFromMimeType of Just x -> '.':x diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index f1088b158..56e2b9027 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -452,8 +452,11 @@ inlineToDokuWiki _ (Code _ str) = inlineToDokuWiki _ (Str str) = return $ escapeString str -inlineToDokuWiki _ (Math _ str) = return $ "$" ++ str ++ "$" 
+inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped + where delim = case mathType of + DisplayMath -> "$$" + InlineMath -> "$" inlineToDokuWiki _ (RawInline f str) | f == Format "dokuwiki" = return str diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 64f94f41f..804dbb926 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -31,6 +31,7 @@ Conversion of 'Pandoc' documents to EPUB. module Text.Pandoc.Writers.EPUB ( writeEPUB ) where import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) import qualified Data.Map as M +import qualified Data.Set as Set import Data.Maybe ( fromMaybe, catMaybes ) import Data.List ( isPrefixOf, isInfixOf, intercalate ) import System.Environment ( getEnv ) @@ -916,13 +917,13 @@ showChapter = printf "ch%03d.xhtml" -- Add identifiers to any headers without them. addIdentifiers :: [Block] -> [Block] -addIdentifiers bs = evalState (mapM go bs) [] +addIdentifiers bs = evalState (mapM go bs) Set.empty where go (Header n (ident,classes,kvs) ils) = do ids <- get let ident' = if null ident then uniqueIdent ils ids else ident - put $ ident' : ids + modify $ Set.insert ident' return $ Header n (ident',classes,kvs) ils go x = return x diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 73a8906c3..c5b6a6db2 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -140,40 +140,38 @@ pandocToHtml opts (Pandoc meta blocks) = do st <- get let notes = reverse (stNotes st) let thebody = blocks' >> footnoteSection opts notes - let math = if stMath st - then case writerHTMLMathMethod opts of - LaTeXMathML (Just url) -> - H.script ! A.src (toValue url) - ! A.type_ "text/javascript" - $ mempty - MathML (Just url) -> - H.script ! A.src (toValue url) - ! A.type_ "text/javascript" - $ mempty - MathJax url -> - H.script ! A.src (toValue url) - ! 
A.type_ "text/javascript" - $ case writerSlideVariant opts of - SlideousSlides -> - preEscapedString - "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" - _ -> mempty - JsMath (Just url) -> - H.script ! A.src (toValue url) - ! A.type_ "text/javascript" - $ mempty - KaTeX js css -> - (H.script ! A.src (toValue js) $ mempty) <> - (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <> - (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX) - _ -> case lookup "mathml-script" (writerVariables opts) of - Just s | not (writerHtml5 opts) -> - H.script ! A.type_ "text/javascript" - $ preEscapedString - ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n") - | otherwise -> mempty - Nothing -> mempty - else mempty + let math = case writerHTMLMathMethod opts of + LaTeXMathML (Just url) -> + H.script ! A.src (toValue url) + ! A.type_ "text/javascript" + $ mempty + MathML (Just url) -> + H.script ! A.src (toValue url) + ! A.type_ "text/javascript" + $ mempty + MathJax url -> + H.script ! A.src (toValue url) + ! A.type_ "text/javascript" + $ case writerSlideVariant opts of + SlideousSlides -> + preEscapedString + "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" + _ -> mempty + JsMath (Just url) -> + H.script ! A.src (toValue url) + ! A.type_ "text/javascript" + $ mempty + KaTeX js css -> + (H.script ! A.src (toValue js) $ mempty) <> + (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <> + (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX) + _ -> case lookup "mathml-script" (writerVariables opts) of + Just s | not (writerHtml5 opts) -> + H.script ! 
A.type_ "text/javascript" + $ preEscapedString + ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n") + | otherwise -> mempty + Nothing -> mempty let context = (if stHighlighting st then defField "highlighting-css" (styleToCss $ writerHighlightStyle opts) @@ -647,7 +645,7 @@ alignmentToString alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" AlignCenter -> "center" - AlignDefault -> "left" + AlignDefault -> "" tableItemToHtml :: WriterOptions -> (Html -> Html) @@ -660,7 +658,10 @@ tableItemToHtml opts tag' align' item = do let attribs = if writerHtml5 opts then A.style (toValue $ "text-align: " ++ alignStr ++ ";") else A.align (toValue alignStr) - return $ (tag' ! attribs $ contents) >> nl opts + let tag'' = if null alignStr + then tag' + else tag' ! attribs + return $ (tag'' $ contents) >> nl opts toListItems :: WriterOptions -> [Html] -> [Html] toListItems opts items = map (toListItem opts) items ++ [nl opts] diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 7b2911bcf..0f47132b3 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -113,12 +113,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do (fmap (render colwidth) . 
inlineListToLaTeX) meta let bookClasses = ["memoir","book","report","scrreprt","scrbook"] - let documentClass = case P.parse (do P.skipMany (P.satisfy (/='\\')) - P.string "\\documentclass" - P.skipMany (P.satisfy (/='{')) - P.char '{' - P.manyTill P.letter (P.char '}')) "template" - template of + let documentClass = case P.parse pDocumentClass "template" template of Right r -> r Left _ -> "" case lookup "documentclass" (writerVariables options) `mplus` @@ -577,26 +572,29 @@ blockToLaTeX (Header level (id',classes,_) lst) = do blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty - else ($$ "\\midrule\n") `fmap` - (tableRowToLaTeX True aligns widths) heads + else do + contents <- (tableRowToLaTeX True aligns widths) heads + return ("\\toprule" $$ contents $$ "\\midrule") let endhead = if all null heads then empty else text "\\endhead" + let endfirsthead = if all null heads + then empty + else text "\\endfirsthead" captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "\\caption" <> braces captionText - <> "\\tabularnewline\n\\toprule\n" - <> headers - <> "\\endfirsthead" + else text "\\caption" <> braces captionText <> "\\tabularnewline" + $$ headers + $$ endfirsthead rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns modify $ \s -> s{ stTable = True } - return $ "\\begin{longtable}[c]" <> + return $ "\\begin{longtable}[]" <> braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end $$ capt - $$ "\\toprule" + $$ (if all null heads then "\\toprule" else empty) $$ headers $$ endhead $$ vcat rows' @@ -1265,3 +1263,24 @@ commonFromBcp47 x = fromIso $ head x deNote :: Inline -> Inline deNote (Note _) = RawInline (Format "latex") "" deNote x = x + +pDocumentOptions :: P.Parsec String () [String] +pDocumentOptions = do + P.char '[' + opts <- P.sepBy + (P.many $ P.spaces *> 
P.noneOf (" ,]" :: String) <* P.spaces) + (P.char ',') + P.char ']' + return opts + +pDocumentClass :: P.Parsec String () String +pDocumentClass = + do P.skipMany (P.satisfy (/='\\')) + P.string "\\documentclass" + classOptions <- pDocumentOptions <|> return [] + if ("article" :: String) `elem` classOptions + then return "article" + else do P.skipMany (P.satisfy (/='{')) + P.char '{' + P.manyTill P.letter (P.char '}') + diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 5a92f3cdf..ce993093c 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -53,6 +53,7 @@ import Data.Yaml (Value(Object,String,Array,Bool,Number)) import qualified Data.HashMap.Strict as H import qualified Data.Vector as V import qualified Data.Text as T +import qualified Data.Set as Set type Notes = [[Block]] type Ref = ([Inline], Target, Attr) @@ -61,11 +62,11 @@ data WriterState = WriterState { stNotes :: Notes , stRefs :: Refs , stRefShortcutable :: Bool , stInList :: Bool - , stIds :: [String] + , stIds :: Set.Set String , stPlain :: Bool } instance Default WriterState where def = WriterState{ stNotes = [], stRefs = [], stRefShortcutable = True, - stInList = False, stIds = [], stPlain = False } + stInList = False, stIds = Set.empty, stPlain = False } -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String @@ -116,7 +117,7 @@ plainTitleBlock tit auths dat = dat <> cr yamlMetadataBlock :: Value -> Doc -yamlMetadataBlock v = "---" $$ (jsonToYaml v) $$ "..." 
+yamlMetadataBlock v = "---" $$ (jsonToYaml v) $$ "---" jsonToYaml :: Value -> Doc jsonToYaml (Object hashmap) = @@ -364,7 +365,7 @@ blockToMarkdown opts (Header level attr inlines) = do -- so we know whether to print an explicit identifier ids <- gets stIds let autoId = uniqueIdent inlines ids - modify $ \st -> st{ stIds = autoId : ids } + modify $ \st -> st{ stIds = Set.insert autoId ids } let attr' = case attr of ("",[],[]) -> empty (id',[],[]) | isEnabled Ext_auto_identifiers opts diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index d843d2efd..20086ed19 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -102,6 +102,10 @@ escapeString = escapeStringUsing $ , ('\x2026',"...") ] ++ backslashEscapes "^_" +isRawFormat :: Format -> Bool +isRawFormat f = + f == Format "latex" || f == Format "tex" || f == Format "org" + -- | Convert Pandoc block element to Org. blockToOrg :: Block -- ^ Block element -> State WriterState Doc @@ -129,7 +133,7 @@ blockToOrg (Para inlines) = do blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ nest 2 (text str) $$ "#+END_HTML" $$ blankline -blockToOrg (RawBlock f str) | f `elem` ["org", "latex", "tex"] = +blockToOrg (RawBlock f str) | isRawFormat f = return $ text str blockToOrg (RawBlock _ _) = return empty blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline @@ -271,7 +275,8 @@ inlineToOrg (Math t str) = do return $ if t == InlineMath then "$" <> text str <> "$" else "$$" <> text str <> "$$" -inlineToOrg (RawInline f str) | f == "tex" || f == "latex" = return $ text str +inlineToOrg (RawInline f str) | isRawFormat f = + return $ text str inlineToOrg (RawInline _ _) = return empty inlineToOrg (LineBreak) = return (text "\\\\" <> cr) inlineToOrg Space = return space diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs new file mode 100644 index 000000000..b9e683ab9 --- /dev/null +++ 
b/src/Text/Pandoc/Writers/TEI.hs @@ -0,0 +1,320 @@ +{-# LANGUAGE OverloadedStrings, PatternGuards #-} +{- +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Docbook + Copyright : Copyright (C) 2006-2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to Docbook XML. 
+-} +module Text.Pandoc.Writers.TEI (writeTEI) where +import Text.Pandoc.Definition +import Text.Pandoc.XML +import Text.Pandoc.Shared +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Options +import Text.Pandoc.Templates (renderTemplate') +import Data.List ( stripPrefix, isPrefixOf, isSuffixOf ) +import Data.Char ( toLower ) +import Text.Pandoc.Highlighting ( languages, languagesByExtension ) +import Text.Pandoc.Pretty +import Text.Pandoc.ImageSize +import qualified Text.Pandoc.Builder as B + +-- | Convert list of authors to a docbook <author> section +authorToTEI :: WriterOptions -> [Inline] -> B.Inlines +authorToTEI opts name' = + let name = render Nothing $ inlinesToTEI opts name' + colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + in B.rawInline "tei" $ render colwidth $ + inTagsSimple "author" (text $ escapeStringForXML name) + +-- | Convert Pandoc document to string in Docbook format. +writeTEI :: WriterOptions -> Pandoc -> String +writeTEI opts (Pandoc meta blocks) = + let elements = hierarchicalize blocks + colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + render' = render colwidth + opts' = if "/book>" `isSuffixOf` + (trimr $ writerTemplate opts) + then opts{ writerChapters = True } + else opts + startLvl = if writerChapters opts' then 0 else 1 + auths' = map (authorToTEI opts) $ docAuthors meta + meta' = B.setMeta "author" auths' meta + Just metadata = metaToJSON opts + (Just . render colwidth . (vcat . + (map (elementToTEI opts' startLvl)) . hierarchicalize)) + (Just . render colwidth . inlinesToTEI opts') + meta' + main = render' $ vcat (map (elementToTEI opts' startLvl) elements) + context = defField "body" main + $ defField "mathml" (case writerHTMLMathMethod opts of + MathML _ -> True + _ -> False) + $ metadata + in if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main + +-- | Convert an Element to TEI. 
+elementToTEI :: WriterOptions -> Int -> Element -> Doc +elementToTEI opts _ (Blk block) = blockToTEI opts block +elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) = + -- TEI doesn't allow sections with no content, so insert some if needed + let elements' = if null elements + then [Blk (Para [])] + else elements + divType = case lvl of + n | n == 0 -> "chapter" + | n >= 1 && n <= 5 -> "level" ++ show n + | otherwise -> "section" + in inTags True "div" [("type", divType) | not (null id')] $ +-- ("id", writerIdentifierPrefix opts ++ id') | not (null id')] $ + inTagsSimple "head" (inlinesToTEI opts title) $$ + vcat (map (elementToTEI opts (lvl + 1)) elements') + +-- | Convert a list of Pandoc blocks to TEI. +blocksToTEI :: WriterOptions -> [Block] -> Doc +blocksToTEI opts = vcat . map (blockToTEI opts) + +-- | Auxiliary function to convert Plain block to Para. +plainToPara :: Block -> Block +plainToPara (Plain x) = Para x +plainToPara x = x + +-- | Convert a list of pairs of terms and definitions into a TEI +-- list with labels and items. +deflistItemsToTEI :: WriterOptions -> [([Inline],[[Block]])] -> Doc +deflistItemsToTEI opts items = + vcat $ map (\(term, defs) -> deflistItemToTEI opts term defs) items + +-- | Convert a term and a list of blocks into a TEI varlistentry. +deflistItemToTEI :: WriterOptions -> [Inline] -> [[Block]] -> Doc +deflistItemToTEI opts term defs = + let def' = concatMap (map plainToPara) defs + in inTagsIndented "label" (inlinesToTEI opts term) $$ + inTagsIndented "item" (blocksToTEI opts def') + +-- | Convert a list of lists of blocks to a list of TEI list items. +listItemsToTEI :: WriterOptions -> [[Block]] -> Doc +listItemsToTEI opts items = vcat $ map (listItemToTEI opts) items + +-- | Convert a list of blocks into a TEI list item. 
+listItemToTEI :: WriterOptions -> [Block] -> Doc +listItemToTEI opts item = + inTagsIndented "item" $ blocksToTEI opts $ map plainToPara item + +imageToTEI :: WriterOptions -> Attr -> String -> Doc +imageToTEI _ attr src = selfClosingTag "graphic" $ + ("url", src) : idAndRole attr ++ dims + where + dims = go Width "width" ++ go Height "depth" + go dir dstr = case (dimension dir attr) of + Just a -> [(dstr, show a)] + Nothing -> [] + +-- | Convert a Pandoc block element to TEI. +blockToTEI :: WriterOptions -> Block -> Doc +blockToTEI _ Null = empty +-- Add ids to paragraphs in divs with ids - this is needed for +-- pandoc-citeproc to get link anchors in bibliographies: +blockToTEI opts (Div (ident,_,_) [Para lst]) = + let attribs = [("id", ident) | not (null ident)] in + inTags False "p" attribs $ inlinesToTEI opts lst +blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs +blockToTEI _ (Header _ _ _) = empty -- should not occur after hierarchicalize +-- For TEI simple, text must be within containing block element, so +-- we use plainToPara to ensure that Plain text ends up contained by +-- something. 
+blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst +-- title beginning with fig: indicates that the image is a figure +--blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = +-- let alt = inlinesToTEI opts txt +-- capt = if null txt +-- then empty +-- else inTagsSimple "title" alt +-- in inTagsIndented "figure" $ +-- capt $$ +-- (inTagsIndented "mediaobject" $ +-- (inTagsIndented "imageobject" +-- (imageToTEI opts attr src)) $$ +-- inTagsSimple "textobject" (inTagsSimple "phrase" alt)) +blockToTEI opts (Para lst) = + inTags False "p" [] $ inlinesToTEI opts lst +blockToTEI opts (BlockQuote blocks) = + inTagsIndented "quote" $ blocksToTEI opts blocks +blockToTEI _ (CodeBlock (_,classes,_) str) = + text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <> + flush (text (escapeStringForXML str) <> cr <> text "</ab>") + where lang = if null langs + then "" + else escapeStringForXML (head langs) + isLang l = map toLower l `elem` map (map toLower) languages + langsFrom s = if isLang s + then [s] + else languagesByExtension . 
map toLower $ s + langs = concatMap langsFrom classes +blockToTEI opts (BulletList lst) = + let attribs = [("type", "unordered")] + in inTags True "list" attribs $ listItemsToTEI opts lst +blockToTEI _ (OrderedList _ []) = empty +blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = + let attribs = case numstyle of + DefaultStyle -> [] + Decimal -> [("type", "ordered:arabic")] + Example -> [("type", "ordered:arabic")] + UpperAlpha -> [("type", "ordered:upperalpha")] + LowerAlpha -> [("type", "ordered:loweralpha")] + UpperRoman -> [("type", "ordered:upperroman")] + LowerRoman -> [("type", "ordered:lowerroman")] + items = if start == 1 + then listItemsToTEI opts (first:rest) + else (inTags True "item" [("n",show start)] + (blocksToTEI opts $ map plainToPara first)) $$ + listItemsToTEI opts rest + in inTags True "list" attribs items +blockToTEI opts (DefinitionList lst) = + let attribs = [("type", "definition")] + in inTags True "list" attribs $ deflistItemsToTEI opts lst +blockToTEI _ (RawBlock f str) + | f == "tei" = text str -- raw TEI block (should such a thing exist). +-- | f == "html" = text str -- allow html for backwards compatibility + | otherwise = empty +blockToTEI _ HorizontalRule = + selfClosingTag "milestone" [("unit","undefined"), ("type","separator"),("rendition","line")] + +-- | TEI Tables +-- TEI Simple's tables are composed of cells and rows; other +-- table info in the AST is here lossily discard. 
+blockToTEI opts (Table _ _ _ headers rows) = + let + headers' = tableHeadersToTEI opts headers +-- headers' = if all null headers +-- then return empty +-- else tableRowToTEI opts headers + in + inTags True "table" [] $ + vcat $ [headers'] <> map (tableRowToTEI opts) rows + +tableRowToTEI :: WriterOptions + -> [[Block]] + -> Doc +tableRowToTEI opts cols = + inTagsIndented "row" $ vcat $ map (tableItemToTEI opts) cols + +tableHeadersToTEI :: WriterOptions + -> [[Block]] + -> Doc +tableHeadersToTEI opts cols = + inTags True "row" [("role","label")] $ vcat $ map (tableItemToTEI opts) cols + +tableItemToTEI :: WriterOptions + -> [Block] + -> Doc +tableItemToTEI opts item = + inTags False "cell" [] $ vcat $ map (blockToTEI opts) item + +-- | Convert a list of inline elements to TEI. +inlinesToTEI :: WriterOptions -> [Inline] -> Doc +inlinesToTEI opts lst = hcat $ map (inlineToTEI opts) lst + +-- | Convert an inline element to TEI. +inlineToTEI :: WriterOptions -> Inline -> Doc +inlineToTEI _ (Str str) = text $ escapeStringForXML str +inlineToTEI opts (Emph lst) = + inTags False "hi" [("rendition","simple:italic")] $ inlinesToTEI opts lst +inlineToTEI opts (Strong lst) = + inTags False "hi" [("rendition", "simple:bold")] $ inlinesToTEI opts lst +inlineToTEI opts (Strikeout lst) = + inTags False "hi" [("rendition", "simple:strikethrough")] $ + inlinesToTEI opts lst +inlineToTEI opts (Superscript lst) = + inTags False "hi" [("rendition", "simple:superscript")] $ inlinesToTEI opts lst +inlineToTEI opts (Subscript lst) = + inTags False "hi" [("rendition", "simple:subscript")] $ inlinesToTEI opts lst +inlineToTEI opts (SmallCaps lst) = + inTags False "hi" [("rendition", "simple:smallcaps")] $ + inlinesToTEI opts lst +inlineToTEI opts (Quoted _ lst) = + inTagsSimple "quote" $ inlinesToTEI opts lst +inlineToTEI opts (Cite _ lst) = + inlinesToTEI opts lst +inlineToTEI opts (Span _ ils) = + inlinesToTEI opts ils +inlineToTEI _ (Code _ str) = + inTags False "seg" 
[("type","code")] $ text (escapeStringForXML str) +-- Distinguish display from inline math by wrapping the former in a "figure." +inlineToTEI _ (Math t str) = + case t of + InlineMath -> inTags False "formula" [("notation","TeX")] $ + text (str) + DisplayMath -> inTags True "figure" [("type","math")] $ + inTags False "formula" [("notation","TeX")] $ text (str) + +inlineToTEI _ (RawInline f x) | f == "tei" = text x + | otherwise = empty +inlineToTEI _ LineBreak = selfClosingTag "lb" [] +inlineToTEI _ Space = space +-- because we use \n for LineBreak, we can't do soft breaks: +inlineToTEI _ SoftBreak = space +inlineToTEI opts (Link attr txt (src, _)) + | Just email <- stripPrefix "mailto:" src = + let emailLink = text $ + escapeStringForXML $ email + in case txt of + [Str s] | escapeURI s == email -> emailLink + _ -> inlinesToTEI opts txt <+> + char '(' <> emailLink <> char ')' + | otherwise = + (if isPrefixOf "#" src + then inTags False "ref" $ ("target", drop 1 src) : idAndRole attr + else inTags False "ref" $ ("target", src) : idAndRole attr ) $ + inlinesToTEI opts txt +inlineToTEI opts (Image attr description (src, tit)) = + let titleDoc = if null tit + then empty + else inTags False "figDesc" [] (text $ escapeStringForXML tit) + imageDesc = if null description + then empty + else inTags False "head" [] (inlinesToTEI opts description) + in inTagsIndented "figure" $ imageDesc $$ + imageToTEI opts attr src $$ titleDoc +inlineToTEI opts (Note contents) = + inTagsIndented "note" $ blocksToTEI opts contents + +idAndRole :: Attr -> [(String, String)] +idAndRole (id',cls,_) = ident ++ role + where + ident = if null id' + then [] + else [("id", id')] + role = if null cls + then [] + else [("role", unwords cls)] + diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 1aefaa678..8420704dc 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -43,13 +43,14 @@ import Text.Pandoc.Pretty import 
Text.Pandoc.ImageSize import Network.URI ( isURI, unEscapeString ) import System.FilePath +import qualified Data.Set as Set data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout , stSuperscript :: Bool -- document contains superscript , stSubscript :: Bool -- document contains subscript , stEscapeComma :: Bool -- in a context where we need @comma - , stIdentifiers :: [String] -- header ids used already + , stIdentifiers :: Set.Set String -- header ids used already , stOptions :: WriterOptions -- writer options } @@ -64,7 +65,7 @@ writeTexinfo options document = evalState (pandocToTexinfo options $ wrapTop document) $ WriterState { stStrikeout = False, stSuperscript = False, stEscapeComma = False, stSubscript = False, - stIdentifiers = [], stOptions = options} + stIdentifiers = Set.empty, stOptions = options} -- | Add a "Top" node around the document, needed by Texinfo. wrapTop :: Pandoc -> Pandoc @@ -215,7 +216,7 @@ blockToTexinfo (Header level _ lst) = do txt <- inlineListToTexinfo lst idsUsed <- gets stIdentifiers let id' = uniqueIdent lst idsUsed - modify $ \st -> st{ stIdentifiers = id' : idsUsed } + modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } return $ if (level > 0) && (level <= 4) then blankline <> text "@node " <> node $$ text (seccmd level) <> txt $$ diff --git a/stack.full.yaml b/stack.full.yaml index d79d25d0c..2d752d531 100644 --- a/stack.full.yaml +++ b/stack.full.yaml @@ -12,8 +12,4 @@ packages: - '../pandoc-citeproc' - '../pandoc-types' - '../texmath' -extra-deps: -- 'cmark-0.5.0' -# Use older aeson to avoid excessive memory use in compilation: -- 'aeson-0.8.0.2' -resolver: lts-4.0 +resolver: lts-5.2 diff --git a/stack.yaml b/stack.yaml index 8dcc4c65f..21ff7bff7 100644 --- a/stack.yaml +++ b/stack.yaml @@ -7,10 +7,9 @@ flags: network-uri: true packages: - '.' 
-extra-deps: -- 'cmark-0.5.0' -- 'pandoc-citeproc-0.9' -- 'pandoc-types-1.16.0.1' -# Use older aeson to avoid excessive memory use in compilation: -- 'aeson-0.8.0.2' -resolver: lts-4.0 +extra-deps: [] +# to compile against aeson 0.11.0.0: +# - 'aeson-0.11.0.0' +# - 'fail-4.9.0.0' +# - 'pandoc-types-1.16.1' +resolver: lts-5.2 diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index c6b22af50..36bb3398e 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -162,7 +162,7 @@ tests = [ testGroup "markdown" [ test "reader" ["-r", "twiki", "-w", "native", "-s"] "twiki-reader.twiki" "twiki-reader.native" ] , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f) - [ "opendocument" , "context" , "texinfo", "icml" + [ "opendocument" , "context" , "texinfo", "icml", "tei" , "man" , "plain" , "rtf", "org", "asciidoc" ] , testGroup "writers-lang-and-dir" diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 086d3f964..e09d56529 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -111,6 +111,10 @@ tests = [ testGroup "inlines" "docx/links.docx" "docx/links.native" , testCompare + "normalizing adjacent hyperlinks" + "docx/adjacent_links.docx" + "docx/adjacent_links.native" + , testCompare "inline image" "docx/image.docx" "docx/image_no_embed.native" @@ -165,10 +169,14 @@ tests = [ testGroup "inlines" "docx/already_auto_ident.docx" "docx/already_auto_ident.native" , testCompare - "numbered headers automatically made into list" + "single numbered item not made into list" "docx/numbered_header.docx" "docx/numbered_header.native" , testCompare + "enumerated headers not made into numbered list" + "docx/enumerated_headings.docx" + "docx/enumerated_headings.native" + , testCompare "i18n blocks (headers and blockquotes)" "docx/i18n_blocks.docx" "docx/i18n_blocks.native" diff --git a/tests/Tests/Readers/HTML.hs b/tests/Tests/Readers/HTML.hs index 2eb87a2f3..ff27b8aed 100644 --- a/tests/Tests/Readers/HTML.hs +++ 
b/tests/Tests/Readers/HTML.hs @@ -15,11 +15,14 @@ html = handleError . readHtml def tests :: [Test] tests = [ testGroup "base tag" [ test html "simple" $ - "<head><base href=\"http://www.w3schools.com/images\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?> + "<head><base href=\"http://www.w3schools.com/images/foo\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?> plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman")) , test html "slash at end of base" $ "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?> plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman")) + , test html "slash at beginning of href" $ + "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"/stickman.gif\" alt=\"Stickman\"></head>" =?> + plain (image "http://www.w3schools.com/stickman.gif" "" (text "Stickman")) , test html "absolute URL" $ "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"http://example.com/stickman.gif\" alt=\"Stickman\"></head>" =?> plain (image "http://example.com/stickman.gif" "" (text "Stickman")) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index bb3bffe22..b095ac60a 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -190,6 +190,10 @@ tests = "[[./sunset.jpg]]" =?> (para $ image "./sunset.jpg" "" "") + , "Image with explicit file: prefix" =: + "[[file:sunrise.jpg]]" =?> + (para $ image "sunrise.jpg" "" "") + , "Explicit link" =: "[[http://zeitlens.com/][pseudo-random /nonsense/]]" =?> (para $ link "http://zeitlens.com/" "" @@ -635,11 +639,11 @@ tests = ] =?> para (image "edward.jpg" "fig:goodguy" "A very courageous man.") - , "Unnamed figure" =: - unlines [ "#+caption: A great whistleblower." 
- , "[[snowden.png]]" + , "Figure with no name" =: + unlines [ "#+caption: I've been through the desert on this" + , "[[horse.png]]" ] =?> - para (image "snowden.png" "" "A great whistleblower.") + para (image "horse.png" "fig:" "I've been through the desert on this") , "Figure with `fig:` prefix in name" =: unlines [ "#+caption: Used as a metapher in evolutionary biology." diff --git a/tests/Tests/Writers/TEI.hs b/tests/Tests/Writers/TEI.hs new file mode 100644 index 000000000..56764db9f --- /dev/null +++ b/tests/Tests/Writers/TEI.hs @@ -0,0 +1,43 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Writers.TEI (tests) where + +import Test.Framework +import Text.Pandoc.Builder +import Text.Pandoc +import Tests.Helpers +import Tests.Arbitrary() + +{- + "my test" =: X =?> Y + +is shorthand for + + test html "my test" $ X =?> Y + +which is in turn shorthand for + + test html "my test" (X,Y) +-} + +infix 4 =: +(=:) :: (ToString a, ToPandoc a) + => String -> (a, String) -> Test +(=:) = test (writeTEI def . toPandoc) + +tests :: [Test] +tests = [ testGroup "block elements" + ["para" =: para "Lorem ipsum cetera." 
+ =?> "<p>Lorem ipsum cetera.</p>" + ] + , testGroup "inlines" + [ + "Emphasis" =: emph ("emphasized") + =?> "<p><hi rendition=\"simple:italic\">emphasized</hi></p>" + ,"SingleQuoted" =: singleQuoted (text "quoted material") + =?> "<p><quote>quoted material</quote></p>" + ,"DoubleQuoted" =: doubleQuoted (text "quoted material") + =?> "<p><quote>quoted material</quote></p>" + ,"NestedQuoted" =: doubleQuoted (singleQuoted (text "quoted material")) + =?> "<p><quote><quote>quoted material</quote></quote></p>" + ] + ] diff --git a/tests/docx/adjacent_links.docx b/tests/docx/adjacent_links.docx Binary files differnew file mode 100644 index 000000000..86b1c2a14 --- /dev/null +++ b/tests/docx/adjacent_links.docx diff --git a/tests/docx/adjacent_links.native b/tests/docx/adjacent_links.native new file mode 100644 index 000000000..cca861890 --- /dev/null +++ b/tests/docx/adjacent_links.native @@ -0,0 +1 @@ +[Para [Str "Le",Space,Str "plus",Space,Str "int\233ressant",Space,Str "\233tant",Space,Str "sans",Space,Str "doute",Space,Str "le",Space,Str "Marsan,",Space,Str "propos\233",Space,Str "par",Space,Str "Claude",Space,Str "Marsan",Space,Str "en",Space,Str "1976",Space,Str "qui",Space,Str "avait",Space,Str "m\234me",Space,Str "fait",Space,Str "l'objet",Space,Str "d'une",Space,Str "norme,",Space,Str "mais",Space,Str "qui",Space,Str "n'a",Space,Str "pas",Space,Str "du",Space,Str "tout",Space,Str "\233t\233",Space,Str "adopt\233",Space,Str "\224",Space,Str "cause",Space,Str "des",Space,Str "habitudes",Space,Str "trop",Space,Str "ancr\233es",Space,Str "et",Space,Str "qui",Space,Str "a",Space,Str "fini",Space,Str "par",Space,Str "tomber",Space,Str "dans",Space,Str "l'oubli,",Space,Str "gros",Space,Str "clin",Space,Str "d'\339il",Space,Str "\224",Space,Str "cela",Space,Str "d'ailleurs",Space,Str "dans",Space,Str "le",Space,Str "film",Space,Link ("",[],[]) [Emph [Str "\"Le",Space,Str "nom",Space,Str "des",Space,Str "gens\""]] 
("http://www.allocine.fr/film/fichefilm_gen_cfilm=172167.html",""),Str ".",Space,Str "D\8217ailleurs",Space,Str "l\8217\233tat,",Space,Str "bien",Space,Str "conscient",Space,Str "que",Space,Str "tous",Space,Str "les",Space,Str "fran\231ais",Space,Str "\233crivent",Space,Str "sur",Space,Str "des",Space,Str "claviers",Space,Str "compl\232tement",Space,Str "inadapt\233s,",Space,Link ("",[],[]) [Emph [Str "tente",Space,Str "encore",Space,Str "une",Space,Str "fois",Space,Str "de",Space,Str "faire",Space,Str "une",Space,Str "norme",Space,Str "en",Space,Str "ce",Space,Str "moment",Space,Str "m\234me"]] ("http://www.appy-geek.com/Web/ArticleWeb.aspx?regionid=2&articleid=56103389&source=messenger",""),Str "."]] diff --git a/tests/docx/enumerated_headings.docx b/tests/docx/enumerated_headings.docx Binary files differnew file mode 100644 index 000000000..afa84748a --- /dev/null +++ b/tests/docx/enumerated_headings.docx diff --git a/tests/docx/enumerated_headings.native b/tests/docx/enumerated_headings.native new file mode 100644 index 000000000..67c0df5e0 --- /dev/null +++ b/tests/docx/enumerated_headings.native @@ -0,0 +1,4 @@ +[Header 1 ("h1",[],[]) [Str "H1"] +,Header 2 ("h2",[],[]) [Str "H2"] +,Header 3 ("h3",[],[]) [Str "H3"] +,Para [Str "And",Space,Str "some",Space,Str "text"]] diff --git a/tests/pipe-tables.native b/tests/pipe-tables.native index 6cd37f6ff..63c2c17bc 100644 --- a/tests/pipe-tables.native +++ b/tests/pipe-tables.native @@ -98,4 +98,18 @@ ,Para [Str "Pipe",Space,Str "table",Space,Str "with",Space,Str "no",Space,Str "body:"] ,Table [] [AlignDefault] [0.0] [[Plain [Str "Header"]]] - []] + [] +,Para [Str "Pipe",Space,Str "table",Space,Str "with",Space,Str "tricky",Space,Str "cell",Space,Str "contents",Space,Str "(see",Space,Str "#2765):"] +,Table [] [AlignLeft,AlignRight,AlignRight] [0.0,0.0,0.0] + [[] + ,[Plain [Str "IP_gene8-_1st"]] + ,[Plain [Str "IP_gene8+_1st"]]] + [[[Plain [Str "IP_gene8-_1st"]] + ,[Plain [Str "1.0000000"]] + ,[Plain [Str 
"0.4357325"]]] + ,[[Plain [Str "IP_gene8+_1st"]] + ,[Plain [Str "0.4357325"]] + ,[Plain [Str "1.0000000"]]] + ,[[Plain [Str "foo",Code ("",[],[]) "bar|baz"]] + ,[Plain [Str "and|escaped"]] + ,[Plain [Str "3.0000000"]]]]] diff --git a/tests/pipe-tables.txt b/tests/pipe-tables.txt index e93f64af9..c27c71113 100644 --- a/tests/pipe-tables.txt +++ b/tests/pipe-tables.txt @@ -72,3 +72,11 @@ Pipe table with no body: | Header | | ------ | +Pipe table with tricky cell contents (see #2765): + +| | IP_gene8-_1st| IP_gene8+_1st| +|:--------------|-------------:|-------------:| +|IP_gene8-_1st | 1.0000000| 0.4357325| +|IP_gene8+_1st | 0.4357325| 1.0000000| +|foo`bar|baz` | and\|escaped | 3.0000000| + diff --git a/tests/tables.html b/tests/tables.html index f5ddf3bcd..0a9ea413c 100644 --- a/tests/tables.html +++ b/tests/tables.html @@ -6,7 +6,7 @@ <th align="right">Right</th> <th align="left">Left</th> <th align="center">Center</th> -<th align="left">Default</th> +<th>Default</th> </tr> </thead> <tbody> @@ -14,19 +14,19 @@ <td align="right">12</td> <td align="left">12</td> <td align="center">12</td> -<td align="left">12</td> +<td>12</td> </tr> <tr class="even"> <td align="right">123</td> <td align="left">123</td> <td align="center">123</td> -<td align="left">123</td> +<td>123</td> </tr> <tr class="odd"> <td align="right">1</td> <td align="left">1</td> <td align="center">1</td> -<td align="left">1</td> +<td>1</td> </tr> </tbody> </table> @@ -37,7 +37,7 @@ <th align="right">Right</th> <th align="left">Left</th> <th align="center">Center</th> -<th align="left">Default</th> +<th>Default</th> </tr> </thead> <tbody> @@ -45,19 +45,19 @@ <td align="right">12</td> <td align="left">12</td> <td align="center">12</td> -<td align="left">12</td> +<td>12</td> </tr> <tr class="even"> <td align="right">123</td> <td align="left">123</td> <td align="center">123</td> -<td align="left">123</td> +<td>123</td> </tr> <tr class="odd"> <td align="right">1</td> <td align="left">1</td> <td 
align="center">1</td> -<td align="left">1</td> +<td>1</td> </tr> </tbody> </table> @@ -69,7 +69,7 @@ <th align="right">Right</th> <th align="left">Left</th> <th align="center">Center</th> -<th align="left">Default</th> +<th>Default</th> </tr> </thead> <tbody> @@ -77,19 +77,19 @@ <td align="right">12</td> <td align="left">12</td> <td align="center">12</td> -<td align="left">12</td> +<td>12</td> </tr> <tr class="even"> <td align="right">123</td> <td align="left">123</td> <td align="center">123</td> -<td align="left">123</td> +<td>123</td> </tr> <tr class="odd"> <td align="right">1</td> <td align="left">1</td> <td align="center">1</td> -<td align="left">1</td> +<td>1</td> </tr> </tbody> </table> @@ -192,13 +192,13 @@ <td align="center">First</td> <td align="left">row</td> <td align="right">12.0</td> -<td align="left">Example of a row that spans multiple lines.</td> +<td>Example of a row that spans multiple lines.</td> </tr> <tr class="even"> <td align="center">Second</td> <td align="left">row</td> <td align="right">5.0</td> -<td align="left">Here's another one. Note the blank line between rows.</td> +<td>Here's another one. 
Note the blank line between rows.</td> </tr> </tbody> </table> diff --git a/tests/tables.latex b/tests/tables.latex index 850629499..96cbc9579 100644 --- a/tests/tables.latex +++ b/tests/tables.latex @@ -1,6 +1,6 @@ Simple table with caption: -\begin{longtable}[c]{@{}rlcl@{}} +\begin{longtable}[]{@{}rlcl@{}} \caption{Demonstration of simple table syntax.}\tabularnewline \toprule Right & Left & Center & Default\tabularnewline @@ -18,7 +18,7 @@ Right & Left & Center & Default\tabularnewline Simple table without caption: -\begin{longtable}[c]{@{}rlcl@{}} +\begin{longtable}[]{@{}rlcl@{}} \toprule Right & Left & Center & Default\tabularnewline \midrule @@ -31,7 +31,7 @@ Right & Left & Center & Default\tabularnewline Simple table indented two spaces: -\begin{longtable}[c]{@{}rlcl@{}} +\begin{longtable}[]{@{}rlcl@{}} \caption{Demonstration of simple table syntax.}\tabularnewline \toprule Right & Left & Center & Default\tabularnewline @@ -49,7 +49,7 @@ Right & Left & Center & Default\tabularnewline Multiline table with caption: -\begin{longtable}[c]{@{}clrl@{}} +\begin{longtable}[]{@{}clrl@{}} \caption{Here's the caption. It may span multiple lines.}\tabularnewline \toprule \begin{minipage}[b]{0.13\columnwidth}\centering\strut @@ -98,7 +98,7 @@ Here's another one. Note the blank line between rows. Multiline table without caption: -\begin{longtable}[c]{@{}clrl@{}} +\begin{longtable}[]{@{}clrl@{}} \toprule \begin{minipage}[b]{0.13\columnwidth}\centering\strut Centered Header @@ -134,7 +134,7 @@ Here's another one. Note the blank line between rows. 
Table without column headers: -\begin{longtable}[c]{@{}rlcr@{}} +\begin{longtable}[]{@{}rlcr@{}} \toprule 12 & 12 & 12 & 12\tabularnewline 123 & 123 & 123 & 123\tabularnewline @@ -144,7 +144,7 @@ Table without column headers: Multiline table without column headers: -\begin{longtable}[c]{@{}clrl@{}} +\begin{longtable}[]{@{}clrl@{}} \toprule \begin{minipage}[t]{0.13\columnwidth}\centering\strut First diff --git a/tests/tables.tei b/tests/tables.tei new file mode 100644 index 000000000..45b88b1cb --- /dev/null +++ b/tests/tables.tei @@ -0,0 +1,171 @@ +<p>Simple table with caption:</p> +<table> + <row role="label"> + <cell><p>Right</p></cell> + <cell><p>Left</p></cell> + <cell><p>Center</p></cell> + <cell><p>Default</p></cell> + </row> + <row> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + </row> + <row> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + </row> + <row> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + </row> +</table> +<p>Simple table without caption:</p> +<table> + <row role="label"> + <cell><p>Right</p></cell> + <cell><p>Left</p></cell> + <cell><p>Center</p></cell> + <cell><p>Default</p></cell> + </row> + <row> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + </row> + <row> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + </row> + <row> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + </row> +</table> +<p>Simple table indented two spaces:</p> +<table> + <row role="label"> + <cell><p>Right</p></cell> + <cell><p>Left</p></cell> + <cell><p>Center</p></cell> + <cell><p>Default</p></cell> + </row> + <row> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + </row> + <row> + 
<cell><p>123</p></cell> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + </row> + <row> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + </row> +</table> +<p>Multiline table with caption:</p> +<table> + <row role="label"> + <cell><p>Centered Header</p></cell> + <cell><p>Left Aligned</p></cell> + <cell><p>Right Aligned</p></cell> + <cell><p>Default aligned</p></cell> + </row> + <row> + <cell><p>First</p></cell> + <cell><p>row</p></cell> + <cell><p>12.0</p></cell> + <cell><p>Example of a row that spans multiple lines.</p></cell> + </row> + <row> + <cell><p>Second</p></cell> + <cell><p>row</p></cell> + <cell><p>5.0</p></cell> + <cell><p>Here's another one. Note the blank line between rows.</p></cell> + </row> +</table> +<p>Multiline table without caption:</p> +<table> + <row role="label"> + <cell><p>Centered Header</p></cell> + <cell><p>Left Aligned</p></cell> + <cell><p>Right Aligned</p></cell> + <cell><p>Default aligned</p></cell> + </row> + <row> + <cell><p>First</p></cell> + <cell><p>row</p></cell> + <cell><p>12.0</p></cell> + <cell><p>Example of a row that spans multiple lines.</p></cell> + </row> + <row> + <cell><p>Second</p></cell> + <cell><p>row</p></cell> + <cell><p>5.0</p></cell> + <cell><p>Here's another one. 
Note the blank line between rows.</p></cell> + </row> +</table> +<p>Table without column headers:</p> +<table> + <row role="label"> + <cell></cell> + <cell></cell> + <cell></cell> + <cell></cell> + </row> + <row> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + <cell><p>12</p></cell> + </row> + <row> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + <cell><p>123</p></cell> + </row> + <row> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + <cell><p>1</p></cell> + </row> +</table> +<p>Multiline table without column headers:</p> +<table> + <row role="label"> + <cell></cell> + <cell></cell> + <cell></cell> + <cell></cell> + </row> + <row> + <cell><p>First</p></cell> + <cell><p>row</p></cell> + <cell><p>12.0</p></cell> + <cell><p>Example of a row that spans multiple lines.</p></cell> + </row> + <row> + <cell><p>Second</p></cell> + <cell><p>row</p></cell> + <cell><p>5.0</p></cell> + <cell><p>Here's another one. Note the blank line between rows.</p></cell> + </row> +</table> diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index f7c2f0c1f..2488917cb 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -24,6 +24,7 @@ import qualified Tests.Writers.Plain import qualified Tests.Writers.AsciiDoc import qualified Tests.Writers.Docx import qualified Tests.Writers.RST +import qualified Tests.Writers.TEI import qualified Tests.Shared import qualified Tests.Walk import Text.Pandoc.Shared (inDirectory) @@ -44,6 +45,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "AsciiDoc" Tests.Writers.AsciiDoc.tests , testGroup "Docx" Tests.Writers.Docx.tests , testGroup "RST" Tests.Writers.RST.tests + , testGroup "TEI" Tests.Writers.TEI.tests ] , testGroup "Readers" [ testGroup "LaTeX" Tests.Readers.LaTeX.tests diff --git a/tests/textile-reader.native b/tests/textile-reader.native index fe2c7be24..79a5f52da 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ 
-132,8 +132,11 @@ Pandoc (Meta {unMeta = fromList []}) ,Header 1 ("images",[],[]) [Str "Images"] ,Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str "syntax,",Space,Str "like",LineBreak,Str "here",Space,Image ("",[],[]) [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),LineBreak,Str "and",Space,Str "here",Space,Image ("",[],[]) [Str ""] ("this_is_an_image.png",""),Str "."] ,Header 1 ("attributes",[],[]) [Str "Attributes"] -,Header 2 ("ident",["bar","foo"],[("style","color:red"),("lang","en")]) [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str "attributes",Space,Str "are",Space,Str "parsed",Space,Str "in",Space,Str "headers."] -,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Span ("",["foo"],[]) [Str "inline",Space,Str "attributes"]],Space,Str "of",Space,Span ("",[],[("style","color:red")]) [Str "all",Space,Str "kind"]] +,Header 2 ("ident",["bar","foo"],[("style","color:red;"),("lang","en")]) [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str "attributes",Space,Str "are",Space,Str "parsed",Space,Str "in",Space,Str "headers."] +,Header 2 ("centered",[],[("style","text-align:center;")]) [Str "Centered"] +,Header 2 ("right",[],[("style","text-align:right;")]) [Str "Right"] +,Header 2 ("justified",[],[("lang","en"),("style","color:blue;text-align:justify;")]) [Str "Justified"] +,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Span ("",["foo"],[]) [Str "inline",Space,Str "attributes"]],Space,Str "of",Space,Span ("",[],[("style","color:red;")]) [Str "all",Space,Str "kind"]] ,Para [Str "and",Space,Str "paragraph",Space,Str "attributes,",Space,Str "and",Space,Str "table",Space,Str "attributes."] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [] diff --git a/tests/textile-reader.textile b/tests/textile-reader.textile index d5d7378b0..a9c80ccbd 100644 --- a/tests/textile-reader.textile +++ b/tests/textile-reader.textile @@ -207,6 +207,12 @@ h1. 
Attributes h2[en]{color:red}(foo bar #ident). HTML and CSS attributes are parsed in headers. +h2=. Centered + +h2>. Right + +h2<>{color:blue}[en]. Justified + as well as *(foo)inline attributes* of %{color:red} all kind% p{color:green}. and paragraph attributes, and table attributes. diff --git a/tests/writer.dokuwiki b/tests/writer.dokuwiki index fe1f8296a..79fcdde8a 100644 --- a/tests/writer.dokuwiki +++ b/tests/writer.dokuwiki @@ -459,7 +459,7 @@ Ellipses…and…and…. * $\alpha \wedge \omega$ * $223$ * $p$-Tree - * Here’s some display math: $\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$ + * Here’s some display math: $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$ * Here’s one that has a line break in it: $\alpha + \omega \times x^2$. These shouldn’t be math: diff --git a/tests/writer.markdown b/tests/writer.markdown index 6a0b9801c..d705bb044 100644 --- a/tests/writer.markdown +++ b/tests/writer.markdown @@ -4,7 +4,7 @@ author: - Anonymous date: 'July 17, 2006' title: Pandoc Test Suite -... +--- This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite. diff --git a/tests/writer.tei b/tests/writer.tei new file mode 100644 index 000000000..41f258775 --- /dev/null +++ b/tests/writer.tei @@ -0,0 +1,861 @@ +<?xml version="1.0" encoding="utf-8"?> +<TEI xmlns="http://www.tei-c.org/ns/1.0"> +<teiHeader> + <fileDesc> + <titleStmt> + <title>Pandoc Test Suite</title> + <author>John MacFarlane</author> + <author>Anonymous</author> + </titleStmt> + <publicationStmt> + <p></p> + </publicationStmt> + <sourceDesc> + <p>Produced by pandoc.</p> + </sourceDesc> + </fileDesc> +</teiHeader> +<text> +<body> +<p>This is a set of tests for pandoc. 
Most of them are adapted from John +Gruber’s markdown test suite.</p> +<milestone unit="undefined" type="separator" rendition="line" /> +<div type="level1"> + <head>Headers</head> + <div type="level2"> + <head>Level 2 with an <ref target="/url">embedded link</ref></head> + <div type="level3"> + <head>Level 3 with <hi rendition="simple:italic">emphasis</hi></head> + <div type="level4"> + <head>Level 4</head> + <div type="level5"> + <head>Level 5</head> + <p></p> + </div> + </div> + </div> + </div> +</div> +<div type="level1"> + <head>Level 1</head> + <div type="level2"> + <head>Level 2 with <hi rendition="simple:italic">emphasis</hi></head> + <div type="level3"> + <head>Level 3</head> + <p>with no blank line</p> + </div> + </div> + <div type="level2"> + <head>Level 2</head> + <p>with no blank line</p> + <milestone unit="undefined" type="separator" rendition="line" /> + </div> +</div> +<div type="level1"> + <head>Paragraphs</head> + <p>Here’s a regular paragraph.</p> + <p>In Markdown 1.0.0 and earlier. Version 8. This line turns into a list + item. Because a hard-wrapped line in the middle of a paragraph looked like a + list item.</p> + <p>Here’s one with a bullet. * criminey.</p> + <p>There should be a hard line break<lb />here.</p> + <milestone unit="undefined" type="separator" rendition="line" /> +</div> +<div type="level1"> + <head>Block Quotes</head> + <p>E-mail style:</p> + <quote> + <p>This is a block quote. 
It is pretty short.</p> + </quote> + <quote> + <p>Code in a block quote:</p> + <ab type='codeblock '> +sub status { + print "working"; +} +</ab> + <p>A list:</p> + <list type="ordered:arabic"> + <item> + <p>item one</p> + </item> + <item> + <p>item two</p> + </item> + </list> + <p>Nested block quotes:</p> + <quote> + <p>nested</p> + </quote> + <quote> + <p>nested</p> + </quote> + </quote> + <p>This should not be a block quote: 2 > 1.</p> + <p>And a following paragraph.</p> + <milestone unit="undefined" type="separator" rendition="line" /> +</div> +<div type="level1"> + <head>Code Blocks</head> + <p>Code:</p> + <ab type='codeblock '> +---- (should be four hyphens) + +sub status { + print "working"; +} + +this code block is indented by one tab +</ab> + <p>And:</p> + <ab type='codeblock '> + this code block is indented by two tabs + +These should not be escaped: \$ \\ \> \[ \{ +</ab> + <milestone unit="undefined" type="separator" rendition="line" /> +</div> +<div type="level1"> + <head>Lists</head> + <div type="level2"> + <head>Unordered</head> + <p>Asterisks tight:</p> + <list type="unordered"> + <item> + <p>asterisk 1</p> + </item> + <item> + <p>asterisk 2</p> + </item> + <item> + <p>asterisk 3</p> + </item> + </list> + <p>Asterisks loose:</p> + <list type="unordered"> + <item> + <p>asterisk 1</p> + </item> + <item> + <p>asterisk 2</p> + </item> + <item> + <p>asterisk 3</p> + </item> + </list> + <p>Pluses tight:</p> + <list type="unordered"> + <item> + <p>Plus 1</p> + </item> + <item> + <p>Plus 2</p> + </item> + <item> + <p>Plus 3</p> + </item> + </list> + <p>Pluses loose:</p> + <list type="unordered"> + <item> + <p>Plus 1</p> + </item> + <item> + <p>Plus 2</p> + </item> + <item> + <p>Plus 3</p> + </item> + </list> + <p>Minuses tight:</p> + <list type="unordered"> + <item> + <p>Minus 1</p> + </item> + <item> + <p>Minus 2</p> + </item> + <item> + <p>Minus 3</p> + </item> + </list> + <p>Minuses loose:</p> + <list type="unordered"> + <item> + <p>Minus 1</p> + </item> + 
<item> + <p>Minus 2</p> + </item> + <item> + <p>Minus 3</p> + </item> + </list> + </div> + <div type="level2"> + <head>Ordered</head> + <p>Tight:</p> + <list type="ordered:arabic"> + <item> + <p>First</p> + </item> + <item> + <p>Second</p> + </item> + <item> + <p>Third</p> + </item> + </list> + <p>and:</p> + <list type="ordered:arabic"> + <item> + <p>One</p> + </item> + <item> + <p>Two</p> + </item> + <item> + <p>Three</p> + </item> + </list> + <p>Loose using tabs:</p> + <list type="ordered:arabic"> + <item> + <p>First</p> + </item> + <item> + <p>Second</p> + </item> + <item> + <p>Third</p> + </item> + </list> + <p>and using spaces:</p> + <list type="ordered:arabic"> + <item> + <p>One</p> + </item> + <item> + <p>Two</p> + </item> + <item> + <p>Three</p> + </item> + </list> + <p>Multiple paragraphs:</p> + <list type="ordered:arabic"> + <item> + <p>Item 1, graf one.</p> + <p>Item 1. graf two. The quick brown fox jumped over the lazy dog’s + back.</p> + </item> + <item> + <p>Item 2.</p> + </item> + <item> + <p>Item 3.</p> + </item> + </list> + </div> + <div type="level2"> + <head>Nested</head> + <list type="unordered"> + <item> + <p>Tab</p> + <list type="unordered"> + <item> + <p>Tab</p> + <list type="unordered"> + <item> + <p>Tab</p> + </item> + </list> + </item> + </list> + </item> + </list> + <p>Here’s another:</p> + <list type="ordered:arabic"> + <item> + <p>First</p> + </item> + <item> + <p>Second:</p> + <list type="unordered"> + <item> + <p>Fee</p> + </item> + <item> + <p>Fie</p> + </item> + <item> + <p>Foe</p> + </item> + </list> + </item> + <item> + <p>Third</p> + </item> + </list> + <p>Same thing but with paragraphs:</p> + <list type="ordered:arabic"> + <item> + <p>First</p> + </item> + <item> + <p>Second:</p> + <list type="unordered"> + <item> + <p>Fee</p> + </item> + <item> + <p>Fie</p> + </item> + <item> + <p>Foe</p> + </item> + </list> + </item> + <item> + <p>Third</p> + </item> + </list> + </div> + <div type="level2"> + <head>Tabs and spaces</head> + 
<list type="unordered"> + <item> + <p>this is a list item indented with tabs</p> + </item> + <item> + <p>this is a list item indented with spaces</p> + <list type="unordered"> + <item> + <p>this is an example list item indented with tabs</p> + </item> + <item> + <p>this is an example list item indented with spaces</p> + </item> + </list> + </item> + </list> + </div> + <div type="level2"> + <head>Fancy list markers</head> + <list type="ordered:arabic"> + <item n="2"> + <p>begins with 2</p> + </item> + <item> + <p>and now 3</p> + <p>with a continuation</p> + <list type="ordered:lowerroman"> + <item n="4"> + <p>sublist with roman numerals, starting with 4</p> + </item> + <item> + <p>more items</p> + <list type="ordered:upperalpha"> + <item> + <p>a subsublist</p> + </item> + <item> + <p>a subsublist</p> + </item> + </list> + </item> + </list> + </item> + </list> + <p>Nesting:</p> + <list type="ordered:upperalpha"> + <item> + <p>Upper Alpha</p> + <list type="ordered:upperroman"> + <item> + <p>Upper Roman.</p> + <list type="ordered:arabic"> + <item n="6"> + <p>Decimal start with 6</p> + <list type="ordered:loweralpha"> + <item n="3"> + <p>Lower alpha with paren</p> + </item> + </list> + </item> + </list> + </item> + </list> + </item> + </list> + <p>Autonumbering:</p> + <list> + <item> + <p>Autonumber.</p> + </item> + <item> + <p>More.</p> + <list> + <item> + <p>Nested.</p> + </item> + </list> + </item> + </list> + <p>Should not be a list item:</p> + <p>M.A. 2007</p> + <p>B. 
Williams</p> + <milestone unit="undefined" type="separator" rendition="line" /> + </div> +</div> +<div type="level1"> + <head>Definition Lists</head> + <p>Tight using spaces:</p> + <list type="definition"> + <label> + apple + </label> + <item> + <p>red fruit</p> + </item> + <label> + orange + </label> + <item> + <p>orange fruit</p> + </item> + <label> + banana + </label> + <item> + <p>yellow fruit</p> + </item> + </list> + <p>Tight using tabs:</p> + <list type="definition"> + <label> + apple + </label> + <item> + <p>red fruit</p> + </item> + <label> + orange + </label> + <item> + <p>orange fruit</p> + </item> + <label> + banana + </label> + <item> + <p>yellow fruit</p> + </item> + </list> + <p>Loose:</p> + <list type="definition"> + <label> + apple + </label> + <item> + <p>red fruit</p> + </item> + <label> + orange + </label> + <item> + <p>orange fruit</p> + </item> + <label> + banana + </label> + <item> + <p>yellow fruit</p> + </item> + </list> + <p>Multiple blocks with italics:</p> + <list type="definition"> + <label> + <hi rendition="simple:italic">apple</hi> + </label> + <item> + <p>red fruit</p> + <p>contains seeds, crisp, pleasant to taste</p> + </item> + <label> + <hi rendition="simple:italic">orange</hi> + </label> + <item> + <p>orange fruit</p> + <ab type='codeblock '> +{ orange code block } +</ab> + <quote> + <p>orange block quote</p> + </quote> + </item> + </list> + <p>Multiple definitions, tight:</p> + <list type="definition"> + <label> + apple + </label> + <item> + <p>red fruit</p> + <p>computer</p> + </item> + <label> + orange + </label> + <item> + <p>orange fruit</p> + <p>bank</p> + </item> + </list> + <p>Multiple definitions, loose:</p> + <list type="definition"> + <label> + apple + </label> + <item> + <p>red fruit</p> + <p>computer</p> + </item> + <label> + orange + </label> + <item> + <p>orange fruit</p> + <p>bank</p> + </item> + </list> + <p>Blank line after term, indented marker, alternate markers:</p> + <list type="definition"> + <label> + 
apple + </label> + <item> + <p>red fruit</p> + <p>computer</p> + </item> + <label> + orange + </label> + <item> + <p>orange fruit</p> + <list type="ordered:arabic"> + <item> + <p>sublist</p> + </item> + <item> + <p>sublist</p> + </item> + </list> + </item> + </list> +</div> +<div type="level1"> + <head>HTML Blocks</head> + <p>Simple block on one line:</p> + <p>foo</p> + <p>And nested without indentation:</p> + <p>foo</p> + <p>bar</p> + <p>Interpreted markdown in a table:</p> + <p>This is <hi rendition="simple:italic">emphasized</hi></p> + <p>And this is <hi rendition="simple:bold">strong</hi></p> + <p>Here’s a simple block:</p> + <p>foo</p> + <p>This should be a code block, though:</p> + <ab type='codeblock '> +<div> + foo +</div> +</ab> + <p>As should this:</p> + <ab type='codeblock '> +<div>foo</div> +</ab> + <p>Now, nested:</p> + <p>foo</p> + <p>This should just be an HTML comment:</p> + <p>Multiline:</p> + <p>Code block:</p> + <ab type='codeblock '> +<!-- Comment --> +</ab> + <p>Just plain comment, with trailing spaces on the line:</p> + <p>Code:</p> + <ab type='codeblock '> +<hr /> +</ab> + <p>Hr’s:</p> + <milestone unit="undefined" type="separator" rendition="line" /> +</div> +<div type="level1"> + <head>Inline Markup</head> + <p>This is <hi rendition="simple:italic">emphasized</hi>, and so + <hi rendition="simple:italic">is this</hi>.</p> + <p>This is <hi rendition="simple:bold">strong</hi>, and so + <hi rendition="simple:bold">is this</hi>.</p> + <p>An <hi rendition="simple:italic"><ref target="/url">emphasized + link</ref></hi>.</p> + <p><hi rendition="simple:bold"><hi rendition="simple:italic">This is strong + and em.</hi></hi></p> + <p>So is + <hi rendition="simple:bold"><hi rendition="simple:italic">this</hi></hi> + word.</p> + <p><hi rendition="simple:bold"><hi rendition="simple:italic">This is strong + and em.</hi></hi></p> + <p>So is + <hi rendition="simple:bold"><hi rendition="simple:italic">this</hi></hi> + word.</p> + <p>This is code: <seg 
type="code">></seg>, <seg type="code">$</seg>, + <seg type="code">\</seg>, <seg type="code">\$</seg>, + <seg type="code"><html></seg>.</p> + <p><hi rendition="simple:strikethrough">This is + <hi rendition="simple:italic">strikeout</hi>.</hi></p> + <p>Superscripts: a<hi rendition="simple:superscript">bc</hi>d + a<hi rendition="simple:superscript"><hi rendition="simple:italic">hello</hi></hi> + a<hi rendition="simple:superscript">hello there</hi>.</p> + <p>Subscripts: H<hi rendition="simple:subscript">2</hi>O, + H<hi rendition="simple:subscript">23</hi>O, + H<hi rendition="simple:subscript">many of them</hi>O.</p> + <p>These should not be superscripts or subscripts, because of the unescaped + spaces: a^b c^d, a~b c~d.</p> + <milestone unit="undefined" type="separator" rendition="line" /> +</div> +<div type="level1"> + <head>Smart quotes, ellipses, dashes</head> + <p><quote>Hello,</quote> said the spider. <quote><quote>Shelob</quote> is my + name.</quote></p> + <p><quote>A</quote>, <quote>B</quote>, and <quote>C</quote> are letters.</p> + <p><quote>Oak,</quote> <quote>elm,</quote> and <quote>beech</quote> are + names of trees. 
So is <quote>pine.</quote></p> + <p><quote>He said, <quote>I want to go.</quote></quote> Were you alive in + the 70’s?</p> + <p>Here is some quoted <quote><seg type="code">code</seg></quote> and a + <quote><ref target="http://example.com/?foo=1&bar=2">quoted + link</ref></quote>.</p> + <p>Some dashes: one—two — three—four — five.</p> + <p>Dashes between numbers: 5–7, 255–66, 1987–1999.</p> + <p>Ellipses…and…and….</p> + <milestone unit="undefined" type="separator" rendition="line" /> +</div> +<div type="level1"> + <head>LaTeX</head> + <list type="unordered"> + <item> + <p></p> + </item> + <item> + <p><formula notation="TeX">2+2=4</formula></p> + </item> + <item> + <p><formula notation="TeX">x \in y</formula></p> + </item> + <item> + <p><formula notation="TeX">\alpha \wedge \omega</formula></p> + </item> + <item> + <p><formula notation="TeX">223</formula></p> + </item> + <item> + <p><formula notation="TeX">p</formula>-Tree</p> + </item> + <item> + <p>Here’s some display math: <figure type="math"> + <formula notation="TeX">\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}</formula> + </figure></p> + </item> + <item> + <p>Here’s one that has a line break in it: + <formula notation="TeX">\alpha + \omega \times x^2</formula>.</p> + </item> + </list> + <p>These shouldn’t be math:</p> + <list type="unordered"> + <item> + <p>To get the famous equation, write + <seg type="code">$e = mc^2$</seg>.</p> + </item> + <item> + <p>$22,000 is a <hi rendition="simple:italic">lot</hi> of money. So is + $34,000. 
(It worked if <quote>lot</quote> is emphasized.)</p> + </item> + <item> + <p>Shoes ($20) and socks ($5).</p> + </item> + <item> + <p>Escaped <seg type="code">$</seg>: $73 + <hi rendition="simple:italic">this should be emphasized</hi> 23$.</p> + </item> + </list> + <p>Here’s a LaTeX table:</p> + <milestone unit="undefined" type="separator" rendition="line" /> +</div> +<div type="level1"> + <head>Special Characters</head> + <p>Here is some unicode:</p> + <list type="unordered"> + <item> + <p>I hat: Î</p> + </item> + <item> + <p>o umlaut: ö</p> + </item> + <item> + <p>section: §</p> + </item> + <item> + <p>set membership: ∈</p> + </item> + <item> + <p>copyright: ©</p> + </item> + </list> + <p>AT&T has an ampersand in their name.</p> + <p>AT&T is another way to write it.</p> + <p>This & that.</p> + <p>4 < 5.</p> + <p>6 > 5.</p> + <p>Backslash: \</p> + <p>Backtick: `</p> + <p>Asterisk: *</p> + <p>Underscore: _</p> + <p>Left brace: {</p> + <p>Right brace: }</p> + <p>Left bracket: [</p> + <p>Right bracket: ]</p> + <p>Left paren: (</p> + <p>Right paren: )</p> + <p>Greater-than: ></p> + <p>Hash: #</p> + <p>Period: .</p> + <p>Bang: !</p> + <p>Plus: +</p> + <p>Minus: -</p> + <milestone unit="undefined" type="separator" rendition="line" /> +</div> +<div type="level1"> + <head>Links</head> + <div type="level2"> + <head>Explicit</head> + <p>Just a <ref target="/url/">URL</ref>.</p> + <p><ref target="/url/">URL and title</ref>.</p> + <p><ref target="/url/">URL and title</ref>.</p> + <p><ref target="/url/">URL and title</ref>.</p> + <p><ref target="/url/">URL and title</ref></p> + <p><ref target="/url/">URL and title</ref></p> + <p><ref target="/url/with_underscore">with_underscore</ref></p> + <p>Email link (nobody@nowhere.net)</p> + <p><ref target="">Empty</ref>.</p> + </div> + <div type="level2"> + <head>Reference</head> + <p>Foo <ref target="/url/">bar</ref>.</p> + <p>Foo <ref target="/url/">bar</ref>.</p> + <p>Foo <ref target="/url/">bar</ref>.</p> + <p>With <ref 
target="/url/">embedded [brackets]</ref>.</p> + <p><ref target="/url/">b</ref> by itself should be a link.</p> + <p>Indented <ref target="/url">once</ref>.</p> + <p>Indented <ref target="/url">twice</ref>.</p> + <p>Indented <ref target="/url">thrice</ref>.</p> + <p>This should [not][] be a link.</p> + <ab type='codeblock '> +[not]: /url +</ab> + <p>Foo <ref target="/url/">bar</ref>.</p> + <p>Foo <ref target="/url/">biz</ref>.</p> + </div> + <div type="level2"> + <head>With ampersands</head> + <p>Here’s a <ref target="http://example.com/?foo=1&bar=2">link with an + ampersand in the URL</ref>.</p> + <p>Here’s a link with an amersand in the link text: + <ref target="http://att.com/">AT&T</ref>.</p> + <p>Here’s an <ref target="/script?foo=1&bar=2">inline link</ref>.</p> + <p>Here’s an <ref target="/script?foo=1&bar=2">inline link in pointy + braces</ref>.</p> + </div> + <div type="level2"> + <head>Autolinks</head> + <p>With an ampersand: + <ref target="http://example.com/?foo=1&bar=2">http://example.com/?foo=1&bar=2</ref></p> + <list type="unordered"> + <item> + <p>In a list?</p> + </item> + <item> + <p><ref target="http://example.com/">http://example.com/</ref></p> + </item> + <item> + <p>It should.</p> + </item> + </list> + <p>An e-mail address: nobody@nowhere.net</p> + <quote> + <p>Blockquoted: + <ref target="http://example.com/">http://example.com/</ref></p> + </quote> + <p>Auto-links should not occur here: + <seg type="code"><http://example.com/></seg></p> + <ab type='codeblock '> +or here: <http://example.com/> +</ab> + <milestone unit="undefined" type="separator" rendition="line" /> + </div> +</div> +<div type="level1"> + <head>Images</head> + <p>From <quote>Voyage dans la Lune</quote> by Georges Melies (1902):</p> + <p><figure> + <head>lalune</head> + <graphic url="lalune.jpg" /> + <figDesc>fig:Voyage dans la Lune</figDesc> + </figure></p> + <p>Here is a movie <figure> + <head>movie</head> + <graphic url="movie.jpg" /> + </figure> icon.</p> + <milestone 
unit="undefined" type="separator" rendition="line" /> +</div> +<div type="level1"> + <head>Footnotes</head> + <p>Here is a footnote reference,<note> + <p>Here is the footnote. It can go anywhere after the footnote reference. + It need not be placed at the end of the document.</p> + </note> and another.<note> + <p>Here’s the long note. This one contains multiple blocks.</p> + <p>Subsequent blocks are indented to show that they belong to the footnote + (as with list items).</p> + <ab type='codeblock '> + { <code> } +</ab> + <p>If you want, you can indent every line, but you can also be lazy and + just indent the first line of each block.</p> + </note> This should <hi rendition="simple:italic">not</hi> be a footnote + reference, because it contains a space.[^my note] Here is an inline + note.<note> + <p>This is <hi rendition="simple:italic">easier</hi> to type. Inline notes + may contain <ref target="http://google.com">links</ref> and + <seg type="code">]</seg> verbatim characters, as well as [bracketed + text].</p> + </note></p> + <quote> + <p>Notes can go in quotes.<note> + <p>In quote.</p> + </note></p> + </quote> + <list type="ordered:arabic"> + <item> + <p>And in list items.<note> + <p>In list.</p> + </note></p> + </item> + </list> + <p>This paragraph should not be part of the note, as it is not indented.</p> +</div> +</body> +</text> +</TEI> diff --git a/windows/stack.yaml b/windows/stack.yaml index 91f1c1539..a01dbfc03 100644 --- a/windows/stack.yaml +++ b/windows/stack.yaml @@ -15,7 +15,5 @@ packages: - '..' - '../../pandoc-citeproc' extra-deps: -- 'cmark-0.5.0' - 'hsb2hs-0.3.1' -- 'pandoc-types-1.16.0.1' -resolver: lts-3.20 +resolver: lts-5.2 |