From 190943e1fd75b7fa30689387e4416dd81b584f5e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 26 Jan 2017 20:39:32 +0100 Subject: EPUB writer: split writeEPUB into writeEPUB2, writeEPUB3. Also include explicit epub2 output format in CLI tool. --- MANUAL.txt | 23 ++++++++++++----------- data/templates | 2 +- pandoc.cabal | 2 +- src/Text/Pandoc.hs | 12 +++++++----- src/Text/Pandoc/Templates.hs | 1 + src/Text/Pandoc/Writers/EPUB.hs | 31 ++++++++++++++++++++++++------- 6 files changed, 46 insertions(+), 25 deletions(-) diff --git a/MANUAL.txt b/MANUAL.txt index 3b8ac2b85..91f4bacc0 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -101,7 +101,7 @@ If no *input-file* is specified, input is read from *stdin*. Otherwise, the *input-files* are concatenated (with a blank line between each) and used as input. Output goes to *stdout* by default (though output to *stdout* is disabled for the `odt`, `docx`, -`epub`, and `epub3` output formats). For output to a file, use the +`epub2`, and `epub3` output formats). For output to a file, use the `-o` option: pandoc -o output.html input.txt @@ -273,7 +273,7 @@ General options (original unextended Markdown), `markdown_phpextra` (PHP Markdown Extra), `markdown_github` (GitHub-Flavored Markdown), `markdown_mmd` (MultiMarkdown), `commonmark` (CommonMark Markdown), `rst` - (reStructuredText), `html` (XHTML), `html5` (HTML5), `latex` + (reStructuredText), `html4` (XHTML4), `html` or `html5` (HTML5), `latex` (LaTeX), `beamer` (LaTeX beamer slide show), `context` (ConTeXt), `man` (groff man), `mediawiki` (MediaWiki markup), `dokuwiki` (DokuWiki markup), `zimwiki` (ZimWiki markup), @@ -281,7 +281,7 @@ General options `texinfo` (GNU Texinfo), `opml` (OPML), `docbook` (DocBook 4), `docbook5` (DocBook 5), `opendocument` (OpenDocument), `odt` (OpenOffice text document), `docx` (Word docx), `haddock` - (Haddock markup), `rtf` (rich text format), `epub` (EPUB v2 + (Haddock markup), `rtf` (rich text format), `epub` or `epub2` (EPUB v2 book), `epub3` (EPUB v3), `fb2` (FictionBook2 e-book), `asciidoc` (AsciiDoc), `icml` (InDesign ICML), `tei` (TEI Simple), `slidy` (Slidy HTML and JavaScript slide show), @@ -293,7 +293,7 @@ General options `epub`, and `epub3` output will not be directed to *stdout*; an output filename must be specified using the `-o/--output` option. If `+lhs` is appended to `markdown`, `rst`, `latex`, - `beamer`, `html`, or `html5`, the output will be rendered as + `beamer`, `html4`, or `html5`, the output will be rendered as literate Haskell source: see [Literate Haskell support], below. Markdown syntax extensions can be individually enabled or disabled by appending `+EXTENSION` or @@ -626,7 +626,7 @@ Options affecting specific writers images, and videos. The resulting file should be "self-contained," in the sense that it needs no external files and no net access to be displayed properly by a browser. This option works only with HTML output - formats, including `html`, `html5`, `html+lhs`, `html5+lhs`, `s5`, + formats, including `html4`, `html5`, `html+lhs`, `html5+lhs`, `s5`, `slidy`, `slideous`, `dzslides`, and `revealjs`. Scripts, images, and stylesheets at absolute URLs will be downloaded; those at relative URLs will be sought relative to the working directory (if the first source @@ -947,10 +947,11 @@ Math rendering in HTML `--mathml`[`=`*URL*] -: Convert TeX math to [MathML] (in `docbook`, `docbook5`, `html` and `html5`). - In standalone `html` output, a small JavaScript (or a link to such a - script if a *URL* is supplied) will be inserted that allows the MathML to - be viewed on some browsers. +: Convert TeX math to [MathML] (in `docbook`, `docbook5`, + `html4` and `html5`). In standalone HTML output, a small + JavaScript (or a link to such a script if a *URL* is + supplied) will be inserted that allows the MathML to be + viewed on some browsers. `--jsmath`[`=`*URL*] @@ -1647,7 +1648,7 @@ Note, however, that this method of providing links to sections works only in HTML, LaTeX, and ConTeXt formats. If the `--section-divs` option is specified, then each section will -be wrapped in a `div` (or a `section`, if `--html5` was specified), +be wrapped in a `div` (or a `section`, if `html5` was specified), and the identifier will be attached to the enclosing `
` (or `
`) tag rather than the header itself. This allows entire sections to be manipulated using JavaScript or treated differently in @@ -3891,7 +3892,7 @@ Literate Haskell support If you append `+lhs` (or `+literate_haskell`) to an appropriate input or output format (`markdown`, `markdown_strict`, `rst`, or `latex` for input or output; -`beamer`, `html` or `html5` for output only), pandoc will treat the document as +`beamer`, `html4` or `html5` for output only), pandoc will treat the document as literate Haskell source. This means that - In Markdown input, "bird track" sections will be parsed as Haskell diff --git a/data/templates b/data/templates index 67d601119..335360e40 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 67d601119928f95c525dfb2c518ec61661f1e770 +Subproject commit 335360e40c5cd395b33954906144c834783b41fd diff --git a/pandoc.cabal b/pandoc.cabal index 97e70c830..341ab5a12 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -66,7 +66,7 @@ Data-Files: data/templates/default.haddock data/templates/default.textile data/templates/default.org - data/templates/default.epub + data/templates/default.epub2 data/templates/default.epub3 -- source files for reference.docx data/docx/[Content_Types].xml diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index aa4cab840..449cab120 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -115,7 +115,8 @@ module Text.Pandoc , writeRTF , writeODT , writeDocx - , writeEPUB + , writeEPUB2 + , writeEPUB3 , writeFB2 , writeOrg , writeAsciiDoc @@ -278,10 +279,9 @@ writers = [ ,("json" , StringWriter $ \o d -> return $ writeJSON o d) ,("docx" , ByteStringWriter writeDocx) ,("odt" , ByteStringWriter writeODT) - ,("epub" , ByteStringWriter $ \o -> - writeEPUB o{ writerEpubVersion = Just EPUB2 }) - ,("epub3" , ByteStringWriter $ \o -> - writeEPUB o{ writerEpubVersion = Just EPUB3 }) + ,("epub" , ByteStringWriter writeEPUB2) + ,("epub2" , ByteStringWriter writeEPUB2) + ,("epub3" , ByteStringWriter writeEPUB3) ,("fb2" , StringWriter writeFB2) ,("html" , StringWriter writeHtml5String) ,("html4" , StringWriter writeHtml4String) @@ -349,6 +349,8 @@ getDefaultExtensions "epub" = extensionsFromList Ext_native_divs, Ext_native_spans, Ext_epub_html_exts] +getDefaultExtensions "epub2" = getDefaultExtensions "epub" +getDefaultExtensions "epub3" = getDefaultExtensions "epub" getDefaultExtensions "latex" = extensionsFromList [Ext_smart, Ext_auto_identifiers] diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 03dc917e6..38d956f1f 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -61,6 +61,7 @@ getDefaultTemplate user writer = do "fb2" -> return $ Right "" "odt" -> getDefaultTemplate user "opendocument" "html" -> getDefaultTemplate user "html5" + "epub" -> getDefaultTemplate user "epub2" "markdown_strict" -> getDefaultTemplate user "markdown" "multimarkdown" -> getDefaultTemplate user "markdown" "markdown_github" -> getDefaultTemplate user "markdown" diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index bd95c170e..c2fc4422e 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -28,7 +28,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to EPUB. -} -module Text.Pandoc.Writers.EPUB ( writeEPUB ) where +module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where import qualified Data.Map as M import qualified Data.Set as Set import Data.Maybe ( fromMaybe, catMaybes ) @@ -75,8 +75,9 @@ import qualified Text.Pandoc.Class as P -- in filenames, chapter0003.xhtml. data Chapter = Chapter (Maybe [Int]) [Block] -data EPUBState = EPUBState { stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] - } +data EPUBState = EPUBState { + stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] + } type E m = StateT EPUBState m @@ -336,16 +337,32 @@ metadataFromMeta opts meta = EPUBMetadata{ Just "rtl" -> Just RTL _ -> Nothing +-- | Produce an EPUB2 file from a Pandoc document. +writeEPUB2 :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m B.ByteString +writeEPUB2 = writeEPUB EPUB2 + +-- | Produce an EPUB3 file from a Pandoc document. +writeEPUB3 :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m B.ByteString +writeEPUB3 = writeEPUB EPUB3 + -- | Produce an EPUB file from a Pandoc document. writeEPUB :: PandocMonad m - => WriterOptions -- ^ Writer options + => EPUBVersion + -> WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> m B.ByteString -writeEPUB opts doc = +writeEPUB epubVersion opts doc = let initState = EPUBState { stMediaPaths = [] } in - evalStateT (pandocToEPUB opts doc) initState + evalStateT (pandocToEPUB opts{ writerEpubVersion = Just epubVersion } doc) + initState pandocToEPUB :: PandocMonad m => WriterOptions @@ -353,7 +370,7 @@ pandocToEPUB :: PandocMonad m -> E m B.ByteString pandocToEPUB opts doc@(Pandoc meta _) = do let version = fromMaybe EPUB2 (writerEpubVersion opts) - let epub3 = version == EPUB3 + let epub3 = writerEpubVersion opts == Just EPUB3 epochtime <- floor <$> lift P.getPOSIXTime let mkEntry path content = toEntry path epochtime content let vars = ("epub3", if epub3 then "true" else "false") -- cgit v1.2.3