diff options
Diffstat (limited to 'src/Text/Pandoc/Writers/EPUB.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 283 |
1 files changed, 283 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs new file mode 100644 index 000000000..deaa2fe33 --- /dev/null +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -0,0 +1,283 @@ +{- +Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.EPUB + Copyright : Copyright (C) 2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to EPUB. +-} +module Text.Pandoc.Writers.EPUB ( writeEPUB ) where +import Data.IORef +import Data.Maybe ( fromMaybe, isNothing ) +import Data.List ( findIndices, isPrefixOf ) +import System.Environment ( getEnv ) +import System.FilePath ( (</>), takeBaseName, takeExtension ) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 ( fromString ) +import Codec.Archive.Zip +import System.Time +import Text.Pandoc.Shared hiding ( Element ) +import Text.Pandoc.Definition +import Control.Monad (liftM) +import Text.XML.Light hiding (ppTopElement) +import Text.Pandoc.UUID +import Text.Pandoc.Writers.HTML +import Text.Pandoc.Writers.Markdown ( writePlain ) +import Data.Char ( toLower ) + +-- | Produce an EPUB file from a Pandoc document. +writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line + -> WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> IO B.ByteString +writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do + (TOD epochtime _) <- getClockTime + let mkEntry path content = toEntry path epochtime content + let opts' = opts{ writerEmailObfuscation = NoObfuscation + , writerStandalone = True + , writerWrapText = False } + let sourceDir = writerSourceDirectory opts' + + -- title page + let vars = writerVariables opts' + let tpContent = fromString $ writeHtmlString + opts'{writerTemplate = pageTemplate + ,writerVariables = ("titlepage","yes"):vars} + (Pandoc meta []) + let tpEntry = mkEntry "title_page.xhtml" tpContent + + -- handle pictures + picsRef <- newIORef [] + Pandoc _ blocks <- liftM (processWith transformBlock) $ processWithM + (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc + pics <- readIORef picsRef + let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e -> + return e{ eRelativePath = newsrc } + picEntries <- mapM readPicEntry pics + + -- body pages + let isH1 (Header 1 _) = True + isH1 _ = False + let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks + let chunks = splitByIndices h1Indices blocks + let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys + titleize xs = Pandoc meta xs + let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate + , writerHTMLMathMethod = PlainMath } + let chapters = map titleize chunks + let chapterToEntry :: Int -> Pandoc -> Entry + chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $ + fromString $ chapToHtml chap + let chapterEntries = zipWith chapterToEntry [1..] chapters + + -- contents.opf + lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang") + (\_ -> return "en-US") + uuid <- getRandomUUID + let chapterNode ent = unode "item" ! + [("id", takeBaseName $ eRelativePath ent), + ("href", eRelativePath ent), + ("media-type", "application/xhtml+xml")] $ () + let chapterRefNode ent = unode "itemref" ! + [("idref", takeBaseName $ eRelativePath ent)] $ () + let pictureNode ent = unode "item" ! + [("id", takeBaseName $ eRelativePath ent), + ("href", eRelativePath ent), + ("media-type", fromMaybe "application/octet-stream" + $ imageTypeOf $ eRelativePath ent)] $ () + let plainify t = removeTrailingSpace $ + writePlain opts'{ writerStandalone = False } $ + Pandoc meta [Plain t] + let plainTitle = plainify $ docTitle meta + let plainAuthors = map plainify $ docAuthors meta + let contentsData = fromString $ ppTopElement $ + unode "package" ! [("version","2.0") + ,("xmlns","http://www.idpf.org/2007/opf") + ,("unique-identifier","BookId")] $ + [ metadataElement (writerEPUBMetadata opts') + uuid lang plainTitle plainAuthors + , unode "manifest" $ + [ unode "item" ! [("id","ncx"), ("href","toc.ncx") + ,("media-type","application/x-dtbncx+xml")] $ () + , unode "item" ! [("id","style"), ("href","stylesheet.css") + ,("media-type","text/css")] $ () + ] ++ + map chapterNode (tpEntry : chapterEntries) ++ + map pictureNode picEntries + , unode "spine" ! [("toc","ncx")] $ + map chapterRefNode (tpEntry : chapterEntries) + ] + let contentsEntry = mkEntry "content.opf" contentsData + + -- toc.ncx + let navPointNode ent n tit = unode "navPoint" ! + [("id", "navPoint-" ++ show n) + ,("playOrder", show n)] $ + [ unode "navLabel" $ unode "text" tit + , unode "content" ! [("src", + eRelativePath ent)] $ () + ] + let tocData = fromString $ ppTopElement $ + unode "ncx" ! [("version","2005-1") + ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ + [ unode "head" + [ unode "meta" ! [("name","dtb:uid") + ,("content", show uuid)] $ () + , unode "meta" ! [("name","dtb:depth") + ,("content", "1")] $ () + , unode "meta" ! [("name","dtb:totalPageCount") + ,("content", "0")] $ () + , unode "meta" ! [("name","dtb:maxPageNumber") + ,("content", "0")] $ () + ] + , unode "docTitle" $ unode "text" $ plainTitle + , unode "navMap" $ zipWith3 navPointNode (tpEntry : chapterEntries) + [1..(length chapterEntries + 1)] + ("Title Page" : map (\(Pandoc m _) -> + plainify $ docTitle m) chapters) + ] + let tocEntry = mkEntry "toc.ncx" tocData + + -- mimetype + let mimetypeEntry = mkEntry "mimetype" $ fromString "application/epub+zip" + + -- container.xml + let containerData = fromString $ ppTopElement $ + unode "container" ! [("version","1.0") + ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ + unode "rootfiles" $ + unode "rootfile" ! [("full-path","content.opf") + ,("media-type","application/oebps-package+xml")] $ () + let containerEntry = mkEntry "META-INF/container.xml" containerData + + -- stylesheet + stylesheet <- case mbStylesheet of + Just s -> return s + Nothing -> readDataFile (writerUserDataDir opts) "epub.css" + let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet + + -- construct archive + let archive = foldr addEntryToArchive emptyArchive + (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry : + contentsEntry : tocEntry : (picEntries ++ chapterEntries) ) + return $ fromArchive archive + +metadataElement :: String -> UUID -> String -> String -> [String] -> Element +metadataElement metadataXML uuid lang title authors = + let userNodes = parseXML metadataXML + elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ + filter isDublinCoreElement $ onlyElems userNodes + dublinElements = ["contributor","coverage","creator","date", + "description","format","identifier","language","publisher", + "relation","rights","source","subject","title","type"] + isDublinCoreElement e = qPrefix (elName e) == Just "dc" && + qName (elName e) `elem` dublinElements + contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) + newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ + [ unode "dc:language" lang | not (elt `contains` "language") ] ++ + [ unode "dc:identifier" ! [("id","BookId")] $ show uuid | + not (elt `contains` "identifier") ] ++ + [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] + in elt{ elContent = elContent elt ++ map Elem newNodes } + +transformInlines :: HTMLMathMethod + -> FilePath + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images + -> [Inline] + -> IO [Inline] +transformInlines _ _ _ (Image lab (src,_) : xs) | isNothing (imageTypeOf src) = + return $ Emph lab : xs +transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do + pics <- readIORef picsRef + let oldsrc = sourceDir </> src + let ext = takeExtension src + newsrc <- case lookup oldsrc pics of + Just n -> return n + Nothing -> do + let new = "images/img" ++ show (length pics) ++ ext + modifyIORef picsRef ( (oldsrc, new): ) + return new + return $ Image lab (newsrc, tit) : xs +transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do + let writeHtmlInline opts z = removeTrailingSpace $ + writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]] + mathml = writeHtmlInline defaultWriterOptions{ + writerHTMLMathMethod = MathML Nothing } x + fallback = writeHtmlInline defaultWriterOptions{ + writerHTMLMathMethod = PlainMath } x + inOps = "<ops:switch xmlns:ops=\"http://www.idpf.org/2007/ops\">" ++ + "<ops:case required-namespace=\"http://www.w3.org/1998/Math/MathML\">" ++ + mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++ + "</ops:switch>" + result = if "<math" `isPrefixOf` mathml then inOps else mathml + return $ HtmlInline result : xs +transformInlines _ _ _ (HtmlInline _ : xs) = return $ Str "" : xs +transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs +transformInlines _ _ _ xs = return xs + +transformBlock :: Block -> Block +transformBlock (RawHtml _) = Null +transformBlock x = x + +(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element +(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) + +-- | Version of 'ppTopElement' that specifies UTF-8 encoding. +ppTopElement :: Element -> String +ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement + +imageTypeOf :: FilePath -> Maybe String +imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of + "jpg" -> Just "image/jpeg" + "jpeg" -> Just "image/jpeg" + "jfif" -> Just "image/jpeg" + "png" -> Just "image/png" + "gif" -> Just "image/gif" + "svg" -> Just "image/svg+xml" + _ -> Nothing + +pageTemplate :: String +pageTemplate = unlines + [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" + , "<html xmlns=\"http://www.w3.org/1999/xhtml\">" + , "<head>" + , "<title>$title$</title>" + , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />" + , "</head>" + , "<body>" + , "$if(titlepage)$" + , "<h1 class=\"title\">$title$</h1>" + , "$for(author)$" + , "<h2 class=\"author\">$author$</h2>" + , "$endfor$" + , "$else$" + , "<h1>$title$</h1>" + , "$body$" + , "$endif$" + , "</body>" + , "</html>" + ] + |