1 files changed, 283 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
new file mode 100644
index 000000000..deaa2fe33
--- /dev/null
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -0,0 +1,283 @@
+{-
+Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Writers.EPUB
+   Copyright   : Copyright (C) 2010 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of 'Pandoc' documents to EPUB.
+-}
+module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
+import Data.IORef
+import Data.Maybe ( fromMaybe, isNothing )
+import Data.List ( findIndices, isPrefixOf )
+import System.Environment ( getEnv )
+import System.FilePath ( (</>), takeBaseName, takeExtension )
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 ( fromString )
+import Codec.Archive.Zip
+import System.Time
+import Text.Pandoc.Shared hiding ( Element )
+import Text.Pandoc.Definition
+import Control.Monad (liftM)
+import Text.XML.Light hiding (ppTopElement)
+import Text.Pandoc.UUID
+import Text.Pandoc.Writers.HTML
+import Text.Pandoc.Writers.Markdown ( writePlain )
+import Data.Char ( toLower )
+
+-- | Produce an EPUB file from a Pandoc document.
+writeEPUB :: Maybe String   -- ^ EPUB stylesheet specified at command line
+          -> WriterOptions  -- ^ Writer options
+          -> Pandoc         -- ^ Document to convert
+          -> IO B.ByteString
+writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
+  (TOD epochtime _) <- getClockTime
+  let mkEntry path content = toEntry path epochtime content
+  let opts' = opts{ writerEmailObfuscation = NoObfuscation
+                  , writerStandalone = True
+                  , writerWrapText = False }
+  let sourceDir = writerSourceDirectory opts'
+
+  -- title page
+  let vars = writerVariables opts'
+  let tpContent = fromString $ writeHtmlString
+                     opts'{writerTemplate = pageTemplate
+                          ,writerVariables = ("titlepage","yes"):vars}
+                     (Pandoc meta [])
+  let tpEntry = mkEntry "title_page.xhtml" tpContent
+
+  -- handle pictures
+  picsRef <- newIORef []
+  Pandoc _ blocks <- liftM (processWith transformBlock) $ processWithM
+       (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc
+  pics <- readIORef picsRef
+  let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e ->
+                                          return e{ eRelativePath = newsrc }
+  picEntries <- mapM readPicEntry pics
+
+  -- body pages
+  let isH1 (Header 1 _) = True
+      isH1 _            = False
+  let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks
+  let chunks = splitByIndices h1Indices blocks
+  let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys
+      titleize xs                 = Pandoc meta xs
+  let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate
+                                        , writerHTMLMathMethod = PlainMath }
+  let chapters = map titleize chunks
+  let chapterToEntry :: Int -> Pandoc -> Entry
+      chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $
+                                   fromString $ chapToHtml chap
+  let chapterEntries = zipWith chapterToEntry [1..] chapters
+
+  -- contents.opf
+  lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang")
+                (\_ -> return "en-US")
+  uuid <- getRandomUUID
+  let chapterNode ent = unode "item" !
+                           [("id", takeBaseName $ eRelativePath ent),
+                            ("href", eRelativePath ent),
+                            ("media-type", "application/xhtml+xml")] $ ()
+  let chapterRefNode ent = unode "itemref" !
+                             [("idref", takeBaseName $ eRelativePath ent)] $ ()
+  let pictureNode ent = unode "item" !
+                           [("id", takeBaseName $ eRelativePath ent),
+                            ("href", eRelativePath ent),
+                            ("media-type", fromMaybe "application/octet-stream"
+                               $ imageTypeOf $ eRelativePath ent)] $ ()
+  let plainify t = removeTrailingSpace $
+                    writePlain opts'{ writerStandalone = False } $
+                    Pandoc meta [Plain t]
+  let plainTitle = plainify $ docTitle meta
+  let plainAuthors = map plainify $ docAuthors meta
+  let contentsData = fromString $ ppTopElement $
+        unode "package" ! [("version","2.0")
+                          ,("xmlns","http://www.idpf.org/2007/opf")
+                          ,("unique-identifier","BookId")] $
+          [ metadataElement (writerEPUBMetadata opts')
+              uuid lang plainTitle plainAuthors
+          , unode "manifest" $
+             [ unode "item" ! [("id","ncx"), ("href","toc.ncx")
+                              ,("media-type","application/x-dtbncx+xml")] $ ()
+             , unode "item" ! [("id","style"), ("href","stylesheet.css")
+                              ,("media-type","text/css")] $ ()
+             ] ++
+             map chapterNode (tpEntry : chapterEntries) ++
+             map pictureNode picEntries
+          , unode "spine" ! [("toc","ncx")] $
+              map chapterRefNode (tpEntry : chapterEntries)
+          ]
+  let contentsEntry = mkEntry "content.opf" contentsData
+
+  -- toc.ncx
+  let navPointNode ent n tit = unode "navPoint" !
+                                [("id", "navPoint-" ++ show n)
+                                ,("playOrder", show n)] $
+                                   [ unode "navLabel" $ unode "text" tit
+                                   , unode "content" ! [("src",
+                                        eRelativePath ent)] $ ()
+                                   ]
+  let tocData = fromString $ ppTopElement $
+        unode "ncx" ! [("version","2005-1")
+                       ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
+          [ unode "head"
+             [ unode "meta" ! [("name","dtb:uid")
+                              ,("content", show uuid)] $ ()
+             , unode "meta" ! [("name","dtb:depth")
+                              ,("content", "1")] $ ()
+             , unode "meta" ! [("name","dtb:totalPageCount")
+                              ,("content", "0")] $ ()
+             , unode "meta" ! [("name","dtb:maxPageNumber")
+                              ,("content", "0")] $ ()
+             ] 
+          , unode "docTitle" $ unode "text" $ plainTitle
+          , unode "navMap" $ zipWith3 navPointNode (tpEntry : chapterEntries)
+                                [1..(length chapterEntries + 1)]
+                                ("Title Page" : map (\(Pandoc m _) ->
+                                   plainify $ docTitle m) chapters)
+          ]
+  let tocEntry = mkEntry "toc.ncx" tocData
+
+  -- mimetype
+  let mimetypeEntry = mkEntry "mimetype" $ fromString "application/epub+zip"
+
+  -- container.xml
+  let containerData = fromString $ ppTopElement $
+       unode "container" ! [("version","1.0")
+              ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
+         unode "rootfiles" $
+           unode "rootfile" ! [("full-path","content.opf")
+               ,("media-type","application/oebps-package+xml")] $ ()
+  let containerEntry = mkEntry "META-INF/container.xml" containerData
+
+  -- stylesheet
+  stylesheet <- case mbStylesheet of
+                   Just s  -> return s
+                   Nothing -> readDataFile (writerUserDataDir opts) "epub.css"
+  let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet
+
+  -- construct archive
+  let archive = foldr addEntryToArchive emptyArchive
+                 (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry :
+                  contentsEntry : tocEntry : (picEntries ++ chapterEntries) )
+  return $ fromArchive archive
+
+metadataElement :: String -> UUID -> String -> String -> [String] -> Element
+metadataElement metadataXML uuid lang title authors =
+  let userNodes = parseXML metadataXML
+      elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
+                               ,("xmlns:opf","http://www.idpf.org/2007/opf")] $
+            filter isDublinCoreElement $ onlyElems userNodes
+      dublinElements = ["contributor","coverage","creator","date",
+            "description","format","identifier","language","publisher",
+            "relation","rights","source","subject","title","type"]
+      isDublinCoreElement e = qPrefix (elName e) == Just "dc" &&
+                              qName (elName e) `elem` dublinElements
+      contains e n = not (null (findElements (QName n Nothing (Just "dc")) e))
+      newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++
+           [ unode "dc:language" lang | not (elt `contains` "language") ] ++
+           [ unode "dc:identifier" ! [("id","BookId")] $ show uuid |
+               not (elt `contains` "identifier") ] ++
+           [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ]
+  in  elt{ elContent = elContent elt ++ map Elem newNodes }
+
+transformInlines :: HTMLMathMethod
+                 -> FilePath
+                 -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
+                 -> [Inline]
+                 -> IO [Inline]
+transformInlines _ _ _ (Image lab (src,_) : xs) | isNothing (imageTypeOf src) =
+  return $ Emph lab : xs
+transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do
+  pics <- readIORef picsRef
+  let oldsrc = sourceDir </> src
+  let ext = takeExtension src
+  newsrc <- case lookup oldsrc pics of
+                  Just n  -> return n
+                  Nothing -> do
+                        let new = "images/img" ++ show (length pics) ++ ext
+                        modifyIORef picsRef ( (oldsrc, new): )
+                        return new
+  return $ Image lab (newsrc, tit) : xs
+transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do
+  let writeHtmlInline opts z = removeTrailingSpace $
+         writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]]
+      mathml = writeHtmlInline defaultWriterOptions{
+                 writerHTMLMathMethod = MathML Nothing } x
+      fallback = writeHtmlInline defaultWriterOptions{
+                 writerHTMLMathMethod = PlainMath } x
+      inOps = "<ops:switch xmlns:ops=\"http://www.idpf.org/2007/ops\">" ++
+       "<ops:case required-namespace=\"http://www.w3.org/1998/Math/MathML\">" ++
+       mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++
+       "</ops:switch>"
+      result = if "<math" `isPrefixOf` mathml then inOps else mathml
+  return $ HtmlInline result : xs
+transformInlines _ _ _ (HtmlInline _ : xs) = return $ Str "" : xs
+transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs
+transformInlines _ _ _ xs = return xs
+
+transformBlock :: Block -> Block
+transformBlock (RawHtml _) = Null
+transformBlock x = x
+
+(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element
+(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
+
+-- | Version of 'ppTopElement' that specifies UTF-8 encoding.
+ppTopElement :: Element -> String
+ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement
+
+imageTypeOf :: FilePath -> Maybe String
+imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
+                     "jpg"       -> Just "image/jpeg"
+                     "jpeg"      -> Just "image/jpeg"
+                     "jfif"      -> Just "image/jpeg"
+                     "png"       -> Just "image/png"
+                     "gif"       -> Just "image/gif"
+                     "svg"       -> Just "image/svg+xml"
+                     _           -> Nothing
+
+pageTemplate :: String
+pageTemplate = unlines
+ [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
+ , "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
+ , "<head>"
+ , "<title>$title$</title>"
+ , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />"
+ , "</head>"
+ , "<body>"
+ , "$if(titlepage)$"
+ , "<h1 class=\"title\">$title$</h1>"
+ , "$for(author)$"
+ , "<h2 class=\"author\">$author$</h2>"
+ , "$endfor$"
+ , "$else$"
+ , "<h1>$title$</h1>"
+ , "$body$"
+ , "$endif$"
+ , "</body>"
+ , "</html>"
+ ]
+