aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/EPUB.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Writers/EPUB.hs')
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs123
1 files changed, 75 insertions, 48 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index b6687c330..2aab7701f 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-}
+{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns #-}
{-
Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
@@ -29,10 +29,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to EPUB.
-}
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
-import Data.IORef
+import Data.IORef ( IORef, newIORef, readIORef, modifyIORef )
import qualified Data.Map as M
import Data.Maybe ( fromMaybe )
-import Data.List ( isInfixOf, intercalate )
+import Data.List ( isPrefixOf, isInfixOf, intercalate )
import System.Environment ( getEnv )
import Text.Printf (printf)
import System.FilePath ( (</>), takeExtension, takeFileName )
@@ -40,28 +40,35 @@ import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.SelfContained ( makeSelfContained )
-import Codec.Archive.Zip
+import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive)
import Control.Applicative ((<$>))
-import Data.Time.Clock.POSIX
-import Data.Time
-import System.Locale
-import Text.Pandoc.Shared hiding ( Element )
-import qualified Text.Pandoc.Shared as Shared
+import Data.Time.Clock.POSIX ( getPOSIXTime )
+import Data.Time (getCurrentTime,UTCTime, formatTime)
+import System.Locale ( defaultTimeLocale )
+import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim
+ , normalizeDate, readDataFile, stringify, warn
+ , hierarchicalize, fetchItem' )
+import qualified Text.Pandoc.Shared as S (Element(..))
import Text.Pandoc.Builder (fromList, setMeta)
-import Text.Pandoc.Options
+import Text.Pandoc.Options ( WriterOptions(..)
+ , HTMLMathMethod(..)
+ , EPUBVersion(..)
+ , ObfuscationMethod(NoObfuscation) )
import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Control.Monad.State
-import Text.XML.Light hiding (ppTopElement)
-import Text.Pandoc.UUID
-import Text.Pandoc.Writers.HTML
-import Text.Pandoc.Writers.Markdown ( writePlain )
+import Text.Pandoc.Walk (walk, walkM)
+import Control.Monad.State (modify, get, execState, State, put, evalState)
+import Control.Monad (foldM, when, mplus, liftM)
+import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
+ , strContent, lookupAttr, Node(..), QName(..), parseXML
+ , onlyElems, node, ppElement)
+import Text.Pandoc.UUID (getRandomUUID)
+import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml)
import Data.Char ( toLower, isDigit, isAlphaNum )
import Network.URI ( unEscapeString )
import Text.Pandoc.MIME (getMimeType)
import qualified Control.Exception as E
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
-import Text.HTML.TagSoup
+import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
-- A Chapter includes a list of blocks and maybe a section
-- number offset. Note, some chapters are unnumbered. The section
@@ -87,6 +94,7 @@ data EPUBMetadata = EPUBMetadata{
, epubRights :: Maybe String
, epubCoverImage :: Maybe String
, epubStylesheet :: Maybe Stylesheet
+ , epubPageDirection :: ProgressionDirection
} deriving Show
data Stylesheet = StylesheetPath FilePath
@@ -115,6 +123,8 @@ data Title = Title{
, titleType :: Maybe String
} deriving Show
+data ProgressionDirection = LTR | RTL | Default deriving Show
+
dcName :: String -> QName
dcName n = QName n Nothing (Just "dc")
@@ -124,20 +134,15 @@ dcNode = node . dcName
opfName :: String -> QName
opfName n = QName n Nothing (Just "opf")
-plainify :: [Inline] -> String
-plainify t =
- trimr $ writePlain def{ writerStandalone = False }
- $ Pandoc nullMeta [Plain $ walk removeNote t]
-
-removeNote :: Inline -> Inline
-removeNote (Note _) = Str ""
-removeNote x = x
-
toId :: FilePath -> String
toId = map (\x -> if isAlphaNum x || x == '-' || x == '_'
then x
else '_') . takeFileName
+removeNote :: Inline -> Inline
+removeNote (Note _) = Str ""
+removeNote x = x
+
getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata
getEPUBMetadata opts meta = do
let md = metadataFromMeta opts meta
@@ -172,7 +177,7 @@ getEPUBMetadata opts meta = do
if any (\c -> creatorRole c == Just "aut") $ epubCreator m
then return m
else do
- let authors' = map plainify $ docAuthors meta
+ let authors' = map stringify $ docAuthors meta
let toAuthor name = Creator{ creatorText = name
, creatorRole = Just "aut"
, creatorFileAs = Nothing }
@@ -221,8 +226,8 @@ addMetadataFromXML _ md = md
metaValueToString :: MetaValue -> String
metaValueToString (MetaString s) = s
-metaValueToString (MetaInlines ils) = plainify ils
-metaValueToString (MetaBlocks bs) = plainify $ query (:[]) bs
+metaValueToString (MetaInlines ils) = stringify ils
+metaValueToString (MetaBlocks bs) = stringify bs
metaValueToString (MetaBool b) = show b
metaValueToString _ = ""
@@ -294,6 +299,7 @@ metadataFromMeta opts meta = EPUBMetadata{
, epubRights = rights
, epubCoverImage = coverImage
, epubStylesheet = stylesheet
+ , epubPageDirection = pageDirection
}
where identifiers = getIdentifier meta
titles = getTitle meta
@@ -316,6 +322,14 @@ metadataFromMeta opts meta = EPUBMetadata{
stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus`
((StylesheetPath . metaValueToString) <$>
lookupMeta "stylesheet" meta)
+ pageDirection = maybe Default stringToPageDirection
+ (lookupMeta "page-progression-direction" meta)
+ stringToPageDirection (metaValueToString -> s) =
+ case s of
+ "ltr" -> LTR
+ "rtl" -> RTL
+ _ -> Default
+
-- | Produce an EPUB file from a Pandoc document.
writeEPUB :: WriterOptions -- ^ Writer options
@@ -339,7 +353,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
if epub3
then MathML Nothing
else writerHTMLMathMethod opts
- , writerWrapText = False }
+ , writerWrapText = True }
metadata <- getEPUBMetadata opts' meta
-- cover page
@@ -366,7 +380,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
walkM (transformBlock opts' mediaRef)
pics <- readIORef mediaRef
let readPicEntry entries (oldsrc, newsrc) = do
- res <- fetchItem (writerSourceURL opts') oldsrc
+ res <- fetchItem' (writerMediaBag opts')
+ (writerSourceURL opts') oldsrc
case res of
Left _ -> do
warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
@@ -379,6 +394,12 @@ writeEPUB opts doc@(Pandoc meta _) = do
let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
fontEntries <- mapM mkFontEntry $ writerEpubFonts opts'
+ -- set page progression direction
+ let progressionDirection = case epubPageDirection metadata of
+ LTR -> "ltr"
+ RTL -> "rtl"
+ Default -> "default"
+
-- body pages
-- add level 1 header to beginning if none there
@@ -468,7 +489,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
[] -> case epubTitle metadata of
[] -> "UNTITLED"
(x:_) -> titleText x
- x -> plainify x
+ x -> stringify x
let uuid = case epubIdentifier metadata of
(x:_) -> identifierText x -- use first identifier as UUID
[] -> error "epubIdentifier is null" -- shouldn't happen
@@ -498,7 +519,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
(pictureNode x)]) ++
map pictureNode picEntries ++
map fontNode fontEntries
- , unode "spine" ! [("toc","ncx")] $
+ , unode "spine" ! [("toc","ncx")
+ ,("page-progression-direction", progressionDirection)] $
case epubCoverImage metadata of
Nothing -> []
Just _ -> [ unode "itemref" !
@@ -529,25 +551,25 @@ writeEPUB opts doc@(Pandoc meta _) = do
let tocLevel = writerTOCDepth opts
let navPointNode :: (Int -> String -> String -> [Element] -> Element)
- -> Shared.Element -> State Int Element
- navPointNode formatter (Sec _ nums (ident,_,_) ils children) = do
+ -> S.Element -> State Int Element
+ navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do
n <- get
modify (+1)
let showNums :: [Int] -> String
showNums = intercalate "." . map show
- let tit' = plainify ils
+ let tit' = stringify ils
let tit = if writerNumberSections opts && not (null nums)
then showNums nums ++ " " ++ tit'
else tit'
let src = case lookup ident reftable of
Just x -> x
Nothing -> error (ident ++ " not found in reftable")
- let isSec (Sec lev _ _ _ _) = lev <= tocLevel
+ let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel
isSec _ = False
let subsecs = filter isSec children
subs <- mapM (navPointNode formatter) subsecs
return $ formatter n tit src subs
- navPointNode _ (Blk _) = error "navPointNode encountered Blk"
+ navPointNode _ (S.Blk _) = error "navPointNode encountered Blk"
let navMapFormatter :: Int -> String -> String -> [Element] -> Element
navMapFormatter n tit src subs = unode "navPoint" !
@@ -558,7 +580,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
] ++ subs
let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $
- [ unode "navLabel" $ unode "text" (plainify $ docTitle meta)
+ [ unode "navLabel" $ unode "text" (stringify $ docTitle meta)
, unode "content" ! [("src","title_page.xhtml")] $ () ]
let tocData = UTF8.fromStringLazy $ ppTopElement $
@@ -587,7 +609,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
navXhtmlFormatter n tit src subs = unode "li" !
[("id", "toc-li-" ++ show n)] $
(unode "a" ! [("href",src)]
- $ (unode "span" tit))
+ $ tit)
: case subs of
[] -> []
(_:_) -> [unode "ol" ! [("class","toc")] $ subs]
@@ -751,7 +773,7 @@ transformTag :: WriterOptions
-> Tag String
-> IO (Tag String)
transformTag opts mediaRef tag@(TagOpen name attr)
- | name == "video" || name == "source" || name == "img" = do
+ | name `elem` ["video", "source", "img", "audio"] = do
let src = fromAttrib "src" tag
let poster = fromAttrib "poster" tag
let oldsrc = maybe src (</> src) $ writerSourceURL opts
@@ -784,7 +806,7 @@ transformBlock opts mediaRef (RawBlock fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
tags' <- mapM (transformTag opts mediaRef) tags
- return $ RawBlock fmt (renderTags tags')
+ return $ RawBlock fmt (renderTags' tags')
transformBlock _ _ b = return b
transformInline :: WriterOptions
@@ -798,8 +820,13 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
- raw <- makeSelfContained Nothing $ writeHtmlInline opts x
+ raw <- makeSelfContained opts $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
+transformInline opts mediaRef (RawInline fmt raw)
+ | fmt == Format "html" = do
+ let tags = parseTags raw
+ tags' <- mapM (transformTag opts mediaRef) tags
+ return $ RawInline fmt (renderTags' tags')
transformInline _ _ x = return x
writeHtmlInline :: WriterOptions -> Inline -> String
@@ -825,11 +852,11 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity .
unEntity (x:xs) = x : unEntity xs
mediaTypeOf :: FilePath -> Maybe String
-mediaTypeOf x = case getMimeType x of
- Just y@('i':'m':'a':'g':'e':_) -> Just y
- Just y@('v':'i':'d':'e':'o':_) -> Just y
- Just y@('a':'u':'d':'i':'o':_) -> Just y
- _ -> Nothing
+mediaTypeOf x =
+ let mediaPrefixes = ["image", "video", "audio"] in
+ case getMimeType x of
+ Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y
+ _ -> Nothing
data IdentState = IdentState{
chapterNumber :: Int,