diff options
Diffstat (limited to 'src/Text/Pandoc/Writers/EPUB.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 123 |
1 files changed, 75 insertions, 48 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index b6687c330..2aab7701f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-} +{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns #-} {- Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> @@ -29,10 +29,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to EPUB. -} module Text.Pandoc.Writers.EPUB ( writeEPUB ) where -import Data.IORef +import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) import qualified Data.Map as M import Data.Maybe ( fromMaybe ) -import Data.List ( isInfixOf, intercalate ) +import Data.List ( isPrefixOf, isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) import System.FilePath ( (</>), takeExtension, takeFileName ) @@ -40,28 +40,35 @@ import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.SelfContained ( makeSelfContained ) -import Codec.Archive.Zip +import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive) import Control.Applicative ((<$>)) -import Data.Time.Clock.POSIX -import Data.Time -import System.Locale -import Text.Pandoc.Shared hiding ( Element ) -import qualified Text.Pandoc.Shared as Shared +import Data.Time.Clock.POSIX ( getPOSIXTime ) +import Data.Time (getCurrentTime,UTCTime, formatTime) +import System.Locale ( defaultTimeLocale ) +import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim + , normalizeDate, readDataFile, stringify, warn + , hierarchicalize, fetchItem' ) +import qualified Text.Pandoc.Shared as S (Element(..)) import Text.Pandoc.Builder (fromList, setMeta) -import Text.Pandoc.Options +import Text.Pandoc.Options ( WriterOptions(..) + , HTMLMathMethod(..) + , EPUBVersion(..) + , ObfuscationMethod(NoObfuscation) ) import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Control.Monad.State -import Text.XML.Light hiding (ppTopElement) -import Text.Pandoc.UUID -import Text.Pandoc.Writers.HTML -import Text.Pandoc.Writers.Markdown ( writePlain ) +import Text.Pandoc.Walk (walk, walkM) +import Control.Monad.State (modify, get, execState, State, put, evalState) +import Control.Monad (foldM, when, mplus, liftM) +import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs + , strContent, lookupAttr, Node(..), QName(..), parseXML + , onlyElems, node, ppElement) +import Text.Pandoc.UUID (getRandomUUID) +import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -import Text.HTML.TagSoup +import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -87,6 +94,7 @@ data EPUBMetadata = EPUBMetadata{ , epubRights :: Maybe String , epubCoverImage :: Maybe String , epubStylesheet :: Maybe Stylesheet + , epubPageDirection :: ProgressionDirection } deriving Show data Stylesheet = StylesheetPath FilePath @@ -115,6 +123,8 @@ data Title = Title{ , titleType :: Maybe String } deriving Show +data ProgressionDirection = LTR | RTL | Default deriving Show + dcName :: String -> QName dcName n = QName n Nothing (Just "dc") @@ -124,20 +134,15 @@ dcNode = node . dcName opfName :: String -> QName opfName n = QName n Nothing (Just "opf") -plainify :: [Inline] -> String -plainify t = - trimr $ writePlain def{ writerStandalone = False } - $ Pandoc nullMeta [Plain $ walk removeNote t] - -removeNote :: Inline -> Inline -removeNote (Note _) = Str "" -removeNote x = x - toId :: FilePath -> String toId = map (\x -> if isAlphaNum x || x == '-' || x == '_' then x else '_') . takeFileName +removeNote :: Inline -> Inline +removeNote (Note _) = Str "" +removeNote x = x + getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta @@ -172,7 +177,7 @@ getEPUBMetadata opts meta = do if any (\c -> creatorRole c == Just "aut") $ epubCreator m then return m else do - let authors' = map plainify $ docAuthors meta + let authors' = map stringify $ docAuthors meta let toAuthor name = Creator{ creatorText = name , creatorRole = Just "aut" , creatorFileAs = Nothing } @@ -221,8 +226,8 @@ addMetadataFromXML _ md = md metaValueToString :: MetaValue -> String metaValueToString (MetaString s) = s -metaValueToString (MetaInlines ils) = plainify ils -metaValueToString (MetaBlocks bs) = plainify $ query (:[]) bs +metaValueToString (MetaInlines ils) = stringify ils +metaValueToString (MetaBlocks bs) = stringify bs metaValueToString (MetaBool b) = show b metaValueToString _ = "" @@ -294,6 +299,7 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubRights = rights , epubCoverImage = coverImage , epubStylesheet = stylesheet + , epubPageDirection = pageDirection } where identifiers = getIdentifier meta titles = getTitle meta @@ -316,6 +322,14 @@ metadataFromMeta opts meta = EPUBMetadata{ stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus` ((StylesheetPath . metaValueToString) <$> lookupMeta "stylesheet" meta) + pageDirection = maybe Default stringToPageDirection + (lookupMeta "page-progression-direction" meta) + stringToPageDirection (metaValueToString -> s) = + case s of + "ltr" -> LTR + "rtl" -> RTL + _ -> Default + -- | Produce an EPUB file from a Pandoc document. writeEPUB :: WriterOptions -- ^ Writer options @@ -339,7 +353,7 @@ writeEPUB opts doc@(Pandoc meta _) = do if epub3 then MathML Nothing else writerHTMLMathMethod opts - , writerWrapText = False } + , writerWrapText = True } metadata <- getEPUBMetadata opts' meta -- cover page @@ -366,7 +380,8 @@ writeEPUB opts doc@(Pandoc meta _) = do walkM (transformBlock opts' mediaRef) pics <- readIORef mediaRef let readPicEntry entries (oldsrc, newsrc) = do - res <- fetchItem (writerSourceURL opts') oldsrc + res <- fetchItem' (writerMediaBag opts') + (writerSourceURL opts') oldsrc case res of Left _ -> do warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." @@ -379,6 +394,12 @@ writeEPUB opts doc@(Pandoc meta _) = do let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f fontEntries <- mapM mkFontEntry $ writerEpubFonts opts' + -- set page progression direction + let progressionDirection = case epubPageDirection metadata of + LTR -> "ltr" + RTL -> "rtl" + Default -> "default" + -- body pages -- add level 1 header to beginning if none there @@ -468,7 +489,7 @@ writeEPUB opts doc@(Pandoc meta _) = do [] -> case epubTitle metadata of [] -> "UNTITLED" (x:_) -> titleText x - x -> plainify x + x -> stringify x let uuid = case epubIdentifier metadata of (x:_) -> identifierText x -- use first identifier as UUID [] -> error "epubIdentifier is null" -- shouldn't happen @@ -498,7 +519,8 @@ writeEPUB opts doc@(Pandoc meta _) = do (pictureNode x)]) ++ map pictureNode picEntries ++ map fontNode fontEntries - , unode "spine" ! [("toc","ncx")] $ + , unode "spine" ! [("toc","ncx") + ,("page-progression-direction", progressionDirection)] $ case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! @@ -529,25 +551,25 @@ writeEPUB opts doc@(Pandoc meta _) = do let tocLevel = writerTOCDepth opts let navPointNode :: (Int -> String -> String -> [Element] -> Element) - -> Shared.Element -> State Int Element - navPointNode formatter (Sec _ nums (ident,_,_) ils children) = do + -> S.Element -> State Int Element + navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do n <- get modify (+1) let showNums :: [Int] -> String showNums = intercalate "." . map show - let tit' = plainify ils + let tit' = stringify ils let tit = if writerNumberSections opts && not (null nums) then showNums nums ++ " " ++ tit' else tit' let src = case lookup ident reftable of Just x -> x Nothing -> error (ident ++ " not found in reftable") - let isSec (Sec lev _ _ _ _) = lev <= tocLevel + let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel isSec _ = False let subsecs = filter isSec children subs <- mapM (navPointNode formatter) subsecs return $ formatter n tit src subs - navPointNode _ (Blk _) = error "navPointNode encountered Blk" + navPointNode _ (S.Blk _) = error "navPointNode encountered Blk" let navMapFormatter :: Int -> String -> String -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! @@ -558,7 +580,7 @@ writeEPUB opts doc@(Pandoc meta _) = do ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (plainify $ docTitle meta) + [ unode "navLabel" $ unode "text" (stringify $ docTitle meta) , unode "content" ! [("src","title_page.xhtml")] $ () ] let tocData = UTF8.fromStringLazy $ ppTopElement $ @@ -587,7 +609,7 @@ writeEPUB opts doc@(Pandoc meta _) = do navXhtmlFormatter n tit src subs = unode "li" ! [("id", "toc-li-" ++ show n)] $ (unode "a" ! [("href",src)] - $ (unode "span" tit)) + $ tit) : case subs of [] -> [] (_:_) -> [unode "ol" ! [("class","toc")] $ subs] @@ -751,7 +773,7 @@ transformTag :: WriterOptions -> Tag String -> IO (Tag String) transformTag opts mediaRef tag@(TagOpen name attr) - | name == "video" || name == "source" || name == "img" = do + | name `elem` ["video", "source", "img", "audio"] = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag let oldsrc = maybe src (</> src) $ writerSourceURL opts @@ -784,7 +806,7 @@ transformBlock opts mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw tags' <- mapM (transformTag opts mediaRef) tags - return $ RawBlock fmt (renderTags tags') + return $ RawBlock fmt (renderTags' tags') transformBlock _ _ b = return b transformInline :: WriterOptions @@ -798,8 +820,13 @@ transformInline opts mediaRef (Image lab (src,tit)) = do return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do - raw <- makeSelfContained Nothing $ writeHtmlInline opts x + raw <- makeSelfContained opts $ writeHtmlInline opts x return $ RawInline (Format "html") raw +transformInline opts mediaRef (RawInline fmt raw) + | fmt == Format "html" = do + let tags = parseTags raw + tags' <- mapM (transformTag opts mediaRef) tags + return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x writeHtmlInline :: WriterOptions -> Inline -> String @@ -825,11 +852,11 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . unEntity (x:xs) = x : unEntity xs mediaTypeOf :: FilePath -> Maybe String -mediaTypeOf x = case getMimeType x of - Just y@('i':'m':'a':'g':'e':_) -> Just y - Just y@('v':'i':'d':'e':'o':_) -> Just y - Just y@('a':'u':'d':'i':'o':_) -> Just y - _ -> Nothing +mediaTypeOf x = + let mediaPrefixes = ["image", "video", "audio"] in + case getMimeType x of + Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y + _ -> Nothing data IdentState = IdentState{ chapterNumber :: Int, |