aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-08-11 08:14:54 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-08-11 08:14:54 -0700
commite690fe4a3e13ba1720c2012c9909a6aaf2d8ec8d (patch)
tree4b481a019c9bca3f23045e9937381bdde1822f83
parent270e9a26cc60a3052e6f0b8675d4c517704bec36 (diff)
parente02360d3d8e5c93e797d38d6d5eb8e9abd38ad87 (diff)
downloadpandoc-e690fe4a3e13ba1720c2012c9909a6aaf2d8ec8d.tar.gz
Merge pull request #1516 from mpickering/epubmetadata
EPUB improvements
-rw-r--r--README18
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs13
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs67
3 files changed, 72 insertions, 26 deletions
diff --git a/README b/README
index d7031d9fa..9e628d090 100644
--- a/README
+++ b/README
@@ -2972,43 +2972,60 @@ The following fields are recognized:
`GTIN-13`, `UPC`, `ISMN-10`, `DOI`, `LCCN`, `GTIN-14`,
`ISBN-13`, `Legal deposit number`, `URN`, `OCLC`,
`ISMN-13`, `ISBN-A`, `JP`, `OLCC`.
+
`title`
~ Either a string value, or an object with fields `file-as` and
`type`, or a list of such objects. Valid values for `type` are
`main`, `subtitle`, `short`, `collection`, `edition`, `extended`.
+
`creator`
~ Either a string value, or an object with fields `role`, `file-as`,
and `text`, or a list of such objects. Valid values for `role` are
[marc relators](http://www.loc.gov/marc/relators/relaterm.html), but
pandoc will attempt to translate the human-readable versions
(like "author" and "editor") to the appropriate marc relators.
+
`contributor`
~ Same format as `creator`.
+
`date`
~ A string value in `YYYY-MM-DD` format. (Only the year is necessary.)
Pandoc will attempt to convert other common date formats.
+
`language`
~ A string value in [RFC5646] format. Pandoc will default to the local
language if nothing is specified.
+
`subject`
~ A string value or a list of such values.
+
`description`
~ A string value.
+
`type`
~ A string value.
+
`format`
~ A string value.
+
`relation`
~ A string value.
+
`coverage`
~ A string value.
+
`rights`
~ A string value.
+
`cover-image`
~ A string value (path to cover image).
+
`stylesheet`
~ A string value (path to CSS stylesheet).
+`page-progression-direction`
+ ~ Either `ltr` or `rtl`. Specifies the synonymous spine [attribute][EPUBspine].
+
Literate Haskell support
========================
@@ -3126,3 +3143,4 @@ Rosenthal.
[InDesign ICML]: https://www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf
[txt2tags]: http://txt2tags.org/
[EPUB]: http://idpf.org/epub
+[EPUBspine]: http://www.idpf.org/epub/301/spec/epub-publications.html#sec-spine-elem
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index cb5f684b2..f900c0adc 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -14,7 +14,7 @@ import Text.Pandoc.Walk (walk, query)
import Text.Pandoc.Generic(bottomUp)
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
-import Text.Pandoc.Shared (escapeURI, collapseFilePath)
+import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except)
import qualified Text.Pandoc.Builder as B
@@ -155,7 +155,7 @@ parseMeta content = do
-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
parseMetaItem :: Element -> Meta -> Meta
parseMetaItem e@(stripNamespace . elName -> field) meta =
- B.setMeta (renameMeta field) (B.str $ strContent e) meta
+ addMetaField (renameMeta field) (B.str $ strContent e) meta
renameMeta :: String -> String
renameMeta "creator" = "author"
@@ -169,10 +169,10 @@ getManifest archive = do
ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
as <- liftM ((map attrToPair) . elAttribs)
(findElementE (QName "rootfile" (Just ns) Nothing) docElem)
- root <- mkE "Root not found" (lookup "full-path" as)
- let rootdir = dropFileName root
+ manifestFile <- mkE "Root not found" (lookup "full-path" as)
+ let rootdir = dropFileName manifestFile
--mime <- lookup "media-type" as
- manifest <- findEntryByPathE root archive
+ manifest <- findEntryByPathE manifestFile archive
liftM ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
-- Fixup
@@ -272,7 +272,8 @@ findAttrE :: MonadError String m => QName -> Element -> m String
findAttrE q e = mkE "findAttr" $ findAttr q e
findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry
-findEntryByPathE path a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+findEntryByPathE (normalise -> path) a =
+ mkE ("No entry on path: " ++ path) $ findEntryByPath path a
parseXMLDocE :: MonadError String m => String -> m Element
parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 4ec68879f..2aab7701f 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-}
+{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns #-}
{-
Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
@@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to EPUB.
-}
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
-import Data.IORef
+import Data.IORef ( IORef, newIORef, readIORef, modifyIORef )
import qualified Data.Map as M
import Data.Maybe ( fromMaybe )
import Data.List ( isPrefixOf, isInfixOf, intercalate )
@@ -40,27 +40,35 @@ import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.SelfContained ( makeSelfContained )
-import Codec.Archive.Zip
+import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive)
import Control.Applicative ((<$>))
-import Data.Time.Clock.POSIX
-import Data.Time
-import System.Locale
-import Text.Pandoc.Shared hiding ( Element )
-import qualified Text.Pandoc.Shared as Shared
+import Data.Time.Clock.POSIX ( getPOSIXTime )
+import Data.Time (getCurrentTime,UTCTime, formatTime)
+import System.Locale ( defaultTimeLocale )
+import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim
+ , normalizeDate, readDataFile, stringify, warn
+ , hierarchicalize, fetchItem' )
+import qualified Text.Pandoc.Shared as S (Element(..))
import Text.Pandoc.Builder (fromList, setMeta)
-import Text.Pandoc.Options
+import Text.Pandoc.Options ( WriterOptions(..)
+ , HTMLMathMethod(..)
+ , EPUBVersion(..)
+ , ObfuscationMethod(NoObfuscation) )
import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Control.Monad.State
-import Text.XML.Light hiding (ppTopElement)
-import Text.Pandoc.UUID
-import Text.Pandoc.Writers.HTML
+import Text.Pandoc.Walk (walk, walkM)
+import Control.Monad.State (modify, get, execState, State, put, evalState)
+import Control.Monad (foldM, when, mplus, liftM)
+import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
+ , strContent, lookupAttr, Node(..), QName(..), parseXML
+ , onlyElems, node, ppElement)
+import Text.Pandoc.UUID (getRandomUUID)
+import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml)
import Data.Char ( toLower, isDigit, isAlphaNum )
import Network.URI ( unEscapeString )
import Text.Pandoc.MIME (getMimeType)
import qualified Control.Exception as E
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
-import Text.HTML.TagSoup
+import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
-- A Chapter includes a list of blocks and maybe a section
-- number offset. Note, some chapters are unnumbered. The section
@@ -86,6 +94,7 @@ data EPUBMetadata = EPUBMetadata{
, epubRights :: Maybe String
, epubCoverImage :: Maybe String
, epubStylesheet :: Maybe Stylesheet
+ , epubPageDirection :: ProgressionDirection
} deriving Show
data Stylesheet = StylesheetPath FilePath
@@ -114,6 +123,8 @@ data Title = Title{
, titleType :: Maybe String
} deriving Show
+data ProgressionDirection = LTR | RTL | Default deriving Show
+
dcName :: String -> QName
dcName n = QName n Nothing (Just "dc")
@@ -288,6 +299,7 @@ metadataFromMeta opts meta = EPUBMetadata{
, epubRights = rights
, epubCoverImage = coverImage
, epubStylesheet = stylesheet
+ , epubPageDirection = pageDirection
}
where identifiers = getIdentifier meta
titles = getTitle meta
@@ -310,6 +322,14 @@ metadataFromMeta opts meta = EPUBMetadata{
stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus`
((StylesheetPath . metaValueToString) <$>
lookupMeta "stylesheet" meta)
+ pageDirection = maybe Default stringToPageDirection
+ (lookupMeta "page-progression-direction" meta)
+ stringToPageDirection (metaValueToString -> s) =
+ case s of
+ "ltr" -> LTR
+ "rtl" -> RTL
+ _ -> Default
+
-- | Produce an EPUB file from a Pandoc document.
writeEPUB :: WriterOptions -- ^ Writer options
@@ -374,6 +394,12 @@ writeEPUB opts doc@(Pandoc meta _) = do
let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
fontEntries <- mapM mkFontEntry $ writerEpubFonts opts'
+ -- set page progression direction
+ let progressionDirection = case epubPageDirection metadata of
+ LTR -> "ltr"
+ RTL -> "rtl"
+ Default -> "default"
+
-- body pages
-- add level 1 header to beginning if none there
@@ -493,7 +519,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
(pictureNode x)]) ++
map pictureNode picEntries ++
map fontNode fontEntries
- , unode "spine" ! [("toc","ncx")] $
+ , unode "spine" ! [("toc","ncx")
+ ,("page-progression-direction", progressionDirection)] $
case epubCoverImage metadata of
Nothing -> []
Just _ -> [ unode "itemref" !
@@ -524,8 +551,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
let tocLevel = writerTOCDepth opts
let navPointNode :: (Int -> String -> String -> [Element] -> Element)
- -> Shared.Element -> State Int Element
- navPointNode formatter (Sec _ nums (ident,_,_) ils children) = do
+ -> S.Element -> State Int Element
+ navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do
n <- get
modify (+1)
let showNums :: [Int] -> String
@@ -537,12 +564,12 @@ writeEPUB opts doc@(Pandoc meta _) = do
let src = case lookup ident reftable of
Just x -> x
Nothing -> error (ident ++ " not found in reftable")
- let isSec (Sec lev _ _ _ _) = lev <= tocLevel
+ let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel
isSec _ = False
let subsecs = filter isSec children
subs <- mapM (navPointNode formatter) subsecs
return $ formatter n tit src subs
- navPointNode _ (Blk _) = error "navPointNode encountered Blk"
+ navPointNode _ (S.Blk _) = error "navPointNode encountered Blk"
let navMapFormatter :: Int -> String -> String -> [Element] -> Element
navMapFormatter n tit src subs = unode "navPoint" !