about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2019-07-13 16:48:09 -0700
committer: GitHub <noreply@github.com> 2019-07-13 16:48:09 -0700
commit: 7bc9eab8465e16a13768834e49f124a3efbf29f4 (patch)
tree: dd09eee480c782d092581b4b6ad9a7603f90cd40
parent: 1e0d4f16b02e88c7f4d3608a4175c77400a8524b (diff)
parent: 4b735440871e8e90f16a6ec0ceeeb38a429cf92f (diff)
download: pandoc-7bc9eab8465e16a13768834e49f124a3efbf29f4.tar.gz
Merge pull request #5589 from blmage/fix-3992
Add support for EPUB2 covers (fix #3992)
-rw-r--r-- src/Text/Pandoc/Readers/EPUB.hs 23
-rw-r--r-- test/Tests/Readers/EPUB.hs 34
-rw-r--r-- test/epub/epub2_cover.epub bin 0 -> 17937 bytes
-rw-r--r-- test/epub/epub2_no_cover.epub bin 0 -> 9730 bytes
-rw-r--r-- test/epub/epub2_picture.epub bin 0 -> 17939 bytes
-rw-r--r-- test/epub/img_no_cover.epub bin 0 -> 50594 bytes
6 files changed, 48 insertions, 9 deletions
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 4e125ea45..8e9746090 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -21,7 +21,7 @@ import Prelude
import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry,
toArchiveOrFail)
import Control.DeepSeq (NFData, deepseq)
-import Control.Monad (guard, liftM)
+import Control.Monad (guard, liftM, liftM2, mplus)
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BL (ByteString)
import Data.List (isInfixOf, isPrefixOf)
@@ -62,8 +62,8 @@ archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
archiveToEPUB os archive = do
-- root is path to folder with manifest file in
(root, content) <- getManifest archive
- meta <- parseMeta content
- (cover, items) <- parseManifest content
+ (coverId, meta) <- parseMeta content
+ (cover, items) <- parseManifest content coverId
-- No need to collapse here as the image path is from the manifest file
let coverDoc = fromMaybe mempty (imageToPandoc <$> cover)
spine <- parseSpine items content
@@ -124,18 +124,22 @@ imageToPandoc s = B.doc . B.para $ B.image s "" mempty
imageMimes :: [MimeType]
imageMimes = ["image/gif", "image/jpeg", "image/png"]
+type CoverId = String
+
type CoverImage = FilePath
-parseManifest :: (PandocMonad m) => Element -> m (Maybe CoverImage, Items)
-parseManifest content = do
+parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
+parseManifest content coverId = do
manifest <- findElementE (dfName "manifest") content
let items = findChildren (dfName "item") manifest
r <- mapM parseItem items
let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
- return (cover, M.fromList r)
+ return (cover `mplus` coverId, M.fromList r)
where
findCover e = maybe False (isInfixOf "cover-image")
(findAttr (emptyName "properties") e)
+ || fromMaybe False
+ (liftM2 (==) coverId (findAttr (emptyName "id") e))
parseItem e = do
uid <- findAttrE (emptyName "id") e
href <- findAttrE (emptyName "href") e
@@ -153,14 +157,17 @@ parseSpine is e = do
guard linear
findAttr (emptyName "idref") ref
-parseMeta :: PandocMonad m => Element -> m Meta
+parseMeta :: PandocMonad m => Element -> m (Maybe CoverId, Meta)
parseMeta content = do
meta <- findElementE (dfName "metadata") content
let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
dcspace _ = False
let dcs = filterChildrenName dcspace meta
let r = foldr parseMetaItem nullMeta dcs
- return r
+ let coverId = findAttr (emptyName "content") =<< filterChild findCover meta
+ return (coverId, r)
+ where
+ findCover e = maybe False (== "cover") (findAttr (emptyName "name") e)
-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
parseMetaItem :: Element -> Meta -> Meta
diff --git a/test/Tests/Readers/EPUB.hs b/test/Tests/Readers/EPUB.hs
index 82e63bb64..f917668ef 100644
--- a/test/Tests/Readers/EPUB.hs
+++ b/test/Tests/Readers/EPUB.hs
@@ -44,10 +44,42 @@ featuresBag = [("img/check.gif","image/gif",1340)
,("img/multiscripts_and_greek_alphabet.png","image/png",10060)
]
+-- with additional meta tag for cover in EPUB2 format
+epub3CoverBag :: [(String, String, Int)]
+epub3CoverBag = [("wasteland-cover.jpg","image/jpeg",103477)]
+
+epub3NoCoverBag :: [(String, String, Int)]
+epub3NoCoverBag = [("img/check.gif","image/gif",1340)
+ ,("img/check.jpg","image/jpeg",2661)
+ ,("img/check.png","image/png",2815)
+ ]
+
+-- content.opf uses the word `picture` to refer to the cover as much as validly possible
+-- to check if references are resolved correctly
+epub2PictureBag :: [(String, String, Int)]
+epub2PictureBag = [("image/image.jpg","image/jpeg",9713)]
+
+-- content.opf contains the word `cover` as much as possible, to check if possible multiple matches cause errors
+epub2CoverBag :: [(String, String, Int)]
+epub2CoverBag = [("image/cover.jpg","image/jpeg",9713)]
+
+epub2NoCoverBag :: [(String, String, Int)]
+epub2NoCoverBag = []
+
tests :: [TestTree]
tests =
[ testGroup "EPUB Mediabag"
[ testCase "features bag"
- (testMediaBag "epub/img.epub" featuresBag)
+ (testMediaBag "epub/img.epub" featuresBag),
+ testCase "EPUB3 cover bag"
+ (testMediaBag "epub/wasteland.epub" epub3CoverBag),
+ testCase "EPUB3 no cover bag"
+ (testMediaBag "epub/img_no_cover.epub" epub3NoCoverBag),
+ testCase "EPUB2 picture bag"
+ (testMediaBag "epub/epub2_picture.epub" epub2PictureBag),
+ testCase "EPUB2 cover bag"
+ (testMediaBag "epub/epub2_cover.epub" epub2CoverBag),
+ testCase "EPUB2 no cover bag"
+ (testMediaBag "epub/epub2_no_cover.epub" epub2NoCoverBag)
]
]
diff --git a/test/epub/epub2_cover.epub b/test/epub/epub2_cover.epub
new file mode 100644
index 000000000..02f8ec1fb
--- /dev/null
+++ b/test/epub/epub2_cover.epub
Binary files differ
diff --git a/test/epub/epub2_no_cover.epub b/test/epub/epub2_no_cover.epub
new file mode 100644
index 000000000..cd8e24b3f
--- /dev/null
+++ b/test/epub/epub2_no_cover.epub
Binary files differ
diff --git a/test/epub/epub2_picture.epub b/test/epub/epub2_picture.epub
new file mode 100644
index 000000000..d19684ce2
--- /dev/null
+++ b/test/epub/epub2_picture.epub
Binary files differ
diff --git a/test/epub/img_no_cover.epub b/test/epub/img_no_cover.epub
new file mode 100644
index 000000000..d8028ff2e
--- /dev/null
+++ b/test/epub/img_no_cover.epub
Binary files differ