diff options
author | John MacFarlane <jgm@berkeley.edu> | 2019-07-13 16:48:09 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-13 16:48:09 -0700 |
commit | 7bc9eab8465e16a13768834e49f124a3efbf29f4 (patch) | |
tree | dd09eee480c782d092581b4b6ad9a7603f90cd40 | |
parent | 1e0d4f16b02e88c7f4d3608a4175c77400a8524b (diff) | |
parent | 4b735440871e8e90f16a6ec0ceeeb38a429cf92f (diff) | |
download | pandoc-7bc9eab8465e16a13768834e49f124a3efbf29f4.tar.gz |
Merge pull request #5589 from blmage/fix-3992
Add support for EPUB2 covers (fix #3992)
-rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 23 | ||||
-rw-r--r-- | test/Tests/Readers/EPUB.hs | 34 | ||||
-rw-r--r-- | test/epub/epub2_cover.epub | bin | 0 -> 17937 bytes | |||
-rw-r--r-- | test/epub/epub2_no_cover.epub | bin | 0 -> 9730 bytes | |||
-rw-r--r-- | test/epub/epub2_picture.epub | bin | 0 -> 17939 bytes | |||
-rw-r--r-- | test/epub/img_no_cover.epub | bin | 0 -> 50594 bytes |
6 files changed, 48 insertions, 9 deletions
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 4e125ea45..8e9746090 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -21,7 +21,7 @@ import Prelude import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry, toArchiveOrFail) import Control.DeepSeq (NFData, deepseq) -import Control.Monad (guard, liftM) +import Control.Monad (guard, liftM, liftM2, mplus) import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL (ByteString) import Data.List (isInfixOf, isPrefixOf) @@ -62,8 +62,8 @@ archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc archiveToEPUB os archive = do -- root is path to folder with manifest file in (root, content) <- getManifest archive - meta <- parseMeta content - (cover, items) <- parseManifest content + (coverId, meta) <- parseMeta content + (cover, items) <- parseManifest content coverId -- No need to collapse here as the image path is from the manifest file let coverDoc = fromMaybe mempty (imageToPandoc <$> cover) spine <- parseSpine items content @@ -124,18 +124,22 @@ imageToPandoc s = B.doc . 
B.para $ B.image s "" mempty imageMimes :: [MimeType] imageMimes = ["image/gif", "image/jpeg", "image/png"] +type CoverId = String + type CoverImage = FilePath -parseManifest :: (PandocMonad m) => Element -> m (Maybe CoverImage, Items) -parseManifest content = do +parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items) +parseManifest content coverId = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest r <- mapM parseItem items let cover = findAttr (emptyName "href") =<< filterChild findCover manifest - return (cover, M.fromList r) + return (cover `mplus` coverId, M.fromList r) where findCover e = maybe False (isInfixOf "cover-image") (findAttr (emptyName "properties") e) + || fromMaybe False + (liftM2 (==) coverId (findAttr (emptyName "id") e)) parseItem e = do uid <- findAttrE (emptyName "id") e href <- findAttrE (emptyName "href") e @@ -153,14 +157,17 @@ parseSpine is e = do guard linear findAttr (emptyName "idref") ref -parseMeta :: PandocMonad m => Element -> m Meta +parseMeta :: PandocMonad m => Element -> m (Maybe CoverId, Meta) parseMeta content = do meta <- findElementE (dfName "metadata") content let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True dcspace _ = False let dcs = filterChildrenName dcspace meta let r = foldr parseMetaItem nullMeta dcs - return r + let coverId = findAttr (emptyName "content") =<< filterChild findCover meta + return (coverId, r) + where + findCover e = maybe False (== "cover") (findAttr (emptyName "name") e) -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem parseMetaItem :: Element -> Meta -> Meta diff --git a/test/Tests/Readers/EPUB.hs b/test/Tests/Readers/EPUB.hs index 82e63bb64..f917668ef 100644 --- a/test/Tests/Readers/EPUB.hs +++ b/test/Tests/Readers/EPUB.hs @@ -44,10 +44,42 @@ featuresBag = [("img/check.gif","image/gif",1340) 
,("img/multiscripts_and_greek_alphabet.png","image/png",10060) ] +-- with additional meta tag for cover in EPUB2 format +epub3CoverBag :: [(String, String, Int)] +epub3CoverBag = [("wasteland-cover.jpg","image/jpeg",103477)] + +epub3NoCoverBag :: [(String, String, Int)] +epub3NoCoverBag = [("img/check.gif","image/gif",1340) + ,("img/check.jpg","image/jpeg",2661) + ,("img/check.png","image/png",2815) + ] + +-- content.opf uses the word `picture` to refer to the cover as much as validly possible +-- to check if references are resolved correctly +epub2PictureBag :: [(String, String, Int)] +epub2PictureBag = [("image/image.jpg","image/jpeg",9713)] + +-- content.opf contains the word `cover` as much as possible, to check if possible multiple matches cause errors +epub2CoverBag :: [(String, String, Int)] +epub2CoverBag = [("image/cover.jpg","image/jpeg",9713)] + +epub2NoCoverBag :: [(String, String, Int)] +epub2NoCoverBag = [] + tests :: [TestTree] tests = [ testGroup "EPUB Mediabag" [ testCase "features bag" - (testMediaBag "epub/img.epub" featuresBag) + (testMediaBag "epub/img.epub" featuresBag), + testCase "EPUB3 cover bag" + (testMediaBag "epub/wasteland.epub" epub3CoverBag), + testCase "EPUB3 no cover bag" + (testMediaBag "epub/img_no_cover.epub" epub3NoCoverBag), + testCase "EPUB2 picture bag" + (testMediaBag "epub/epub2_picture.epub" epub2PictureBag), + testCase "EPUB2 cover bag" + (testMediaBag "epub/epub2_cover.epub" epub2CoverBag), + testCase "EPUB2 no cover bag" + (testMediaBag "epub/epub2_no_cover.epub" epub2NoCoverBag) ] ] diff --git a/test/epub/epub2_cover.epub b/test/epub/epub2_cover.epub Binary files differ new file mode 100644 index 000000000..02f8ec1fb --- /dev/null +++ b/test/epub/epub2_cover.epub diff --git a/test/epub/epub2_no_cover.epub b/test/epub/epub2_no_cover.epub Binary files differ new file mode 100644 index 000000000..cd8e24b3f --- /dev/null +++ b/test/epub/epub2_no_cover.epub diff --git a/test/epub/epub2_picture.epub 
b/test/epub/epub2_picture.epub Binary files differ new file mode 100644 index 000000000..d19684ce2 --- /dev/null +++ b/test/epub/epub2_picture.epub diff --git a/test/epub/img_no_cover.epub b/test/epub/img_no_cover.epub Binary files differ new file mode 100644 index 000000000..d8028ff2e --- /dev/null +++ b/test/epub/img_no_cover.epub |