aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorMatthew Pickering <matthewtpickering@gmail.com>2014-08-08 23:04:03 +0100
committerMatthew Pickering <matthewtpickering@gmail.com>2014-08-08 23:04:03 +0100
commitcfd8c0214c3f369d0f8c6f033325c343b78c7659 (patch)
tree4936855572752add4f3d3620274fb10cded172e9 /src/Text
parent40ae8efddc243f75941328b66fce94e0c0ff3e16 (diff)
downloadpandoc-cfd8c0214c3f369d0f8c6f033325c343b78c7659.tar.gz
EPUB Reader: Improved robustness of image extraction
We now maintain the invariant that when fetchImages is called, all images have absolute paths. This patch fixes several bugs relating to this as there are three places where images can be introduced. (1) During the HTML parse (2) As spine elements (3) As a cover image For (1), the paths are corrected by the transformation renameImages For (2) and (3), we need to append the "root" to the path we parse from the spine
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs16
1 files changed, 9 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 2967bd09a..8073f9ad2 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -52,10 +52,12 @@ runEPUB = either error id . runExcept
--
archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
archiveToEPUB (setEPUBOptions -> os) archive = do
+ -- root is path to folder with manifest file in
(root, content) <- getManifest archive
meta <- parseMeta content
(cover, items) <- parseManifest content
- let coverDoc = fromMaybe mempty (imageToPandoc <$> cover)
+ -- No need to collapse here as the image path is from the manifest file
+ let coverDoc = fromMaybe mempty (imageToPandoc . (root </>) <$> cover)
spine <- parseSpine items content
let escapedSpine = map (escapeURI . takeFileName . fst) spine
Pandoc _ bs <-
@@ -68,17 +70,17 @@ archiveToEPUB (setEPUBOptions -> os) archive = do
parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
when (readerTrace os) (traceM path)
- doc <- mimeToReader mime r path
+ doc <- mimeToReader mime (r </> path)
let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
return $ docSpan <> doc
- mimeToReader :: MonadError String m => MIME -> FilePath -> FilePath -> m Pandoc
- mimeToReader "application/xhtml+xml" r path = do
- fname <- findEntryByPathE (r </> path) archive
- return $ fixInternalReferences (r </> path) .
+ mimeToReader :: MonadError String m => MIME -> FilePath -> m Pandoc
+ mimeToReader "application/xhtml+xml" (normalise -> path) = do
+ fname <- findEntryByPathE path archive
+ return $ fixInternalReferences path .
readHtml os .
UTF8.toStringLazy $
fromEntry fname
- mimeToReader s _ path
+ mimeToReader s path
| s `elem` imageMimes = return $ imageToPandoc path
| otherwise = return $ mempty