aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs47
-rw-r--r--src/Text/Pandoc/Shared.hs27
-rw-r--r--tests/Tests/Shared.hs24
3 files changed, 68 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 7462b3711..b6b271488 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -14,7 +14,7 @@ import Text.Pandoc.Walk (walk, query)
import Text.Pandoc.Generic(bottomUp)
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
-import Text.Pandoc.Shared (escapeURI)
+import Text.Pandoc.Shared (escapeURI, collapseFilePath)
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except)
import qualified Text.Pandoc.Builder as B
@@ -22,7 +22,7 @@ import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry
, findEntryByPath, Entry)
import qualified Data.ByteString.Lazy as BL (ByteString)
import System.FilePath ( takeFileName, (</>), dropFileName, normalise
- , joinPath, dropFileName, splitDirectories
+ , dropFileName
, splitFileName )
import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
import Control.Applicative ((<$>))
@@ -50,50 +50,52 @@ runEPUB = either error id . runExcept
--
archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
archiveToEPUB os archive = do
+ -- root is path to folder with manifest file in
(root, content) <- getManifest archive
meta <- parseMeta content
(cover, items) <- parseManifest content
- let coverDoc = fromMaybe mempty (imageToPandoc <$> cover)
+ -- No need to collapse here as the image path is from the manifest file
+ let coverDoc = fromMaybe mempty (imageToPandoc . (root </>) <$> cover)
spine <- parseSpine items content
let escapedSpine = map (escapeURI . takeFileName . fst) spine
Pandoc _ bs <-
foldM' (\a b -> ((a <>) . bottomUp (prependHash escapedSpine))
`liftM` parseSpineElem root b) mempty spine
let ast = coverDoc <> (Pandoc meta bs)
- let mediaBag = fetchImages (M.elems items) root archive ast
+ let mediaBag = fetchImages (M.elems items) archive ast
return $ (ast, mediaBag)
where
os' = os {readerParseRaw = True}
parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
when (readerTrace os) (traceM path)
- doc <- mimeToReader mime r path
+ doc <- mimeToReader mime (r </> path)
let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
return $ docSpan <> doc
- mimeToReader :: MonadError String m => MIME -> FilePath -> FilePath -> m Pandoc
- mimeToReader "application/xhtml+xml" r path = do
- fname <- findEntryByPathE (r </> path) archive
- return $ fixInternalReferences (r </> path) .
+ mimeToReader :: MonadError String m => MIME -> FilePath -> m Pandoc
+ mimeToReader "application/xhtml+xml" (normalise -> path) = do
+ fname <- findEntryByPathE path archive
+ return $ fixInternalReferences path .
readHtml os' .
UTF8.toStringLazy $
fromEntry fname
- mimeToReader s _ path
+ mimeToReader s path
| s `elem` imageMimes = return $ imageToPandoc path
| otherwise = return $ mempty
+-- paths should be absolute when this function is called
+-- renameImages should do this
fetchImages :: [(FilePath, MIME)]
- -> FilePath
-> Archive
-> Pandoc
-> MediaBag
-fetchImages mimes root arc (query iq -> links) =
+fetchImages mimes arc (query iq -> links) =
foldr (uncurry3 insertMedia) mempty
(mapMaybe getEntry links)
where
- getEntry (normalise -> l) =
- let mediaPos = normalise (root </> l) in
- (l , lookup mediaPos mimes, ) . fromEntry
- <$> findEntryByPath mediaPos arc
+ getEntry link =
+ (link , lookup link mimes, ) . fromEntry
+ <$> findEntryByPath link arc
iq :: Inline -> [FilePath]
iq (Image _ (url, _)) = [url]
@@ -101,20 +103,9 @@ iq _ = []
-- Remove relative paths
renameImages :: FilePath -> Inline -> Inline
-renameImages root (Image a (url, b)) = Image a (collapse (root </> url), b)
+renameImages root (Image a (url, b)) = Image a (collapseFilePath (root </> url), b)
renameImages _ x = x
-collapse :: FilePath -> FilePath
-collapse = joinPath . reverse . foldl go [] . splitDirectories
- where
- go rs "." = rs
- go r@(p:rs) ".." = case p of
- ".." -> ("..":r)
- "/" -> ("..":r)
- _ -> rs
- go _ "/" = ["/"]
- go rs x = x:rs
-
imageToPandoc :: FilePath -> Pandoc
imageToPandoc s = B.doc . B.para $ B.image s "" mempty
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 51da34e79..a91ca9115 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -80,6 +80,7 @@ module Text.Pandoc.Shared (
fetchItem,
fetchItem',
openURL,
+ collapseFilePath,
-- * Error handling
err,
warn,
@@ -105,6 +106,7 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
unEscapeString, parseURIReference, isAllowedInURI )
import qualified Data.Set as Set
import System.Directory
+import System.FilePath (joinPath, splitDirectories)
import Text.Pandoc.MIME (getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
@@ -530,7 +532,7 @@ stringify = query go . walk deNote
deNote x = x
-- | Bring all regular text in a pandoc structure to uppercase.
---
+--
-- This function correctly handles cases where a lowercase character doesn't
-- match to a single uppercase character – e.g. “Straße” would be converted
-- to “STRASSE”, not “STRAßE”.
@@ -854,6 +856,29 @@ warn msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
+-- | Remove intermediate "." and ".." directories from a path.
+--
+-- @
+-- collapseFilePath "./foo" == "foo"
+-- collapseFilePath "/bar/../baz" == "/baz"
+-- collapseFilePath "/../baz" == "/../baz"
+-- collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar"
+-- collapseFilePath "parent/foo/baz/../../bar" == "parent/bar"
+-- collapseFilePath "parent/foo/.." == "parent"
+-- collapseFilePath "/parent/foo/../../bar" == "/bar"
+-- @
+collapseFilePath :: FilePath -> FilePath
+collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories
+ where
+ go rs "." = rs
+ go r@(p:rs) ".." = case p of
+ ".." -> ("..":r)
+ "/" -> ("..":r)
+ _ -> rs
+ go _ "/" = ["/"]
+ go rs x = x:rs
+
+
--
-- Safe read
--
diff --git a/tests/Tests/Shared.hs b/tests/Tests/Shared.hs
index c9e2e21f5..b6671835c 100644
--- a/tests/Tests/Shared.hs
+++ b/tests/Tests/Shared.hs
@@ -6,7 +6,7 @@ import Test.Framework
import Tests.Helpers
import Tests.Arbitrary()
import Test.Framework.Providers.HUnit
-import Test.HUnit ( assertBool )
+import Test.HUnit ( assertBool, (@?=) )
import Text.Pandoc.Builder
import Data.Monoid
@@ -23,6 +23,7 @@ tests = [ testGroup "normalize"
(let x = [(str "word", [para (str "def"), mempty])]
in compactify'DL x == x)
]
+ , testGroup "collapseFilePath" testCollapse
]
p_normalize_blocks_rt :: [Block] -> Bool
@@ -36,3 +37,24 @@ p_normalize_inlines_rt ils =
p_normalize_no_trailing_spaces :: [Inline] -> Bool
p_normalize_no_trailing_spaces ils = null ils' || last ils' /= Space
where ils' = normalizeInlines $ ils ++ [Space]
+
+testCollapse :: [Test]
+testCollapse = map (testCase "collapse")
+ [ (collapseFilePath "" @?= "")
+ , (collapseFilePath "./foo" @?= "foo")
+ , (collapseFilePath "././../foo" @?= "../foo")
+ , (collapseFilePath "../foo" @?= "../foo")
+ , (collapseFilePath "/bar/../baz" @?= "/baz")
+ , (collapseFilePath "/../baz" @?= "/../baz")
+ , (collapseFilePath "./foo/.././bar/../././baz" @?= "baz")
+ , (collapseFilePath "./" @?= "")
+ , (collapseFilePath "././" @?= "")
+ , (collapseFilePath "../" @?= "..")
+ , (collapseFilePath ".././" @?= "..")
+ , (collapseFilePath "./../" @?= "..")
+ , (collapseFilePath "../../" @?= "../..")
+ , (collapseFilePath "parent/foo/baz/../bar" @?= "parent/foo/bar")
+ , (collapseFilePath "parent/foo/baz/../../bar" @?= "parent/bar")
+ , (collapseFilePath "parent/foo/.." @?= "parent")
+ , (collapseFilePath "/parent/foo/../../bar" @?= "/bar")
+ , (collapseFilePath "/./parent/foo" @?= "/parent/foo")]