diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 47 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 27 |
2 files changed, 45 insertions, 29 deletions
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 7462b3711..b6b271488 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -14,7 +14,7 @@ import Text.Pandoc.Walk (walk, query) import Text.Pandoc.Generic(bottomUp) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Options ( ReaderOptions(..), readerTrace) -import Text.Pandoc.Shared (escapeURI) +import Text.Pandoc.Shared (escapeURI, collapseFilePath) import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except) import qualified Text.Pandoc.Builder as B @@ -22,7 +22,7 @@ import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry , findEntryByPath, Entry) import qualified Data.ByteString.Lazy as BL (ByteString) import System.FilePath ( takeFileName, (</>), dropFileName, normalise - , joinPath, dropFileName, splitDirectories + , dropFileName , splitFileName ) import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy) import Control.Applicative ((<$>)) @@ -50,50 +50,52 @@ runEPUB = either error id . runExcept -- archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) archiveToEPUB os archive = do + -- root is path to folder with manifest file in (root, content) <- getManifest archive meta <- parseMeta content (cover, items) <- parseManifest content - let coverDoc = fromMaybe mempty (imageToPandoc <$> cover) + -- No need to collapse here as the image path is from the manifest file + let coverDoc = fromMaybe mempty (imageToPandoc . (root </>) <$> cover) spine <- parseSpine items content let escapedSpine = map (escapeURI . takeFileName . fst) spine Pandoc _ bs <- foldM' (\a b -> ((a <>) . bottomUp (prependHash escapedSpine)) `liftM` parseSpineElem root b) mempty spine let ast = coverDoc <> (Pandoc meta bs) - let mediaBag = fetchImages (M.elems items) root archive ast + let mediaBag = fetchImages (M.elems items) archive ast return $ (ast, mediaBag) where os' = os {readerParseRaw = True} parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do when (readerTrace os) (traceM path) - doc <- mimeToReader mime r path + doc <- mimeToReader mime (r </> path) let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty return $ docSpan <> doc - mimeToReader :: MonadError String m => MIME -> FilePath -> FilePath -> m Pandoc - mimeToReader "application/xhtml+xml" r path = do - fname <- findEntryByPathE (r </> path) archive - return $ fixInternalReferences (r </> path) . + mimeToReader :: MonadError String m => MIME -> FilePath -> m Pandoc + mimeToReader "application/xhtml+xml" (normalise -> path) = do + fname <- findEntryByPathE path archive + return $ fixInternalReferences path . readHtml os' . UTF8.toStringLazy $ fromEntry fname - mimeToReader s _ path + mimeToReader s path | s `elem` imageMimes = return $ imageToPandoc path | otherwise = return $ mempty +-- paths should be absolute when this function is called +-- renameImages should do this fetchImages :: [(FilePath, MIME)] - -> FilePath -> Archive -> Pandoc -> MediaBag -fetchImages mimes root arc (query iq -> links) = +fetchImages mimes arc (query iq -> links) = foldr (uncurry3 insertMedia) mempty (mapMaybe getEntry links) where - getEntry (normalise -> l) = - let mediaPos = normalise (root </> l) in - (l , lookup mediaPos mimes, ) . fromEntry - <$> findEntryByPath mediaPos arc + getEntry link = + (link , lookup link mimes, ) . fromEntry + <$> findEntryByPath link arc iq :: Inline -> [FilePath] iq (Image _ (url, _)) = [url] @@ -101,20 +103,9 @@ iq _ = [] -- Remove relative paths renameImages :: FilePath -> Inline -> Inline -renameImages root (Image a (url, b)) = Image a (collapse (root </> url), b) +renameImages root (Image a (url, b)) = Image a (collapseFilePath (root </> url), b) renameImages _ x = x -collapse :: FilePath -> FilePath -collapse = joinPath . reverse . foldl go [] . splitDirectories - where - go rs "." = rs - go r@(p:rs) ".." = case p of - ".." -> ("..":r) - "/" -> ("..":r) - _ -> rs - go _ "/" = ["/"] - go rs x = x:rs - imageToPandoc :: FilePath -> Pandoc imageToPandoc s = B.doc . B.para $ B.image s "" mempty diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 51da34e79..a91ca9115 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -80,6 +80,7 @@ module Text.Pandoc.Shared ( fetchItem, fetchItem', openURL, + collapseFilePath, -- * Error handling err, warn, @@ -105,6 +106,7 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI ) import qualified Data.Set as Set import System.Directory +import System.FilePath (joinPath, splitDirectories) import Text.Pandoc.MIME (getMimeType) import System.FilePath ( (</>), takeExtension, dropExtension) import Data.Generics (Typeable, Data) @@ -530,7 +532,7 @@ stringify = query go . walk deNote deNote x = x -- | Bring all regular text in a pandoc structure to uppercase. --- +-- -- This function correctly handles cases where a lowercase character doesn't -- match to a single uppercase character – e.g. “Straße” would be converted -- to “STRASSE”, not “STRAßE”. @@ -854,6 +856,29 @@ warn msg = do name <- getProgName UTF8.hPutStrLn stderr $ name ++ ": " ++ msg +-- | Remove intermediate "." and ".." directories from a path. +-- +-- @ +-- collapseFilePath "./foo" == "foo" +-- collapseFilePath "/bar/../baz" == "/baz" +-- collapseFilePath "/../baz" == "/../baz" +-- collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar" +-- collapseFilePath "parent/foo/baz/../../bar" == "parent/bar" +-- collapseFilePath "parent/foo/.." == "parent" +-- collapseFilePath "/parent/foo/../../bar" == "/bar" +-- @ +collapseFilePath :: FilePath -> FilePath +collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories + where + go rs "." = rs + go r@(p:rs) ".." = case p of + ".." -> ("..":r) + "/" -> ("..":r) + _ -> rs + go _ "/" = ["/"] + go rs x = x:rs + + -- -- Safe read -- |