From d60707eed046979a68b51f03a02ae6163c63a4ac Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 8 Oct 2014 11:52:47 -0700 Subject: EPUB writer: Don't add sourceURL to absolute URIs! Closes #1669. If there are further issues, please open a new, targeted issue on the tracker. Some notes on the further issues you gestured at: Data URIs are indeed dereferenced, but why is this a problem? (The function being used to fetch from URLs is used for many different formats. Preserving data URIs would make sense in EPUBs, but not for e.g. PDF output. And by dereferencing we can get a smaller, more efficient EPUB, with the data stored as bytes in a file rather than encoded in textual representation.) "absolute uris are not recognized" -- I assume that is the problem just fixed. If not, please open a new issue. "relative uris are resolved (wrongly) like file paths" -- can you give an example? `<base>` tag is ignored. Yes. I didn't know about the base tag. Could you open a new issue just for this? --- src/Text/Pandoc/Writers/EPUB.hs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 32256cb42..b9c20a2fe 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -64,7 +64,7 @@ import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) -import Network.URI ( unEscapeString ) +import Network.URI ( unEscapeString, isURI ) import Text.Pandoc.MIME (MimeType, getMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) @@ -773,8 +773,12 @@ transformTag opts mediaRef tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - let oldsrc = maybe 
src (</> src) $ writerSourceURL opts - let oldposter = maybe poster (</> poster) $ writerSourceURL opts + let oldsrc = case writerSourceURL opts of + Just u | not (isURI src) -> u </> src + _ -> src + let oldposter = case writerSourceURL opts of + Just u | not (isURI src) -> u </> poster + _ -> poster newsrc <- modifyMediaRef mediaRef oldsrc newposter <- modifyMediaRef mediaRef oldposter let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ @@ -811,8 +815,9 @@ transformInline :: WriterOptions -> Inline -> IO Inline transformInline opts mediaRef (Image lab (src,tit)) = do - let src' = unEscapeString src - let oldsrc = maybe src' (</> src) $ writerSourceURL opts + let oldsrc = case (unEscapeString src, writerSourceURL opts) of + (s, Just u) | not (isURI s) -> u </> s + (s, _) -> s newsrc <- modifyMediaRef mediaRef oldsrc return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) -- cgit v1.2.3