From ba5b426ded8b01a290da27aac9d3bb9a7a26de8c Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 9 Aug 2016 20:27:08 +0200 Subject: Org reader: ensure image sources are proper links Image sources as those in plain images, image links, or figures, must be proper URIs or relative file paths to be recognized as images. This restriction is now enforced for all image sources. This also fixes the reader's usage of uncleaned image sources, leading to `file:` prefixes not being deleted from figure images (e.g. `[[file:image.jpg]]` leading to a broken image `) Thanks to @bsag for noticing this bug. --- src/Text/Pandoc/Readers/Org/Blocks.hs | 34 ++++++++++++++++++++++------------ src/Text/Pandoc/Readers/Org/Inlines.hs | 33 ++++++++------------------------- src/Text/Pandoc/Readers/Org/Shared.hs | 25 +++++++++++++++++++++++-- 3 files changed, 53 insertions(+), 39 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 0bd82ce2f..80895e038 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -39,8 +39,8 @@ import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Shared - ( isImageFilename, rundocBlockClass, toRundocAttrib - , translateLang ) + ( cleanLinkString, isImageFilename, rundocBlockClass + , toRundocAttrib, translateLang ) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines, Blocks ) @@ -571,23 +571,33 @@ figure :: OrgParser (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* newline - guard . not . isNothing . blockAttrCaption $ figAttrs - guard (isImageFilename src) - let figName = fromMaybe mempty $ blockAttrName figAttrs - let figLabel = fromMaybe mempty $ blockAttrLabel figAttrs - let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs - let figKeyVals = blockAttrKeyValues figAttrs - let attr = (figLabel, mempty, figKeyVals) - return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption) + case cleanLinkString src of + Nothing -> mzero + Just imgSrc -> do + guard (not . isNothing . blockAttrCaption $ figAttrs) + guard (isImageFilename imgSrc) + return $ figureBlock figAttrs imgSrc where + selfTarget :: OrgParser String + selfTarget = try $ char '[' *> linkTarget <* char ']' + + figureBlock :: BlockAttributes -> String -> (F Blocks) + figureBlock figAttrs imgSrc = + let + figName = fromMaybe mempty $ blockAttrName figAttrs + figLabel = fromMaybe mempty $ blockAttrLabel figAttrs + figCaption = fromMaybe mempty $ blockAttrCaption figAttrs + figKeyVals = blockAttrKeyValues figAttrs + attr = (figLabel, mempty, figKeyVals) + in + B.para . B.imageWith attr imgSrc (withFigPrefix figName) <$> figCaption + withFigPrefix :: String -> String withFigPrefix cs = if "fig:" `isPrefixOf` cs then cs else "fig:" ++ cs - selfTarget :: OrgParser String - selfTarget = try $ char '[' *> linkTarget <* char ']' -- -- Examples diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index e1a66a8c7..31f098d27 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -37,8 +37,8 @@ import Text.Pandoc.Readers.Org.BlockStarts import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Shared - ( isImageFilename, rundocBlockClass, toRundocAttrib - , translateLang ) + ( cleanLinkString, isImageFilename, rundocBlockClass + , toRundocAttrib, translateLang ) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines ) @@ -52,7 +52,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Prelude hiding (sequence) import Control.Monad ( guard, mplus, mzero, when, void ) import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( intersperse, isPrefixOf ) +import Data.List ( intersperse ) import Data.Maybe ( fromMaybe ) import qualified Data.Map as M import Data.Traversable (sequence) @@ -435,9 +435,11 @@ explicitOrImageLink = try $ do char ']' return $ do src <- srcF - if isImageFilename title - then pure $ B.link src "" $ B.image title mempty mempty - else linkToInlinesF src =<< title' + case cleanLinkString title of + Just imgSrc | isImageFilename imgSrc -> + pure $ B.link src "" $ B.image imgSrc mempty mempty + _ -> + linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do @@ -482,25 +484,6 @@ linkToInlinesF linkStr = else pure . B.link cleanedLink "" Nothing -> internalLink linkStr -- other internal link --- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if --- the string does not appear to be a link. -cleanLinkString :: String -> Maybe String -cleanLinkString s = - case s of - '/':_ -> Just $ "file://" ++ s -- absolute path - '.':'/':_ -> Just s -- relative path - '.':'.':'/':_ -> Just s -- relative path - -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' - _ | isUrl s -> Just s -- URL - _ -> Nothing - where - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) - internalLink :: String -> Inlines -> F Inlines internalLink link title = do anchorB <- (link `elem`) <$> asksF orgStateAnchorIds diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 3ba46b9e4..8c87cfa25 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -27,13 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Utility functions used in other Pandoc Org modules. -} module Text.Pandoc.Readers.Org.Shared - ( isImageFilename + ( cleanLinkString + , isImageFilename , rundocBlockClass , toRundocAttrib , translateLang ) where import Control.Arrow ( first ) +import Data.Char ( isAlphaNum ) import Data.List ( isPrefixOf, isSuffixOf ) @@ -41,12 +43,31 @@ import Data.List ( isPrefixOf, isSuffixOf ) isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - (any (\x -> (x++":") `isPrefixOf` filename) protocols || + (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols || ':' `notElem` filename) where imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] +-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if +-- the string does not appear to be a link. +cleanLinkString :: String -> Maybe String +cleanLinkString s = + case s of + '/':_ -> Just $ "file://" ++ s -- absolute path + '.':'/':_ -> Just s -- relative path + '.':'.':'/':_ -> Just s -- relative path + -- Relative path or URL (file schema) + 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' + _ | isUrl s -> Just s -- URL + _ -> Nothing + where + isUrl :: String -> Bool + isUrl cs = + let (scheme, path) = break (== ':') cs + in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme + && not (null path) + -- | Prefix used for Rundoc classes and arguments. rundocPrefix :: String rundocPrefix = "rundoc-" -- cgit v1.2.3