From ba5b426ded8b01a290da27aac9d3bb9a7a26de8c Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 9 Aug 2016 20:27:08 +0200
Subject: Org reader: ensure image sources are proper links

Image sources as those in plain images, image links, or figures, must be
proper URIs or relative file paths to be recognized as images.  This
restriction is now enforced for all image sources.

This also fixes the reader's usage of uncleaned image sources, leading
to `file:` prefixes not being deleted from figure
images (e.g. `[[file:image.jpg]]` leading to a broken image `<img
src="file:image.jpg"/>)

Thanks to @bsag for noticing this bug.
---
 src/Text/Pandoc/Readers/Org/Blocks.hs  | 34 ++++++++++++++++++++++------------
 src/Text/Pandoc/Readers/Org/Inlines.hs | 33 ++++++++-------------------------
 src/Text/Pandoc/Readers/Org/Shared.hs  | 25 +++++++++++++++++++++++--
 3 files changed, 53 insertions(+), 39 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 0bd82ce2f..80895e038 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -39,8 +39,8 @@ import           Text.Pandoc.Readers.Org.Inlines
 import           Text.Pandoc.Readers.Org.ParserState
 import           Text.Pandoc.Readers.Org.Parsing
 import           Text.Pandoc.Readers.Org.Shared
-                   ( isImageFilename, rundocBlockClass, toRundocAttrib
-                   , translateLang )
+                   ( cleanLinkString, isImageFilename, rundocBlockClass
+                   , toRundocAttrib, translateLang )
 
 import qualified Text.Pandoc.Builder as B
 import           Text.Pandoc.Builder ( Inlines, Blocks )
@@ -571,23 +571,33 @@ figure :: OrgParser (F Blocks)
 figure = try $ do
   figAttrs <- blockAttributes
   src <- skipSpaces *> selfTarget <* skipSpaces <* newline
-  guard . not . isNothing . blockAttrCaption $ figAttrs
-  guard (isImageFilename src)
-  let figName    = fromMaybe mempty $ blockAttrName figAttrs
-  let figLabel   = fromMaybe mempty $ blockAttrLabel figAttrs
-  let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
-  let figKeyVals = blockAttrKeyValues figAttrs
-  let attr       = (figLabel, mempty, figKeyVals)
-  return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
+  case cleanLinkString src of
+    Nothing     -> mzero
+    Just imgSrc -> do
+      guard (not . isNothing . blockAttrCaption $ figAttrs)
+      guard (isImageFilename imgSrc)
+      return $ figureBlock figAttrs imgSrc
  where
+   selfTarget :: OrgParser String
+   selfTarget = try $ char '[' *> linkTarget <* char ']'
+
+   figureBlock :: BlockAttributes -> String -> (F Blocks)
+   figureBlock figAttrs imgSrc =
+     let
+       figName    = fromMaybe mempty $ blockAttrName figAttrs
+       figLabel   = fromMaybe mempty $ blockAttrLabel figAttrs
+       figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
+       figKeyVals = blockAttrKeyValues figAttrs
+       attr       = (figLabel, mempty, figKeyVals)
+     in
+       B.para . B.imageWith attr imgSrc (withFigPrefix figName) <$> figCaption
+
    withFigPrefix :: String -> String
    withFigPrefix cs =
      if "fig:" `isPrefixOf` cs
      then cs
      else "fig:" ++ cs
 
-   selfTarget :: OrgParser String
-   selfTarget = try $ char '[' *> linkTarget <* char ']'
 
 --
 -- Examples
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index e1a66a8c7..31f098d27 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -37,8 +37,8 @@ import           Text.Pandoc.Readers.Org.BlockStarts
 import           Text.Pandoc.Readers.Org.ParserState
 import           Text.Pandoc.Readers.Org.Parsing
 import           Text.Pandoc.Readers.Org.Shared
-                   ( isImageFilename, rundocBlockClass, toRundocAttrib
-                   , translateLang )
+                   ( cleanLinkString, isImageFilename, rundocBlockClass
+                   , toRundocAttrib, translateLang )
 
 import qualified Text.Pandoc.Builder as B
 import           Text.Pandoc.Builder ( Inlines )
@@ -52,7 +52,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 import           Prelude hiding (sequence)
 import           Control.Monad ( guard, mplus, mzero, when, void )
 import           Data.Char ( isAlphaNum, isSpace )
-import           Data.List ( intersperse, isPrefixOf )
+import           Data.List ( intersperse )
 import           Data.Maybe ( fromMaybe )
 import qualified Data.Map as M
 import           Data.Traversable (sequence)
@@ -435,9 +435,11 @@ explicitOrImageLink = try $ do
   char ']'
   return $ do
     src <- srcF
-    if isImageFilename title
-      then pure $ B.link src "" $ B.image title mempty mempty
-      else linkToInlinesF src =<< title'
+    case cleanLinkString title of
+      Just imgSrc | isImageFilename imgSrc ->
+        pure $ B.link src "" $ B.image imgSrc mempty mempty
+      _ ->
+        linkToInlinesF src =<< title'
 
 selflinkOrImage :: OrgParser (F Inlines)
 selflinkOrImage = try $ do
@@ -482,25 +484,6 @@ linkToInlinesF linkStr =
                                        else pure . B.link cleanedLink ""
                  Nothing -> internalLink linkStr  -- other internal link
 
--- | Cleanup and canonicalize a string describing a link.  Return @Nothing@ if
--- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
-  case s of
-    '/':_                  -> Just $ "file://" ++ s  -- absolute path
-    '.':'/':_              -> Just s                 -- relative path
-    '.':'.':'/':_          -> Just s                 -- relative path
-    -- Relative path or URL (file schema)
-    'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
-    _ | isUrl s            -> Just s                 -- URL
-    _                      -> Nothing
- where
-   isUrl :: String -> Bool
-   isUrl cs =
-     let (scheme, path) = break (== ':') cs
-     in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
-          && not (null path)
-
 internalLink :: String -> Inlines -> F Inlines
 internalLink link title = do
   anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 3ba46b9e4..8c87cfa25 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -27,13 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Utility functions used in other Pandoc Org modules.
 -}
 module Text.Pandoc.Readers.Org.Shared
-  ( isImageFilename
+  ( cleanLinkString
+  , isImageFilename
   , rundocBlockClass
   , toRundocAttrib
   , translateLang
   ) where
 
 import           Control.Arrow ( first )
+import           Data.Char ( isAlphaNum )
 import           Data.List ( isPrefixOf, isSuffixOf )
 
 
@@ -41,12 +43,31 @@ import           Data.List ( isPrefixOf, isSuffixOf )
 isImageFilename :: String -> Bool
 isImageFilename filename =
   any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
-  (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+  (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols ||
    ':' `notElem` filename)
  where
    imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
    protocols = [ "file", "http", "https" ]
 
+-- | Cleanup and canonicalize a string describing a link.  Return @Nothing@ if
+-- the string does not appear to be a link.
+cleanLinkString :: String -> Maybe String
+cleanLinkString s =
+  case s of
+    '/':_                  -> Just $ "file://" ++ s  -- absolute path
+    '.':'/':_              -> Just s                 -- relative path
+    '.':'.':'/':_          -> Just s                 -- relative path
+    -- Relative path or URL (file schema)
+    'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
+    _ | isUrl s            -> Just s                 -- URL
+    _                      -> Nothing
+ where
+   isUrl :: String -> Bool
+   isUrl cs =
+     let (scheme, path) = break (== ':') cs
+     in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
+          && not (null path)
+
 -- | Prefix used for Rundoc classes and arguments.
 rundocPrefix :: String
 rundocPrefix = "rundoc-"
-- 
cgit v1.2.3