aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2016-08-09 20:27:08 +0200
committerAlbert Krewinkel <albert@zeitkraut.de>2016-08-09 20:27:08 +0200
commitba5b426ded8b01a290da27aac9d3bb9a7a26de8c (patch)
tree7131a90fed0ef22f9201954c25f5b1b1168d7314 /src/Text/Pandoc/Readers/Org
parent0fbb676c81ea258cfbfa8f1a726b37edf2bd2b90 (diff)
downloadpandoc-ba5b426ded8b01a290da27aac9d3bb9a7a26de8c.tar.gz
Org reader: ensure image sources are proper links
Image sources as those in plain images, image links, or figures, must be proper URIs or relative file paths to be recognized as images. This restriction is now enforced for all image sources. This also fixes the reader's usage of uncleaned image sources, leading to `file:` prefixes not being deleted from figure images (e.g. `[[file:image.jpg]]` leading to a broken image `<img src="file:image.jpg"/>) Thanks to @bsag for noticing this bug.
Diffstat (limited to 'src/Text/Pandoc/Readers/Org')
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs34
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs33
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs25
3 files changed, 53 insertions, 39 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 0bd82ce2f..80895e038 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -39,8 +39,8 @@ import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared
- ( isImageFilename, rundocBlockClass, toRundocAttrib
- , translateLang )
+ ( cleanLinkString, isImageFilename, rundocBlockClass
+ , toRundocAttrib, translateLang )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines, Blocks )
@@ -571,23 +571,33 @@ figure :: OrgParser (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* newline
- guard . not . isNothing . blockAttrCaption $ figAttrs
- guard (isImageFilename src)
- let figName = fromMaybe mempty $ blockAttrName figAttrs
- let figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
- let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
- let figKeyVals = blockAttrKeyValues figAttrs
- let attr = (figLabel, mempty, figKeyVals)
- return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
+ case cleanLinkString src of
+ Nothing -> mzero
+ Just imgSrc -> do
+ guard (not . isNothing . blockAttrCaption $ figAttrs)
+ guard (isImageFilename imgSrc)
+ return $ figureBlock figAttrs imgSrc
where
+ selfTarget :: OrgParser String
+ selfTarget = try $ char '[' *> linkTarget <* char ']'
+
+ figureBlock :: BlockAttributes -> String -> (F Blocks)
+ figureBlock figAttrs imgSrc =
+ let
+ figName = fromMaybe mempty $ blockAttrName figAttrs
+ figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
+ figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
+ figKeyVals = blockAttrKeyValues figAttrs
+ attr = (figLabel, mempty, figKeyVals)
+ in
+ B.para . B.imageWith attr imgSrc (withFigPrefix figName) <$> figCaption
+
withFigPrefix :: String -> String
withFigPrefix cs =
if "fig:" `isPrefixOf` cs
then cs
else "fig:" ++ cs
- selfTarget :: OrgParser String
- selfTarget = try $ char '[' *> linkTarget <* char ']'
--
-- Examples
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index e1a66a8c7..31f098d27 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -37,8 +37,8 @@ import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared
- ( isImageFilename, rundocBlockClass, toRundocAttrib
- , translateLang )
+ ( cleanLinkString, isImageFilename, rundocBlockClass
+ , toRundocAttrib, translateLang )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines )
@@ -52,7 +52,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Prelude hiding (sequence)
import Control.Monad ( guard, mplus, mzero, when, void )
import Data.Char ( isAlphaNum, isSpace )
-import Data.List ( intersperse, isPrefixOf )
+import Data.List ( intersperse )
import Data.Maybe ( fromMaybe )
import qualified Data.Map as M
import Data.Traversable (sequence)
@@ -435,9 +435,11 @@ explicitOrImageLink = try $ do
char ']'
return $ do
src <- srcF
- if isImageFilename title
- then pure $ B.link src "" $ B.image title mempty mempty
- else linkToInlinesF src =<< title'
+ case cleanLinkString title of
+ Just imgSrc | isImageFilename imgSrc ->
+ pure $ B.link src "" $ B.image imgSrc mempty mempty
+ _ ->
+ linkToInlinesF src =<< title'
selflinkOrImage :: OrgParser (F Inlines)
selflinkOrImage = try $ do
@@ -482,25 +484,6 @@ linkToInlinesF linkStr =
else pure . B.link cleanedLink ""
Nothing -> internalLink linkStr -- other internal link
--- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
--- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
- case s of
- '/':_ -> Just $ "file://" ++ s -- absolute path
- '.':'/':_ -> Just s -- relative path
- '.':'.':'/':_ -> Just s -- relative path
- -- Relative path or URL (file schema)
- 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
- _ | isUrl s -> Just s -- URL
- _ -> Nothing
- where
- isUrl :: String -> Bool
- isUrl cs =
- let (scheme, path) = break (== ':') cs
- in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
- && not (null path)
-
internalLink :: String -> Inlines -> F Inlines
internalLink link title = do
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 3ba46b9e4..8c87cfa25 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -27,13 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Utility functions used in other Pandoc Org modules.
-}
module Text.Pandoc.Readers.Org.Shared
- ( isImageFilename
+ ( cleanLinkString
+ , isImageFilename
, rundocBlockClass
, toRundocAttrib
, translateLang
) where
import Control.Arrow ( first )
+import Data.Char ( isAlphaNum )
import Data.List ( isPrefixOf, isSuffixOf )
@@ -41,12 +43,31 @@ import Data.List ( isPrefixOf, isSuffixOf )
isImageFilename :: String -> Bool
isImageFilename filename =
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
- (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+ (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols ||
':' `notElem` filename)
where
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
protocols = [ "file", "http", "https" ]
+-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
+-- the string does not appear to be a link.
+cleanLinkString :: String -> Maybe String
+cleanLinkString s =
+ case s of
+ '/':_ -> Just $ "file://" ++ s -- absolute path
+ '.':'/':_ -> Just s -- relative path
+ '.':'.':'/':_ -> Just s -- relative path
+ -- Relative path or URL (file schema)
+ 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
+ _ | isUrl s -> Just s -- URL
+ _ -> Nothing
+ where
+ isUrl :: String -> Bool
+ isUrl cs =
+ let (scheme, path) = break (== ':') cs
+ in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
+ && not (null path)
+
-- | Prefix used for Rundoc classes and arguments.
rundocPrefix :: String
rundocPrefix = "rundoc-"