aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2021-02-13 13:06:22 -0800
committer John MacFarlane <jgm@berkeley.edu> 2021-02-13 13:08:34 -0800
commit d84a6041e12547331f3a252ea4e8b4d229ba1159 (patch)
tree 675b890c2f20d448ffa91c6ad0c669d50409fcb6
parent 6e73273916a55448c1a12ece343454ef139648a8 (diff)
download pandoc-d84a6041e12547331f3a252ea4e8b4d229ba1159.tar.gz
HTML reader: fix bad handling of empty src attribute in iframe.
- If src is empty, we simply skip the iframe.
- If src is invalid or cannot be fetched, we issue a warning and skip instead of failing with an error.
- Closes #7099.
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 18
-rw-r--r-- test/command/7099.md 14
2 files changed, 24 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index afc7a3e25..cc60b5501 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -26,7 +26,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
import Control.Applicative ((<|>))
import Control.Monad (guard, msum, mzero, unless, void)
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
import Data.ByteString.Base64 (encode)
import Data.Char (isAlphaNum, isLetter)
@@ -393,11 +393,17 @@ pIframe = try $ do
tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
pCloses "iframe" <|> eof
url <- canonicalizeUrl $ fromAttrib "src" tag
- (bs, _) <- openURL url
- let inp = UTF8.toText bs
- opts <- readerOpts <$> getState
- Pandoc _ contents <- readHtml opts inp
- return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
+ if T.null url
+ then ignore $ renderTags' [tag, TagClose "iframe"]
+ else catchError
+ (do (bs, _) <- openURL url
+ let inp = UTF8.toText bs
+ opts <- readerOpts <$> getState
+ Pandoc _ contents <- readHtml opts inp
+ return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
+ (\e -> do
+ logMessage $ CouldNotFetchResource url (renderError e)
+ ignore $ renderTags' [tag, TagClose "iframe"])
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do
diff --git a/test/command/7099.md b/test/command/7099.md
index d9ff8e5ff..33ac8aea1 100644
--- a/test/command/7099.md
+++ b/test/command/7099.md
@@ -2,7 +2,17 @@
% pandoc -f html -t native --verbose
<iframe src=""></iframe>
^D
-[INFO] Fetching ...
-[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1
+[INFO] Skipped '<iframe src></iframe>' at input line 1 column 16
+[]
+```
+
+```
+% pandoc -f html -t native --verbose
+<iframe src="h:invalid@url"></iframe>
+^D
+[INFO] Fetching h:invalid@url...
+[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url
+ InvalidUrlException "h:invalid@url" "Invalid scheme"
+[INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29
[]
```