diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-02-13 13:06:22 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-02-13 13:08:34 -0800 |
commit | d84a6041e12547331f3a252ea4e8b4d229ba1159 (patch) | |
tree | 675b890c2f20d448ffa91c6ad0c669d50409fcb6 | |
parent | 6e73273916a55448c1a12ece343454ef139648a8 (diff) | |
download | pandoc-d84a6041e12547331f3a252ea4e8b4d229ba1159.tar.gz |
HTML reader: fix bad handling of empty src attribute in iframe.
- If src is empty, we simply skip the iframe.
- If src is invalid or cannot be fetched, we issue a warning and skip instead of failing with an error.
- Closes #7099.
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 18 | ||||
-rw-r--r-- | test/command/7099.md | 14 |
2 files changed, 24 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index afc7a3e25..cc60b5501 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -26,7 +26,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
 
 import Control.Applicative ((<|>))
 import Control.Monad (guard, msum, mzero, unless, void)
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
 import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
 import Data.ByteString.Base64 (encode)
 import Data.Char (isAlphaNum, isLetter)
@@ -393,11 +393,17 @@ pIframe = try $ do
   tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
   pCloses "iframe" <|> eof
   url <- canonicalizeUrl $ fromAttrib "src" tag
-  (bs, _) <- openURL url
-  let inp = UTF8.toText bs
-  opts <- readerOpts <$> getState
-  Pandoc _ contents <- readHtml opts inp
-  return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
+  if T.null url
+     then ignore $ renderTags' [tag, TagClose "iframe"]
+     else catchError
+       (do (bs, _) <- openURL url
+           let inp = UTF8.toText bs
+           opts <- readerOpts <$> getState
+           Pandoc _ contents <- readHtml opts inp
+           return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
+       (\e -> do
+         logMessage $ CouldNotFetchResource url (renderError e)
+         ignore $ renderTags' [tag, TagClose "iframe"])
 
 pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
 pRawHtmlBlock = do
diff --git a/test/command/7099.md b/test/command/7099.md
index d9ff8e5ff..33ac8aea1 100644
--- a/test/command/7099.md
+++ b/test/command/7099.md
@@ -2,7 +2,17 @@
 % pandoc -f html -t native --verbose
 <iframe src=""></iframe>
 ^D
-[INFO] Fetching ...
-[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1
+[INFO] Skipped '<iframe src></iframe>' at input line 1 column 16
+[]
+```
+
+```
+% pandoc -f html -t native --verbose
+<iframe src="h:invalid@url"></iframe>
+^D
+[INFO] Fetching h:invalid@url...
+[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url
+  InvalidUrlException "h:invalid@url" "Invalid scheme"
+[INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29
 []
 ```