From 9ab04a92f83b9d9e1ec123c25c10f244e41654e0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 23 Oct 2020 23:31:01 -0700 Subject: HTML reader: Parse contents of iframes. See #6770. --- src/Text/Pandoc/Readers/HTML.hs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 05c29d922..7eab27cef 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -89,7 +89,7 @@ readHtml opts inp = do result <- flip runReaderT def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } - [] Nothing Set.empty [] M.empty) + [] Nothing Set.empty [] M.empty opts) "source" tags case result of Right doc -> return doc @@ -112,7 +112,8 @@ data HTMLState = baseHref :: Maybe URI, identifiers :: Set.Set Text, logMessages :: [LogMessage], - macros :: M.Map Text Macro + macros :: M.Map Text Macro, + readerOpts :: ReaderOptions } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -185,6 +186,7 @@ block = do , pDiv , pPlain , pFigure + , pIframe , pRawHtmlBlock ] trace (T.take 60 $ tshow $ B.toList res) @@ -401,6 +403,18 @@ pDiv = try $ do else kvs return $ B.divWith (ident, classes', kvs') contents +pIframe :: PandocMonad m => TagParser m Blocks +pIframe = try $ do + guardDisabled Ext_raw_html + tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src")) + pCloses "iframe" <|> eof + url <- canonicalizeUrl $ fromAttrib "src" tag + (bs, _) <- openURL url + let inp = UTF8.toText bs + opts <- readerOpts <$> getState + Pandoc _ contents <- readHtml opts inp + return $ B.divWith ("",["iframe"],[]) $ B.fromList contents + pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea" @@ -798,9 +812,8 @@ pImage = do pSvg :: PandocMonad m => TagParser m Inlines pSvg = do - exts <- getOption readerExtensions + guardDisabled Ext_raw_html -- if raw_html enabled, parse svg tag as raw - guard $ not (extensionEnabled Ext_raw_html exts) opent@(TagOpen _ attr') <- pSatisfy (matchTagOpen "svg" []) let (ident,cls,_) = toAttr attr' contents <- many (notFollowedBy (pCloses "svg") >> pAny) -- cgit v1.2.3