aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/HTML.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2020-10-23 23:31:01 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2020-10-23 23:31:36 -0700
commit9ab04a92f83b9d9e1ec123c25c10f244e41654e0 (patch)
treef007a3801d5f84c53f9eab5da1cfdccb881d5127 /src/Text/Pandoc/Readers/HTML.hs
parent4bf171e11dd324f5c65e15da17717b701e961b17 (diff)
downloadpandoc-9ab04a92f83b9d9e1ec123c25c10f244e41654e0.tar.gz
HTML reader: Parse contents of iframes.
See #6770.
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs21
1 files changed, 17 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 05c29d922..7eab27cef 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -89,7 +89,7 @@ readHtml opts inp = do
result <- flip runReaderT def $
runParserT parseDoc
(HTMLState def{ stateOptions = opts }
- [] Nothing Set.empty [] M.empty)
+ [] Nothing Set.empty [] M.empty opts)
"source" tags
case result of
Right doc -> return doc
@@ -112,7 +112,8 @@ data HTMLState =
baseHref :: Maybe URI,
identifiers :: Set.Set Text,
logMessages :: [LogMessage],
- macros :: M.Map Text Macro
+ macros :: M.Map Text Macro,
+ readerOpts :: ReaderOptions
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -185,6 +186,7 @@ block = do
, pDiv
, pPlain
, pFigure
+ , pIframe
, pRawHtmlBlock
]
trace (T.take 60 $ tshow $ B.toList res)
@@ -401,6 +403,18 @@ pDiv = try $ do
else kvs
return $ B.divWith (ident, classes', kvs') contents
+pIframe :: PandocMonad m => TagParser m Blocks
+pIframe = try $ do
+ guardDisabled Ext_raw_html
+ tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
+ pCloses "iframe" <|> eof
+ url <- canonicalizeUrl $ fromAttrib "src" tag
+ (bs, _) <- openURL url
+ let inp = UTF8.toText bs
+ opts <- readerOpts <$> getState
+ Pandoc _ contents <- readHtml opts inp
+ return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
+
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do
raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea"
@@ -798,9 +812,8 @@ pImage = do
pSvg :: PandocMonad m => TagParser m Inlines
pSvg = do
- exts <- getOption readerExtensions
+ guardDisabled Ext_raw_html
-- if raw_html enabled, parse svg tag as raw
- guard $ not (extensionEnabled Ext_raw_html exts)
opent@(TagOpen _ attr') <- pSatisfy (matchTagOpen "svg" [])
let (ident,cls,_) = toAttr attr'
contents <- many (notFollowedBy (pCloses "svg") >> pAny)