aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-08-18 12:41:09 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-08-18 12:41:09 -0700
commit6dce8c67601dcb89d8ff69adf83fac7d27353db2 (patch)
treed2c294030f060d1cca509ddf55eb945e4678f6a8 /src/Text/Pandoc/Readers
parentee88f5662bac212c416cde9b4f530d9a43f3b47c (diff)
downloadpandoc-6dce8c67601dcb89d8ff69adf83fac7d27353db2.tar.gz
HTML reader: improved handling of tags that can be block or inline.
Previously a section like this would be enclosed in a paragraph, with RawInline for the video tags (since video is a tag that can be either block or inline): <video controls="controls"> <source src="../videos/test.mp4" type="video/mp4" /> <source src="../videos/test.webm" type="video/webm" /> <p> The videos can not be played back on your system.<br/> Try viewing on Youtube (requires Internet connection): <a href="http://youtu.be/etE5urBps_w">Relative Velocity on Youtube</a>. </p> </video> This change will cause the video and source tags to be parsed as RawBlock instead, giving better output. The general change is this: when we're parsing a "plain" sequence of inlines, we don't parse anything that COULD be a block-level tag.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs18
1 files changed, 13 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index bd60a74fa..4ea5f41d5 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -91,16 +91,20 @@ replaceNotes' x = return x
data HTMLState =
HTMLState
{ parserState :: ParserState,
- noteTable :: [(String, Blocks)]
+ noteTable :: [(String, Blocks)]
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
, inChapter :: Bool -- ^ Set if in chapter section
+ , inPlain :: Bool -- ^ Set if in pPlain
}
setInChapter :: HTMLParser s a -> HTMLParser s a
setInChapter = local (\s -> s {inChapter = True})
+setInPlain :: HTMLParser s a -> HTMLParser s a
+setInPlain = local (\s -> s {inPlain = True})
+
type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
type TagParser = HTMLParser [Tag String]
@@ -141,8 +145,8 @@ block = do
, pTable
, pHead
, pBody
- , pPlain
, pDiv
+ , pPlain
, pRawHtmlBlock
]
when tr $ trace (printf "line %d: %s" (sourceLine pos)
@@ -422,7 +426,7 @@ pBlockQuote = do
pPlain :: TagParser Blocks
pPlain = do
- contents <- trimInlines . mconcat <$> many1 inline
+ contents <- setInPlain $ trimInlines . mconcat <$> many1 inline
if B.isNull contents
then return mempty
else return $ B.plain contents
@@ -579,7 +583,11 @@ pSpan = try $ do
pRawHtmlInline :: TagParser Inlines
pRawHtmlInline = do
- result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag
+ inplain <- asks inPlain
+ result <- pSatisfy (tagComment (const True))
+ <|> if inplain
+ then pSatisfy (not . isBlockTag)
+ else pSatisfy isInlineTag
parseRaw <- getOption readerParseRaw
if parseRaw
then return $ B.rawInline "html" $ renderTags' [result]
@@ -919,7 +927,7 @@ instance HasMeta HTMLState where
deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
instance Default HTMLLocal where
- def = HTMLLocal NoQuote False
+ def = HTMLLocal NoQuote False False
instance HasLastStrPosition HTMLState where
setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}