aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2015-04-17 22:55:39 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2015-04-17 22:56:33 -0700
commit10e28ef750f0c7e6bc0dfb7aabaaad8edf059e3c (patch)
tree00c0649bb6cb6f77e2cde3c55a58390defde37df /src/Text/Pandoc
parentaaf5e67624c6b9dcc2615e0e3f4ad4e622d516af (diff)
downloadpandoc-10e28ef750f0c7e6bc0dfb7aabaaad8edf059e3c.tar.gz
More principled fix for #1820.
If the tag parses as a comment, we check to see if the input starts with `<!--`. If not, it's bogus comment mode and we fail htmlTag. Includes test case. Closes #1820.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs12
1 files changed, 7 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 52358e51e..9eeab620b 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -51,7 +51,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
import Data.Maybe ( fromMaybe, isJust)
-import Data.List ( intercalate, isInfixOf )
+import Data.List ( intercalate, isInfixOf, isPrefixOf )
import Data.Char ( isDigit )
import Control.Monad ( liftM, guard, when, mzero, void, unless )
import Control.Arrow ((***))
@@ -887,16 +887,18 @@ htmlTag :: Monad m
=> (Tag String -> Bool)
-> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
- lookAhead $ char '<' >> ((oneOf "/!?" >> nonspaceChar) <|> letter)
- (next : _) <- getInput >>= return . canonicalizeTags . parseTags
+ lookAhead (char '<')
+ inp <- getInput
+ let (next : _) = canonicalizeTags $ parseTags inp
guard $ f next
- -- advance the parser
case next of
- TagComment s -> do
+ TagComment s
+ | "<!--" `isPrefixOf` inp -> do
count (length s + 4) anyChar
skipMany (satisfy (/='>'))
char '>'
return (next, "<!--" ++ s ++ "-->")
+ | otherwise -> fail "bogus comment mode, HTML5 parse error"
_ -> do
rendered <- manyTill anyChar (char '>')
return (next, rendered ++ ">")