diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-04-17 22:55:39 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-04-17 22:56:33 -0700 |
commit | 10e28ef750f0c7e6bc0dfb7aabaaad8edf059e3c (patch) | |
tree | 00c0649bb6cb6f77e2cde3c55a58390defde37df /src/Text/Pandoc | |
parent | aaf5e67624c6b9dcc2615e0e3f4ad4e622d516af (diff) | |
download | pandoc-10e28ef750f0c7e6bc0dfb7aabaaad8edf059e3c.tar.gz |
More principled fix for #1820.
If the tag parses as a comment, we check to see if the
input starts with `<!--`. If not, it's bogus comment mode
and we fail htmlTag.
Includes test case. Closes #1820.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 12 |
1 files changed, 7 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 52358e51e..9eeab620b 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -51,7 +51,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import Data.Maybe ( fromMaybe, isJust) -import Data.List ( intercalate, isInfixOf ) +import Data.List ( intercalate, isInfixOf, isPrefixOf ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero, void, unless ) import Control.Arrow ((***)) @@ -887,16 +887,18 @@ htmlTag :: Monad m => (Tag String -> Bool) -> ParserT [Char] st m (Tag String, String) htmlTag f = try $ do - lookAhead $ char '<' >> ((oneOf "/!?" >> nonspaceChar) <|> letter) - (next : _) <- getInput >>= return . canonicalizeTags . parseTags + lookAhead (char '<') + inp <- getInput + let (next : _) = canonicalizeTags $ parseTags inp guard $ f next - -- advance the parser case next of - TagComment s -> do + TagComment s + | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar skipMany (satisfy (/='>')) char '>' return (next, "<!--" ++ s ++ "-->") + | otherwise -> fail "bogus comment mode, HTML5 parse error" _ -> do rendered <- manyTill anyChar (char '>') return (next, rendered ++ ">") |