aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-02-20 22:49:20 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-02-20 22:49:20 -0800
commit321343b2cf8a4a75abe1b6713aa40e278ca57997 (patch)
tree24b83e2714d3d304ffefe24a53d77012b946968c
parentcec541e54cd947c8032f9148db18104cd1a48783 (diff)
downloadpandoc-321343b2cf8a4a75abe1b6713aa40e278ca57997.tar.gz
HTML reader: small efficiency improvements.
Also, remove exported class NamedTag(..) [API change]. This was just intended to smooth over the transition from String to Text and is no longer needed. The functions isInlineTag and isBlockTag are no longer polymorphic.
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs43
1 files changed, 18 insertions, 25 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 47856d2f7..50201fe77 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -19,7 +19,6 @@ module Text.Pandoc.Readers.HTML ( readHtml
, htmlInBalanced
, isInlineTag
, isBlockTag
- , NamedTag(..)
, isTextTag
, isCommentTag
) where
@@ -579,9 +578,9 @@ tagToText (TagOpen "br" _) = "\n"
tagToText _ = ""
inline :: PandocMonad m => TagParser m Inlines
-inline = do
+inline = pTagText <|> do
+ tag <- lookAhead (pSatisfy isInlineTag)
exts <- getOption readerExtensions
- tag <- lookAhead pAny
case tag of
TagOpen name attr ->
case name of
@@ -935,27 +934,21 @@ pSpace = many1 (satisfy isSpace) >>= \xs ->
then return B.softbreak
else return B.space
-class NamedTag a where
- getTagName :: a -> Maybe Text
-
-instance NamedTag (Tag Text) where
- getTagName (TagOpen t _) = Just t
- getTagName (TagClose t) = Just t
- getTagName _ = Nothing
+getTagName :: Tag Text -> Maybe Text
+getTagName (TagOpen t _) = Just t
+getTagName (TagClose t) = Just t
+getTagName _ = Nothing
-instance NamedTag (Tag String) where
- getTagName (TagOpen t _) = Just (T.pack t)
- getTagName (TagClose t) = Just (T.pack t)
- getTagName _ = Nothing
-
-isInlineTag :: NamedTag (Tag a) => Tag a -> Bool
+isInlineTag :: Tag Text -> Bool
isInlineTag t =
- isCommentTag t || case getTagName t of
- Nothing -> False
- Just x -> x `Set.notMember` blockTags ||
- T.take 1 x == "?" -- processing instr.
-
-isBlockTag :: NamedTag (Tag a) => Tag a -> Bool
+ isCommentTag t ||
+ case getTagName t of
+ Nothing -> False
+ Just "script" -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
+ Just x -> x `Set.notMember` blockTags ||
+ T.take 1 x == "?" -- processing instr.
+
+isBlockTag :: Tag Text -> Bool
isBlockTag t = isBlockTagName || isTagComment t
where isBlockTagName =
case getTagName t of
@@ -966,10 +959,10 @@ isBlockTag t = isBlockTagName || isTagComment t
|| x `Set.member` eitherBlockOrInline
Nothing -> False
-isTextTag :: Tag a -> Bool
+isTextTag :: Tag Text -> Bool
isTextTag = tagText (const True)
-isCommentTag :: Tag a -> Bool
+isCommentTag :: Tag Text -> Bool
isCommentTag = tagComment (const True)
--- parsers for use in markdown, textile readers
@@ -1018,7 +1011,7 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
go n (t:ts') = (t :) <$> go n ts'
go _ [] = mzero
-hasTagWarning :: [Tag a] -> Bool
+hasTagWarning :: [Tag Text] -> Bool
hasTagWarning (TagWarning _:_) = True
hasTagWarning _ = False