diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-02-20 22:49:20 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-02-20 22:49:20 -0800 |
commit | 321343b2cf8a4a75abe1b6713aa40e278ca57997 (patch) | |
tree | 24b83e2714d3d304ffefe24a53d77012b946968c /src/Text/Pandoc | |
parent | cec541e54cd947c8032f9148db18104cd1a48783 (diff) | |
download | pandoc-321343b2cf8a4a75abe1b6713aa40e278ca57997.tar.gz |
HTML reader: small efficiency improvements.
Also, remove exported class NamedTag(..) [API change].
This was just intended to smooth over the transition from String to Text
and is no longer needed.
The functions isInlineTag and isBlockTag are no longer
polymorphic.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 43 |
1 files changed, 18 insertions, 25 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 47856d2f7..50201fe77 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -19,7 +19,6 @@ module Text.Pandoc.Readers.HTML ( readHtml , htmlInBalanced , isInlineTag , isBlockTag - , NamedTag(..) , isTextTag , isCommentTag ) where @@ -579,9 +578,9 @@ tagToText (TagOpen "br" _) = "\n" tagToText _ = "" inline :: PandocMonad m => TagParser m Inlines -inline = do +inline = pTagText <|> do + tag <- lookAhead (pSatisfy isInlineTag) exts <- getOption readerExtensions - tag <- lookAhead pAny case tag of TagOpen name attr -> case name of @@ -935,27 +934,21 @@ pSpace = many1 (satisfy isSpace) >>= \xs -> then return B.softbreak else return B.space -class NamedTag a where - getTagName :: a -> Maybe Text - -instance NamedTag (Tag Text) where - getTagName (TagOpen t _) = Just t - getTagName (TagClose t) = Just t - getTagName _ = Nothing +getTagName :: Tag Text -> Maybe Text +getTagName (TagOpen t _) = Just t +getTagName (TagClose t) = Just t +getTagName _ = Nothing -instance NamedTag (Tag String) where - getTagName (TagOpen t _) = Just (T.pack t) - getTagName (TagClose t) = Just (T.pack t) - getTagName _ = Nothing - -isInlineTag :: NamedTag (Tag a) => Tag a -> Bool +isInlineTag :: Tag Text -> Bool isInlineTag t = - isCommentTag t || case getTagName t of - Nothing -> False - Just x -> x `Set.notMember` blockTags || - T.take 1 x == "?" -- processing instr. - -isBlockTag :: NamedTag (Tag a) => Tag a -> Bool + isCommentTag t || + case getTagName t of + Nothing -> False + Just "script" -> "math/tex" `T.isPrefixOf` fromAttrib "type" t + Just x -> x `Set.notMember` blockTags || + T.take 1 x == "?" -- processing instr. + +isBlockTag :: Tag Text -> Bool isBlockTag t = isBlockTagName || isTagComment t where isBlockTagName = case getTagName t of @@ -966,10 +959,10 @@ isBlockTag t = isBlockTagName || isTagComment t || x `Set.member` eitherBlockOrInline Nothing -> False -isTextTag :: Tag a -> Bool +isTextTag :: Tag Text -> Bool isTextTag = tagText (const True) -isCommentTag :: Tag a -> Bool +isCommentTag :: Tag Text -> Bool isCommentTag = tagComment (const True) --- parsers for use in markdown, textile readers @@ -1018,7 +1011,7 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts go n (t:ts') = (t :) <$> go n ts' go _ [] = mzero -hasTagWarning :: [Tag a] -> Bool +hasTagWarning :: [Tag Text] -> Bool hasTagWarning (TagWarning _:_) = True hasTagWarning _ = False |