From 013e4a3164115bf84f3e1964f21b7cd7f020b86a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 14 May 2021 21:31:24 +0200 Subject: HTML reader: keep h1 tags as normal headers (#7274) The tags `` and `<h1 class="title">` often contain the same information, so the latter was dropped from the document. However, as this can lead to loss of information, the heading is now always retained. Use `--shift-heading-level-by=-1` to turn the `<h1>` into the document title, or a filter to restore the previous behavior. Closes: #2293 --- src/Text/Pandoc/Readers/HTML.hs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 0a9d67e35..b305de7b5 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -499,17 +499,13 @@ pHeader = try $ do tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) (const True) let attr = toStringAttr attr' - let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text) - [("class","title")] level <- headerLevel tagtype contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] T.words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] attr'' <- registerHeader (ident, classes, keyvals) contents - return $ if bodyTitle - then mempty -- skip a representation of the title in the body - else B.headerWith attr'' level contents + return $ B.headerWith attr'' level contents pHrule :: PandocMonad m => TagParser m Blocks pHrule = do -- cgit v1.2.3