diff options
author | Albert Krewinkel <albert@zeitkraut.de> | 2021-05-14 21:31:24 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-14 12:31:24 -0700 |
commit | 013e4a3164115bf84f3e1964f21b7cd7f020b86a (patch) | |
tree | 3d2f10bee728c4c33bdba661285118f60471b23d | |
parent | 247b1cd18bfbd06e4cb0175d9ac14dc888ccff66 (diff) | |
download | pandoc-013e4a3164115bf84f3e1964f21b7cd7f020b86a.tar.gz |
HTML reader: keep h1 tags as normal headers (#7274)
The tags `<title>` and `<h1 class="title">` often contain the same
information, so the latter was dropped from the document. However, as
this can lead to loss of information, the heading is now always
retained.
Use `--shift-heading-level-by=-1` to turn the `<h1>` into the document
title, or a filter to restore the previous behavior.
Closes: #2293
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 6 | ||||
-rw-r--r-- | test/html-reader.native | 3 |
2 files changed, 3 insertions, 6 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 0a9d67e35..b305de7b5 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -499,17 +499,13 @@ pHeader = try $ do tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) (const True) let attr = toStringAttr attr' - let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text) - [("class","title")] level <- headerLevel tagtype contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] T.words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] attr'' <- registerHeader (ident, classes, keyvals) contents - return $ if bodyTitle - then mempty -- skip a representation of the title in the body - else B.headerWith attr'' level contents + return $ B.headerWith attr'' level contents pHrule :: PandocMonad m => TagParser m Blocks pHrule = do diff --git a/test/html-reader.native b/test/html-reader.native index 04ec55d1e..880561a93 100644 --- a/test/html-reader.native +++ b/test/html-reader.native @@ -1,5 +1,6 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) -[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."] +[Header 1 ("pandoc-test-suite",["title"],[]) [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] +,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("headers",[],[]) [Str "Headers"] ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link ("",[],[]) [Str "embedded",Space,Str "link"] ("/url","")] |