From 1843a8793a9043a45c8c427b06f100461889d7ef Mon Sep 17 00:00:00 2001
From: Albert Krewinkel
Date: Mon, 17 May 2021 18:08:02 +0200
Subject: HTML reader: keep attributes from code nested below pre tag.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a code block is defined with
`<pre><code class="language-x">…</code></pre>`, where the `<pre>`
element has no attributes, then the attributes from the `<code>`
element are used
instead. Any leading `language-` prefix in the code's *class*
attribute is dropped to improve syntax highlighting.

Closes: #7221
---
 src/Text/Pandoc/Readers/HTML.hs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 0a9e4addf..fc4575f2d 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -557,7 +557,18 @@ pFigure = try $ do
 pCodeBlock :: PandocMonad m => TagParser m Blocks
 pCodeBlock = try $ do
   TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
-  let attr = toAttr attr'
+  -- if the `pre` has no attributes, try if it is followed by a `code`
+  -- element and use those attributes if possible.
+  attr <- case attr' of
+    _:_ -> pure (toAttr attr')
+    []  -> option nullAttr $ do
+      TagOpen _ codeAttr <- pSatisfy (matchTagOpen "code" [])
+      pure $ toAttr
+        [ (k, v') | (k, v) <- codeAttr
+                    -- strip language from class
+                  , let v' = if k == "class"
+                             then fromMaybe v (T.stripPrefix "language-" v)
+                             else v ]
   contents <- manyTill pAny (pCloses "pre" <|> eof)
   let rawText = T.concat $ map tagToText contents
   -- drop leading newline if any
-- 
cgit v1.2.3