From 7199d68ba078148ff76a38f2c483da73edd62747 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 3 Dec 2020 21:39:19 -0800 Subject: EPUB writer: include title page in landmarks. Closes #6919. Note that the toc is also included if `--toc` is specified. --- test/command/5986.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'test/command') diff --git a/test/command/5986.md b/test/command/5986.md index e24aebb64..ea0ca70c1 100644 --- a/test/command/5986.md +++ b/test/command/5986.md @@ -6,7 +6,8 @@

-- cgit v1.2.3 From dc3ef5201f9531bc405ac07e763d9f004bb6bc91 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 4 Dec 2020 10:55:48 -0800 Subject: Markdown writer: ensure that a new csl-block begins on a new line. This just looks better and doesn't affect the semantics. See #6921. --- src/Text/Pandoc/Writers/Markdown.hs | 7 ++++++- test/command/pandoc-citeproc-53.md | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'test/command') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 6aec6b244..5eb47b261 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1058,7 +1058,12 @@ inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) = inlineToMarkdown opts (Span attrs ils) = do variant <- asks envVariant contents <- inlineListToMarkdown opts ils - return $ case variant of + return $ case attrs of + (_,["csl-block"],_) -> (cr <>) + (_,["csl-left-margin"],_) -> (cr <>) + (_,["csl-indent"],_) -> (cr <>) + _ -> id + $ case variant of PlainText -> contents _ | attrs == nullAttr -> contents | isEnabled Ext_bracketed_spans opts -> diff --git a/test/command/pandoc-citeproc-53.md b/test/command/pandoc-citeproc-53.md index 295f52049..fb8d5c35e 100644 --- a/test/command/pandoc-citeproc-53.md +++ b/test/command/pandoc-citeproc-53.md @@ -28,11 +28,13 @@ Doe[^1] Doe[^2] Roe[^3] Roe[^4] Doe[^5] Doe[^6] Roe[^7] Roe[^8] ::: {#refs .references .csl-bib-body} ::: {#ref-a .csl-entry} -[[Doe J.]{.smallcaps} ]{.csl-block}[2000, *Work A*,.]{.csl-left-margin} +[[Doe J.]{.smallcaps} ]{.csl-block} +[2000, *Work A*,.]{.csl-left-margin} ::: ::: {#ref-b .csl-entry} -[[Roe J.]{.smallcaps} ]{.csl-block}[1990, *Work B*,.]{.csl-left-margin} +[[Roe J.]{.smallcaps} ]{.csl-block} +[1990, *Work B*,.]{.csl-left-margin} ::: ::: -- cgit v1.2.3 From ddb76cb356a82f6a9e51a6f3626dd154816e9205 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 5 Dec 2020 09:53:39 -0800 Subject: LaTeX reader: 
don't apply theorem default styling to a figure inside. If we put an image in italics, then when rendering to Markdown we no longer get an implicit figure. Closes #6925. --- src/Text/Pandoc/Readers/LaTeX.hs | 1 + test/command/6925.md | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 test/command/6925.md (limited to 'test/command') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 15a1a19fc..afe960454 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1812,6 +1812,7 @@ theoremEnvironment name = do _ -> bs italicize :: Block -> Block +italicize x@(Para [Image{}]) = x -- see #6925 italicize (Para ils) = Para [Emph ils] italicize (Plain ils) = Plain [Emph ils] italicize x = x diff --git a/test/command/6925.md b/test/command/6925.md new file mode 100644 index 000000000..458a0b91d --- /dev/null +++ b/test/command/6925.md @@ -0,0 +1,34 @@ +``` +% pandoc -f latex -t markdown +\documentclass{amsart} +\newtheorem{thm}{Theorem}[section] +\theoremstyle{definition} +\newtheorem{thm2}[section]{Theorem} +\begin{document} +\begin{thm} +a +\begin{figure} +\includegraphics[]{1.png} +\end{figure} +\end{thm} + +\begin{thm2} +a +\begin{figure} +\includegraphics[]{1.png} +\end{figure} +\end{thm2} +\end{document} +^D +::: {.thm} +**Theorem 1**. *a* + +![image](1.png) +::: + +::: {.thm2} +**Theorem 1**. a + +![image](1.png) +::: +``` -- cgit v1.2.3 From c161893f442a3e001b64af1421e9f62376d71c92 Mon Sep 17 00:00:00 2001 From: Nils Carlson Date: Sat, 5 Dec 2020 18:00:04 +0000 Subject: OpenDocument writer: Allow references for internal links (#6774) This commit adds two extensions to the OpenDocument writer, `xrefs_name` and `xrefs_number`. Links to headings, figures and tables inside the document are substituted with cross-references that will use the name or caption of the referenced item for `xrefs_name` or the number for `xrefs_number`. 
For the `xrefs_number` to be useful heading numbers must be enabled in the generated document and table and figure captions must be enabled using for example the `native_numbering` extension. In order for numbers and reference text to be updated the generated document must be refreshed. Co-authored-by: Nils Carlson --- MANUAL.txt | 37 ++++++++++++++ src/Text/Pandoc/Extensions.hs | 4 ++ src/Text/Pandoc/Writers/OpenDocument.hs | 91 ++++++++++++++++++++++++++------- test/command/6774.md | 63 +++++++++++++++++++++++ 4 files changed, 177 insertions(+), 18 deletions(-) create mode 100644 test/command/6774.md (limited to 'test/command') diff --git a/MANUAL.txt b/MANUAL.txt index fad885fed..4e1615ff2 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3027,6 +3027,43 @@ This extension can be enabled/disabled for the following formats: output formats : `odt`, `opendocument` +#### Extension: `xrefs_name` #### + +Links to headings, figures and tables inside the document are +substituted with cross-references that will use the name or caption +of the referenced item. The original link text is replaced once +the generated document is refreshed. This extension can be combined +with `xrefs_number` in which case numbers will appear before the +name. + +Text in cross-references is only made consistent with the referenced +item once the document has been refreshed. + +This extension can be enabled/disabled for the following formats: + +output formats +: `odt`, `opendocument` + +#### Extension: `xrefs_number` #### + +Links to headings, figures and tables inside the document are +substituted with cross-references that will use the number +of the referenced item. The original link text is discarded. +This extension can be combined with `xrefs_name` in which case +the name or caption numbers will appear after the number. 
+ +For the `xrefs_number` to be useful heading numbers must be enabled +in the generated document, also table and figure captions must be enabled +using for example the `native_numbering` extension. + +Numbers in cross-references are only visible in the final document once +it has been refreshed. + +This extension can be enabled/disabled for the following formats: + +output formats +: `odt`, `opendocument` + #### Extension: `styles` #### {#ext-styles} When converting from docx, read all docx styles as divs (for diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 646f7abfb..a94e24e2c 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -149,6 +149,8 @@ data Extension = | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] + | Ext_xrefs_name -- ^ Use xrefs with names + | Ext_xrefs_number -- ^ Use xrefs with numbers | Ext_yaml_metadata_block -- ^ YAML metadata block | Ext_gutenberg -- ^ Use Project Gutenberg conventions for plain | Ext_attributes -- ^ Generic attribute syntax @@ -465,6 +467,8 @@ getAllExtensions f = universalExtensions <> getAll f getAll "opendocument" = extensionsFromList [ Ext_empty_paragraphs , Ext_native_numbering + , Ext_xrefs_name + , Ext_xrefs_number ] getAll "odt" = getAll "opendocument" <> autoIdExtensions getAll "muse" = autoIdExtensions <> diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 071a5542f..cf42f2228 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -17,6 +17,7 @@ module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where import Control.Arrow ((***), (>>>)) import Control.Monad.State.Strict hiding (when) import Data.Char (chr) +import Data.Foldable (find) import Data.List (sortOn, sortBy, foldl') import qualified Data.Map as Map 
import Data.Maybe (fromMaybe, isNothing) @@ -35,6 +36,7 @@ import Text.DocLayout import Text.Pandoc.Shared (linesToPara, tshow, blocksToInlines) import Text.Pandoc.Templates (renderTemplate) import qualified Text.Pandoc.Translations as Term (Term(Figure, Table)) +import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import qualified Text.Pandoc.Writers.AnnotatedTable as Ann @@ -54,6 +56,11 @@ plainToPara x = x type OD m = StateT WriterState m +data ReferenceType + = HeaderRef + | TableRef + | ImageRef + data WriterState = WriterState { stNotes :: [Doc Text] , stTableStyles :: [Doc Text] @@ -69,6 +76,7 @@ data WriterState = , stImageId :: Int , stTableCaptionId :: Int , stImageCaptionId :: Int + , stIdentTypes :: [(Text,ReferenceType)] } defaultWriterState :: WriterState @@ -86,6 +94,7 @@ defaultWriterState = , stImageId = 1 , stTableCaptionId = 1 , stImageCaptionId = 1 + , stIdentTypes = [] } when :: Bool -> Doc Text -> Doc Text @@ -243,6 +252,12 @@ writeOpenDocument opts (Pandoc meta blocks) = do meta ((body, metadata),s) <- flip runStateT defaultWriterState $ do + let collectInlineIdent (Image (ident,_,_) _ _) = [(ident,ImageRef)] + collectInlineIdent _ = [] + let collectBlockIdent (Header _ (ident,_,_) _) = [(ident,HeaderRef)] + collectBlockIdent (Table (ident,_,_) _ _ _ _ _) = [(ident,TableRef)] + collectBlockIdent _ = [] + modify $ \s -> s{ stIdentTypes = query collectBlockIdent blocks ++ query collectInlineIdent blocks } m <- metaToContext opts (blocksToOpenDocument opts) (fmap chomp . 
inlinesToOpenDocument opts) @@ -411,7 +426,7 @@ blockToOpenDocument o bs inTags True "text:list" [ ("text:style-name", "L" <> tshow ln)] <$> orderedListToOpenDocument o pn b table :: PandocMonad m => Ann.Table -> OD m (Doc Text) - table (Ann.Table _ (Caption _ c) colspecs thead tbodies _) = do + table (Ann.Table (ident, _, _) (Caption _ c) colspecs thead tbodies _) = do tn <- length <$> gets stTableStyles pn <- length <$> gets stParaStyles let genIds = map chr [65..] @@ -433,7 +448,7 @@ blockToOpenDocument o bs then return empty else inlinesToOpenDocument o (blocksToInlines c) >>= if isEnabled Ext_native_numbering o - then numberedTableCaption + then numberedTableCaption ident else unNumberedCaption "TableCaption" th <- colHeadsToOpenDocument o (map fst paraHStyles) thead tr <- mapM (tableBodyToOpenDocument o (map fst paraStyles)) tbodies @@ -442,36 +457,39 @@ blockToOpenDocument o bs , ("table:style-name", name) ] (vcat columns $$ th $$ vcat tr) return $ captionDoc $$ tableDoc - figure attr caption source title | null caption = + figure attr@(ident, _, _) caption source title | null caption = withParagraphStyle o "Figure" [Para [Image attr caption (source,title)]] | otherwise = do imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image attr caption (source,title)]] captionDoc <- inlinesToOpenDocument o caption >>= if isEnabled Ext_native_numbering o - then numberedFigureCaption + then numberedFigureCaption ident else unNumberedCaption "FigureCaption" return $ imageDoc $$ captionDoc -numberedTableCaption :: PandocMonad m => Doc Text -> OD m (Doc Text) -numberedTableCaption caption = do +numberedTableCaption :: PandocMonad m => Text -> Doc Text -> OD m (Doc Text) +numberedTableCaption ident caption = do id' <- gets stTableCaptionId modify (\st -> st{ stTableCaptionId = id' + 1 }) capterm <- translateTerm Term.Table - return $ numberedCaption "TableCaption" capterm "Table" id' caption + return $ numberedCaption "TableCaption" capterm "Table" id' ident 
caption -numberedFigureCaption :: PandocMonad m => Doc Text -> OD m (Doc Text) -numberedFigureCaption caption = do +numberedFigureCaption :: PandocMonad m => Text -> Doc Text -> OD m (Doc Text) +numberedFigureCaption ident caption = do id' <- gets stImageCaptionId modify (\st -> st{ stImageCaptionId = id' + 1 }) capterm <- translateTerm Term.Figure - return $ numberedCaption "FigureCaption" capterm "Illustration" id' caption + return $ numberedCaption "FigureCaption" capterm "Illustration" id' ident caption -numberedCaption :: Text -> Text -> Text -> Int -> Doc Text -> Doc Text -numberedCaption style term name num caption = +numberedCaption :: Text -> Text -> Text -> Int -> Text -> Doc Text -> Doc Text +numberedCaption style term name num ident caption = let t = text $ T.unpack term r = num - 1 - s = inTags False "text:sequence" [ ("text:ref-name", "ref" <> name <> tshow r), + ident' = case ident of + "" -> "ref" <> name <> tshow r + _ -> ident + s = inTags False "text:sequence" [ ("text:ref-name", ident'), ("text:name", name), ("text:formula", "ooow:" <> name <> "+1"), ("style:num-format", "1") ] $ text $ show num @@ -607,7 +625,9 @@ inlineToOpenDocument o ils else do report $ InlineNotRendered ils return empty - Link _ l (s,t) -> mkLink s t <$> inlinesToOpenDocument o l + Link _ l (s,t) -> do + identTypes <- gets stIdentTypes + mkLink o identTypes s t <$> inlinesToOpenDocument o l Image attr _ (s,t) -> mkImg attr s t Note l -> mkNote l where @@ -619,10 +639,6 @@ inlineToOpenDocument o ils unhighlighted s = inlinedCode $ preformatted s preformatted s = handleSpaces $ escapeStringForXML s inlinedCode s = return $ inTags False "text:span" [("text:style-name", "Source_Text")] s - mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple") - , ("xlink:href" , s ) - , ("office:name", t ) - ] . 
inSpanTags "Definition" mkImg (_, _, kvs) s _ = do id' <- gets stImageId modify (\st -> st{ stImageId = id' + 1 }) @@ -659,6 +675,45 @@ inlineToOpenDocument o ils addNote nn return nn +mkLink :: WriterOptions -> [(Text,ReferenceType)] -> Text -> Text -> Doc Text -> Doc Text +mkLink o identTypes s t d = + let maybeIdentAndType = case T.uncons s of + Just ('#', ident) -> find ((ident ==) . fst) identTypes + _ -> Nothing + d' = inSpanTags "Definition" d + ref refType format ident = inTags False refType + [ ("text:reference-format", format ), + ("text:ref-name", ident) ] + inlineSpace = selfClosingTag "text:s" [] + bookmarkRef = ref "text:bookmark-ref" + bookmarkRefNumber ident = bookmarkRef "number" ident mempty + bookmarkRefName ident = bookmarkRef "text" ident d + bookmarkRefNameNumber ident = bookmarkRefNumber ident <> inlineSpace <> bookmarkRefName ident + bookmarkRef' + | isEnabled Ext_xrefs_number o && isEnabled Ext_xrefs_name o = bookmarkRefNameNumber + | isEnabled Ext_xrefs_name o = bookmarkRefName + | otherwise = bookmarkRefNumber + sequenceRef = ref "text:sequence-ref" + sequenceRefNumber ident = sequenceRef "value" ident mempty + sequenceRefName ident = sequenceRef "caption" ident d + sequenceRefNameNumber ident = sequenceRefNumber ident <> inlineSpace <> sequenceRefName ident + sequenceRef' + | isEnabled Ext_xrefs_number o && isEnabled Ext_xrefs_name o = sequenceRefNameNumber + | isEnabled Ext_xrefs_name o = sequenceRefName + | otherwise = sequenceRefNumber + link = inTags False "text:a" [ ("xlink:type" , "simple") + , ("xlink:href" , s ) + , ("office:name", t ) + ] d' + linkOrReference = case maybeIdentAndType of + Just (ident, HeaderRef) -> bookmarkRef' ident + Just (ident, TableRef) -> sequenceRef' ident + Just (ident, ImageRef) -> sequenceRef' ident + _ -> link + in if isEnabled Ext_xrefs_name o || isEnabled Ext_xrefs_number o + then linkOrReference + else link + bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc Text])) bulletListStyle l = 
do let doStyles i = inTags True "text:list-level-style-bullet" diff --git a/test/command/6774.md b/test/command/6774.md new file mode 100644 index 000000000..66549c0f2 --- /dev/null +++ b/test/command/6774.md @@ -0,0 +1,63 @@ +``` +% pandoc -f native -t opendocument --quiet +[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"] +,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")]] +^D +The +Chapter +Chapter 1 references +The +Chapter +``` +``` +% pandoc -f native -t opendocument+xrefs_name --quiet +[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"] +,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")] +,Para [Image ("lalune",[],[]) [Str "lalune"] ("lalune.jpg","fig:Voyage dans la Lune")] +,Para [Str "Image",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "La",Space,Str "Lune"] ("#lalune","")]] +^D +The +Chapter +Chapter 1 references +The +Chapter + +lalune +Image 1 references +La +Lune +``` +``` +% pandoc -f native -t opendocument+xrefs_number --quiet +[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"] +,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")] +,Para [Image ("lalune",[],[]) [Str "lalune"] ("lalune.jpg","fig:Voyage dans la Lune")] +,Para [Str "Image",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "La",Space,Str "Lune"] ("#lalune","")]] +^D +The +Chapter +Chapter 1 references + + +lalune +Image 1 references + +``` +``` +% pandoc -f native -t opendocument+xrefs_number+xrefs_name --quiet +[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"] +,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")] +,Para [Image ("lalune",[],[]) [Str "lalune"] ("lalune.jpg","fig:Voyage dans la Lune")] +,Para 
[Str "Image",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "La",Space,Str "Lune"] ("#lalune","")]] +^D +The +Chapter +Chapter 1 references +The +Chapter + +lalune +Image 1 references +La +Lune +``` -- cgit v1.2.3 From 0a502e5ff52b251bbf3da69fd1f9a88d5e0fe92c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 10 Dec 2020 15:44:10 -0800 Subject: HTML reader: retain attribute prefixes and avoid duplicates. Previously we stripped attribute prefixes, reading `xml:lang` as `lang` for example. This resulted in two duplicate `lang` attributes when `xml:lang` and `lang` were both used. This commit causes the prefixes to be retained, and also avoids invalid duplicate attributes. Closes #6938. --- src/Text/Pandoc/Readers/HTML.hs | 28 +++++++++++----------------- src/Text/Pandoc/Readers/HTML/Parsing.hs | 20 +++++++++++++------- test/command/5986.md | 2 +- test/epub/wasteland.native | 8 ++++---- 4 files changed, 29 insertions(+), 29 deletions(-) (limited to 'test/command') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index eb78979a3..f870a241d 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -74,7 +74,7 @@ readHtml :: PandocMonad m -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readHtml opts inp = do - let tags = stripPrefixes . 
canonicalizeTags $ + let tags = stripPrefixes $ canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } (crFilter inp) parseDoc = do @@ -95,6 +95,15 @@ readHtml opts inp = do Right doc -> return doc Left err -> throwError $ PandocParseError $ T.pack $ getError err +-- Strip namespace prefixes on tags (not attributes) +stripPrefixes :: [Tag Text] -> [Tag Text] +stripPrefixes = map stripPrefix + +stripPrefix :: Tag Text -> Tag Text +stripPrefix (TagOpen s as) = TagOpen (T.takeWhileEnd (/=':') s) as +stripPrefix (TagClose s) = TagClose (T.takeWhileEnd (/=':') s) +stripPrefix x = x + replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] replaceNotes bs = do st <- getState @@ -114,7 +123,7 @@ setInPlain = local (\s -> s {inPlain = True}) pHtml :: PandocMonad m => TagParser m Blocks pHtml = try $ do (TagOpen "html" attr) <- lookAhead pAny - for_ (lookup "lang" attr) $ + for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $ updateState . B.setMeta "lang" . B.text pInTags "html" block @@ -1024,21 +1033,6 @@ htmlTag f = try $ do handleTag tagname _ -> mzero --- Strip namespace prefixes -stripPrefixes :: [Tag Text] -> [Tag Text] -stripPrefixes = map stripPrefix - -stripPrefix :: Tag Text -> Tag Text -stripPrefix (TagOpen s as) = - TagOpen (stripPrefix' s) (map (first stripPrefix') as) -stripPrefix (TagClose s) = TagClose (stripPrefix' s) -stripPrefix x = x - -stripPrefix' :: Text -> Text -stripPrefix' s = - if T.null t then s else T.drop 1 t - where (_, t) = T.span (/= ':') s - -- Utilities -- | Adjusts a url according to the document's base URL. 
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs index 2d58319da..e28ebe77b 100644 --- a/src/Text/Pandoc/Readers/HTML/Parsing.hs +++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs @@ -193,14 +193,20 @@ t1 `closes` t2 | _ `closes` _ = False toStringAttr :: [(Text, Text)] -> [(Text, Text)] -toStringAttr = map go +toStringAttr = foldr go [] where - go (x,y) = - case T.stripPrefix "data-" x of - Just x' | x' `Set.notMember` (html5Attributes <> - html4Attributes <> rdfaAttributes) - -> (x',y) - _ -> (x,y) + go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)] + -- treat xml:lang as lang + go ("xml:lang",y) ats = go ("lang",y) ats + -- prevent duplicate attributes + go (x,y) ats + | any (\(x',_) -> x == x') ats = ats + | otherwise = + case T.stripPrefix "data-" x of + Just x' | x' `Set.notMember` (html5Attributes <> + html4Attributes <> rdfaAttributes) + -> go (x',y) ats + _ -> (x,y):ats -- Unlike fromAttrib from tagsoup, this distinguishes -- between a missing attribute and an attribute with empty content. diff --git a/test/command/5986.md b/test/command/5986.md index ea0ca70c1..ed8dd30c9 100644 --- a/test/command/5986.md +++ b/test/command/5986.md @@ -4,7 +4,7 @@ ^D

-