From 511d647290eaa19907906807a9c4514a771ea66d Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sun, 4 Nov 2018 22:15:53 -0800
Subject: XML: toHtml5Entities: prefer shorter entities...

when there are several choices for a particular character.
---
 src/Text/Pandoc/XML.hs | 13 ++++++++++---
 test/command/ascii.md  |  4 ++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 169951591..4c5d8d351 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -114,9 +114,16 @@ toHtml5Entities = T.concatMap go
                    Nothing -> T.pack ("&#" ++ show (ord c) ++ ";")
 
 html5EntityMap :: M.Map Char Text
-html5EntityMap = M.fromList [(c, T.takeWhile (/=';') (T.pack ent))
-                            | (ent@(_:_), [c]) <- htmlEntities
-                            , last ent == ';']
+html5EntityMap = foldr go mempty htmlEntities
+  where go (ent, s) entmap =
+         case s of
+           [c] -> M.insertWith
+                   (\new old -> if T.length new > T.length old
+                                   then old
+                                   else new) c ent' entmap
+             where ent' = T.takeWhile (/=';') (T.pack ent)
+           _ -> entmap
+
 
 -- Unescapes XML entities
 fromEntities :: String -> String
diff --git a/test/command/ascii.md b/test/command/ascii.md
index d8ab5ed50..96fc50291 100644
--- a/test/command/ascii.md
+++ b/test/command/ascii.md
@@ -2,7 +2,7 @@
 pandoc -t html --ascii
 äéıå
 ^D
-

äéıå

+

äéıå

 ```
 
 ```
@@ -48,6 +48,6 @@ pandoc -t jats --ascii
 pandoc -t markdown-smart --ascii
 "äéıå"
 ^D
-“äéıå”
+“äéıå”
 ```
-- 
cgit v1.2.3