about summary refs log tree commit diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2018-11-04 22:15:53 -0800
committer John MacFarlane <jgm@berkeley.edu> 2018-11-04 22:15:53 -0800
commit 511d647290eaa19907906807a9c4514a771ea66d (patch)
tree 4a53e48dbcc1e80038124e14f68d21ad78788a94
parent 0c84630549c4b452d2eb5a3d82df5fc62ca593e6 (diff)
download pandoc-511d647290eaa19907906807a9c4514a771ea66d.tar.gz
XML: toHtml5Entities: prefer shorter entities...
when there are several choices for a particular character.
-rw-r--r-- src/Text/Pandoc/XML.hs 13
-rw-r--r-- test/command/ascii.md 4
2 files changed, 12 insertions, 5 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 169951591..4c5d8d351 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -114,9 +114,16 @@ toHtml5Entities = T.concatMap go
Nothing -> T.pack ("&#" ++ show (ord c) ++ ";")
html5EntityMap :: M.Map Char Text
-html5EntityMap = M.fromList [(c, T.takeWhile (/=';') (T.pack ent))
- | (ent@(_:_), [c]) <- htmlEntities
- , last ent == ';']
+html5EntityMap = foldr go mempty htmlEntities
+ where go (ent, s) entmap =
+ case s of
+ [c] -> M.insertWith
+ (\new old -> if T.length new > T.length old
+ then old
+ else new) c ent' entmap
+ where ent' = T.takeWhile (/=';') (T.pack ent)
+ _ -> entmap
+
-- Unescapes XML entities
fromEntities :: String -> String
diff --git a/test/command/ascii.md b/test/command/ascii.md
index d8ab5ed50..96fc50291 100644
--- a/test/command/ascii.md
+++ b/test/command/ascii.md
@@ -2,7 +2,7 @@
pandoc -t html --ascii
äéıå
^D
-<p>&auml;&eacute;&inodot;&aring;</p>
+<p>&auml;&eacute;&imath;&aring;</p>
```
```
@@ -48,6 +48,6 @@ pandoc -t jats --ascii
pandoc -t markdown-smart --ascii
"äéıå"
^D
-&ldquo;&auml;&eacute;&inodot;&aring;&rdquo;
+&ldquo;&auml;&eacute;&imath;&aring;&rdquo;
```