about summary refs log tree commit diff
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2019-09-03 11:28:20 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2019-09-03 11:28:20 -0700
commit: 513058a24eebb97a849e7eb5051e29ce2f6642c6 (patch)
tree: bf1d09f5bdc19ae447c48563e72a2a3d35777f56
parent: 0fe635d3ecdc362f11c380c2e0b9518aa03424e9 (diff)
download: pandoc-513058a24eebb97a849e7eb5051e29ce2f6642c6.tar.gz
XML: change toEntities to emit numerical hex character references.
Previously decimal references were used. But Polyglot Markup prefers hex. See #5718. This affects the output of pandoc with `--ascii`.
-rw-r--r-- src/Text/Pandoc/XML.hs 3
-rw-r--r-- test/command/ascii.md 6
2 files changed, 5 insertions, 4 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index cf12bf482..8d7a2720c 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -26,6 +26,7 @@ import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity, htmlEntities)
import Text.DocLayout
+import Text.Printf (printf)
import qualified Data.Map as M
import Data.String
@@ -89,7 +90,7 @@ inTagsIndented tagType = inTags True tagType []
toEntities :: Text -> Text
toEntities = T.concatMap go
where go c | isAscii c = T.singleton c
- | otherwise = T.pack ("&#" ++ show (ord c) ++ ";")
+ | otherwise = T.pack (printf "&#x%X;" (ord c))
-- | Escape all non-ascii characters using HTML5 entities, falling
-- back to numerical entities.
diff --git a/test/command/ascii.md b/test/command/ascii.md
index 214e10f13..0826d7414 100644
--- a/test/command/ascii.md
+++ b/test/command/ascii.md
@@ -2,7 +2,7 @@
pandoc -t html --ascii
äéıå
^D
-<p>&#228;&#233;&#305;&#229;</p>
+<p>&#xE4;&#xE9;&#x131;&#xE5;</p>
```
```
@@ -33,7 +33,7 @@ pandoc -t docbook --ascii
äéıå
^D
<para>
- &#228;&#233;&#305;&#229;
+ &#xE4;&#xE9;&#x131;&#xE5;
</para>
```
@@ -41,7 +41,7 @@ pandoc -t docbook --ascii
pandoc -t jats --ascii
äéıå
^D
-<p>&#228;&#233;&#305;&#229;</p>
+<p>&#xE4;&#xE9;&#x131;&#xE5;</p>
```
```