diff options
author | John MacFarlane <jgm@berkeley.edu> | 2019-09-03 11:28:20 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2019-09-03 11:28:20 -0700 |
commit | 513058a24eebb97a849e7eb5051e29ce2f6642c6 (patch) | |
tree | bf1d09f5bdc19ae447c48563e72a2a3d35777f56 | |
parent | 0fe635d3ecdc362f11c380c2e0b9518aa03424e9 (diff) | |
download | pandoc-513058a24eebb97a849e7eb5051e29ce2f6642c6.tar.gz |
XML: change toEntities to emit numerical hex character references.
Previously decimal references were used.
But Polyglot Markup prefers hex. See #5718.
This affects the output of pandoc with `--ascii`.
-rw-r--r-- | src/Text/Pandoc/XML.hs | 3 | ||||
-rw-r--r-- | test/command/ascii.md | 6 |
2 files changed, 5 insertions, 4 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index cf12bf482..8d7a2720c 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -26,6 +26,7 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity, htmlEntities)
 import Text.DocLayout
+import Text.Printf (printf)
 import qualified Data.Map as M
 import Data.String
 
@@ -89,7 +90,7 @@ inTagsIndented tagType = inTags True tagType []
 toEntities :: Text -> Text
 toEntities = T.concatMap go
   where go c | isAscii c = T.singleton c
-             | otherwise = T.pack ("&#" ++ show (ord c) ++ ";")
+             | otherwise = T.pack (printf "&#x%X;" (ord c))
 
 -- | Escape all non-ascii characters using HTML5 entities, falling
 -- back to numerical entities.
diff --git a/test/command/ascii.md b/test/command/ascii.md
index 214e10f13..0826d7414 100644
--- a/test/command/ascii.md
+++ b/test/command/ascii.md
@@ -2,7 +2,7 @@
 pandoc -t html --ascii
 äéıå
 ^D
-<p>&#228;&#233;&#305;&#229;</p>
+<p>&#xE4;&#xE9;&#x131;&#xE5;</p>
 ```
 
 ```
@@ -33,7 +33,7 @@ pandoc -t docbook --ascii
 äéıå
 ^D
 <para>
-  &#228;&#233;&#305;&#229;
+  &#xE4;&#xE9;&#x131;&#xE5;
 </para>
 ```
 
@@ -41,7 +41,7 @@ pandoc -t docbook --ascii
 pandoc -t jats --ascii
 äéıå
 ^D
-<p>&#228;&#233;&#305;&#229;</p>
+<p>&#xE4;&#xE9;&#x131;&#xE5;</p>
 ```
 
 ```