From 513058a24eebb97a849e7eb5051e29ce2f6642c6 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 3 Sep 2019 11:28:20 -0700
Subject: XML: change toEntities to emit numerical hex character references.

Previously decimal references were used. But Polyglot Markup prefers hex.
See #5718.

This affects the output of pandoc with `--ascii`.
---
 src/Text/Pandoc/XML.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index cf12bf482..8d7a2720c 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -26,6 +26,7 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity, htmlEntities)
 import Text.DocLayout
+import Text.Printf (printf)
 import qualified Data.Map as M
 import Data.String
 
@@ -89,7 +90,7 @@ inTagsIndented tagType = inTags True tagType []
 toEntities :: Text -> Text
 toEntities = T.concatMap go
   where go c | isAscii c = T.singleton c
-             | otherwise = T.pack ("&#" ++ show (ord c) ++ ";")
+             | otherwise = T.pack (printf "&#x%X;" (ord c))
 
 -- | Escape all non-ascii characters using HTML5 entities, falling
 -- back to numerical entities.
-- 
cgit v1.2.3