aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2018-12-04 09:24:15 -0800
committer John MacFarlane <jgm@berkeley.edu> 2018-12-04 09:24:15 -0800
commit 38200c0291907fe0e5216b487677c64c90151d0a (patch)
tree 90423afae120444fef32a9940de22699ffe060f9
parent 48115fcf1a9469e836bbe5b0b6405dda3cfa86cc (diff)
download pandoc-38200c0291907fe0e5216b487677c64c90151d0a.tar.gz
Strip out illegal XML characters in escapeStringForXML.
Closes #5119.
-rw-r--r-- src/Text/Pandoc/XML.hs 7
-rw-r--r-- test/command/5119.md 9
2 files changed, 15 insertions, 1 deletion
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index c46938d73..a96a63647 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -57,7 +57,12 @@ escapeCharForXML x = case x of
-- | Escape string as needed for XML. Entity references are not preserved.
escapeStringForXML :: String -> String
-escapeStringForXML = concatMap escapeCharForXML
+escapeStringForXML = concatMap escapeCharForXML . filter isLegalXMLChar
+ where isLegalXMLChar c = c == '\t' || c == '\n' || c == '\r' ||
+ (c >= '\x20' && c <= '\xD7FF') ||
+ (c >= '\xE000' && c <= '\xFFFD') ||
+ (c >= '\x10000' && c <= '\x10FFFF')
+ -- see https://www.w3.org/TR/xml/#charsets
-- | Escape newline characters as &#10;
escapeNls :: String -> String
diff --git a/test/command/5119.md b/test/command/5119.md
new file mode 100644
index 000000000..a816e0590
--- /dev/null
+++ b/test/command/5119.md
@@ -0,0 +1,9 @@
+```
+% pandoc -t docbook
+h&#x4;i
+^D
+<para>
+ hi
+</para>
+```
+