about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2018-12-04 09:24:15 -0800
committer John MacFarlane <jgm@berkeley.edu> 2018-12-04 09:24:15 -0800
commit 38200c0291907fe0e5216b487677c64c90151d0a (patch)
tree 90423afae120444fef32a9940de22699ffe060f9 /src
parent 48115fcf1a9469e836bbe5b0b6405dda3cfa86cc (diff)
download pandoc-38200c0291907fe0e5216b487677c64c90151d0a.tar.gz
Strip out illegal XML characters in escapeStringForXML.
Closes #5119.
Diffstat (limited to 'src')
-rw-r--r-- src/Text/Pandoc/XML.hs 7
1 files changed, 6 insertions, 1 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index c46938d73..a96a63647 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -57,7 +57,12 @@ escapeCharForXML x = case x of
-- | Escape string as needed for XML. Entity references are not preserved.
escapeStringForXML :: String -> String
-escapeStringForXML = concatMap escapeCharForXML
+escapeStringForXML = concatMap escapeCharForXML . filter isLegalXMLChar
+ where isLegalXMLChar c = c == '\t' || c == '\n' || c == '\r' ||
+ (c >= '\x20' && c <= '\xD7FF') ||
+ (c >= '\xE000' && c <= '\xFFFD') ||
+ (c >= '\x10000' && c <= '\x10FFFF')
+ -- see https://www.w3.org/TR/xml/#charsets
-- | Escape newline characters as &#10;
escapeNls :: String -> String