From 38200c0291907fe0e5216b487677c64c90151d0a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 4 Dec 2018 09:24:15 -0800 Subject: Strip out illegal XML characters in escapeXMLString. Closes #5119. --- src/Text/Pandoc/XML.hs | 7 ++++++- test/command/5119.md | 9 +++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 test/command/5119.md diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index c46938d73..a96a63647 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -57,7 +57,12 @@ escapeCharForXML x = case x of -- | Escape string as needed for XML. Entity references are not preserved. escapeStringForXML :: String -> String -escapeStringForXML = concatMap escapeCharForXML +escapeStringForXML = concatMap escapeCharForXML . filter isLegalXMLChar + where isLegalXMLChar c = c == '\t' || c == '\n' || c == '\r' || + (c >= '\x20' && c <= '\xD7FF') || + (c >= '\xE000' && c <= '\xFFFD') || + (c >= '\x10000' && c <= '\x10FFFF') + -- see https://www.w3.org/TR/xml/#charsets -- | Escape newline characters as &#10; escapeNls :: String -> String diff --git a/test/command/5119.md b/test/command/5119.md new file mode 100644 index 000000000..a816e0590 --- /dev/null +++ b/test/command/5119.md @@ -0,0 +1,9 @@ +``` +% pandoc -t docbook +hi +^D +<para> + hi +</para> +``` + -- cgit v1.2.3