aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2015-03-29 09:24:58 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2015-03-29 09:24:58 -0700
commitef07974dc47a834ed3b0f2b7eccbd79bc8c9a9ed (patch)
treef90bd525d9da6e5176959fbf1f5d9be80c5115db /src/Text
parentaa49deceaa71ba902a2277451f7b6cd3c8344417 (diff)
parentf3aa03ee8609f26cfc3fd250a957fe4376458d97 (diff)
downloadpandoc-ef07974dc47a834ed3b0f2b7eccbd79bc8c9a9ed.tar.gz
Merge pull request #2039 from mpickering/7.10
Docx Writer: Filter out illegal XML characters
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs21
1 files changed, 19 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 81369e278..d96faaf86 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE ScopedTypeVariables, PatternGuards #-}
+{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-}
{-
Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
@@ -66,6 +66,7 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
extensionFromMimeType)
import Control.Applicative ((<$>), (<|>), (<*>))
import Data.Maybe (fromMaybe, mapMaybe, maybeToList)
+import Data.Char (ord)
data ListMarker = NoMarker
| BulletMarker
@@ -176,13 +177,29 @@ renumId f renumMap e
renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element]
renumIds f renumMap = map (renumId f renumMap)
+-- | Certain characters are invalid in XML even if escaped.
+-- See #1992
+stripInvalidChars :: Pandoc -> Pandoc
+stripInvalidChars = bottomUp (filter isValidChar)
+
+-- | See XML reference
+isValidChar :: Char -> Bool
+isValidChar (ord -> c)
+ | c == 0x9 = True
+ | c == 0xA = True
+ | c == 0xD = True
+ | 0x20 <= c && c <= 0xD7FF = True
+ | 0xE000 <= c && c <= 0xFFFD = True
+ | 0x10000 <= c && c <= 0x10FFFF = True
+ | otherwise = False
+
-- | Produce an Docx file from a Pandoc document.
writeDocx :: WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO BL.ByteString
writeDocx opts doc@(Pandoc meta _) = do
let datadir = writerUserDataDir opts
- let doc' = walk fixDisplayMath doc
+ let doc' = stripInvalidChars . walk fixDisplayMath $ doc
username <- lookup "USERNAME" <$> getEnvironment
utctime <- getCurrentTime
refArchive <- liftM (toArchive . toLazy) $