diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2015-03-29 09:24:58 -0700 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2015-03-29 09:24:58 -0700 | 
| commit | ef07974dc47a834ed3b0f2b7eccbd79bc8c9a9ed (patch) | |
| tree | f90bd525d9da6e5176959fbf1f5d9be80c5115db | |
| parent | aa49deceaa71ba902a2277451f7b6cd3c8344417 (diff) | |
| parent | f3aa03ee8609f26cfc3fd250a957fe4376458d97 (diff) | |
| download | pandoc-ef07974dc47a834ed3b0f2b7eccbd79bc8c9a9ed.tar.gz | |
Merge pull request #2039 from mpickering/7.10
Docx Writer: Filter out illegal XML characters
| -rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 21 | 
1 files changed, 19 insertions, 2 deletions
| diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 81369e278..d96faaf86 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE ScopedTypeVariables, PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-}  {-  Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -66,6 +66,7 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,                           extensionFromMimeType)  import Control.Applicative ((<$>), (<|>), (<*>))  import Data.Maybe (fromMaybe, mapMaybe, maybeToList) +import Data.Char (ord)  data ListMarker = NoMarker                  | BulletMarker @@ -176,13 +177,29 @@ renumId f renumMap e  renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element]  renumIds f renumMap = map (renumId f renumMap) +-- | Certain characters are invalid in XML even if escaped. +-- See #1992 +stripInvalidChars :: Pandoc -> Pandoc +stripInvalidChars = bottomUp (filter isValidChar) + +-- | See XML reference +isValidChar :: Char -> Bool +isValidChar (ord -> c) +  | c == 0x9                      = True +  | c == 0xA                      = True +  | c == 0xD                      = True +  | 0x20 <= c &&  c <= 0xD7FF     = True +  | 0xE000 <= c && c <= 0xFFFD    = True +  | 0x10000 <= c && c <= 0x10FFFF = True +  | otherwise                     = False +  -- | Produce an Docx file from a Pandoc document.  writeDocx :: WriterOptions  -- ^ Writer options            -> Pandoc         -- ^ Document to convert            -> IO BL.ByteString  writeDocx opts doc@(Pandoc meta _) = do    let datadir = writerUserDataDir opts -  let doc' = walk fixDisplayMath doc +  let doc' = stripInvalidChars . walk fixDisplayMath $ doc    username <- lookup "USERNAME" <$> getEnvironment    utctime <- getCurrentTime    refArchive <- liftM (toArchive . toLazy) $ | 
