From 008273ef67ee846ad20ada6502d17b7c521e8630 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 2 May 2013 19:42:37 -0700
Subject: RTF writer: Properly handle characters above the 0000-FFFF range.

Uses surrogate pairs. Thanks to Hiromi Ishii for the patch.
---
 src/Text/Pandoc/Writers/RTF.hs | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index ca33cb0e9..0d4a22cd5 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -35,7 +35,7 @@ import Text.Pandoc.Readers.TeXMath
 import Text.Pandoc.Templates (renderTemplate)
 import Text.Pandoc.Generic (bottomUpM)
 import Data.List ( isSuffixOf, intercalate )
-import Data.Char ( ord, isDigit, toLower )
+import Data.Char ( ord, chr, isDigit, toLower )
 import System.FilePath ( takeExtension )
 import qualified Data.ByteString as B
 import Text.Printf ( printf )
@@ -113,8 +113,18 @@ handleUnicode :: String -> String
 handleUnicode [] = []
 handleUnicode (c:cs) =
   if ord c > 127
-     then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs
+     then if surrogate c
+          then let x = ord c - 0x10000
+                   (q, r) = x `divMod` 0x400
+                   upper = q + 0xd800
+                   lower = r + 0xDC00
+               in enc (chr upper) ++ enc (chr lower) ++ handleUnicode cs
+          else enc c ++ handleUnicode cs
      else c:(handleUnicode cs)
+  where
+    surrogate x = not ( (0x0000 <= ord x && ord x <= 0xd7ff)
+                     || (0xe000 <= ord x && ord x <= 0xffff) )
+    enc x = '\\':'u':(show (ord x)) ++ "?"
 
 -- | Escape special characters.
 escapeSpecial :: String -> String
-- 
cgit v1.2.3