aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2021-12-06 12:00:08 -0800
committer John MacFarlane <jgm@berkeley.edu> 2021-12-06 12:00:08 -0800
commit 23b2617bf782c68ae9b46d6bb23812dc7081be3e (patch)
tree a57898fa1aeed6afd95ae8533162f8aa36005404
parent 36807db531d1b65a25fee9bf3afd4ece60f12041 (diff)
download pandoc-23b2617bf782c68ae9b46d6bb23812dc7081be3e.tar.gz
Ms writer: properly encode strings for PDF contents.
Closes #7731.
-rw-r--r-- src/Text/Pandoc/Writers/Ms.hs 21
1 files changed, 19 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index eeb8eca62..53763a609 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -21,7 +21,7 @@ TODO:
module Text.Pandoc.Writers.Ms ( writeMs ) where
import Control.Monad.State.Strict
-import Data.Char (isLower, isUpper, ord)
+import Data.Char (isAscii, isLower, isUpper, ord)
import Data.List (intercalate, intersperse)
import Data.List.NonEmpty (nonEmpty)
import qualified Data.Map as Map
@@ -46,6 +46,8 @@ import Text.Pandoc.Writers.Shared
import Text.Pandoc.Writers.Roff
import Text.Printf (printf)
import Text.TeXMath (writeEqn)
+import qualified Data.Text.Encoding as TE
+import qualified Data.ByteString as B
-- | Convert Pandoc to Ms.
writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text
@@ -88,6 +90,21 @@ escapeStr :: WriterOptions -> Text -> Text
escapeStr opts =
escapeString (if writerPreferAscii opts then AsciiOnly else AllowUTF8) -- escaping mode is chosen from the writerPreferAscii option
+-- In PDF strings we need to escape parentheses and backslashes.
+-- Non-ASCII text must additionally be encoded as UTF-16 BE (with a BOM).
+escapePDFString :: Text -> Text
+escapePDFString t
+ | T.all isAscii t = -- pure ASCII input: no re-encoding, only backslash-escape \ ( )
+ T.replace "(" "\\(" . T.replace ")" "\\)" . T.replace "\\" "\\\\" $ t -- composed right to left: backslashes are doubled first, before the ) and ( escapes introduce new ones
+ | otherwise = ("\\376\\377" <>) . -- add bom (octal 376 377 = bytes FE FF)
+ mconcat . map encodeChar . T.unpack $ t
+ where
+ encodeChar c = -- renders a single character as its UTF-16 BE bytes
+ if isAscii c && c /= '\\' && c /= '(' && c /= ')'
+ then "\\000" <> T.singleton c -- plain ASCII: zero high byte as an octal escape, then the character itself
+ else mconcat . map toOctal . B.unpack . TE.encodeUtf16BE $ T.singleton c -- non-ASCII, and \ ( ): every UTF-16 BE byte as an octal escape
+ toOctal n = "\\" <> T.pack (printf "%03o" n) -- backslash followed by the byte as three zero-padded octal digits
+
escapeUri :: Text -> Text
escapeUri = T.pack . escapeURIString (\c -> c /= '@' && isAllowedInURI c) . T.unpack -- predicate is False for '@' and for anything isAllowedInURI rejects; presumably those are the characters that get percent-escaped — confirm against escapeURIString
@@ -196,7 +213,7 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do
(if T.null secnum
then ""
else " ") <>
- escapeStr opts (stringify inlines))
+ escapePDFString (stringify inlines))
let backlink = nowrap (literal ".pdfhref L -D " <>
doubleQuotes (literal (toAscii ident)) <> space <> literal "\\") <> cr <>
literal " -- "