From 23b2617bf782c68ae9b46d6bb23812dc7081be3e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 6 Dec 2021 12:00:08 -0800 Subject: Ms writer: properly encode strings for PDF contents. Closes #7731. --- src/Text/Pandoc/Writers/Ms.hs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index eeb8eca62..53763a609 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -21,7 +21,7 @@ TODO: module Text.Pandoc.Writers.Ms ( writeMs ) where import Control.Monad.State.Strict -import Data.Char (isLower, isUpper, ord) +import Data.Char (isAscii, isLower, isUpper, ord) import Data.List (intercalate, intersperse) import Data.List.NonEmpty (nonEmpty) import qualified Data.Map as Map @@ -46,6 +46,8 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Writers.Roff import Text.Printf (printf) import Text.TeXMath (writeEqn) +import qualified Data.Text.Encoding as TE +import qualified Data.ByteString as B -- | Convert Pandoc to Ms. writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -88,6 +90,21 @@ escapeStr :: WriterOptions -> Text -> Text escapeStr opts = escapeString (if writerPreferAscii opts then AsciiOnly else AllowUTF8) +-- In PDFs we need to escape parentheses and backslash. +-- In PDF we need to encode as UTF-16 BE. +escapePDFString :: Text -> Text +escapePDFString t + | T.all isAscii t = + T.replace "(" "\\(" . T.replace ")" "\\)" . T.replace "\\" "\\\\" $ t + | otherwise = ("\\376\\377" <>) . -- add bom + mconcat . map encodeChar . T.unpack $ t + where + encodeChar c = + if isAscii c && c /= '\\' && c /= '(' && c /= ')' + then "\\000" <> T.singleton c + else mconcat . map toOctal . B.unpack . TE.encodeUtf16BE $ T.singleton c + toOctal n = "\\" <> T.pack (printf "%03o" n) + escapeUri :: Text -> Text escapeUri = T.pack . escapeURIString (\c -> c /= '@' && isAllowedInURI c) . T.unpack @@ -196,7 +213,7 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do (if T.null secnum then "" else " ") <> - escapeStr opts (stringify inlines)) + escapePDFString (stringify inlines)) let backlink = nowrap (literal ".pdfhref L -D " <> doubleQuotes (literal (toAscii ident)) <> space <> literal "\\") <> cr <> literal " -- " -- cgit v1.2.3