diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-12-06 12:00:08 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-12-06 12:00:08 -0800 |
commit | 23b2617bf782c68ae9b46d6bb23812dc7081be3e (patch) | |
tree | a57898fa1aeed6afd95ae8533162f8aa36005404 | |
parent | 36807db531d1b65a25fee9bf3afd4ece60f12041 (diff) | |
download | pandoc-23b2617bf782c68ae9b46d6bb23812dc7081be3e.tar.gz |
Ms writer: properly encode strings for PDF contents.
Closes #7731.
-rw-r--r-- | src/Text/Pandoc/Writers/Ms.hs | 21 |
1 files changed, 19 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index eeb8eca62..53763a609 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -21,7 +21,7 @@ TODO: module Text.Pandoc.Writers.Ms ( writeMs ) where import Control.Monad.State.Strict -import Data.Char (isLower, isUpper, ord) +import Data.Char (isAscii, isLower, isUpper, ord) import Data.List (intercalate, intersperse) import Data.List.NonEmpty (nonEmpty) import qualified Data.Map as Map @@ -46,6 +46,8 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Writers.Roff import Text.Printf (printf) import Text.TeXMath (writeEqn) +import qualified Data.Text.Encoding as TE +import qualified Data.ByteString as B -- | Convert Pandoc to Ms. writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -88,6 +90,21 @@ escapeStr :: WriterOptions -> Text -> Text escapeStr opts = escapeString (if writerPreferAscii opts then AsciiOnly else AllowUTF8) +-- In PDFs we need to escape parentheses and backslash. +-- In PDF we need to encode as UTF-16 BE. +escapePDFString :: Text -> Text +escapePDFString t + | T.all isAscii t = + T.replace "(" "\\(" . T.replace ")" "\\)" . T.replace "\\" "\\\\" $ t + | otherwise = ("\\376\\377" <>) . -- add bom + mconcat . map encodeChar . T.unpack $ t + where + encodeChar c = + if isAscii c && c /= '\\' && c /= '(' && c /= ')' + then "\\000" <> T.singleton c + else mconcat . map toOctal . B.unpack . TE.encodeUtf16BE $ T.singleton c + toOctal n = "\\" <> T.pack (printf "%03o" n) + escapeUri :: Text -> Text escapeUri = T.pack . escapeURIString (\c -> c /= '@' && isAllowedInURI c) . T.unpack @@ -196,7 +213,7 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do (if T.null secnum then "" else " ") <> - escapeStr opts (stringify inlines)) + escapePDFString (stringify inlines)) let backlink = nowrap (literal ".pdfhref L -D " <> doubleQuotes (literal (toAscii ident)) <> space <> literal "\\") <> cr <> literal " -- " |