From 8efb8975ed641ddd075954e1ccc7f71eca1d3c16 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 23 Oct 2018 21:38:21 -0700 Subject: Groff writer character escaping changes. T.P.GroffChar: replaced `essentialEscapes` with `manEscapes`, which includes all the escapes mentioned in the groff_man manual. T.P.Writers.Groff: removed escapeCode; changed parameter on escapeString from Bool to new type `EscapeMode`. Rewrote `escapeString`. --- src/Text/Pandoc/GroffChar.hs | 23 +++++++++++------- src/Text/Pandoc/Writers/Groff.hs | 52 +++++++++++++++++++++------------------- src/Text/Pandoc/Writers/Man.hs | 20 ++++++++++------ src/Text/Pandoc/Writers/Ms.hs | 30 +++++++++++------------ 4 files changed, 70 insertions(+), 55 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/GroffChar.hs b/src/Text/Pandoc/GroffChar.hs index 6d7991e5d..efb3cf11a 100644 --- a/src/Text/Pandoc/GroffChar.hs +++ b/src/Text/Pandoc/GroffChar.hs @@ -31,24 +31,29 @@ Groff character escaping/unescaping. -} module Text.Pandoc.GroffChar ( - essentialEscapes + manEscapes , characterCodes , combiningAccents ) where import Prelude -import qualified Data.Map as Map -essentialEscapes :: Map.Map Char String -essentialEscapes = Map.fromList - [ ('\160', "\\~") +-- | These are the escapes specifically mentioned in groff_man(7). +manEscapes :: [(Char, String)] +manEscapes = + [ ('\160', "\\ ") , ('\'', "\\[aq]") - , ('`', "\\[ga]") + , ('‘', "\\[oq]") + , ('’', "\\[cq]") , ('"', "\\[dq]") - , ('~', "\\[ti]") + , ('“', "\\[lq]") + , ('”', "\\[rq]") + , ('—', "\\[em]") + , ('–', "\\[en]") + , ('`', "\\[ga]") , ('^', "\\[ha]") - , ('@', "\\[at]") - , ('\\', "\\[rs]") + , ('~', "\\[ti]") , ('-', "\\-") -- minus; - will be interpreted as hyphen U+2010 + , ('\\', "\\[rs]") , ('\x2026', "\\&...") -- because u2026 doesn't render on tty ] diff --git a/src/Text/Pandoc/Writers/Groff.hs b/src/Text/Pandoc/Writers/Groff.hs index fb3cc085b..b0e8d3d06 100644 --- a/src/Text/Pandoc/Writers/Groff.hs +++ b/src/Text/Pandoc/Writers/Groff.hs @@ -34,22 +34,21 @@ module Text.Pandoc.Writers.Groff ( , defaultWriterState , MS , Note + , EscapeMode(..) , escapeString - , escapeCode , withFontFeature ) where import Prelude import Data.Char (ord, isAscii) import Control.Monad.State.Strict -import Data.List (intercalate) import qualified Data.Map as Map import Data.Maybe (fromMaybe, isJust, catMaybes) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Pretty import Text.Printf (printf) -import Text.Pandoc.GroffChar (essentialEscapes, characterCodes, - combiningAccents) +import Text.Pandoc.GroffChar (manEscapes, + characterCodes, combiningAccents) data WriterState = WriterState { stHasInlineMath :: Bool , stFirstPara :: Bool @@ -80,33 +79,38 @@ type Note = [Block] type MS = StateT WriterState +data EscapeMode = AllowUTF8 -- ^ use preferred man escapes + | AsciiOnly -- ^ escape everything + deriving Show + combiningAccentsMap :: Map.Map Char String combiningAccentsMap = Map.fromList combiningAccents +essentialEscapes :: Map.Map Char String +essentialEscapes = Map.fromList manEscapes + -- | Escape special characters for groff. -escapeString :: Bool -> String -> String +escapeString :: EscapeMode -> String -> String escapeString _ [] = [] -escapeString useAscii (x:xs) = +escapeString escapeMode ('\n':'.':xs) = + '\n':'\\':'&':'.':escapeString escapeMode xs +escapeString escapeMode (x:xs) = case Map.lookup x essentialEscapes of - Just s -> s ++ escapeString useAscii xs + Just s -> s ++ escapeString escapeMode xs Nothing - | isAscii x || not useAscii -> x : escapeString useAscii xs - | otherwise -> - let accents = catMaybes $ takeWhile isJust - (map (\c -> Map.lookup c combiningAccentsMap) xs) - rest = drop (length accents) xs - s = case Map.lookup x characterCodeMap of - Just t -> "\\[" <> unwords (t:accents) <> "]" - Nothing -> "\\[" <> unwords - (printf "u%04X" (ord x) : accents) <> "]" - in s ++ escapeString useAscii rest - --- | Escape a literal (code) section for groff. -escapeCode :: Bool -> String -> String -escapeCode useAscii = intercalate "\n" . map escapeLine . lines - where escapeLine xs = case xs of - ('.':_) -> "\\%" ++ escapeString useAscii xs - _ -> escapeString useAscii xs + | isAscii x -> x : escapeString escapeMode xs + | otherwise -> + case escapeMode of + AllowUTF8 -> x : escapeString escapeMode xs + AsciiOnly -> + let accents = catMaybes $ takeWhile isJust + (map (\c -> Map.lookup c combiningAccentsMap) xs) + rest = drop (length accents) xs + s = case Map.lookup x characterCodeMap of + Just t -> "\\[" <> unwords (t:accents) <> "]" + Nothing -> "\\[" <> unwords + (printf "u%04X" (ord x) : accents) <> "]" + in s ++ escapeString escapeMode rest characterCodeMap :: Map.Map Char String characterCodeMap = Map.fromList characterCodes diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 645476b77..b32d2ff6c 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -97,6 +97,9 @@ pandocToMan opts (Pandoc meta blocks) = do Nothing -> return main Just tpl -> renderTemplate' tpl context +escString :: WriterOptions -> String -> String +escString _ = escapeString AsciiOnly -- for better portability + -- | Return man representation of notes. notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc notesToMan opts notes = @@ -143,11 +146,14 @@ blockToMan opts (Header level _ inlines) = do 1 -> ".SH " _ -> ".SS " return $ text heading <> contents -blockToMan _ (CodeBlock _ str) = return $ +blockToMan opts (CodeBlock _ str) = return $ text ".IP" $$ text ".nf" $$ text "\\f[C]" $$ - text (escapeCode True str) $$ + ((case str of + '.':_ -> text "\\&" + _ -> mempty) <> + text (escString opts str)) $$ text "\\f[R]" $$ text ".fi" blockToMan opts (BlockQuote blocks) = do @@ -296,11 +302,11 @@ inlineToMan opts (Quoted DoubleQuote lst) = do return $ text "\\[lq]" <> contents <> text "\\[rq]" inlineToMan opts (Cite _ lst) = inlineListToMan opts lst -inlineToMan _ (Code _ str) = - withFontFeature 'C' (return (text $ escapeCode True str)) -inlineToMan _ (Str str@('.':_)) = - return $ afterBreak "\\&" <> text (escapeString True str) -inlineToMan _ (Str str) = return $ text $ escapeString True str +inlineToMan opts (Code _ str) = + withFontFeature 'C' (return (text $ escString opts str)) +inlineToMan opts (Str str@('.':_)) = + return $ afterBreak "\\&" <> text (escString opts str) +inlineToMan opts (Str str) = return $ text $ escString opts str inlineToMan opts (Math InlineMath str) = lift (texMathToInlines InlineMath str) >>= inlineListToMan opts inlineToMan opts (Math DisplayMath str) = do diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index e077e9ed9..2fb949cb9 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -69,9 +69,6 @@ writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMs opts document = evalStateT (pandocToMs opts document) defaultWriterState -escString :: WriterOptions -> String -> String -escString opts = escapeString (writerPreferAscii opts) - -- | Return groff ms representation of document. pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m Text pandocToMs opts (Pandoc meta blocks) = do @@ -87,8 +84,8 @@ pandocToMs opts (Pandoc meta blocks) = do body <- blockListToMs opts blocks let main = render' body hasInlineMath <- gets stHasInlineMath - let titleMeta = (escString opts . stringify) $ docTitle meta - let authorsMeta = map (escString opts . stringify) $ docAuthors meta + let titleMeta = (escapeStr opts . stringify) $ docTitle meta + let authorsMeta = map (escapeStr opts . stringify) $ docAuthors meta hasHighlighting <- gets stHighlighting let highlightingMacros = if hasHighlighting then case writerHighlightStyle opts of @@ -108,6 +105,10 @@ pandocToMs opts (Pandoc meta blocks) = do Nothing -> return main Just tpl -> renderTemplate' tpl context +escapeStr :: WriterOptions -> String -> String +escapeStr opts = + escapeString (if writerPreferAscii opts then AsciiOnly else AllowUTF8) + escapeUri :: String -> String escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c) @@ -121,11 +122,11 @@ toSmallCaps :: WriterOptions -> String -> String toSmallCaps _ [] = [] toSmallCaps opts (c:cs) | isLower c = let (lowers,rest) = span isLower (c:cs) - in "\\s-2" ++ escString opts (map toUpper lowers) ++ + in "\\s-2" ++ escapeStr opts (map toUpper lowers) ++ "\\s0" ++ toSmallCaps opts rest | isUpper c = let (uppers,rest) = span isUpper (c:cs) - in escString opts uppers ++ toSmallCaps opts rest - | otherwise = escapeString (writerPreferAscii opts) [c] ++ toSmallCaps opts cs + in escapeStr opts uppers ++ toSmallCaps opts rest + | otherwise = escapeStr opts [c] ++ toSmallCaps opts cs -- We split inline lists into sentences, and print one sentence per -- line. groff/troff treats the line-ending period differently. @@ -162,7 +163,7 @@ blockToMs opts (Para [Image attr alt (src,_tit)]) _ -> empty capt <- inlineListToMs' opts alt return $ nowrap (text ".PSPIC -C " <> - doubleQuotes (text (escString opts src)) <> + doubleQuotes (text (escapeStr opts src)) <> sizeAttrs) $$ text ".ce 1000" $$ capt $$ @@ -200,7 +201,7 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do (if null secnum then "" else " ") ++ - escString opts (stringify inlines)) + escapeStr opts (stringify inlines)) let backlink = nowrap (text ".pdfhref L -D " <> doubleQuotes (text (toAscii ident)) <> space <> text "\\") <> cr <> text " -- " @@ -409,7 +410,7 @@ inlineToMs opts (Str str) = do smallcaps <- gets stSmallCaps if smallcaps then return $ shim <> text (toSmallCaps opts str) - else return $ shim <> text (escString opts str) + else return $ shim <> text (escapeStr opts str) inlineToMs opts (Math InlineMath str) = do modify $ \st -> st{ stHasInlineMath = True } res <- convertMath writeEqn InlineMath str @@ -453,7 +454,7 @@ inlineToMs opts (Link _ txt (src, _)) = do text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" inlineToMs opts (Image _ alternate (_, _)) = return $ char '[' <> text "IMAGE: " <> - text (escString opts (stringify alternate)) + text (escapeStr opts (stringify alternate)) <> char ']' inlineToMs _ (Note contents) = do modify $ \st -> st{ stNotes = contents : stNotes st } @@ -540,15 +541,14 @@ msFormatter opts _fmtopts = where fmtLine = hcat . map fmtToken fmtToken (toktype, tok) = text "\\*" <> brackets (text (show toktype) <> text " \"" - <> text (escapeCode (writerPreferAscii opts) - (T.unpack tok)) <> text "\"") + <> text (escapeStr opts (T.unpack tok)) <> text "\"") highlightCode :: PandocMonad m => WriterOptions -> Attr -> String -> MS m Doc highlightCode opts attr str = case highlight (writerSyntaxMap opts) (msFormatter opts) attr str of Left msg -> do unless (null msg) $ report $ CouldNotHighlight msg - return $ text (escapeCode (writerPreferAscii opts) str) + return $ text (escapeStr opts str) Right h -> do modify (\st -> st{ stHighlighting = True }) return h -- cgit v1.2.3