diff options
author | John MacFarlane <jgm@berkeley.edu> | 2018-10-18 10:21:34 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-10-18 10:21:34 -0700 |
commit | efbb329f1a81a778fd853bffee0414c87a1133b3 (patch) | |
tree | f4af6bcfbb261c347c7f8276067f607e92aa64c8 /src | |
parent | bbd94eae2b3f5273bb681ff6706f0cd375d8a1ef (diff) | |
download | pandoc-efbb329f1a81a778fd853bffee0414c87a1133b3.tar.gz |
Groff escaping changes.
- `--ascii` is now turned on automatically for man output, for
portability. All man output will be escaped to ASCII.
- In T.P.Writers.Groff, `escapeChar`, `escapeString`, and
`escapeCode` now take a boolean parameter that selects
ascii-only output. This is used by the Ms writer for
`--ascii`, instead of doing an extra pass after writing
the document.
- In ms output without `--ascii`, unicode is used whenever
possible (e.g. for double quotes).
- A few escapes are changed: e.g. `\[rs]` instead of `\\` for
backslash, and `\ga]` instead of `` \` `` for backtick.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/GroffChar.hs | 31 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Groff.hs | 38 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Man.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Ms.hs | 52 |
4 files changed, 70 insertions, 63 deletions
diff --git a/src/Text/Pandoc/GroffChar.hs b/src/Text/Pandoc/GroffChar.hs index 669b2b4a0..8664c627f 100644 --- a/src/Text/Pandoc/GroffChar.hs +++ b/src/Text/Pandoc/GroffChar.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2018 John MacFarlane <jgm@berkeley.edu> @@ -400,19 +401,19 @@ characterCodes = -- use like: \\[E a^ aa] combiningAccents :: [(Char, String)] combiningAccents = - [ ('˝' , "\\[a\"]") - , ('¯', "\\[a-]") - , ('˙', "\\[a.]") - , ('^', "\\[a^]") - , ('´', "\\[aa]") - , ('`', "\\[ga]") - , ('˘', "\\[ab]") - , ('¸', "\\[ac]") - , ('¨', "\\[ad]") - , ('ˇ', "\\[ah]") - , ('˚', "\\[ao]") - , ('~', "\\[a~]") - , ('˛', "\\[ho]") - , ('^', "\\[ha]") - , ('~', "\\[ti]") + [ ('˝' , "a\"") + , ('¯', "a-") + , ('˙', "a.") + , ('^', "a^") + , ('´', "aa") + , ('`', "ga") + , ('˘', "ab") + , ('¸', "ac") + , ('¨', "ad") + , ('ˇ', "ah") + , ('˚', "ao") + , ('~', "a~") + , ('˛', "ho") + , ('^', "ha") + , ('~', "ti") ] diff --git a/src/Text/Pandoc/Writers/Groff.hs b/src/Text/Pandoc/Writers/Groff.hs index 3f90a1490..a3b81d138 100644 --- a/src/Text/Pandoc/Writers/Groff.hs +++ b/src/Text/Pandoc/Writers/Groff.hs @@ -37,12 +37,10 @@ module Text.Pandoc.Writers.Groff ( , escapeChar , escapeString , escapeCode - , groffEscape , withFontFeature ) where import Prelude -import qualified Data.Text as T -import Data.Char (isAscii, ord) +import Data.Char (ord, isAscii) import Control.Monad.State.Strict import Data.List (intercalate) import qualified Data.Map as Map @@ -51,7 +49,7 @@ import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Pretty import Text.Printf (printf) -import Text.Pandoc.GroffChar (essentialEscapes) +import Text.Pandoc.GroffChar (essentialEscapes, characterCodes) data WriterState = WriterState { stHasInlineMath :: Bool , stFirstPara :: Bool @@ -82,31 +80,35 @@ type Note = [Block] type MS = StateT WriterState - -escapeChar :: Char -> String -escapeChar c = fromMaybe [c] (Map.lookup c essentialEscapes) +escapeChar :: Bool -> Char -> String +escapeChar useAscii c = + case Map.lookup c essentialEscapes of + Just s -> s + Nothing + | useAscii + , not (isAscii c) -> + case Map.lookup c characterCodeMap of + Just t -> "\\[" <> t <> "]" + Nothing -> printf "\\[u%04X]" (ord c) + | otherwise -> [c] -- | Escape special characters for groff. -escapeString :: String -> String -escapeString = concatMap escapeChar +escapeString :: Bool -> String -> String +escapeString useAscii = concatMap (escapeChar useAscii) -- | Escape a literal (code) section for groff. -escapeCode :: String -> String -escapeCode = intercalate "\n" . map escapeLine . lines +escapeCode :: Bool -> String -> String +escapeCode useAScii = intercalate "\n" . map escapeLine . lines where escapeCodeChar ' ' = "\\ " escapeCodeChar '\t' = "\\\t" - escapeCodeChar c = escapeChar c + escapeCodeChar c = escapeChar useAScii c escapeLine codeline = case concatMap escapeCodeChar codeline of a@('.':_) -> "\\&" ++ a b -> b --- | Escape non-ASCII characters using groff \u[..] sequences. -groffEscape :: T.Text -> T.Text -groffEscape = T.concatMap toUchar - where toUchar c - | isAscii c = T.singleton c - | otherwise = T.pack $ printf "\\[u%04X]" (ord c) +characterCodeMap :: Map.Map Char String +characterCodeMap = Map.fromList characterCodes fontChange :: PandocMonad m => MS m Doc fontChange = do diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 65aec81b3..839c37da9 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -33,6 +33,7 @@ Conversion of 'Pandoc' documents to groff man page format. module Text.Pandoc.Writers.Man ( writeMan) where import Prelude import Control.Monad.State.Strict +import Data.Char (isAscii) import Data.List (intersperse, stripPrefix) import Data.Maybe (fromMaybe) import Data.Text (Text) @@ -93,8 +94,7 @@ pandocToMan opts (Pandoc meta blocks) = do $ defField "has-tables" hasTables $ defField "hyphenate" True $ defField "pandoc-version" pandocVersion metadata - (if writerPreferAscii opts then groffEscape else id) <$> - case writerTemplate opts of + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context @@ -148,7 +148,7 @@ blockToMan _ (CodeBlock _ str) = return $ text ".IP" $$ text ".nf" $$ text "\\f[C]" $$ - text (escapeCode str) $$ + text (escapeCode True str) $$ text "\\f[R]" $$ text ".fi" blockToMan opts (BlockQuote blocks) = do @@ -296,10 +296,10 @@ inlineToMan opts (Quoted DoubleQuote lst) = do inlineToMan opts (Cite _ lst) = inlineListToMan opts lst inlineToMan _ (Code _ str) = - withFontFeature 'C' (return (text $ escapeCode str)) + withFontFeature 'C' (return (text $ escapeCode True str)) inlineToMan _ (Str str@('.':_)) = - return $ afterBreak "\\&" <> text (escapeString str) -inlineToMan _ (Str str) = return $ text $ escapeString str + return $ afterBreak "\\&" <> text (escapeString True str) +inlineToMan _ (Str str) = return $ text $ escapeString True str inlineToMan opts (Math InlineMath str) = lift (texMathToInlines InlineMath str) >>= inlineListToMan opts inlineToMan opts (Math DisplayMath str) = do diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index cdca24702..ec7f9bf33 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -69,6 +69,9 @@ writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMs opts document = evalStateT (pandocToMs opts document) defaultWriterState +escString :: WriterOptions -> String -> String +escString opts = escapeString (writerPreferAscii opts) + -- | Return groff ms representation of document. pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m Text pandocToMs opts (Pandoc meta blocks) = do @@ -84,8 +87,8 @@ pandocToMs opts (Pandoc meta blocks) = do body <- blockListToMs opts blocks let main = render' body hasInlineMath <- gets stHasInlineMath - let titleMeta = (escapeString . stringify) $ docTitle meta - let authorsMeta = map (escapeString . stringify) $ docAuthors meta + let titleMeta = (escString opts . stringify) $ docTitle meta + let authorsMeta = map (escString opts . stringify) $ docAuthors meta hasHighlighting <- gets stHighlighting let highlightingMacros = if hasHighlighting then case writerHighlightStyle opts of @@ -101,8 +104,7 @@ pandocToMs opts (Pandoc meta blocks) = do $ defField "title-meta" titleMeta $ defField "author-meta" (intercalate "; " authorsMeta) $ defField "highlighting-macros" highlightingMacros metadata - (if writerPreferAscii opts then groffEscape else id) <$> - case writerTemplate opts of + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context @@ -112,18 +114,18 @@ escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c) -- | Escape | character, used to mark inline math, inside math. escapeBar :: String -> String escapeBar = concatMap go - where go '|' = "\\[u007C]" + where go '|' = "\\[ba]" go c = [c] -toSmallCaps :: String -> String -toSmallCaps [] = [] -toSmallCaps (c:cs) +toSmallCaps :: WriterOptions -> String -> String +toSmallCaps _ [] = [] +toSmallCaps opts (c:cs) | isLower c = let (lowers,rest) = span isLower (c:cs) - in "\\s-2" ++ escapeString (map toUpper lowers) ++ - "\\s0" ++ toSmallCaps rest + in "\\s-2" ++ escString opts (map toUpper lowers) ++ + "\\s0" ++ toSmallCaps opts rest | isUpper c = let (uppers,rest) = span isUpper (c:cs) - in escapeString uppers ++ toSmallCaps rest - | otherwise = escapeChar c ++ toSmallCaps cs + in escString opts uppers ++ toSmallCaps opts rest + | otherwise = escapeChar (writerPreferAscii opts) c ++ toSmallCaps opts cs -- We split inline lists into sentences, and print one sentence per -- line. groff/troff treats the line-ending period differently. @@ -160,7 +162,7 @@ blockToMs opts (Para [Image attr alt (src,_tit)]) _ -> empty capt <- inlineListToMs' opts alt return $ nowrap (text ".PSPIC -C " <> - doubleQuotes (text (escapeString src)) <> + doubleQuotes (text (escString opts src)) <> sizeAttrs) $$ text ".ce 1000" $$ capt $$ @@ -198,7 +200,7 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do (if null secnum then "" else " ") ++ - escapeString (stringify inlines)) + escString opts (stringify inlines)) let backlink = nowrap (text ".pdfhref L -D " <> doubleQuotes (text (toAscii ident)) <> space <> text "\\") <> cr <> text " -- " @@ -400,14 +402,14 @@ inlineToMs opts (Cite _ lst) = inlineToMs opts (Code attr str) = do hlCode <- highlightCode opts attr str withFontFeature 'C' (return hlCode) -inlineToMs _ (Str str) = do +inlineToMs opts (Str str) = do let shim = case str of '.':_ -> afterBreak "\\&" _ -> empty smallcaps <- gets stSmallCaps if smallcaps - then return $ shim <> text (toSmallCaps str) - else return $ shim <> text (escapeString str) + then return $ shim <> text (toSmallCaps opts str) + else return $ shim <> text (escString opts str) inlineToMs opts (Math InlineMath str) = do modify $ \st -> st{ stHasInlineMath = True } res <- convertMath writeEqn InlineMath str @@ -449,9 +451,10 @@ inlineToMs opts (Link _ txt (src, _)) = do doubleQuotes (text (escapeUri src)) <> text " -A " <> doubleQuotes (text "\\c") <> space <> text "\\") <> cr <> text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" -inlineToMs _ (Image _ alternate (_, _)) = +inlineToMs opts (Image _ alternate (_, _)) = return $ char '[' <> text "IMAGE: " <> - text (escapeString (stringify alternate)) <> char ']' + text (escString opts (stringify alternate)) + <> char ']' inlineToMs _ (Note contents) = do modify $ \st -> st{ stNotes = contents : stNotes st } return $ text "\\**" @@ -531,20 +534,21 @@ toMacro sty toktype = -- lnColor = lineNumberColor sty -- lnBkgColor = lineNumberBackgroundColor sty -msFormatter :: FormatOptions -> [SourceLine] -> Doc -msFormatter _fmtopts = +msFormatter :: WriterOptions -> FormatOptions -> [SourceLine] -> Doc +msFormatter opts _fmtopts = vcat . map fmtLine where fmtLine = hcat . map fmtToken fmtToken (toktype, tok) = text "\\*" <> brackets (text (show toktype) <> text " \"" - <> text (escapeCode (T.unpack tok)) <> text "\"") + <> text (escapeCode (writerPreferAscii opts) + (T.unpack tok)) <> text "\"") highlightCode :: PandocMonad m => WriterOptions -> Attr -> String -> MS m Doc highlightCode opts attr str = - case highlight (writerSyntaxMap opts) msFormatter attr str of + case highlight (writerSyntaxMap opts) (msFormatter opts) attr str of Left msg -> do unless (null msg) $ report $ CouldNotHighlight msg - return $ text (escapeCode str) + return $ text (escapeCode (writerPreferAscii opts) str) Right h -> do modify (\st -> st{ stHighlighting = True }) return h |