aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2018-10-18 10:21:34 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2018-10-18 10:21:34 -0700
commitefbb329f1a81a778fd853bffee0414c87a1133b3 (patch)
treef4af6bcfbb261c347c7f8276067f607e92aa64c8 /src
parentbbd94eae2b3f5273bb681ff6706f0cd375d8a1ef (diff)
downloadpandoc-efbb329f1a81a778fd853bffee0414c87a1133b3.tar.gz
Groff escaping changes.
- `--ascii` is now turned on automatically for man output, for portability. All man output will be escaped to ASCII. - In T.P.Writers.Groff, `escapeChar`, `escapeString`, and `escapeCode` now take a boolean parameter that selects ascii-only output. This is used by the Ms writer for `--ascii`, instead of doing an extra pass after writing the document. - In ms output without `--ascii`, unicode is used whenever possible (e.g. for double quotes). - A few escapes are changed: e.g. `\[rs]` instead of `\\` for backslash, and `\ga]` instead of `` \` `` for backtick.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/GroffChar.hs31
-rw-r--r--src/Text/Pandoc/Writers/Groff.hs38
-rw-r--r--src/Text/Pandoc/Writers/Man.hs12
-rw-r--r--src/Text/Pandoc/Writers/Ms.hs52
4 files changed, 70 insertions, 63 deletions
diff --git a/src/Text/Pandoc/GroffChar.hs b/src/Text/Pandoc/GroffChar.hs
index 669b2b4a0..8664c627f 100644
--- a/src/Text/Pandoc/GroffChar.hs
+++ b/src/Text/Pandoc/GroffChar.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{-
Copyright (C) 2018 John MacFarlane <jgm@berkeley.edu>
@@ -400,19 +401,19 @@ characterCodes =
-- use like: \\[E a^ aa]
combiningAccents :: [(Char, String)]
combiningAccents =
- [ ('˝' , "\\[a\"]")
- , ('¯', "\\[a-]")
- , ('˙', "\\[a.]")
- , ('^', "\\[a^]")
- , ('´', "\\[aa]")
- , ('`', "\\[ga]")
- , ('˘', "\\[ab]")
- , ('¸', "\\[ac]")
- , ('¨', "\\[ad]")
- , ('ˇ', "\\[ah]")
- , ('˚', "\\[ao]")
- , ('~', "\\[a~]")
- , ('˛', "\\[ho]")
- , ('^', "\\[ha]")
- , ('~', "\\[ti]")
+ [ ('˝' , "a\"")
+ , ('¯', "a-")
+ , ('˙', "a.")
+ , ('^', "a^")
+ , ('´', "aa")
+ , ('`', "ga")
+ , ('˘', "ab")
+ , ('¸', "ac")
+ , ('¨', "ad")
+ , ('ˇ', "ah")
+ , ('˚', "ao")
+ , ('~', "a~")
+ , ('˛', "ho")
+ , ('^', "ha")
+ , ('~', "ti")
]
diff --git a/src/Text/Pandoc/Writers/Groff.hs b/src/Text/Pandoc/Writers/Groff.hs
index 3f90a1490..a3b81d138 100644
--- a/src/Text/Pandoc/Writers/Groff.hs
+++ b/src/Text/Pandoc/Writers/Groff.hs
@@ -37,12 +37,10 @@ module Text.Pandoc.Writers.Groff (
, escapeChar
, escapeString
, escapeCode
- , groffEscape
, withFontFeature
) where
import Prelude
-import qualified Data.Text as T
-import Data.Char (isAscii, ord)
+import Data.Char (ord, isAscii)
import Control.Monad.State.Strict
import Data.List (intercalate)
import qualified Data.Map as Map
@@ -51,7 +49,7 @@ import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Pretty
import Text.Printf (printf)
-import Text.Pandoc.GroffChar (essentialEscapes)
+import Text.Pandoc.GroffChar (essentialEscapes, characterCodes)
data WriterState = WriterState { stHasInlineMath :: Bool
, stFirstPara :: Bool
@@ -82,31 +80,35 @@ type Note = [Block]
type MS = StateT WriterState
-
-escapeChar :: Char -> String
-escapeChar c = fromMaybe [c] (Map.lookup c essentialEscapes)
+escapeChar :: Bool -> Char -> String
+escapeChar useAscii c =
+ case Map.lookup c essentialEscapes of
+ Just s -> s
+ Nothing
+ | useAscii
+ , not (isAscii c) ->
+ case Map.lookup c characterCodeMap of
+ Just t -> "\\[" <> t <> "]"
+ Nothing -> printf "\\[u%04X]" (ord c)
+ | otherwise -> [c]
-- | Escape special characters for groff.
-escapeString :: String -> String
-escapeString = concatMap escapeChar
+escapeString :: Bool -> String -> String
+escapeString useAscii = concatMap (escapeChar useAscii)
-- | Escape a literal (code) section for groff.
-escapeCode :: String -> String
-escapeCode = intercalate "\n" . map escapeLine . lines
+escapeCode :: Bool -> String -> String
+escapeCode useAScii = intercalate "\n" . map escapeLine . lines
where escapeCodeChar ' ' = "\\ "
escapeCodeChar '\t' = "\\\t"
- escapeCodeChar c = escapeChar c
+ escapeCodeChar c = escapeChar useAScii c
escapeLine codeline =
case concatMap escapeCodeChar codeline of
a@('.':_) -> "\\&" ++ a
b -> b
--- | Escape non-ASCII characters using groff \u[..] sequences.
-groffEscape :: T.Text -> T.Text
-groffEscape = T.concatMap toUchar
- where toUchar c
- | isAscii c = T.singleton c
- | otherwise = T.pack $ printf "\\[u%04X]" (ord c)
+characterCodeMap :: Map.Map Char String
+characterCodeMap = Map.fromList characterCodes
fontChange :: PandocMonad m => MS m Doc
fontChange = do
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 65aec81b3..839c37da9 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -33,6 +33,7 @@ Conversion of 'Pandoc' documents to groff man page format.
module Text.Pandoc.Writers.Man ( writeMan) where
import Prelude
import Control.Monad.State.Strict
+import Data.Char (isAscii)
import Data.List (intersperse, stripPrefix)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
@@ -93,8 +94,7 @@ pandocToMan opts (Pandoc meta blocks) = do
$ defField "has-tables" hasTables
$ defField "hyphenate" True
$ defField "pandoc-version" pandocVersion metadata
- (if writerPreferAscii opts then groffEscape else id) <$>
- case writerTemplate opts of
+ case writerTemplate opts of
Nothing -> return main
Just tpl -> renderTemplate' tpl context
@@ -148,7 +148,7 @@ blockToMan _ (CodeBlock _ str) = return $
text ".IP" $$
text ".nf" $$
text "\\f[C]" $$
- text (escapeCode str) $$
+ text (escapeCode True str) $$
text "\\f[R]" $$
text ".fi"
blockToMan opts (BlockQuote blocks) = do
@@ -296,10 +296,10 @@ inlineToMan opts (Quoted DoubleQuote lst) = do
inlineToMan opts (Cite _ lst) =
inlineListToMan opts lst
inlineToMan _ (Code _ str) =
- withFontFeature 'C' (return (text $ escapeCode str))
+ withFontFeature 'C' (return (text $ escapeCode True str))
inlineToMan _ (Str str@('.':_)) =
- return $ afterBreak "\\&" <> text (escapeString str)
-inlineToMan _ (Str str) = return $ text $ escapeString str
+ return $ afterBreak "\\&" <> text (escapeString True str)
+inlineToMan _ (Str str) = return $ text $ escapeString True str
inlineToMan opts (Math InlineMath str) =
lift (texMathToInlines InlineMath str) >>= inlineListToMan opts
inlineToMan opts (Math DisplayMath str) = do
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index cdca24702..ec7f9bf33 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -69,6 +69,9 @@ writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeMs opts document =
evalStateT (pandocToMs opts document) defaultWriterState
+escString :: WriterOptions -> String -> String
+escString opts = escapeString (writerPreferAscii opts)
+
-- | Return groff ms representation of document.
pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m Text
pandocToMs opts (Pandoc meta blocks) = do
@@ -84,8 +87,8 @@ pandocToMs opts (Pandoc meta blocks) = do
body <- blockListToMs opts blocks
let main = render' body
hasInlineMath <- gets stHasInlineMath
- let titleMeta = (escapeString . stringify) $ docTitle meta
- let authorsMeta = map (escapeString . stringify) $ docAuthors meta
+ let titleMeta = (escString opts . stringify) $ docTitle meta
+ let authorsMeta = map (escString opts . stringify) $ docAuthors meta
hasHighlighting <- gets stHighlighting
let highlightingMacros = if hasHighlighting
then case writerHighlightStyle opts of
@@ -101,8 +104,7 @@ pandocToMs opts (Pandoc meta blocks) = do
$ defField "title-meta" titleMeta
$ defField "author-meta" (intercalate "; " authorsMeta)
$ defField "highlighting-macros" highlightingMacros metadata
- (if writerPreferAscii opts then groffEscape else id) <$>
- case writerTemplate opts of
+ case writerTemplate opts of
Nothing -> return main
Just tpl -> renderTemplate' tpl context
@@ -112,18 +114,18 @@ escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c)
-- | Escape | character, used to mark inline math, inside math.
escapeBar :: String -> String
escapeBar = concatMap go
- where go '|' = "\\[u007C]"
+ where go '|' = "\\[ba]"
go c = [c]
-toSmallCaps :: String -> String
-toSmallCaps [] = []
-toSmallCaps (c:cs)
+toSmallCaps :: WriterOptions -> String -> String
+toSmallCaps _ [] = []
+toSmallCaps opts (c:cs)
| isLower c = let (lowers,rest) = span isLower (c:cs)
- in "\\s-2" ++ escapeString (map toUpper lowers) ++
- "\\s0" ++ toSmallCaps rest
+ in "\\s-2" ++ escString opts (map toUpper lowers) ++
+ "\\s0" ++ toSmallCaps opts rest
| isUpper c = let (uppers,rest) = span isUpper (c:cs)
- in escapeString uppers ++ toSmallCaps rest
- | otherwise = escapeChar c ++ toSmallCaps cs
+ in escString opts uppers ++ toSmallCaps opts rest
+ | otherwise = escapeChar (writerPreferAscii opts) c ++ toSmallCaps opts cs
-- We split inline lists into sentences, and print one sentence per
-- line. groff/troff treats the line-ending period differently.
@@ -160,7 +162,7 @@ blockToMs opts (Para [Image attr alt (src,_tit)])
_ -> empty
capt <- inlineListToMs' opts alt
return $ nowrap (text ".PSPIC -C " <>
- doubleQuotes (text (escapeString src)) <>
+ doubleQuotes (text (escString opts src)) <>
sizeAttrs) $$
text ".ce 1000" $$
capt $$
@@ -198,7 +200,7 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do
(if null secnum
then ""
else " ") ++
- escapeString (stringify inlines))
+ escString opts (stringify inlines))
let backlink = nowrap (text ".pdfhref L -D " <>
doubleQuotes (text (toAscii ident)) <> space <> text "\\") <> cr <>
text " -- "
@@ -400,14 +402,14 @@ inlineToMs opts (Cite _ lst) =
inlineToMs opts (Code attr str) = do
hlCode <- highlightCode opts attr str
withFontFeature 'C' (return hlCode)
-inlineToMs _ (Str str) = do
+inlineToMs opts (Str str) = do
let shim = case str of
'.':_ -> afterBreak "\\&"
_ -> empty
smallcaps <- gets stSmallCaps
if smallcaps
- then return $ shim <> text (toSmallCaps str)
- else return $ shim <> text (escapeString str)
+ then return $ shim <> text (toSmallCaps opts str)
+ else return $ shim <> text (escString opts str)
inlineToMs opts (Math InlineMath str) = do
modify $ \st -> st{ stHasInlineMath = True }
res <- convertMath writeEqn InlineMath str
@@ -449,9 +451,10 @@ inlineToMs opts (Link _ txt (src, _)) = do
doubleQuotes (text (escapeUri src)) <> text " -A " <>
doubleQuotes (text "\\c") <> space <> text "\\") <> cr <>
text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&"
-inlineToMs _ (Image _ alternate (_, _)) =
+inlineToMs opts (Image _ alternate (_, _)) =
return $ char '[' <> text "IMAGE: " <>
- text (escapeString (stringify alternate)) <> char ']'
+ text (escString opts (stringify alternate))
+ <> char ']'
inlineToMs _ (Note contents) = do
modify $ \st -> st{ stNotes = contents : stNotes st }
return $ text "\\**"
@@ -531,20 +534,21 @@ toMacro sty toktype =
-- lnColor = lineNumberColor sty
-- lnBkgColor = lineNumberBackgroundColor sty
-msFormatter :: FormatOptions -> [SourceLine] -> Doc
-msFormatter _fmtopts =
+msFormatter :: WriterOptions -> FormatOptions -> [SourceLine] -> Doc
+msFormatter opts _fmtopts =
vcat . map fmtLine
where fmtLine = hcat . map fmtToken
fmtToken (toktype, tok) = text "\\*" <>
brackets (text (show toktype) <> text " \""
- <> text (escapeCode (T.unpack tok)) <> text "\"")
+ <> text (escapeCode (writerPreferAscii opts)
+ (T.unpack tok)) <> text "\"")
highlightCode :: PandocMonad m => WriterOptions -> Attr -> String -> MS m Doc
highlightCode opts attr str =
- case highlight (writerSyntaxMap opts) msFormatter attr str of
+ case highlight (writerSyntaxMap opts) (msFormatter opts) attr str of
Left msg -> do
unless (null msg) $ report $ CouldNotHighlight msg
- return $ text (escapeCode str)
+ return $ text (escapeCode (writerPreferAscii opts) str)
Right h -> do
modify (\st -> st{ stHighlighting = True })
return h