aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2018-10-23 21:38:21 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2018-10-23 21:44:07 -0700
commit8efb8975ed641ddd075954e1ccc7f71eca1d3c16 (patch)
treeb1b3f561de86e3d012f1b5f1e82f141e32b43926 /src
parent556e3eef4a26eab9ea769e06668fcba546d76b18 (diff)
downloadpandoc-8efb8975ed641ddd075954e1ccc7f71eca1d3c16.tar.gz
Groff writer character escaping changes.
T.P.GroffChar: replaced `essentialEscapes` with `manEscapes`, which includes all the escapes mentioned in the groff_man manual. T.P.Writers.Groff: removed escapeCode; changed parameter on escapeString from Bool to new type `EscapeMode`. Rewrote `escapeString`.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/GroffChar.hs23
-rw-r--r--src/Text/Pandoc/Writers/Groff.hs52
-rw-r--r--src/Text/Pandoc/Writers/Man.hs20
-rw-r--r--src/Text/Pandoc/Writers/Ms.hs30
4 files changed, 70 insertions, 55 deletions
diff --git a/src/Text/Pandoc/GroffChar.hs b/src/Text/Pandoc/GroffChar.hs
index 6d7991e5d..efb3cf11a 100644
--- a/src/Text/Pandoc/GroffChar.hs
+++ b/src/Text/Pandoc/GroffChar.hs
@@ -31,24 +31,29 @@ Groff character escaping/unescaping.
-}
module Text.Pandoc.GroffChar (
- essentialEscapes
+ manEscapes
, characterCodes
, combiningAccents
) where
import Prelude
-import qualified Data.Map as Map
-essentialEscapes :: Map.Map Char String
-essentialEscapes = Map.fromList
- [ ('\160', "\\~")
+-- | These are the escapes specifically mentioned in groff_man(7).
+manEscapes :: [(Char, String)]
+manEscapes =
+ [ ('\160', "\\ ")
, ('\'', "\\[aq]")
- , ('`', "\\[ga]")
+ , ('‘', "\\[oq]")
+ , ('’', "\\[cq]")
, ('"', "\\[dq]")
- , ('~', "\\[ti]")
+ , ('“', "\\[lq]")
+ , ('”', "\\[rq]")
+ , ('—', "\\[em]")
+ , ('–', "\\[en]")
+ , ('`', "\\[ga]")
, ('^', "\\[ha]")
- , ('@', "\\[at]")
- , ('\\', "\\[rs]")
+ , ('~', "\\[ti]")
, ('-', "\\-") -- minus; - will be interpreted as hyphen U+2010
+ , ('\\', "\\[rs]")
, ('\x2026', "\\&...") -- because u2026 doesn't render on tty
]
diff --git a/src/Text/Pandoc/Writers/Groff.hs b/src/Text/Pandoc/Writers/Groff.hs
index fb3cc085b..b0e8d3d06 100644
--- a/src/Text/Pandoc/Writers/Groff.hs
+++ b/src/Text/Pandoc/Writers/Groff.hs
@@ -34,22 +34,21 @@ module Text.Pandoc.Writers.Groff (
, defaultWriterState
, MS
, Note
+ , EscapeMode(..)
, escapeString
- , escapeCode
, withFontFeature
) where
import Prelude
import Data.Char (ord, isAscii)
import Control.Monad.State.Strict
-import Data.List (intercalate)
import qualified Data.Map as Map
import Data.Maybe (fromMaybe, isJust, catMaybes)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Pretty
import Text.Printf (printf)
-import Text.Pandoc.GroffChar (essentialEscapes, characterCodes,
- combiningAccents)
+import Text.Pandoc.GroffChar (manEscapes,
+ characterCodes, combiningAccents)
data WriterState = WriterState { stHasInlineMath :: Bool
, stFirstPara :: Bool
@@ -80,33 +79,38 @@ type Note = [Block]
type MS = StateT WriterState
+data EscapeMode = AllowUTF8 -- ^ use preferred man escapes
+ | AsciiOnly -- ^ escape everything
+ deriving Show
+
combiningAccentsMap :: Map.Map Char String
combiningAccentsMap = Map.fromList combiningAccents
+essentialEscapes :: Map.Map Char String
+essentialEscapes = Map.fromList manEscapes
+
-- | Escape special characters for groff.
-escapeString :: Bool -> String -> String
+escapeString :: EscapeMode -> String -> String
escapeString _ [] = []
-escapeString useAscii (x:xs) =
+escapeString escapeMode ('\n':'.':xs) =
+ '\n':'\\':'&':'.':escapeString escapeMode xs
+escapeString escapeMode (x:xs) =
case Map.lookup x essentialEscapes of
- Just s -> s ++ escapeString useAscii xs
+ Just s -> s ++ escapeString escapeMode xs
Nothing
- | isAscii x || not useAscii -> x : escapeString useAscii xs
- | otherwise ->
- let accents = catMaybes $ takeWhile isJust
- (map (\c -> Map.lookup c combiningAccentsMap) xs)
- rest = drop (length accents) xs
- s = case Map.lookup x characterCodeMap of
- Just t -> "\\[" <> unwords (t:accents) <> "]"
- Nothing -> "\\[" <> unwords
- (printf "u%04X" (ord x) : accents) <> "]"
- in s ++ escapeString useAscii rest
-
--- | Escape a literal (code) section for groff.
-escapeCode :: Bool -> String -> String
-escapeCode useAscii = intercalate "\n" . map escapeLine . lines
- where escapeLine xs = case xs of
- ('.':_) -> "\\%" ++ escapeString useAscii xs
- _ -> escapeString useAscii xs
+ | isAscii x -> x : escapeString escapeMode xs
+ | otherwise ->
+ case escapeMode of
+ AllowUTF8 -> x : escapeString escapeMode xs
+ AsciiOnly ->
+ let accents = catMaybes $ takeWhile isJust
+ (map (\c -> Map.lookup c combiningAccentsMap) xs)
+ rest = drop (length accents) xs
+ s = case Map.lookup x characterCodeMap of
+ Just t -> "\\[" <> unwords (t:accents) <> "]"
+ Nothing -> "\\[" <> unwords
+ (printf "u%04X" (ord x) : accents) <> "]"
+ in s ++ escapeString escapeMode rest
characterCodeMap :: Map.Map Char String
characterCodeMap = Map.fromList characterCodes
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 645476b77..b32d2ff6c 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -97,6 +97,9 @@ pandocToMan opts (Pandoc meta blocks) = do
Nothing -> return main
Just tpl -> renderTemplate' tpl context
+escString :: WriterOptions -> String -> String
+escString _ = escapeString AsciiOnly -- for better portability
+
-- | Return man representation of notes.
notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc
notesToMan opts notes =
@@ -143,11 +146,14 @@ blockToMan opts (Header level _ inlines) = do
1 -> ".SH "
_ -> ".SS "
return $ text heading <> contents
-blockToMan _ (CodeBlock _ str) = return $
+blockToMan opts (CodeBlock _ str) = return $
text ".IP" $$
text ".nf" $$
text "\\f[C]" $$
- text (escapeCode True str) $$
+ ((case str of
+ '.':_ -> text "\\&"
+ _ -> mempty) <>
+ text (escString opts str)) $$
text "\\f[R]" $$
text ".fi"
blockToMan opts (BlockQuote blocks) = do
@@ -296,11 +302,11 @@ inlineToMan opts (Quoted DoubleQuote lst) = do
return $ text "\\[lq]" <> contents <> text "\\[rq]"
inlineToMan opts (Cite _ lst) =
inlineListToMan opts lst
-inlineToMan _ (Code _ str) =
- withFontFeature 'C' (return (text $ escapeCode True str))
-inlineToMan _ (Str str@('.':_)) =
- return $ afterBreak "\\&" <> text (escapeString True str)
-inlineToMan _ (Str str) = return $ text $ escapeString True str
+inlineToMan opts (Code _ str) =
+ withFontFeature 'C' (return (text $ escString opts str))
+inlineToMan opts (Str str@('.':_)) =
+ return $ afterBreak "\\&" <> text (escString opts str)
+inlineToMan opts (Str str) = return $ text $ escString opts str
inlineToMan opts (Math InlineMath str) =
lift (texMathToInlines InlineMath str) >>= inlineListToMan opts
inlineToMan opts (Math DisplayMath str) = do
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index e077e9ed9..2fb949cb9 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -69,9 +69,6 @@ writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeMs opts document =
evalStateT (pandocToMs opts document) defaultWriterState
-escString :: WriterOptions -> String -> String
-escString opts = escapeString (writerPreferAscii opts)
-
-- | Return groff ms representation of document.
pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m Text
pandocToMs opts (Pandoc meta blocks) = do
@@ -87,8 +84,8 @@ pandocToMs opts (Pandoc meta blocks) = do
body <- blockListToMs opts blocks
let main = render' body
hasInlineMath <- gets stHasInlineMath
- let titleMeta = (escString opts . stringify) $ docTitle meta
- let authorsMeta = map (escString opts . stringify) $ docAuthors meta
+ let titleMeta = (escapeStr opts . stringify) $ docTitle meta
+ let authorsMeta = map (escapeStr opts . stringify) $ docAuthors meta
hasHighlighting <- gets stHighlighting
let highlightingMacros = if hasHighlighting
then case writerHighlightStyle opts of
@@ -108,6 +105,10 @@ pandocToMs opts (Pandoc meta blocks) = do
Nothing -> return main
Just tpl -> renderTemplate' tpl context
+escapeStr :: WriterOptions -> String -> String
+escapeStr opts =
+ escapeString (if writerPreferAscii opts then AsciiOnly else AllowUTF8)
+
escapeUri :: String -> String
escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c)
@@ -121,11 +122,11 @@ toSmallCaps :: WriterOptions -> String -> String
toSmallCaps _ [] = []
toSmallCaps opts (c:cs)
| isLower c = let (lowers,rest) = span isLower (c:cs)
- in "\\s-2" ++ escString opts (map toUpper lowers) ++
+ in "\\s-2" ++ escapeStr opts (map toUpper lowers) ++
"\\s0" ++ toSmallCaps opts rest
| isUpper c = let (uppers,rest) = span isUpper (c:cs)
- in escString opts uppers ++ toSmallCaps opts rest
- | otherwise = escapeString (writerPreferAscii opts) [c] ++ toSmallCaps opts cs
+ in escapeStr opts uppers ++ toSmallCaps opts rest
+ | otherwise = escapeStr opts [c] ++ toSmallCaps opts cs
-- We split inline lists into sentences, and print one sentence per
-- line. groff/troff treats the line-ending period differently.
@@ -162,7 +163,7 @@ blockToMs opts (Para [Image attr alt (src,_tit)])
_ -> empty
capt <- inlineListToMs' opts alt
return $ nowrap (text ".PSPIC -C " <>
- doubleQuotes (text (escString opts src)) <>
+ doubleQuotes (text (escapeStr opts src)) <>
sizeAttrs) $$
text ".ce 1000" $$
capt $$
@@ -200,7 +201,7 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do
(if null secnum
then ""
else " ") ++
- escString opts (stringify inlines))
+ escapeStr opts (stringify inlines))
let backlink = nowrap (text ".pdfhref L -D " <>
doubleQuotes (text (toAscii ident)) <> space <> text "\\") <> cr <>
text " -- "
@@ -409,7 +410,7 @@ inlineToMs opts (Str str) = do
smallcaps <- gets stSmallCaps
if smallcaps
then return $ shim <> text (toSmallCaps opts str)
- else return $ shim <> text (escString opts str)
+ else return $ shim <> text (escapeStr opts str)
inlineToMs opts (Math InlineMath str) = do
modify $ \st -> st{ stHasInlineMath = True }
res <- convertMath writeEqn InlineMath str
@@ -453,7 +454,7 @@ inlineToMs opts (Link _ txt (src, _)) = do
text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&"
inlineToMs opts (Image _ alternate (_, _)) =
return $ char '[' <> text "IMAGE: " <>
- text (escString opts (stringify alternate))
+ text (escapeStr opts (stringify alternate))
<> char ']'
inlineToMs _ (Note contents) = do
modify $ \st -> st{ stNotes = contents : stNotes st }
@@ -540,15 +541,14 @@ msFormatter opts _fmtopts =
where fmtLine = hcat . map fmtToken
fmtToken (toktype, tok) = text "\\*" <>
brackets (text (show toktype) <> text " \""
- <> text (escapeCode (writerPreferAscii opts)
- (T.unpack tok)) <> text "\"")
+ <> text (escapeStr opts (T.unpack tok)) <> text "\"")
highlightCode :: PandocMonad m => WriterOptions -> Attr -> String -> MS m Doc
highlightCode opts attr str =
case highlight (writerSyntaxMap opts) (msFormatter opts) attr str of
Left msg -> do
unless (null msg) $ report $ CouldNotHighlight msg
- return $ text (escapeCode (writerPreferAscii opts) str)
+ return $ text (escapeStr opts str)
Right h -> do
modify (\st -> st{ stHighlighting = True })
return h