From 261cbad0ef2f42735c3914c6c448f5f21331de14 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 22 Oct 2018 23:47:14 -0700 Subject: Groff writers escaping changes. - Improve escaping of accented characters with `--ascii`. Combining accents are now handled properly. - Don't escape spaces and tabs in code blocks. This doesn't seem to be necessary. --- src/Text/Pandoc/Writers/Groff.hs | 46 +++++++++++++++++++++------------------- test/writer.man | 32 ++++++++++++++-------------- test/writer.ms | 32 ++++++++++++++-------------- 3 files changed, 56 insertions(+), 54 deletions(-) diff --git a/src/Text/Pandoc/Writers/Groff.hs b/src/Text/Pandoc/Writers/Groff.hs index b7f039cc8..fb3cc085b 100644 --- a/src/Text/Pandoc/Writers/Groff.hs +++ b/src/Text/Pandoc/Writers/Groff.hs @@ -43,12 +43,13 @@ import Data.Char (ord, isAscii) import Control.Monad.State.Strict import Data.List (intercalate) import qualified Data.Map as Map -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, isJust, catMaybes) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Pretty import Text.Printf (printf) -import Text.Pandoc.GroffChar (essentialEscapes, characterCodes) +import Text.Pandoc.GroffChar (essentialEscapes, characterCodes, + combiningAccents) data WriterState = WriterState { stHasInlineMath :: Bool , stFirstPara :: Bool @@ -79,32 +80,33 @@ type Note = [Block] type MS = StateT WriterState -escapeChar :: Bool -> Char -> String -escapeChar useAscii c = - case Map.lookup c essentialEscapes of - Just s -> s - Nothing - | useAscii - , not (isAscii c) -> - case Map.lookup c characterCodeMap of - Just t -> "\\[" <> t <> "]" - Nothing -> printf "\\[u%04X]" (ord c) - | otherwise -> [c] +combiningAccentsMap :: Map.Map Char String +combiningAccentsMap = Map.fromList combiningAccents -- | Escape special characters for groff. escapeString :: Bool -> String -> String -escapeString useAscii = concatMap (escapeChar useAscii) +escapeString _ [] = [] +escapeString useAscii (x:xs) = + case Map.lookup x essentialEscapes of + Just s -> s ++ escapeString useAscii xs + Nothing + | isAscii x || not useAscii -> x : escapeString useAscii xs + | otherwise -> + let accents = catMaybes $ takeWhile isJust + (map (\c -> Map.lookup c combiningAccentsMap) xs) + rest = drop (length accents) xs + s = case Map.lookup x characterCodeMap of + Just t -> "\\[" <> unwords (t:accents) <> "]" + Nothing -> "\\[" <> unwords + (printf "u%04X" (ord x) : accents) <> "]" + in s ++ escapeString useAscii rest -- | Escape a literal (code) section for groff. escapeCode :: Bool -> String -> String -escapeCode useAScii = intercalate "\n" . map escapeLine . lines - where escapeCodeChar ' ' = "\\ " - escapeCodeChar '\t' = "\\\t" - escapeCodeChar c = escapeChar useAScii c - escapeLine codeline = - case concatMap escapeCodeChar codeline of - a@('.':_) -> "\\&" ++ a - b -> b +escapeCode useAscii = intercalate "\n" . map escapeLine . lines + where escapeLine xs = case xs of + ('.':_) -> "\\%" ++ escapeString useAscii xs + _ -> escapeString useAscii xs characterCodeMap :: Map.Map Char String characterCodeMap = Map.fromList characterCodes diff --git a/test/writer.man b/test/writer.man index 33cb576ff..4a05b74fd 100644 --- a/test/writer.man +++ b/test/writer.man @@ -54,8 +54,8 @@ Code in a block quote: .IP .nf \f[C] -sub\ status\ { -\ \ \ \ print\ \[dq]working\[dq]; +sub status { + print \[dq]working\[dq]; } \f[R] .fi @@ -88,13 +88,13 @@ Code: .IP .nf \f[C] -\-\-\-\-\ (should\ be\ four\ hyphens) +\-\-\-\- (should be four hyphens) -sub\ status\ { -\ \ \ \ print\ \[dq]working\[dq]; +sub status { + print \[dq]working\[dq]; } -this\ code\ block\ is\ indented\ by\ one\ tab +this code block is indented by one tab \f[R] .fi .PP @@ -102,9 +102,9 @@ And: .IP .nf \f[C] -\ \ \ \ this\ code\ block\ is\ indented\ by\ two\ tabs + this code block is indented by two tabs -These\ should\ not\ be\ escaped:\ \ \[rs]$\ \[rs]\[rs]\ \[rs]>\ \[rs][\ \[rs]{ +These should not be escaped: \[rs]$ \[rs]\[rs] \[rs]> \[rs][ \[rs]{ \f[R] .fi .PP @@ -364,7 +364,7 @@ orange fruit .IP .nf \f[C] -{\ orange\ code\ block\ } +{ orange code block } \f[R] .fi .RS @@ -430,7 +430,7 @@ This should be a code block, though: .nf \f[C]
-\ \ \ \ foo + foo
\f[R] .fi @@ -454,7 +454,7 @@ Code block: .IP .nf \f[C] - + \f[R] .fi .PP @@ -464,7 +464,7 @@ Code: .IP .nf \f[C] - +
\f[R] .fi .PP @@ -545,7 +545,7 @@ Here\[cq]s one that has a line break in it: .PP These shouldn\[cq]t be math: .IP \[bu] 2 -To get the famous equation, write \f[C]$e\ =\ mc\[ha]2$\f[R]. +To get the famous equation, write \f[C]$e = mc\[ha]2$\f[R]. .IP \[bu] 2 $22,000 is a \f[I]lot\f[R] of money. So is $34,000. @@ -653,7 +653,7 @@ This should [not][] be a link. .IP .nf \f[C] -[not]:\ /url +[not]: /url \f[R] .fi .PP @@ -690,7 +690,7 @@ Auto\-links should not occur here: \f[C]\f[R] .IP .nf \f[C] -or\ here:\ +or here: \f[R] .fi .PP @@ -733,7 +733,7 @@ with list items). .IP .nf \f[C] -\ \ {\ \ } + { } \f[R] .fi .PP diff --git a/test/writer.ms b/test/writer.ms index 12a7fc778..40ecddb27 100644 --- a/test/writer.ms +++ b/test/writer.ms @@ -158,8 +158,8 @@ Code in a block quote: .IP .nf \f[C] -sub\ status\ { -\ \ \ \ print\ \[dq]working\[dq]; +sub status { + print \[dq]working\[dq]; } \f[] .fi @@ -194,13 +194,13 @@ Code: .IP .nf \f[C] -\-\-\-\-\ (should\ be\ four\ hyphens) +\-\-\-\- (should be four hyphens) -sub\ status\ { -\ \ \ \ print\ \[dq]working\[dq]; +sub status { + print \[dq]working\[dq]; } -this\ code\ block\ is\ indented\ by\ one\ tab +this code block is indented by one tab \f[] .fi .LP @@ -208,9 +208,9 @@ And: .IP .nf \f[C] -\ \ \ \ this\ code\ block\ is\ indented\ by\ two\ tabs + this code block is indented by two tabs -These\ should\ not\ be\ escaped:\ \ \[rs]$\ \[rs]\[rs]\ \[rs]>\ \[rs][\ \[rs]{ +These should not be escaped: \[rs]$ \[rs]\[rs] \[rs]> \[rs][ \[rs]{ \f[] .fi .HLINE @@ -496,7 +496,7 @@ orange fruit .IP .nf \f[C] -{\ orange\ code\ block\ } +{ orange code block } \f[] .fi .RS @@ -579,7 +579,7 @@ This should be a code block, though: .nf \f[C]
-\ \ \ \ foo + foo
\f[] .fi @@ -603,7 +603,7 @@ Code block: .IP .nf \f[C] - + \f[] .fi .LP @@ -613,7 +613,7 @@ Code: .IP .nf \f[C] - +
\f[] .fi .LP @@ -704,7 +704,7 @@ Here’s one that has a line break in it: @alpha + omega times x sup 2@. .LP These shouldn’t be math: .IP \[bu] 3 -To get the famous equation, write \f[C]$e\ =\ mc\[ha]2$\f[R]. +To get the famous equation, write \f[C]$e = mc\[ha]2$\f[R]. .IP \[bu] 3 $22,000 is a \f[I]lot\f[R] of money. So is $34,000. @@ -867,7 +867,7 @@ This should [not][] be a link. .IP .nf \f[C] -[not]:\ /url +[not]: /url \f[] .fi .LP @@ -939,7 +939,7 @@ Auto\-links should not occur here: \f[C]\f[R] .IP .nf \f[C] -or\ here:\ +or here: \f[] .fi .HLINE @@ -975,7 +975,7 @@ with list items). .IP .nf \f[C] -\ \ {\ \ } + { } \f[] .fi .LP -- cgit v1.2.3