diff options
-rw-r--r-- | src/Text/Pandoc/GroffChar.hs | 23 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Groff.hs | 52 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Man.hs | 20 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Ms.hs | 30 | ||||
-rw-r--r-- | test/command/4550.md | 2 | ||||
-rw-r--r-- | test/tables.ms | 8 | ||||
-rw-r--r-- | test/writer.man | 6 | ||||
-rw-r--r-- | test/writer.ms | 46 |
8 files changed, 101 insertions, 86 deletions
diff --git a/src/Text/Pandoc/GroffChar.hs b/src/Text/Pandoc/GroffChar.hs index 6d7991e5d..efb3cf11a 100644 --- a/src/Text/Pandoc/GroffChar.hs +++ b/src/Text/Pandoc/GroffChar.hs @@ -31,24 +31,29 @@ Groff character escaping/unescaping. -} module Text.Pandoc.GroffChar ( - essentialEscapes + manEscapes , characterCodes , combiningAccents ) where import Prelude -import qualified Data.Map as Map -essentialEscapes :: Map.Map Char String -essentialEscapes = Map.fromList - [ ('\160', "\\~") +-- | These are the escapes specifically mentioned in groff_man(7). +manEscapes :: [(Char, String)] +manEscapes = + [ ('\160', "\\ ") , ('\'', "\\[aq]") - , ('`', "\\[ga]") + , ('‘', "\\[oq]") + , ('’', "\\[cq]") , ('"', "\\[dq]") - , ('~', "\\[ti]") + , ('“', "\\[lq]") + , ('”', "\\[rq]") + , ('—', "\\[em]") + , ('–', "\\[en]") + , ('`', "\\[ga]") , ('^', "\\[ha]") - , ('@', "\\[at]") - , ('\\', "\\[rs]") + , ('~', "\\[ti]") , ('-', "\\-") -- minus; - will be interpreted as hyphen U+2010 + , ('\\', "\\[rs]") , ('\x2026', "\\&...") -- because u2026 doesn't render on tty ] diff --git a/src/Text/Pandoc/Writers/Groff.hs b/src/Text/Pandoc/Writers/Groff.hs index fb3cc085b..b0e8d3d06 100644 --- a/src/Text/Pandoc/Writers/Groff.hs +++ b/src/Text/Pandoc/Writers/Groff.hs @@ -34,22 +34,21 @@ module Text.Pandoc.Writers.Groff ( , defaultWriterState , MS , Note + , EscapeMode(..) , escapeString - , escapeCode , withFontFeature ) where import Prelude import Data.Char (ord, isAscii) import Control.Monad.State.Strict -import Data.List (intercalate) import qualified Data.Map as Map import Data.Maybe (fromMaybe, isJust, catMaybes) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Pretty import Text.Printf (printf) -import Text.Pandoc.GroffChar (essentialEscapes, characterCodes, - combiningAccents) +import Text.Pandoc.GroffChar (manEscapes, + characterCodes, combiningAccents) data WriterState = WriterState { stHasInlineMath :: Bool , stFirstPara :: Bool @@ -80,33 +79,38 @@ type Note = [Block] type MS = StateT WriterState +data EscapeMode = AllowUTF8 -- ^ use preferred man escapes + | AsciiOnly -- ^ escape everything + deriving Show + combiningAccentsMap :: Map.Map Char String combiningAccentsMap = Map.fromList combiningAccents +essentialEscapes :: Map.Map Char String +essentialEscapes = Map.fromList manEscapes + -- | Escape special characters for groff. -escapeString :: Bool -> String -> String +escapeString :: EscapeMode -> String -> String escapeString _ [] = [] -escapeString useAscii (x:xs) = +escapeString escapeMode ('\n':'.':xs) = + '\n':'\\':'&':'.':escapeString escapeMode xs +escapeString escapeMode (x:xs) = case Map.lookup x essentialEscapes of - Just s -> s ++ escapeString useAscii xs + Just s -> s ++ escapeString escapeMode xs Nothing - | isAscii x || not useAscii -> x : escapeString useAscii xs - | otherwise -> - let accents = catMaybes $ takeWhile isJust - (map (\c -> Map.lookup c combiningAccentsMap) xs) - rest = drop (length accents) xs - s = case Map.lookup x characterCodeMap of - Just t -> "\\[" <> unwords (t:accents) <> "]" - Nothing -> "\\[" <> unwords - (printf "u%04X" (ord x) : accents) <> "]" - in s ++ escapeString useAscii rest - --- | Escape a literal (code) section for groff. -escapeCode :: Bool -> String -> String -escapeCode useAscii = intercalate "\n" . map escapeLine . lines - where escapeLine xs = case xs of - ('.':_) -> "\\%" ++ escapeString useAscii xs - _ -> escapeString useAscii xs + | isAscii x -> x : escapeString escapeMode xs + | otherwise -> + case escapeMode of + AllowUTF8 -> x : escapeString escapeMode xs + AsciiOnly -> + let accents = catMaybes $ takeWhile isJust + (map (\c -> Map.lookup c combiningAccentsMap) xs) + rest = drop (length accents) xs + s = case Map.lookup x characterCodeMap of + Just t -> "\\[" <> unwords (t:accents) <> "]" + Nothing -> "\\[" <> unwords + (printf "u%04X" (ord x) : accents) <> "]" + in s ++ escapeString escapeMode rest characterCodeMap :: Map.Map Char String characterCodeMap = Map.fromList characterCodes diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 645476b77..b32d2ff6c 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -97,6 +97,9 @@ pandocToMan opts (Pandoc meta blocks) = do Nothing -> return main Just tpl -> renderTemplate' tpl context +escString :: WriterOptions -> String -> String +escString _ = escapeString AsciiOnly -- for better portability + -- | Return man representation of notes. notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc notesToMan opts notes = @@ -143,11 +146,14 @@ blockToMan opts (Header level _ inlines) = do 1 -> ".SH " _ -> ".SS " return $ text heading <> contents -blockToMan _ (CodeBlock _ str) = return $ +blockToMan opts (CodeBlock _ str) = return $ text ".IP" $$ text ".nf" $$ text "\\f[C]" $$ - text (escapeCode True str) $$ + ((case str of + '.':_ -> text "\\&" + _ -> mempty) <> + text (escString opts str)) $$ text "\\f[R]" $$ text ".fi" blockToMan opts (BlockQuote blocks) = do @@ -296,11 +302,11 @@ inlineToMan opts (Quoted DoubleQuote lst) = do return $ text "\\[lq]" <> contents <> text "\\[rq]" inlineToMan opts (Cite _ lst) = inlineListToMan opts lst -inlineToMan _ (Code _ str) = - withFontFeature 'C' (return (text $ escapeCode True str)) -inlineToMan _ (Str str@('.':_)) = - return $ afterBreak "\\&" <> text (escapeString True str) -inlineToMan _ (Str str) = return $ text $ escapeString True str +inlineToMan opts (Code _ str) = + withFontFeature 'C' (return (text $ escString opts str)) +inlineToMan opts (Str str@('.':_)) = + return $ afterBreak "\\&" <> text (escString opts str) +inlineToMan opts (Str str) = return $ text $ escString opts str inlineToMan opts (Math InlineMath str) = lift (texMathToInlines InlineMath str) >>= inlineListToMan opts inlineToMan opts (Math DisplayMath str) = do diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index e077e9ed9..2fb949cb9 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -69,9 +69,6 @@ writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMs opts document = evalStateT (pandocToMs opts document) defaultWriterState -escString :: WriterOptions -> String -> String -escString opts = escapeString (writerPreferAscii opts) - -- | Return groff ms representation of document. pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m Text pandocToMs opts (Pandoc meta blocks) = do @@ -87,8 +84,8 @@ pandocToMs opts (Pandoc meta blocks) = do body <- blockListToMs opts blocks let main = render' body hasInlineMath <- gets stHasInlineMath - let titleMeta = (escString opts . stringify) $ docTitle meta - let authorsMeta = map (escString opts . stringify) $ docAuthors meta + let titleMeta = (escapeStr opts . stringify) $ docTitle meta + let authorsMeta = map (escapeStr opts . stringify) $ docAuthors meta hasHighlighting <- gets stHighlighting let highlightingMacros = if hasHighlighting then case writerHighlightStyle opts of @@ -108,6 +105,10 @@ pandocToMs opts (Pandoc meta blocks) = do Nothing -> return main Just tpl -> renderTemplate' tpl context +escapeStr :: WriterOptions -> String -> String +escapeStr opts = + escapeString (if writerPreferAscii opts then AsciiOnly else AllowUTF8) + escapeUri :: String -> String escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c) @@ -121,11 +122,11 @@ toSmallCaps :: WriterOptions -> String -> String toSmallCaps _ [] = [] toSmallCaps opts (c:cs) | isLower c = let (lowers,rest) = span isLower (c:cs) - in "\\s-2" ++ escString opts (map toUpper lowers) ++ + in "\\s-2" ++ escapeStr opts (map toUpper lowers) ++ "\\s0" ++ toSmallCaps opts rest | isUpper c = let (uppers,rest) = span isUpper (c:cs) - in escString opts uppers ++ toSmallCaps opts rest - | otherwise = escapeString (writerPreferAscii opts) [c] ++ toSmallCaps opts cs + in escapeStr opts uppers ++ toSmallCaps opts rest + | otherwise = escapeStr opts [c] ++ toSmallCaps opts cs -- We split inline lists into sentences, and print one sentence per -- line. groff/troff treats the line-ending period differently. @@ -162,7 +163,7 @@ blockToMs opts (Para [Image attr alt (src,_tit)]) _ -> empty capt <- inlineListToMs' opts alt return $ nowrap (text ".PSPIC -C " <> - doubleQuotes (text (escString opts src)) <> + doubleQuotes (text (escapeStr opts src)) <> sizeAttrs) $$ text ".ce 1000" $$ capt $$ @@ -200,7 +201,7 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do (if null secnum then "" else " ") ++ - escString opts (stringify inlines)) + escapeStr opts (stringify inlines)) let backlink = nowrap (text ".pdfhref L -D " <> doubleQuotes (text (toAscii ident)) <> space <> text "\\") <> cr <> text " -- " @@ -409,7 +410,7 @@ inlineToMs opts (Str str) = do smallcaps <- gets stSmallCaps if smallcaps then return $ shim <> text (toSmallCaps opts str) - else return $ shim <> text (escString opts str) + else return $ shim <> text (escapeStr opts str) inlineToMs opts (Math InlineMath str) = do modify $ \st -> st{ stHasInlineMath = True } res <- convertMath writeEqn InlineMath str @@ -453,7 +454,7 @@ inlineToMs opts (Link _ txt (src, _)) = do text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" inlineToMs opts (Image _ alternate (_, _)) = return $ char '[' <> text "IMAGE: " <> - text (escString opts (stringify alternate)) + text (escapeStr opts (stringify alternate)) <> char ']' inlineToMs _ (Note contents) = do modify $ \st -> st{ stNotes = contents : stNotes st } @@ -540,15 +541,14 @@ msFormatter opts _fmtopts = where fmtLine = hcat . map fmtToken fmtToken (toktype, tok) = text "\\*" <> brackets (text (show toktype) <> text " \"" - <> text (escapeCode (writerPreferAscii opts) - (T.unpack tok)) <> text "\"") + <> text (escapeStr opts (T.unpack tok)) <> text "\"") highlightCode :: PandocMonad m => WriterOptions -> Attr -> String -> MS m Doc highlightCode opts attr str = case highlight (writerSyntaxMap opts) (msFormatter opts) attr str of Left msg -> do unless (null msg) $ report $ CouldNotHighlight msg - return $ text (escapeCode (writerPreferAscii opts) str) + return $ text (escapeStr opts str) Right h -> do modify (\st -> st{ stHighlighting = True }) return h diff --git a/test/command/4550.md b/test/command/4550.md index bf3afce5b..45ed21a00 100644 --- a/test/command/4550.md +++ b/test/command/4550.md @@ -3,5 +3,5 @@ A ‘simple’ example ^D .LP -A ‘simple’ example +A \[oq]simple\[cq] example ``` diff --git a/test/tables.ms b/test/tables.ms index 6d9138c64..90662aaad 100644 --- a/test/tables.ms +++ b/test/tables.ms @@ -135,7 +135,7 @@ T} .LP Multiline table with caption: .PP -Here’s the caption. It may span multiple lines. +Here\[cq]s the caption. It may span multiple lines. .TS delim(@@) tab( ); cw(10.5n) lw(9.6n) rw(11.4n) lw(24.5n). @@ -165,7 +165,7 @@ row T} T{ 5.0 T} T{ -Here’s another one. +Here\[cq]s another one. Note the blank line between rows. T} .TE @@ -201,7 +201,7 @@ row T} T{ 5.0 T} T{ -Here’s another one. +Here\[cq]s another one. Note the blank line between rows. T} .TE @@ -261,7 +261,7 @@ row T} T{ 5.0 T} T{ -Here’s another one. +Here\[cq]s another one. Note the blank line between rows. T} .TE diff --git a/test/writer.man b/test/writer.man index 4a05b74fd..4dab58c21 100644 --- a/test/writer.man +++ b/test/writer.man @@ -308,7 +308,7 @@ Nested. .PP Should not be a list item: .PP -M.A.\~2007 +M.A.\ 2007 .PP B. Williams @@ -492,9 +492,9 @@ This is code: \f[C]>\f[R], \f[C]$\f[R], \f[C]\[rs]\f[R], \f[C]\[rs]$\f[R], .PP [STRIKEOUT:This is \f[I]strikeout\f[R].] .PP -Superscripts: a^bc^d a^\f[I]hello\f[R]^ a^hello\~there^. +Superscripts: a^bc^d a^\f[I]hello\f[R]^ a^hello\ there^. .PP -Subscripts: H~2~O, H~23~O, H~many\~of\~them~O. +Subscripts: H~2~O, H~23~O, H~many\ of\ them~O. .PP These should not be superscripts or subscripts, because of the unescaped spaces: a\[ha]b c\[ha]d, a\[ti]b c\[ti]d. diff --git a/test/writer.ms b/test/writer.ms index 40ecddb27..0cb60a3bc 100644 --- a/test/writer.ms +++ b/test/writer.ms @@ -75,7 +75,7 @@ July 17, 2006 .1C .LP This is a set of tests for pandoc. -Most of them are adapted from John Gruber’s markdown test suite. +Most of them are adapted from John Gruber\[cq]s markdown test suite. .HLINE .SH 1 Headers @@ -126,7 +126,7 @@ Paragraphs .pdfhref O 1 "Paragraphs" .pdfhref M "paragraphs" .LP -Here’s a regular paragraph. +Here\[cq]s a regular paragraph. .PP In Markdown 1.0.0 and earlier. Version 8. @@ -134,7 +134,7 @@ This line turns into a list item. Because a hard\-wrapped line in the middle of a paragraph looked like a list item. .PP -Here’s one with a bullet. +Here\[cq]s one with a bullet. * criminey. .PP There should be a hard line break @@ -314,7 +314,7 @@ Item 1, graf one. .PP Item 1. graf two. -The quick brown fox jumped over the lazy dog’s back. +The quick brown fox jumped over the lazy dog\[cq]s back. .RE .IP " 2." 4 Item 2. @@ -335,7 +335,7 @@ Tab .RE .RE .LP -Here’s another: +Here\[cq]s another: .IP " 1." 4 First .IP " 2." 4 @@ -431,7 +431,7 @@ Nested. .LP Should not be a list item: .PP -M.A.\~2007 +M.A.\ 2007 .PP B. Williams @@ -570,7 +570,7 @@ Interpreted markdown in a table: This is \f[I]emphasized\f[R] And this is \f[B]strong\f[R] .PP -Here’s a simple block: +Here\[cq]s a simple block: .LP foo .LP @@ -617,7 +617,7 @@ Code: \f[] .fi .LP -Hr’s: +Hr\[cq]s: .HLINE .SH 1 Inline Markup @@ -646,9 +646,9 @@ This is code: \f[C]>\f[R], \f[C]$\f[R], \f[C]\[rs]\f[R], \f[C]\[rs]$\f[R], .PP \m[strikecolor]This is \f[I]strikeout\f[R].\m[] .PP -Superscripts: a\*{bc\*}d a\*{\f[I]hello\f[R]\*} a\*{hello\~there\*}. +Superscripts: a\*{bc\*}d a\*{\f[I]hello\f[R]\*} a\*{hello\ there\*}. .PP -Subscripts: H\*<2\*>O, H\*<23\*>O, H\*<many\~of\~them\*>O. +Subscripts: H\*<2\*>O, H\*<23\*>O, H\*<many\ of\ them\*>O. .PP These should not be superscripts or subscripts, because of the unescaped spaces: a\[ha]b c\[ha]d, a\[ti]b c\[ti]d. @@ -666,16 +666,16 @@ Smart quotes, ellipses, dashes `Oak,' `elm,' and `beech' are names of trees. So is `pine.' .PP -`He said, \[lq]I want to go.\[rq]' Were you alive in the 70’s? +`He said, \[lq]I want to go.\[rq]' Were you alive in the 70\[cq]s? .PP Here is some quoted `\f[C]code\f[R]' and a \[lq]\c .pdfhref W -D "http://example.com/?foo=1&bar=2" -A "\c" \ -- "quoted link" \&\[rq]. .PP -Some dashes: one—two — three—four — five. +Some dashes: one\[em]two \[em] three\[em]four \[em] five. .PP -Dashes between numbers: 5–7, 255–66, 1987–1999. +Dashes between numbers: 5\[en]7, 255\[en]66, 1987\[en]1999. .PP Ellipses\&...and\&...and\&.... .HLINE @@ -695,14 +695,14 @@ LaTeX .IP \[bu] 3 @p@\-Tree .IP \[bu] 3 -Here’s some display math: +Here\[cq]s some display math: .EQ d over {d x} f ( x ) = lim sub {h -> 0} {f ( x + h ) \[u2212] f ( x )} over h .EN .IP \[bu] 3 -Here’s one that has a line break in it: @alpha + omega times x sup 2@. +Here\[cq]s one that has a line break in it: @alpha + omega times x sup 2@. .LP -These shouldn’t be math: +These shouldn\[cq]t be math: .IP \[bu] 3 To get the famous equation, write \f[C]$e = mc\[ha]2$\f[R]. .IP \[bu] 3 @@ -714,7 +714,7 @@ Shoes ($20) and socks ($5). .IP \[bu] 3 Escaped \f[C]$\f[R]: $73 \f[I]this should be emphasized\f[R] 23$. .LP -Here’s a LaTeX table: +Here\[cq]s a LaTeX table: .HLINE .SH 1 Special Characters @@ -885,22 +885,22 @@ With ampersands .pdfhref O 2 "With ampersands" .pdfhref M "with-ampersands" .LP -Here’s a \c +Here\[cq]s a \c .pdfhref W -D "http://example.com/?foo=1&bar=2" -A "\c" \ -- "link with an ampersand in the URL" \&. .PP -Here’s a link with an amersand in the link text: \c +Here\[cq]s a link with an amersand in the link text: \c .pdfhref W -D "http://att.com/" -A "\c" \ -- "AT&T" \&. .PP -Here’s an \c +Here\[cq]s an \c .pdfhref W -D "/script?foo=1&bar=2" -A "\c" \ -- "inline link" \&. .PP -Here’s an \c +Here\[cq]s an \c .pdfhref W -D "/script?foo=1&bar=2" -A "\c" \ -- "inline link in pointy braces" \&. @@ -925,7 +925,7 @@ It should. .LP An e\-mail address: \c .pdfhref W -D "mailto:nobody%40nowhere.net" -A "\c" \ - -- "nobody\[at]nowhere.net" + -- "nobody@nowhere.net" \& .RS .LP @@ -967,7 +967,7 @@ It need not be placed at the end of the document. .FE and another.\** .FS -Here’s the long note. +Here\[cq]s the long note. This one contains multiple blocks. .PP Subsequent blocks are indented to show that they belong to the footnote (as |