From f48960b75fe2d7ae532ec17a060109c92b893f57 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 17 Oct 2018 17:23:14 -0700 Subject: Move common groff functions to Text.Pandoc.Writers.Groff (unexported module). These are used in both the man and ms writers. Moved groffEscape out of Text.Pandoc.Writers.Shared [cancels earlier API change from adding it, which was after last release]. This fixes strong/code combination on man (should be `\f[CB]` not `\f[BC]`), mentioned in #4973. Updated tests. Closes #4975. --- src/Text/Pandoc/Writers/Groff.hs | 149 ++++++++++++++++++++++++++++++++++++++ src/Text/Pandoc/Writers/Man.hs | 58 +-------------- src/Text/Pandoc/Writers/Ms.hs | 88 +--------------------- src/Text/Pandoc/Writers/Shared.hs | 11 +-- 4 files changed, 155 insertions(+), 151 deletions(-) create mode 100644 src/Text/Pandoc/Writers/Groff.hs (limited to 'src/Text/Pandoc/Writers') diff --git a/src/Text/Pandoc/Writers/Groff.hs b/src/Text/Pandoc/Writers/Groff.hs new file mode 100644 index 000000000..aac76060e --- /dev/null +++ b/src/Text/Pandoc/Writers/Groff.hs @@ -0,0 +1,149 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{- +Copyright (C) 2007-2018 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Groff + Copyright : Copyright (C) 2007-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Common functions for groff writers (man, ms). +-} + +module Text.Pandoc.Writers.Groff ( + WriterState(..) + , defaultWriterState + , MS + , Note + , escapeChar + , escapeString + , escapeCode + , groffEscape + , withFontFeature + ) where +import Prelude +import qualified Data.Text as T +import Data.Char (isAscii, ord) +import Control.Monad.State.Strict +import Data.List (intercalate) +import qualified Data.Map as Map +import Data.Maybe (fromMaybe) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Pretty +import Text.Printf (printf) + +data WriterState = WriterState { stHasInlineMath :: Bool + , stFirstPara :: Bool + , stNotes :: [Note] + , stSmallCaps :: Bool + , stHighlighting :: Bool + , stInHeader :: Bool + , stFontFeatures :: Map.Map Char Bool + , stHasTables :: Bool + } + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ stHasInlineMath = False + , stFirstPara = True + , stNotes = [] + , stSmallCaps = False + , stHighlighting = False + , stInHeader = False + , stFontFeatures = Map.fromList [ + ('I',False) + , ('B',False) + , ('C',False) + ] + , stHasTables = False + } + +type Note = [Block] + +type MS = StateT WriterState + +-- | Association list of characters to escape. +groffEscapes :: Map.Map Char String +groffEscapes = Map.fromList + [ ('\160', "\\~") + , ('\'', "\\[aq]") + , ('`', "\\`") + , ('"', "\\[dq]") + , ('\x201C', "\\[lq]") + , ('\x201D', "\\[rq]") + , ('\x2018', "\\[oq]") + , ('\x2019', "\\[cq]") + , ('\x2014', "\\[em]") + , ('\x2013', "\\[en]") + , ('\x2026', "\\&...") + , ('~', "\\[ti]") + , ('^', "\\[ha]") + , ('@', "\\@") + , ('\\', "\\\\") + ] + +escapeChar :: Char -> String +escapeChar c = fromMaybe [c] (Map.lookup c groffEscapes) + +-- | Escape special characters for groff. +escapeString :: String -> String +escapeString = concatMap escapeChar + +-- | Escape a literal (code) section for groff. +escapeCode :: String -> String +escapeCode = intercalate "\n" . map escapeLine . lines + where escapeCodeChar ' ' = "\\ " + escapeCodeChar '\t' = "\\\t" + escapeCodeChar c = escapeChar c + escapeLine codeline = + case concatMap escapeCodeChar codeline of + a@('.':_) -> "\\&" ++ a + b -> b + +-- | Escape non-ASCII characters using groff \u[..] sequences. +groffEscape :: T.Text -> T.Text +groffEscape = T.concatMap toUchar + where toUchar c + | isAscii c = T.singleton c + | otherwise = T.pack $ printf "\\[u%04X]" (ord c) + +fontChange :: PandocMonad m => MS m Doc +fontChange = do + features <- gets stFontFeatures + inHeader <- gets stInHeader + let filling = ['C' | fromMaybe False $ Map.lookup 'C' features] ++ + ['B' | inHeader || + fromMaybe False (Map.lookup 'B' features)] ++ + ['I' | fromMaybe False $ Map.lookup 'I' features] + return $ + if null filling + then text "\\f[R]" + else text $ "\\f[" ++ filling ++ "]" + +withFontFeature :: PandocMonad m => Char -> MS m Doc -> MS m Doc +withFontFeature c action = do + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + begin <- fontChange + d <- action + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + end <- fontChange + return $ begin <> d <> end diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 247666b33..65aec81b3 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -33,8 +33,7 @@ Conversion of 'Pandoc' documents to groff man page format. module Text.Pandoc.Writers.Man ( writeMan) where import Prelude import Control.Monad.State.Strict -import Data.List (intercalate, intersperse, sort, stripPrefix) -import qualified Data.Map as Map +import Data.List (intersperse, stripPrefix) import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -48,22 +47,9 @@ import Text.Pandoc.Shared import Text.Pandoc.Templates import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared +import Text.Pandoc.Writers.Groff import Text.Printf (printf) -type Notes = [[Block]] -data WriterState = WriterState { stNotes :: Notes - , stFontFeatures :: Map.Map Char Bool - , stHasTables :: Bool } - -defaultWriterState :: WriterState -defaultWriterState = WriterState { stNotes = [] - , stFontFeatures = Map.fromList [ - ('I',False) - , ('B',False) - , ('C',False) - ] - , stHasTables = False } - -- | Convert Pandoc to Man. writeMan :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMan opts document = @@ -127,28 +113,6 @@ noteToMan opts num note = do let marker = cr <> text ".SS " <> brackets (text (show num)) return $ marker $$ contents --- | Association list of characters to escape. -manEscapes :: [(Char, String)] -manEscapes = [ ('\160', "\\ ") - , ('\'', "\\[aq]") - , ('’', "'") - , ('\x2014', "\\[em]") - , ('\x2013', "\\[en]") - , ('\x2026', "\\&...") - ] ++ backslashEscapes "-@\\" - --- | Escape special characters for Man. -escapeString :: String -> String -escapeString = escapeStringUsing manEscapes - --- | Escape a literal (code) section for Man. -escapeCode :: String -> String -escapeCode = intercalate "\n" . map escapeLine . lines where - escapeLine codeline = - case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of - a@('.':_) -> "\\&" ++ a - b -> b - -- We split inline lists into sentences, and print one sentence per -- line. groff/troff treats the line-ending period differently. -- See http://code.google.com/p/pandoc/issues/detail?id=148. @@ -373,21 +337,3 @@ inlineToMan _ (Note contents) = do notes <- gets stNotes let ref = show (length notes) return $ char '[' <> text ref <> char ']' - -fontChange :: PandocMonad m => StateT WriterState m Doc -fontChange = do - features <- gets stFontFeatures - let filling = sort [c | (c,True) <- Map.toList features] - return $ text $ "\\f[" ++ (if null filling then "R" else filling) ++ "]" - -withFontFeature :: PandocMonad m - => Char - -> StateT WriterState m Doc - -> StateT WriterState m Doc -withFontFeature c action = do - modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } - begin <- fontChange - d <- action - modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } - end <- fontChange - return $ begin <> d <> end diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 9a35a9693..cdca24702 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -60,36 +60,10 @@ import Text.Pandoc.Shared import Text.Pandoc.Templates import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared +import Text.Pandoc.Writers.Groff import Text.Printf (printf) import Text.TeXMath (writeEqn) -data WriterState = WriterState { stHasInlineMath :: Bool - , stFirstPara :: Bool - , stNotes :: [Note] - , stSmallCaps :: Bool - , stHighlighting :: Bool - , stInHeader :: Bool - , stFontFeatures :: Map.Map Char Bool - } - -defaultWriterState :: WriterState -defaultWriterState = WriterState{ stHasInlineMath = False - , stFirstPara = True - , stNotes = [] - , stSmallCaps = False - , stHighlighting = False - , stInHeader = False - , stFontFeatures = Map.fromList [ - ('I',False) - , ('B',False) - , ('C',False) - ] - } - -type Note = [Block] - -type MS = StateT WriterState - -- | Convert Pandoc to Ms. writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMs opts document = @@ -132,24 +106,8 @@ pandocToMs opts (Pandoc meta blocks) = do Nothing -> return main Just tpl -> renderTemplate' tpl context --- | Association list of characters to escape. -msEscapes :: Map.Map Char String -msEscapes = Map.fromList - [ ('\160', "\\~") - , ('\'', "\\[aq]") - , ('`', "\\`") - , ('"', "\\[dq]") - , ('\x2014', "\\[em]") - , ('\x2013', "\\[en]") - , ('\x2026', "\\&...") - , ('~', "\\[ti]") - , ('^', "\\[ha]") - , ('@', "\\@") - , ('\\', "\\\\") - ] - -escapeChar :: Char -> String -escapeChar c = fromMaybe [c] (Map.lookup c msEscapes) +escapeUri :: String -> String +escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c) -- | Escape | character, used to mark inline math, inside math. escapeBar :: String -> String @@ -157,13 +115,6 @@ escapeBar = concatMap go where go '|' = "\\[u007C]" go c = [c] --- | Escape special characters for Ms. -escapeString :: String -> String -escapeString = concatMap escapeChar - -escapeUri :: String -> String -escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c) - toSmallCaps :: String -> String toSmallCaps [] = [] toSmallCaps (c:cs) @@ -174,17 +125,6 @@ toSmallCaps (c:cs) in escapeString uppers ++ toSmallCaps rest | otherwise = escapeChar c ++ toSmallCaps cs --- | Escape a literal (code) section for Ms. -escapeCode :: String -> String -escapeCode = intercalate "\n" . map escapeLine . lines - where escapeCodeChar ' ' = "\\ " - escapeCodeChar '\t' = "\\\t" - escapeCodeChar c = escapeChar c - escapeLine codeline = - case concatMap escapeCodeChar codeline of - a@('.':_) -> "\\&" ++ a - b -> b - -- We split inline lists into sentences, and print one sentence per -- line. groff/troff treats the line-ending period differently. -- See http://code.google.com/p/pandoc/issues/detail?id=148. @@ -535,28 +475,6 @@ handleNote opts bs = do contents <- blockListToMs opts bs' return $ cr <> text ".FS" $$ contents $$ text ".FE" <> cr -fontChange :: PandocMonad m => MS m Doc -fontChange = do - features <- gets stFontFeatures - inHeader <- gets stInHeader - let filling = ['C' | fromMaybe False $ Map.lookup 'C' features] ++ - ['B' | inHeader || - fromMaybe False (Map.lookup 'B' features)] ++ - ['I' | fromMaybe False $ Map.lookup 'I' features] - return $ - if null filling - then text "\\f[R]" - else text $ "\\f[" ++ filling ++ "]" - -withFontFeature :: PandocMonad m => Char -> MS m Doc -> MS m Doc -withFontFeature c action = do - modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } - begin <- fontChange - d <- action - modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } - end <- fontChange - return $ begin <> d <> end - setFirstPara :: PandocMonad m => MS m () setFirstPara = modify $ \st -> st{ stFirstPara = True } diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index a7bf30aaa..09e45df90 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -47,7 +47,6 @@ module Text.Pandoc.Writers.Shared ( , lookupMetaInlines , lookupMetaString , stripLeadingTrailingSpace - , groffEscape , toSubscript , toSuperscript ) @@ -56,7 +55,7 @@ import Prelude import Control.Monad (zipWithM) import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object), encode, fromJSON) -import Data.Char (chr, ord, isAscii, isSpace) +import Data.Char (chr, ord, isSpace) import qualified Data.HashMap.Strict as H import Data.List (groupBy, intersperse, transpose) import qualified Data.Map as M @@ -70,7 +69,6 @@ import Text.Pandoc.Pretty import Text.Pandoc.Shared (stringify) import Text.Pandoc.UTF8 (toStringLazy) import Text.Pandoc.XML (escapeStringForXML) -import Text.Printf (printf) -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. @@ -387,13 +385,6 @@ lookupMetaString key meta = Just (MetaBool b) -> show b _ -> "" --- | Escape non-ASCII characters using groff \u[..] sequences. -groffEscape :: T.Text -> T.Text -groffEscape = T.concatMap toUchar - where toUchar c - | isAscii c = T.singleton c - | otherwise = T.pack $ printf "\\[u%04X]" (ord c) - toSuperscript :: Char -> Maybe Char toSuperscript '1' = Just '\x00B9' -- cgit v1.2.3