From e0f985bb2139f142223f8d21e28a3a6bf4605cb7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Oct 2018 21:28:38 -0700 Subject: Rename Groff -> Roff. Module T.P.Readers.Groff -> T.P.Readers.Roff Module T.P.Writers.Groff -> T.P.Writers.Roff Module T.P.GroffChar -> T.P.RoffChar GroffTokens -> RoffTokens GroffToken -> RoffToken. --- src/Text/Pandoc/GroffChar.hs | 441 -------------------------- src/Text/Pandoc/Readers/Groff.hs | 650 --------------------------------------- src/Text/Pandoc/Readers/Man.hs | 38 +-- src/Text/Pandoc/Readers/Roff.hs | 650 +++++++++++++++++++++++++++++++++++++++ src/Text/Pandoc/RoffChar.hs | 441 ++++++++++++++++++++++++++ src/Text/Pandoc/Writers/Groff.hs | 138 --------- src/Text/Pandoc/Writers/Man.hs | 2 +- src/Text/Pandoc/Writers/Ms.hs | 2 +- src/Text/Pandoc/Writers/Roff.hs | 138 +++++++++ 9 files changed, 1250 insertions(+), 1250 deletions(-) delete mode 100644 src/Text/Pandoc/GroffChar.hs delete mode 100644 src/Text/Pandoc/Readers/Groff.hs create mode 100644 src/Text/Pandoc/Readers/Roff.hs create mode 100644 src/Text/Pandoc/RoffChar.hs delete mode 100644 src/Text/Pandoc/Writers/Groff.hs create mode 100644 src/Text/Pandoc/Writers/Roff.hs (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/GroffChar.hs b/src/Text/Pandoc/GroffChar.hs deleted file mode 100644 index 3f78939c4..000000000 --- a/src/Text/Pandoc/GroffChar.hs +++ /dev/null @@ -1,441 +0,0 @@ -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2018 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.GroffChar - Copyright : Copyright (C) 2007-2018 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Groff character escaping/unescaping. --} - -module Text.Pandoc.GroffChar ( - standardEscapes - , characterCodes - , combiningAccents - ) where -import Prelude - --- | These are the escapes specifically mentioned in groff_man(7), --- plus @ and ellipsis. -standardEscapes :: [(Char, String)] -standardEscapes = - [ ('\160', "\\ ") - , ('\'', "\\[aq]") - , ('‘', "\\[oq]") - , ('’', "\\[cq]") - , ('"', "\\[dq]") - , ('“', "\\[lq]") - , ('”', "\\[rq]") - , ('—', "\\[em]") - , ('–', "\\[en]") - , ('`', "\\[ga]") - , ('^', "\\[ha]") - , ('~', "\\[ti]") - , ('-', "\\-") -- minus; - will be interpreted as hyphen U+2010 - , ('\\', "\\[rs]") - , ('@', "\\[at]") -- because we use @ as a table and math delimiter - , ('\x2026', "\\&...") -- because u2026 doesn't render on tty - ] - -characterCodes :: [(Char, String)] -characterCodes = - [ ('Ð', "-D") - , ('ð', "Sd") - , ('Þ', "TP") - , ('þ', "Tp") - , ('ß', "ss") - , ('ff', "ff") - , ('fi', "fi") - , ('fl', "fl") - , ('ffi', "Fi") - , ('ffl', "Fl") - , ('Ł', "/L") - , ('ł', "/l") - , ('Ø', "/O") - , ('ø', "/o") - , ('Æ', "AE") - , ('æ', "ae") - , ('Œ', "OE") - , ('œ', "oe") - , ('IJ', "IJ") - , ('ij', "ij") - , ('ı', ".i") - , ('ȷ', ".j") - , ('Á', "'A") - , ('Ć', "'C") - , ('É', "'E") - , ('Í', "'I") - , ('Ó', "'O") - , ('Ú', "'U") - , ('Ý', "'Y") - , ('á', "'a") - , ('ć', "'c") - , ('é', "'e") - , ('í', "'i") - , ('ó', "'o") - , ('ú', "'u") - , ('ý', "'y") - , ('Ä', ":A") - , ('Ë', ":E") - , ('Ï', ":I") - , ('Ö', ":O") - , ('Ü', ":U") - , ('Ÿ', ":Y") - , ('ä', ":a") - , ('ë', ":e") - , ('ï', ":i") - , ('ö', ":o") - , ('ü', ":u") - , ('ÿ', ":y") - , ('Â', "^A") - , ('Ê', "^E") - , ('Î', "^I") - , ('Ô', "^O") - , ('Û', "^U") - , ('â', "^a") - , ('ê', "^e") - , ('î', "^i") - , ('ô', "^o") - , ('û', "^u") - , ('À', "`A") - , ('È', "`E") - , ('Ì', "`I") - , ('Ò', "`O") - , ('Ù', "`U") - , ('à', "`a") - , ('è', "`e") - , ('ì', "`i") - , ('ò', "`o") - , ('ù', "`u") - , ('Ã', "~A") - , ('Ñ', "~N") - , ('Õ', "~O") - , ('ã', "~a") - , ('ñ', "~n") - , ('õ', "~o") - , ('Š', "vS") - , ('š', "vs") - , ('Ž', "vZ") - , ('ž', "vz") - , ('Ç', ",C") - , ('ç', ",c") - , ('Å', "oA") - , ('å', "oa") - , ('˝', "a\"") - , ('¯', "a-") - , ('˙', "a.") - , ('^', "a^") - , ('´', "aa") - , ('`', "ga") - , ('˘', "ab") - , ('¸', "ac") - , ('¨', "ad") - , ('ˇ', "ah") - , ('˚', "ao") - , ('~', "a~") - , ('˛', "ho") - , ('^', "ha") - , ('~', "ti") - , ('„', "Bq") - , ('‚', "bq") - , ('“', "lq") - , ('”', "rq") - , ('‘', "oq") - , ('’', "cq") - , ('\'', "aq") - , ('"', "dq") - , ('«', "Fo") - , ('»', "Fc") - , ('‹', "fo") - , ('›', "fc") - , ('¡', "r!") - , ('¿', "r?") - , ('—', "em") - , ('–', "en") - , ('‐', "hy") - , ('[', "lB") - , (']', "rB") - , ('{', "lC") - , ('}', "rC") - , ('⟨', "la") - , ('⟩', "ra") - , ('⎪', "bv") - , ('⎪', "braceex") - , ('⎡', "bracketlefttp") - , ('⎣', "bracketleftbt") - , ('⎢', "bracketleftex") - , ('⎤', "bracketrighttp") - , ('⎦', "bracketrightbt") - , ('⎥', "bracketrightex") - , ('╭', "lt") - , ('⎧', "bracelefttp") - , ('┥', "lk") - , ('⎨', "braceleftmid") - , ('╰', "lb") - , ('⎩', "braceleftbt") - , ('⎪', "braceleftex") - , ('╮', "rt") - , ('⎫', "bracerighttp") - , ('┝', "rk") - , ('⎬', "bracerightmid") - , ('╯', "rb") - , ('⎭', "bracerightbt") - , ('⎪', "bracerightex") - , ('⎛', "parenlefttp") - , ('⎝', "parenleftbt") - , ('⎜', "parenleftex") - , ('⎞', "parenrighttp") - , ('⎠', "parenrightbt") - , ('⎟', "parenrightex") - , ('←', "<-") - , ('→', "->") - , ('↔', "<>") - , ('↓', "da") - , ('↑', "ua") - , ('↕', "va") - , ('⇐', "lA") - , ('⇒', "rA") - , ('⇔', "hA") - , ('⇓', "dA") - , ('⇑', "uA") - , ('⇕', "vA") - , ('⎯', "an") - , ('|', "ba") - , ('│', "br") - , ('_', "ul") - , ('‾', "rn") - , ('_', "ru") - , ('¦', "bb") - , ('/', "sl") - , ('\\', "rs") - , ('○', "ci") - , ('·', "bu") - , ('‡', "dd") - , ('†', "dg") - , ('◊', "lz") - , ('□', "sq") - , ('¶', "ps") - , ('§', "sc") - , ('☜', "lh") - , ('☞', "rh") - , ('@', "at") - , ('#', "sh") - , ('↵', "CR") - , ('✓', "OK") - , ('©', "co") - , ('®', "rg") - , ('™', "tm") - , ('$', "Do") - , ('¢', "ct") - , ('€', "eu") - , ('€', "Eu") - , ('¥', "Ye") - , ('£', "Po") - , ('¤', "Cs") - , ('ƒ', "Fn") - , ('°', "de") - , ('‰', "%0") - , ('′', "fm") - , ('″', "sd") - , ('µ', "mc") - , ('ª', "Of") - , ('º', "Om") - , ('∧', "AN") - , ('∨', "OR") - , ('¬', "no") - , ('¬', "tno") - , ('∃', "te") - , ('∀', "fa") - , ('∋', "st") - , ('∴', "3d") - , ('∴', "tf") - , ('|', "or") - , ('½', "12") - , ('¼', "14") - , ('¾', "34") - , ('⅛', "18") - , ('⅜', "38") - , ('⅝', "58") - , ('⅞', "78") - , ('¹', "S1") - , ('²', "S2") - , ('³', "S3") - , ('+', "pl") - , ('−', "mi") - , ('∓', "-+") - , ('±', "+-") - , ('±', "t+-") - , ('·', "pc") - , ('⋅', "md") - , ('×', "mu") - , ('×', "tmu") - , ('⊗', "c*") - , ('⊕', "c+") - , ('÷', "di") - , ('÷', "tdi") - , ('⁄', "f/") - , ('∗', "**") - , ('≤', "<=") - , ('≥', ">=") - , ('≪', "<<") - , ('≫', ">>") - , ('=', "eq") - , ('≠', "!=") - , ('≡', "==") - , ('≢', "ne") - , ('≅', "=~") - , ('≃', "|=") - , ('∼', "ap") - , ('≈', "~~") - , ('≈', "~=") - , ('∝', "pt") - , ('∅', "es") - , ('∈', "mo") - , ('∉', "nm") - , ('⊂', "sb") - , ('⊄', "nb") - , ('⊃', "sp") - , ('⊅', "nc") - , ('⊆', "ib") - , ('⊇', "ip") - , ('∩', "ca") - , ('∪', "cu") - , ('∠', "/_") - , ('⊥', "pp") - , ('∫', "is") - , ('∫', "integral") - , ('∑', "sum") - , ('∏', "product") - , ('∐', "coproduct") - , ('∇', "gr") - , ('√', "sr") - , ('√', "sqrt") - -- , "radicalex" - -- "sqrtex" - , ('⌈', "lc") - , ('⌉', "rc") - , ('⌊', "lf") - , ('⌋', "rf") - , ('∞', "if") - , ('ℵ', "Ah") - , ('ℑ', "Im") - , ('ℜ', "Re") - , ('℘', "wp") - , ('∂', "pd") - , ('ℏ', "-h") - , ('ℏ', "hbar") - , ('Α', "*A") - , ('Β', "*B") - , ('Γ', "*G") - , ('Δ', "*D") - , ('Ε', "*E") - , ('Ζ', "*Z") - , ('Η', "*Y") - , ('Θ', "*H") - , ('Ι', "*I") - , ('Κ', "*K") - , ('Λ', "*L") - , ('Μ', "*M") - , ('Ν', "*N") - , ('Ξ', "*C") - , ('Ο', "*O") - , ('Π', "*P") - , ('Ρ', "*R") - , ('Σ', "*S") - , ('Τ', "*T") - , ('Υ', "*U") - , ('Φ', "*F") - , ('Χ', "*X") - , ('Ψ', "*Q") - , ('Ω', "*W") - , ('α', "*a") - , ('β', "*b") - , ('γ', "*g") - , ('δ', "*d") - , ('ε', "*e") - , ('ζ', "*z") - , ('η', "*y") - , ('θ', "*h") - , ('ι', "*i") - , ('κ', "*k") - , ('λ', "*l") - , ('μ', "*m") - , ('ν', "*n") - , ('ξ', "*c") - , ('ο', "*o") - , ('π', "*p") - , ('ρ', "*r") - , ('ς', "ts") - , ('σ', "*s") - , ('τ', "*t") - , ('υ', "*u") - , ('ϕ', "*f") - , ('χ', "*x") - , ('ψ', "*q") - , ('ω', "*w") - , ('ϑ', "+h") - , ('φ', "+f") - , ('ϖ', "+p") - , ('ϵ', "+e") - , ('♣', "CL") - , ('♠', "SP") - , ('♥', "HE") - , ('♦', "DI") - , ('˝' , "a\"") - , ('¯', "a-") - , ('˙', "a.") - , ('^', "a^") - , ('´', "aa") - , ('`', "ga") - , ('˘', "ab") - , ('¸', "ac") - , ('¨', "ad") - , ('ˇ', "ah") - , ('˚', "ao") - , ('~', "a~") - , ('˛', "ho") - , ('^', "ha") - , ('~', "ti") - ] - --- use like: \\[E a^ aa] -combiningAccents :: [(Char, String)] -combiningAccents = - [ ('\779' , "a\"") - , ('\772', "a-") - , ('\775', "a.") - , ('\770', "a^") - , ('\769', "aa") - , ('\768', "ga") - , ('\774', "ab") - , ('\807', "ac") - , ('\776', "ad") - , ('\780', "ah") - , ('\778', "ao") - , ('\771', "a~") - , ('\808', "ho") - , ('\770', "ha") - , ('\771', "ti") - ] diff --git a/src/Text/Pandoc/Readers/Groff.hs b/src/Text/Pandoc/Readers/Groff.hs deleted file mode 100644 index ed0b3a1ca..000000000 --- a/src/Text/Pandoc/Readers/Groff.hs +++ /dev/null @@ -1,650 +0,0 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{- - Copyright (C) 2018 Yan Pashkovsky - and John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - --} - -{- | - Module : Text.Pandoc.Readers.Groff - Copyright : Copyright (C) 2018 Yan Pashkovsky and John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : Yan Pashkovsky - Stability : WIP - Portability : portable - -Tokenizer for groff formats (man, ms). --} -module Text.Pandoc.Readers.Groff - ( MacroKind - , FontSpec(..) - , defaultFontSpec - , LinePart(..) - , Arg - , TableOption - , CellFormat(..) - , TableRow - , GroffToken(..) - , GroffTokens(..) - , linePartsToString - , lexGroff - ) -where - -import Prelude -import Safe (lastDef) -import Control.Monad (void, mzero, guard, when) -import Control.Monad.Except (throwError) -import Text.Pandoc.Class - (getResourcePath, readFileFromDirs, PandocMonad(..), report) -import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace) -import Data.Default (Default) -import qualified Data.Map as M -import Data.List (intercalate, isSuffixOf) -import qualified Data.Text as T -import Text.Pandoc.Logging (LogMessage(..)) -import Text.Pandoc.Options -import Text.Pandoc.Parsing -import Text.Pandoc.Shared (safeRead) -import Text.Parsec hiding (tokenPrim) -import qualified Text.Parsec as Parsec -import Text.Pandoc.GroffChar (characterCodes, combiningAccents) -import qualified Data.Sequence as Seq -import qualified Data.Foldable as Foldable -import qualified Data.Text.Normalize as Normalize - --- import Debug.Trace (traceShowId) - --- --- Data Types --- -data FontSpec = FontSpec{ fontBold :: Bool - , fontItalic :: Bool - , fontMonospace :: Bool - } deriving (Show, Eq, Ord) - -defaultFontSpec :: FontSpec -defaultFontSpec = FontSpec False False False - -type MacroKind = String - -data LinePart = RoffStr String - | Font FontSpec - | FontSize Int - | MacroArg Int - deriving Show - -type Arg = [LinePart] - -type TableOption = (String, String) - -data CellFormat = - CellFormat - { columnType :: Char - , pipePrefix :: Bool - , pipeSuffix :: Bool - , columnSuffixes :: [String] - } deriving (Show, Eq, Ord) - -type TableRow = ([CellFormat], [GroffTokens]) - --- TODO parse tables (see man tbl) -data GroffToken = MLine [LinePart] - | MEmptyLine - | MMacro MacroKind [Arg] SourcePos - | MTable [TableOption] [TableRow] SourcePos - deriving Show - -newtype GroffTokens = GroffTokens { unGroffTokens :: Seq.Seq GroffToken } - deriving (Show, Semigroup, Monoid) - -singleTok :: GroffToken -> GroffTokens -singleTok t = GroffTokens (Seq.singleton t) - -data RoffState = RoffState { customMacros :: M.Map String GroffTokens - , prevFont :: FontSpec - , currentFont :: FontSpec - , tableTabChar :: Char - } deriving Show - -instance Default RoffState where - def = RoffState { customMacros = M.fromList - $ map (\(n, s) -> - (n, singleTok - (MLine [RoffStr s]))) - [ ("Tm", "\x2122") - , ("lq", "\x201C") - , ("rq", "\x201D") - , ("R", "\x00AE") ] - , prevFont = defaultFontSpec - , currentFont = defaultFontSpec - , tableTabChar = '\t' - } - -type GroffLexer m = ParserT [Char] RoffState m - --- --- Lexer: String -> GroffToken --- - -eofline :: Stream s m Char => ParsecT s u m () -eofline = void newline <|> eof - -spacetab :: Stream s m Char => ParsecT s u m Char -spacetab = char ' ' <|> char '\t' - -characterCodeMap :: M.Map String Char -characterCodeMap = - M.fromList $ map (\(x,y) -> (y,x)) characterCodes - -combiningAccentsMap :: M.Map String Char -combiningAccentsMap = - M.fromList $ map (\(x,y) -> (y,x)) combiningAccents - -escape :: PandocMonad m => GroffLexer m [LinePart] -escape = do - char '\\' - c <- anyChar - case c of - 'f' -> escFont - 's' -> escFontSize - '*' -> escStar - '(' -> twoCharGlyph - '[' -> bracketedGlyph - '"' -> mempty <$ skipMany (satisfy (/='\n')) -- line comment - '#' -> mempty <$ manyTill anyChar newline - '%' -> return mempty - '{' -> return mempty - '}' -> return mempty - '&' -> return mempty - '\n' -> return mempty - ':' -> return mempty - '0' -> return mempty - 'c' -> return mempty - '-' -> return [RoffStr "-"] - '_' -> return [RoffStr "_"] - ' ' -> return [RoffStr " "] - '\\' -> return [RoffStr "\\"] - 't' -> return [RoffStr "\t"] - 'e' -> return [RoffStr "\\"] - '`' -> return [RoffStr "`"] - '^' -> return [RoffStr " "] - '|' -> return [RoffStr " "] - '\'' -> return [RoffStr "`"] - '.' -> return [RoffStr "`"] - '~' -> return [RoffStr "\160"] -- nonbreaking space - _ -> escUnknown ['\\',c] - - where - - twoCharGlyph = do - cs <- count 2 anyChar - case M.lookup cs characterCodeMap of - Just c -> return [RoffStr [c]] - Nothing -> escUnknown ('\\':'(':cs) - - bracketedGlyph = unicodeGlyph <|> charGlyph - - charGlyph = do - cs <- manyTill (noneOf ['[',']','\n']) (char ']') - (case words cs of - [] -> mzero - [s] -> case M.lookup s characterCodeMap of - Nothing -> mzero - Just c -> return [RoffStr [c]] - (s:ss) -> do - basechar <- case M.lookup cs characterCodeMap of - Nothing -> - case s of - [ch] | isAscii ch && isAlphaNum ch -> - return ch - _ -> mzero - Just c -> return c - let addAccents [] xs = return $ T.unpack . - Normalize.normalize Normalize.NFC . - T.pack $ reverse xs - addAccents (a:as) xs = - case M.lookup a combiningAccentsMap of - Just x -> addAccents as (x:xs) - Nothing -> mzero - addAccents ss [basechar] >>= \xs -> return [RoffStr xs]) - <|> escUnknown ("\\[" ++ cs ++ "]") - - unicodeGlyph = try $ do - xs <- ucharCode `sepBy1` (char '_') <* char ']' - return [RoffStr xs] - - ucharCode = try $ do - char 'u' - cs <- many1 (satisfy isHexDigit) - let lcs = length cs - guard $ lcs >= 4 && lcs <= 6 - case chr <$> safeRead ('0':'x':cs) of - Nothing -> mzero - Just c -> return c - - escUnknown :: PandocMonad m => String -> GroffLexer m [LinePart] - escUnknown s = do - pos <- getPosition - report $ SkippedContent ("Unknown escape sequence " ++ s) pos - return [RoffStr "\xFFFD"] - --- \s-1 \s0 -escFontSize :: PandocMonad m => GroffLexer m [LinePart] -escFontSize = do - let sign = option "" $ ("-" <$ char '-' <|> "" <$ char '+') - let toFontSize xs = - case safeRead xs of - Nothing -> mzero - Just n -> return [FontSize n] - choice - [ do char '(' - s <- sign - ds <- count 2 digit - toFontSize (s ++ ds) - , do char '[' - s <- sign - ds <- many1 digit - char ']' - toFontSize (s ++ ds) - , do s <- sign - ds <- count 1 digit - toFontSize (s ++ ds) - ] - -escFont :: PandocMonad m => GroffLexer m [LinePart] -escFont = do - font <- choice - [ char 'S' >> return defaultFontSpec - , digit >> return defaultFontSpec - , char '(' >> anyChar >> anyChar >> return defaultFontSpec - , digit >> return defaultFontSpec - , ($ defaultFontSpec) <$> letterFontKind - , lettersFont - ] - modifyState $ \st -> st{ prevFont = currentFont st - , currentFont = font } - return [Font font] - -lettersFont :: PandocMonad m => GroffLexer m FontSpec -lettersFont = try $ do - char '[' - fs <- many letterFontKind - skipMany letter - char ']' - if null fs - then prevFont <$> getState - else return $ foldr ($) defaultFontSpec fs - -letterFontKind :: PandocMonad m => GroffLexer m (FontSpec -> FontSpec) -letterFontKind = choice [ - oneOf ['B','b'] >> return (\fs -> fs{ fontBold = True }) - , oneOf ['I','i'] >> return (\fs -> fs { fontItalic = True }) - , oneOf ['C','c'] >> return (\fs -> fs { fontMonospace = True }) - , oneOf ['P','p','R','r'] >> return id - ] - - --- separate function from lexMacro since real man files sometimes do not --- follow the rules -lexComment :: PandocMonad m => GroffLexer m GroffTokens -lexComment = do - try $ string ".\\\"" - many Parsec.space - skipMany $ noneOf "\n" - char '\n' - return mempty - -lexMacro :: PandocMonad m => GroffLexer m GroffTokens -lexMacro = do - pos <- getPosition - char '.' <|> char '\'' - skipMany spacetab - macroName <- many (satisfy (not . isSpace)) - case macroName of - "nop" -> return mempty - "ie" -> lexConditional - "if" -> lexConditional - "el" -> skipConditional - "TS" -> lexTable pos - - _ -> do - args <- lexArgs - case macroName of - "" -> return mempty - "\\\"" -> return mempty - "\\#" -> return mempty - "de" -> lexMacroDef args - "de1" -> lexMacroDef args - "ds" -> lexStringDef args - "ds1" -> lexStringDef args - "sp" -> return $ singleTok MEmptyLine - "so" -> lexIncludeFile args - _ -> resolveMacro macroName args pos - -lexTable :: PandocMonad m => SourcePos -> GroffLexer m GroffTokens -lexTable pos = do - skipMany lexComment - spaces - opts <- option [] tableOptions - case lookup "tab" opts of - Just (c:_) -> modifyState $ \st -> st{ tableTabChar = c } - _ -> modifyState $ \st -> st{ tableTabChar = '\t' } - spaces - skipMany lexComment - spaces - rows <- lexTableRows - morerows <- many $ try $ do - string ".T&" - skipMany spacetab - newline - lexTableRows - string ".TE" - skipMany spacetab - eofline - return $ singleTok $ MTable opts (rows ++ concat morerows) pos - -lexTableRows :: PandocMonad m => GroffLexer m [TableRow] -lexTableRows = do - aligns <- tableFormatSpec - spaces - skipMany lexComment - spaces - rows <- many (notFollowedBy (try (string ".TE") <|> try (string ".T&")) >> - tableRow) - return $ zip aligns rows - -tableCell :: PandocMonad m => GroffLexer m GroffTokens -tableCell = (enclosedCell <|> simpleCell) >>= lexGroff . T.pack - where - enclosedCell = do - try (string "T{") - manyTill anyChar (try (string "T}")) - simpleCell = do - tabChar <- tableTabChar <$> getState - many (notFollowedBy (char tabChar <|> newline) >> anyChar) - -tableRow :: PandocMonad m => GroffLexer m [GroffTokens] -tableRow = do - tabChar <- tableTabChar <$> getState - c <- tableCell - cs <- many $ try (char tabChar >> tableCell) - skipMany spacetab - eofline - skipMany lexComment - return (c:cs) - -tableOptions :: PandocMonad m => GroffLexer m [TableOption] -tableOptions = try $ many1 tableOption <* spaces <* char ';' - -tableOption :: PandocMonad m => GroffLexer m TableOption -tableOption = do - k <- many1 letter - v <- option "" $ do - char '(' - manyTill anyChar (char ')') - skipMany spacetab - optional (char ',') - skipMany spacetab - return (k,v) - -tableFormatSpec :: PandocMonad m => GroffLexer m [[CellFormat]] -tableFormatSpec = do - speclines <- tableFormatSpecLine `sepBy1` (newline <|> char ',') - skipMany spacetab - char '.' - return $ speclines ++ repeat (lastDef [] speclines) -- last line is default - -tableFormatSpecLine :: PandocMonad m => GroffLexer m [CellFormat] -tableFormatSpecLine = - many1 $ try $ skipMany spacetab >> tableColFormat - -tableColFormat :: PandocMonad m => GroffLexer m CellFormat -tableColFormat = do - pipePrefix' <- option False - $ True <$ (try $ string "|" <* notFollowedBy spacetab) - c <- oneOf ['a','A','c','C','l','L','n','N','r','R','s','S','^','_','-', - '=','|'] - numsuffixes <- option [] $ (:[]) <$> many1 digit - suffixes <- many $ do - x <- oneOf ['b','B','d','D','e','E','f','F','i','I','m','M', - 'p','P','t','T','u','U','v','V','w','W','x','X', 'z','Z'] - num <- if x == 'w' - then many1 digit <|> - do char '(' - xs <- manyTill anyChar (char ')') - return ("(" ++ xs ++ ")") - else return "" - return $ x : num - pipeSuffix' <- option False $ True <$ string "|" - return $ CellFormat - { columnType = c - , pipePrefix = pipePrefix' - , pipeSuffix = pipeSuffix' - , columnSuffixes = numsuffixes ++ suffixes } - --- We don't fully handle the conditional. But we do --- include everything under '.ie n', which occurs commonly --- in man pages. We always skip the '.el' part. -lexConditional :: PandocMonad m => GroffLexer m GroffTokens -lexConditional = do - skipMany spacetab - lexNCond <|> skipConditional - --- n means nroff mode -lexNCond :: PandocMonad m => GroffLexer m GroffTokens -lexNCond = do - char '\n' - many1 spacetab - lexGroup <|> manToken - -lexGroup :: PandocMonad m => GroffLexer m GroffTokens -lexGroup = do - groupstart - mconcat <$> manyTill manToken groupend - where - groupstart = try $ string "\\{\\" >> newline - groupend = try $ string "\\}" >> eofline - -skipConditional :: PandocMonad m => GroffLexer m GroffTokens -skipConditional = do - rest <- anyLine - when ("\\{\\" `isSuffixOf` rest) $ - void $ manyTill anyChar (try (string "\\}")) - return mempty - -lexIncludeFile :: PandocMonad m => [Arg] -> GroffLexer m GroffTokens -lexIncludeFile args = do - pos <- getPosition - case args of - (f:_) -> do - let fp = linePartsToString f - dirs <- getResourcePath - result <- readFileFromDirs dirs fp - case result of - Nothing -> report $ CouldNotLoadIncludeFile fp pos - Just s -> getInput >>= setInput . (s ++) - return mempty - [] -> return mempty - -resolveMacro :: PandocMonad m - => String -> [Arg] -> SourcePos -> GroffLexer m GroffTokens -resolveMacro macroName args pos = do - macros <- customMacros <$> getState - case M.lookup macroName macros of - Nothing -> return $ singleTok $ MMacro macroName args pos - Just ts -> do - let fillLP (MacroArg i) zs = - case drop (i - 1) args of - [] -> zs - (ys:_) -> ys ++ zs - fillLP z zs = z : zs - let fillMacroArg (MLine lineparts) = - MLine (foldr fillLP [] lineparts) - fillMacroArg x = x - return $ GroffTokens . fmap fillMacroArg . unGroffTokens $ ts - -lexStringDef :: PandocMonad m => [Arg] -> GroffLexer m GroffTokens -lexStringDef args = do -- string definition - case args of - [] -> fail "No argument to .ds" - (x:ys) -> do - let ts = singleTok $ MLine (intercalate [RoffStr " " ] ys) - let stringName = linePartsToString x - modifyState $ \st -> - st{ customMacros = M.insert stringName ts (customMacros st) } - return mempty - -lexMacroDef :: PandocMonad m => [Arg] -> GroffLexer m GroffTokens -lexMacroDef args = do -- macro definition - (macroName, stopMacro) <- - case args of - (x : y : _) -> return (linePartsToString x, linePartsToString y) - -- optional second arg - (x:_) -> return (linePartsToString x, ".") - [] -> fail "No argument to .de" - let stop = try $ do - char '.' <|> char '\'' - skipMany spacetab - string stopMacro - _ <- lexArgs - return () - ts <- mconcat <$> manyTill manToken stop - modifyState $ \st -> - st{ customMacros = M.insert macroName ts (customMacros st) } - return mempty - -lexArgs :: PandocMonad m => GroffLexer m [Arg] -lexArgs = do - args <- many $ try oneArg - skipMany spacetab - eofline - return args - - where - - oneArg :: PandocMonad m => GroffLexer m [LinePart] - oneArg = do - skipMany $ try $ string "\\\n" -- continuation line - try quotedArg <|> plainArg - -- try, because there are some erroneous files, e.g. linux/bpf.2 - - plainArg :: PandocMonad m => GroffLexer m [LinePart] - plainArg = do - skipMany spacetab - mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote) - where - unescapedQuote = char '"' >> return [RoffStr "\""] - - quotedArg :: PandocMonad m => GroffLexer m [LinePart] - quotedArg = do - skipMany spacetab - char '"' - xs <- mconcat <$> - many (macroArg <|> escape <|> regularText - <|> spaceTabChar <|> escapedQuote) - char '"' - return xs - where - escapedQuote = try $ do - char '"' - char '"' - return [RoffStr "\""] - -escStar :: PandocMonad m => GroffLexer m [LinePart] -escStar = try $ do - pos <- getPosition - c <- anyChar - case c of - '(' -> do - cs <- count 2 anyChar - resolveString cs pos - '[' -> do - cs <- many (noneOf "\t\n\r ]") - char ']' - resolveString cs pos - 'S' -> return mempty -- switch back to default font size - _ -> resolveString [c] pos - - where - - -- strings and macros share namespace - resolveString stringname pos = do - GroffTokens ts <- resolveMacro stringname [] pos - case Foldable.toList ts of - [MLine xs] -> return xs - _ -> do - report $ SkippedContent ("unknown string " ++ stringname) pos - return mempty - -lexLine :: PandocMonad m => GroffLexer m GroffTokens -lexLine = do - lnparts <- mconcat <$> many1 linePart - eofline - go lnparts - where -- return empty line if we only have empty strings; - -- this can happen if the line just contains \f[C], for example. - go [] = return mempty - go (RoffStr "" : xs) = go xs - go xs = return $ singleTok $ MLine xs - -linePart :: PandocMonad m => GroffLexer m [LinePart] -linePart = macroArg <|> escape <|> - regularText <|> quoteChar <|> spaceTabChar - -macroArg :: PandocMonad m => GroffLexer m [LinePart] -macroArg = try $ do - string "\\\\$" - x <- digit - return [MacroArg $ ord x - ord '0'] - -regularText :: PandocMonad m => GroffLexer m [LinePart] -regularText = do - s <- many1 $ noneOf "\n\r\t \\\"" - return [RoffStr s] - -quoteChar :: PandocMonad m => GroffLexer m [LinePart] -quoteChar = do - char '"' - return [RoffStr "\""] - -spaceTabChar :: PandocMonad m => GroffLexer m [LinePart] -spaceTabChar = do - c <- spacetab - return [RoffStr [c]] - -lexEmptyLine :: PandocMonad m => GroffLexer m GroffTokens -lexEmptyLine = char '\n' >> return (singleTok MEmptyLine) - -manToken :: PandocMonad m => GroffLexer m GroffTokens -manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine - -linePartsToString :: [LinePart] -> String -linePartsToString = mconcat . map go - where - go (RoffStr s) = s - go _ = mempty - --- | Tokenize a string as a sequence of groff tokens. -lexGroff :: PandocMonad m => T.Text -> m GroffTokens -lexGroff txt = do - eithertokens <- readWithM (mconcat <$> many manToken) def (T.unpack txt) - case eithertokens of - Left e -> throwError e - Right tokenz -> return tokenz diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 90f266e6d..3414d8263 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -49,7 +49,7 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared (crFilter) -import Text.Pandoc.Readers.Groff -- TODO explicit imports +import Text.Pandoc.Readers.Roff -- TODO explicit imports import Text.Parsec hiding (tokenPrim) import qualified Text.Parsec as Parsec import Text.Parsec.Pos (updatePosString) @@ -63,22 +63,22 @@ instance Default ManState where def = ManState { readerOptions = def , metadata = nullMeta } -type ManParser m = ParserT [GroffToken] ManState m +type ManParser m = ParserT [RoffToken] ManState m -- | Read man (troff) from an input string and return a Pandoc document. readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc readMan opts txt = do - tokenz <- lexGroff (crFilter txt) + tokenz <- lexRoff (crFilter txt) let state = def {readerOptions = opts} :: ManState eitherdoc <- readWithMTokens parseMan state - (Foldable.toList . unGroffTokens $ tokenz) + (Foldable.toList . unRoffTokens $ tokenz) either throwError return eitherdoc readWithMTokens :: PandocMonad m - => ParserT [GroffToken] ManState m a -- ^ parser + => ParserT [RoffToken] ManState m a -- ^ parser -> ManState -- ^ initial state - -> [GroffToken] -- ^ input + -> [RoffToken] -- ^ input -> m (Either PandocError a) readWithMTokens parser state input = let leftF = PandocParsecError . intercalate "\n" $ show <$> input @@ -134,7 +134,7 @@ parseTable = do parseTableCell ts = do st <- getState - let ts' = Foldable.toList $ unGroffTokens ts + let ts' = Foldable.toList $ unRoffTokens ts let tcell = try $ do skipMany memptyLine plain . trimInlines <$> (parseInlines <* eof) @@ -147,7 +147,7 @@ parseTable = do isHrule :: TableRow -> Bool isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','='] - isHrule (_, [GroffTokens ss]) = + isHrule (_, [RoffTokens ss]) = case Foldable.toList ss of [MLine [RoffStr [c]]] -> c `elem` ['_','-','='] _ -> False @@ -174,10 +174,10 @@ parseNewParagraph = do return mempty -- --- Parser: [GroffToken] -> Pandoc +-- Parser: [RoffToken] -> Pandoc -- -msatisfy :: Monad m => (GroffToken -> Bool) -> ParserT [GroffToken] st m GroffToken +msatisfy :: Monad m => (RoffToken -> Bool) -> ParserT [RoffToken] st m RoffToken msatisfy predic = tokenPrim show nextPos testTok where testTok t = if predic t then Just t else Nothing @@ -186,32 +186,32 @@ msatisfy predic = tokenPrim show nextPos testTok (setSourceColumn (setSourceLine pos $ sourceLine pos + 1) 1) "" -mtoken :: PandocMonad m => ManParser m GroffToken +mtoken :: PandocMonad m => ManParser m RoffToken mtoken = msatisfy (const True) -mline :: PandocMonad m => ManParser m GroffToken +mline :: PandocMonad m => ManParser m RoffToken mline = msatisfy isMLine where isMLine (MLine _) = True isMLine _ = False -memptyLine :: PandocMonad m => ManParser m GroffToken +memptyLine :: PandocMonad m => ManParser m RoffToken memptyLine = msatisfy isMEmptyLine where isMEmptyLine MEmptyLine = True isMEmptyLine _ = False -mmacro :: PandocMonad m => MacroKind -> ManParser m GroffToken +mmacro :: PandocMonad m => MacroKind -> ManParser m RoffToken mmacro mk = msatisfy isMMacro where isMMacro (MMacro mk' _ _) | mk == mk' = True | otherwise = False isMMacro _ = False -mmacroAny :: PandocMonad m => ManParser m GroffToken +mmacroAny :: PandocMonad m => ManParser m RoffToken mmacroAny = msatisfy isMMacro where isMMacro (MMacro{}) = True isMMacro _ = False -- --- GroffToken -> Block functions +-- RoffToken -> Block functions -- parseTitle :: PandocMonad m => ManParser m Blocks @@ -340,12 +340,12 @@ lineInl = do (MLine fragments) <- mline return $ linePartsToInlines fragments -bareIP :: PandocMonad m => ManParser m GroffToken +bareIP :: PandocMonad m => ManParser m RoffToken bareIP = msatisfy isBareIP where isBareIP (MMacro "IP" [] _) = True isBareIP _ = False -endmacro :: PandocMonad m => String -> ManParser m GroffToken +endmacro :: PandocMonad m => String -> ManParser m RoffToken endmacro name = mmacro name <|> lookAhead newBlockMacro where newBlockMacro = msatisfy isNewBlockMacro @@ -363,7 +363,7 @@ parseCodeBlock = try $ do where - extractText :: GroffToken -> Maybe String + extractText :: RoffToken -> Maybe String extractText (MLine ss) | not (null ss) , all isFontToken ss = Nothing diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs new file mode 100644 index 000000000..9d71c98b3 --- /dev/null +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -0,0 +1,650 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{- + Copyright (C) 2018 Yan Pashkovsky + and John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +-} + +{- | + Module : Text.Pandoc.Readers.Roff + Copyright : Copyright (C) 2018 Yan Pashkovsky and John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : Yan Pashkovsky + Stability : WIP + Portability : portable + +Tokenizer for groff formats (man, ms). +-} +module Text.Pandoc.Readers.Roff + ( MacroKind + , FontSpec(..) + , defaultFontSpec + , LinePart(..) + , Arg + , TableOption + , CellFormat(..) + , TableRow + , RoffToken(..) + , RoffTokens(..) + , linePartsToString + , lexRoff + ) +where + +import Prelude +import Safe (lastDef) +import Control.Monad (void, mzero, guard, when) +import Control.Monad.Except (throwError) +import Text.Pandoc.Class + (getResourcePath, readFileFromDirs, PandocMonad(..), report) +import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace) +import Data.Default (Default) +import qualified Data.Map as M +import Data.List (intercalate, isSuffixOf) +import qualified Data.Text as T +import Text.Pandoc.Logging (LogMessage(..)) +import Text.Pandoc.Options +import Text.Pandoc.Parsing +import Text.Pandoc.Shared (safeRead) +import Text.Parsec hiding (tokenPrim) +import qualified Text.Parsec as Parsec +import Text.Pandoc.RoffChar (characterCodes, combiningAccents) +import qualified Data.Sequence as Seq +import qualified Data.Foldable as Foldable +import qualified Data.Text.Normalize as Normalize + +-- import Debug.Trace (traceShowId) + +-- +-- Data Types +-- +data FontSpec = FontSpec{ fontBold :: Bool + , fontItalic :: Bool + , fontMonospace :: Bool + } deriving (Show, Eq, Ord) + +defaultFontSpec :: FontSpec +defaultFontSpec = FontSpec False False False + +type MacroKind = String + +data LinePart = RoffStr String + | Font FontSpec + | FontSize Int + | MacroArg Int + deriving Show + +type Arg = [LinePart] + +type TableOption = (String, String) + +data CellFormat = + CellFormat + { columnType :: Char + , pipePrefix :: Bool + , pipeSuffix :: Bool + , columnSuffixes :: [String] + } deriving (Show, Eq, Ord) + +type TableRow = ([CellFormat], [RoffTokens]) + +-- TODO parse tables (see man tbl) +data RoffToken = MLine [LinePart] + | MEmptyLine + | MMacro MacroKind [Arg] SourcePos + | MTable [TableOption] [TableRow] SourcePos + deriving Show + +newtype RoffTokens = RoffTokens { unRoffTokens :: Seq.Seq RoffToken } + deriving (Show, Semigroup, Monoid) + +singleTok :: RoffToken -> RoffTokens +singleTok t = RoffTokens (Seq.singleton t) + +data RoffState = RoffState { customMacros :: M.Map String RoffTokens + , prevFont :: FontSpec + , currentFont :: FontSpec + , tableTabChar :: Char + } deriving Show + +instance Default RoffState where + def = RoffState { customMacros = M.fromList + $ map (\(n, s) -> + (n, singleTok + (MLine [RoffStr s]))) + [ ("Tm", "\x2122") + , ("lq", "\x201C") + , ("rq", "\x201D") + , ("R", "\x00AE") ] + , prevFont = defaultFontSpec + , currentFont = defaultFontSpec + , tableTabChar = '\t' + } + +type RoffLexer m = ParserT [Char] RoffState m + +-- +-- Lexer: String -> RoffToken +-- + +eofline :: Stream s m Char => ParsecT s u m () +eofline = void newline <|> eof + +spacetab :: Stream s m Char => ParsecT s u m Char +spacetab = char ' ' <|> char '\t' + +characterCodeMap :: M.Map String Char +characterCodeMap = + M.fromList $ map (\(x,y) -> (y,x)) characterCodes + +combiningAccentsMap :: M.Map String Char +combiningAccentsMap = + M.fromList $ map (\(x,y) -> (y,x)) combiningAccents + +escape :: PandocMonad m => RoffLexer m [LinePart] +escape = do + char '\\' + c <- anyChar + case c of + 'f' -> escFont + 's' -> escFontSize + '*' -> escStar + '(' -> twoCharGlyph + '[' -> bracketedGlyph + '"' -> mempty <$ skipMany (satisfy (/='\n')) -- line comment + '#' -> mempty <$ manyTill anyChar newline + '%' -> return mempty + '{' -> return mempty + '}' -> return mempty + '&' -> return mempty + '\n' -> return mempty + ':' -> return mempty + '0' -> return mempty + 'c' -> return mempty + '-' -> return [RoffStr "-"] + '_' -> return [RoffStr "_"] + ' ' -> return [RoffStr " "] + '\\' -> return [RoffStr "\\"] + 't' -> return [RoffStr "\t"] + 'e' -> return [RoffStr "\\"] + '`' -> return [RoffStr "`"] + '^' -> return [RoffStr " "] + '|' -> return [RoffStr " "] + '\'' -> return [RoffStr "`"] + '.' -> return [RoffStr "`"] + '~' -> return [RoffStr "\160"] -- nonbreaking space + _ -> escUnknown ['\\',c] + + where + + twoCharGlyph = do + cs <- count 2 anyChar + case M.lookup cs characterCodeMap of + Just c -> return [RoffStr [c]] + Nothing -> escUnknown ('\\':'(':cs) + + bracketedGlyph = unicodeGlyph <|> charGlyph + + charGlyph = do + cs <- manyTill (noneOf ['[',']','\n']) (char ']') + (case words cs of + [] -> mzero + [s] -> case M.lookup s characterCodeMap of + Nothing -> mzero + Just c -> return [RoffStr [c]] + (s:ss) -> do + basechar <- case M.lookup cs characterCodeMap of + Nothing -> + case s of + [ch] | isAscii ch && isAlphaNum ch -> + return ch + _ -> mzero + Just c -> return c + let addAccents [] xs = return $ T.unpack . + Normalize.normalize Normalize.NFC . + T.pack $ reverse xs + addAccents (a:as) xs = + case M.lookup a combiningAccentsMap of + Just x -> addAccents as (x:xs) + Nothing -> mzero + addAccents ss [basechar] >>= \xs -> return [RoffStr xs]) + <|> escUnknown ("\\[" ++ cs ++ "]") + + unicodeGlyph = try $ do + xs <- ucharCode `sepBy1` (char '_') <* char ']' + return [RoffStr xs] + + ucharCode = try $ do + char 'u' + cs <- many1 (satisfy isHexDigit) + let lcs = length cs + guard $ lcs >= 4 && lcs <= 6 + case chr <$> safeRead ('0':'x':cs) of + Nothing -> mzero + Just c -> return c + + escUnknown :: PandocMonad m => String -> RoffLexer m [LinePart] + escUnknown s = do + pos <- getPosition + report $ SkippedContent ("Unknown escape sequence " ++ s) pos + return [RoffStr "\xFFFD"] + +-- \s-1 \s0 +escFontSize :: PandocMonad m => RoffLexer m [LinePart] +escFontSize = do + let sign = option "" $ ("-" <$ char '-' <|> "" <$ char '+') + let toFontSize xs = + case safeRead xs of + Nothing -> mzero + Just n -> return [FontSize n] + choice + [ do char '(' + s <- sign + ds <- count 2 digit + toFontSize (s ++ ds) + , do char '[' + s <- sign + ds <- many1 digit + char ']' + toFontSize (s ++ ds) + , do s <- sign + ds <- count 1 digit + toFontSize (s ++ ds) + ] + +escFont :: PandocMonad m => RoffLexer m [LinePart] +escFont = do + font <- choice + [ char 'S' >> return defaultFontSpec + , digit >> return defaultFontSpec + , char '(' >> anyChar >> anyChar >> return defaultFontSpec + , digit >> return defaultFontSpec + , ($ defaultFontSpec) <$> letterFontKind + , lettersFont + ] + modifyState $ \st -> st{ prevFont = currentFont st + , currentFont = font } + return [Font font] + +lettersFont :: PandocMonad m => RoffLexer m FontSpec +lettersFont = try $ do + char '[' + fs <- many letterFontKind + skipMany letter + char ']' + if null fs + then prevFont <$> getState + else return $ foldr ($) defaultFontSpec fs + +letterFontKind :: PandocMonad m => RoffLexer m (FontSpec -> FontSpec) +letterFontKind = choice [ + oneOf ['B','b'] >> return (\fs -> fs{ fontBold = True }) + , oneOf ['I','i'] >> return (\fs -> fs { fontItalic = True }) + , oneOf ['C','c'] >> return (\fs -> fs { fontMonospace = True }) + , oneOf ['P','p','R','r'] >> return id + ] + + +-- separate function from lexMacro since real man files sometimes do not +-- follow the rules +lexComment :: PandocMonad m => RoffLexer m RoffTokens +lexComment = do + try $ string ".\\\"" + many Parsec.space + skipMany $ noneOf "\n" + char '\n' + return mempty + +lexMacro :: PandocMonad m => RoffLexer m RoffTokens +lexMacro = do + pos <- getPosition + char '.' <|> char '\'' + skipMany spacetab + macroName <- many (satisfy (not . isSpace)) + case macroName of + "nop" -> return mempty + "ie" -> lexConditional + "if" -> lexConditional + "el" -> skipConditional + "TS" -> lexTable pos + + _ -> do + args <- lexArgs + case macroName of + "" -> return mempty + "\\\"" -> return mempty + "\\#" -> return mempty + "de" -> lexMacroDef args + "de1" -> lexMacroDef args + "ds" -> lexStringDef args + "ds1" -> lexStringDef args + "sp" -> return $ singleTok MEmptyLine + "so" -> lexIncludeFile args + _ -> resolveMacro macroName args pos + +lexTable :: PandocMonad m => SourcePos -> RoffLexer m RoffTokens +lexTable pos = do + skipMany lexComment + spaces + opts <- option [] tableOptions + case lookup "tab" opts of + Just (c:_) -> modifyState $ \st -> st{ tableTabChar = c } + _ -> modifyState $ \st -> st{ tableTabChar = '\t' } + spaces + skipMany lexComment + spaces + rows <- lexTableRows + morerows <- many $ try $ do + string ".T&" + skipMany spacetab + newline + lexTableRows + string ".TE" + skipMany spacetab + eofline + return $ singleTok $ MTable opts (rows ++ concat morerows) pos + +lexTableRows :: PandocMonad m => RoffLexer m [TableRow] +lexTableRows = do + aligns <- tableFormatSpec + spaces + skipMany lexComment + spaces + rows <- many (notFollowedBy (try (string ".TE") <|> try (string ".T&")) >> + tableRow) + return $ zip aligns rows + +tableCell :: PandocMonad m => RoffLexer m RoffTokens +tableCell = (enclosedCell <|> simpleCell) >>= lexRoff . T.pack + where + enclosedCell = do + try (string "T{") + manyTill anyChar (try (string "T}")) + simpleCell = do + tabChar <- tableTabChar <$> getState + many (notFollowedBy (char tabChar <|> newline) >> anyChar) + +tableRow :: PandocMonad m => RoffLexer m [RoffTokens] +tableRow = do + tabChar <- tableTabChar <$> getState + c <- tableCell + cs <- many $ try (char tabChar >> tableCell) + skipMany spacetab + eofline + skipMany lexComment + return (c:cs) + +tableOptions :: PandocMonad m => RoffLexer m [TableOption] +tableOptions = try $ many1 tableOption <* spaces <* char ';' + +tableOption :: PandocMonad m => RoffLexer m TableOption +tableOption = do + k <- many1 letter + v <- option "" $ do + char '(' + manyTill anyChar (char ')') + skipMany spacetab + optional (char ',') + skipMany spacetab + return (k,v) + +tableFormatSpec :: PandocMonad m => RoffLexer m [[CellFormat]] +tableFormatSpec = do + speclines <- tableFormatSpecLine `sepBy1` (newline <|> char ',') + skipMany spacetab + char '.' + return $ speclines ++ repeat (lastDef [] speclines) -- last line is default + +tableFormatSpecLine :: PandocMonad m => RoffLexer m [CellFormat] +tableFormatSpecLine = + many1 $ try $ skipMany spacetab >> tableColFormat + +tableColFormat :: PandocMonad m => RoffLexer m CellFormat +tableColFormat = do + pipePrefix' <- option False + $ True <$ (try $ string "|" <* notFollowedBy spacetab) + c <- oneOf ['a','A','c','C','l','L','n','N','r','R','s','S','^','_','-', + '=','|'] + numsuffixes <- option [] $ (:[]) <$> many1 digit + suffixes <- many $ do + x <- oneOf ['b','B','d','D','e','E','f','F','i','I','m','M', + 'p','P','t','T','u','U','v','V','w','W','x','X', 'z','Z'] + num <- if x == 'w' + then many1 digit <|> + do char '(' + xs <- manyTill anyChar (char ')') + return ("(" ++ xs ++ ")") + else return "" + return $ x : num + pipeSuffix' <- option False $ True <$ string "|" + return $ CellFormat + { columnType = c + , pipePrefix = pipePrefix' + , pipeSuffix = pipeSuffix' + , columnSuffixes = numsuffixes ++ suffixes } + +-- We don't fully handle the conditional. But we do +-- include everything under '.ie n', which occurs commonly +-- in man pages. We always skip the '.el' part. +lexConditional :: PandocMonad m => RoffLexer m RoffTokens +lexConditional = do + skipMany spacetab + lexNCond <|> skipConditional + +-- n means nroff mode +lexNCond :: PandocMonad m => RoffLexer m RoffTokens +lexNCond = do + char '\n' + many1 spacetab + lexGroup <|> manToken + +lexGroup :: PandocMonad m => RoffLexer m RoffTokens +lexGroup = do + groupstart + mconcat <$> manyTill manToken groupend + where + groupstart = try $ string "\\{\\" >> newline + groupend = try $ string "\\}" >> eofline + +skipConditional :: PandocMonad m => RoffLexer m RoffTokens +skipConditional = do + rest <- anyLine + when ("\\{\\" `isSuffixOf` rest) $ + void $ manyTill anyChar (try (string "\\}")) + return mempty + +lexIncludeFile :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens +lexIncludeFile args = do + pos <- getPosition + case args of + (f:_) -> do + let fp = linePartsToString f + dirs <- getResourcePath + result <- readFileFromDirs dirs fp + case result of + Nothing -> report $ CouldNotLoadIncludeFile fp pos + Just s -> getInput >>= setInput . (s ++) + return mempty + [] -> return mempty + +resolveMacro :: PandocMonad m + => String -> [Arg] -> SourcePos -> RoffLexer m RoffTokens +resolveMacro macroName args pos = do + macros <- customMacros <$> getState + case M.lookup macroName macros of + Nothing -> return $ singleTok $ MMacro macroName args pos + Just ts -> do + let fillLP (MacroArg i) zs = + case drop (i - 1) args of + [] -> zs + (ys:_) -> ys ++ zs + fillLP z zs = z : zs + let fillMacroArg (MLine lineparts) = + MLine (foldr fillLP [] lineparts) + fillMacroArg x = x + return $ RoffTokens . fmap fillMacroArg . unRoffTokens $ ts + +lexStringDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens +lexStringDef args = do -- string definition + case args of + [] -> fail "No argument to .ds" + (x:ys) -> do + let ts = singleTok $ MLine (intercalate [RoffStr " " ] ys) + let stringName = linePartsToString x + modifyState $ \st -> + st{ customMacros = M.insert stringName ts (customMacros st) } + return mempty + +lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens +lexMacroDef args = do -- macro definition + (macroName, stopMacro) <- + case args of + (x : y : _) -> return (linePartsToString x, linePartsToString y) + -- optional second arg + (x:_) -> return (linePartsToString x, ".") + [] -> fail "No argument to .de" + let stop = try $ do + char '.' <|> char '\'' + skipMany spacetab + string stopMacro + _ <- lexArgs + return () + ts <- mconcat <$> manyTill manToken stop + modifyState $ \st -> + st{ customMacros = M.insert macroName ts (customMacros st) } + return mempty + +lexArgs :: PandocMonad m => RoffLexer m [Arg] +lexArgs = do + args <- many $ try oneArg + skipMany spacetab + eofline + return args + + where + + oneArg :: PandocMonad m => RoffLexer m [LinePart] + oneArg = do + skipMany $ try $ string "\\\n" -- continuation line + try quotedArg <|> plainArg + -- try, because there are some erroneous files, e.g. linux/bpf.2 + + plainArg :: PandocMonad m => RoffLexer m [LinePart] + plainArg = do + skipMany spacetab + mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote) + where + unescapedQuote = char '"' >> return [RoffStr "\""] + + quotedArg :: PandocMonad m => RoffLexer m [LinePart] + quotedArg = do + skipMany spacetab + char '"' + xs <- mconcat <$> + many (macroArg <|> escape <|> regularText + <|> spaceTabChar <|> escapedQuote) + char '"' + return xs + where + escapedQuote = try $ do + char '"' + char '"' + return [RoffStr "\""] + +escStar :: PandocMonad m => RoffLexer m [LinePart] +escStar = try $ do + pos <- getPosition + c <- anyChar + case c of + '(' -> do + cs <- count 2 anyChar + resolveString cs pos + '[' -> do + cs <- many (noneOf "\t\n\r ]") + char ']' + resolveString cs pos + 'S' -> return mempty -- switch back to default font size + _ -> resolveString [c] pos + + where + + -- strings and macros share namespace + resolveString stringname pos = do + RoffTokens ts <- resolveMacro stringname [] pos + case Foldable.toList ts of + [MLine xs] -> return xs + _ -> do + report $ SkippedContent ("unknown string " ++ stringname) pos + return mempty + +lexLine :: PandocMonad m => RoffLexer m RoffTokens +lexLine = do + lnparts <- mconcat <$> many1 linePart + eofline + go lnparts + where -- return empty line if we only have empty strings; + -- this can happen if the line just contains \f[C], for example. + go [] = return mempty + go (RoffStr "" : xs) = go xs + go xs = return $ singleTok $ MLine xs + +linePart :: PandocMonad m => RoffLexer m [LinePart] +linePart = macroArg <|> escape <|> + regularText <|> quoteChar <|> spaceTabChar + +macroArg :: PandocMonad m => RoffLexer m [LinePart] +macroArg = try $ do + string "\\\\$" + x <- digit + return [MacroArg $ ord x - ord '0'] + +regularText :: PandocMonad m => RoffLexer m [LinePart] +regularText = do + s <- many1 $ noneOf "\n\r\t \\\"" + return [RoffStr s] + +quoteChar :: PandocMonad m => RoffLexer m [LinePart] +quoteChar = do + char '"' + return [RoffStr "\""] + +spaceTabChar :: PandocMonad m => RoffLexer m [LinePart] +spaceTabChar = do + c <- spacetab + return [RoffStr [c]] + +lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens +lexEmptyLine = char '\n' >> return (singleTok MEmptyLine) + +manToken :: PandocMonad m => RoffLexer m RoffTokens +manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine + +linePartsToString :: [LinePart] -> String +linePartsToString = mconcat . map go + where + go (RoffStr s) = s + go _ = mempty + +-- | Tokenize a string as a sequence of groff tokens. +lexRoff :: PandocMonad m => T.Text -> m RoffTokens +lexRoff txt = do + eithertokens <- readWithM (mconcat <$> many manToken) def (T.unpack txt) + case eithertokens of + Left e -> throwError e + Right tokenz -> return tokenz diff --git a/src/Text/Pandoc/RoffChar.hs b/src/Text/Pandoc/RoffChar.hs new file mode 100644 index 000000000..52d991d32 --- /dev/null +++ b/src/Text/Pandoc/RoffChar.hs @@ -0,0 +1,441 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2018 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.RoffChar + Copyright : Copyright (C) 2007-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Roff character escaping/unescaping. +-} + +module Text.Pandoc.RoffChar ( + standardEscapes + , characterCodes + , combiningAccents + ) where +import Prelude + +-- | These are the escapes specifically mentioned in groff_man(7), +-- plus @ and ellipsis. +standardEscapes :: [(Char, String)] +standardEscapes = + [ ('\160', "\\ ") + , ('\'', "\\[aq]") + , ('‘', "\\[oq]") + , ('’', "\\[cq]") + , ('"', "\\[dq]") + , ('“', "\\[lq]") + , ('”', "\\[rq]") + , ('—', "\\[em]") + , ('–', "\\[en]") + , ('`', "\\[ga]") + , ('^', "\\[ha]") + , ('~', "\\[ti]") + , ('-', "\\-") -- minus; - will be interpreted as hyphen U+2010 + , ('\\', "\\[rs]") + , ('@', "\\[at]") -- because we use @ as a table and math delimiter + , ('\x2026', "\\&...") -- because u2026 doesn't render on tty + ] + +characterCodes :: [(Char, String)] +characterCodes = + [ ('Ð', "-D") + , ('ð', "Sd") + , ('Þ', "TP") + , ('þ', "Tp") + , ('ß', "ss") + , ('ff', "ff") + , ('fi', "fi") + , ('fl', "fl") + , ('ffi', "Fi") + , ('ffl', "Fl") + , ('Ł', "/L") + , ('ł', "/l") + , ('Ø', "/O") + , ('ø', "/o") + , ('Æ', "AE") + , ('æ', "ae") + , ('Œ', "OE") + , ('œ', "oe") + , ('IJ', "IJ") + , ('ij', "ij") + , ('ı', ".i") + , ('ȷ', ".j") + , ('Á', "'A") + , ('Ć', "'C") + , ('É', "'E") + , ('Í', "'I") + , ('Ó', "'O") + , ('Ú', "'U") + , ('Ý', "'Y") + , ('á', "'a") + , ('ć', "'c") + , ('é', "'e") + , ('í', "'i") + , ('ó', "'o") + , ('ú', "'u") + , ('ý', "'y") + , ('Ä', ":A") + , ('Ë', ":E") + , ('Ï', ":I") + , ('Ö', ":O") + , ('Ü', ":U") + , ('Ÿ', ":Y") + , ('ä', ":a") + , ('ë', ":e") + , ('ï', ":i") + , ('ö', ":o") + , ('ü', ":u") + , ('ÿ', ":y") + , ('Â', "^A") + , ('Ê', "^E") + , ('Î', "^I") + , ('Ô', "^O") + , ('Û', "^U") + , ('â', "^a") + , ('ê', "^e") + , ('î', "^i") + , ('ô', "^o") + , ('û', "^u") + , ('À', "`A") + , ('È', "`E") + , ('Ì', "`I") + , ('Ò', "`O") + , ('Ù', "`U") + , ('à', "`a") + , ('è', "`e") + , ('ì', "`i") + , ('ò', "`o") + , ('ù', "`u") + , ('Ã', "~A") + , ('Ñ', "~N") + , ('Õ', "~O") + , ('ã', "~a") + , ('ñ', "~n") + , ('õ', "~o") + , ('Š', "vS") + , ('š', "vs") + , ('Ž', "vZ") + , ('ž', "vz") + , ('Ç', ",C") + , ('ç', ",c") + , ('Å', "oA") + , ('å', "oa") + , ('˝', "a\"") + , ('¯', "a-") + , ('˙', "a.") + , ('^', "a^") + , ('´', "aa") + , ('`', "ga") + , ('˘', "ab") + , ('¸', "ac") + , ('¨', "ad") + , ('ˇ', "ah") + , ('˚', "ao") + , ('~', "a~") + , ('˛', "ho") + , ('^', "ha") + , ('~', "ti") + , ('„', "Bq") + , ('‚', "bq") + , ('“', "lq") + , ('”', "rq") + , ('‘', "oq") + , ('’', "cq") + , ('\'', "aq") + , ('"', "dq") + , ('«', "Fo") + , ('»', "Fc") + , ('‹', "fo") + , ('›', "fc") + , ('¡', "r!") + , ('¿', "r?") + , ('—', "em") + , ('–', "en") + , ('‐', "hy") + , ('[', "lB") + , (']', "rB") + , ('{', "lC") + , ('}', "rC") + , ('⟨', "la") + , ('⟩', "ra") + , ('⎪', "bv") + , ('⎪', "braceex") + , ('⎡', "bracketlefttp") + , ('⎣', "bracketleftbt") + , ('⎢', "bracketleftex") + , ('⎤', "bracketrighttp") + , ('⎦', "bracketrightbt") + , ('⎥', "bracketrightex") + , ('╭', "lt") + , ('⎧', "bracelefttp") + , ('┥', "lk") + , ('⎨', "braceleftmid") + , ('╰', "lb") + , ('⎩', "braceleftbt") + , ('⎪', "braceleftex") + , ('╮', "rt") + , ('⎫', "bracerighttp") + , ('┝', "rk") + , ('⎬', "bracerightmid") + , ('╯', "rb") + , ('⎭', "bracerightbt") + , ('⎪', "bracerightex") + , ('⎛', "parenlefttp") + , ('⎝', "parenleftbt") + , ('⎜', "parenleftex") + , ('⎞', "parenrighttp") + , ('⎠', "parenrightbt") + , ('⎟', "parenrightex") + , ('←', "<-") + , ('→', "->") + , ('↔', "<>") + , ('↓', "da") + , ('↑', "ua") + , ('↕', "va") + , ('⇐', "lA") + , ('⇒', "rA") + , ('⇔', "hA") + , ('⇓', "dA") + , ('⇑', "uA") + , ('⇕', "vA") + , ('⎯', "an") + , ('|', "ba") + , ('│', "br") + , ('_', "ul") + , ('‾', "rn") + , ('_', "ru") + , ('¦', "bb") + , ('/', "sl") + , ('\\', "rs") + , ('○', "ci") + , ('·', "bu") + , ('‡', "dd") + , ('†', "dg") + , ('◊', "lz") + , ('□', "sq") + , ('¶', "ps") + , ('§', "sc") + , ('☜', "lh") + , ('☞', "rh") + , ('@', "at") + , ('#', "sh") + , ('↵', "CR") + , ('✓', "OK") + , ('©', "co") + , ('®', "rg") + , ('™', "tm") + , ('$', "Do") + , ('¢', "ct") + , ('€', "eu") + , ('€', "Eu") + , ('¥', "Ye") + , ('£', "Po") + , ('¤', "Cs") + , ('ƒ', "Fn") + , ('°', "de") + , ('‰', "%0") + , ('′', "fm") + , ('″', "sd") + , ('µ', "mc") + , ('ª', "Of") + , ('º', "Om") + , ('∧', "AN") + , ('∨', "OR") + , ('¬', "no") + , ('¬', "tno") + , ('∃', "te") + , ('∀', "fa") + , ('∋', "st") + , ('∴', "3d") + , ('∴', "tf") + , ('|', "or") + , ('½', "12") + , ('¼', "14") + , ('¾', "34") + , ('⅛', "18") + , ('⅜', "38") + , ('⅝', "58") + , ('⅞', "78") + , ('¹', "S1") + , ('²', "S2") + , ('³', "S3") + , ('+', "pl") + , ('−', "mi") + , ('∓', "-+") + , ('±', "+-") + , ('±', "t+-") + , ('·', "pc") + , ('⋅', "md") + , ('×', "mu") + , ('×', "tmu") + , ('⊗', "c*") + , ('⊕', "c+") + , ('÷', "di") + , ('÷', "tdi") + , ('⁄', "f/") + , ('∗', "**") + , ('≤', "<=") + , ('≥', ">=") + , ('≪', "<<") + , ('≫', ">>") + , ('=', "eq") + , ('≠', "!=") + , ('≡', "==") + , ('≢', "ne") + , ('≅', "=~") + , ('≃', "|=") + , ('∼', "ap") + , ('≈', "~~") + , ('≈', "~=") + , ('∝', "pt") + , ('∅', "es") + , ('∈', "mo") + , ('∉', "nm") + , ('⊂', "sb") + , ('⊄', "nb") + , ('⊃', "sp") + , ('⊅', "nc") + , ('⊆', "ib") + , ('⊇', "ip") + , ('∩', "ca") + , ('∪', "cu") + , ('∠', "/_") + , ('⊥', "pp") + , ('∫', "is") + , ('∫', "integral") + , ('∑', "sum") + , ('∏', "product") + , ('∐', "coproduct") + , ('∇', "gr") + , ('√', "sr") + , ('√', "sqrt") + -- , "radicalex" + -- "sqrtex" + , ('⌈', "lc") + , ('⌉', "rc") + , ('⌊', "lf") + , ('⌋', "rf") + , ('∞', "if") + , ('ℵ', "Ah") + , ('ℑ', "Im") + , ('ℜ', "Re") + , ('℘', "wp") + , ('∂', "pd") + , ('ℏ', "-h") + , ('ℏ', "hbar") + , ('Α', "*A") + , ('Β', "*B") + , ('Γ', "*G") + , ('Δ', "*D") + , ('Ε', "*E") + , ('Ζ', "*Z") + , ('Η', "*Y") + , ('Θ', "*H") + , ('Ι', "*I") + , ('Κ', "*K") + , ('Λ', "*L") + , ('Μ', "*M") + , ('Ν', "*N") + , ('Ξ', "*C") + , ('Ο', "*O") + , ('Π', "*P") + , ('Ρ', "*R") + , ('Σ', "*S") + , ('Τ', "*T") + , ('Υ', "*U") + , ('Φ', "*F") + , ('Χ', "*X") + , ('Ψ', "*Q") + , ('Ω', "*W") + , ('α', "*a") + , ('β', "*b") + , ('γ', "*g") + , ('δ', "*d") + , ('ε', "*e") + , ('ζ', "*z") + , ('η', "*y") + , ('θ', "*h") + , ('ι', "*i") + , ('κ', "*k") + , ('λ', "*l") + , ('μ', "*m") + , ('ν', "*n") + , ('ξ', "*c") + , ('ο', "*o") + , ('π', "*p") + , ('ρ', "*r") + , ('ς', "ts") + , ('σ', "*s") + , ('τ', "*t") + , ('υ', "*u") + , ('ϕ', "*f") + , ('χ', "*x") + , ('ψ', "*q") + , ('ω', "*w") + , ('ϑ', "+h") + , ('φ', "+f") + , ('ϖ', "+p") + , ('ϵ', "+e") + , ('♣', "CL") + , ('♠', "SP") + , ('♥', "HE") + , ('♦', "DI") + , ('˝' , "a\"") + , ('¯', "a-") + , ('˙', "a.") + , ('^', "a^") + , ('´', "aa") + , ('`', "ga") + , ('˘', "ab") + , ('¸', "ac") + , ('¨', "ad") + , ('ˇ', "ah") + , ('˚', "ao") + , ('~', "a~") + , ('˛', "ho") + , ('^', "ha") + , ('~', "ti") + ] + +-- use like: \\[E a^ aa] +combiningAccents :: [(Char, String)] +combiningAccents = + [ ('\779' , "a\"") + , ('\772', "a-") + , ('\775', "a.") + , ('\770', "a^") + , ('\769', "aa") + , ('\768', "ga") + , ('\774', "ab") + , ('\807', "ac") + , ('\776', "ad") + , ('\780', "ah") + , ('\778', "ao") + , ('\771', "a~") + , ('\808', "ho") + , ('\770', "ha") + , ('\771', "ti") + ] diff --git a/src/Text/Pandoc/Writers/Groff.hs b/src/Text/Pandoc/Writers/Groff.hs deleted file mode 100644 index 5d51314bb..000000000 --- a/src/Text/Pandoc/Writers/Groff.hs +++ /dev/null @@ -1,138 +0,0 @@ -{-# LANGUAGE NoImplicitPrelude #-} -{- -Copyright (C) 2007-2018 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Groff - Copyright : Copyright (C) 2007-2018 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Common functions for groff writers (man, ms). --} - -module Text.Pandoc.Writers.Groff ( - WriterState(..) - , defaultWriterState - , MS - , Note - , EscapeMode(..) - , escapeString - , withFontFeature - ) where -import Prelude -import Data.Char (ord, isAscii) -import Control.Monad.State.Strict -import qualified Data.Map as Map -import Data.Maybe (fromMaybe, isJust, catMaybes) -import Text.Pandoc.Class (PandocMonad) -import Text.Pandoc.Definition -import Text.Pandoc.Pretty -import Text.Printf (printf) -import Text.Pandoc.GroffChar (standardEscapes, - characterCodes, combiningAccents) - -data WriterState = WriterState { stHasInlineMath :: Bool - , stFirstPara :: Bool - , stNotes :: [Note] - , stSmallCaps :: Bool - , stHighlighting :: Bool - , stInHeader :: Bool - , stFontFeatures :: Map.Map Char Bool - , stHasTables :: Bool - } - -defaultWriterState :: WriterState -defaultWriterState = WriterState{ stHasInlineMath = False - , stFirstPara = True - , stNotes = [] - , stSmallCaps = False - , stHighlighting = False - , stInHeader = False - , stFontFeatures = Map.fromList [ - ('I',False) - , ('B',False) - , ('C',False) - ] - , stHasTables = False - } - -type Note = [Block] - -type MS = StateT WriterState - -data EscapeMode = AllowUTF8 -- ^ use preferred man escapes - | AsciiOnly -- ^ escape everything - deriving Show - -combiningAccentsMap :: Map.Map Char String -combiningAccentsMap = Map.fromList combiningAccents - -essentialEscapes :: Map.Map Char String -essentialEscapes = Map.fromList standardEscapes - --- | Escape special characters for groff. -escapeString :: EscapeMode -> String -> String -escapeString _ [] = [] -escapeString escapeMode ('\n':'.':xs) = - '\n':'\\':'&':'.':escapeString escapeMode xs -escapeString escapeMode (x:xs) = - case Map.lookup x essentialEscapes of - Just s -> s ++ escapeString escapeMode xs - Nothing - | isAscii x -> x : escapeString escapeMode xs - | otherwise -> - case escapeMode of - AllowUTF8 -> x : escapeString escapeMode xs - AsciiOnly -> - let accents = catMaybes $ takeWhile isJust - (map (\c -> Map.lookup c combiningAccentsMap) xs) - rest = drop (length accents) xs - s = case Map.lookup x characterCodeMap of - Just t -> "\\[" <> unwords (t:accents) <> "]" - Nothing -> "\\[" <> unwords - (printf "u%04X" (ord x) : accents) <> "]" - in s ++ escapeString escapeMode rest - -characterCodeMap :: Map.Map Char String -characterCodeMap = Map.fromList characterCodes - -fontChange :: PandocMonad m => MS m Doc -fontChange = do - features <- gets stFontFeatures - inHeader <- gets stInHeader - let filling = ['C' | fromMaybe False $ Map.lookup 'C' features] ++ - ['B' | inHeader || - fromMaybe False (Map.lookup 'B' features)] ++ - ['I' | fromMaybe False $ Map.lookup 'I' features] - return $ - if null filling - then text "\\f[R]" - else text $ "\\f[" ++ filling ++ "]" - -withFontFeature :: PandocMonad m => Char -> MS m Doc -> MS m Doc -withFontFeature c action = do - modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } - begin <- fontChange - d <- action - modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } - end <- fontChange - return $ begin <> d <> end diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index c4570f761..29e4aa718 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -47,7 +47,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Templates import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Text.Pandoc.Writers.Groff +import Text.Pandoc.Writers.Roff import Text.Printf (printf) -- | Convert Pandoc to Man. diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 46ddc4c59..6abc4bf93 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -60,7 +60,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Templates import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Text.Pandoc.Writers.Groff +import Text.Pandoc.Writers.Roff import Text.Printf (printf) import Text.TeXMath (writeEqn) diff --git a/src/Text/Pandoc/Writers/Roff.hs b/src/Text/Pandoc/Writers/Roff.hs new file mode 100644 index 000000000..7bf0c5ee9 --- /dev/null +++ b/src/Text/Pandoc/Writers/Roff.hs @@ -0,0 +1,138 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{- +Copyright (C) 2007-2018 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Roff + Copyright : Copyright (C) 2007-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Common functions for groff writers (man, ms). +-} + +module Text.Pandoc.Writers.Roff ( + WriterState(..) + , defaultWriterState + , MS + , Note + , EscapeMode(..) + , escapeString + , withFontFeature + ) where +import Prelude +import Data.Char (ord, isAscii) +import Control.Monad.State.Strict +import qualified Data.Map as Map +import Data.Maybe (fromMaybe, isJust, catMaybes) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Pretty +import Text.Printf (printf) +import Text.Pandoc.RoffChar (standardEscapes, + characterCodes, combiningAccents) + +data WriterState = WriterState { stHasInlineMath :: Bool + , stFirstPara :: Bool + , stNotes :: [Note] + , stSmallCaps :: Bool + , stHighlighting :: Bool + , stInHeader :: Bool + , stFontFeatures :: Map.Map Char Bool + , stHasTables :: Bool + } + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ stHasInlineMath = False + , stFirstPara = True + , stNotes = [] + , stSmallCaps = False + , stHighlighting = False + , stInHeader = False + , stFontFeatures = Map.fromList [ + ('I',False) + , ('B',False) + , ('C',False) + ] + , stHasTables = False + } + +type Note = [Block] + +type MS = StateT WriterState + +data EscapeMode = AllowUTF8 -- ^ use preferred man escapes + | AsciiOnly -- ^ escape everything + deriving Show + +combiningAccentsMap :: Map.Map Char String +combiningAccentsMap = Map.fromList combiningAccents + +essentialEscapes :: Map.Map Char String +essentialEscapes = Map.fromList standardEscapes + +-- | Escape special characters for groff. +escapeString :: EscapeMode -> String -> String +escapeString _ [] = [] +escapeString escapeMode ('\n':'.':xs) = + '\n':'\\':'&':'.':escapeString escapeMode xs +escapeString escapeMode (x:xs) = + case Map.lookup x essentialEscapes of + Just s -> s ++ escapeString escapeMode xs + Nothing + | isAscii x -> x : escapeString escapeMode xs + | otherwise -> + case escapeMode of + AllowUTF8 -> x : escapeString escapeMode xs + AsciiOnly -> + let accents = catMaybes $ takeWhile isJust + (map (\c -> Map.lookup c combiningAccentsMap) xs) + rest = drop (length accents) xs + s = case Map.lookup x characterCodeMap of + Just t -> "\\[" <> unwords (t:accents) <> "]" + Nothing -> "\\[" <> unwords + (printf "u%04X" (ord x) : accents) <> "]" + in s ++ escapeString escapeMode rest + +characterCodeMap :: Map.Map Char String +characterCodeMap = Map.fromList characterCodes + +fontChange :: PandocMonad m => MS m Doc +fontChange = do + features <- gets stFontFeatures + inHeader <- gets stInHeader + let filling = ['C' | fromMaybe False $ Map.lookup 'C' features] ++ + ['B' | inHeader || + fromMaybe False (Map.lookup 'B' features)] ++ + ['I' | fromMaybe False $ Map.lookup 'I' features] + return $ + if null filling + then text "\\f[R]" + else text $ "\\f[" ++ filling ++ "]" + +withFontFeature :: PandocMonad m => Char -> MS m Doc -> MS m Doc +withFontFeature c action = do + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + begin <- fontChange + d <- action + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + end <- fontChange + return $ begin <> d <> end -- cgit v1.2.3