diff options
Diffstat (limited to 'src/Text/Pandoc')
| -rw-r--r-- | src/Text/Pandoc/Readers/Man.hs | 63 | 
1 files changed, 39 insertions, 24 deletions
| diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index ea5657b56..280acb9c4 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -38,7 +38,9 @@ import Control.Monad (liftM, void)  import Control.Monad.Except (throwError)  import Data.Char (isDigit, isUpper, isLower)  import Data.Default (Default) +import Data.Functor (($>))  import Data.Map (insert) +import Data.Set (Set, singleton, fromList, toList)  import Data.Maybe (catMaybes, fromMaybe, isNothing)  import Data.List (intersperse, intercalate)  import qualified Data.Text as T @@ -57,8 +59,7 @@ import Text.Parsec.Pos (updatePosString)  --  -- Data Types  -- - -data FontKind = Regular | Italic | Bold | ItalicBold deriving Show +data FontKind = Bold | Italic | Monospace | Regular deriving (Show, Eq, Ord)  data MacroKind = KTitle                 | KCodeBlStart @@ -68,7 +69,9 @@ data MacroKind = KTitle                 | KSubTab                 deriving (Show, Eq) -type RoffStr = (String, FontKind) +type Font = Set FontKind + +type RoffStr = (String, Font)  data ManToken = MStr RoffStr                | MLine [RoffStr] @@ -80,16 +83,16 @@ data ManToken = MStr RoffStr                | MComment String                deriving Show -data EscapeThing = EFont FontKind +data EscapeThing = EFont Font                   | EChar Char                   | ENothing                   deriving Show -data RoffState = RoffState { fontKind :: FontKind +data RoffState = RoffState { fontKind :: Font                             } deriving Show  instance Default RoffState where -  def = RoffState {fontKind = Regular} +  def = RoffState {fontKind = singleton Regular}  type ManLexer m = ParserT [Char] RoffState m  type ManParser m = ParserT [ManToken] ParserState m @@ -197,22 +200,29 @@ escapeLexer = do    escFont :: PandocMonad m => ManLexer m EscapeThing    escFont = do      char 'f' -    font <- choice [ letterFont -          , char '(' >> anyChar >> anyChar >> return Regular -          , try (char '[' >> letterFont >>= \f -> char ']'  >> return f) -          , try $ string "[BI]" >> return ItalicBold -          , char '[' >> many letter >> char ']'  >> return Regular -          , digit >> return Regular +    font <- choice [ singleton <$> letterFontKind +          , char '(' >> anyChar >> anyChar >> return (singleton Regular) +          , try lettersFont +          , digit >> return (singleton Regular)            ]      modifyState (\r -> r {fontKind = font})      return $ EFont font      where -    letterFont :: PandocMonad m => ManLexer m FontKind -    letterFont = choice [ +    lettersFont :: PandocMonad m => ManLexer m Font +    lettersFont = do +      char '[' +      fs <- many letterFontKind  +      many letter +      char ']' +      return $ fromList fs + +    letterFontKind :: PandocMonad m => ManLexer m FontKind +    letterFontKind = choice [          char 'B' >> return Bold        , char 'I' >> return Italic +      , char 'C' >> return Monospace        , (char 'P' <|> char 'R') >> return Regular        ] @@ -223,7 +233,7 @@ escapeLexer = do      logOutput $ SkippedContent ("Unknown escape sequence \\" ++ [c]) pos      return ENothing -currentFont :: PandocMonad m => ManLexer m FontKind +currentFont :: PandocMonad m => ManLexer m Font  currentFont = fontKind <$> getState  -- separate function from lexMacro since real man files sometimes do not follow the rules @@ -253,10 +263,10 @@ lexMacro = do                "RS"   -> knownMacro KSubTab                "nf"   -> knownMacro KCodeBlStart                "fi"   -> knownMacro KCodeBlEnd -              "B"    -> MStr (joinedArgs,Bold) +              "B"    -> MStr (joinedArgs, singleton Bold)                "BR"   -> MMaybeLink joinedArgs -              x | x `elem` ["BI", "IB"] -> MStr (joinedArgs, ItalicBold) -              x | x `elem` ["I", "IR", "RI"]  -> MStr (joinedArgs, Italic) +              x | x `elem` ["BI", "IB"] -> MStr (joinedArgs, fromList [Italic, Bold]) +              x | x `elem` ["I", "IR", "RI"]  -> MStr (joinedArgs, singleton Italic)                "SH"   -> MHeader 2 args                "SS"   -> MHeader 3 args                x | x `elem` [ "P", "PP", "LP", "sp"] -> MEmptyLine @@ -314,7 +324,7 @@ lexLine = do    return $ MLine $ catMaybes lnparts    where -  esc :: PandocMonad m => ManLexer m (Maybe (String, FontKind)) +  esc :: PandocMonad m => ManLexer m (Maybe (String, Font))    esc = do      someesc <- escapeLexer      font <- currentFont @@ -323,7 +333,7 @@ lexLine = do                 _ -> Nothing      return rv -  linePart :: PandocMonad m => ManLexer m (Maybe (String, FontKind)) +  linePart :: PandocMonad m => ManLexer m (Maybe (String, Font))    linePart = do      lnpart <- many1 $ noneOf "\n\\"      font <- currentFont @@ -424,10 +434,15 @@ parseSkippedContent = do    onToken _ = return ()  strToInline :: RoffStr -> Inline -strToInline (s, Regular) = Str s -strToInline (s, Italic) = Emph [Str s] -strToInline (s, Bold) = Strong [Str s] -strToInline (s, ItalicBold) = Strong [Emph [Str s]] +strToInline (s, fonts) = inner $ toList fonts where +  inner :: [FontKind] -> Inline +  inner [] = Str s +  inner (Bold:fs) = Strong [inner fs] +  inner (Italic:fs) = Emph [inner fs] + +  -- Monospace goes after Bold and Italic in ordered set +  inner (Monospace:_) = Code nullAttr s +  inner (Regular:fs) = inner fs  parsePara :: PandocMonad m => ManParser m Block  parsePara = Para <$> parseInlines | 
