From 2d785c1e370c521f37a85d6b9bee974e053183be Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 28 Oct 2018 11:31:48 -0700 Subject: Man reader: handle inline macros like .BI in code blocks. The font changes are discarded, but at least we keep the text. --- src/Text/Pandoc/Readers/Man.hs | 96 +++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 89ac7ee51..0c587d4b7 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -48,6 +48,7 @@ import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing +import Text.Pandoc.Walk (query) import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Readers.Roff -- TODO explicit imports import Text.Parsec hiding (tokenPrim) @@ -306,29 +307,33 @@ parseInline = try $ do tok <- mtoken case tok of MLine lparts -> return $ linePartsToInlines lparts - MMacro mname args _pos -> - case mname of - "UR" -> parseLink args - "MT" -> parseEmailLink args - "B" -> parseBold args - "I" -> parseItalic args - "br" -> return linebreak - "BI" -> parseAlternatingFonts [strong, emph] args - "IB" -> parseAlternatingFonts [emph, strong] args - "IR" -> parseAlternatingFonts [emph, id] args - "RI" -> parseAlternatingFonts [id, emph] args - "BR" -> parseAlternatingFonts [strong, id] args - "RB" -> parseAlternatingFonts [id, strong] args - "SY" -> return $ strong $ mconcat $ intersperse B.space - $ map linePartsToInlines args - "YS" -> return mempty - "OP" -> case args of - (x:ys) -> return $ B.space <> str "[" <> B.space <> - mconcat (strong (linePartsToInlines x) : - map ((B.space <>) . linePartsToInlines) ys) - <> B.space <> str "]" - [] -> return mempty - _ -> mzero + MMacro mname args pos -> handleInlineMacro mname args pos + _ -> mzero + +handleInlineMacro :: PandocMonad m + => String -> [Arg] -> SourcePos -> ManParser m Inlines +handleInlineMacro mname args _pos = do + case mname of + "UR" -> parseLink args + "MT" -> parseEmailLink args + "B" -> parseBold args + "I" -> parseItalic args + "br" -> return linebreak + "BI" -> parseAlternatingFonts [strong, emph] args + "IB" -> parseAlternatingFonts [emph, strong] args + "IR" -> parseAlternatingFonts [emph, id] args + "RI" -> parseAlternatingFonts [id, emph] args + "BR" -> parseAlternatingFonts [strong, id] args + "RB" -> parseAlternatingFonts [id, strong] args + "SY" -> return $ strong $ mconcat $ intersperse B.space + $ map linePartsToInlines args + "YS" -> return mempty + "OP" -> case args of + (x:ys) -> return $ B.space <> str "[" <> B.space <> + mconcat (strong (linePartsToInlines x) : + map ((B.space <>) . linePartsToInlines) ys) + <> B.space <> str "]" + [] -> return mempty _ -> mzero parseBold :: PandocMonad m => [Arg] -> ManParser m Inlines @@ -375,24 +380,39 @@ endmacro name = void (mmacro name) parseCodeBlock :: PandocMonad m => ManParser m Blocks parseCodeBlock = try $ do optional bareIP -- some people indent their code - toks <- (mmacro "nf" *> many (mline <|> memptyLine) <* endmacro "fi") - <|> (mmacro "EX" *> many (mline <|> memptyLine) <* endmacro "EE") - return $ codeBlock (intercalate "\n" . catMaybes $ - extractText <$> toks) + toks <- (mmacro "nf" *> manyTill codeline (endmacro "fi")) + <|> (mmacro "EX" *> manyTill codeline (endmacro "EE")) + return $ codeBlock (intercalate "\n" $ catMaybes toks) where - extractText :: RoffToken -> Maybe String - extractText (MLine ss) - | not (null ss) - , all isFontToken ss = Nothing - | otherwise = Just $ linePartsToString ss - where isFontToken FontSize{} = True - isFontToken Font{} = True - isFontToken _ = False - extractText MEmptyLine = Just "" - -- string are intercalated with '\n', this prevents double '\n' - extractText _ = Nothing + codeline = do + tok <- mtoken + case tok of + MMacro mname args pos -> do + (Just . query getText <$> handleInlineMacro mname args pos) <|> + do report $ SkippedContent ('.':mname) pos + return Nothing + MTable _ _ pos -> do + report $ SkippedContent "TABLE" pos + return $ Just "TABLE" + MEmptyLine -> return $ Just "" + MLine ss + | not (null ss) + , all isFontToken ss -> return Nothing + | otherwise -> return $ Just $ linePartsToString ss + + isFontToken FontSize{} = True + isFontToken Font{} = True + isFontToken _ = False + + getText :: Inline -> String + getText (Str s) = s + getText Space = " " + getText (Code _ s) = s + getText SoftBreak = "\n" + getText LineBreak = "\n" + getText _ = "" parseHeader :: PandocMonad m => ManParser m Blocks parseHeader = do -- cgit v1.2.3