aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Text/Pandoc/Readers/Man.hs136
1 files changed, 84 insertions, 52 deletions
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 8977c9df4..9797d2811 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -36,6 +36,7 @@ module Text.Pandoc.Readers.Man (readMan, testFile) where
import Prelude
import Control.Monad (liftM)
import Control.Monad.Except (throwError)
+import Data.Char (isDigit, isUpper, isLower)
import Data.Default (Default)
import Data.Map (insert)
import Data.Maybe (catMaybes)
@@ -64,18 +65,18 @@ data MacroKind = KTitle
| KCodeBlEnd
| KTab
| KTabEnd
+ | KSubTab
deriving (Show, Eq)
--- TODO header strings
--- TODO remove MStr
--- TODO filter skipped content
-data ManToken = MStr String FontKind
- | MLine [(String, FontKind)]
+type RoffStr = (String, FontKind)
+
+data ManToken = MStr RoffStr
+ | MLine [RoffStr]
| MMaybeLink String
| MEmptyLine
- | MHeader Int String
- | MMacro MacroKind [String]
- | MUnknownMacro String [String]
+ | MHeader Int [RoffStr]
+ | MMacro MacroKind [RoffStr]
+ | MUnknownMacro String [RoffStr]
| MComment String
deriving Show
@@ -152,7 +153,7 @@ lexMan = many (lexMacro <|> lexLine <|> lexEmptyLine)
parseMan :: PandocMonad m => ManParser m Pandoc
parseMan = do
- let parsers = [ parseBulletList, parseTitle, parsePara, parseSkippedContent
+ let parsers = [ try parseBulletList, parseTitle, parsePara, parseSkippedContent
, parseCodeBlock, parseHeader, parseSkipMacro]
blocks <- many $ choice parsers
parserst <- getState
@@ -212,13 +213,18 @@ escapeLexer = do
, char 'P' >> return Regular
]
+currentFont :: PandocMonad m => ManLexer m FontKind
+currentFont = do
+ RoffState {fontKind = fk} <- getState
+ return fk
+
lexMacro :: PandocMonad m => ManLexer m ManToken
lexMacro = do
char '.' <|> char '\''
many spacetab
macroName <- many1 (letter <|> oneOf ['\\', '"'])
args <- lexArgs
- let joinedArgs = unwords args
+ let joinedArgs = unwords $ fst <$> args
let knownMacro mkind = MMacro mkind args
let tok = case macroName of
@@ -227,21 +233,22 @@ lexMacro = do
"IP" -> knownMacro KTab
"TP" -> knownMacro KTab
"RE" -> knownMacro KTabEnd
+ "RS" -> knownMacro KSubTab
"nf" -> knownMacro KCodeBlStart
"fi" -> knownMacro KCodeBlEnd
- "B" -> MStr joinedArgs Bold
+ "B" -> MStr (joinedArgs,Bold)
"BR" -> MMaybeLink joinedArgs
- x | x `elem` ["BI", "IB"] -> MStr joinedArgs ItalicBold
- x | x `elem` ["I", "IR", "RI"] -> MStr joinedArgs Italic
- "SH" -> MHeader 2 joinedArgs
- "SS" -> MHeader 3 joinedArgs
+ x | x `elem` ["BI", "IB"] -> MStr (joinedArgs, ItalicBold)
+ x | x `elem` ["I", "IR", "RI"] -> MStr (joinedArgs, Italic)
+ "SH" -> MHeader 2 args
+ "SS" -> MHeader 3 args
x | x `elem` [ "P", "PP", "LP", "sp"] -> MEmptyLine
_ -> MUnknownMacro macroName args
return tok
where
- lexArgs :: PandocMonad m => ManLexer m [String]
+ lexArgs :: PandocMonad m => ManLexer m [RoffStr]
lexArgs = do
args <- many oneArg
eofline
@@ -249,20 +256,24 @@ lexMacro = do
where
- oneArg :: PandocMonad m => ManLexer m String
+ oneArg :: PandocMonad m => ManLexer m RoffStr
oneArg = do
many1 spacetab
quotedArg <|> plainArg
- plainArg :: PandocMonad m => ManLexer m String
- plainArg = fmap catMaybes . many1 $ escChar <|> (Just <$> noneOf " \t\n")
+ plainArg :: PandocMonad m => ManLexer m RoffStr
+ plainArg = do
+ arg <- many1 $ escChar <|> (Just <$> noneOf " \t\n")
+ f <- currentFont
+ return (catMaybes arg, f)
- quotedArg :: PandocMonad m => ManLexer m String
+ quotedArg :: PandocMonad m => ManLexer m RoffStr
quotedArg = do
char '"'
val <- catMaybes <$> many quotedChar
char '"'
- return val
+ f <- currentFont
+ return (val, f)
quotedChar :: PandocMonad m => ManLexer m (Maybe Char)
quotedChar = escChar <|> (Just <$> noneOf "\"\n") <|> (Just <$> try (string "\"\"" >> return '"'))
@@ -295,11 +306,6 @@ lexLine = do
lnpart <- many1 $ noneOf "\n\\"
font <- currentFont
return $ Just (lnpart, font)
-
- currentFont :: PandocMonad m => ManLexer m FontKind
- currentFont = do
- RoffState {fontKind = fk} <- getState
- return fk
lexEmptyLine :: PandocMonad m => ManLexer m ManToken
@@ -313,11 +319,11 @@ msatisfy :: (Show t, Stream s m t) => (t -> Bool) -> ParserT s st m t
msatisfy predic = tokenPrim show nextPos testTok
where
testTok t = if predic t then Just t else Nothing
- nextPos pos x _xs = updatePosString pos (show x)
+ nextPos pos x _xs = updatePosString (setSourceLine pos $ sourceLine pos + (if predic x then 1 else 0)) (show x)
mstr :: PandocMonad m => ManParser m ManToken
mstr = msatisfy isMStr where
- isMStr (MStr _ _) = True
+ isMStr (MStr _) = True
isMStr _ = False
mline :: PandocMonad m => ManParser m ManToken
@@ -371,7 +377,7 @@ parseTitle = do
if null args
then return Null
else do
- let mantitle = head args
+ let mantitle = fst $ head args
modifyState (changeTitle mantitle)
return $ Header 1 nullAttr [Str mantitle]
where
@@ -395,29 +401,32 @@ parseSkippedContent = do
logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos
onToken _ = return ()
-strToInline :: String -> FontKind -> Inline
-strToInline s Regular = Str s
-strToInline s Italic = Emph [Str s]
-strToInline s Bold = Strong [Str s]
-strToInline s ItalicBold = Strong [Emph [Str s]]
+strToInline :: RoffStr -> Inline
+strToInline (s, Regular) = Str s
+strToInline (s, Italic) = Emph [Str s]
+strToInline (s, Bold) = Strong [Str s]
+strToInline (s, ItalicBold) = Strong [Emph [Str s]]
parsePara :: PandocMonad m => ManParser m Block
-parsePara = do
+parsePara = Para <$> parseInlines
+
+parseInlines :: PandocMonad m => ManParser m [Inline]
+parseInlines = do
inls <- many1 (strInl <|> lineInl <|> linkInl <|> comment)
let withspaces = intersperse [Space] inls
- return $ Para (concat withspaces)
+ return $ concat withspaces
where
strInl :: PandocMonad m => ManParser m [Inline]
strInl = do
- (MStr str fk) <- mstr
- return [strToInline str fk]
+ (MStr rstr) <- mstr
+ return [strToInline rstr]
lineInl :: PandocMonad m => ManParser m [Inline]
lineInl = do
(MLine fragments) <- mline
- return $ fmap (\(s,f) -> strToInline s f) fragments
+ return $ strToInline <$> fragments
linkInl :: PandocMonad m => ManParser m [Inline]
linkInl = do
@@ -458,7 +467,7 @@ parseCodeBlock = do
where
extractText :: ManToken -> Maybe String
- extractText (MStr s _) = Just s
+ extractText (MStr (s, _)) = Just s
extractText (MLine ss) = Just . concat $ map fst ss -- TODO maybe unwords?
extractText (MMaybeLink s) = Just s
extractText MEmptyLine = Just "" -- string are intercalated with '\n', this prevents double '\n'
@@ -466,20 +475,43 @@ parseCodeBlock = do
parseHeader :: PandocMonad m => ManParser m Block
parseHeader = do
- (MHeader lvl s) <- mheader
- return $ Header lvl nullAttr [Str s]
+ (MHeader lvl ss) <- mheader
+ return $ Header lvl nullAttr $ intersperse Space $ strToInline <$> ss
parseBulletList :: PandocMonad m => ManParser m Block
-parseBulletList = BulletList . map (: []) <$> many1 block
-
- where
-
- block :: PandocMonad m => ManParser m Block
- block = do
- mmacro KTab
- pars <- parsePara
- many $ mmacro KTabEnd
- return pars
+parseBulletList = BulletList <$> many1 paras where
+
+ macroIPInl :: [RoffStr] -> [Inline]
+ macroIPInl (x:_:[]) = [strToInline x, Space]
+ macroIPInl _ = []
+
+ listKind :: [RoffStr] -> Maybe ([[Block]] -> Block)
+ listKind (((c:_), _):_:[]) =
+ let params style = OrderedList (1, style, DefaultDelim)
+ in Just $ case c of
+ _ | isDigit c -> params Decimal
+ _ | isUpper c -> params UpperAlpha
+ _ | isLower c -> params LowerAlpha
+ _ -> BulletList
+
+ listKind _ = Nothing
+
+ paras :: PandocMonad m => ManParser m [Block]
+ paras = do
+ (MMacro _ args) <- mmacro KTab
+ let lk = listKind args
+ inls <- parseInlines
+ let macroinl = macroIPInl args
+ let para = Plain $ macroinl ++ inls
+ subls <- many sublist
+ return $ para : subls
+
+ sublist :: PandocMonad m => ManParser m Block
+ sublist = do
+ mmacro KSubTab
+ bl <- parseBulletList
+ mmacro KTabEnd
+ return bl
-- In case of weird man file it will be parsed succesfully
parseSkipMacro :: PandocMonad m => ManParser m Block