about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  src/Text/Pandoc/Readers/Man.hs  164
-rw-r--r--  test/man-reader.man  12
-rw-r--r--  test/man-reader.native  4
3 files changed, 103 insertions, 77 deletions
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 169bd03c8..d6a6fa494 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-
Copyright (C) 2018 Yan Pashkovsky <yanp.bugz@gmail.com>
@@ -55,6 +56,8 @@ import Text.Parsec hiding (tokenPrim, space)
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosString)
import Text.Pandoc.GroffChar (characterCodes, combiningAccents)
+import qualified Data.Sequence as Seq
+import qualified Data.Foldable as Foldable
-- import Debug.Trace (traceShowId)
@@ -75,24 +78,29 @@ data LinePart = RoffStr (String, Font)
data ManToken = MLine [LinePart]
| MEmptyLine
| MMacro MacroKind [[LinePart]]
- | MComment
deriving Show
+newtype ManTokens = ManTokens { unManTokens :: Seq.Seq ManToken }
+ deriving (Show, Semigroup, Monoid)
+
+singleTok :: ManToken -> ManTokens
+singleTok t = ManTokens (Seq.singleton t)
+
data RoffState = RoffState { fontKind :: Font
+ , customMacros :: M.Map String ManTokens
} deriving Show
instance Default RoffState where
- def = RoffState { fontKind = S.singleton Regular }
+ def = RoffState { customMacros = mempty
+ , fontKind = S.singleton Regular }
-data ManState = ManState { customMacros :: M.Map String [ManToken]
- , readerOptions :: ReaderOptions
+data ManState = ManState { readerOptions :: ReaderOptions
, metadata :: Meta
} deriving Show
instance Default ManState where
- def = ManState { customMacros = mempty
- , readerOptions = def
- , metadata = nullMeta }
+ def = ManState { readerOptions = def
+ , metadata = nullMeta }
type ManLexer m = ParserT [Char] RoffState m
type ManParser m = ParserT [ManToken] ManState m
@@ -101,7 +109,9 @@ type ManParser m = ParserT [ManToken] ManState m
-- | Read man (troff) from an input string and return a Pandoc document.
readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
readMan opts txt = do
- eithertokens <- readWithM (many manToken) def (T.unpack $ crFilter txt)
+ eithertokens <- readWithM
+ (Foldable.toList . unManTokens . mconcat <$> many manToken)
+ def (T.unpack $ crFilter txt)
case eithertokens of
Left e -> throwError e
Right tokenz -> do
@@ -128,7 +138,7 @@ readMan opts txt = do
-- String -> ManToken function
--
-manToken :: PandocMonad m => ManLexer m ManToken
+manToken :: PandocMonad m => ManLexer m ManTokens
manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine
parseMan :: PandocMonad m => ManParser m Pandoc
@@ -147,8 +157,7 @@ parseBlock = choice [ parseList
, parseSkippedContent
, parseCodeBlock
, parseHeader
- , parseMacroDef
- , parseUnkownMacro
+ , skipUnkownMacro
]
eofline :: Stream s m Char => ParsecT s u m ()
@@ -268,15 +277,15 @@ currentFont :: PandocMonad m => ManLexer m Font
currentFont = fontKind <$> getState
-- separate function from lexMacro since real man files sometimes do not follow the rules
-lexComment :: PandocMonad m => ManLexer m ManToken
+lexComment :: PandocMonad m => ManLexer m ManTokens
lexComment = do
try $ string ".\\\""
many Parsec.space
skipMany $ noneOf "\n"
char '\n'
- return MComment
+ return mempty
-lexMacro :: PandocMonad m => ManLexer m ManToken
+lexMacro :: PandocMonad m => ManLexer m ManTokens
lexMacro = do
char '.' <|> char '\''
many spacetab
@@ -287,15 +296,16 @@ lexMacro = do
addFontToRoffStr _ x = x
case macroName of
- "" -> return MComment
- "\\\"" -> return MComment
- "\\#" -> return MComment
+ "" -> return mempty
+ "\\\"" -> return mempty
+ "\\#" -> return mempty
+ "de" -> lexMacroDef args
"B" -> do
args' <- argsOrFromNextLine args
- return $ MLine $ concatMap (addFont Bold) args'
+ return $ singleTok $ MLine $ concatMap (addFont Bold) args'
"I" -> do
args' <- argsOrFromNextLine args
- return $ MLine $ concatMap (addFont Italic) args'
+ return $ singleTok $ MLine $ concatMap (addFont Italic) args'
x | x `elem` ["BI", "IB", "RI", "IR", "BR", "RB"] -> do
let toFont 'I' = Italic
toFont 'R' = Regular
@@ -303,17 +313,56 @@ lexMacro = do
toFont 'M' = Monospace
toFont _ = Regular
let fontlist = map toFont x
- return $ MLine $ concat $ zipWith addFont (cycle fontlist) args
- x | x `elem` [ "P", "PP", "LP", "sp"] -> return MEmptyLine
- _ -> return $ MMacro macroName args
+ return $ singleTok
+ $ MLine $ concat $ zipWith addFont (cycle fontlist) args
+ x | x `elem` [ "P", "PP", "LP", "sp"] -> return $ singleTok MEmptyLine
+ _ -> resolveMacro macroName args
where
- argsOrFromNextLine :: PandocMonad m => [[LinePart]] -> ManLexer m [[LinePart]]
+ resolveMacro :: PandocMonad m
+ => String -> [[LinePart]] -> ManLexer m ManTokens
+ resolveMacro macroName args = do
+ macros <- customMacros <$> getState
+ case M.lookup macroName macros of
+ Nothing -> return $ singleTok $ MMacro macroName args
+ Just ts -> do
+ let fillLP (RoffStr (x,y)) zs = RoffStr (x,y) : zs
+ fillLP (MacroArg i) zs =
+ case drop (i - 1) args of
+ [] -> zs
+ (ys:_) -> ys ++ zs
+ let fillMacroArg (MLine lineparts) =
+ MLine (foldr fillLP [] lineparts)
+ fillMacroArg x = x
+ return $ ManTokens . fmap fillMacroArg . unManTokens $ ts
+
+ lexMacroDef :: PandocMonad m => [[LinePart]] -> ManLexer m ManTokens
+ lexMacroDef args = do -- macro definition
+ (macroName, stopMacro) <-
+ case args of
+ (x : y : _) -> return (linePartsToString x, linePartsToString y)
+ -- optional second arg
+ (x:_) -> return (linePartsToString x, ".")
+ [] -> fail "No argument to .de"
+ let stop = try $ do
+ char '.' <|> char '\''
+ many spacetab
+ string stopMacro
+ _ <- lexArgs
+ return ()
+ ts <- mconcat <$> manyTill manToken stop
+ modifyState $ \st ->
+ st{ customMacros = M.insert macroName ts (customMacros st) }
+ return mempty
+
+ argsOrFromNextLine :: PandocMonad m
+ => [[LinePart]] -> ManLexer m [[LinePart]]
argsOrFromNextLine args =
if null args
then do
- MLine lps <- lexLine
+ lps <- many1 linePart
+ eofline
return [lps]
else return args
@@ -357,11 +406,11 @@ lexMacro = do
fonts <- currentFont
return $ RoffStr ("\"", fonts)
-lexLine :: PandocMonad m => ManLexer m ManToken
+lexLine :: PandocMonad m => ManLexer m ManTokens
lexLine = do
lnparts <- many1 linePart
eofline
- return $ MLine lnparts
+ return $ singleTok $ MLine lnparts
where
linePart :: PandocMonad m => ManLexer m LinePart
@@ -398,8 +447,8 @@ spaceTabChar = do
font <- currentFont
return $ RoffStr ([c], font)
-lexEmptyLine :: PandocMonad m => ManLexer m ManToken
-lexEmptyLine = char '\n' >> return MEmptyLine
+lexEmptyLine :: PandocMonad m => ManLexer m ManTokens
+lexEmptyLine = char '\n' >> return (singleTok MEmptyLine)
--
-- ManToken parsec functions
@@ -434,11 +483,6 @@ mmacroAny = msatisfy isMMacro where
isMMacro (MMacro _ _) = True
isMMacro _ = False
-mcomment :: PandocMonad m => ManParser m ManToken
-mcomment = msatisfy isMComment where
- isMComment MComment = True
- isMComment _ = False
-
--
-- ManToken -> Block functions
--
@@ -459,7 +503,7 @@ parseTitle = do
return mempty
parseSkippedContent :: PandocMonad m => ManParser m Blocks
-parseSkippedContent = mempty <$ (mcomment <|> memptyLine)
+parseSkippedContent = mempty <$ memptyLine
linePartsToInlines :: [LinePart] -> Inlines
linePartsToInlines = go
@@ -502,7 +546,7 @@ parsePara = para . trimInlines <$> parseInlines
parseInlines :: PandocMonad m => ManParser m Inlines
parseInlines = do
- inls <- many1 (lineInl <|> comment <|> parseLink <|> parseEmailLink)
+ inls <- many1 (lineInl <|> parseLink <|> parseEmailLink)
return $ mconcat $ intersperse B.space inls
lineInl :: PandocMonad m => ManParser m Inlines
@@ -510,9 +554,6 @@ lineInl = do
(MLine fragments) <- mline
return $ linePartsToInlines $ fragments
-comment :: PandocMonad m => ManParser m Inlines
-comment = mcomment >> return mempty
-
bareIP :: PandocMonad m => ManParser m ManToken
bareIP = msatisfy isBareIP where
isBareIP (MMacro "IP" []) = True
@@ -522,7 +563,7 @@ parseCodeBlock :: PandocMonad m => ManParser m Blocks
parseCodeBlock = try $ do
optional bareIP -- some people indent their code
mmacro "nf"
- toks <- many (mline <|> memptyLine <|> mcomment)
+ toks <- many (mline <|> memptyLine)
mmacro "fi"
return $ codeBlock (removeFinalNewline $
intercalate "\n" . catMaybes $
@@ -612,7 +653,7 @@ parseDefinitionList = definitionList <$> many1 definitionListItem
parseLink :: PandocMonad m => ManParser m Inlines
parseLink = try $ do
MMacro _ args <- mmacro "UR"
- contents <- mconcat <$> many1 (lineInl <|> comment)
+ contents <- mconcat <$> many1 lineInl
mmacro "UE"
let url = case args of
[] -> ""
@@ -622,48 +663,19 @@ parseLink = try $ do
parseEmailLink :: PandocMonad m => ManParser m Inlines
parseEmailLink = do
MMacro _ args <- mmacro "MT"
- contents <- mconcat <$> many1 (lineInl <|> comment)
+ contents <- mconcat <$> many1 lineInl
mmacro "ME"
let url = case args of
[] -> ""
(x:_) -> "mailto:" ++ linePartsToString x
return $ link url "" contents
-parseMacroDef :: PandocMonad m => ManParser m Blocks
-parseMacroDef = do
- MMacro _ args <- mmacro "de"
- (macroName, stopMacro) <-
- case args of
- (x : y : _) -> return (linePartsToString x, linePartsToString y)
- -- optional second arg
- (x:_) -> return (linePartsToString x, ".")
- [] -> fail "No argument to .de"
- ts <- manyTill (msatisfy (const True)) (mmacro stopMacro)
- modifyState $ \st ->
- st{ customMacros = M.insert macroName ts (customMacros st) }
- return mempty
-
--- In case of weird man file it will be parsed succesfully
-parseUnkownMacro :: PandocMonad m => ManParser m Blocks
-parseUnkownMacro = do
+skipUnkownMacro :: PandocMonad m => ManParser m Blocks
+skipUnkownMacro = do
pos <- getPosition
tok <- mmacroAny
case tok of
- MMacro mkind args -> do
- macros <- customMacros <$> getState
- case M.lookup mkind macros of
- Nothing -> do
- report $ SkippedContent ('.':mkind) pos
- return mempty
- Just ts -> do
- toks <- getInput
- let fillLP (RoffStr (x,y)) zs = RoffStr (x,y) : zs
- fillLP (MacroArg i) zs =
- case drop (i - 1) args of
- [] -> zs
- (ys:_) -> ys ++ zs
- let fillMacroArg (MLine lineparts) = MLine (foldr fillLP [] lineparts)
- fillMacroArg x = x
- setInput $ (map fillMacroArg ts) ++ toks
- return mempty
- _ -> fail "the impossible happened"
+ MMacro mkind _ -> do
+ report $ SkippedContent ('.':mkind) pos
+ return mempty
+ _ -> fail "the impossible happened"
diff --git a/test/man-reader.man b/test/man-reader.man
index acda23a00..1ff714c14 100644
--- a/test/man-reader.man
+++ b/test/man-reader.man
@@ -207,3 +207,15 @@ site
.MT me@example.com
my email address.
.ME
+.SH Macros
+.de au
+.B
+Me Myself
+..
+.de auth !!
+.I
+The author is \$1.
+.!!
+.au
+and I.
+.auth "John Jones"
diff --git a/test/man-reader.native b/test/man-reader.native
index eb85c7440..dc64d1978 100644
--- a/test/man-reader.native
+++ b/test/man-reader.native
@@ -101,4 +101,6 @@ Pandoc (Meta {unMeta = fromList [("date",MetaInlines [Str "Oct",Space,Str "17,",
,Para [Str "Minus:",Space,Str "-"]
,Header 1 ("",[],[]) [Str "Links"]
,Para [Link ("",[],[]) [Str "some",Space,Str "randomsite"] ("http://example.com","")]
-,Para [Link ("",[],[]) [Str "my",Space,Str "email",Space,Str "address."] ("mailto:me@example.com","")]]
+,Para [Link ("",[],[]) [Str "my",Space,Str "email",Space,Str "address."] ("mailto:me@example.com","")]
+,Header 1 ("",[],[]) [Str "Macros"]
+,Para [Strong [Str "Me",Space,Str "Myself"],Space,Str "and",Space,Str "I.",Space,Emph [Str "The",Space,Str "author",Space,Str "is",Space,Str "John",Space,Str "Jones",Str "."]]]