aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2018-10-18 20:58:46 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2018-10-18 20:58:46 -0700
commit94c73e84d58b538736e62a66842c13e566f51f31 (patch)
treea7bba8ab304f4689df0cce0f43a2ef8ea0320dc1
parent465f11833f0e30403065454eb94903085922840e (diff)
parent5c42101ee973131196b07661adc95670f2ed6693 (diff)
downloadpandoc-94c73e84d58b538736e62a66842c13e566f51f31.tar.gz
Merge branch 'Yanpas-groff_reader'
-rw-r--r--pandoc.cabal3
-rw-r--r--src/Text/Pandoc/Readers.hs2
-rw-r--r--src/Text/Pandoc/Readers/Man.hs559
-rw-r--r--test/Tests/Old.hs4
-rw-r--r--test/Tests/Readers/Man.hs83
-rw-r--r--test/grofftest.sh22
-rw-r--r--test/man-reader.man189
-rw-r--r--test/man-reader.native94
-rw-r--r--test/test-pandoc.hs2
9 files changed, 958 insertions, 0 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index 6f469147f..ecb4aa766 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -206,6 +206,7 @@ extra-source-files:
test/haddock-reader.haddock
test/insert
test/lalune.jpg
+ test/man-reader.man
test/movie.jpg
test/media/rId25.jpg
test/media/rId26.jpg
@@ -448,6 +449,7 @@ library
Text.Pandoc.Readers.Odt,
Text.Pandoc.Readers.EPUB,
Text.Pandoc.Readers.Muse,
+ Text.Pandoc.Readers.Man,
Text.Pandoc.Readers.FB2,
Text.Pandoc.Writers,
Text.Pandoc.Writers.Native,
@@ -671,6 +673,7 @@ test-suite test-pandoc
Tests.Readers.EPUB
Tests.Readers.Muse
Tests.Readers.Creole
+ Tests.Readers.Man
Tests.Readers.FB2
Tests.Writers.Native
Tests.Writers.ConTeXt
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 7b7f92b35..76492b0aa 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -105,6 +105,7 @@ import Text.Pandoc.Readers.TikiWiki
import Text.Pandoc.Readers.TWiki
import Text.Pandoc.Readers.Txt2Tags
import Text.Pandoc.Readers.Vimwiki
+import Text.Pandoc.Readers.Man
import Text.Pandoc.Shared (mapLeft)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Parsec.Error
@@ -145,6 +146,7 @@ readers = [ ("native" , TextReader readNative)
,("t2t" , TextReader readTxt2Tags)
,("epub" , ByteStringReader readEPUB)
,("muse" , TextReader readMuse)
+ ,("man" , TextReader readMan)
,("fb2" , TextReader readFB2)
]
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
new file mode 100644
index 000000000..1ffdd1f91
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -0,0 +1,559 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-
+ Copyright (C) 2018 Yan Pashkovsky <yanp.bugz@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Man
+ Copyright : Copyright (C) 2018 Yan Pashkovsky
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Yan Pashkovsky <yanp.bugz@gmail.com>
+ Stability : WIP
+ Portability : portable
+
+Conversion of man to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Man (readMan) where
+
+import Prelude
+import Control.Monad (liftM, void)
+import Control.Monad.Except (throwError)
+import Data.Char (isDigit, isUpper, isLower)
+import Data.Default (Default)
+import Data.Map (insert)
+import Data.Set (Set, singleton)
+import qualified Data.Set as S (fromList, toList)
+import Data.Maybe (catMaybes, fromMaybe, isNothing)
+import Data.List (intersperse, intercalate)
+import qualified Data.Text as T
+
+import Text.Pandoc.Class (PandocMonad(..))
+import Text.Pandoc.Builder as B hiding (singleton)
+import Text.Pandoc.Error (PandocError (PandocParsecError))
+import Text.Pandoc.Logging (LogMessage(..))
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing
+import Text.Pandoc.Shared (crFilter)
+import Text.Parsec hiding (tokenPrim, space)
+import qualified Text.Parsec as Parsec
+import Text.Parsec.Pos (updatePosString)
+
+--
+-- Data Types
+--
+data FontKind = Bold | Italic | Monospace | Regular deriving (Show, Eq, Ord)
+
+data MacroKind = KTitle
+ | KCodeBlStart
+ | KCodeBlEnd
+ | KTab
+ | KTabEnd
+ | KSubTab
+ deriving (Show, Eq)
+
+type Font = Set FontKind
+
+type RoffStr = (String, Font)
+
+-- TODO parse tables (see man tbl)
+data ManToken = MStr RoffStr
+ | MLine [RoffStr]
+ | MMaybeLink String
+ | MEmptyLine
+ | MHeader Int [RoffStr]
+ | MMacro MacroKind [RoffStr]
+ | MUnknownMacro String [RoffStr]
+ | MComment String
+ deriving Show
+
+data EscapeThing = EFont Font
+ | EChar Char
+ | ENothing
+ deriving Show
+
+data RoffState = RoffState { fontKind :: Font
+ } deriving Show
+
+instance Default RoffState where
+ def = RoffState {fontKind = singleton Regular}
+
+type ManLexer m = ParserT [Char] RoffState m
+type ManParser m = ParserT [ManToken] ParserState m
+
+---- debug functions
+{-
+import Text.Pandoc.Class (runIOorExplode)
+
+printPandoc :: Pandoc -> [Char]
+printPandoc (Pandoc m content) =
+ let ttl = "Pandoc: " ++ (show $ unMeta m)
+ cnt = intercalate "\n" $ map show content
+ in ttl ++ "\n" ++ cnt
+
+testStr :: String -> IO ()
+testStr str = do
+ pand <- runIOorExplode $ readMan def (T.pack str)
+ putStrLn $ printPandoc pand
+
+
+testFile :: FilePath -> IO ()
+testFile fname = do
+ cont <- readFile fname
+ testStr cont
+-}
+----
+
+
+-- | Read man (troff) from an input string and return a Pandoc document.
+readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
+readMan opts txt = do
+ eithertokens <- readWithM lexMan def (T.unpack $ crFilter txt)
+ case eithertokens of
+ Left e -> throwError e
+ Right tokenz -> do
+ let state = def {stateOptions = opts} :: ParserState
+ eitherdoc <- readWithMTokens parseMan state tokenz
+ either throwError return eitherdoc
+
+ where
+
+ readWithMTokens :: PandocMonad m
+ => ParserT [ManToken] ParserState m a -- ^ parser
+ -> ParserState -- ^ initial state
+ -> [ManToken] -- ^ input
+ -> m (Either PandocError a)
+ readWithMTokens parser state input =
+ let leftF = PandocParsecError . (intercalate "\n") $ show <$> input
+ in mapLeft leftF `liftM` runParserT parser state "source" input
+
+ mapLeft :: (a -> c) -> Either a b -> Either c b
+ mapLeft f (Left x) = Left $ f x
+ mapLeft _ (Right r) = Right r
+
+--
+-- String -> ManToken function
+--
+
+lexMan :: PandocMonad m => ManLexer m [ManToken]
+lexMan = many (lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine)
+
+parseMan :: PandocMonad m => ManParser m Pandoc
+parseMan = do
+ let parsers = [ try parseList, parseTitle, parsePara, parseSkippedContent
+ , try parseCodeBlock, parseHeader, parseSkipMacro]
+ bs <- many $ choice parsers
+ let (Pandoc _ blocks) = doc $ mconcat bs
+ meta <- stateMeta <$> getState
+ return $ Pandoc meta blocks
+
+eofline :: Stream s m Char => ParsecT s u m ()
+eofline = void newline <|> eof
+
+spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab = char ' ' <|> char '\t'
+
+-- TODO add other sequences from man (7) groff
+escapeLexer :: PandocMonad m => ManLexer m EscapeThing
+escapeLexer = do
+ char '\\'
+ choice [escChar, escFont, escUnknown]
+ where
+
+ escChar :: PandocMonad m => ManLexer m EscapeThing
+ escChar =
+ let skipSeqs = ["%", "{", "}", "&", "\n", ":", "\"", "0", "c"]
+ subsSeqs = [ ("-", '-'), (" ", ' '), ("\\", '\\'), ("[lq]", '“'), ("[rq]", '”')
+ , ("[em]", '—'), ("[en]", '–'), ("*(lq", '«'), ("*(rq", '»')
+ , ("t", '\t'), ("e", '\\'), ("`", '`'), ("^", ' '), ("|", ' ')
+ , ("'", '`') ]
+ substitute :: PandocMonad m => (String,Char) -> ManLexer m EscapeThing
+ substitute (from,to) = try $ string from >> return (EChar to)
+ skip :: PandocMonad m => String -> ManLexer m EscapeThing
+ skip seq' = try $ string seq' >> return ENothing
+ in choice $ (substitute <$> subsSeqs) ++
+ (skip <$> skipSeqs) ++
+ [ char '(' >> anyChar >> return ENothing
+ , char '[' >> many alphaNum >> char ']' >> return ENothing
+ ]
+
+ escFont :: PandocMonad m => ManLexer m EscapeThing
+ escFont = do
+ char 'f'
+ font <- choice [ singleton <$> letterFontKind
+ , char '(' >> anyChar >> anyChar >> return (singleton Regular)
+ , try lettersFont
+ , digit >> return (singleton Regular)
+ ]
+ modifyState (\r -> r {fontKind = font})
+ return $ EFont font
+
+ where
+
+ lettersFont :: PandocMonad m => ManLexer m Font
+ lettersFont = do
+ char '['
+ fs <- many letterFontKind
+ many letter
+ char ']'
+ return $ S.fromList fs
+
+ letterFontKind :: PandocMonad m => ManLexer m FontKind
+ letterFontKind = choice [
+ char 'B' >> return Bold
+ , char 'I' >> return Italic
+ , char 'C' >> return Monospace
+ , (char 'P' <|> char 'R') >> return Regular
+ ]
+
+ escUnknown :: PandocMonad m => ManLexer m EscapeThing
+ escUnknown = do
+ c <- anyChar
+ pos <- getPosition
+ logOutput $ SkippedContent ("Unknown escape sequence \\" ++ [c]) pos
+ return ENothing
+
+currentFont :: PandocMonad m => ManLexer m Font
+currentFont = fontKind <$> getState
+
+-- separate function from lexMacro since real man files sometimes do not follow the rules
+lexComment :: PandocMonad m => ManLexer m ManToken
+lexComment = do
+ try $ string ".\\\""
+ many Parsec.space
+ body <- many $ noneOf "\n"
+ char '\n'
+ return $ MComment body
+
+lexMacro :: PandocMonad m => ManLexer m ManToken
+lexMacro = do
+ char '.' <|> char '\''
+ many spacetab
+ macroName <- many1 (letter <|> oneOf ['\\', '"', '&'])
+ args <- lexArgs
+ let joinedArgs = unwords $ fst <$> args
+ knownMacro mkind = MMacro mkind args
+
+ tok = case macroName of
+ x | x `elem` ["\\\"", "\\#"] -> MComment joinedArgs
+ "TH" -> knownMacro KTitle
+ "IP" -> knownMacro KTab
+ "TP" -> knownMacro KTab
+ "RE" -> knownMacro KTabEnd
+ "RS" -> knownMacro KSubTab
+ "nf" -> knownMacro KCodeBlStart
+ "fi" -> knownMacro KCodeBlEnd
+ "B" -> MStr (joinedArgs, singleton Bold)
+ "BR" -> MMaybeLink joinedArgs
+ x | x `elem` ["BI", "IB"] -> MStr (joinedArgs, S.fromList [Italic, Bold])
+ x | x `elem` ["I", "IR", "RI"] -> MStr (joinedArgs, singleton Italic)
+ "SH" -> MHeader 2 args
+ "SS" -> MHeader 3 args
+ x | x `elem` [ "P", "PP", "LP", "sp"] -> MEmptyLine
+ _ -> MUnknownMacro macroName args
+ return tok
+
+ where
+
+ -- TODO better would be [[RoffStr]], since one arg may have different fonts
+ lexArgs :: PandocMonad m => ManLexer m [RoffStr]
+ lexArgs = do
+ args <- many $ try oneArg
+ many spacetab
+ eofline
+ return args
+
+ where
+
+ oneArg :: PandocMonad m => ManLexer m RoffStr
+ oneArg = do
+ many1 spacetab
+ many $ try $ string "\\\n"
+ try quotedArg <|> plainArg -- try, because there are some erroneous files, e.g. linux/bpf.2
+
+ plainArg :: PandocMonad m => ManLexer m RoffStr
+ plainArg = do
+ indents <- many spacetab
+ arg <- many1 $ escChar <|> (Just <$> noneOf " \t\n")
+ f <- currentFont
+ return (indents ++ catMaybes arg, f)
+
+ quotedArg :: PandocMonad m => ManLexer m RoffStr
+ quotedArg = do
+ char '"'
+ val <- many quotedChar
+ char '"'
+ val2 <- many $ escChar <|> (Just <$> noneOf " \t\n")
+ f <- currentFont
+ return (catMaybes $ val ++ val2, f)
+
+ quotedChar :: PandocMonad m => ManLexer m (Maybe Char)
+ quotedChar = escChar <|> (Just <$> noneOf "\"\n") <|> (Just <$> try (string "\"\"" >> return '"'))
+
+ escChar :: PandocMonad m => ManLexer m (Maybe Char)
+ escChar = do
+ ec <- escapeLexer
+ case ec of
+ (EChar c) -> return $ Just c
+ _ -> return Nothing
+
+lexLine :: PandocMonad m => ManLexer m ManToken
+lexLine = do
+ lnparts <- many1 (esc <|> linePart)
+ eofline
+ return $ MLine $ catMaybes lnparts
+ where
+
+ esc :: PandocMonad m => ManLexer m (Maybe (String, Font))
+ esc = do
+ someesc <- escapeLexer
+ font <- currentFont
+ let rv = case someesc of
+ EChar c -> Just ([c], font)
+ _ -> Nothing
+ return rv
+
+ linePart :: PandocMonad m => ManLexer m (Maybe (String, Font))
+ linePart = do
+ lnpart <- many1 $ noneOf "\n\\"
+ font <- currentFont
+ return $ Just (lnpart, font)
+
+
+lexEmptyLine :: PandocMonad m => ManLexer m ManToken
+lexEmptyLine = char '\n' >> return MEmptyLine
+
+--
+-- ManToken parsec functions
+--
+
+msatisfy :: (Show t, Stream s m t) => (t -> Bool) -> ParserT s st m t
+msatisfy predic = tokenPrim show nextPos testTok
+ where
+ testTok t = if predic t then Just t else Nothing
+ nextPos pos _x _xs = updatePosString (setSourceColumn (setSourceLine pos $ sourceLine pos + 1) 1) ("")
+
+mstr :: PandocMonad m => ManParser m ManToken
+mstr = msatisfy isMStr where
+ isMStr (MStr _) = True
+ isMStr _ = False
+
+mline :: PandocMonad m => ManParser m ManToken
+mline = msatisfy isMLine where
+ isMLine (MLine _) = True
+ isMLine _ = False
+
+mmaybeLink :: PandocMonad m => ManParser m ManToken
+mmaybeLink = msatisfy isMMaybeLink where
+ isMMaybeLink (MMaybeLink _) = True
+ isMMaybeLink _ = False
+
+memplyLine :: PandocMonad m => ManParser m ManToken
+memplyLine = msatisfy isMEmptyLine where
+ isMEmptyLine MEmptyLine = True
+ isMEmptyLine _ = False
+
+mheader :: PandocMonad m => ManParser m ManToken
+mheader = msatisfy isMHeader where
+ isMHeader (MHeader _ _) = True
+ isMHeader _ = False
+
+mmacro :: PandocMonad m => MacroKind -> ManParser m ManToken
+mmacro mk = msatisfy isMMacro where
+ isMMacro (MMacro mk' _) | mk == mk' = True
+ | otherwise = False
+ isMMacro _ = False
+
+mmacroAny :: PandocMonad m => ManParser m ManToken
+mmacroAny = msatisfy isMMacro where
+ isMMacro (MMacro _ _) = True
+ isMMacro _ = False
+
+munknownMacro :: PandocMonad m => ManParser m ManToken
+munknownMacro = msatisfy isMUnknownMacro where
+ isMUnknownMacro (MUnknownMacro _ _) = True
+ isMUnknownMacro _ = False
+
+mcomment :: PandocMonad m => ManParser m ManToken
+mcomment = msatisfy isMComment where
+ isMComment (MComment _) = True
+ isMComment _ = False
+
+--
+-- ManToken -> Block functions
+--
+
+parseTitle :: PandocMonad m => ManParser m Blocks
+parseTitle = do
+ (MMacro _ args) <- mmacro KTitle
+ if null args
+ then return mempty
+ else do
+ let mantitle = fst $ head args
+ modifyState (changeTitle mantitle)
+ return $ header 1 $ str mantitle
+ where
+ changeTitle title pst =
+ let meta = stateMeta pst
+ metaUp = Meta $ insert "title" (MetaString title) (unMeta meta)
+ in
+ pst {stateMeta = metaUp}
+
+parseSkippedContent :: PandocMonad m => ManParser m Blocks
+parseSkippedContent = do
+ tok <- munknownMacro <|> mcomment <|> memplyLine
+ onToken tok
+ return mempty
+
+ where
+
+ onToken :: PandocMonad m => ManToken -> ManParser m ()
+ onToken (MUnknownMacro mname _) = do
+ pos <- getPosition
+ logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos
+ onToken _ = return ()
+
+strToInlines :: RoffStr -> Inlines
+strToInlines (s, fonts) = inner $ S.toList fonts where
+ inner :: [FontKind] -> Inlines
+ inner [] = str s
+ inner (Bold:fs) = strong $ inner fs
+ inner (Italic:fs) = emph $ inner fs
+
+ -- Monospace goes after Bold and Italic in ordered set
+ inner (Monospace:_) = code s
+ inner (Regular:fs) = inner fs
+
+parsePara :: PandocMonad m => ManParser m Blocks
+parsePara = para <$> parseInlines
+
+parseInlines :: PandocMonad m => ManParser m Inlines
+parseInlines = do
+ inls <- many1 (strInl <|> lineInl <|> linkInl <|> comment)
+ let withspaces = intersperse B.space inls
+ return $ mconcat withspaces
+
+ where
+
+ strInl :: PandocMonad m => ManParser m Inlines
+ strInl = do
+ (MStr rstr) <- mstr
+ return $ strToInlines rstr
+
+ lineInl :: PandocMonad m => ManParser m Inlines
+ lineInl = do
+ (MLine fragments) <- mline
+ return $ mconcat $ strToInlines <$> fragments
+
+ linkInl :: PandocMonad m => ManParser m Inlines
+ linkInl = do
+ (MMaybeLink txt) <- mmaybeLink
+ let inls = case runParser linkParser () "" txt of
+ Right lnk -> lnk
+ Left _ -> strong $ str txt
+ return inls
+
+ where
+
+ -- assuming man pages are generated from Linux-like repository
+ linkParser :: Parsec String () Inlines
+ linkParser = do
+ mpage <- many1 (alphaNum <|> char '_')
+ spacetab
+ char '('
+ mansect <- digit
+ char ')'
+ other <- many anyChar
+ let manurl pagename section = "../"++section++"/"++pagename++"."++section
+ lnkInls = link (manurl mpage [mansect]) mpage (strong $ str mpage)
+ return $ lnkInls <> strong (str (" ("++[mansect] ++ ")") <> str other)
+
+ comment :: PandocMonad m => ManParser m Inlines
+ comment = mcomment >> return mempty
+
+
+parseCodeBlock :: PandocMonad m => ManParser m Blocks
+parseCodeBlock = do
+ mmacro KCodeBlStart
+ toks <- many (mstr <|> mline <|> mmaybeLink <|> memplyLine <|> munknownMacro <|> mcomment)
+ mmacro KCodeBlEnd
+ return $ codeBlock (intercalate "\n" . catMaybes $ extractText <$> toks)
+
+ where
+
+ extractText :: ManToken -> Maybe String
+ extractText (MStr (s, _)) = Just s
+ extractText (MLine ss) = Just . concat $ map fst ss -- TODO maybe unwords?
+ extractText (MMaybeLink s) = Just s
+ extractText MEmptyLine = Just "" -- string are intercalated with '\n', this prevents double '\n'
+ extractText _ = Nothing
+
+parseHeader :: PandocMonad m => ManParser m Blocks
+parseHeader = do
+ (MHeader lvl ss) <- mheader
+ return $ header lvl (mconcat $ intersperse B.space $ strToInlines <$> ss)
+
+type ListBuilder = [Blocks] -> Blocks
+
+parseList :: PandocMonad m => ManParser m Blocks
+parseList = do
+ xx <- many1 paras
+ let bls = map snd xx
+ let bldr = fst $ head xx
+ return $ bldr bls
+
+ where
+
+ macroIPInl :: [RoffStr] -> Inlines
+ macroIPInl (x:_:[]) = strToInlines x <> B.space
+ macroIPInl _ = mempty
+
+ listKind :: [RoffStr] -> Maybe ListBuilder
+ listKind (((c:_), _):_:[]) =
+ let params style = orderedListWith (1, style, DefaultDelim)
+ in case c of
+ _ | isDigit c -> Just $ params Decimal
+ _ | isUpper c -> Just $ params UpperAlpha
+ _ | isLower c -> Just $ params LowerAlpha
+ _ -> Nothing
+
+ listKind _ = Nothing
+
+ paras :: PandocMonad m => ManParser m (ListBuilder, Blocks)
+ paras = do
+ (MMacro _ args) <- mmacro KTab
+ let lbuilderOpt = listKind args
+ lbuilder = fromMaybe bulletList lbuilderOpt
+ macroinl = macroIPInl args
+ inls <- parseInlines
+ let parainls = if isNothing lbuilderOpt then macroinl <> inls else inls
+ subls <- mconcat <$> many sublist
+ return $ (lbuilder, plain parainls <> subls)
+
+ sublist :: PandocMonad m => ManParser m Blocks
+ sublist = do
+ mmacro KSubTab
+ bl <- parseList
+ mmacro KTabEnd
+ return bl
+
+-- In case of weird man file it will be parsed succesfully
+parseSkipMacro :: PandocMonad m => ManParser m Blocks
+parseSkipMacro = mmacroAny >> mempty
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs
index b426ffd07..842e0f656 100644
--- a/test/Tests/Old.hs
+++ b/test/Tests/Old.hs
@@ -171,6 +171,10 @@ tests = [ testGroup "markdown"
, test "tables" ["-f", "native", "-t", "../data/sample.lua"]
"tables.native" "tables.custom"
]
+ , testGroup "man"
+ [ test "reader" ["-r", "man", "-w", "native", "-s"]
+ "man-reader.man" "man-reader.native"
+ ]
]
-- makes sure file is fully closed after reading
diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs
new file mode 100644
index 000000000..9dbfbab4d
--- /dev/null
+++ b/test/Tests/Readers/Man.hs
@@ -0,0 +1,83 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Tests.Readers.Man (tests) where
+
+import Prelude
+import Data.Text (Text)
+import Test.Tasty
+import Tests.Helpers
+import Text.Pandoc
+import Text.Pandoc.Arbitrary ()
+import Text.Pandoc.Builder
+import Text.Pandoc.Readers.Man
+
+man :: Text -> Pandoc
+man = purely $ readMan def
+
+infix 4 =:
+(=:) :: ToString c
+ => String -> (Text, c) -> TestTree
+(=:) = test man
+
+tests :: [TestTree]
+tests = [
+ -- .SH "HEllo bbb" "aaa"" as"
+ testGroup "Macros" [
+ "Bold" =:
+ ".B foo"
+ =?> (para $ strong "foo")
+ , "Italic" =:
+ ".I bar\n"
+ =?> (para $ emph "bar")
+ , "BoldItalic" =:
+ ".BI foo bar"
+ =?> (para $ strong $ emph $ str "foo bar")
+ , "H1" =:
+ ".SH The header\n"
+ =?> header 2 (str "The" <> space <> str "header")
+ , "H2" =:
+ ".SS \"The header 2\""
+ =?> header 3 (str "The header 2")
+ , "Macro args" =:
+ ".B \"single arg with \"\"Q\"\"\""
+ =?> (para $ strong $ str "single arg with \"Q\"")
+ , "comment" =:
+ ".\\\"bla\naaa"
+ =?> (para $ space <> str "aaa")
+ , "link" =:
+ ".BR aa (1)"
+ =?> (para $ link "../1/aa.1" "aa" (strong $ str "aa") <> (strong $ str " (1)"))
+ ],
+ testGroup "Escapes" [
+ "fonts" =:
+ "aa\\fIbb\\fRcc"
+ =?> (para $ str "aa" <> (emph $ str "bb") <> str "cc")
+ , "skip" =:
+ "a\\%\\{\\}\\\n\\:b\\0"
+ =?> (para $ str "ab")
+ , "replace" =:
+ "\\-\\ \\\\\\[lq]\\[rq]\\[em]\\[en]\\*(lq\\*(rq"
+ =?> (para $ str "- \\“”—–«»")
+ , "replace2" =:
+ "\\t\\e\\`\\^\\|\\'"
+ =?> (para $ str "\t\\` `")
+ ],
+ testGroup "Lists" [
+ "bullet" =:
+ ".IP\nfirst\n.IP\nsecond"
+ =?> bulletList [plain $ str "first", plain $ str "second"]
+ , "odrered" =:
+ ".IP 1 a\nfirst\n.IP 2 a\nsecond"
+ =?> orderedListWith (1,Decimal,DefaultDelim) [plain $ str "first", plain $ str "second"]
+ , "upper" =:
+ ".IP A a\nfirst\n.IP B a\nsecond"
+ =?> orderedListWith (1,UpperAlpha,DefaultDelim) [plain $ str "first", plain $ str "second"]
+ , "nested" =:
+ ".IP\nfirst\n.RS\n.IP\n1a\n.IP\n1b\n.RE"
+ =?> bulletList [(plain $ str "first") <> (bulletList [plain $ str "1a", plain $ str "1b"])]
+ ],
+ testGroup "CodeBlocks" [
+ "cb1"=:
+ ".nf\naa\n\tbb\n.fi"
+ =?> codeBlock "aa\n\tbb"
+ ]
+ ]
diff --git a/test/grofftest.sh b/test/grofftest.sh
new file mode 100644
index 000000000..ca1aa71d9
--- /dev/null
+++ b/test/grofftest.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# iterates over specified directory, containing "\w+\.\d"-like files,
+# executes pandoc voer them and prints stderr on nonzero return code
+
+if [ $# -ne 2 ]; then
+ echo "Not enough arguments"
+ exit 1
+fi
+
+PANDOC=$1
+DIR=$2
+
+$PANDOC --version > /dev/null || { echo "pandoc executable error" >&2 ; exit 1 ; }
+
+ls $2 | egrep "^.+\.[0-9].?$" | while read f ; do
+ FILE="$DIR/$f"
+ $PANDOC -f man -t native < $FILE 2>&1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo "Failed to convert $FILE"
+ fi
+done
diff --git a/test/man-reader.man b/test/man-reader.man
new file mode 100644
index 000000000..4f3395051
--- /dev/null
+++ b/test/man-reader.man
@@ -0,0 +1,189 @@
+.TH "Pandoc Man tests" "" "Oct 17, 2018" "" ""
+.PP
+This is a set of tests for pandoc.
+.PP
+ * * * * *
+.SH Headers
+.SH Level 1
+.SS Level 2
+
+ * * * * *
+.SH Paragraphs
+.PP
+Here's a regular paragraph.
+.PP
+Another paragraph
+In Markdown 1.0.0 and earlier.
+Version 8.
+This line turns into a list item.
+Because a hard\-wrapped line in the middle of a paragraph looked like a list
+item.
+.PP
+There should be a hard line break
+.PD 0
+.P
+.PD
+here.
+.PP
+ * * * * *
+.SH Block Quotes
+Code in a block quote:
+.IP
+.nf
+\f[C]
+sub\ status\ {
+\ \ \ \ print\ "working";
+}
+\f[]
+.fi
+.PP
+A list:
+.IP "1." 3
+item one
+.IP "2." 3
+item two
+.PP
+.SH Code Blocks
+.PP
+Code:
+.IP
+.nf
+\f[C]
+\-\-\-\-\ (should\ be\ four\ hyphens)
+
+sub\ status\ {
+\ \ \ \ print\ "working";
+}
+
+\f[]
+.fi
+.PP
+And:
+.IP
+.nf
+\f[C]
+\tthis\ code\ line is\ indented\ by\ one\ tab
+
+These\ should\ not\ be\ escaped:\ \ \\$\ \\\\\ \\>\ \\[\ \\{
+\f[]
+.fi
+.PP
+ * * * * *
+.SH Lists
+.SS Unordered
+.PP
+Asterisks:
+.IP \[bu] 2
+asterisk 1
+.IP \[bu] 2
+asterisk 2
+.IP \[bu] 2
+asterisk 3
+.PP
+.SS Ordered
+.IP "1." 3
+First
+.IP "2." 3
+Second
+.IP "3." 3
+Third
+.PP
+.SS Nested
+.IP \[bu] 2
+Tab
+.RS 2
+.IP \[bu] 2
+Tab
+.RS 2
+.IP \[bu] 2
+Tab
+.RE
+.RE
+.PP
+Here's another:
+.IP "1." 3
+First
+.IP "2." 3
+Second:
+.RS 4
+.IP \[bu] 2
+Fee
+.IP \[bu] 2
+Fie
+.IP \[bu] 2
+Foe
+.RE
+.IP "3." 3
+Third
+.PP
+Same thing:
+.IP "1." 3
+First
+.IP "2." 3
+Second:
+.RS 4
+.IP \[bu] 2
+Fee
+.IP \[bu] 2
+Fie
+.IP \[bu] 2
+Foe
+.RE
+.IP "3." 3
+Third
+.SS different styles:
+.IP "A." 3
+Upper Alpha
+.RS 4
+.IP "I." 3
+Upper Roman.
+.RS 4
+.IP "(6)" 4
+Decimal start with 6
+.RS 4
+.IP "c)" 3
+Lower alpha with paren
+.RE
+.RE
+.RE
+.PP
+ * * * * *
+.SH Special Characters
+AT&T has an ampersand in their name.
+.PP
+4 < 5.
+.PP
+6 > 5.
+.PP
+Backslash: \\
+.PP
+Backtick: `
+.PP
+Asterisk: *
+.PP
+Underscore: _
+.PP
+Left brace: {
+.PP
+Right brace: }
+.PP
+Left bracket: [
+.PP
+Right bracket: ]
+.PP
+Left paren: (
+.PP
+Right paren: )
+.PP
+Greater\-than: >
+.PP
+Hash: #
+.PP
+Period: .
+.PP
+Bang: !
+.PP
+Plus: +
+.PP
+Minus: \-
+.PP
diff --git a/test/man-reader.native b/test/man-reader.native
new file mode 100644
index 000000000..1fa010bd6
--- /dev/null
+++ b/test/man-reader.native
@@ -0,0 +1,94 @@
+Pandoc (Meta {unMeta = fromList [("title",MetaString "Pandoc Man tests")]})
+[Header 1 ("",[],[]) [Str "Pandoc Man tests"]
+,Para [Str "This is a set of tests for pandoc."]
+,Para [Str " * * * * *"]
+,Header 2 ("",[],[]) [Str "Headers"]
+,Header 2 ("",[],[]) [Str "Level",Space,Str "1"]
+,Header 3 ("",[],[]) [Str "Level",Space,Str "2"]
+,Para [Str " * * * * *"]
+,Header 2 ("",[],[]) [Str "Paragraphs"]
+,Para [Str "Here's a regular paragraph."]
+,Para [Str "Another paragraph",Space,Str "In Markdown 1.0.0 and earlier.",Space,Str "Version 8.",Space,Str "This line turns into a list item.",Space,Str "Because a hard-wrapped line in the middle of a paragraph looked like a list",Space,Str "item."]
+,Para [Str "There should be a hard line break"]
+,Para [Str "here."]
+,Para [Str " * * * * *"]
+,Header 2 ("",[],[]) [Str "Block",Space,Str "Quotes"]
+,Para [Str "Code in a block quote:"]
+,CodeBlock ("",[],[]) "\nsub status {\n print \"working\";\n}\n"
+,Para [Str "A list:"]
+,OrderedList (1,Decimal,DefaultDelim)
+ [[Plain [Str "item one"]]
+ ,[Plain [Str "item two"]]]
+,Header 2 ("",[],[]) [Str "Code",Space,Str "Blocks"]
+,Para [Str "Code:"]
+,CodeBlock ("",[],[]) "\n---- (should be four hyphens)\n\nsub status {\n print \"working\";\n}\n\n"
+,Para [Str "And:"]
+,CodeBlock ("",[],[]) "\n\tthis code line is indented by one tab\n\nThese should not be escaped: \\$ \\\\ \\> \\[ \\{\n"
+,Para [Str " * * * * *"]
+,Header 2 ("",[],[]) [Str "Lists"]
+,Header 3 ("",[],[]) [Str "Unordered"]
+,Para [Str "Asterisks:"]
+,BulletList
+ [[Plain [Str "",Space,Str "asterisk 1"]]
+ ,[Plain [Str "",Space,Str "asterisk 2"]]
+ ,[Plain [Str "",Space,Str "asterisk 3"]]]
+,Header 3 ("",[],[]) [Str "Ordered"]
+,OrderedList (1,Decimal,DefaultDelim)
+ [[Plain [Str "First"]]
+ ,[Plain [Str "Second"]]
+ ,[Plain [Str "Third"]]]
+,Header 3 ("",[],[]) [Str "Nested"]
+,BulletList
+ [[Plain [Str "",Space,Str "Tab"]
+ ,BulletList
+ [[Plain [Str "",Space,Str "Tab"]
+ ,BulletList
+ [[Plain [Str "",Space,Str "Tab"]]]]]]]
+,Para [Str "Here's another:"]
+,OrderedList (1,Decimal,DefaultDelim)
+ [[Plain [Str "First"]]
+ ,[Plain [Str "Second:"]
+ ,BulletList
+ [[Plain [Str "",Space,Str "Fee"]]
+ ,[Plain [Str "",Space,Str "Fie"]]
+ ,[Plain [Str "",Space,Str "Foe"]]]]
+ ,[Plain [Str "Third"]]]
+,Para [Str "Same thing:"]
+,OrderedList (1,Decimal,DefaultDelim)
+ [[Plain [Str "First"]]
+ ,[Plain [Str "Second:"]
+ ,BulletList
+ [[Plain [Str "",Space,Str "Fee"]]
+ ,[Plain [Str "",Space,Str "Fie"]]
+ ,[Plain [Str "",Space,Str "Foe"]]]]
+ ,[Plain [Str "Third"]]]
+,Header 3 ("",[],[]) [Str "different",Space,Str "styles:"]
+,OrderedList (1,UpperAlpha,DefaultDelim)
+ [[Plain [Str "Upper Alpha"]
+ ,OrderedList (1,UpperAlpha,DefaultDelim)
+ [[Plain [Str "Upper Roman."]
+ ,BulletList
+ [[Plain [Str "(6)",Space,Str "Decimal start with 6"]
+ ,OrderedList (1,LowerAlpha,DefaultDelim)
+ [[Plain [Str "Lower alpha with paren"]]]]]]]]]
+,Para [Str " * * * * *"]
+,Header 2 ("",[],[]) [Str "Special",Space,Str "Characters"]
+,Para [Str "AT&T has an ampersand in their name."]
+,Para [Str "4 < 5."]
+,Para [Str "6 > 5."]
+,Para [Str "Backslash: \\"]
+,Para [Str "Backtick: `"]
+,Para [Str "Asterisk: *"]
+,Para [Str "Underscore: _"]
+,Para [Str "Left brace: {"]
+,Para [Str "Right brace: }"]
+,Para [Str "Left bracket: ["]
+,Para [Str "Right bracket: ]"]
+,Para [Str "Left paren: ("]
+,Para [Str "Right paren: )"]
+,Para [Str "Greater-than: >"]
+,Para [Str "Hash: #"]
+,Para [Str "Period: ."]
+,Para [Str "Bang: !"]
+,Para [Str "Plus: +"]
+,Para [Str "Minus: -"]]
diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs
index b70d2286c..dc51b73cc 100644
--- a/test/test-pandoc.hs
+++ b/test/test-pandoc.hs
@@ -22,6 +22,7 @@ import qualified Tests.Readers.Odt
import qualified Tests.Readers.Org
import qualified Tests.Readers.RST
import qualified Tests.Readers.Txt2Tags
+import qualified Tests.Readers.Man
import qualified Tests.Shared
import qualified Tests.Writers.AsciiDoc
import qualified Tests.Writers.ConTeXt
@@ -76,6 +77,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
, testGroup "EPUB" Tests.Readers.EPUB.tests
, testGroup "Muse" Tests.Readers.Muse.tests
, testGroup "Creole" Tests.Readers.Creole.tests
+ , testGroup "Man" Tests.Readers.Man.tests
, testGroup "FB2" Tests.Readers.FB2.tests
]
, testGroup "Lua filters" Tests.Lua.tests