diff options
author | John MacFarlane <jgm@berkeley.edu> | 2018-10-18 20:58:46 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-10-18 20:58:46 -0700 |
commit | 94c73e84d58b538736e62a66842c13e566f51f31 (patch) | |
tree | a7bba8ab304f4689df0cce0f43a2ef8ea0320dc1 | |
parent | 465f11833f0e30403065454eb94903085922840e (diff) | |
parent | 5c42101ee973131196b07661adc95670f2ed6693 (diff) | |
download | pandoc-94c73e84d58b538736e62a66842c13e566f51f31.tar.gz |
Merge branch 'Yanpas-groff_reader'
-rw-r--r-- | pandoc.cabal | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Man.hs | 559 | ||||
-rw-r--r-- | test/Tests/Old.hs | 4 | ||||
-rw-r--r-- | test/Tests/Readers/Man.hs | 83 | ||||
-rw-r--r-- | test/grofftest.sh | 22 | ||||
-rw-r--r-- | test/man-reader.man | 189 | ||||
-rw-r--r-- | test/man-reader.native | 94 | ||||
-rw-r--r-- | test/test-pandoc.hs | 2 |
9 files changed, 958 insertions, 0 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index 6f469147f..ecb4aa766 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -206,6 +206,7 @@ extra-source-files: test/haddock-reader.haddock test/insert test/lalune.jpg + test/man-reader.man test/movie.jpg test/media/rId25.jpg test/media/rId26.jpg @@ -448,6 +449,7 @@ library Text.Pandoc.Readers.Odt, Text.Pandoc.Readers.EPUB, Text.Pandoc.Readers.Muse, + Text.Pandoc.Readers.Man, Text.Pandoc.Readers.FB2, Text.Pandoc.Writers, Text.Pandoc.Writers.Native, @@ -671,6 +673,7 @@ test-suite test-pandoc Tests.Readers.EPUB Tests.Readers.Muse Tests.Readers.Creole + Tests.Readers.Man Tests.Readers.FB2 Tests.Writers.Native Tests.Writers.ConTeXt diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 7b7f92b35..76492b0aa 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -105,6 +105,7 @@ import Text.Pandoc.Readers.TikiWiki import Text.Pandoc.Readers.TWiki import Text.Pandoc.Readers.Txt2Tags import Text.Pandoc.Readers.Vimwiki +import Text.Pandoc.Readers.Man import Text.Pandoc.Shared (mapLeft) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Parsec.Error @@ -145,6 +146,7 @@ readers = [ ("native" , TextReader readNative) ,("t2t" , TextReader readTxt2Tags) ,("epub" , ByteStringReader readEPUB) ,("muse" , TextReader readMuse) + ,("man" , TextReader readMan) ,("fb2" , TextReader readFB2) ] diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs new file mode 100644 index 000000000..1ffdd1f91 --- /dev/null +++ b/src/Text/Pandoc/Readers/Man.hs @@ -0,0 +1,559 @@ +{-# LANGUAGE FlexibleContexts #-} +{- + Copyright (C) 2018 Yan Pashkovsky <yanp.bugz@gmail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + +-} + +{- | + Module : Text.Pandoc.Readers.Man + Copyright : Copyright (C) 2018 Yan Pashkovsky + License : GNU GPL, version 2 or above + + Maintainer : Yan Pashkovsky <yanp.bugz@gmail.com> + Stability : WIP + Portability : portable + +Conversion of man to 'Pandoc' document. +-} +module Text.Pandoc.Readers.Man (readMan) where + +import Prelude +import Control.Monad (liftM, void) +import Control.Monad.Except (throwError) +import Data.Char (isDigit, isUpper, isLower) +import Data.Default (Default) +import Data.Map (insert) +import Data.Set (Set, singleton) +import qualified Data.Set as S (fromList, toList) +import Data.Maybe (catMaybes, fromMaybe, isNothing) +import Data.List (intersperse, intercalate) +import qualified Data.Text as T + +import Text.Pandoc.Class (PandocMonad(..)) +import Text.Pandoc.Builder as B hiding (singleton) +import Text.Pandoc.Error (PandocError (PandocParsecError)) +import Text.Pandoc.Logging (LogMessage(..)) +import Text.Pandoc.Options +import Text.Pandoc.Parsing +import Text.Pandoc.Shared (crFilter) +import Text.Parsec hiding (tokenPrim, space) +import qualified Text.Parsec as Parsec +import Text.Parsec.Pos (updatePosString) + +-- +-- Data Types +-- +data FontKind = Bold | Italic | Monospace | Regular deriving (Show, Eq, Ord) + +data MacroKind = KTitle + | KCodeBlStart + | KCodeBlEnd + | KTab + | KTabEnd + | KSubTab + deriving (Show, Eq) + +type Font = Set FontKind + +type RoffStr = (String, Font) + +-- TODO parse tables (see man tbl) +data ManToken = MStr RoffStr + | MLine [RoffStr] + | MMaybeLink String + | MEmptyLine + | MHeader Int [RoffStr] + | MMacro MacroKind [RoffStr] + | MUnknownMacro String [RoffStr] + | MComment String + deriving Show + +data EscapeThing = EFont Font + | EChar Char + | ENothing + deriving Show + +data RoffState = RoffState { fontKind :: Font + } deriving Show + +instance Default RoffState where + def = RoffState {fontKind = singleton Regular} + +type ManLexer m = ParserT [Char] RoffState m +type ManParser m = ParserT [ManToken] ParserState m + +---- debug functions +{- +import Text.Pandoc.Class (runIOorExplode) + +printPandoc :: Pandoc -> [Char] +printPandoc (Pandoc m content) = + let ttl = "Pandoc: " ++ (show $ unMeta m) + cnt = intercalate "\n" $ map show content + in ttl ++ "\n" ++ cnt + +testStr :: String -> IO () +testStr str = do + pand <- runIOorExplode $ readMan def (T.pack str) + putStrLn $ printPandoc pand + + +testFile :: FilePath -> IO () +testFile fname = do + cont <- readFile fname + testStr cont +-} +---- + + +-- | Read man (troff) from an input string and return a Pandoc document. +readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc +readMan opts txt = do + eithertokens <- readWithM lexMan def (T.unpack $ crFilter txt) + case eithertokens of + Left e -> throwError e + Right tokenz -> do + let state = def {stateOptions = opts} :: ParserState + eitherdoc <- readWithMTokens parseMan state tokenz + either throwError return eitherdoc + + where + + readWithMTokens :: PandocMonad m + => ParserT [ManToken] ParserState m a -- ^ parser + -> ParserState -- ^ initial state + -> [ManToken] -- ^ input + -> m (Either PandocError a) + readWithMTokens parser state input = + let leftF = PandocParsecError . (intercalate "\n") $ show <$> input + in mapLeft leftF `liftM` runParserT parser state "source" input + + mapLeft :: (a -> c) -> Either a b -> Either c b + mapLeft f (Left x) = Left $ f x + mapLeft _ (Right r) = Right r + +-- +-- String -> ManToken function +-- + +lexMan :: PandocMonad m => ManLexer m [ManToken] +lexMan = many (lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine) + +parseMan :: PandocMonad m => ManParser m Pandoc +parseMan = do + let parsers = [ try parseList, parseTitle, parsePara, parseSkippedContent + , try parseCodeBlock, parseHeader, parseSkipMacro] + bs <- many $ choice parsers + let (Pandoc _ blocks) = doc $ mconcat bs + meta <- stateMeta <$> getState + return $ Pandoc meta blocks + +eofline :: Stream s m Char => ParsecT s u m () +eofline = void newline <|> eof + +spacetab :: Stream s m Char => ParsecT s u m Char +spacetab = char ' ' <|> char '\t' + +-- TODO add other sequences from man (7) groff +escapeLexer :: PandocMonad m => ManLexer m EscapeThing +escapeLexer = do + char '\\' + choice [escChar, escFont, escUnknown] + where + + escChar :: PandocMonad m => ManLexer m EscapeThing + escChar = + let skipSeqs = ["%", "{", "}", "&", "\n", ":", "\"", "0", "c"] + subsSeqs = [ ("-", '-'), (" ", ' '), ("\\", '\\'), ("[lq]", '“'), ("[rq]", '”') + , ("[em]", '—'), ("[en]", '–'), ("*(lq", '«'), ("*(rq", '»') + , ("t", '\t'), ("e", '\\'), ("`", '`'), ("^", ' '), ("|", ' ') + , ("'", '`') ] + substitute :: PandocMonad m => (String,Char) -> ManLexer m EscapeThing + substitute (from,to) = try $ string from >> return (EChar to) + skip :: PandocMonad m => String -> ManLexer m EscapeThing + skip seq' = try $ string seq' >> return ENothing + in choice $ (substitute <$> subsSeqs) ++ + (skip <$> skipSeqs) ++ + [ char '(' >> anyChar >> return ENothing + , char '[' >> many alphaNum >> char ']' >> return ENothing + ] + + escFont :: PandocMonad m => ManLexer m EscapeThing + escFont = do + char 'f' + font <- choice [ singleton <$> letterFontKind + , char '(' >> anyChar >> anyChar >> return (singleton Regular) + , try lettersFont + , digit >> return (singleton Regular) + ] + modifyState (\r -> r {fontKind = font}) + return $ EFont font + + where + + lettersFont :: PandocMonad m => ManLexer m Font + lettersFont = do + char '[' + fs <- many letterFontKind + many letter + char ']' + return $ S.fromList fs + + letterFontKind :: PandocMonad m => ManLexer m FontKind + letterFontKind = choice [ + char 'B' >> return Bold + , char 'I' >> return Italic + , char 'C' >> return Monospace + , (char 'P' <|> char 'R') >> return Regular + ] + + escUnknown :: PandocMonad m => ManLexer m EscapeThing + escUnknown = do + c <- anyChar + pos <- getPosition + logOutput $ SkippedContent ("Unknown escape sequence \\" ++ [c]) pos + return ENothing + +currentFont :: PandocMonad m => ManLexer m Font +currentFont = fontKind <$> getState + +-- separate function from lexMacro since real man files sometimes do not follow the rules +lexComment :: PandocMonad m => ManLexer m ManToken +lexComment = do + try $ string ".\\\"" + many Parsec.space + body <- many $ noneOf "\n" + char '\n' + return $ MComment body + +lexMacro :: PandocMonad m => ManLexer m ManToken +lexMacro = do + char '.' <|> char '\'' + many spacetab + macroName <- many1 (letter <|> oneOf ['\\', '"', '&']) + args <- lexArgs + let joinedArgs = unwords $ fst <$> args + knownMacro mkind = MMacro mkind args + + tok = case macroName of + x | x `elem` ["\\\"", "\\#"] -> MComment joinedArgs + "TH" -> knownMacro KTitle + "IP" -> knownMacro KTab + "TP" -> knownMacro KTab + "RE" -> knownMacro KTabEnd + "RS" -> knownMacro KSubTab + "nf" -> knownMacro KCodeBlStart + "fi" -> knownMacro KCodeBlEnd + "B" -> MStr (joinedArgs, singleton Bold) + "BR" -> MMaybeLink joinedArgs + x | x `elem` ["BI", "IB"] -> MStr (joinedArgs, S.fromList [Italic, Bold]) + x | x `elem` ["I", "IR", "RI"] -> MStr (joinedArgs, singleton Italic) + "SH" -> MHeader 2 args + "SS" -> MHeader 3 args + x | x `elem` [ "P", "PP", "LP", "sp"] -> MEmptyLine + _ -> MUnknownMacro macroName args + return tok + + where + + -- TODO better would be [[RoffStr]], since one arg may have different fonts + lexArgs :: PandocMonad m => ManLexer m [RoffStr] + lexArgs = do + args <- many $ try oneArg + many spacetab + eofline + return args + + where + + oneArg :: PandocMonad m => ManLexer m RoffStr + oneArg = do + many1 spacetab + many $ try $ string "\\\n" + try quotedArg <|> plainArg -- try, because there are some erroneous files, e.g. linux/bpf.2 + + plainArg :: PandocMonad m => ManLexer m RoffStr + plainArg = do + indents <- many spacetab + arg <- many1 $ escChar <|> (Just <$> noneOf " \t\n") + f <- currentFont + return (indents ++ catMaybes arg, f) + + quotedArg :: PandocMonad m => ManLexer m RoffStr + quotedArg = do + char '"' + val <- many quotedChar + char '"' + val2 <- many $ escChar <|> (Just <$> noneOf " \t\n") + f <- currentFont + return (catMaybes $ val ++ val2, f) + + quotedChar :: PandocMonad m => ManLexer m (Maybe Char) + quotedChar = escChar <|> (Just <$> noneOf "\"\n") <|> (Just <$> try (string "\"\"" >> return '"')) + + escChar :: PandocMonad m => ManLexer m (Maybe Char) + escChar = do + ec <- escapeLexer + case ec of + (EChar c) -> return $ Just c + _ -> return Nothing + +lexLine :: PandocMonad m => ManLexer m ManToken +lexLine = do + lnparts <- many1 (esc <|> linePart) + eofline + return $ MLine $ catMaybes lnparts + where + + esc :: PandocMonad m => ManLexer m (Maybe (String, Font)) + esc = do + someesc <- escapeLexer + font <- currentFont + let rv = case someesc of + EChar c -> Just ([c], font) + _ -> Nothing + return rv + + linePart :: PandocMonad m => ManLexer m (Maybe (String, Font)) + linePart = do + lnpart <- many1 $ noneOf "\n\\" + font <- currentFont + return $ Just (lnpart, font) + + +lexEmptyLine :: PandocMonad m => ManLexer m ManToken +lexEmptyLine = char '\n' >> return MEmptyLine + +-- +-- ManToken parsec functions +-- + +msatisfy :: (Show t, Stream s m t) => (t -> Bool) -> ParserT s st m t +msatisfy predic = tokenPrim show nextPos testTok + where + testTok t = if predic t then Just t else Nothing + nextPos pos _x _xs = updatePosString (setSourceColumn (setSourceLine pos $ sourceLine pos + 1) 1) ("") + +mstr :: PandocMonad m => ManParser m ManToken +mstr = msatisfy isMStr where + isMStr (MStr _) = True + isMStr _ = False + +mline :: PandocMonad m => ManParser m ManToken +mline = msatisfy isMLine where + isMLine (MLine _) = True + isMLine _ = False + +mmaybeLink :: PandocMonad m => ManParser m ManToken +mmaybeLink = msatisfy isMMaybeLink where + isMMaybeLink (MMaybeLink _) = True + isMMaybeLink _ = False + +memplyLine :: PandocMonad m => ManParser m ManToken +memplyLine = msatisfy isMEmptyLine where + isMEmptyLine MEmptyLine = True + isMEmptyLine _ = False + +mheader :: PandocMonad m => ManParser m ManToken +mheader = msatisfy isMHeader where + isMHeader (MHeader _ _) = True + isMHeader _ = False + +mmacro :: PandocMonad m => MacroKind -> ManParser m ManToken +mmacro mk = msatisfy isMMacro where + isMMacro (MMacro mk' _) | mk == mk' = True + | otherwise = False + isMMacro _ = False + +mmacroAny :: PandocMonad m => ManParser m ManToken +mmacroAny = msatisfy isMMacro where + isMMacro (MMacro _ _) = True + isMMacro _ = False + +munknownMacro :: PandocMonad m => ManParser m ManToken +munknownMacro = msatisfy isMUnknownMacro where + isMUnknownMacro (MUnknownMacro _ _) = True + isMUnknownMacro _ = False + +mcomment :: PandocMonad m => ManParser m ManToken +mcomment = msatisfy isMComment where + isMComment (MComment _) = True + isMComment _ = False + +-- +-- ManToken -> Block functions +-- + +parseTitle :: PandocMonad m => ManParser m Blocks +parseTitle = do + (MMacro _ args) <- mmacro KTitle + if null args + then return mempty + else do + let mantitle = fst $ head args + modifyState (changeTitle mantitle) + return $ header 1 $ str mantitle + where + changeTitle title pst = + let meta = stateMeta pst + metaUp = Meta $ insert "title" (MetaString title) (unMeta meta) + in + pst {stateMeta = metaUp} + +parseSkippedContent :: PandocMonad m => ManParser m Blocks +parseSkippedContent = do + tok <- munknownMacro <|> mcomment <|> memplyLine + onToken tok + return mempty + + where + + onToken :: PandocMonad m => ManToken -> ManParser m () + onToken (MUnknownMacro mname _) = do + pos <- getPosition + logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos + onToken _ = return () + +strToInlines :: RoffStr -> Inlines +strToInlines (s, fonts) = inner $ S.toList fonts where + inner :: [FontKind] -> Inlines + inner [] = str s + inner (Bold:fs) = strong $ inner fs + inner (Italic:fs) = emph $ inner fs + + -- Monospace goes after Bold and Italic in ordered set + inner (Monospace:_) = code s + inner (Regular:fs) = inner fs + +parsePara :: PandocMonad m => ManParser m Blocks +parsePara = para <$> parseInlines + +parseInlines :: PandocMonad m => ManParser m Inlines +parseInlines = do + inls <- many1 (strInl <|> lineInl <|> linkInl <|> comment) + let withspaces = intersperse B.space inls + return $ mconcat withspaces + + where + + strInl :: PandocMonad m => ManParser m Inlines + strInl = do + (MStr rstr) <- mstr + return $ strToInlines rstr + + lineInl :: PandocMonad m => ManParser m Inlines + lineInl = do + (MLine fragments) <- mline + return $ mconcat $ strToInlines <$> fragments + + linkInl :: PandocMonad m => ManParser m Inlines + linkInl = do + (MMaybeLink txt) <- mmaybeLink + let inls = case runParser linkParser () "" txt of + Right lnk -> lnk + Left _ -> strong $ str txt + return inls + + where + + -- assuming man pages are generated from Linux-like repository + linkParser :: Parsec String () Inlines + linkParser = do + mpage <- many1 (alphaNum <|> char '_') + spacetab + char '(' + mansect <- digit + char ')' + other <- many anyChar + let manurl pagename section = "../"++section++"/"++pagename++"."++section + lnkInls = link (manurl mpage [mansect]) mpage (strong $ str mpage) + return $ lnkInls <> strong (str (" ("++[mansect] ++ ")") <> str other) + + comment :: PandocMonad m => ManParser m Inlines + comment = mcomment >> return mempty + + +parseCodeBlock :: PandocMonad m => ManParser m Blocks +parseCodeBlock = do + mmacro KCodeBlStart + toks <- many (mstr <|> mline <|> mmaybeLink <|> memplyLine <|> munknownMacro <|> mcomment) + mmacro KCodeBlEnd + return $ codeBlock (intercalate "\n" . catMaybes $ extractText <$> toks) + + where + + extractText :: ManToken -> Maybe String + extractText (MStr (s, _)) = Just s + extractText (MLine ss) = Just . concat $ map fst ss -- TODO maybe unwords? + extractText (MMaybeLink s) = Just s + extractText MEmptyLine = Just "" -- string are intercalated with '\n', this prevents double '\n' + extractText _ = Nothing + +parseHeader :: PandocMonad m => ManParser m Blocks +parseHeader = do + (MHeader lvl ss) <- mheader + return $ header lvl (mconcat $ intersperse B.space $ strToInlines <$> ss) + +type ListBuilder = [Blocks] -> Blocks + +parseList :: PandocMonad m => ManParser m Blocks +parseList = do + xx <- many1 paras + let bls = map snd xx + let bldr = fst $ head xx + return $ bldr bls + + where + + macroIPInl :: [RoffStr] -> Inlines + macroIPInl (x:_:[]) = strToInlines x <> B.space + macroIPInl _ = mempty + + listKind :: [RoffStr] -> Maybe ListBuilder + listKind (((c:_), _):_:[]) = + let params style = orderedListWith (1, style, DefaultDelim) + in case c of + _ | isDigit c -> Just $ params Decimal + _ | isUpper c -> Just $ params UpperAlpha + _ | isLower c -> Just $ params LowerAlpha + _ -> Nothing + + listKind _ = Nothing + + paras :: PandocMonad m => ManParser m (ListBuilder, Blocks) + paras = do + (MMacro _ args) <- mmacro KTab + let lbuilderOpt = listKind args + lbuilder = fromMaybe bulletList lbuilderOpt + macroinl = macroIPInl args + inls <- parseInlines + let parainls = if isNothing lbuilderOpt then macroinl <> inls else inls + subls <- mconcat <$> many sublist + return $ (lbuilder, plain parainls <> subls) + + sublist :: PandocMonad m => ManParser m Blocks + sublist = do + mmacro KSubTab + bl <- parseList + mmacro KTabEnd + return bl + +-- In case of weird man file it will be parsed succesfully +parseSkipMacro :: PandocMonad m => ManParser m Blocks +parseSkipMacro = mmacroAny >> mempty diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index b426ffd07..842e0f656 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -171,6 +171,10 @@ tests = [ testGroup "markdown" , test "tables" ["-f", "native", "-t", "../data/sample.lua"] "tables.native" "tables.custom" ] + , testGroup "man" + [ test "reader" ["-r", "man", "-w", "native", "-s"] + "man-reader.man" "man-reader.native" + ] ] -- makes sure file is fully closed after reading diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs new file mode 100644 index 000000000..9dbfbab4d --- /dev/null +++ b/test/Tests/Readers/Man.hs @@ -0,0 +1,83 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Readers.Man (tests) where + +import Prelude +import Data.Text (Text) +import Test.Tasty +import Tests.Helpers +import Text.Pandoc +import Text.Pandoc.Arbitrary () +import Text.Pandoc.Builder +import Text.Pandoc.Readers.Man + +man :: Text -> Pandoc +man = purely $ readMan def + +infix 4 =: +(=:) :: ToString c + => String -> (Text, c) -> TestTree +(=:) = test man + +tests :: [TestTree] +tests = [ + -- .SH "HEllo bbb" "aaa"" as" + testGroup "Macros" [ + "Bold" =: + ".B foo" + =?> (para $ strong "foo") + , "Italic" =: + ".I bar\n" + =?> (para $ emph "bar") + , "BoldItalic" =: + ".BI foo bar" + =?> (para $ strong $ emph $ str "foo bar") + , "H1" =: + ".SH The header\n" + =?> header 2 (str "The" <> space <> str "header") + , "H2" =: + ".SS \"The header 2\"" + =?> header 3 (str "The header 2") + , "Macro args" =: + ".B \"single arg with \"\"Q\"\"\"" + =?> (para $ strong $ str "single arg with \"Q\"") + , "comment" =: + ".\\\"bla\naaa" + =?> (para $ space <> str "aaa") + , "link" =: + ".BR aa (1)" + =?> (para $ link "../1/aa.1" "aa" (strong $ str "aa") <> (strong $ str " (1)")) + ], + testGroup "Escapes" [ + "fonts" =: + "aa\\fIbb\\fRcc" + =?> (para $ str "aa" <> (emph $ str "bb") <> str "cc") + , "skip" =: + "a\\%\\{\\}\\\n\\:b\\0" + =?> (para $ str "ab") + , "replace" =: + "\\-\\ \\\\\\[lq]\\[rq]\\[em]\\[en]\\*(lq\\*(rq" + =?> (para $ str "- \\“”—–«»") + , "replace2" =: + "\\t\\e\\`\\^\\|\\'" + =?> (para $ str "\t\\` `") + ], + testGroup "Lists" [ + "bullet" =: + ".IP\nfirst\n.IP\nsecond" + =?> bulletList [plain $ str "first", plain $ str "second"] + , "odrered" =: + ".IP 1 a\nfirst\n.IP 2 a\nsecond" + =?> orderedListWith (1,Decimal,DefaultDelim) [plain $ str "first", plain $ str "second"] + , "upper" =: + ".IP A a\nfirst\n.IP B a\nsecond" + =?> orderedListWith (1,UpperAlpha,DefaultDelim) [plain $ str "first", plain $ str "second"] + , "nested" =: + ".IP\nfirst\n.RS\n.IP\n1a\n.IP\n1b\n.RE" + =?> bulletList [(plain $ str "first") <> (bulletList [plain $ str "1a", plain $ str "1b"])] + ], + testGroup "CodeBlocks" [ + "cb1"=: + ".nf\naa\n\tbb\n.fi" + =?> codeBlock "aa\n\tbb" + ] + ] diff --git a/test/grofftest.sh b/test/grofftest.sh new file mode 100644 index 000000000..ca1aa71d9 --- /dev/null +++ b/test/grofftest.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# iterates over specified directory, containing "\w+\.\d"-like files, +# executes pandoc voer them and prints stderr on nonzero return code + +if [ $# -ne 2 ]; then + echo "Not enough arguments" + exit 1 +fi + +PANDOC=$1 +DIR=$2 + +$PANDOC --version > /dev/null || { echo "pandoc executable error" >&2 ; exit 1 ; } + +ls $2 | egrep "^.+\.[0-9].?$" | while read f ; do + FILE="$DIR/$f" + $PANDOC -f man -t native < $FILE 2>&1 > /dev/null + if [ $? -ne 0 ]; then + echo "Failed to convert $FILE" + fi +done diff --git a/test/man-reader.man b/test/man-reader.man new file mode 100644 index 000000000..4f3395051 --- /dev/null +++ b/test/man-reader.man @@ -0,0 +1,189 @@ +.TH "Pandoc Man tests" "" "Oct 17, 2018" "" "" +.PP +This is a set of tests for pandoc. +.PP + * * * * * +.SH Headers +.SH Level 1 +.SS Level 2 + + * * * * * +.SH Paragraphs +.PP +Here's a regular paragraph. +.PP +Another paragraph +In Markdown 1.0.0 and earlier. +Version 8. +This line turns into a list item. +Because a hard\-wrapped line in the middle of a paragraph looked like a list +item. +.PP +There should be a hard line break +.PD 0 +.P +.PD +here. +.PP + * * * * * +.SH Block Quotes +Code in a block quote: +.IP +.nf +\f[C] +sub\ status\ { +\ \ \ \ print\ "working"; +} +\f[] +.fi +.PP +A list: +.IP "1." 3 +item one +.IP "2." 3 +item two +.PP +.SH Code Blocks +.PP +Code: +.IP +.nf +\f[C] +\-\-\-\-\ (should\ be\ four\ hyphens) + +sub\ status\ { +\ \ \ \ print\ "working"; +} + +\f[] +.fi +.PP +And: +.IP +.nf +\f[C] +\tthis\ code\ line is\ indented\ by\ one\ tab + +These\ should\ not\ be\ escaped:\ \ \\$\ \\\\\ \\>\ \\[\ \\{ +\f[] +.fi +.PP + * * * * * +.SH Lists +.SS Unordered +.PP +Asterisks: +.IP \[bu] 2 +asterisk 1 +.IP \[bu] 2 +asterisk 2 +.IP \[bu] 2 +asterisk 3 +.PP +.SS Ordered +.IP "1." 3 +First +.IP "2." 3 +Second +.IP "3." 3 +Third +.PP +.SS Nested +.IP \[bu] 2 +Tab +.RS 2 +.IP \[bu] 2 +Tab +.RS 2 +.IP \[bu] 2 +Tab +.RE +.RE +.PP +Here's another: +.IP "1." 3 +First +.IP "2." 3 +Second: +.RS 4 +.IP \[bu] 2 +Fee +.IP \[bu] 2 +Fie +.IP \[bu] 2 +Foe +.RE +.IP "3." 3 +Third +.PP +Same thing: +.IP "1." 3 +First +.IP "2." 3 +Second: +.RS 4 +.IP \[bu] 2 +Fee +.IP \[bu] 2 +Fie +.IP \[bu] 2 +Foe +.RE +.IP "3." 3 +Third +.SS different styles: +.IP "A." 3 +Upper Alpha +.RS 4 +.IP "I." 3 +Upper Roman. +.RS 4 +.IP "(6)" 4 +Decimal start with 6 +.RS 4 +.IP "c)" 3 +Lower alpha with paren +.RE +.RE +.RE +.PP + * * * * * +.SH Special Characters +AT&T has an ampersand in their name. +.PP +4 < 5. +.PP +6 > 5. +.PP +Backslash: \\ +.PP +Backtick: ` +.PP +Asterisk: * +.PP +Underscore: _ +.PP +Left brace: { +.PP +Right brace: } +.PP +Left bracket: [ +.PP +Right bracket: ] +.PP +Left paren: ( +.PP +Right paren: ) +.PP +Greater\-than: > +.PP +Hash: # +.PP +Period: . +.PP +Bang: ! +.PP +Plus: + +.PP +Minus: \- +.PP diff --git a/test/man-reader.native b/test/man-reader.native new file mode 100644 index 000000000..1fa010bd6 --- /dev/null +++ b/test/man-reader.native @@ -0,0 +1,94 @@ +Pandoc (Meta {unMeta = fromList [("title",MetaString "Pandoc Man tests")]}) +[Header 1 ("",[],[]) [Str "Pandoc Man tests"] +,Para [Str "This is a set of tests for pandoc."] +,Para [Str " * * * * *"] +,Header 2 ("",[],[]) [Str "Headers"] +,Header 2 ("",[],[]) [Str "Level",Space,Str "1"] +,Header 3 ("",[],[]) [Str "Level",Space,Str "2"] +,Para [Str " * * * * *"] +,Header 2 ("",[],[]) [Str "Paragraphs"] +,Para [Str "Here's a regular paragraph."] +,Para [Str "Another paragraph",Space,Str "In Markdown 1.0.0 and earlier.",Space,Str "Version 8.",Space,Str "This line turns into a list item.",Space,Str "Because a hard-wrapped line in the middle of a paragraph looked like a list",Space,Str "item."] +,Para [Str "There should be a hard line break"] +,Para [Str "here."] +,Para [Str " * * * * *"] +,Header 2 ("",[],[]) [Str "Block",Space,Str "Quotes"] +,Para [Str "Code in a block quote:"] +,CodeBlock ("",[],[]) "\nsub status {\n print \"working\";\n}\n" +,Para [Str "A list:"] +,OrderedList (1,Decimal,DefaultDelim) + [[Plain [Str "item one"]] + ,[Plain [Str "item two"]]] +,Header 2 ("",[],[]) [Str "Code",Space,Str "Blocks"] +,Para [Str "Code:"] +,CodeBlock ("",[],[]) "\n---- (should be four hyphens)\n\nsub status {\n print \"working\";\n}\n\n" +,Para [Str "And:"] +,CodeBlock ("",[],[]) "\n\tthis code line is indented by one tab\n\nThese should not be escaped: \\$ \\\\ \\> \\[ \\{\n" +,Para [Str " * * * * *"] +,Header 2 ("",[],[]) [Str "Lists"] +,Header 3 ("",[],[]) [Str "Unordered"] +,Para [Str "Asterisks:"] +,BulletList + [[Plain [Str "",Space,Str "asterisk 1"]] + ,[Plain [Str "",Space,Str "asterisk 2"]] + ,[Plain [Str "",Space,Str "asterisk 3"]]] +,Header 3 ("",[],[]) [Str "Ordered"] +,OrderedList (1,Decimal,DefaultDelim) + [[Plain [Str "First"]] + ,[Plain [Str "Second"]] + ,[Plain [Str "Third"]]] +,Header 3 ("",[],[]) [Str "Nested"] +,BulletList + [[Plain [Str "",Space,Str "Tab"] + ,BulletList + [[Plain [Str "",Space,Str "Tab"] + ,BulletList + [[Plain [Str "",Space,Str "Tab"]]]]]]] +,Para [Str "Here's another:"] +,OrderedList (1,Decimal,DefaultDelim) + [[Plain [Str "First"]] + ,[Plain [Str "Second:"] + ,BulletList + [[Plain [Str "",Space,Str "Fee"]] + ,[Plain [Str "",Space,Str "Fie"]] + ,[Plain [Str "",Space,Str "Foe"]]]] + ,[Plain [Str "Third"]]] +,Para [Str "Same thing:"] +,OrderedList (1,Decimal,DefaultDelim) + [[Plain [Str "First"]] + ,[Plain [Str "Second:"] + ,BulletList + [[Plain [Str "",Space,Str "Fee"]] + ,[Plain [Str "",Space,Str "Fie"]] + ,[Plain [Str "",Space,Str "Foe"]]]] + ,[Plain [Str "Third"]]] +,Header 3 ("",[],[]) [Str "different",Space,Str "styles:"] +,OrderedList (1,UpperAlpha,DefaultDelim) + [[Plain [Str "Upper Alpha"] + ,OrderedList (1,UpperAlpha,DefaultDelim) + [[Plain [Str "Upper Roman."] + ,BulletList + [[Plain [Str "(6)",Space,Str "Decimal start with 6"] + ,OrderedList (1,LowerAlpha,DefaultDelim) + [[Plain [Str "Lower alpha with paren"]]]]]]]]] +,Para [Str " * * * * *"] +,Header 2 ("",[],[]) [Str "Special",Space,Str "Characters"] +,Para [Str "AT&T has an ampersand in their name."] +,Para [Str "4 < 5."] +,Para [Str "6 > 5."] +,Para [Str "Backslash: \\"] +,Para [Str "Backtick: `"] +,Para [Str "Asterisk: *"] +,Para [Str "Underscore: _"] +,Para [Str "Left brace: {"] +,Para [Str "Right brace: }"] +,Para [Str "Left bracket: ["] +,Para [Str "Right bracket: ]"] +,Para [Str "Left paren: ("] +,Para [Str "Right paren: )"] +,Para [Str "Greater-than: >"] +,Para [Str "Hash: #"] +,Para [Str "Period: ."] +,Para [Str "Bang: !"] +,Para [Str "Plus: +"] +,Para [Str "Minus: -"]] diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index b70d2286c..dc51b73cc 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -22,6 +22,7 @@ import qualified Tests.Readers.Odt import qualified Tests.Readers.Org import qualified Tests.Readers.RST import qualified Tests.Readers.Txt2Tags +import qualified Tests.Readers.Man import qualified Tests.Shared import qualified Tests.Writers.AsciiDoc import qualified Tests.Writers.ConTeXt @@ -76,6 +77,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests , testGroup "EPUB" Tests.Readers.EPUB.tests , testGroup "Muse" Tests.Readers.Muse.tests , testGroup "Creole" Tests.Readers.Creole.tests + , testGroup "Man" Tests.Readers.Man.tests , testGroup "FB2" Tests.Readers.FB2.tests ] , testGroup "Lua filters" Tests.Lua.tests |