From fd3676a568589f07ad0707c07b2a9f87df6e2f6c Mon Sep 17 00:00:00 2001 From: Yan Pas Date: Sun, 25 Feb 2018 03:34:17 +0300 Subject: initial --- test/Tests/Readers/Man.hs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 test/Tests/Readers/Man.hs (limited to 'test/Tests') diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs new file mode 100644 index 000000000..5dc91544b --- /dev/null +++ b/test/Tests/Readers/Man.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Readers.Man (tests) where + +import Data.Text (Text) +import qualified Data.Text as T +import Test.Tasty +import Tests.Helpers +import Text.Pandoc +import Text.Pandoc.Arbitrary () +import Text.Pandoc.Builder + +creole :: Text -> Pandoc +creole = purely $ readCreole def{ readerStandalone = True } + +tests :: [TestTree] +tests = [] \ No newline at end of file -- cgit v1.2.3 From c1617565fc2a5984e2e44d4da9adf7f8a26b3160 Mon Sep 17 00:00:00 2001 From: Yan Pas Date: Wed, 9 May 2018 03:24:45 +0300 Subject: basic manfile parsing --- src/Text/Pandoc/Readers/Man.hs | 241 ++++++++++++++++++++++++++++++----------- test/Tests/Readers/Man.hs | 23 +++- test/test-pandoc.hs | 2 +- 3 files changed, 198 insertions(+), 68 deletions(-) (limited to 'test/Tests') diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index d1ff3fc47..166b7c7a7 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -14,6 +14,9 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + -} {- | @@ -29,90 +32,202 @@ Conversion of man to 'Pandoc' document. -} module Text.Pandoc.Readers.Man where -import Control.Monad.Except (liftM2, throwError, guard) -import Text.Pandoc.Class (PandocMonad(..)) +import Control.Monad.Except (throwError) +import Data.Default (Default) +import Data.Map (insert) +import Data.Maybe (isJust) +import Data.List (intersperse, intercalate) +import qualified Data.Text as T + +import Text.Pandoc.Class (PandocMonad(..), runPure) import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed) import Text.Pandoc.Shared (crFilter) import Text.Parsec -import Text.Parsec.Char -import Data.Text (Text) -import Data.Map (empty) -import qualified Data.Text as T +import Text.Parsec.Char () + +data FontKind = Regular | Italic | Bold | ItalicBold deriving Show + +data RoffState = RoffState { inCodeBlock :: Bool + , fontKind :: FontKind + } deriving Show + +instance Default RoffState where + def = RoffState {inCodeBlock = False, fontKind = Regular} + +data ManState = ManState {pState :: ParserState, rState :: RoffState} + +instance HasLogMessages ManState where + addLogMessage lm mst = mst {pState = addLogMessage lm (pState mst)} + getLogMessages mst = getLogMessages $ pState mst + +modifyRoffState :: PandocMonad m => (RoffState -> RoffState) -> ParsecT a ManState m () +modifyRoffState f = do + mst <- getState + setState mst { rState = f $ rState mst } + +type ManParser m = ParserT [Char] ManState m + +testStrr :: [Char] -> SourceName -> Either PandocError (Either ParseError Pandoc) +testStrr s srcnm = runPure (runParserT parseMan (ManState {pState=def, rState=def}) srcnm s) + +printPandoc :: Pandoc -> [Char] +printPandoc (Pandoc m content) = + let ttl = "Pandoc: " ++ (show $ unMeta m) + cnt = intercalate "\n" $ map show content + in ttl ++ "\n" ++ cnt + +strrepr :: (Show a2, Show a1) => Either a2 (Either a1 Pandoc) -> [Char] +strrepr obj = case obj of + Right x -> case x of + Right x' -> printPandoc x' + Left y' -> show y' + Left y -> show y + +testFile :: FilePath -> IO () +testFile fname = do + cont <- readFile fname + putStrLn . strrepr $ testStrr cont fname -- | Read man (troff) from an input string and return a Pandoc document. -readMan :: PandocMonad m - => ReaderOptions - -> Text - -> m Pandoc -readMan opts s = do - parsed <- readWithM parseMan def{ stateOptions = opts } (T.unpack s) +readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc +readMan opts txt = do + let state = ManState { pState = def{ stateOptions = opts }, rState = def} + parsed <- readWithM parseMan state (T.unpack $ crFilter txt) case parsed of Right result -> return result Left e -> throwError e -type ManParser m = ParserT [Char] ParserState m - -comment :: PandocMonad m => ManParser m String -comment = do - string ".\\\" " - many anyChar - -data Macro = Macro { macroName :: String - , macroArgs :: [String] - } - parseMacro :: PandocMonad m => ManParser m Block parseMacro = do - m <- macro - return $ Plain (map Str $ (macroName m : macroArgs m)) - -macro :: PandocMonad m => ManParser m Macro -macro = do char '.' <|> char '\'' many space - name <- many1 letter - --args <- many parseArg - return $ Macro { macroName = name, macroArgs = [] } - + macroName <- many1 (letter <|> oneOf ['\\', '"']) + args <- parseArgs + let joinedArgs = concat $ intersperse " " args + case macroName of + "\\\"" -> return Null -- comment + "TH" -> macroTitle (if null args then "" else head args) + "nf" -> macroCodeBlock True >> return Null + "fi" -> macroCodeBlock False >> return Null + "B" -> return $ Plain [Strong [Str joinedArgs]] + "BR" -> return $ Plain [Strong [Str joinedArgs]] + "BI" -> return $ Plain [Strong [Emph [Str joinedArgs]]] + "I" -> return $ Plain [Emph [Str joinedArgs]] + "SH" -> return $ Header 2 nullAttr [Str joinedArgs] + "sp" -> return $ Plain [LineBreak] + _ -> unkownMacro macroName args + where - parseArg :: PandocMonad m => ManParser m String - parseArg = do - many1 space - plainArg - - quotedArg :: PandocMonad m => ManParser m String - quotedArg = do - char '"' - val <- many1 quotedChar - char '"' - return val - - plainArg :: PandocMonad m => ManParser m String - plainArg = do - many1 $ noneOf " \t" - - quotedChar :: PandocMonad m => ManParser m Char - quotedChar = do - noneOf "\"" - <|> try (string "\"\"" >> return '"') + macroTitle :: PandocMonad m => String -> ManParser m Block + macroTitle mantitle = do + modifyState (changeTitle mantitle) + if null mantitle + then return Null + else return $ Header 1 nullAttr [Str mantitle] + where + changeTitle title mst @ ManState{ pState = pst} = + let meta = stateMeta pst + metaUp = Meta $ insert "title" (MetaString title) (unMeta meta) + in + mst { pState = pst {stateMeta = metaUp} } + + + macroCodeBlock :: PandocMonad m => Bool -> ManParser m () + macroCodeBlock insideCB = modifyRoffState (\rst -> rst{inCodeBlock = insideCB}) >> return () + + unkownMacro :: PandocMonad m => String -> [String] -> ManParser m Block + unkownMacro mname args = do + pos <- getPosition + logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos + return $ Plain $ Str <$> args + + parseArgs :: PandocMonad m => ManParser m [String] + parseArgs = do + eolOpt <- optionMaybe $ char '\n' + if isJust eolOpt + then return [] + else do + many1 space + arg <- try quotedArg <|> plainArg + otherargs <- parseArgs + return $ arg : otherargs + + where + + plainArg :: PandocMonad m => ManParser m String + plainArg = many1 $ noneOf " \t\n" + + quotedArg :: PandocMonad m => ManParser m String + quotedArg = do + char '"' + val <- many1 quotedChar + char '"' + return val + + quotedChar :: PandocMonad m => ManParser m Char + quotedChar = noneOf "\"\n" <|> try (string "\"\"" >> return '"') + +roffInline :: RoffState -> String -> (Maybe Inline) +roffInline rst str + | null str = Nothing + | inCodeBlock rst = Just $ Code nullAttr str + | otherwise = Just $ case fontKind rst of + Regular -> Str str + Italic -> Emph [Str str] + _ -> Strong [Str str] parseLine :: PandocMonad m => ManParser m Block parseLine = do - str <- many anyChar - return $ Plain [Str str] - -parseBlock :: PandocMonad m => ManParser m Block -parseBlock = do - choice [ parseMacro - , parseLine - ] + parts <- parseLineParts + newline + return $ if null parts + then Plain [LineBreak] + else Plain parts + where + parseLineParts :: PandocMonad m => ManParser m [Inline] + parseLineParts = do + lnpart <- many $ noneOf "\n\\" + ManState {rState = roffSt} <- getState + let inl = roffInline roffSt lnpart + others <- backSlash <|> return [] + return $ case inl of + Just x -> x:others + Nothing -> others + + backSlash :: PandocMonad m => ManParser m [Inline] + backSlash = do + char '\\' + esc <- choice [ char 'f' >> fEscape + , char '-' >> return (Just '-') + , Just <$> noneOf "\n" + ] + ManState {rState = roffSt} <- getState + case esc of + Just c -> case roffInline roffSt [c] of + Just inl -> do + oth <- parseLineParts + return $ inl : oth + Nothing -> parseLineParts + Nothing -> parseLineParts + where + + fEscape :: PandocMonad m => ManParser m (Maybe Char) + fEscape = choice [ char 'B' >> modifyRoffState (\rst -> rst {fontKind = Bold}) + , char 'I' >> modifyRoffState (\rst -> rst {fontKind = Italic}) + , char 'P' >> modifyRoffState (\rst -> rst {fontKind = Regular}) + ] + >> return Nothing + + parseMan :: PandocMonad m => ManParser m Pandoc parseMan = do - blocks <- parseBlock `sepBy` newline - - return $ Pandoc Meta{unMeta = empty} blocks \ No newline at end of file + blocks <- many (parseMacro <|> parseLine) + parserst <- pState <$> getState + return $ Pandoc (stateMeta parserst) blocks diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index 5dc91544b..007935be1 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -2,15 +2,30 @@ module Tests.Readers.Man (tests) where import Data.Text (Text) -import qualified Data.Text as T import Test.Tasty import Tests.Helpers import Text.Pandoc import Text.Pandoc.Arbitrary () import Text.Pandoc.Builder +import Text.Pandoc.Readers.Man -creole :: Text -> Pandoc -creole = purely $ readCreole def{ readerStandalone = True } +man :: Text -> Pandoc +man = purely $ readMan def + +infix 4 =: +(=:) :: ToString c + => String -> (Text, c) -> TestTree +(=:) = test man tests :: [TestTree] -tests = [] \ No newline at end of file +tests = [ + -- .SH "HEllo bbb" "aaa"" as" + testGroup "Macros" [ + "Bold" =: + ".B foo\n" + =?> strong "foo" + , "Italic" =: + ".I foo\n" + =?> emph "foo" + ] + ] diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 1ea3591b2..9d4632f35 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -74,7 +74,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests , testGroup "EPUB" Tests.Readers.EPUB.tests , testGroup "Muse" Tests.Readers.Muse.tests , testGroup "Creole" Tests.Readers.Creole.tests - , testGroup "Man" Tests.Readers + , testGroup "Man" Tests.Readers.Man.tests ] , testGroup "Lua filters" Tests.Lua.tests ] -- cgit v1.2.3 From ad19166bc308a2428bd040851a2a97c76e8873f9 Mon Sep 17 00:00:00 2001 From: Yan Pas Date: Wed, 9 May 2018 20:40:37 +0300 Subject: fix build and tests --- src/Text/Pandoc/Readers/Man.hs | 1 + test/Tests/Readers/Man.hs | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'test/Tests') diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 70de68d1f..fe66bb61c 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -32,6 +32,7 @@ Conversion of man to 'Pandoc' document. -} module Text.Pandoc.Readers.Man (readMan) where +import Prelude import Control.Monad.Except (throwError) import Data.Default (Default) import Data.Map (insert) diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index 007935be1..a9fe324d1 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} module Tests.Readers.Man (tests) where +import Prelude import Data.Text (Text) import Test.Tasty import Tests.Helpers @@ -23,9 +24,9 @@ tests = [ testGroup "Macros" [ "Bold" =: ".B foo\n" - =?> strong "foo" + =?> (para $ strong "foo") , "Italic" =: ".I foo\n" - =?> emph "foo" + =?> (para $ emph "foo") ] ] -- cgit v1.2.3 From 1ce067fc2a4f70963c2c879eac74807bb4fa9e7c Mon Sep 17 00:00:00 2001 From: Yan Pas Date: Wed, 23 May 2018 00:20:30 +0300 Subject: tests, parsing fixes --- src/Text/Pandoc/Readers/Man.hs | 35 +++++++++++++++++++---------------- test/Tests/Readers/Man.hs | 18 +++++++++++++++--- 2 files changed, 34 insertions(+), 19 deletions(-) (limited to 'test/Tests') diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index d70eedb86..7b752373f 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -37,7 +37,7 @@ import Control.Monad (liftM) import Control.Monad.Except (throwError) import Data.Default (Default) import Data.Map (insert) -import Data.Maybe (isJust, catMaybes) +import Data.Maybe (catMaybes) import Data.List (intersperse, intercalate) import qualified Data.Text as T @@ -159,6 +159,9 @@ parseMan = do isNull Null = True isNull _ = False +eofline :: PandocMonad m => ManLexer m () +eofline = (newline >> return ()) <|> eof + -- TODO escape characters in arguments lexMacro :: PandocMonad m => ManLexer m ManToken lexMacro = do @@ -166,7 +169,7 @@ lexMacro = do many space macroName <- many1 (letter <|> oneOf ['\\', '"']) args <- lexArgs - let joinedArgs = concat $ intersperse " " args + let joinedArgs = unwords args let knownMacro mkind = MMacro mkind args let tok = case macroName of @@ -190,17 +193,17 @@ lexMacro = do lexArgs :: PandocMonad m => ManLexer m [String] lexArgs = do - eolOpt <- optionMaybe $ char '\n' - if isJust eolOpt - then return [] - else do - many1 space - arg <- try quotedArg <|> plainArg - otherargs <- lexArgs - return $ arg : otherargs + args <- many oneArg + eofline + return args where + oneArg :: PandocMonad m => ManLexer m String + oneArg = do + many1 $ char ' ' + try quotedArg <|> plainArg + plainArg :: PandocMonad m => ManLexer m String plainArg = many1 $ noneOf " \t\n" @@ -257,7 +260,7 @@ escapeLexer = do lexLine :: PandocMonad m => ManLexer m ManToken lexLine = do lnparts <- many1 (esc <|> linePart) - newline + eofline return $ MLine $ catMaybes lnparts where @@ -384,7 +387,7 @@ strToInline s ItalicBold = Strong [Emph [Str s]] parsePara :: PandocMonad m => ManParser m Block parsePara = do inls <- many1 (strInl <|> lineInl <|> linkInl <|> comment) - let withspaces = intersperse [Str " "] inls + let withspaces = intersperse [Space] inls return $ Para (concat withspaces) where @@ -420,7 +423,9 @@ parsePara = do other <- many anyChar let manurl pagename section = "../"++section++"/"++pagename++"."++section return $ [ Link nullAttr [Strong [Str mpage]] (manurl mpage [mansect], mpage) - , Strong [Str $ " ("++[mansect] ++ ")", Str other]] + , Strong [Str $ " ("++[mansect] ++ ")" + , Str other] + ] comment :: PandocMonad m => ManParser m [Inline] comment = mcomment >> return [] @@ -448,9 +453,7 @@ parseHeader = do return $ Header lvl nullAttr [Str s] parseBulletList :: PandocMonad m => ManParser m Block -parseBulletList = do - bls <- many1 block - return $ BulletList $ map (:[]) bls +parseBulletList = BulletList . map (: []) <$> many1 block where diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index a9fe324d1..4807095a5 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -23,10 +23,22 @@ tests = [ -- .SH "HEllo bbb" "aaa"" as" testGroup "Macros" [ "Bold" =: - ".B foo\n" + ".B foo" =?> (para $ strong "foo") , "Italic" =: - ".I foo\n" - =?> (para $ emph "foo") + ".I bar\n" + =?> (para $ emph "bar") + , "BoldItalic" =: + ".BI foo bar" + =?> (para $ strong $ emph $ str "foo bar") + , "H1" =: + ".SH The header\n" + =?> header 2 (str "The header") + , "H2" =: + ".SS The header 2" + =?> header 3 (str "The header 2") + , "Macro args" =: + ".B \"single arg with \"\"Q\"\"\"" + =?> (para $ strong $ str "single arg with \"Q\"") ] ] -- cgit v1.2.3 From 3fed62611ea394b088c49c1de680e74978bb9f82 Mon Sep 17 00:00:00 2001 From: Yan Pas Date: Sun, 14 Oct 2018 00:57:15 +0300 Subject: tests, commented debug functions --- src/Text/Pandoc/Readers/Man.hs | 23 +++++++------ test/Tests/Readers/Man.hs | 77 +++++++++++++++++++++++++++++++----------- 2 files changed, 70 insertions(+), 30 deletions(-) (limited to 'test/Tests') diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index aea53a375..24f8316ef 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -31,7 +31,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of man to 'Pandoc' document. -} -module Text.Pandoc.Readers.Man (readMan, testFile) where +module Text.Pandoc.Readers.Man (readMan) where --testFile import Prelude import Control.Monad (liftM) @@ -43,7 +43,7 @@ import Data.Maybe (catMaybes, fromMaybe, isNothing) import Data.List (intersperse, intercalate) import qualified Data.Text as T -import Text.Pandoc.Class (PandocMonad(..), runIOorExplode) +import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging (LogMessage(..)) @@ -94,9 +94,9 @@ instance Default RoffState where type ManLexer m = ParserT [Char] RoffState m type ManParser m = ParserT [ManToken] ParserState m ----- --- testStrr :: [Char] -> Either PandocError Pandoc --- testStrr s = runPure $ readMan def (T.pack s) +---- debug functions +{- +import Text.Pandoc.Class (runIOorExplode) printPandoc :: Pandoc -> [Char] printPandoc (Pandoc m content) = @@ -104,16 +104,17 @@ printPandoc (Pandoc m content) = cnt = intercalate "\n" $ map show content in ttl ++ "\n" ++ cnt --- strrepr :: Either PandocError Pandoc -> [Char] --- strrepr obj = case obj of --- Right x -> printPandoc x --- Left y -> show y +testStr :: String -> IO () +testStr str = do + pand <- runIOorExplode $ readMan def (T.pack str) + putStrLn $ printPandoc pand + testFile :: FilePath -> IO () testFile fname = do cont <- readFile fname - pand <- runIOorExplode $ readMan def (T.pack cont) - putStrLn $ printPandoc pand + testStr cont +-} ---- diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index 4807095a5..6226099d2 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -22,23 +22,62 @@ tests :: [TestTree] tests = [ -- .SH "HEllo bbb" "aaa"" as" testGroup "Macros" [ - "Bold" =: - ".B foo" - =?> (para $ strong "foo") - , "Italic" =: - ".I bar\n" - =?> (para $ emph "bar") - , "BoldItalic" =: - ".BI foo bar" - =?> (para $ strong $ emph $ str "foo bar") - , "H1" =: - ".SH The header\n" - =?> header 2 (str "The header") - , "H2" =: - ".SS The header 2" - =?> header 3 (str "The header 2") - , "Macro args" =: - ".B \"single arg with \"\"Q\"\"\"" - =?> (para $ strong $ str "single arg with \"Q\"") - ] + "Bold" =: + ".B foo" + =?> (para $ strong "foo") + , "Italic" =: + ".I bar\n" + =?> (para $ emph "bar") + , "BoldItalic" =: + ".BI foo bar" + =?> (para $ strong $ emph $ str "foo bar") + , "H1" =: + ".SH The header\n" + =?> header 2 (str "The" <> space <> str "header") + , "H2" =: + ".SS \"The header 2\"" + =?> header 3 (str "The header 2") + , "Macro args" =: + ".B \"single arg with \"\"Q\"\"\"" + =?> (para $ strong $ str "single arg with \"Q\"") + , "comment" =: + ".\\\"bla\naaa" + =?> (para $ space <> str "aaa") + , "link" =: + ".BR aa (1)" + =?> (para $ fromList [Link nullAttr [Strong [Str "aa"]] ("../1/aa.1","aa"), Strong [Str " (1)",Str ""]]) + ], + testGroup "Escapes" [ + "fonts" =: + "aa\\fIbb\\fRcc" + =?> (para $ str "aa" <> (emph $ str "bb") <> str "cc") + , "skip" =: + "a\\%\\{\\}\\\n\\:b\\0" + =?> (para $ fromList $ map Str ["a", "b"]) + , "replace" =: + "\\-\\ \\\\\\[lq]\\[rq]\\[em]\\[en]\\*(lq\\*(rq" + =?> (para $ fromList $ map Str ["-", " ", "\\", "“", "”", "—", "–", "«", "»"]) + , "replace2" =: + "\\t\\e\\`\\^\\|\\'" + =?> (para $ fromList $ map Str ["\t", "\\", "`", " ", " ", "`"]) + ], + testGroup "Lists" [ + "bullet" =: + ".IP\nfirst\n.IP\nsecond" + =?> bulletList [plain $ str "first", plain $ str "second"] + , "odrered" =: + ".IP 1 a\nfirst\n.IP 2 a\nsecond" + =?> orderedListWith (1,Decimal,DefaultDelim) [plain $ str "first", plain $ str "second"] + , "upper" =: + ".IP A a\nfirst\n.IP B a\nsecond" + =?> orderedListWith (1,UpperAlpha,DefaultDelim) [plain $ str "first", plain $ str "second"] + , "nested" =: + ".IP\nfirst\n.RS\n.IP\n1a\n.IP\n1b\n.RE" + =?> fromList [BulletList [[Plain [Str "first"],BulletList [[Plain [Str "1a"]],[Plain [Str "1b"]]]]]] + ], + testGroup "CodeBlocks" [ + "cb1"=: + ".nf\naa\n\tbb\n.fi" + =?> codeBlock "aa\n\tbb" + ] ] -- cgit v1.2.3 From ce27bf9a02d98a6a5412a493577fa9d2f3cfd1fe Mon Sep 17 00:00:00 2001 From: Yan Pas Date: Tue, 16 Oct 2018 03:12:06 +0300 Subject: builders --- src/Text/Pandoc/Readers/Man.hs | 122 ++++++++++++++++++++--------------------- test/Tests/Readers/Man.hs | 8 +-- 2 files changed, 62 insertions(+), 68 deletions(-) (limited to 'test/Tests') diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 280acb9c4..d04718fc7 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -38,22 +38,22 @@ import Control.Monad (liftM, void) import Control.Monad.Except (throwError) import Data.Char (isDigit, isUpper, isLower) import Data.Default (Default) -import Data.Functor (($>)) import Data.Map (insert) -import Data.Set (Set, singleton, fromList, toList) +import Data.Set (Set, singleton) +import qualified Data.Set as S (fromList, toList) import Data.Maybe (catMaybes, fromMaybe, isNothing) import Data.List (intersperse, intercalate) import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad(..)) -import Text.Pandoc.Definition +import Text.Pandoc.Builder as B hiding (singleton) import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared (crFilter) -import Text.Parsec hiding (tokenPrim) -import Text.Parsec.Char () +import Text.Parsec hiding (tokenPrim, space) +import qualified Text.Parsec as Parsec import Text.Parsec.Pos (updatePosString) -- @@ -158,14 +158,10 @@ parseMan :: PandocMonad m => ManParser m Pandoc parseMan = do let parsers = [ try parseList, parseTitle, parsePara, parseSkippedContent , try parseCodeBlock, parseHeader, parseSkipMacro] - blocks <- many $ choice parsers - parserst <- getState - return $ Pandoc (stateMeta parserst) (filter (not . isNull) blocks) - - where - - isNull Null = True - isNull _ = False + bs <- many $ choice parsers + let (Pandoc _ blocks) = doc $ mconcat bs + meta <- stateMeta <$> getState + return $ Pandoc meta blocks eofline :: Stream s m Char => ParsecT s u m () eofline = void newline <|> eof @@ -216,7 +212,7 @@ escapeLexer = do fs <- many letterFontKind many letter char ']' - return $ fromList fs + return $ S.fromList fs letterFontKind :: PandocMonad m => ManLexer m FontKind letterFontKind = choice [ @@ -240,7 +236,7 @@ currentFont = fontKind <$> getState lexComment :: PandocMonad m => ManLexer m ManToken lexComment = do try $ string ".\\\"" - many space + many Parsec.space body <- many $ noneOf "\n" char '\n' return $ MComment body @@ -265,7 +261,7 @@ lexMacro = do "fi" -> knownMacro KCodeBlEnd "B" -> MStr (joinedArgs, singleton Bold) "BR" -> MMaybeLink joinedArgs - x | x `elem` ["BI", "IB"] -> MStr (joinedArgs, fromList [Italic, Bold]) + x | x `elem` ["BI", "IB"] -> MStr (joinedArgs, S.fromList [Italic, Bold]) x | x `elem` ["I", "IR", "RI"] -> MStr (joinedArgs, singleton Italic) "SH" -> MHeader 2 args "SS" -> MHeader 3 args @@ -403,15 +399,15 @@ mcomment = msatisfy isMComment where -- ManToken -> Block functions -- -parseTitle :: PandocMonad m => ManParser m Block +parseTitle :: PandocMonad m => ManParser m Blocks parseTitle = do (MMacro _ args) <- mmacro KTitle if null args - then return Null + then return mempty else do let mantitle = fst $ head args modifyState (changeTitle mantitle) - return $ Header 1 nullAttr [Str mantitle] + return $ header 1 $ str mantitle where changeTitle title pst = let meta = stateMeta pst @@ -419,11 +415,11 @@ parseTitle = do in pst {stateMeta = metaUp} -parseSkippedContent :: PandocMonad m => ManParser m Block +parseSkippedContent :: PandocMonad m => ManParser m Blocks parseSkippedContent = do tok <- munknownMacro <|> mcomment <|> memplyLine onToken tok - return Null + return mempty where @@ -433,50 +429,50 @@ parseSkippedContent = do logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos onToken _ = return () -strToInline :: RoffStr -> Inline -strToInline (s, fonts) = inner $ toList fonts where - inner :: [FontKind] -> Inline - inner [] = Str s - inner (Bold:fs) = Strong [inner fs] - inner (Italic:fs) = Emph [inner fs] +strToInlines :: RoffStr -> Inlines +strToInlines (s, fonts) = inner $ S.toList fonts where + inner :: [FontKind] -> Inlines + inner [] = str s + inner (Bold:fs) = strong $ inner fs + inner (Italic:fs) = emph $ inner fs -- Monospace goes after Bold and Italic in ordered set - inner (Monospace:_) = Code nullAttr s + inner (Monospace:_) = code s inner (Regular:fs) = inner fs -parsePara :: PandocMonad m => ManParser m Block -parsePara = Para <$> parseInlines +parsePara :: PandocMonad m => ManParser m Blocks +parsePara = para <$> parseInlines -parseInlines :: PandocMonad m => ManParser m [Inline] +parseInlines :: PandocMonad m => ManParser m Inlines parseInlines = do inls <- many1 (strInl <|> lineInl <|> linkInl <|> comment) - let withspaces = intersperse [Space] inls - return $ concat withspaces + let withspaces = intersperse B.space inls + return $ mconcat withspaces where - strInl :: PandocMonad m => ManParser m [Inline] + strInl :: PandocMonad m => ManParser m Inlines strInl = do (MStr rstr) <- mstr - return [strToInline rstr] + return $ strToInlines rstr - lineInl :: PandocMonad m => ManParser m [Inline] + lineInl :: PandocMonad m => ManParser m Inlines lineInl = do (MLine fragments) <- mline - return $ strToInline <$> fragments + return $ mconcat $ strToInlines <$> fragments - linkInl :: PandocMonad m => ManParser m [Inline] + linkInl :: PandocMonad m => ManParser m Inlines linkInl = do (MMaybeLink txt) <- mmaybeLink let inls = case runParser linkParser () "" txt of Right lnk -> lnk - Left _ -> [Strong [Str txt]] + Left _ -> strong $ str txt return inls where -- assuming man pages are generated from Linux-like repository - linkParser :: Parsec String () [Inline] + linkParser :: Parsec String () Inlines linkParser = do mpage <- many1 (alphaNum <|> char '_') spacetab @@ -485,21 +481,19 @@ parseInlines = do char ')' other <- many anyChar let manurl pagename section = "../"++section++"/"++pagename++"."++section - return $ [ Link nullAttr [Strong [Str mpage]] (manurl mpage [mansect], mpage) - , Strong [Str $ " ("++[mansect] ++ ")" - , Str other] - ] + lnkInls = link (manurl mpage [mansect]) mpage (strong $ str mpage) + return $ lnkInls <> strong (str (" ("++[mansect] ++ ")") <> str other) - comment :: PandocMonad m => ManParser m [Inline] - comment = mcomment >> return [] + comment :: PandocMonad m => ManParser m Inlines + comment = mcomment >> return mempty -parseCodeBlock :: PandocMonad m => ManParser m Block +parseCodeBlock :: PandocMonad m => ManParser m Blocks parseCodeBlock = do mmacro KCodeBlStart toks <- many (mstr <|> mline <|> mmaybeLink <|> memplyLine <|> munknownMacro <|> mcomment) mmacro KCodeBlEnd - return $ CodeBlock nullAttr (intercalate "\n" . catMaybes $ extractText <$> toks) + return $ codeBlock (intercalate "\n" . catMaybes $ extractText <$> toks) where @@ -510,14 +504,14 @@ parseCodeBlock = do extractText MEmptyLine = Just "" -- string are intercalated with '\n', this prevents double '\n' extractText _ = Nothing -parseHeader :: PandocMonad m => ManParser m Block +parseHeader :: PandocMonad m => ManParser m Blocks parseHeader = do (MHeader lvl ss) <- mheader - return $ Header lvl nullAttr $ intersperse Space $ strToInline <$> ss + return $ header lvl (mconcat $ intersperse B.space $ strToInlines <$> ss) -type ListBuilder = [[Block]] -> Block +type ListBuilder = [Blocks] -> Blocks -parseList :: PandocMonad m => ManParser m Block +parseList :: PandocMonad m => ManParser m Blocks parseList = do xx <- many1 paras let bls = map snd xx @@ -526,13 +520,13 @@ parseList = do where - macroIPInl :: [RoffStr] -> [Inline] - macroIPInl (x:_:[]) = [strToInline x, Space] - macroIPInl _ = [] + macroIPInl :: [RoffStr] -> Inlines + macroIPInl (x:_:[]) = strToInlines x <> B.space + macroIPInl _ = mempty listKind :: [RoffStr] -> Maybe ListBuilder listKind (((c:_), _):_:[]) = - let params style = OrderedList (1, style, DefaultDelim) + let params style = orderedListWith (1, style, DefaultDelim) in case c of _ | isDigit c -> Just $ params Decimal _ | isUpper c -> Just $ params UpperAlpha @@ -541,18 +535,18 @@ parseList = do listKind _ = Nothing - paras :: PandocMonad m => ManParser m (ListBuilder, [Block]) + paras :: PandocMonad m => ManParser m (ListBuilder, Blocks) paras = do (MMacro _ args) <- mmacro KTab let lbuilderOpt = listKind args - lbuilder = fromMaybe BulletList lbuilderOpt + lbuilder = fromMaybe bulletList lbuilderOpt macroinl = macroIPInl args inls <- parseInlines - let parainls = if isNothing lbuilderOpt then macroinl ++ inls else inls - subls <- many sublist - return $ (lbuilder, (Plain parainls) : subls) + let parainls = if isNothing lbuilderOpt then macroinl <> inls else inls + subls <- mconcat <$> many sublist + return $ (lbuilder, plain parainls <> subls) - sublist :: PandocMonad m => ManParser m Block + sublist :: PandocMonad m => ManParser m Blocks sublist = do mmacro KSubTab bl <- parseList @@ -560,5 +554,5 @@ parseList = do return bl -- In case of weird man file it will be parsed succesfully -parseSkipMacro :: PandocMonad m => ManParser m Block -parseSkipMacro = mmacroAny >> return Null +parseSkipMacro :: PandocMonad m => ManParser m Blocks +parseSkipMacro = mmacroAny >> mempty diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index 6226099d2..4d8e13fb1 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -45,7 +45,7 @@ tests = [ =?> (para $ space <> str "aaa") , "link" =: ".BR aa (1)" - =?> (para $ fromList [Link nullAttr [Strong [Str "aa"]] ("../1/aa.1","aa"), Strong [Str " (1)",Str ""]]) + =?> (para $ link "../1/aa.1" "aa" (strong $ str "aa") <> (strong $ str " (1)")) ], testGroup "Escapes" [ "fonts" =: @@ -53,13 +53,13 @@ tests = [ =?> (para $ str "aa" <> (emph $ str "bb") <> str "cc") , "skip" =: "a\\%\\{\\}\\\n\\:b\\0" - =?> (para $ fromList $ map Str ["a", "b"]) + =?> (para $ str "ab") , "replace" =: "\\-\\ \\\\\\[lq]\\[rq]\\[em]\\[en]\\*(lq\\*(rq" - =?> (para $ fromList $ map Str ["-", " ", "\\", "“", "”", "—", "–", "«", "»"]) + =?> (para $ str "- \\“”—–«»") , "replace2" =: "\\t\\e\\`\\^\\|\\'" - =?> (para $ fromList $ map Str ["\t", "\\", "`", " ", " ", "`"]) + =?> (para $ str "\t\\` `") ], testGroup "Lists" [ "bullet" =: -- cgit v1.2.3 From 7741cdbf04f93875e6cd2051a6778c2ca35b5e40 Mon Sep 17 00:00:00 2001 From: Yan Pas Date: Wed, 17 Oct 2018 00:21:52 +0300 Subject: added old-style test --- src/Text/Pandoc/Readers/Man.hs | 1 + test/Tests/Old.hs | 4 + test/Tests/Readers/Man.hs | 2 +- test/man-reader.man | 189 +++++++++++++++++++++++++++++++++++++++++ test/man-reader.native | 94 ++++++++++++++++++++ 5 files changed, 289 insertions(+), 1 deletion(-) create mode 100644 test/man-reader.man create mode 100644 test/man-reader.native (limited to 'test/Tests') diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index d04718fc7..1ffdd1f91 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -73,6 +73,7 @@ type Font = Set FontKind type RoffStr = (String, Font) +-- TODO parse tables (see man tbl) data ManToken = MStr RoffStr | MLine [RoffStr] | MMaybeLink String diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index b426ffd07..842e0f656 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -171,6 +171,10 @@ tests = [ testGroup "markdown" , test "tables" ["-f", "native", "-t", "../data/sample.lua"] "tables.native" "tables.custom" ] + , testGroup "man" + [ test "reader" ["-r", "man", "-w", "native", "-s"] + "man-reader.man" "man-reader.native" + ] ] -- makes sure file is fully closed after reading diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index 4d8e13fb1..9dbfbab4d 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -73,7 +73,7 @@ tests = [ =?> orderedListWith (1,UpperAlpha,DefaultDelim) [plain $ str "first", plain $ str "second"] , "nested" =: ".IP\nfirst\n.RS\n.IP\n1a\n.IP\n1b\n.RE" - =?> fromList [BulletList [[Plain [Str "first"],BulletList [[Plain [Str "1a"]],[Plain [Str "1b"]]]]]] + =?> bulletList [(plain $ str "first") <> (bulletList [plain $ str "1a", plain $ str "1b"])] ], testGroup "CodeBlocks" [ "cb1"=: diff --git a/test/man-reader.man b/test/man-reader.man new file mode 100644 index 000000000..4f3395051 --- /dev/null +++ b/test/man-reader.man @@ -0,0 +1,189 @@ +.TH "Pandoc Man tests" "" "Oct 17, 2018" "" "" +.PP +This is a set of tests for pandoc. +.PP + * * * * * +.SH Headers +.SH Level 1 +.SS Level 2 + + * * * * * +.SH Paragraphs +.PP +Here's a regular paragraph. +.PP +Another paragraph +In Markdown 1.0.0 and earlier. +Version 8. +This line turns into a list item. +Because a hard\-wrapped line in the middle of a paragraph looked like a list +item. +.PP +There should be a hard line break +.PD 0 +.P +.PD +here. +.PP + * * * * * +.SH Block Quotes +Code in a block quote: +.IP +.nf +\f[C] +sub\ status\ { +\ \ \ \ print\ "working"; +} +\f[] +.fi +.PP +A list: +.IP "1." 3 +item one +.IP "2." 3 +item two +.PP +.SH Code Blocks +.PP +Code: +.IP +.nf +\f[C] +\-\-\-\-\ (should\ be\ four\ hyphens) + +sub\ status\ { +\ \ \ \ print\ "working"; +} + +\f[] +.fi +.PP +And: +.IP +.nf +\f[C] +\tthis\ code\ line is\ indented\ by\ one\ tab + +These\ should\ not\ be\ escaped:\ \ \\$\ \\\\\ \\>\ \\[\ \\{ +\f[] +.fi +.PP + * * * * * +.SH Lists +.SS Unordered +.PP +Asterisks: +.IP \[bu] 2 +asterisk 1 +.IP \[bu] 2 +asterisk 2 +.IP \[bu] 2 +asterisk 3 +.PP +.SS Ordered +.IP "1." 3 +First +.IP "2." 3 +Second +.IP "3." 3 +Third +.PP +.SS Nested +.IP \[bu] 2 +Tab +.RS 2 +.IP \[bu] 2 +Tab +.RS 2 +.IP \[bu] 2 +Tab +.RE +.RE +.PP +Here's another: +.IP "1." 3 +First +.IP "2." 3 +Second: +.RS 4 +.IP \[bu] 2 +Fee +.IP \[bu] 2 +Fie +.IP \[bu] 2 +Foe +.RE +.IP "3." 3 +Third +.PP +Same thing: +.IP "1." 3 +First +.IP "2." 3 +Second: +.RS 4 +.IP \[bu] 2 +Fee +.IP \[bu] 2 +Fie +.IP \[bu] 2 +Foe +.RE +.IP "3." 3 +Third +.SS different styles: +.IP "A." 3 +Upper Alpha +.RS 4 +.IP "I." 3 +Upper Roman. +.RS 4 +.IP "(6)" 4 +Decimal start with 6 +.RS 4 +.IP "c)" 3 +Lower alpha with paren +.RE +.RE +.RE +.PP + * * * * * +.SH Special Characters +AT&T has an ampersand in their name. +.PP +4 < 5. +.PP +6 > 5. +.PP +Backslash: \\ +.PP +Backtick: ` +.PP +Asterisk: * +.PP +Underscore: _ +.PP +Left brace: { +.PP +Right brace: } +.PP +Left bracket: [ +.PP +Right bracket: ] +.PP +Left paren: ( +.PP +Right paren: ) +.PP +Greater\-than: > +.PP +Hash: # +.PP +Period: . +.PP +Bang: ! +.PP +Plus: + +.PP +Minus: \- +.PP diff --git a/test/man-reader.native b/test/man-reader.native new file mode 100644 index 000000000..1fa010bd6 --- /dev/null +++ b/test/man-reader.native @@ -0,0 +1,94 @@ +Pandoc (Meta {unMeta = fromList [("title",MetaString "Pandoc Man tests")]}) +[Header 1 ("",[],[]) [Str "Pandoc Man tests"] +,Para [Str "This is a set of tests for pandoc."] +,Para [Str " * * * * *"] +,Header 2 ("",[],[]) [Str "Headers"] +,Header 2 ("",[],[]) [Str "Level",Space,Str "1"] +,Header 3 ("",[],[]) [Str "Level",Space,Str "2"] +,Para [Str " * * * * *"] +,Header 2 ("",[],[]) [Str "Paragraphs"] +,Para [Str "Here's a regular paragraph."] +,Para [Str "Another paragraph",Space,Str "In Markdown 1.0.0 and earlier.",Space,Str "Version 8.",Space,Str "This line turns into a list item.",Space,Str "Because a hard-wrapped line in the middle of a paragraph looked like a list",Space,Str "item."] +,Para [Str "There should be a hard line break"] +,Para [Str "here."] +,Para [Str " * * * * *"] +,Header 2 ("",[],[]) [Str "Block",Space,Str "Quotes"] +,Para [Str "Code in a block quote:"] +,CodeBlock ("",[],[]) "\nsub status {\n print \"working\";\n}\n" +,Para [Str "A list:"] +,OrderedList (1,Decimal,DefaultDelim) + [[Plain [Str "item one"]] + ,[Plain [Str "item two"]]] +,Header 2 ("",[],[]) [Str "Code",Space,Str "Blocks"] +,Para [Str "Code:"] +,CodeBlock ("",[],[]) "\n---- (should be four hyphens)\n\nsub status {\n print \"working\";\n}\n\n" +,Para [Str "And:"] +,CodeBlock ("",[],[]) "\n\tthis code line is indented by one tab\n\nThese should not be escaped: \\$ \\\\ \\> \\[ \\{\n" +,Para [Str " * * * * *"] +,Header 2 ("",[],[]) [Str "Lists"] +,Header 3 ("",[],[]) [Str "Unordered"] +,Para [Str "Asterisks:"] +,BulletList + [[Plain [Str "",Space,Str "asterisk 1"]] + ,[Plain [Str "",Space,Str "asterisk 2"]] + ,[Plain [Str "",Space,Str "asterisk 3"]]] +,Header 3 ("",[],[]) [Str "Ordered"] +,OrderedList (1,Decimal,DefaultDelim) + [[Plain [Str "First"]] + ,[Plain [Str "Second"]] + ,[Plain [Str "Third"]]] +,Header 3 ("",[],[]) [Str "Nested"] +,BulletList + [[Plain [Str "",Space,Str "Tab"] + ,BulletList + [[Plain [Str "",Space,Str "Tab"] + ,BulletList + [[Plain [Str "",Space,Str "Tab"]]]]]]] +,Para [Str "Here's another:"] +,OrderedList (1,Decimal,DefaultDelim) + [[Plain [Str "First"]] + ,[Plain [Str "Second:"] + ,BulletList + [[Plain [Str "",Space,Str "Fee"]] + ,[Plain [Str "",Space,Str "Fie"]] + ,[Plain [Str "",Space,Str "Foe"]]]] + ,[Plain [Str "Third"]]] +,Para [Str "Same thing:"] +,OrderedList (1,Decimal,DefaultDelim) + [[Plain [Str "First"]] + ,[Plain [Str "Second:"] + ,BulletList + [[Plain [Str "",Space,Str "Fee"]] + ,[Plain [Str "",Space,Str "Fie"]] + ,[Plain [Str "",Space,Str "Foe"]]]] + ,[Plain [Str "Third"]]] +,Header 3 ("",[],[]) [Str "different",Space,Str "styles:"] +,OrderedList (1,UpperAlpha,DefaultDelim) + [[Plain [Str "Upper Alpha"] + ,OrderedList (1,UpperAlpha,DefaultDelim) + [[Plain [Str "Upper Roman."] + ,BulletList + [[Plain [Str "(6)",Space,Str "Decimal start with 6"] + ,OrderedList (1,LowerAlpha,DefaultDelim) + [[Plain [Str "Lower alpha with paren"]]]]]]]]] +,Para [Str " * * * * *"] +,Header 2 ("",[],[]) [Str "Special",Space,Str "Characters"] +,Para [Str "AT&T has an ampersand in their name."] +,Para [Str "4 < 5."] +,Para [Str "6 > 5."] +,Para [Str "Backslash: \\"] +,Para [Str "Backtick: `"] +,Para [Str "Asterisk: *"] +,Para [Str "Underscore: _"] +,Para [Str "Left brace: {"] +,Para [Str "Right brace: }"] +,Para [Str "Left bracket: ["] +,Para [Str "Right bracket: ]"] +,Para [Str "Left paren: ("] +,Para [Str "Right paren: )"] +,Para [Str "Greater-than: >"] +,Para [Str "Hash: #"] +,Para [Str "Period: ."] +,Para [Str "Bang: !"] +,Para [Str "Plus: +"] +,Para [Str "Minus: -"]] -- cgit v1.2.3