diff options
author | Albert Krewinkel <albert@zeitkraut.de> | 2017-05-10 23:35:45 +0200 |
---|---|---|
committer | Albert Krewinkel <albert+github@zeitkraut.de> | 2017-05-11 00:17:56 +0200 |
commit | 4b9fb7a1280f1d923a6bcecbf42a496480020359 (patch) | |
tree | 45438deecb7439995755b9934598c554112457c6 /src/Text | |
parent | 7bdf38ef2eb39e552a0825436dc8bdfa5507e245 (diff) | |
download | pandoc-4b9fb7a1280f1d923a6bcecbf42a496480020359.tar.gz |
Combine grid table parsers
The grid table parsers for markdown and rst was combined into one single
parser, slightly changing parsing behavior of both parsers:
- The markdown parser now compactifies block content cell-wise: pure
text blocks in cells are now treated as paragraphs only if the cell
contains multiple paragraphs, and as plain blocks otherwise. Before,
this was true only for single-column tables.
- The rst parser now accepts newlines and multiple blocks in header
cells.
Closes: #3638
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 69 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 84 |
2 files changed, 52 insertions, 101 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index e0c0e36d6..fa3ff898e 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -66,6 +66,7 @@ module Text.Pandoc.Parsing ( anyLine, tableWith, widthsFromIndices, gridTableWith, + gridTableWith', readWith, readWithM, testStringWith, @@ -770,6 +771,20 @@ tableWith :: (Stream s m Char, HasReaderOptions st, -> ParserT s st m end -> ParserT s st m (mf Blocks) tableWith headerParser rowParser lineParser footerParser = try $ do + (aligns, widths, heads, rows) <- tableWith' headerParser rowParser + lineParser footerParser + return $ B.table mempty (zip aligns widths) <$> heads <*> rows + +type TableComponents mf = ([Alignment], [Double], mf [Blocks], mf [[Blocks]]) + +tableWith' :: (Stream s m Char, HasReaderOptions st, + Functor mf, Applicative mf, Monad mf) + => ParserT s st m (mf [Blocks], [Alignment], [Int]) + -> ([Int] -> ParserT s st m (mf [Blocks])) + -> ParserT s st m sep + -> ParserT s st m end + -> ParserT s st m (TableComponents mf) +tableWith' headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser footerParser @@ -777,7 +792,7 @@ tableWith headerParser rowParser lineParser footerParser = try $ do let widths = if (indices == []) then replicate (length aligns) 0.0 else widthsFromIndices numColumns indices - return $ B.table mempty (zip aligns widths) <$> heads <*> lines' + return $ (aligns, widths, heads, lines') -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal @@ -812,24 +827,42 @@ widthsFromIndices numColumns' indices = -- ending with a footer (dashed line followed by blank line). gridTableWith :: (Stream [Char] m Char, HasReaderOptions st, Functor mf, Applicative mf, Monad mf) - => ParserT [Char] st m (mf Blocks) -- ^ Block list parser + => ParserT [Char] st m (mf Blocks) -- ^ Block list parser -> Bool -- ^ Headerless table -> ParserT [Char] st m (mf Blocks) gridTableWith blocks headless = tableWith (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter +gridTableWith' :: (Stream [Char] m Char, HasReaderOptions st, + Functor mf, Applicative mf, Monad mf) + => ParserT [Char] st m (mf Blocks) -- ^ Block list parser + -> Bool -- ^ Headerless table + -> ParserT [Char] st m (TableComponents mf) +gridTableWith' blocks headless = + tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) + (gridTableSep '-') gridTableFooter + gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ trimr line -gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int) +gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment) gridPart ch = do + leftColon <- option False (True <$ char ':') dashes <- many1 (char ch) + rightColon <- option False (True <$ char ':') char '+' - return (length dashes, length dashes + 1) - -gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)] + let lengthDashes = length dashes + (if leftColon then 1 else 0) + + (if rightColon then 1 else 0) + let alignment = case (leftColon, rightColon) of + (True, True) -> AlignCenter + (True, False) -> AlignLeft + (False, True) -> AlignRight + (False, False) -> AlignDefault + return ((lengthDashes, lengthDashes + 1), alignment) + +gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: String -> String @@ -853,18 +886,18 @@ gridTableHeader headless blocks = try $ do else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () - let lines' = map snd dashes + underDashes <- if headless + then return dashes + else gridDashedLines '=' + guard $ length dashes == length underDashes + let lines' = map (snd . fst) underDashes let indices = scanl (+) 0 lines' - let aligns = replicate (length lines') AlignDefault - -- RST does not have a notion of alignments + let aligns = map snd underDashes let rawHeads = if headless - then replicate (length dashes) "" - else map (intercalate " ") $ transpose + then replicate (length underDashes) "" + else map (unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent - heads <- fmap sequence . mapM (parseFromString blocks) $ map trim rawHeads + heads <- fmap sequence $ mapM (parseFromString blocks . trim) rawHeads return (heads, aligns, indices) gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [String] @@ -882,6 +915,9 @@ gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines + compactifyCell bs = case compactify [bs] of + [] -> mempty + x:_ -> x cells <- sequence <$> mapM (parseFromString blocks) cols return $ fmap (map compactifyCell) cells @@ -893,9 +929,6 @@ removeOneLeadingSpace xs = where startsWithSpace "" = True startsWithSpace (y:_) = y == ' ' -compactifyCell :: Blocks -> Blocks -compactifyCell bs = head $ compactify [bs] - -- | Parse footer for a grid table. gridTableFooter :: Stream s m Char => ParserT s st m [Char] gridTableFooter = blanklines diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 691d4d5cf..4ff5a1845 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1291,89 +1291,7 @@ multilineTableHeader headless = try $ do -- ending with a footer (dashed line followed by blank line). gridTable :: PandocMonad m => Bool -- ^ Headerless table -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) -gridTable headless = - tableWith (gridTableHeader headless) gridTableRow - (gridTableSep '-') gridTableFooter - -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitStringByIndices (init indices) $ trimr line - -gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment) -gridPart ch = do - leftColon <- option False (True <$ char ':') - dashes <- many1 (char ch) - rightColon <- option False (True <$ char ':') - char '+' - let lengthDashes = length dashes + (if leftColon then 1 else 0) + - (if rightColon then 1 else 0) - let alignment = case (leftColon, rightColon) of - (True, True) -> AlignCenter - (True, False) -> AlignLeft - (False, True) -> AlignRight - (False, False) -> AlignDefault - return ((lengthDashes, lengthDashes + 1), alignment) - -gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline - -removeFinalBar :: String -> String -removeFinalBar = - reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse - --- | Separator between rows of grid table. -gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - --- | Parse header for a grid table. -gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table - -> MarkdownParser m (F [Blocks], [Alignment], [Int]) -gridTableHeader headless = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return [] - else many1 (try (char '|' >> anyLine)) - underDashes <- if headless - then return dashes - else gridDashedLines '=' - guard $ length dashes == length underDashes - let lines' = map (snd . fst) underDashes - let indices = scanl (+) 0 lines' - let aligns = map snd underDashes - let rawHeads = if headless - then replicate (length underDashes) "" - else map (unlines . map trim) $ transpose - $ map (gridTableSplitLine indices) rawContent - heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads - return (heads, aligns, indices) - -gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String] -gridTableRawLine indices = do - char '|' - line <- anyLine - return (gridTableSplitLine indices line) - --- | Parse row of grid table. -gridTableRow :: PandocMonad m => [Int] - -> MarkdownParser m (F [Blocks]) -gridTableRow indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols) - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - --- | Parse footer for a grid table. -gridTableFooter :: PandocMonad m => MarkdownParser m [Char] -gridTableFooter = blanklines +gridTable headless = gridTableWith' parseBlocks headless pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) pipeBreak = try $ do |