diff options
| author | Albert Krewinkel <albert@zeitkraut.de> | 2017-05-10 23:35:45 +0200 | 
|---|---|---|
| committer | Albert Krewinkel <albert+github@zeitkraut.de> | 2017-05-11 00:17:56 +0200 | 
| commit | 4b9fb7a1280f1d923a6bcecbf42a496480020359 (patch) | |
| tree | 45438deecb7439995755b9934598c554112457c6 /src | |
| parent | 7bdf38ef2eb39e552a0825436dc8bdfa5507e245 (diff) | |
| download | pandoc-4b9fb7a1280f1d923a6bcecbf42a496480020359.tar.gz | |
Combine grid table parsers
The grid table parsers for markdown and rst was combined into one single
parser, slightly changing parsing behavior of both parsers:
- The markdown parser now compactifies block content cell-wise: pure
  text blocks in cells are now treated as paragraphs only if the cell
  contains multiple paragraphs, and as plain blocks otherwise. Before,
  this was true only for single-column tables.
- The rst parser now accepts newlines and multiple blocks in header
  cells.
Closes: #3638
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Parsing.hs | 69 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 84 | 
2 files changed, 52 insertions, 101 deletions
| diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index e0c0e36d6..fa3ff898e 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -66,6 +66,7 @@ module Text.Pandoc.Parsing ( anyLine,                               tableWith,                               widthsFromIndices,                               gridTableWith, +                             gridTableWith',                               readWith,                               readWithM,                               testStringWith, @@ -770,6 +771,20 @@ tableWith :: (Stream s m Char, HasReaderOptions st,            -> ParserT s st m end            -> ParserT s st m (mf Blocks)  tableWith headerParser rowParser lineParser footerParser = try $ do +  (aligns, widths, heads, rows) <- tableWith' headerParser rowParser +                                                lineParser footerParser +  return $ B.table mempty (zip aligns widths) <$> heads <*> rows + +type TableComponents mf = ([Alignment], [Double], mf [Blocks], mf [[Blocks]]) + +tableWith' :: (Stream s m Char, HasReaderOptions st, +               Functor mf, Applicative mf, Monad mf) +           => ParserT s st m (mf [Blocks], [Alignment], [Int]) +           -> ([Int] -> ParserT s st m (mf [Blocks])) +           -> ParserT s st m sep +           -> ParserT s st m end +           -> ParserT s st m (TableComponents mf) +tableWith' headerParser rowParser lineParser footerParser = try $ do      (heads, aligns, indices) <- headerParser      lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser      footerParser @@ -777,7 +792,7 @@ tableWith headerParser rowParser lineParser footerParser = try $ do      let widths = if (indices == [])                      then replicate (length aligns) 0.0                      else widthsFromIndices numColumns indices -    return $ B.table mempty (zip aligns widths) <$> heads <*> lines' +    return $ (aligns, widths, heads, lines')  -- Calculate relative widths of table columns, based on indices  widthsFromIndices :: Int      -- Number of columns on terminal @@ -812,24 +827,42 @@ widthsFromIndices numColumns' indices =  -- ending with a footer (dashed line followed by blank line).  gridTableWith :: (Stream [Char] m Char, HasReaderOptions st,                    Functor mf, Applicative mf, Monad mf) -              => ParserT [Char] st m (mf Blocks)   -- ^ Block list parser +              => ParserT [Char] st m (mf Blocks)  -- ^ Block list parser                -> Bool                             -- ^ Headerless table                -> ParserT [Char] st m (mf Blocks)  gridTableWith blocks headless =    tableWith (gridTableHeader headless blocks) (gridTableRow blocks)              (gridTableSep '-') gridTableFooter +gridTableWith' :: (Stream [Char] m Char, HasReaderOptions st, +                   Functor mf, Applicative mf, Monad mf) +               => ParserT [Char] st m (mf Blocks)  -- ^ Block list parser +               -> Bool                             -- ^ Headerless table +               -> ParserT [Char] st m (TableComponents mf) +gridTableWith' blocks headless = +  tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) +             (gridTableSep '-') gridTableFooter +  gridTableSplitLine :: [Int] -> String -> [String]  gridTableSplitLine indices line = map removeFinalBar $ tail $    splitStringByIndices (init indices) $ trimr line -gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int) +gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)  gridPart ch = do +  leftColon <- option False (True <$ char ':')    dashes <- many1 (char ch) +  rightColon <- option False (True <$ char ':')    char '+' -  return (length dashes, length dashes + 1) - -gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)] +  let lengthDashes = length dashes + (if leftColon then 1 else 0) + +                       (if rightColon then 1 else 0) +  let alignment = case (leftColon, rightColon) of +                       (True, True)   -> AlignCenter +                       (True, False)  -> AlignLeft +                       (False, True)  -> AlignRight +                       (False, False) -> AlignDefault +  return ((lengthDashes, lengthDashes + 1), alignment) + +gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]  gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline  removeFinalBar :: String -> String @@ -853,18 +886,18 @@ gridTableHeader headless blocks = try $ do                      else many1                           (notFollowedBy (gridTableSep '=') >> char '|' >>                             many1Till anyChar newline) -  if headless -     then return () -     else gridTableSep '=' >> return () -  let lines'   = map snd dashes +  underDashes <- if headless +                    then return dashes +                    else gridDashedLines '=' +  guard $ length dashes == length underDashes +  let lines'   = map (snd . fst) underDashes    let indices  = scanl (+) 0 lines' -  let aligns   = replicate (length lines') AlignDefault -  -- RST does not have a notion of alignments +  let aligns   = map snd underDashes    let rawHeads = if headless -                    then replicate (length dashes) "" -                    else map (intercalate " ") $ transpose +                    then replicate (length underDashes) "" +                    else map (unlines . map trim) $ transpose                         $ map (gridTableSplitLine indices) rawContent -  heads <- fmap sequence . mapM (parseFromString blocks) $ map trim rawHeads +  heads <- fmap sequence $ mapM (parseFromString blocks . trim) rawHeads    return (heads, aligns, indices)  gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [String] @@ -882,6 +915,9 @@ gridTableRow blocks indices = do    colLines <- many1 (gridTableRawLine indices)    let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $                 transpose colLines +      compactifyCell bs = case compactify [bs] of +                            []  -> mempty +                            x:_ -> x    cells <- sequence <$> mapM (parseFromString blocks) cols    return $ fmap (map compactifyCell) cells @@ -893,9 +929,6 @@ removeOneLeadingSpace xs =     where startsWithSpace ""     = True           startsWithSpace (y:_) = y == ' ' -compactifyCell :: Blocks -> Blocks -compactifyCell bs = head $ compactify [bs] -  -- | Parse footer for a grid table.  gridTableFooter :: Stream s m Char => ParserT s st m [Char]  gridTableFooter = blanklines diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 691d4d5cf..4ff5a1845 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1291,89 +1291,7 @@ multilineTableHeader headless = try $ do  -- ending with a footer (dashed line followed by blank line).  gridTable :: PandocMonad m => Bool -- ^ Headerless table            -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) -gridTable headless = -  tableWith (gridTableHeader headless) gridTableRow -            (gridTableSep '-') gridTableFooter - -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = map removeFinalBar $ tail $ -  splitStringByIndices (init indices) $ trimr line - -gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment) -gridPart ch = do -  leftColon <- option False (True <$ char ':') -  dashes <- many1 (char ch) -  rightColon <- option False (True <$ char ':') -  char '+' -  let lengthDashes = length dashes + (if leftColon then 1 else 0) + -                       (if rightColon then 1 else 0) -  let alignment = case (leftColon, rightColon) of -                       (True, True)   -> AlignCenter -                       (True, False)  -> AlignLeft -                       (False, True)  -> AlignRight -                       (False, False) -> AlignDefault -  return ((lengthDashes, lengthDashes + 1), alignment) - -gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline - -removeFinalBar :: String -> String -removeFinalBar = -  reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse - --- | Separator between rows of grid table. -gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - --- | Parse header for a grid table. -gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table -                -> MarkdownParser m (F [Blocks], [Alignment], [Int]) -gridTableHeader headless = try $ do -  optional blanklines -  dashes <- gridDashedLines '-' -  rawContent  <- if headless -                    then return [] -                    else many1 (try (char '|' >> anyLine)) -  underDashes <- if headless -                    then return dashes -                    else gridDashedLines '=' -  guard $ length dashes == length underDashes -  let lines'   = map (snd . fst) underDashes -  let indices  = scanl (+) 0 lines' -  let aligns   = map snd underDashes -  let rawHeads = if headless -                    then replicate (length underDashes) "" -                    else map (unlines . map trim) $ transpose -                       $ map (gridTableSplitLine indices) rawContent -  heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads -  return (heads, aligns, indices) - -gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String] -gridTableRawLine indices = do -  char '|' -  line <- anyLine -  return (gridTableSplitLine indices line) - --- | Parse row of grid table. -gridTableRow :: PandocMonad m => [Int] -             -> MarkdownParser m (F [Blocks]) -gridTableRow indices = do -  colLines <- many1 (gridTableRawLine indices) -  let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ -               transpose colLines -  fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols) - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = -  if all startsWithSpace xs -     then map (drop 1) xs -     else xs -   where startsWithSpace ""    = True -         startsWithSpace (y:_) = y == ' ' - --- | Parse footer for a grid table. -gridTableFooter :: PandocMonad m => MarkdownParser m [Char] -gridTableFooter = blanklines +gridTable headless = gridTableWith' parseBlocks headless  pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])  pipeBreak = try $ do | 
