From 79e6f8db13ef8f0db6da8fe4e17b7626fe6ef3e9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 23 Nov 2021 10:50:35 -0800 Subject: Improve detection of pipe table line widths. Fixed calculation of maximum column widths in pipe tables. It is now based on the length of the markdown line, rather than a "stringified" version of the parsed line. This should be more predictable for users. In addition, we take into account double-wide characters such as emojis. Closes #7713. --- MANUAL.txt | 12 ++++++------ src/Text/Pandoc/Readers/Markdown.hs | 32 ++++++++++++++++++-------------- test/command/7713.md | 28 ++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 20 deletions(-) create mode 100644 test/command/7713.md diff --git a/MANUAL.txt b/MANUAL.txt index 7e15e8833..0038f16cf 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -4055,12 +4055,12 @@ legal (though ugly) pipe table: orange|3.09 The cells of pipe tables cannot contain block elements like paragraphs -and lists, and cannot span multiple lines. If a pipe table contains a -row whose Markdown content is wider than the column width (see -`--columns`), then the table will take up the full text width and -the cell contents will wrap, with the relative cell widths determined -by the number of dashes in the line separating the table header from -the table body. (For example `---|-` would make the first column 3/4 +and lists, and cannot span multiple lines. If any line of the +markdown source is longer than the column width (see `--columns`), +then the table will take up the full text width and the cell +contents will wrap, with the relative cell widths determined by +the number of dashes in the line separating the table header +from the table body. (For example `---|-` would make the first column 3/4 and the second column 1/4 of the full text width.) On the other hand, if no lines are wider than column width, then cell contents will not be wrapped, and the cells will be sized diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index e24c38d33..b72ab22e2 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,5 @@ {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} @@ -21,8 +22,8 @@ module Text.Pandoc.Readers.Markdown ( import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isPunctuation, isSpace) +import Text.DocLayout (realLength) import Data.List (transpose, elemIndex, sortOn, foldl') -import Data.List.NonEmpty (NonEmpty(..)) import qualified Data.Map as M import Data.Maybe import qualified Data.Set as Set @@ -39,6 +40,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), report) import Text.Pandoc.Definition as Pandoc import Text.Pandoc.Emoji (emojiToInline) import Text.Pandoc.Error +import Safe.Foldable (maximumBounded) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Walk (walk) @@ -1351,26 +1353,30 @@ pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak - let heads' = take (length aligns) <$> heads + let cellContents = parseFromString' pipeTableCell . trim + let numcols = length aligns + let heads' = take numcols heads lines' <- many pipeTableRow - let lines'' = map (take (length aligns) <$>) lines' - let maxlength = maximum $ - fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'') - numColumns <- getOption readerColumns - let widths = if maxlength > numColumns + let lines'' = map (take numcols) lines' + let lineWidths = map (sum . map realLength) (heads' : lines'') + columns <- getOption readerColumns + -- add numcols + 1 for the pipes themselves + let widths = if maximumBounded (sum seplengths : lineWidths) + (numcols + 1) > columns then map (\len -> fromIntegral len / fromIntegral (sum seplengths)) seplengths else replicate (length aligns) 0.0 - return (aligns, widths, toHeaderRow <$> heads', map toRow <$> sequence lines'') + (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads' + (rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines'' + return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows) sepPipe :: PandocMonad m => MarkdownParser m () sepPipe = try $ do char '|' <|> char '+' notFollowedBy blankline --- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks]) +-- parse a row, returning raw cell contents +pipeTableRow :: PandocMonad m => MarkdownParser m [Text] pipeTableRow = try $ do scanForPipe skipMany spaceChar @@ -1378,13 +1384,11 @@ pipeTableRow = try $ do -- split into cells let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') <|> void (noneOf "|\n\r") - let cellContents = withRaw (many chunk) >>= - parseFromString' pipeTableCell . trim . snd - cells <- cellContents `sepEndBy1` char '|' + cells <- (snd <$> withRaw (many chunk)) `sepEndBy1` char '|' -- surrounding pipes needed for a one-column table: guard $ not (length cells == 1 && not openPipe) blankline - return $ sequence cells + return cells pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks) pipeTableCell = diff --git a/test/command/7713.md b/test/command/7713.md new file mode 100644 index 000000000..d42dcd9b8 --- /dev/null +++ b/test/command/7713.md @@ -0,0 +1,28 @@ +``` +% pandoc +| aaaaaaaaaaaa | bbbbb | ccccccccccc | +| --- | --- | --- | +| | | cccccccccc cccccccccc cccccccccc cccccccccc cccccccccc cccccccccc | +^D + +++++ + + + + + + + + + + + + + + +
aaaaaaaaaaaabbbbbccccccccccc
cccccccccc cccccccccc cccccccccc cccccccccc cccccccccc cccccccccc
+``` -- cgit v1.2.3