From d5e4bc179c0517a5da84ed95d55e1f10f1f16a94 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 3 May 2016 22:52:10 +0200 Subject: Org reader: stop padding short table rows Emacs Org-mode doesn't add any padding to table rows. The first row (header or first body row) is used to determine the column count, no other magic is performed. The org reader was padding rows to the length of the longest table row. This was done due to a misunderstanding of how Org handles tables. This feature reflected how Org-mode handles tables when pressing . The Org exporter however, which is what the reader should implement, doesn't do any of this. So this was a mis-feature that made the reader more complex and reduced comparability. It was hence removed. --- src/Text/Pandoc/Readers/Org.hs | 44 +++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5e98be31d..0de64c663 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -774,9 +774,13 @@ data OrgTableRow = OrgContentRow (F [Blocks]) | OrgAlignRow [Alignment] | OrgHlineRow +-- OrgTable is strongly related to the pandoc table ADT. Using the same +-- (i.e. pandoc-global) ADT would mean that the reader would break if the +-- global structure was to be changed, which would be bad. The final table +-- should be generated using a builder function. Column widths aren't +-- implemented yet, so they are not tracked here. data OrgTable = OrgTable - { orgTableColumns :: Int - , orgTableAlignments :: [Alignment] + { orgTableAlignments :: [Alignment] , orgTableHeader :: [Blocks] , orgTableRows :: [[Blocks]] } @@ -792,7 +796,7 @@ table = try $ do orgToPandocTable :: OrgTable -> Inlines -> Blocks -orgToPandocTable (OrgTable _ aligns heads lns) caption = +orgToPandocTable (OrgTable aligns heads lns) caption = B.table caption (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char @@ -840,20 +844,18 @@ tableHline = try $ rowsToTable :: [OrgTableRow] -> F OrgTable -rowsToTable = foldM (flip rowToContent) zeroTable - where zeroTable = OrgTable 0 mempty mempty mempty - -normalizeTable :: OrgTable - -> OrgTable -normalizeTable (OrgTable cols aligns heads lns) = - let aligns' = fillColumns aligns AlignDefault - heads' = if heads == mempty - then mempty - else fillColumns heads (B.plain mempty) - lns' = map (`fillColumns` B.plain mempty) lns - fillColumns base padding = take cols $ base ++ repeat padding - in OrgTable cols aligns' heads' lns' +rowsToTable = foldM (flip rowToContent) emptyTable + where emptyTable = OrgTable mempty mempty mempty +normalizeTable :: OrgTable -> OrgTable +normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows + where + refRow = if heads /= mempty + then heads + else if rows == mempty then mempty else head rows + cols = length refRow + fillColumns base padding = take cols $ base ++ repeat padding + aligns' = fillColumns aligns AlignDefault -- One or more horizontal rules after the first content line mark the previous -- line as a header. All other horizontal lines are discarded. @@ -861,16 +863,10 @@ rowToContent :: OrgTableRow -> OrgTable -> F OrgTable rowToContent OrgHlineRow t = maybeBodyToHeader t -rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t +rowToContent (OrgAlignRow as) t = setAligns as t rowToContent (OrgContentRow rf) t = do rs <- rf - setLongestRow rs =<< appendToBody rs t - -setLongestRow :: [a] - -> OrgTable - -> F OrgTable -setLongestRow rs t = - return t{ orgTableColumns = max (length rs) (orgTableColumns t) } + appendToBody rs t maybeBodyToHeader :: OrgTable -> F OrgTable -- cgit v1.2.3 From a51e4e82156d8d6f0d3dc616c4f38c70b25be616 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 4 May 2016 00:03:48 +0200 Subject: Org reader: refactor rows-to-table conversion This refactores the codes conversing a list table lines to an org table ADT. The old code was simplified and is now slightly less ugly. --- src/Text/Pandoc/Readers/Org.hs | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0de64c663..e41dd5dd8 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -844,7 +844,7 @@ tableHline = try $ rowsToTable :: [OrgTableRow] -> F OrgTable -rowsToTable = foldM (flip rowToContent) emptyTable +rowsToTable = foldM rowToContent emptyTable where emptyTable = OrgTable mempty mempty mempty normalizeTable :: OrgTable -> OrgTable @@ -859,31 +859,31 @@ normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows -- One or more horizontal rules after the first content line mark the previous -- line as a header. All other horizontal lines are discarded. -rowToContent :: OrgTableRow - -> OrgTable +rowToContent :: OrgTable + -> OrgTableRow -> F OrgTable -rowToContent OrgHlineRow t = maybeBodyToHeader t -rowToContent (OrgAlignRow as) t = setAligns as t -rowToContent (OrgContentRow rf) t = do - rs <- rf - appendToBody rs t - -maybeBodyToHeader :: OrgTable - -> F OrgTable -maybeBodyToHeader t = case t of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - return t{ orgTableHeader = b , orgTableRows = [] } - _ -> return t - -appendToBody :: [Blocks] - -> OrgTable - -> F OrgTable -appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] } - -setAligns :: [Alignment] - -> OrgTable - -> F OrgTable -setAligns aligns t = return $ t{ orgTableAlignments = aligns } +rowToContent orgTable row = + case row of + OrgHlineRow -> return singleRowPromotedToHeader + OrgAlignRow as -> return . setAligns $ as + OrgContentRow cs -> appendToBody cs + where + singleRowPromotedToHeader :: OrgTable + singleRowPromotedToHeader = case orgTable of + OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> + orgTable{ orgTableHeader = b , orgTableRows = [] } + _ -> orgTable + + setAligns :: [Alignment] -> OrgTable + setAligns aligns = orgTable{ orgTableAlignments = aligns } + + appendToBody :: F [Blocks] -> F OrgTable + appendToBody frow = do + newRow <- frow + let oldRows = orgTableRows orgTable + -- NOTE: This is an inefficient O(n) operation. This should be changed + -- if performance ever becomes a problem. + return orgTable{ orgTableRows = oldRows ++ [newRow] } -- -- cgit v1.2.3 From 2d825603c684d6c7af6adb08f26ed34a078a5afe Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 4 May 2016 15:33:18 +0200 Subject: Org reader: fix handling of empty table cells, rows This fixes Org mode parsing of some corner cases regarding empty cells and rows. Empty cells weren't parsed correctly, e.g. `|||` should be two empty cells, but would be parsed as a single cell containing a pipe character. Empty rows where parsed as alignment rows and dropped from the output. This fixes #2616. --- src/Text/Pandoc/Readers/Org.hs | 30 +++++++++++++++++------------- tests/Tests/Readers/Org.hs | 13 ++++++++++++- 2 files changed, 29 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index e41dd5dd8..493e94d2e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -35,6 +35,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), trimInlines ) import Text.Pandoc.Definition import Text.Pandoc.Compat.Monoid ((<>)) +import Text.Pandoc.Error import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF @@ -57,8 +58,6 @@ import qualified Data.Set as Set import Data.Maybe (fromMaybe, isJust) import Network.HTTP (urlEncode) -import Text.Pandoc.Error - -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) @@ -807,18 +806,19 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) tableContentCell :: OrgParser (F Blocks) tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell - -endOfCell :: OrgParser Char -endOfCell = try $ char '|' <|> lookAhead newline + fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell tableAlignRow :: OrgParser OrgTableRow -tableAlignRow = try $ - OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline) +tableAlignRow = try $ do + tableStart + cells <- many1Till tableAlignCell newline + -- Empty rows are regular (i.e. content) rows, not alignment rows. + guard $ any (/= AlignDefault) cells + return $ OrgAlignRow cells tableAlignCell :: OrgParser Alignment tableAlignCell = @@ -833,15 +833,19 @@ tableAlignCell = where emptyCell = try $ skipSpaces *> endOfCell tableAlignFromChar :: OrgParser Alignment -tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft - , char 'c' *> return AlignCenter - , char 'r' *> return AlignRight - ] +tableAlignFromChar = try $ + choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] tableHline :: OrgParser OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) +endOfCell :: OrgParser Char +endOfCell = try $ char '|' <|> lookAhead newline + rowsToTable :: [OrgTableRow] -> F OrgTable rowsToTable = foldM rowToContent emptyTable diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 98658482c..81684d8ef 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -941,7 +941,7 @@ tests = , "Empty table" =: "||" =?> - simpleTable' 1 mempty mempty + simpleTable' 1 mempty [[mempty]] , "Glider Table" =: unlines [ "| 1 | 0 | 0 |" @@ -996,6 +996,17 @@ tests = , [ plain "dynamic", plain "Lisp" ] ] + , "Table with empty cells" =: + "|||c|" =?> + simpleTable' 3 mempty [[mempty, mempty, plain "c"]] + + , "Table with empty rows" =: + unlines [ "| first |" + , "| |" + , "| third |" + ] =?> + simpleTable' 1 mempty [[plain "first"], [mempty], [plain "third"]] + , "Table with alignment row" =: unlines [ "| Numbers | Text | More |" , "| | | |" -- cgit v1.2.3