aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2016-05-05 16:22:56 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2016-05-05 16:22:56 -0700
commit21d1a3b57cc37cc8c13eaf24faf0743259afdb9a (patch)
tree0752595991b099bae35357b53ea76f44256a840f
parentf7a5c17a63d48b26e3c676170e8673f3011cf452 (diff)
parent2d825603c684d6c7af6adb08f26ed34a078a5afe (diff)
downloadpandoc-21d1a3b57cc37cc8c13eaf24faf0743259afdb9a.tar.gz
Merge pull request #2898 from tarleb/org-table-refactoring
Org reader: table parsing code refactoring and fixes
-rw-r--r--src/Text/Pandoc/Readers/Org.hs118
-rw-r--r--tests/Tests/Readers/Org.hs21
2 files changed, 75 insertions, 64 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index a7987475a..db1e70ea0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -35,6 +35,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
trimInlines )
import Text.Pandoc.Definition
import Text.Pandoc.Compat.Monoid ((<>))
+import Text.Pandoc.Error
import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
@@ -57,8 +58,6 @@ import qualified Data.Set as Set
import Data.Maybe (fromMaybe, isJust)
import Network.HTTP (urlEncode)
-import Text.Pandoc.Error
-
-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
@@ -774,9 +773,13 @@ data OrgTableRow = OrgContentRow (F [Blocks])
| OrgAlignRow [Alignment]
| OrgHlineRow
+-- OrgTable is strongly related to the pandoc table ADT. Using the same
+-- (i.e. pandoc-global) ADT would mean that the reader would break if the
+-- global structure was to be changed, which would be bad. The final table
+-- should be generated using a builder function. Column widths aren't
+-- implemented yet, so they are not tracked here.
data OrgTable = OrgTable
- { orgTableColumns :: Int
- , orgTableAlignments :: [Alignment]
+ { orgTableAlignments :: [Alignment]
, orgTableHeader :: [Blocks]
, orgTableRows :: [[Blocks]]
}
@@ -792,7 +795,7 @@ table = try $ do
orgToPandocTable :: OrgTable
-> Inlines
-> Blocks
-orgToPandocTable (OrgTable _ aligns heads lns) caption =
+orgToPandocTable (OrgTable aligns heads lns) caption =
B.table caption (zip aligns $ repeat 0) heads lns
tableStart :: OrgParser Char
@@ -803,18 +806,19 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
tableContentRow :: OrgParser OrgTableRow
tableContentRow = try $
- OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
+ OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
tableContentCell :: OrgParser (F Blocks)
tableContentCell = try $
- fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
-
-endOfCell :: OrgParser Char
-endOfCell = try $ char '|' <|> lookAhead newline
+ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
tableAlignRow :: OrgParser OrgTableRow
-tableAlignRow = try $
- OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)
+tableAlignRow = try $ do
+ tableStart
+ cells <- many1Till tableAlignCell newline
+ -- Empty rows are regular (i.e. content) rows, not alignment rows.
+ guard $ any (/= AlignDefault) cells
+ return $ OrgAlignRow cells
tableAlignCell :: OrgParser Alignment
tableAlignCell =
@@ -829,65 +833,61 @@ tableAlignCell =
where emptyCell = try $ skipSpaces *> endOfCell
tableAlignFromChar :: OrgParser Alignment
-tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
- , char 'c' *> return AlignCenter
- , char 'r' *> return AlignRight
- ]
+tableAlignFromChar = try $
+ choice [ char 'l' *> return AlignLeft
+ , char 'c' *> return AlignCenter
+ , char 'r' *> return AlignRight
+ ]
tableHline :: OrgParser OrgTableRow
tableHline = try $
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
+endOfCell :: OrgParser Char
+endOfCell = try $ char '|' <|> lookAhead newline
+
rowsToTable :: [OrgTableRow]
-> F OrgTable
-rowsToTable = foldM (flip rowToContent) zeroTable
- where zeroTable = OrgTable 0 mempty mempty mempty
-
-normalizeTable :: OrgTable
- -> OrgTable
-normalizeTable (OrgTable cols aligns heads lns) =
- let aligns' = fillColumns aligns AlignDefault
- heads' = if heads == mempty
- then mempty
- else fillColumns heads (B.plain mempty)
- lns' = map (`fillColumns` B.plain mempty) lns
- fillColumns base padding = take cols $ base ++ repeat padding
- in OrgTable cols aligns' heads' lns'
+rowsToTable = foldM rowToContent emptyTable
+ where emptyTable = OrgTable mempty mempty mempty
+normalizeTable :: OrgTable -> OrgTable
+normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows
+ where
+ refRow = if heads /= mempty
+ then heads
+ else if rows == mempty then mempty else head rows
+ cols = length refRow
+ fillColumns base padding = take cols $ base ++ repeat padding
+ aligns' = fillColumns aligns AlignDefault
-- One or more horizontal rules after the first content line mark the previous
-- line as a header. All other horizontal lines are discarded.
-rowToContent :: OrgTableRow
- -> OrgTable
+rowToContent :: OrgTable
+ -> OrgTableRow
-> F OrgTable
-rowToContent OrgHlineRow t = maybeBodyToHeader t
-rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t
-rowToContent (OrgContentRow rf) t = do
- rs <- rf
- setLongestRow rs =<< appendToBody rs t
-
-setLongestRow :: [a]
- -> OrgTable
- -> F OrgTable
-setLongestRow rs t =
- return t{ orgTableColumns = max (length rs) (orgTableColumns t) }
-
-maybeBodyToHeader :: OrgTable
- -> F OrgTable
-maybeBodyToHeader t = case t of
- OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
- return t{ orgTableHeader = b , orgTableRows = [] }
- _ -> return t
-
-appendToBody :: [Blocks]
- -> OrgTable
- -> F OrgTable
-appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }
-
-setAligns :: [Alignment]
- -> OrgTable
- -> F OrgTable
-setAligns aligns t = return $ t{ orgTableAlignments = aligns }
+rowToContent orgTable row =
+ case row of
+ OrgHlineRow -> return singleRowPromotedToHeader
+ OrgAlignRow as -> return . setAligns $ as
+ OrgContentRow cs -> appendToBody cs
+ where
+ singleRowPromotedToHeader :: OrgTable
+ singleRowPromotedToHeader = case orgTable of
+ OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
+ orgTable{ orgTableHeader = b , orgTableRows = [] }
+ _ -> orgTable
+
+ setAligns :: [Alignment] -> OrgTable
+ setAligns aligns = orgTable{ orgTableAlignments = aligns }
+
+ appendToBody :: F [Blocks] -> F OrgTable
+ appendToBody frow = do
+ newRow <- frow
+ let oldRows = orgTableRows orgTable
+ -- NOTE: This is an inefficient O(n) operation. This should be changed
+ -- if performance ever becomes a problem.
+ return orgTable{ orgTableRows = oldRows ++ [newRow] }
--
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 6112055ba..3fab92e53 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -945,7 +945,7 @@ tests =
, "Empty table" =:
"||" =?>
- simpleTable' 1 mempty mempty
+ simpleTable' 1 mempty [[mempty]]
, "Glider Table" =:
unlines [ "| 1 | 0 | 0 |"
@@ -1000,6 +1000,17 @@ tests =
, [ plain "dynamic", plain "Lisp" ]
]
+ , "Table with empty cells" =:
+ "|||c|" =?>
+ simpleTable' 3 mempty [[mempty, mempty, plain "c"]]
+
+ , "Table with empty rows" =:
+ unlines [ "| first |"
+ , "| |"
+ , "| third |"
+ ] =?>
+ simpleTable' 1 mempty [[plain "first"], [mempty], [plain "third"]]
+
, "Table with alignment row" =:
unlines [ "| Numbers | Text | More |"
, "| <c> | <r> | |"
@@ -1028,10 +1039,10 @@ tests =
, "| 1 | One | foo |"
, "| 2"
] =?>
- table "" (zip [AlignCenter, AlignRight, AlignDefault] [0, 0, 0])
- [ plain "Numbers", plain "Text" , plain mempty ]
- [ [ plain "1" , plain "One" , plain "foo" ]
- , [ plain "2" , plain mempty , plain mempty ]
+ table "" (zip [AlignCenter, AlignRight] [0, 0])
+ [ plain "Numbers", plain "Text" ]
+ [ [ plain "1" , plain "One" , plain "foo" ]
+ , [ plain "2" ]
]
, "Table with caption" =: