aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2010-02-20 08:30:34 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2010-02-20 08:30:34 +0000
commitc6b34574bfbe0bea9fc940b680ddede1689f0de6 (patch)
treee84b634420311bdd200e0c177939554a8a8f269e
parente8a753edffd37763f11a718f94c8fc3b1066830a (diff)
downloadpandoc-c6b34574bfbe0bea9fc940b680ddede1689f0de6.tar.gz
Incomplete support for RST tables (simple and grid).
Thanks to Eric Kow. Note TODO for future improvement in RST reader code comments. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1840 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r--src/Text/Pandoc/Readers/RST.hs195
-rw-r--r--tests/RunTests.hs4
-rw-r--r--tests/rst-reader.native46
-rw-r--r--tests/rst-reader.rst38
4 files changed, 279 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 2f9282584..dacf51de9 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -34,7 +34,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.ParserCombinators.Parsec
import Control.Monad ( when, unless )
-import Data.List ( findIndex, delete, intercalate )
+import Data.List ( findIndex, delete, intercalate, transpose )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -- ^ Parser state, including options for parser
@@ -127,6 +127,7 @@ block = choice [ codeBlock
, header
, hrule
, lineBlock -- must go before definitionList
+ , table
, list
, lhsCodeBlock
, para
@@ -580,6 +581,197 @@ regularKey = try $ do
src <- targetURI
return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+--
+-- tables
+--
+
+-- General tables TODO:
+-- - figure out if leading spaces are acceptable and if so, add
+-- support for them
+--
+-- Simple tables TODO:
+-- - column spans
+-- - multiline support
+-- - ensure that rightmost column span does not need to reach end
+-- - require at least 2 columns
+--
+-- Grid tables TODO:
+-- - column spans
+
+dashedLine :: Char -> Char
+ -> GenParser Char st (Int, Int)
+dashedLine ch sch = do
+ dashes <- many1 (char ch)
+ sp <- many (char sch)
+ return (length dashes, length $ dashes ++ sp)
+
+simpleDashedLines :: Char -> GenParser Char st [(Int,Int)]
+simpleDashedLines ch = try $ many1 (dashedLine ch sch)
+ where
+ sch = ' '
+
+gridDashedLines :: Char -> GenParser Char st [(Int,Int)]
+gridDashedLines ch = try $ char sch >> many1 (dashedLine ch sch)
+ where
+ sch = '+'
+
+-- Parse a table row separator
+simpleTableSep :: Char -> GenParser Char ParserState Char
+simpleTableSep ch = try $ simpleDashedLines ch >> newline
+
+gridTableSep :: Char -> GenParser Char ParserState Char
+gridTableSep ch = try $ gridDashedLines ch >> newline
+
+-- Parse a table footer
+simpleTableFooter :: GenParser Char ParserState [Char]
+simpleTableFooter = try $ simpleTableSep '=' >> blanklines
+
+gridTableFooter :: GenParser Char ParserState [Char]
+gridTableFooter = blanklines
+
+-- Parse a raw line and split it into chunks by indices.
+simpleTableRawLine :: [Int] -> GenParser Char ParserState [String]
+simpleTableRawLine indices = do
+ line <- many1Till anyChar newline
+ return (simpleTableSplitLine indices line)
+
+gridTableRawLine :: [Int] -> GenParser Char ParserState [String]
+gridTableRawLine indices = do
+ char '|'
+ line <- many1Till anyChar newline
+ return (gridTableSplitLine indices line)
+
+-- Parse a table row and return a list of blocks (columns).
+simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]]
+simpleTableRow indices = do
+ notFollowedBy' simpleTableFooter
+ firstLine <- simpleTableRawLine indices
+ colLines <- return [] -- TODO
+ let cols = map unlines . transpose $ firstLine : colLines
+ mapM (parseFromString (many plain)) cols
+
+gridTableRow :: [Int]
+ -> GenParser Char ParserState [[Block]]
+gridTableRow indices = do
+ colLines <- many1 (gridTableRawLine indices)
+ let cols = map unlines $ transpose colLines
+ mapM (parseFromString (many plain)) cols
+
+simpleTableSplitLine :: [Int] -> String -> [String]
+simpleTableSplitLine indices line =
+ map removeLeadingTrailingSpace
+ $ tail $ splitByIndices (init indices) line
+
+gridTableSplitLine :: [Int] -> String -> [String]
+gridTableSplitLine indices line =
+ map removeLeadingTrailingSpace
+ $ map (takeWhile (/= '|')) -- strip trailing '|' off each column
+ $ tail $ splitByIndices (init indices) line
+
+-- Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int -- Number of columns on terminal
+ -> [Int] -- Indices
+ -> [Double] -- Fractional relative sizes of columns
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns indices =
+ let lengths' = zipWith (-) indices (0:indices)
+ lengths = reverse $
+ case reverse lengths' of
+ [] -> []
+ [x] -> [x]
+ -- compensate for the fact that intercolumn
+ -- spaces are counted in widths of all columns
+ -- but the last...
+ (x:y:zs) -> if x < y && y - x <= 2
+ then y:y:zs
+ else x:y:zs
+ totLength = sum lengths
+ quotient = if totLength > numColumns
+ then fromIntegral totLength
+ else fromIntegral numColumns
+ fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ tail fracs
+
+simpleTableHeader :: Bool -- ^ Headerless table
+ -> GenParser Char ParserState ([[Char]], [Alignment], [Int])
+simpleTableHeader headless = try $ do
+ optional blanklines
+ rawContent <- if headless
+ then return ""
+ else simpleTableSep '=' >> anyLine
+ dashes <- simpleDashedLines '='
+ newline
+ let lines' = map snd dashes
+ let indices = scanl (+) 0 lines'
+ let aligns = replicate (length lines') AlignDefault
+ let rawHeads = if headless
+ then replicate (length dashes) ""
+ else simpleTableSplitLine indices rawContent
+ return (rawHeads, aligns, indices)
+
+gridTableHeader :: Bool -- ^ Headerless table
+ -> GenParser Char ParserState ([String], [Alignment], [Int])
+gridTableHeader headless = try $ do
+ optional blanklines
+ dashes <- gridDashedLines '-'
+ newline
+ rawContent <- if headless
+ then return $ repeat ""
+ else many1
+ (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline)
+ if headless
+ then return ()
+ else gridTableSep '=' >> return ()
+ let lines' = map snd dashes
+ let indices = scanl (+) 0 lines'
+ let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments
+ let rawHeads = if headless
+ then replicate (length dashes) ""
+ else map (intercalate " ") $ transpose
+ $ map (gridTableSplitLine indices) rawContent
+ return (rawHeads, aligns, indices)
+
+-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
+tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
+ -> ([Int] -> GenParser Char ParserState [[Block]])
+ -> GenParser Char ParserState sep
+ -> GenParser Char ParserState end
+ -> GenParser Char ParserState Block
+tableWith headerParser rowParser lineParser footerParser = try $ do
+ (rawHeads, aligns, indices) <- headerParser
+ lines' <- rowParser indices `sepEndBy` lineParser
+ footerParser
+ heads <- mapM (parseFromString (many plain)) rawHeads
+ state <- getState
+ let captions = [] -- no notion of captions in RST
+ let numColumns = stateColumns state
+ let widths = widthsFromIndices numColumns indices
+ return $ Table captions aligns widths heads lines'
+
+-- Parse a simple table with '---' header and one line per row.
+simpleTable :: Bool -- ^ Headerless table
+ -> GenParser Char ParserState Block
+simpleTable headless = do
+ Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
+ -- Simple tables get 0s for relative column widths (i.e., use default)
+ return $ Table c a (replicate (length a) 0) h l
+ where
+ sep = return () -- optional (simpleTableSep '-')
+
+-- Parse a grid table: starts with row of '-' on top, then header
+-- (which may be grid), then the rows,
+-- which may be grid, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+gridTable :: Bool -- ^ Headerless table
+ -> GenParser Char ParserState Block
+gridTable headless =
+ tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter
+
+table :: GenParser Char ParserState Block
+table = gridTable False <|> simpleTable False <|>
+ gridTable True <|> simpleTable True <?> "table"
+
+
--
-- inline
--
@@ -719,4 +911,3 @@ image = try $ do
Nothing -> fail "no corresponding key"
Just target -> return target
return $ Image (normalizeSpaces ref) src
-
diff --git a/tests/RunTests.hs b/tests/RunTests.hs
index 565ce8d7f..635c0b8b0 100644
--- a/tests/RunTests.hs
+++ b/tests/RunTests.hs
@@ -93,6 +93,8 @@ main = do
"markdown-reader-more.txt" "markdown-reader-more.native"
r8 <- runTest "rst reader" ["-r", "rst", "-w", "native", "-s", "-S"]
"rst-reader.rst" "rst-reader.native"
+ r8a <- runTest "rst reader (tables)" ["-r", "rst", "-w", "native"]
+ "tables.rst" "tables-rstsubset.native"
r9 <- runTest "html reader" ["-r", "html", "-w", "native", "-s"]
"html-reader.html" "html-reader.native"
r10 <- runTest "latex reader" ["-r", "latex", "-w", "native", "-s", "-R"]
@@ -105,7 +107,7 @@ main = do
r13s <- if runLhsTests
then mapM runLhsReaderTest lhsReaderFormats
else putStrLn "Skipping lhs reader tests because they presuppose highlighting support" >> return []
- let results = r1s ++ [r2, r3, r4, r5, r6, r7, r7a, r8, r9, r10, r11] ++ r12s ++ r13s
+ let results = r1s ++ [r2, r3, r4, r5, r6, r7, r7a, r8, r8a, r9, r10, r11] ++ r12s ++ r13s
if all id results
then do
putStrLn "\nAll tests passed."
diff --git a/tests/rst-reader.native b/tests/rst-reader.native
index d5b4ba77b..f3c1e5bd2 100644
--- a/tests/rst-reader.native
+++ b/tests/rst-reader.native
@@ -243,5 +243,49 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ":",Space,Str
, Para [Str "A",Space,Str "third",Space,Str "paragraph"]
, Header 1 [Str "Line",Space,Str "blocks"]
, Para [Str "But",Space,Str "can",Space,Str "a",Space,Str "bee",Space,Str "be",Space,Str "said",Space,Str "to",Space,Str "be",LineBreak,Str " ",Str "or",Space,Str "not",Space,Str "to",Space,Str "be",Space,Str "an",Space,Str "entire",Space,Str "bee,",LineBreak,Str " ",Str "when",Space,Str "half",Space,Str "the",Space,Str "bee",Space,Str "is",Space,Str "not",Space,Str "a",Space,Str "bee,",LineBreak,Str " ",Str "due",Space,Str "to",Space,Str "some",Space,Str "ancient",Space,Str "injury?"]
-, Para [Str "Continuation",Space,Str "line",LineBreak,Str " ",Str "and",Space,Str "another"] ]
+, Para [Str "Continuation",Space,Str "line",LineBreak,Str " ",Str "and",Space,Str "another"]
+, Header 1 [Str "Simple",Space,Str "Tables"]
+, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [ [ Plain [Str "col",Space,Str "1"] ]
+ , [ Plain [Str "col",Space,Str "2"] ]
+ , [ Plain [Str "col",Space,Str "3"] ] ] [
+ [ [ Plain [Str "r1",Space,Str "a"] ]
+ , [ Plain [Str "b"] ]
+ , [ Plain [Str "c"] ] ],
+ [ [ Plain [Str "r2",Space,Str "d"] ]
+ , [ Plain [Str "e"] ]
+ , [ Plain [Str "f"] ] ] ]
+, Para [Str "Headless"]
+, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [ []
+ , []
+ , [] ] [
+ [ [ Plain [Str "r1",Space,Str "a"] ]
+ , [ Plain [Str "b"] ]
+ , [ Plain [Str "c"] ] ],
+ [ [ Plain [Str "r2",Space,Str "d"] ]
+ , [ Plain [Str "e"] ]
+ , [ Plain [Str "f"] ] ] ]
+, Header 1 [Str "Grid",Space,Str "Tables"]
+, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.2375,0.15,0.1625]
+ [ [ Plain [Str "col",Space,Str "1"] ]
+ , [ Plain [Str "col",Space,Str "2"] ]
+ , [ Plain [Str "col",Space,Str "3"] ] ] [
+ [ [ Plain [Str "r1",Space,Str "a",Space,Str "r1",Space,Str "bis"] ]
+ , [ Plain [Str "b",Space,Str "b",Space,Str "2"] ]
+ , [ Plain [Str "c",Space,Str "c",Space,Str "2"] ] ],
+ [ [ Plain [Str "r2",Space,Str "d"] ]
+ , [ Plain [Str "e"] ]
+ , [ Plain [Str "f"] ] ] ]
+, Para [Str "Headless"]
+, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.2375,0.15,0.1625]
+ [ []
+ , []
+ , [] ] [
+ [ [ Plain [Str "r1",Space,Str "a",Space,Str "r1",Space,Str "bis"] ]
+ , [ Plain [Str "b",Space,Str "b",Space,Str "2"] ]
+ , [ Plain [Str "c",Space,Str "c",Space,Str "2"] ] ],
+ [ [ Plain [Str "r2",Space,Str "d"] ]
+ , [ Plain [Str "e"] ]
+ , [ Plain [Str "f"] ] ] ] ]
diff --git a/tests/rst-reader.rst b/tests/rst-reader.rst
index 676698430..ccc248aee 100644
--- a/tests/rst-reader.rst
+++ b/tests/rst-reader.rst
@@ -453,3 +453,41 @@ Line blocks
| and
another
+Simple Tables
+=============
+
+================== =========== ==========
+col 1 col 2 col 3
+================== =========== ==========
+r1 a b c
+r2 d e f
+================== =========== ==========
+
+Headless
+
+================== =========== ==========
+r1 a b c
+r2 d e f
+================== =========== ==========
+
+
+Grid Tables
+===========
+
++------------------+-----------+------------+
+| col 1 | col 2 | col 3 |
++==================+===========+============+
+| r1 a | b | c |
+| r1 bis | b 2 | c 2 |
++------------------+-----------+------------+
+| r2 d | e | f |
++------------------+-----------+------------+
+
+Headless
+
++------------------+-----------+------------+
+| r1 a | b | c |
+| r1 bis | b 2 | c 2 |
++------------------+-----------+------------+
+| r2 d | e | f |
++------------------+-----------+------------+