From c6b34574bfbe0bea9fc940b680ddede1689f0de6 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sat, 20 Feb 2010 08:30:34 +0000 Subject: Incomplete support for RST tables (simple and grid). Thanks to Eric Kow. Note TODO for future improvement in RST reader code comments. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1840 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Readers/RST.hs | 195 ++++++++++++++++++++++++++++++++++++++++- tests/RunTests.hs | 4 +- tests/rst-reader.native | 46 +++++++++- tests/rst-reader.rst | 38 ++++++++ 4 files changed, 279 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 2f9282584..dacf51de9 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -34,7 +34,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.ParserCombinators.Parsec import Control.Monad ( when, unless ) -import Data.List ( findIndex, delete, intercalate ) +import Data.List ( findIndex, delete, intercalate, transpose ) -- | Parse reStructuredText string and return Pandoc document. readRST :: ParserState -- ^ Parser state, including options for parser @@ -127,6 +127,7 @@ block = choice [ codeBlock , header , hrule , lineBlock -- must go before definitionList + , table , list , lhsCodeBlock , para @@ -580,6 +581,197 @@ regularKey = try $ do src <- targetURI return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) +-- +-- tables +-- + +-- General tables TODO: +-- - figure out if leading spaces are acceptable and if so, add +-- support for them +-- +-- Simple tables TODO: +-- - column spans +-- - multiline support +-- - ensure that rightmost column span does not need to reach end +-- - require at least 2 columns +-- +-- Grid tables TODO: +-- - column spans + +dashedLine :: Char -> Char + -> GenParser Char st (Int, Int) +dashedLine ch sch = do + dashes <- many1 (char ch) + sp <- many (char sch) + return (length dashes, length $ dashes ++ sp) + +simpleDashedLines :: Char -> GenParser Char st [(Int,Int)] +simpleDashedLines ch = try $ many1 (dashedLine ch sch) + where + sch = ' ' + +gridDashedLines :: Char -> GenParser Char st [(Int,Int)] +gridDashedLines ch = try $ char sch >> many1 (dashedLine ch sch) + where + sch = '+' + +-- Parse a table row separator +simpleTableSep :: Char -> GenParser Char ParserState Char +simpleTableSep ch = try $ simpleDashedLines ch >> newline + +gridTableSep :: Char -> GenParser Char ParserState Char +gridTableSep ch = try $ gridDashedLines ch >> newline + +-- Parse a table footer +simpleTableFooter :: GenParser Char ParserState [Char] +simpleTableFooter = try $ simpleTableSep '=' >> blanklines + +gridTableFooter :: GenParser Char ParserState [Char] +gridTableFooter = blanklines + +-- Parse a raw line and split it into chunks by indices. +simpleTableRawLine :: [Int] -> GenParser Char ParserState [String] +simpleTableRawLine indices = do + line <- many1Till anyChar newline + return (simpleTableSplitLine indices line) + +gridTableRawLine :: [Int] -> GenParser Char ParserState [String] +gridTableRawLine indices = do + char '|' + line <- many1Till anyChar newline + return (gridTableSplitLine indices line) + +-- Parse a table row and return a list of blocks (columns). +simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]] +simpleTableRow indices = do + notFollowedBy' simpleTableFooter + firstLine <- simpleTableRawLine indices + colLines <- return [] -- TODO + let cols = map unlines . transpose $ firstLine : colLines + mapM (parseFromString (many plain)) cols + +gridTableRow :: [Int] + -> GenParser Char ParserState [[Block]] +gridTableRow indices = do + colLines <- many1 (gridTableRawLine indices) + let cols = map unlines $ transpose colLines + mapM (parseFromString (many plain)) cols + +simpleTableSplitLine :: [Int] -> String -> [String] +simpleTableSplitLine indices line = + map removeLeadingTrailingSpace + $ tail $ splitByIndices (init indices) line + +gridTableSplitLine :: [Int] -> String -> [String] +gridTableSplitLine indices line = + map removeLeadingTrailingSpace + $ map (takeWhile (/= '|')) -- strip trailing '|' off each column + $ tail $ splitByIndices (init indices) line + +-- Calculate relative widths of table columns, based on indices +widthsFromIndices :: Int -- Number of columns on terminal + -> [Int] -- Indices + -> [Double] -- Fractional relative sizes of columns +widthsFromIndices _ [] = [] +widthsFromIndices numColumns indices = + let lengths' = zipWith (-) indices (0:indices) + lengths = reverse $ + case reverse lengths' of + [] -> [] + [x] -> [x] + -- compensate for the fact that intercolumn + -- spaces are counted in widths of all columns + -- but the last... + (x:y:zs) -> if x < y && y - x <= 2 + then y:y:zs + else x:y:zs + totLength = sum lengths + quotient = if totLength > numColumns + then fromIntegral totLength + else fromIntegral numColumns + fracs = map (\l -> (fromIntegral l) / quotient) lengths in + tail fracs + +simpleTableHeader :: Bool -- ^ Headerless table + -> GenParser Char ParserState ([[Char]], [Alignment], [Int]) +simpleTableHeader headless = try $ do + optional blanklines + rawContent <- if headless + then return "" + else simpleTableSep '=' >> anyLine + dashes <- simpleDashedLines '=' + newline + let lines' = map snd dashes + let indices = scanl (+) 0 lines' + let aligns = replicate (length lines') AlignDefault + let rawHeads = if headless + then replicate (length dashes) "" + else simpleTableSplitLine indices rawContent + return (rawHeads, aligns, indices) + +gridTableHeader :: Bool -- ^ Headerless table + -> GenParser Char ParserState ([String], [Alignment], [Int]) +gridTableHeader headless = try $ do + optional blanklines + dashes <- gridDashedLines '-' + newline + rawContent <- if headless + then return $ repeat "" + else many1 + (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) + if headless + then return () + else gridTableSep '=' >> return () + let lines' = map snd dashes + let indices = scanl (+) 0 lines' + let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments + let rawHeads = if headless + then replicate (length dashes) "" + else map (intercalate " ") $ transpose + $ map (gridTableSplitLine indices) rawContent + return (rawHeads, aligns, indices) + +-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. +tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> ([Int] -> GenParser Char ParserState [[Block]]) + -> GenParser Char ParserState sep + -> GenParser Char ParserState end + -> GenParser Char ParserState Block +tableWith headerParser rowParser lineParser footerParser = try $ do + (rawHeads, aligns, indices) <- headerParser + lines' <- rowParser indices `sepEndBy` lineParser + footerParser + heads <- mapM (parseFromString (many plain)) rawHeads + state <- getState + let captions = [] -- no notion of captions in RST + let numColumns = stateColumns state + let widths = widthsFromIndices numColumns indices + return $ Table captions aligns widths heads lines' + +-- Parse a simple table with '---' header and one line per row. +simpleTable :: Bool -- ^ Headerless table + -> GenParser Char ParserState Block +simpleTable headless = do + Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter + -- Simple tables get 0s for relative column widths (i.e., use default) + return $ Table c a (replicate (length a) 0) h l + where + sep = return () -- optional (simpleTableSep '-') + +-- Parse a grid table: starts with row of '-' on top, then header +-- (which may be grid), then the rows, +-- which may be grid, separated by blank lines, and +-- ending with a footer (dashed line followed by blank line). +gridTable :: Bool -- ^ Headerless table + -> GenParser Char ParserState Block +gridTable headless = + tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter + +table :: GenParser Char ParserState Block +table = gridTable False <|> simpleTable False <|> + gridTable True <|> simpleTable True "table" + + -- -- inline -- @@ -719,4 +911,3 @@ image = try $ do Nothing -> fail "no corresponding key" Just target -> return target return $ Image (normalizeSpaces ref) src - diff --git a/tests/RunTests.hs b/tests/RunTests.hs index 565ce8d7f..635c0b8b0 100644 --- a/tests/RunTests.hs +++ b/tests/RunTests.hs @@ -93,6 +93,8 @@ main = do "markdown-reader-more.txt" "markdown-reader-more.native" r8 <- runTest "rst reader" ["-r", "rst", "-w", "native", "-s", "-S"] "rst-reader.rst" "rst-reader.native" + r8a <- runTest "rst reader (tables)" ["-r", "rst", "-w", "native"] + "tables.rst" "tables-rstsubset.native" r9 <- runTest "html reader" ["-r", "html", "-w", "native", "-s"] "html-reader.html" "html-reader.native" r10 <- runTest "latex reader" ["-r", "latex", "-w", "native", "-s", "-R"] @@ -105,7 +107,7 @@ main = do r13s <- if runLhsTests then mapM runLhsReaderTest lhsReaderFormats else putStrLn "Skipping lhs reader tests because they presuppose highlighting support" >> return [] - let results = r1s ++ [r2, r3, r4, r5, r6, r7, r7a, r8, r9, r10, r11] ++ r12s ++ r13s + let results = r1s ++ [r2, r3, r4, r5, r6, r7, r7a, r8, r8a, r9, r10, r11] ++ r12s ++ r13s if all id results then do putStrLn "\nAll tests passed." diff --git a/tests/rst-reader.native b/tests/rst-reader.native index d5b4ba77b..f3c1e5bd2 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -243,5 +243,49 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ":",Space,Str , Para [Str "A",Space,Str "third",Space,Str "paragraph"] , Header 1 [Str "Line",Space,Str "blocks"] , Para [Str "But",Space,Str "can",Space,Str "a",Space,Str "bee",Space,Str "be",Space,Str "said",Space,Str "to",Space,Str "be",LineBreak,Str " ",Str "or",Space,Str "not",Space,Str "to",Space,Str "be",Space,Str "an",Space,Str "entire",Space,Str "bee,",LineBreak,Str " ",Str "when",Space,Str "half",Space,Str "the",Space,Str "bee",Space,Str "is",Space,Str "not",Space,Str "a",Space,Str "bee,",LineBreak,Str " ",Str "due",Space,Str "to",Space,Str "some",Space,Str "ancient",Space,Str "injury?"] -, Para [Str "Continuation",Space,Str "line",LineBreak,Str " ",Str "and",Space,Str "another"] ] +, Para [Str "Continuation",Space,Str "line",LineBreak,Str " ",Str "and",Space,Str "another"] +, Header 1 [Str "Simple",Space,Str "Tables"] +, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [ [ Plain [Str "col",Space,Str "1"] ] + , [ Plain [Str "col",Space,Str "2"] ] + , [ Plain [Str "col",Space,Str "3"] ] ] [ + [ [ Plain [Str "r1",Space,Str "a"] ] + , [ Plain [Str "b"] ] + , [ Plain [Str "c"] ] ], + [ [ Plain [Str "r2",Space,Str "d"] ] + , [ Plain [Str "e"] ] + , [ Plain [Str "f"] ] ] ] +, Para [Str "Headless"] +, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [ [] + , [] + , [] ] [ + [ [ Plain [Str "r1",Space,Str "a"] ] + , [ Plain [Str "b"] ] + , [ Plain [Str "c"] ] ], + [ [ Plain [Str "r2",Space,Str "d"] ] + , [ Plain [Str "e"] ] + , [ Plain [Str "f"] ] ] ] +, Header 1 [Str "Grid",Space,Str "Tables"] +, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.2375,0.15,0.1625] + [ [ Plain [Str "col",Space,Str "1"] ] + , [ Plain [Str "col",Space,Str "2"] ] + , [ Plain [Str "col",Space,Str "3"] ] ] [ + [ [ Plain [Str "r1",Space,Str "a",Space,Str "r1",Space,Str "bis"] ] + , [ Plain [Str "b",Space,Str "b",Space,Str "2"] ] + , [ Plain [Str "c",Space,Str "c",Space,Str "2"] ] ], + [ [ Plain [Str "r2",Space,Str "d"] ] + , [ Plain [Str "e"] ] + , [ Plain [Str "f"] ] ] ] +, Para [Str "Headless"] +, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.2375,0.15,0.1625] + [ [] + , [] + , [] ] [ + [ [ Plain [Str "r1",Space,Str "a",Space,Str "r1",Space,Str "bis"] ] + , [ Plain [Str "b",Space,Str "b",Space,Str "2"] ] + , [ Plain [Str "c",Space,Str "c",Space,Str "2"] ] ], + [ [ Plain [Str "r2",Space,Str "d"] ] + , [ Plain [Str "e"] ] + , [ Plain [Str "f"] ] ] ] ] diff --git a/tests/rst-reader.rst b/tests/rst-reader.rst index 676698430..ccc248aee 100644 --- a/tests/rst-reader.rst +++ b/tests/rst-reader.rst @@ -453,3 +453,41 @@ Line blocks | and another +Simple Tables +============= + +================== =========== ========== +col 1 col 2 col 3 +================== =========== ========== +r1 a b c +r2 d e f +================== =========== ========== + +Headless + +================== =========== ========== +r1 a b c +r2 d e f +================== =========== ========== + + +Grid Tables +=========== + ++------------------+-----------+------------+ +| col 1 | col 2 | col 3 | ++==================+===========+============+ +| r1 a | b | c | +| r1 bis | b 2 | c 2 | ++------------------+-----------+------------+ +| r2 d | e | f | ++------------------+-----------+------------+ + +Headless + ++------------------+-----------+------------+ +| r1 a | b | c | +| r1 bis | b 2 | c 2 | ++------------------+-----------+------------+ +| r2 d | e | f | ++------------------+-----------+------------+ -- cgit v1.2.3