Added support for tables in markdown reader and in LaTeX,

DocBook, and HTML writers. The syntax is documented in README. Tests have been added to the test suite. git-svn-id: https://pandoc.googlecode.com/svn/trunk@493 788f1e2b-df1e-0410-8736-df70ead52e1b
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-01-15 19:52:42 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-01-15 19:52:42 +0000
commit: 60989d0637780787fb337b94af212f1ee9e1ae22 (patch)
tree: 95b5caa1e7e304a47739532a9c4d3767ce54435c /src/Text
parent: 4224d913880e4f77a358cda868c9d1ca75820506 (diff)
download: pandoc-60989d0637780787fb337b94af212f1ee9e1ae22.tar.gz
10 files changed, 295 insertions, 10 deletions
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
index 2313b1ef1..d16309b4e 100644
--- a/src/Text/Pandoc/Definition.hs
+++ b/src/Text/Pandoc/Definition.hs
@@ -39,6 +39,12 @@ data Meta   = Meta [Inline] -- title
                    String   -- date
               deriving (Eq, Show, Read)
 
+-- | Alignment of a table column.
+data Alignment = AlignLeft 
+               | AlignRight 
+               | AlignCenter 
+               | AlignDefault deriving (Eq, Show, Read)
+
 -- | Block element.
 data Block  
     = Plain [Inline]        -- ^ Plain text, not a paragraph
@@ -57,6 +63,11 @@ data Block
     | HorizontalRule        -- ^ Horizontal rule
     | Note String [Block]   -- ^ Footnote or endnote - reference (string),
                             -- text (list of blocks)
+    | Table [Inline]        -- ^ Table caption,
+            [Alignment]     -- column alignments,
+            [Float]         -- column widths (relative to page),
+            [[Block]]       -- column headers, and 
+            [[[Block]]]     -- rows
     deriving (Eq, Read, Show)
                
 -- | Target for a link:  either a URL or an indirect (labeled) reference.
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4e6a7b39c..1a77a5958 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown (
                                      readMarkdown 
                                     ) where
 
-import Data.List ( findIndex, sortBy )
+import Data.List ( findIndex, sortBy, transpose )
 import Data.Char ( isAlphaNum )
 import Text.ParserCombinators.Pandoc
 import Text.Pandoc.Definition
@@ -88,6 +88,7 @@ setextHChars = ['=','-']
 blockQuoteChar = '>'
 hyphenChar = '-'
 ellipsesChar = '.'
+listColSepChar = '|'
 
 -- treat these as potentially non-text when parsing inline:
 specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
@@ -106,9 +107,9 @@ indentSpaces = do
   state <- getState
   let tabStop = stateTabStop state
   count tabStop (char ' ') <|> 
-    (do{skipNonindentSpaces; string "\t"}) <?> "indentation"
+    (do{nonindentSpaces; string "\t"}) <?> "indentation"
 
-skipNonindentSpaces = do
+nonindentSpaces = do
   state <- getState
   let tabStop = stateTabStop state
   choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)]))
@@ -192,7 +193,7 @@ parseMarkdown = do
 
 parseBlocks = manyTill block eof
 
-block = choice [ codeBlock, note, referenceKey, header, hrule, list, 
+block = choice [ header, table, codeBlock, note, referenceKey, hrule, list, 
                  blockQuote, htmlBlock, rawLaTeXEnvironment', para,
                  plain, blankBlock, nullBlock ] <?> "block"
 
@@ -322,7 +323,7 @@ emacsBoxQuote = try (do
   return raw)
 
 emailBlockQuoteStart = try (do
-  skipNonindentSpaces
+  nonindentSpaces
   char blockQuoteChar
   option ' ' (char ' ')
   return "> ")
@@ -356,7 +357,7 @@ list = choice [ bulletList, orderedList ] <?> "list"
 
 bulletListStart = try (do
   option ' ' newline -- if preceded by a Plain block in a list context
-  skipNonindentSpaces
+  nonindentSpaces
   notFollowedBy' hrule  -- because hrules start out just like lists
   oneOf bulletListMarkers
   spaceChar
@@ -364,7 +365,7 @@ bulletListStart = try (do
 
 orderedListStart = try (do
   option ' ' newline -- if preceded by a Plain block in a list context
-  skipNonindentSpaces
+  nonindentSpaces
   many1 digit <|> (do{failIfStrict; count 1 letter})
   delim <- oneOf orderedListDelimiters
   if delim /= '.' then failIfStrict else return ()
@@ -501,7 +502,7 @@ rawHtmlBlocks = try (do
 --
 
 referenceKey = try (do
-  skipNonindentSpaces
+  nonindentSpaces
   label <- reference
   char labelSep
   skipSpaces
@@ -523,6 +524,150 @@ rawLaTeXEnvironment' = do
   failIfStrict
   rawLaTeXEnvironment
 
+--
+-- Tables
+-- 
+
+-- | Parse a dashed line with optional trailing spaces; return its length
+--   and the length including trailing space.
+dashedLine ch = do
+    dashes <- many1 (char ch)
+    sp     <- many spaceChar
+    return $ (length dashes, length $ dashes ++ sp)
+
+-- | Parse a table header with dashed lines of '-' preceded by 
+--   one line of text.
+simpleTableHeader = do
+    rawContent  <- anyLine
+    initSp      <- nonindentSpaces
+    dashes      <- many1 (dashedLine '-')
+    newline
+    let (lengths, lines) = unzip dashes
+    let indices  = scanl (+) (length initSp) lines
+    let rawHeads = tail $ splitByIndices (init indices) rawContent
+    let aligns   = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+    return $ (rawHeads, aligns, indices)
+
+-- | Parse a table footer - dashed lines followed by blank line.
+tableFooter = try $ do
+    nonindentSpaces
+    many1 (dashedLine '-')
+    blanklines
+
+-- | Parse a table separator - dashed line.
+tableSep = try $ do
+    nonindentSpaces
+    many1 (dashedLine '-')
+    string "\n"
+
+-- | Parse a raw line and split it into chunks by indices.
+rawTableLine indices = do
+    notFollowedBy' (blanklines <|> tableFooter)
+    line <- many1Till anyChar newline
+    return $ map removeLeadingTrailingSpace $ tail $ 
+             splitByIndices (init indices) line
+
+-- | Parse a table line and return a list of lists of blocks (columns).
+tableLine indices = try $ do
+    rawline <- rawTableLine indices
+    mapM (parseFromStr (many plain)) rawline
+
+-- | Parse a multiline table row and return a list of blocks (columns).
+multilineRow indices = try $ do
+    colLines <- many1 (rawTableLine indices)
+    option "" blanklines
+    let cols = map unlines $ transpose colLines
+    mapM (parseFromStr (many plain)) cols
+
+-- | Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int     -- ^ Number of columns on terminal
+                  -> [Int]   -- ^ Indices
+                  -> [Float] -- ^ Fractional relative sizes of columns
+widthsFromIndices _ [] = []  
+widthsFromIndices numColumns indices = 
+    let lengths = zipWith (-) indices (0:indices)
+        totLength = sum lengths
+        quotient = if totLength > numColumns
+                     then fromIntegral totLength
+                     else fromIntegral numColumns
+        fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+    tail fracs
+
+-- | Parses a table caption:  inlines beginning with 'Table:'
+--   and followed by blank lines
+tableCaption = try $ do
+    nonindentSpaces
+    string "Table:"
+    result <- many1 inline
+    blanklines
+    return $ normalizeSpaces result
+
+-- | Parse a table using 'headerParser', 'lineParser', and 'footerParser'
+tableWith headerParser lineParser footerParser = try $ do
+    (rawHeads, aligns, indices) <- headerParser
+    lines <- many1Till (lineParser indices) footerParser
+    caption <- option [] tableCaption
+    heads <- mapM (parseFromStr (many plain)) rawHeads
+    state <- getState
+    let numColumns = stateColumns state
+    let widths = widthsFromIndices numColumns indices
+    return $ Table caption aligns widths heads lines
+
+-- | Parse a simple table with '---' header and one line per row.
+simpleTable = tableWith simpleTableHeader tableLine blanklines
+
+-- | Parse a multiline table:  starts with row of '-' on top, then header
+--   (which may be multiline), then the rows,
+--   which may be multiline, separated by blank lines, and
+--   ending with a footer (dashed line followed by blank line).
+multilineTable = tableWith multilineTableHeader multilineRow tableFooter
+
+multilineTableHeader = try $ do
+    tableSep 
+    rawContent  <- many1 (do{notFollowedBy' tableSep; 
+                             many1Till anyChar newline})
+    initSp      <- nonindentSpaces
+    dashes      <- many1 (dashedLine '-')
+    newline
+    let (lengths, lines) = unzip dashes
+    let indices  = scanl (+) (length initSp) lines
+    let rawHeadsList = transpose $ map 
+                       (\ln -> tail $ splitByIndices (init indices) ln)
+                       rawContent
+    let rawHeads = map (joinWithSep " ") rawHeadsList
+    let aligns   = zipWith alignType rawHeadsList lengths
+    return $ ((map removeLeadingTrailingSpace rawHeads),
+             aligns, indices)
+
+-- | Returns the longest of a list of strings.
+longest :: [String] -> String
+longest [] = ""
+longest [x] = x
+longest (x:xs) =
+    if (length x) >= (maximum $ map length xs)
+      then x
+      else longest xs
+
+-- | Returns an alignment type for a table, based on a list of strings
+--   (the rows of the column header) and a number (the length of the
+--   dashed line under the rows.
+alignType :: [String] -> Int -> Alignment
+alignType []  len = AlignDefault
+alignType strLst len =
+    let str        = longest $ map removeTrailingSpace strLst
+        leftSpace  = if null str then False else ((str !! 0) `elem` " \t")
+        rightSpace = (length str < len || (str !! (len - 1)) `elem` " \t") in
+    case (leftSpace, rightSpace) of
+        (True,  False)   -> AlignRight
+        (False, True)    -> AlignLeft
+        (True, True)     -> AlignCenter
+        (False, False)   -> AlignDefault
+
+table = do
+    failIfStrict
+    result <- simpleTable <|> multilineTable <?> "table"
+    return result
+
 -- 
 -- inline
 --
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 624f573de..8ee990827 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -30,6 +30,7 @@ Utility functions and definitions used by the various Pandoc modules.
 module Text.Pandoc.Shared ( 
                      -- * List processing
                      splitBy,
+                     splitByIndices,
                      -- * Text processing
                      gsub,
                      joinWithSep,
@@ -133,6 +134,8 @@ data ParserState = ParserState
       stateDate            :: String,        -- ^ Date of document
       stateStrict          :: Bool,          -- ^ Use strict markdown syntax
       stateSmart           :: Bool,          -- ^ Use smart typography
+      stateColumns         :: Int,           -- ^ Number of columns in
+                                             -- terminal (used for tables)
       stateHeaderTable     :: [HeaderType]   -- ^ List of header types used,
                                              -- in what order (rst only)
     }
@@ -154,6 +157,7 @@ defaultParserState =
                   stateDate            = [],
                   stateStrict          = False,
                   stateSmart           = False,
+                  stateColumns         = 80,
                   stateHeaderTable     = [] }
 
 -- | Indent string as a block.
@@ -292,6 +296,13 @@ splitBy sep lst =
       rest'         = dropWhile (== sep) rest in
   first:(splitBy sep rest')
 
+-- | Split list into chunks divided at specified indices.
+splitByIndices :: [Int] -> [a] -> [[a]]
+splitByIndices [] lst = [lst]
+splitByIndices (x:xs) lst =
+    let (first, rest) = splitAt x lst in
+    first:(splitByIndices (map (\y -> y - x)  xs) rest)
+
 -- | Normalize a list of inline elements: remove leading and trailing
 -- @Space@ elements, and collapse double @Space@s into singles.
 normalizeSpaces :: [Inline] -> [Inline]
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index e67b91fcd..ec3801a9a 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -151,7 +151,39 @@ blockToDocbook opts (RawHtml str) = text str -- raw XML block
 blockToDocbook opts HorizontalRule = empty -- not semantic
 blockToDocbook opts (Note _ _) = empty -- shouldn't occur
 blockToDocbook opts (Key _ _) = empty  -- shouldn't occur
-blockToDocbook opts _ = inTagsIndented "para" (text "Unknown block type")
+blockToDocbook opts (Table caption aligns widths headers rows) =
+  let alignStrings = map alignmentToString aligns
+      captionDoc   = if null caption
+                      then empty
+                      else inTagsIndented "caption" 
+                           (inlinesToDocbook opts caption)
+      tableType   = if isEmpty captionDoc then "informaltable" else "table" in
+  inTagsIndented tableType $ captionDoc $$
+  (colHeadsToDocbook opts alignStrings widths headers) $$ 
+  (vcat $ map (tableRowToDocbook opts alignStrings) rows)
+
+colHeadsToDocbook opts alignStrings widths headers =
+  let heads = zipWith3
+              (\align width item -> tableItemToDocbook opts "th" align width item) 
+              alignStrings widths headers in
+  inTagsIndented "tr" $ vcat heads
+
+alignmentToString alignment = case alignment of
+                                 AlignLeft -> "left"
+                                 AlignRight -> "right"
+                                 AlignCenter -> "center"
+                                 AlignDefault -> "left"
+
+tableRowToDocbook opts aligns cols =
+  inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
+
+tableItemToDocbook opts tag align width item =
+  let attrib = [("align", align)] ++ 
+               if (width /= 0) 
+                 then [("style", "{width: " ++ 
+                                 show (truncate (100*width)) ++ "%;}")]
+                 else [] in 
+  inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
 
 -- | Put string in CDATA section
 cdata :: String -> Doc
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index e119a5c87..d38a57556 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -186,6 +186,38 @@ blockToHtml opts (Header level lst) =
   if ((level > 0) && (level <= 6))
       then inTagsSimple ("h" ++ show level) contents 
       else inTagsSimple "p" contents 
+blockToHtml opts (Table caption aligns widths headers rows) =
+  let alignStrings = map alignmentToString aligns
+      captionDoc   = if null caption
+                       then empty
+                       else inTagsSimple "caption" 
+                            (inlineListToHtml opts caption) in
+  inTagsIndented "table" $ captionDoc $$ 
+  (colHeadsToHtml opts alignStrings widths headers) $$ 
+  (vcat $ map (tableRowToHtml opts alignStrings) rows)
+
+colHeadsToHtml opts alignStrings widths headers =
+  let heads = zipWith3
+              (\align width item -> tableItemToHtml opts "th" align width item) 
+              alignStrings widths headers in
+  inTagsIndented "tr" $ vcat heads
+
+alignmentToString alignment = case alignment of
+                                 AlignLeft -> "left"
+                                 AlignRight -> "right"
+                                 AlignCenter -> "center"
+                                 AlignDefault -> "left"
+
+tableRowToHtml opts aligns cols =
+  inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToHtml opts "td") aligns (repeat 0) cols
+
+tableItemToHtml opts tag align width item =
+  let attrib = [("align", align)] ++ 
+               if (width /= 0) 
+                 then [("style", "{width: " ++ 
+                                 show (truncate (100*width)) ++ "%;}")]
+                 else [] in 
+  inTags False tag attrib $ vcat $ map (blockToHtml opts) item
 
 listItemToHtml :: WriterOptions -> [Block] -> Doc
 listItemToHtml opts list = 
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index aca72535d..db7af223d 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Writers.LaTeX (
                                  ) where
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared
+import Text.Printf ( printf )
 import List ( (\\) )
 
 -- | Convert Pandoc to LaTeX.
@@ -123,6 +124,38 @@ blockToLaTeX notes (Header level lst) =
        then "\\" ++ (concat (replicate (level - 1) "sub")) ++ "section{" ++ 
             (inlineListToLaTeX notes (deVerb lst)) ++ "}\n\n"
        else (inlineListToLaTeX notes lst) ++ "\n\n"
+blockToLaTeX notes (Table caption aligns widths heads rows) =
+    let colWidths = map printDecimal widths
+        colDescriptors = concat $ zipWith
+                                  (\width align -> ">{\\PBS" ++ 
+                                  (case align of 
+                                         AlignLeft -> "\\raggedright"
+                                         AlignRight -> "\\raggedleft"
+                                         AlignCenter -> "\\centering"
+                                         AlignDefault -> "\\raggedright") ++
+                                  "\\hspace{0pt}}p{" ++ width ++ 
+                                  "\\textwidth}")
+                                  colWidths aligns
+        headers        = tableRowToLaTeX notes heads 
+        captionText    = inlineListToLaTeX notes caption 
+        tableBody      = "\\begin{tabular}{" ++ colDescriptors ++ "}\n" ++
+                         headers ++ "\\hline\n" ++ 
+                         (concatMap (tableRowToLaTeX notes) rows) ++ 
+                         "\\end{tabular}\n" 
+        centered str   = "\\begin{center}\n" ++ str ++ "\\end{center}\n" in
+    if null captionText
+      then centered tableBody ++ "\n"
+      else "\\begin{table}[h]\n" ++ centered tableBody ++ "\\caption{" ++
+           captionText ++ "}\n" ++ "\\end{table}\n\n" 
+      
+
+printDecimal :: Float -> String
+printDecimal = printf "%.2f" 
+
+tableColumnWidths notes cols = map (length . (concatMap (blockToLaTeX notes))) cols
+
+tableRowToLaTeX notes cols = joinWithSep " & " (map (concatMap (blockToLaTeX notes)) cols) ++ "\\\\\n"
+
 listItemToLaTeX notes list = "\\item " ++ 
     (concatMap (blockToLaTeX notes) list) 
 
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 343942421..0e7704510 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -132,6 +132,10 @@ blockToMarkdown tabStop (OrderedList lst) =
 blockToMarkdown tabStop HorizontalRule = text "\n* * * * *\n"
 blockToMarkdown tabStop (Header level lst) = text ((replicate level '#') ++ 
   " ") <> (inlineListToMarkdown lst) <> (text "\n")
+blockToMarkdown tabStop (Table caption _ _ headers rows) =
+  blockToMarkdown tabStop (Para [Str "pandoc: TABLE unsupported in Markdown writer"])
+
+
 bulletListItemToMarkdown tabStop list = 
   hang (text "-  ") tabStop (vcat (map (blockToMarkdown tabStop) list))
 
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 7e1581908..b6802ffa2 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -148,6 +148,9 @@ blockToRST tabStop (Header level lst) =
   let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) in
   let border = text $ replicate headerLength headerChar in
   (headerText <> char '\n' <> border <> char '\n', refs)
+blockToRST tabStop (Table caption _ _ headers rows) =
+  blockToRST tabStop (Para [Str "pandoc: TABLE unsupported in RST writer"])
+
 
 -- | Convert bullet list item (list of blocks) to reStructuredText.
 -- Returns a pair of 'Doc', the first the main text, the second references
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 20f06d21b..b53e39cb2 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -170,6 +170,8 @@ blockToRTF notes indent HorizontalRule =
 blockToRTF notes indent (Header level lst) = 
   rtfPar indent 0 ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ 
   (inlineListToRTF notes lst))
+blockToRTF notes indent (Table caption _ _ headers rows) = 
+  blockToRTF notes indent (Para [Str "pandoc: TABLE unsupported in RST writer"])
 
 -- | Ensure that there's the same amount of space after compact
 -- lists as after regular lists.
diff --git a/src/Text/ParserCombinators/Pandoc.hs b/src/Text/ParserCombinators/Pandoc.hs
index b55ceb23d..a825ef8ff 100644
--- a/src/Text/ParserCombinators/Pandoc.hs
+++ b/src/Text/ParserCombinators/Pandoc.hs
@@ -41,7 +41,8 @@ module Text.ParserCombinators.Pandoc (
                                       enclosed,
                                       blankBlock,
                                       nullBlock,
-                                      stringAnyCase
+                                      stringAnyCase,
+                                      parseFromStr
                                      ) where
 import Text.ParserCombinators.Parsec
 import Text.Pandoc.Definition
@@ -138,3 +139,14 @@ stringAnyCase (x:xs) = try (do
   firstChar <- choice [ char (toUpper x), char (toLower x) ]
   rest <- stringAnyCase xs
   return (firstChar:rest))
+
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromStr :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromStr parser str = try $ do
+  oldInput <- getInput
+  setInput str
+  result <- parser
+  setInput oldInput
+  return result
+
+
author	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-01-15 19:52:42 +0000
committer	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-01-15 19:52:42 +0000
commit	60989d0637780787fb337b94af212f1ee9e1ae22 (patch)
tree	95b5caa1e7e304a47739532a9c4d3767ce54435c /src/Text
parent	4224d913880e4f77a358cda868c9d1ca75820506 (diff)
download	pandoc-60989d0637780787fb337b94af212f1ee9e1ae22.tar.gz