From 60989d0637780787fb337b94af212f1ee9e1ae22 Mon Sep 17 00:00:00 2001
From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>
Date: Mon, 15 Jan 2007 19:52:42 +0000
Subject: Added support for tables in markdown reader and in LaTeX, DocBook,
 and HTML writers.  The syntax is documented in README.  Tests have been added
 to the test suite.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@493 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 src/Text/Pandoc/Readers/Markdown.hs | 161 ++++++++++++++++++++++++++++++++++--
 1 file changed, 153 insertions(+), 8 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4e6a7b39c..1a77a5958 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown (
                                      readMarkdown 
                                     ) where
 
-import Data.List ( findIndex, sortBy )
+import Data.List ( findIndex, sortBy, transpose )
 import Data.Char ( isAlphaNum )
 import Text.ParserCombinators.Pandoc
 import Text.Pandoc.Definition
@@ -88,6 +88,7 @@ setextHChars = ['=','-']
 blockQuoteChar = '>'
 hyphenChar = '-'
 ellipsesChar = '.'
+listColSepChar = '|'
 
 -- treat these as potentially non-text when parsing inline:
 specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
@@ -106,9 +107,9 @@ indentSpaces = do
   state <- getState
   let tabStop = stateTabStop state
   count tabStop (char ' ') <|> 
-    (do{skipNonindentSpaces; string "\t"}) <?> "indentation"
+    (do{nonindentSpaces; string "\t"}) <?> "indentation"
 
-skipNonindentSpaces = do
+nonindentSpaces = do
   state <- getState
   let tabStop = stateTabStop state
   choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)]))
@@ -192,7 +193,7 @@ parseMarkdown = do
 
 parseBlocks = manyTill block eof
 
-block = choice [ codeBlock, note, referenceKey, header, hrule, list, 
+block = choice [ header, table, codeBlock, note, referenceKey, hrule, list, 
                  blockQuote, htmlBlock, rawLaTeXEnvironment', para,
                  plain, blankBlock, nullBlock ] <?> "block"
 
@@ -322,7 +323,7 @@ emacsBoxQuote = try (do
   return raw)
 
 emailBlockQuoteStart = try (do
-  skipNonindentSpaces
+  nonindentSpaces
   char blockQuoteChar
   option ' ' (char ' ')
   return "> ")
@@ -356,7 +357,7 @@ list = choice [ bulletList, orderedList ] <?> "list"
 
 bulletListStart = try (do
   option ' ' newline -- if preceded by a Plain block in a list context
-  skipNonindentSpaces
+  nonindentSpaces
   notFollowedBy' hrule  -- because hrules start out just like lists
   oneOf bulletListMarkers
   spaceChar
@@ -364,7 +365,7 @@ bulletListStart = try (do
 
 orderedListStart = try (do
   option ' ' newline -- if preceded by a Plain block in a list context
-  skipNonindentSpaces
+  nonindentSpaces
   many1 digit <|> (do{failIfStrict; count 1 letter})
   delim <- oneOf orderedListDelimiters
   if delim /= '.' then failIfStrict else return ()
@@ -501,7 +502,7 @@ rawHtmlBlocks = try (do
 --
 
 referenceKey = try (do
-  skipNonindentSpaces
+  nonindentSpaces
   label <- reference
   char labelSep
   skipSpaces
@@ -523,6 +524,150 @@ rawLaTeXEnvironment' = do
   failIfStrict
   rawLaTeXEnvironment
 
+--
+-- Tables
+-- 
+
+-- | Consume a run of one or more @ch@ characters and any spaces after it.
+--   Returns (number of @ch@ consumed, that number plus the trailing spaces).
+dashedLine ch = do
+    run     <- many1 (char ch)
+    padding <- many spaceChar
+    return (length run, length run + length padding)
+
+-- | Parse a simple table header: a line of heading text, then a line of
+--   '-' runs.  Returns (raw heading strings, alignments, boundary indices).
+simpleTableHeader = do
+    headingText <- anyLine
+    indent      <- nonindentSpaces
+    runs        <- many1 (dashedLine '-')
+    newline
+    let (dashCounts, spans) = unzip runs
+        colStarts = scanl (+) (length indent) spans
+        cells     = tail (splitByIndices (init colStarts) headingText)
+        colAligns = zipWith alignType (map (: []) cells) dashCounts
+    return (cells, colAligns, colStarts)
+
+-- | Parse a table footer: dashed lines followed by blank line(s).
+--   Wrapped in 'try', so a failed match consumes no input.
+tableFooter = try $
+    nonindentSpaces >> many1 (dashedLine '-') >>
+    blanklines
+
+-- | Parse a table separator: a line of '-' runs ended by a newline.
+--   Yields the newline as a string; wrapped in 'try' so failure backtracks.
+tableSep = try $
+    nonindentSpaces >> many1 (dashedLine '-') >>
+    string "\n"
+
+-- | Read one line that is neither blank nor a table footer and cut it into trimmed cells by @indices@.
+rawTableLine indices = do
+    notFollowedBy' (blanklines <|> tableFooter)
+    rowText <- many1Till anyChar newline
+    let cells = tail (splitByIndices (init indices) rowText)
+    return (map removeLeadingTrailingSpace cells)
+
+-- | Parse a single-line table row: each cell of the raw line is re-parsed
+--   with @many plain@, giving one list of blocks per column.
+tableLine indices =
+    try (rawTableLine indices >>= mapM (parseFromStr (many plain)))
+
+-- | Parse a row spanning several lines: gather consecutive raw lines, skip
+--   optional blank lines, join each column's pieces and parse them as plains.
+multilineRow indices = try $ do
+    rowLines <- many1 (rawTableLine indices)
+    option "" blanklines
+    mapM (parseFromStr (many plain) . unlines) (transpose rowLines)
+
+-- | Turn column boundary indices into fractional column widths.  Each width
+--   is divided by the larger of the total length and @numColumns@.
+widthsFromIndices :: Int     -- ^ Number of columns on terminal
+                  -> [Int]   -- ^ Indices
+                  -> [Float] -- ^ Fractional relative sizes of columns
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns indices = drop 1 (map toFraction spans)
+  where
+    -- gap between each index and its predecessor (the first is measured from 0)
+    spans        = zipWith (-) indices (0 : indices)
+    -- the divisor is never smaller than the terminal column count
+    denominator  = fromIntegral (max (sum spans) numColumns) :: Float
+    toFraction w = fromIntegral w / denominator
+
+-- | Parse a table caption: the literal @Table:@ followed by one or more
+--   inlines and then blank lines.  The inlines go through 'normalizeSpaces'.
+tableCaption = try $ do
+    nonindentSpaces >> string "Table:"
+    captionInlines <- many1 inline
+    blanklines
+    let caption = normalizeSpaces captionInlines
+    return caption
+
+-- | Generic table parser: run @headerParser@, then @lineParser@ rows until
+--   @footerParser@ succeeds, then an optional caption; build a 'Table'.
+tableWith headerParser lineParser footerParser = try $ do
+    (rawHeads, aligns, indices) <- headerParser
+    rows    <- many1Till (lineParser indices) footerParser
+    caption <- option [] tableCaption
+    heads   <- mapM (parseFromStr (many plain)) rawHeads
+    st      <- getState
+    let widths = widthsFromIndices (stateColumns st) indices
+    return (Table caption aligns widths heads rows)
+
+-- | Parse a simple table: a '---' header, then one line per row, up to blank lines.
+simpleTable = tableWith simpleTableHeader tableLine blanklines
+
+-- | Parse a multiline table: it starts with a row of '-' on top, then a
+--   header (which may span several lines), then the rows, which may also
+--   span several lines and are separated by blank lines, and it ends
+--   with a footer (a dashed line followed by a blank line).
+multilineTable = tableWith multilineTableHeader multilineRow tableFooter
+
+-- | Parse a multiline table header: a dashed separator line, one or more
+--   lines of heading text, then the line of '-' runs that fixes the columns.
+--   Returns (trimmed heading strings, alignments, column boundary indices).
+multilineTableHeader = try $ do
+    tableSep
+    headLines <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
+    indent    <- nonindentSpaces
+    runs      <- many1 (dashedLine '-')
+    newline
+    let (dashCounts, spans) = unzip runs
+        colStarts = scanl (+) (length indent) spans
+        cutLine   = tail . splitByIndices (init colStarts)
+        -- one list per column, holding that column's piece of every heading line
+        columns   = transpose (map cutLine headLines)
+        headings  = map (removeLeadingTrailingSpace . joinWithSep " ") columns
+    return (headings, zipWith alignType columns dashCounts, colStarts)
+
+-- | Returns the longest of a list of strings; on ties the earliest wins.
+--   The empty list yields the empty string.
+longest :: [String] -> String
+longest [] = ""
+longest strs = foldr1 keepLonger strs
+  where
+    -- Right fold with '>=' so an earlier string beats a later one of equal length.
+    keepLonger x best = if length x >= length best then x else best
+
+-- | Infer a column's alignment from its header rows (@strLst@) and the
+--   length of the dashed line beneath them (@len@): space on the left only
+--   means right-aligned, on the right only left-aligned, both centered.
+alignType :: [String] -> Int -> Alignment
+alignType [] _ = AlignDefault
+alignType strLst len
+    | padLeft && padRight = AlignCenter
+    | padLeft             = AlignRight
+    | padRight            = AlignLeft
+    | otherwise           = AlignDefault
+  where
+    widest   = longest (map removeTrailingSpace strLst)
+    padLeft  = any (`elem` " \t") (take 1 widest)
+    padRight = length widest < len || (widest !! (len - 1)) `elem` " \t"
+
+-- | Parse a table, trying 'simpleTable' before 'multilineTable', after the 'failIfStrict' check.
+table = do
+    failIfStrict
+    simpleTable <|> multilineTable <?> "table"
+
 -- 
 -- inline
 --
-- 
cgit v1.2.3