aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-15 19:52:42 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-15 19:52:42 +0000
commit60989d0637780787fb337b94af212f1ee9e1ae22 (patch)
tree95b5caa1e7e304a47739532a9c4d3767ce54435c /src/Text
parent4224d913880e4f77a358cda868c9d1ca75820506 (diff)
downloadpandoc-60989d0637780787fb337b94af212f1ee9e1ae22.tar.gz
Added support for tables in markdown reader and in LaTeX,
DocBook, and HTML writers. The syntax is documented in README. Tests have been added to the test suite. git-svn-id: https://pandoc.googlecode.com/svn/trunk@493 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Definition.hs11
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs161
-rw-r--r--src/Text/Pandoc/Shared.hs11
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs34
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs32
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs33
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs4
-rw-r--r--src/Text/Pandoc/Writers/RST.hs3
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs2
-rw-r--r--src/Text/ParserCombinators/Pandoc.hs14
10 files changed, 295 insertions, 10 deletions
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
index 2313b1ef1..d16309b4e 100644
--- a/src/Text/Pandoc/Definition.hs
+++ b/src/Text/Pandoc/Definition.hs
@@ -39,6 +39,12 @@ data Meta = Meta [Inline] -- title
String -- date
deriving (Eq, Show, Read)
+-- | Alignment of a table column.
+data Alignment = AlignLeft
+ | AlignRight
+ | AlignCenter
+ | AlignDefault deriving (Eq, Show, Read)
+
-- | Block element.
data Block
= Plain [Inline] -- ^ Plain text, not a paragraph
@@ -57,6 +63,11 @@ data Block
| HorizontalRule -- ^ Horizontal rule
| Note String [Block] -- ^ Footnote or endnote - reference (string),
-- text (list of blocks)
+ | Table [Inline] -- ^ Table caption,
+ [Alignment] -- column alignments,
+ [Float] -- column widths (relative to page),
+ [[Block]] -- column headers, and
+ [[[Block]]] -- rows
deriving (Eq, Read, Show)
-- | Target for a link: either a URL or an indirect (labeled) reference.
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4e6a7b39c..1a77a5958 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown (
readMarkdown
) where
-import Data.List ( findIndex, sortBy )
+import Data.List ( findIndex, sortBy, transpose )
import Data.Char ( isAlphaNum )
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
@@ -88,6 +88,7 @@ setextHChars = ['=','-']
blockQuoteChar = '>'
hyphenChar = '-'
ellipsesChar = '.'
+listColSepChar = '|'
-- treat these as potentially non-text when parsing inline:
specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
@@ -106,9 +107,9 @@ indentSpaces = do
state <- getState
let tabStop = stateTabStop state
count tabStop (char ' ') <|>
- (do{skipNonindentSpaces; string "\t"}) <?> "indentation"
+ (do{nonindentSpaces; string "\t"}) <?> "indentation"
-skipNonindentSpaces = do
+nonindentSpaces = do
state <- getState
let tabStop = stateTabStop state
choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)]))
@@ -192,7 +193,7 @@ parseMarkdown = do
parseBlocks = manyTill block eof
-block = choice [ codeBlock, note, referenceKey, header, hrule, list,
+block = choice [ header, table, codeBlock, note, referenceKey, hrule, list,
blockQuote, htmlBlock, rawLaTeXEnvironment', para,
plain, blankBlock, nullBlock ] <?> "block"
@@ -322,7 +323,7 @@ emacsBoxQuote = try (do
return raw)
emailBlockQuoteStart = try (do
- skipNonindentSpaces
+ nonindentSpaces
char blockQuoteChar
option ' ' (char ' ')
return "> ")
@@ -356,7 +357,7 @@ list = choice [ bulletList, orderedList ] <?> "list"
bulletListStart = try (do
option ' ' newline -- if preceded by a Plain block in a list context
- skipNonindentSpaces
+ nonindentSpaces
notFollowedBy' hrule -- because hrules start out just like lists
oneOf bulletListMarkers
spaceChar
@@ -364,7 +365,7 @@ bulletListStart = try (do
orderedListStart = try (do
option ' ' newline -- if preceded by a Plain block in a list context
- skipNonindentSpaces
+ nonindentSpaces
many1 digit <|> (do{failIfStrict; count 1 letter})
delim <- oneOf orderedListDelimiters
if delim /= '.' then failIfStrict else return ()
@@ -501,7 +502,7 @@ rawHtmlBlocks = try (do
--
referenceKey = try (do
- skipNonindentSpaces
+ nonindentSpaces
label <- reference
char labelSep
skipSpaces
@@ -523,6 +524,150 @@ rawLaTeXEnvironment' = do
failIfStrict
rawLaTeXEnvironment
+--
+-- Tables
+--
+
+-- | Parse a dashed line with optional trailing spaces; return its length
+-- and the length including trailing space.
+dashedLine ch = do
+ dashes <- many1 (char ch)
+ sp <- many spaceChar
+ return $ (length dashes, length $ dashes ++ sp)
+
+-- | Parse a table header with dashed lines of '-' preceded by
+-- one line of text.
+simpleTableHeader = do
+ rawContent <- anyLine
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines) = unzip dashes
+ let indices = scanl (+) (length initSp) lines
+ let rawHeads = tail $ splitByIndices (init indices) rawContent
+ let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+ return $ (rawHeads, aligns, indices)
+
+-- | Parse a table footer - dashed lines followed by blank line.
+tableFooter = try $ do
+ nonindentSpaces
+ many1 (dashedLine '-')
+ blanklines
+
+-- | Parse a table separator - dashed line.
+tableSep = try $ do
+ nonindentSpaces
+ many1 (dashedLine '-')
+ string "\n"
+
+-- | Parse a raw line and split it into chunks by indices.
+rawTableLine indices = do
+ notFollowedBy' (blanklines <|> tableFooter)
+ line <- many1Till anyChar newline
+ return $ map removeLeadingTrailingSpace $ tail $
+ splitByIndices (init indices) line
+
+-- | Parse a table line and return a list of lists of blocks (columns).
+tableLine indices = try $ do
+ rawline <- rawTableLine indices
+ mapM (parseFromStr (many plain)) rawline
+
+-- | Parse a multiline table row and return a list of blocks (columns).
+multilineRow indices = try $ do
+ colLines <- many1 (rawTableLine indices)
+ option "" blanklines
+ let cols = map unlines $ transpose colLines
+ mapM (parseFromStr (many plain)) cols
+
+-- | Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int -- ^ Number of columns on terminal
+ -> [Int] -- ^ Indices
+ -> [Float] -- ^ Fractional relative sizes of columns
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns indices =
+ let lengths = zipWith (-) indices (0:indices)
+ totLength = sum lengths
+ quotient = if totLength > numColumns
+ then fromIntegral totLength
+ else fromIntegral numColumns
+ fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ tail fracs
+
+-- | Parses a table caption: inlines beginning with 'Table:'
+-- and followed by blank lines
+tableCaption = try $ do
+ nonindentSpaces
+ string "Table:"
+ result <- many1 inline
+ blanklines
+ return $ normalizeSpaces result
+
+-- | Parse a table using 'headerParser', 'lineParser', and 'footerParser'
+tableWith headerParser lineParser footerParser = try $ do
+ (rawHeads, aligns, indices) <- headerParser
+ lines <- many1Till (lineParser indices) footerParser
+ caption <- option [] tableCaption
+ heads <- mapM (parseFromStr (many plain)) rawHeads
+ state <- getState
+ let numColumns = stateColumns state
+ let widths = widthsFromIndices numColumns indices
+ return $ Table caption aligns widths heads lines
+
+-- | Parse a simple table with '---' header and one line per row.
+simpleTable = tableWith simpleTableHeader tableLine blanklines
+
+-- | Parse a multiline table: starts with row of '-' on top, then header
+-- (which may be multiline), then the rows,
+-- which may be multiline, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+multilineTable = tableWith multilineTableHeader multilineRow tableFooter
+
+multilineTableHeader = try $ do
+ tableSep
+ rawContent <- many1 (do{notFollowedBy' tableSep;
+ many1Till anyChar newline})
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines) = unzip dashes
+ let indices = scanl (+) (length initSp) lines
+ let rawHeadsList = transpose $ map
+ (\ln -> tail $ splitByIndices (init indices) ln)
+ rawContent
+ let rawHeads = map (joinWithSep " ") rawHeadsList
+ let aligns = zipWith alignType rawHeadsList lengths
+ return $ ((map removeLeadingTrailingSpace rawHeads),
+ aligns, indices)
+
+-- | Returns the longest of a list of strings.
+longest :: [String] -> String
+longest [] = ""
+longest [x] = x
+longest (x:xs) =
+ if (length x) >= (maximum $ map length xs)
+ then x
+ else longest xs
+
+-- | Returns an alignment type for a table, based on a list of strings
+-- (the rows of the column header) and a number (the length of the
+-- dashed line under the rows.
+alignType :: [String] -> Int -> Alignment
+alignType [] len = AlignDefault
+alignType strLst len =
+ let str = longest $ map removeTrailingSpace strLst
+ leftSpace = if null str then False else ((str !! 0) `elem` " \t")
+ rightSpace = (length str < len || (str !! (len - 1)) `elem` " \t") in
+ case (leftSpace, rightSpace) of
+ (True, False) -> AlignRight
+ (False, True) -> AlignLeft
+ (True, True) -> AlignCenter
+ (False, False) -> AlignDefault
+
+table = do
+ failIfStrict
+ result <- simpleTable <|> multilineTable <?> "table"
+ return result
+
--
-- inline
--
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 624f573de..8ee990827 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -30,6 +30,7 @@ Utility functions and definitions used by the various Pandoc modules.
module Text.Pandoc.Shared (
-- * List processing
splitBy,
+ splitByIndices,
-- * Text processing
gsub,
joinWithSep,
@@ -133,6 +134,8 @@ data ParserState = ParserState
stateDate :: String, -- ^ Date of document
stateStrict :: Bool, -- ^ Use strict markdown syntax
stateSmart :: Bool, -- ^ Use smart typography
+ stateColumns :: Int, -- ^ Number of columns in
+ -- terminal (used for tables)
stateHeaderTable :: [HeaderType] -- ^ List of header types used,
-- in what order (rst only)
}
@@ -154,6 +157,7 @@ defaultParserState =
stateDate = [],
stateStrict = False,
stateSmart = False,
+ stateColumns = 80,
stateHeaderTable = [] }
-- | Indent string as a block.
@@ -292,6 +296,13 @@ splitBy sep lst =
rest' = dropWhile (== sep) rest in
first:(splitBy sep rest')
+-- | Split list into chunks divided at specified indices.
+splitByIndices :: [Int] -> [a] -> [[a]]
+splitByIndices [] lst = [lst]
+splitByIndices (x:xs) lst =
+ let (first, rest) = splitAt x lst in
+ first:(splitByIndices (map (\y -> y - x) xs) rest)
+
-- | Normalize a list of inline elements: remove leading and trailing
-- @Space@ elements, and collapse double @Space@s into singles.
normalizeSpaces :: [Inline] -> [Inline]
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index e67b91fcd..ec3801a9a 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -151,7 +151,39 @@ blockToDocbook opts (RawHtml str) = text str -- raw XML block
blockToDocbook opts HorizontalRule = empty -- not semantic
blockToDocbook opts (Note _ _) = empty -- shouldn't occur
blockToDocbook opts (Key _ _) = empty -- shouldn't occur
-blockToDocbook opts _ = inTagsIndented "para" (text "Unknown block type")
+blockToDocbook opts (Table caption aligns widths headers rows) =
+ let alignStrings = map alignmentToString aligns
+ captionDoc = if null caption
+ then empty
+ else inTagsIndented "caption"
+ (inlinesToDocbook opts caption)
+ tableType = if isEmpty captionDoc then "informaltable" else "table" in
+ inTagsIndented tableType $ captionDoc $$
+ (colHeadsToDocbook opts alignStrings widths headers) $$
+ (vcat $ map (tableRowToDocbook opts alignStrings) rows)
+
+colHeadsToDocbook opts alignStrings widths headers =
+ let heads = zipWith3
+ (\align width item -> tableItemToDocbook opts "th" align width item)
+ alignStrings widths headers in
+ inTagsIndented "tr" $ vcat heads
+
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToDocbook opts aligns cols =
+ inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
+
+tableItemToDocbook opts tag align width item =
+ let attrib = [("align", align)] ++
+ if (width /= 0)
+ then [("style", "{width: " ++
+ show (truncate (100*width)) ++ "%;}")]
+ else [] in
+ inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
-- | Put string in CDATA section
cdata :: String -> Doc
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index e119a5c87..d38a57556 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -186,6 +186,38 @@ blockToHtml opts (Header level lst) =
if ((level > 0) && (level <= 6))
then inTagsSimple ("h" ++ show level) contents
else inTagsSimple "p" contents
+blockToHtml opts (Table caption aligns widths headers rows) =
+ let alignStrings = map alignmentToString aligns
+ captionDoc = if null caption
+ then empty
+ else inTagsSimple "caption"
+ (inlineListToHtml opts caption) in
+ inTagsIndented "table" $ captionDoc $$
+ (colHeadsToHtml opts alignStrings widths headers) $$
+ (vcat $ map (tableRowToHtml opts alignStrings) rows)
+
+colHeadsToHtml opts alignStrings widths headers =
+ let heads = zipWith3
+ (\align width item -> tableItemToHtml opts "th" align width item)
+ alignStrings widths headers in
+ inTagsIndented "tr" $ vcat heads
+
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToHtml opts aligns cols =
+ inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToHtml opts "td") aligns (repeat 0) cols
+
+tableItemToHtml opts tag align width item =
+ let attrib = [("align", align)] ++
+ if (width /= 0)
+ then [("style", "{width: " ++
+ show (truncate (100*width)) ++ "%;}")]
+ else [] in
+ inTags False tag attrib $ vcat $ map (blockToHtml opts) item
listItemToHtml :: WriterOptions -> [Block] -> Doc
listItemToHtml opts list =
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index aca72535d..db7af223d 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Writers.LaTeX (
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
+import Text.Printf ( printf )
import List ( (\\) )
-- | Convert Pandoc to LaTeX.
@@ -123,6 +124,38 @@ blockToLaTeX notes (Header level lst) =
then "\\" ++ (concat (replicate (level - 1) "sub")) ++ "section{" ++
(inlineListToLaTeX notes (deVerb lst)) ++ "}\n\n"
else (inlineListToLaTeX notes lst) ++ "\n\n"
+blockToLaTeX notes (Table caption aligns widths heads rows) =
+ let colWidths = map printDecimal widths
+ colDescriptors = concat $ zipWith
+ (\width align -> ">{\\PBS" ++
+ (case align of
+ AlignLeft -> "\\raggedright"
+ AlignRight -> "\\raggedleft"
+ AlignCenter -> "\\centering"
+ AlignDefault -> "\\raggedright") ++
+ "\\hspace{0pt}}p{" ++ width ++
+ "\\textwidth}")
+ colWidths aligns
+ headers = tableRowToLaTeX notes heads
+ captionText = inlineListToLaTeX notes caption
+ tableBody = "\\begin{tabular}{" ++ colDescriptors ++ "}\n" ++
+ headers ++ "\\hline\n" ++
+ (concatMap (tableRowToLaTeX notes) rows) ++
+ "\\end{tabular}\n"
+ centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n" in
+ if null captionText
+ then centered tableBody ++ "\n"
+ else "\\begin{table}[h]\n" ++ centered tableBody ++ "\\caption{" ++
+ captionText ++ "}\n" ++ "\\end{table}\n\n"
+
+
+printDecimal :: Float -> String
+printDecimal = printf "%.2f"
+
+tableColumnWidths notes cols = map (length . (concatMap (blockToLaTeX notes))) cols
+
+tableRowToLaTeX notes cols = joinWithSep " & " (map (concatMap (blockToLaTeX notes)) cols) ++ "\\\\\n"
+
listItemToLaTeX notes list = "\\item " ++
(concatMap (blockToLaTeX notes) list)
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 343942421..0e7704510 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -132,6 +132,10 @@ blockToMarkdown tabStop (OrderedList lst) =
blockToMarkdown tabStop HorizontalRule = text "\n* * * * *\n"
blockToMarkdown tabStop (Header level lst) = text ((replicate level '#') ++
" ") <> (inlineListToMarkdown lst) <> (text "\n")
+blockToMarkdown tabStop (Table caption _ _ headers rows) =
+ blockToMarkdown tabStop (Para [Str "pandoc: TABLE unsupported in Markdown writer"])
+
+
bulletListItemToMarkdown tabStop list =
hang (text "- ") tabStop (vcat (map (blockToMarkdown tabStop) list))
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 7e1581908..b6802ffa2 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -148,6 +148,9 @@ blockToRST tabStop (Header level lst) =
let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) in
let border = text $ replicate headerLength headerChar in
(headerText <> char '\n' <> border <> char '\n', refs)
+blockToRST tabStop (Table caption _ _ headers rows) =
+ blockToRST tabStop (Para [Str "pandoc: TABLE unsupported in RST writer"])
+
-- | Convert bullet list item (list of blocks) to reStructuredText.
-- Returns a pair of 'Doc', the first the main text, the second references
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 20f06d21b..b53e39cb2 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -170,6 +170,8 @@ blockToRTF notes indent HorizontalRule =
blockToRTF notes indent (Header level lst) =
rtfPar indent 0 ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++
(inlineListToRTF notes lst))
+blockToRTF notes indent (Table caption _ _ headers rows) =
+ blockToRTF notes indent (Para [Str "pandoc: TABLE unsupported in RST writer"])
-- | Ensure that there's the same amount of space after compact
-- lists as after regular lists.
diff --git a/src/Text/ParserCombinators/Pandoc.hs b/src/Text/ParserCombinators/Pandoc.hs
index b55ceb23d..a825ef8ff 100644
--- a/src/Text/ParserCombinators/Pandoc.hs
+++ b/src/Text/ParserCombinators/Pandoc.hs
@@ -41,7 +41,8 @@ module Text.ParserCombinators.Pandoc (
enclosed,
blankBlock,
nullBlock,
- stringAnyCase
+ stringAnyCase,
+ parseFromStr
) where
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
@@ -138,3 +139,14 @@ stringAnyCase (x:xs) = try (do
firstChar <- choice [ char (toUpper x), char (toLower x) ]
rest <- stringAnyCase xs
return (firstChar:rest))
+
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromStr :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromStr parser str = try $ do
+ oldInput <- getInput
+ setInput str
+ result <- parser
+ setInput oldInput
+ return result
+
+