From 60989d0637780787fb337b94af212f1ee9e1ae22 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Mon, 15 Jan 2007 19:52:42 +0000 Subject: Added support for tables in markdown reader and in LaTeX, DocBook, and HTML writers. The syntax is documented in README. Tests have been added to the test suite. git-svn-id: https://pandoc.googlecode.com/svn/trunk@493 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Main.hs | 10 ++- src/Text/Pandoc/Definition.hs | 11 +++ src/Text/Pandoc/Readers/Markdown.hs | 161 +++++++++++++++++++++++++++++++++-- src/Text/Pandoc/Shared.hs | 11 +++ src/Text/Pandoc/Writers/Docbook.hs | 34 +++++++- src/Text/Pandoc/Writers/HTML.hs | 32 +++++++ src/Text/Pandoc/Writers/LaTeX.hs | 33 +++++++ src/Text/Pandoc/Writers/Markdown.hs | 4 + src/Text/Pandoc/Writers/RST.hs | 3 + src/Text/Pandoc/Writers/RTF.hs | 2 + src/Text/ParserCombinators/Pandoc.hs | 14 ++- src/headers/LaTeXHeader | 4 + 12 files changed, 307 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/Main.hs b/src/Main.hs index 0ca1e5ca5..f3c70c472 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -48,7 +48,7 @@ import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader, import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Regex ( mkRegex, matchRegex ) -import System.Environment ( getArgs, getProgName ) +import System.Environment ( getArgs, getProgName, getEnvironment ) import System.Exit ( exitWith, ExitCode (..) ) import System.Console.GetOpt import System.IO @@ -58,7 +58,7 @@ import Char ( toLower ) import Control.Monad ( (>>=) ) version :: String -version = "0.3" +version = "0.4" copyrightMessage :: String copyrightMessage = "\nCopyright (C) 2006 John MacFarlane\nWeb: http://sophos.berkeley.edu/macfarlane/pandoc\nThis is free software; see the source for copying conditions. There is no\nwarranty, not even for merchantability or fitness for a particular purpose." 
@@ -426,6 +426,11 @@ main = do then return stdout else openFile outputFile WriteMode + environment <- getEnvironment + let columns = case lookup "COLUMNS" environment of + Just cols -> read cols + Nothing -> stateColumns defaultParserState + let tabFilter = if preserveTabs then id else (tabsToSpaces tabStop) let addBlank str = str ++ "\n\n" let removeCRs str = filter (/= '\r') str -- remove DOS-style line endings @@ -435,6 +440,7 @@ main = do stateTabStop = tabStop, stateStandalone = standalone && (not strict), stateSmart = smart || writerName' == "latex", + stateColumns = columns, stateStrict = strict } let csslink = if (css == "") then "" diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index 2313b1ef1..d16309b4e 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -39,6 +39,12 @@ data Meta = Meta [Inline] -- title String -- date deriving (Eq, Show, Read) +-- | Alignment of a table column. +data Alignment = AlignLeft + | AlignRight + | AlignCenter + | AlignDefault deriving (Eq, Show, Read) + -- | Block element. data Block = Plain [Inline] -- ^ Plain text, not a paragraph @@ -57,6 +63,11 @@ data Block | HorizontalRule -- ^ Horizontal rule | Note String [Block] -- ^ Footnote or endnote - reference (string), -- text (list of blocks) + | Table [Inline] -- ^ Table caption, + [Alignment] -- column alignments, + [Float] -- column widths (relative to page), + [[Block]] -- column headers, and + [[[Block]]] -- rows deriving (Eq, Read, Show) -- | Target for a link: either a URL or an indirect (labeled) reference. 
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4e6a7b39c..1a77a5958 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( findIndex, sortBy ) +import Data.List ( findIndex, sortBy, transpose ) import Data.Char ( isAlphaNum ) import Text.ParserCombinators.Pandoc import Text.Pandoc.Definition @@ -88,6 +88,7 @@ setextHChars = ['=','-'] blockQuoteChar = '>' hyphenChar = '-' ellipsesChar = '.' +listColSepChar = '|' -- treat these as potentially non-text when parsing inline: specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, @@ -106,9 +107,9 @@ indentSpaces = do state <- getState let tabStop = stateTabStop state count tabStop (char ' ') <|> - (do{skipNonindentSpaces; string "\t"}) <?> "indentation" + (do{nonindentSpaces; string "\t"}) <?> "indentation" -skipNonindentSpaces = do +nonindentSpaces = do state <- getState let tabStop = stateTabStop state choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)])) @@ -192,7 +193,7 @@ parseMarkdown = do parseBlocks = manyTill block eof -block = choice [ codeBlock, note, referenceKey, header, hrule, list, +block = choice [ header, table, codeBlock, note, referenceKey, hrule, list, blockQuote, htmlBlock, rawLaTeXEnvironment', para, plain, blankBlock, nullBlock ] <?> "block" @@ -322,7 +323,7 @@ emacsBoxQuote = try (do return raw) emailBlockQuoteStart = try (do - skipNonindentSpaces + nonindentSpaces char blockQuoteChar option ' ' (char ' ') return "> ") @@ -356,7 +357,7 @@ list = choice [ bulletList, orderedList ] <?> "list" bulletListStart = try (do option ' ' newline -- if preceded by a Plain block in a list context - skipNonindentSpaces + nonindentSpaces notFollowedBy' hrule -- because hrules start out just like lists oneOf bulletListMarkers spaceChar @@ -364,7 +365,7 @@ bulletListStart = try (do orderedListStart = try (do option ' 
' newline -- if preceded by a Plain block in a list context - skipNonindentSpaces + nonindentSpaces many1 digit <|> (do{failIfStrict; count 1 letter}) delim <- oneOf orderedListDelimiters if delim /= '.' then failIfStrict else return () @@ -501,7 +502,7 @@ rawHtmlBlocks = try (do -- referenceKey = try (do - skipNonindentSpaces + nonindentSpaces label <- reference char labelSep skipSpaces @@ -523,6 +524,150 @@ rawLaTeXEnvironment' = do failIfStrict rawLaTeXEnvironment +-- +-- Tables +-- + +-- | Parse a dashed line with optional trailing spaces; return its length +-- and the length including trailing space. +dashedLine ch = do + dashes <- many1 (char ch) + sp <- many spaceChar + return $ (length dashes, length $ dashes ++ sp) + +-- | Parse a table header with dashed lines of '-' preceded by +-- one line of text. +simpleTableHeader = do + rawContent <- anyLine + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines) = unzip dashes + let indices = scanl (+) (length initSp) lines + let rawHeads = tail $ splitByIndices (init indices) rawContent + let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths + return $ (rawHeads, aligns, indices) + +-- | Parse a table footer - dashed lines followed by blank line. +tableFooter = try $ do + nonindentSpaces + many1 (dashedLine '-') + blanklines + +-- | Parse a table separator - dashed line. +tableSep = try $ do + nonindentSpaces + many1 (dashedLine '-') + string "\n" + +-- | Parse a raw line and split it into chunks by indices. +rawTableLine indices = do + notFollowedBy' (blanklines <|> tableFooter) + line <- many1Till anyChar newline + return $ map removeLeadingTrailingSpace $ tail $ + splitByIndices (init indices) line + +-- | Parse a table line and return a list of lists of blocks (columns). +tableLine indices = try $ do + rawline <- rawTableLine indices + mapM (parseFromStr (many plain)) rawline + +-- | Parse a multiline table row and return a list of blocks (columns). 
+multilineRow indices = try $ do + colLines <- many1 (rawTableLine indices) + option "" blanklines + let cols = map unlines $ transpose colLines + mapM (parseFromStr (many plain)) cols + +-- | Calculate relative widths of table columns, based on indices +widthsFromIndices :: Int -- ^ Number of columns on terminal + -> [Int] -- ^ Indices + -> [Float] -- ^ Fractional relative sizes of columns +widthsFromIndices _ [] = [] +widthsFromIndices numColumns indices = + let lengths = zipWith (-) indices (0:indices) + totLength = sum lengths + quotient = if totLength > numColumns + then fromIntegral totLength + else fromIntegral numColumns + fracs = map (\l -> (fromIntegral l) / quotient) lengths in + tail fracs + +-- | Parses a table caption: inlines beginning with 'Table:' +-- and followed by blank lines +tableCaption = try $ do + nonindentSpaces + string "Table:" + result <- many1 inline + blanklines + return $ normalizeSpaces result + +-- | Parse a table using 'headerParser', 'lineParser', and 'footerParser' +tableWith headerParser lineParser footerParser = try $ do + (rawHeads, aligns, indices) <- headerParser + lines <- many1Till (lineParser indices) footerParser + caption <- option [] tableCaption + heads <- mapM (parseFromStr (many plain)) rawHeads + state <- getState + let numColumns = stateColumns state + let widths = widthsFromIndices numColumns indices + return $ Table caption aligns widths heads lines + +-- | Parse a simple table with '---' header and one line per row. +simpleTable = tableWith simpleTableHeader tableLine blanklines + +-- | Parse a multiline table: starts with row of '-' on top, then header +-- (which may be multiline), then the rows, +-- which may be multiline, separated by blank lines, and +-- ending with a footer (dashed line followed by blank line). 
+multilineTable = tableWith multilineTableHeader multilineRow tableFooter + +multilineTableHeader = try $ do + tableSep + rawContent <- many1 (do{notFollowedBy' tableSep; + many1Till anyChar newline}) + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines) = unzip dashes + let indices = scanl (+) (length initSp) lines + let rawHeadsList = transpose $ map + (\ln -> tail $ splitByIndices (init indices) ln) + rawContent + let rawHeads = map (joinWithSep " ") rawHeadsList + let aligns = zipWith alignType rawHeadsList lengths + return $ ((map removeLeadingTrailingSpace rawHeads), + aligns, indices) + +-- | Returns the longest of a list of strings. +longest :: [String] -> String +longest [] = "" +longest [x] = x +longest (x:xs) = + if (length x) >= (maximum $ map length xs) + then x + else longest xs + +-- | Returns an alignment type for a table, based on a list of strings +-- (the rows of the column header) and a number (the length of the +-- dashed line under the rows). +alignType :: [String] -> Int -> Alignment +alignType [] len = AlignDefault +alignType strLst len = + let str = longest $ map removeTrailingSpace strLst + leftSpace = if null str then False else ((str !! 0) `elem` " \t") + rightSpace = (length str < len || (str !! (len - 1)) `elem` " \t") in + case (leftSpace, rightSpace) of + (True, False) -> AlignRight + (False, True) -> AlignLeft + (True, True) -> AlignCenter + (False, False) -> AlignDefault + +table = do + failIfStrict + result <- simpleTable <|> multilineTable <?> "table" + return result + -- -- inline -- diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 624f573de..8ee990827 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -30,6 +30,7 @@ Utility functions and definitions used by the various Pandoc modules. 
module Text.Pandoc.Shared ( -- * List processing splitBy, + splitByIndices, -- * Text processing gsub, joinWithSep, @@ -133,6 +134,8 @@ data ParserState = ParserState stateDate :: String, -- ^ Date of document stateStrict :: Bool, -- ^ Use strict markdown syntax stateSmart :: Bool, -- ^ Use smart typography + stateColumns :: Int, -- ^ Number of columns in + -- terminal (used for tables) stateHeaderTable :: [HeaderType] -- ^ List of header types used, -- in what order (rst only) } @@ -154,6 +157,7 @@ defaultParserState = stateDate = [], stateStrict = False, stateSmart = False, + stateColumns = 80, stateHeaderTable = [] } -- | Indent string as a block. @@ -292,6 +296,13 @@ splitBy sep lst = rest' = dropWhile (== sep) rest in first:(splitBy sep rest') +-- | Split list into chunks divided at specified indices. +splitByIndices :: [Int] -> [a] -> [[a]] +splitByIndices [] lst = [lst] +splitByIndices (x:xs) lst = + let (first, rest) = splitAt x lst in + first:(splitByIndices (map (\y -> y - x) xs) rest) + -- | Normalize a list of inline elements: remove leading and trailing -- @Space@ elements, and collapse double @Space@s into singles. 
normalizeSpaces :: [Inline] -> [Inline] diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index e67b91fcd..ec3801a9a 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -151,7 +151,39 @@ blockToDocbook opts (RawHtml str) = text str -- raw XML block blockToDocbook opts HorizontalRule = empty -- not semantic blockToDocbook opts (Note _ _) = empty -- shouldn't occur blockToDocbook opts (Key _ _) = empty -- shouldn't occur -blockToDocbook opts _ = inTagsIndented "para" (text "Unknown block type") +blockToDocbook opts (Table caption aligns widths headers rows) = + let alignStrings = map alignmentToString aligns + captionDoc = if null caption + then empty + else inTagsIndented "caption" + (inlinesToDocbook opts caption) + tableType = if isEmpty captionDoc then "informaltable" else "table" in + inTagsIndented tableType $ captionDoc $$ + (colHeadsToDocbook opts alignStrings widths headers) $$ + (vcat $ map (tableRowToDocbook opts alignStrings) rows) + +colHeadsToDocbook opts alignStrings widths headers = + let heads = zipWith3 + (\align width item -> tableItemToDocbook opts "th" align width item) + alignStrings widths headers in + inTagsIndented "tr" $ vcat heads + +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableRowToDocbook opts aligns cols = + inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols + +tableItemToDocbook opts tag align width item = + let attrib = [("align", align)] ++ + if (width /= 0) -- body rows pass width 0, so only header cells get a style + then [("style", "width: " ++ + show (truncate (100*width)) ++ "%;")] -- bare CSS declaration, no braces + else [] in + inTags True tag attrib $ vcat $ map (blockToDocbook opts) item -- | Put string in CDATA section cdata :: String -> Doc diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index e119a5c87..d38a57556 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ 
b/src/Text/Pandoc/Writers/HTML.hs @@ -186,6 +186,38 @@ blockToHtml opts (Header level lst) = if ((level > 0) && (level <= 6)) then inTagsSimple ("h" ++ show level) contents else inTagsSimple "p" contents +blockToHtml opts (Table caption aligns widths headers rows) = + let alignStrings = map alignmentToString aligns + captionDoc = if null caption + then empty + else inTagsSimple "caption" + (inlineListToHtml opts caption) in + inTagsIndented "table" $ captionDoc $$ + (colHeadsToHtml opts alignStrings widths headers) $$ + (vcat $ map (tableRowToHtml opts alignStrings) rows) + +colHeadsToHtml opts alignStrings widths headers = + let heads = zipWith3 + (\align width item -> tableItemToHtml opts "th" align width item) + alignStrings widths headers in + inTagsIndented "tr" $ vcat heads + +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableRowToHtml opts aligns cols = + inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToHtml opts "td") aligns (repeat 0) cols + +tableItemToHtml opts tag align width item = + let attrib = [("align", align)] ++ + if (width /= 0) -- body rows pass width 0, so only header cells get a style + then [("style", "width: " ++ + show (truncate (100*width)) ++ "%;")] -- bare CSS declaration, no braces + else [] in + inTags False tag attrib $ vcat $ map (blockToHtml opts) item listItemToHtml :: WriterOptions -> [Block] -> Doc listItemToHtml opts list = diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index aca72535d..db7af223d 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -32,6 +32,7 @@ module Text.Pandoc.Writers.LaTeX ( ) where import Text.Pandoc.Definition import Text.Pandoc.Shared +import Text.Printf ( printf ) import List ( (\\) ) -- | Convert Pandoc to LaTeX. 
@@ -123,6 +124,38 @@ blockToLaTeX notes (Header level lst) = then "\\" ++ (concat (replicate (level - 1) "sub")) ++ "section{" ++ (inlineListToLaTeX notes (deVerb lst)) ++ "}\n\n" else (inlineListToLaTeX notes lst) ++ "\n\n" +blockToLaTeX notes (Table caption aligns widths heads rows) = + let colWidths = map printDecimal widths + colDescriptors = concat $ zipWith + (\width align -> ">{\\PBS" ++ + (case align of + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright") ++ + "\\hspace{0pt}}p{" ++ width ++ + "\\textwidth}") + colWidths aligns + headers = tableRowToLaTeX notes heads + captionText = inlineListToLaTeX notes caption + tableBody = "\\begin{tabular}{" ++ colDescriptors ++ "}\n" ++ + headers ++ "\\hline\n" ++ + (concatMap (tableRowToLaTeX notes) rows) ++ + "\\end{tabular}\n" + centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n" in + if null captionText + then centered tableBody ++ "\n" + else "\\begin{table}[h]\n" ++ centered tableBody ++ "\\caption{" ++ + captionText ++ "}\n" ++ "\\end{table}\n\n" + + +printDecimal :: Float -> String +printDecimal = printf "%.2f" + +tableColumnWidths notes cols = map (length . 
(concatMap (blockToLaTeX notes))) cols + +tableRowToLaTeX notes cols = joinWithSep " & " (map (concatMap (blockToLaTeX notes)) cols) ++ "\\\\\n" + listItemToLaTeX notes list = "\\item " ++ (concatMap (blockToLaTeX notes) list) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 343942421..0e7704510 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -132,6 +132,10 @@ blockToMarkdown tabStop (OrderedList lst) = blockToMarkdown tabStop HorizontalRule = text "\n* * * * *\n" blockToMarkdown tabStop (Header level lst) = text ((replicate level '#') ++ " ") <> (inlineListToMarkdown lst) <> (text "\n") +blockToMarkdown tabStop (Table caption _ _ headers rows) = + blockToMarkdown tabStop (Para [Str "pandoc: TABLE unsupported in Markdown writer"]) + + bulletListItemToMarkdown tabStop list = hang (text "- ") tabStop (vcat (map (blockToMarkdown tabStop) list)) diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 7e1581908..b6802ffa2 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -148,6 +148,9 @@ blockToRST tabStop (Header level lst) = let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) in let border = text $ replicate headerLength headerChar in (headerText <> char '\n' <> border <> char '\n', refs) +blockToRST tabStop (Table caption _ _ headers rows) = + blockToRST tabStop (Para [Str "pandoc: TABLE unsupported in RST writer"]) + -- | Convert bullet list item (list of blocks) to reStructuredText. 
-- Returns a pair of 'Doc', the first the main text, the second references diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 20f06d21b..b53e39cb2 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -170,6 +170,8 @@ blockToRTF notes indent HorizontalRule = blockToRTF notes indent (Header level lst) = rtfPar indent 0 ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ (inlineListToRTF notes lst)) +blockToRTF notes indent (Table caption _ _ headers rows) = -- tables are not rendered here; emit a placeholder paragraph + blockToRTF notes indent (Para [Str "pandoc: TABLE unsupported in RTF writer"]) -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. diff --git a/src/Text/ParserCombinators/Pandoc.hs b/src/Text/ParserCombinators/Pandoc.hs index b55ceb23d..a825ef8ff 100644 --- a/src/Text/ParserCombinators/Pandoc.hs +++ b/src/Text/ParserCombinators/Pandoc.hs @@ -41,7 +41,8 @@ module Text.ParserCombinators.Pandoc ( enclosed, blankBlock, nullBlock, - stringAnyCase + stringAnyCase, + parseFromStr ) where import Text.ParserCombinators.Parsec import Text.Pandoc.Definition @@ -138,3 +139,14 @@ stringAnyCase (x:xs) = try (do firstChar <- choice [ char (toUpper x), char (toLower x) ] rest <- stringAnyCase xs return (firstChar:rest)) + +-- | Parse contents of 'str' using 'parser' and return result. 
+parseFromStr :: GenParser tok st a -> [tok] -> GenParser tok st a +parseFromStr parser str = try $ do + oldInput <- getInput + setInput str + result <- parser + setInput oldInput + return result + + diff --git a/src/headers/LaTeXHeader b/src/headers/LaTeXHeader index d50bf8ae3..f808ef80f 100644 --- a/src/headers/LaTeXHeader +++ b/src/headers/LaTeXHeader @@ -3,8 +3,12 @@ \usepackage{ucs} \usepackage[utf8x]{inputenc} \usepackage{graphicx} +\usepackage{array} \setlength{\parindent}{0pt} \setlength{\parskip}{6pt plus 2pt minus 1pt} % This is needed for code blocks in footnotes: \usepackage{fancyvrb} \VerbatimFootnotes +% This is needed because raggedright in table elements redefines \\: +\newcommand{\PreserveBackslash}[1]{\let\temp=\\#1\let\\=\temp} +\let\PBS=\PreserveBackslash -- cgit v1.2.3