diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/CSV.hs | 102 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 52 |
2 files changed, 154 insertions, 0 deletions
diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs new file mode 100644 index 000000000..15492ac52 --- /dev/null +++ b/src/Text/Pandoc/CSV.hs @@ -0,0 +1,102 @@ +{- +Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.CSV + Copyright : Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu> + License : GNU GPL, version 2 or above + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Simple CSV parser. +-} + +module Text.Pandoc.CSV ( + CSVOptions, + defaultCSVOptions, + parseCSV, + ParseError +) where + +import Text.Parsec +import Text.Parsec.Text (Parser) +import Text.Parsec.Error (ParseError) +import Data.Text (Text) +import qualified Data.Text as T +import Control.Monad (void) + +data CSVOptions = CSVOptions{ + csvDelim :: Char + , csvQuote :: Char + , csvKeepSpace :: Bool -- treat whitespace following delim as significant + , csvEscape :: Maybe Char -- default is to double up quote +} deriving (Read, Show) + +defaultCSVOptions :: CSVOptions +defaultCSVOptions = CSVOptions{ + csvDelim = ',' + , csvQuote = '"' + , csvKeepSpace = False + , csvEscape = Nothing } + +parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]] +parseCSV opts t = parse (pCSV opts) "csv" t + +pCSV :: CSVOptions -> Parser [[Text]] +pCSV opts = + (pCSVRow opts `sepEndBy` endline) <* (spaces *> eof) + +pCSVRow :: CSVOptions -> Parser [Text] +pCSVRow opts = notFollowedBy blank >> pCSVCell opts `sepBy` pCSVDelim opts + +blank :: Parser () +blank = try $ spaces >> (() <$ endline <|> eof) + +pCSVCell :: CSVOptions -> Parser Text +pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts + +pCSVQuotedCell :: CSVOptions -> Parser Text +pCSVQuotedCell opts = do + char (csvQuote opts) + res <- many (satisfy (\c -> c /= csvQuote opts) <|> escaped opts) + char (csvQuote opts) + return $ T.pack res + +escaped :: CSVOptions -> Parser Char +escaped opts = do + case csvEscape opts of + Nothing -> try $ char (csvQuote opts) >> char (csvQuote opts) + Just c -> try $ char c >> noneOf "\r\n" + +pCSVUnquotedCell :: CSVOptions -> Parser Text +pCSVUnquotedCell opts = T.pack <$> + many (satisfy $ \c -> c /= csvDelim opts && c /= '\r' && c /= '\n') + +pCSVDelim :: CSVOptions -> Parser () +pCSVDelim opts = do + char (csvDelim opts) + if csvKeepSpace opts + then return () + else skipMany (oneOf " \t") + +endline :: Parser () +endline = do + optional (void $ char '\r') + void $ char '\n' + diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 3b1eee010..6cf8dbae4 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -45,6 +45,7 @@ import Text.Pandoc.Builder (fromList, setMeta) import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, readFileFromDirs) +import Text.Pandoc.CSV (CSVOptions(..), defaultCSVOptions, parseCSV) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.ImageSize (lengthToDim, scaleDimension) @@ -56,6 +57,8 @@ import Text.Printf (printf) import Data.Text (Text) import qualified Data.Text as T +import Debug.Trace + -- TODO: -- [ ] .. parsed-literal -- [ ] .. csv-table @@ -688,6 +691,7 @@ directive' = do case label of "table" -> tableDirective top fields body' "list-table" -> listTableDirective top fields body' + "csv-table" -> csvTableDirective top fields body' "line-block" -> lineBlockDirective body' "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields @@ -820,6 +824,54 @@ listTableDirective top fields body = do takeCells _ = [] normWidths ws = map (/ max 1 (sum ws)) ws + -- TODO + -- [ ] delim: + -- [ ] quote: + -- [ ] keepspace: + -- [ ] escape: + -- [ ] widths: + -- [ ] header-rows: + -- [ ] header: + -- [ ] url: + -- [ ] file: + -- [ ] encoding: +csvTableDirective :: PandocMonad m + => String -> [(String, String)] -> String + -> RSTParser m Blocks +csvTableDirective top fields rawcsv = do + let res = parseCSV defaultCSVOptions (T.pack rawcsv) + case res of + Left e -> do + throwError $ PandocParsecError "csv table" e + Right rows -> do + return $ B.rawBlock "rst" $ show rows +{- + bs <- parseFromString' parseBlocks body + title <- parseFromString' (trimInlines . mconcat <$> many inline) top + let rows = takeRows $ B.toList bs + headerRowsNum = fromMaybe (0 :: Int) $ + lookup "header-rows" fields >>= safeRead + (headerRow,bodyRows,numOfCols) = case rows of + x:xs -> if headerRowsNum > 0 + then (x, xs, length x) + else ([], rows, length x) + _ -> ([],[],0) + widths = case trim <$> lookup "widths" fields of + Just "auto" -> replicate numOfCols 0 + Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $ + splitBy (`elem` (" ," :: String)) specs + _ -> replicate numOfCols 0 + return $ B.table title + (zip (replicate numOfCols AlignDefault) widths) + headerRow + bodyRows + where takeRows [BulletList rows] = map takeCells rows + takeRows _ = [] + takeCells [BulletList cells] = map B.fromList cells + takeCells _ = [] + normWidths ws = map (/ max 1 (sum ws)) ws +-} + -- TODO: -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix |