{- |
   Module      : Text.Pandoc.CSV
   Copyright   : Copyright (C) 2017-2021 John MacFarlane <jgm@berkeley.edu>
   License     : GNU GPL, version 2 or above
   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Simple CSV parser.
-}

module Text.Pandoc.CSV (
  CSVOptions(..),
  defaultCSVOptions,
  parseCSV,
  ParseError
) where

import Control.Monad (unless, void)
import Data.Text (Text)
import qualified Data.Text as T
import Text.Parsec
import Text.Parsec.Text (Parser)

-- | Settings that control how delimiter-separated text is split into cells.
data CSVOptions = CSVOptions
  { csvDelim     :: Char
    -- ^ Character that separates cells within a row.
  , csvQuote     :: Char
    -- ^ Character that opens and closes a quoted cell.
  , csvKeepSpace :: Bool
    -- ^ Treat whitespace following the delimiter as significant.
  , csvEscape    :: Maybe Char
    -- ^ Escape character used inside quoted cells; with 'Nothing' a
    -- literal quote is written by doubling the quote character.
  } deriving (Read, Show)

-- | Default settings: comma delimiter, double-quote quoting, whitespace
-- after a delimiter is not kept, and no escape character (quotes inside
-- quoted cells are written doubled).
defaultCSVOptions :: CSVOptions
defaultCSVOptions =
  CSVOptions
    { csvDelim     = ','
    , csvQuote     = '"'
    , csvKeepSpace = False
    , csvEscape    = Nothing
    }

-- | Parse a complete document into rows of cells using the given options.
-- Returns 'Left' with a Parsec 'ParseError' on malformed input; the source
-- name reported in errors is @"csv"@.
parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
parseCSV opts = parse (pCSV opts) "csv"

-- | Parse a whole document: rows separated (and optionally terminated) by
-- line endings, then any trailing whitespace, then end of input.
pCSV :: CSVOptions -> Parser [[Text]]
pCSV opts = do
  rows <- sepEndBy (pCSVRow opts) endline
  spaces
  eof
  return rows

-- | Parse one row: a first cell followed by zero or more cells, each
-- introduced by a delimiter.
--
-- An /unquoted/ empty first cell matches without consuming any input, so
-- in that case at least one delimiter-prefixed cell is required; otherwise
-- this parser would succeed on a blank line or at end of input without
-- making progress.  An explicitly quoted empty cell (e.g. @""@) does
-- consume input and is therefore accepted as a complete one-cell row
-- rather than being rejected with a parse error.
pCSVRow :: CSVOptions -> Parser [Text]
pCSVRow opts = do
  -- Peek (without consuming) to learn whether the first cell is quoted,
  -- since an empty result alone cannot tell @""@ apart from nothing at all.
  startsQuoted <- option False (True <$ lookAhead (char (csvQuote opts)))
  x <- pCSVCell opts
  let nextCell = pCSVDelim opts *> pCSVCell opts
  xs <- if T.null x && not startsQuoted
           then many1 nextCell
           else many nextCell
  return (x:xs)

-- | Parse a single cell, trying the quoted form first and falling back to
-- the unquoted form.
pCSVCell :: CSVOptions -> Parser Text
pCSVCell opts = quoted <|> unquoted
  where
    quoted   = pCSVQuotedCell opts
    unquoted = pCSVUnquotedCell opts

-- | Parse a cell wrapped in the quote character.  The content is any run
-- of characters that are neither the quote nor the escape character,
-- interleaved with escape sequences recognised by 'escaped'.  The
-- surrounding quotes are not part of the result.
pCSVQuotedCell :: CSVOptions -> Parser Text
pCSVQuotedCell opts =
  T.pack <$> between quoteMark quoteMark (many cellChar)
  where
    quoteMark = char (csvQuote opts)
    cellChar  = satisfy isPlain <|> escaped opts
    -- A plain character is anything other than the quote or escape char.
    isPlain c = not (c == csvQuote opts || Just c == csvEscape opts)

-- | Parse one escape sequence inside a quoted cell and return the
-- character it stands for.  Without an escape character configured, the
-- sequence is a doubled quote, yielding one quote.  With an escape
-- character, the sequence is that character followed by any character
-- other than a carriage return or newline, yielding the latter.  Wrapped
-- in 'try' so a partial match consumes nothing.
escaped :: CSVOptions -> Parser Char
escaped opts = try (maybe doubledQuote afterEscape (csvEscape opts))
  where
    quoteChar     = csvQuote opts
    doubledQuote  = char quoteChar *> char quoteChar
    afterEscape e = char e *> noneOf "\r\n"

-- | Parse an unquoted cell: every character up to (but excluding) the next
-- delimiter, carriage return, or newline.  May be empty, in which case no
-- input is consumed.
pCSVUnquotedCell :: CSVOptions -> Parser Text
pCSVUnquotedCell opts = fmap T.pack (many (satisfy inCell))
  where
    inCell c = c `notElem` [csvDelim opts, '\r', '\n']

-- | Parse one delimiter and, unless 'csvKeepSpace' is set, skip the spaces
-- and tabs that follow it.
--
-- The delimiter character itself is never skipped as padding.  When the
-- delimiter is a tab or a space, treating it as skippable whitespace
-- would swallow the delimiters of subsequent empty cells (e.g. with a tab
-- delimiter, @a\\t\\tb@ would yield two cells instead of three).
pCSVDelim :: CSVOptions -> Parser ()
pCSVDelim opts = do
  void $ char delim
  unless (csvKeepSpace opts) $ skipMany (satisfy isPadding)
  where
    delim = csvDelim opts
    -- Space or tab, excluding whichever of them serves as the delimiter.
    isPadding c = (c == ' ' || c == '\t') && c /= delim

-- | Parse a line ending: a newline, optionally preceded by a carriage
-- return.
endline :: Parser ()
endline = optional (char '\r') *> void (char '\n')