diff options
-rw-r--r-- | src/Text/Pandoc/Readers/CSV.hs | 70 |
1 files changed, 5 insertions, 65 deletions
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs index 7c3597570..103e211e7 100644 --- a/src/Text/Pandoc/Readers/CSV.hs +++ b/src/Text/Pandoc/Readers/CSV.hs @@ -16,13 +16,11 @@ Conversion from CSV to a 'Pandoc' table. -} module Text.Pandoc.Readers.CSV ( readCSV ) where import Prelude -import Control.Monad (guard) import Data.Text (Text) import qualified Data.Text as T +import Text.Pandoc.CSV (parseCSV, defaultCSVOptions) import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B -import Text.Parsec -import Text.Parsec.Text (Parser) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Error @@ -34,73 +32,15 @@ readCSV :: PandocMonad m -> Text -- ^ Text to parse (assuming @'\n'@ line endings) -> m Pandoc readCSV _opts s = do - case parse pCSV "input" (crFilter s) of + case parseCSV defaultCSVOptions (crFilter s) of Right (r:rs) -> return $ B.doc $ B.table capt (zip aligns widths) hdrs rows where capt = mempty numcols = length r - hdrs = map (B.plain . B.text) r - rows = map (map (B.plain . B.text)) rs + toplain = B.plain . B.text . T.strip + hdrs = map toplain r + rows = map (map toplain) rs aligns = replicate numcols AlignDefault widths = replicate numcols 0 Right [] -> return $ B.doc mempty Left e -> throwError $ PandocParsecError s e -{- from RFC 4180 - - The ABNF grammar [2] appears as follows: - - file = [header CRLF] record *(CRLF record) [CRLF] - - header = name *(COMMA name) - - record = field *(COMMA field) - - name = field - - field = (escaped / non-escaped) - - escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE - - non-escaped = *TEXTDATA - - COMMA = %x2C - - CR = %x0D ;as per section 6.1 of RFC 2234 [2] - - DQUOTE = %x22 ;as per section 6.1 of RFC 2234 [2] - - LF = %x0A ;as per section 6.1 of RFC 2234 [2] - - CRLF = CR LF ;as per section 6.1 of RFC 2234 [2] - - TEXTDATA = %x20-21 / %x23-2B / %x2D-7E --} - -pCSV :: Parser [[Text]] -pCSV = many pRecord - -pRecord :: Parser [Text] -pRecord = do - x <- pField - xs <- many $ pComma >> pField - () <$ newline <|> (guard (not (T.null x) || not (null xs)) >> eof) - return (x:xs) - -pField :: Parser Text -pField = pEscaped <|> pUnescaped - -pComma :: Parser Char -pComma = char ',' - -pUnescaped :: Parser Text -pUnescaped = T.strip . T.pack <$> many (noneOf "\n\r\",") - -pEscaped :: Parser Text -pEscaped = do - char '"' - t <- T.pack <$> many (pDoubledQuote <|> noneOf "\"") - char '"' - return t - -pDoubledQuote :: Parser Char -pDoubledQuote = try $ char '"' >> char '"' |