aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Readers/CSV.hs70
1 files changed, 5 insertions, 65 deletions
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 7c3597570..103e211e7 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -16,13 +16,11 @@ Conversion from CSV to a 'Pandoc' table.
-}
module Text.Pandoc.Readers.CSV ( readCSV ) where
import Prelude
-import Control.Monad (guard)
import Data.Text (Text)
import qualified Data.Text as T
+import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
-import Text.Parsec
-import Text.Parsec.Text (Parser)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Error
@@ -34,73 +32,15 @@ readCSV :: PandocMonad m
-> Text -- ^ Text to parse (assuming @'\n'@ line endings)
-> m Pandoc
readCSV _opts s = do
- case parse pCSV "input" (crFilter s) of
+ case parseCSV defaultCSVOptions (crFilter s) of
Right (r:rs) -> return $ B.doc $ B.table capt (zip aligns widths) hdrs rows
where capt = mempty
numcols = length r
- hdrs = map (B.plain . B.text) r
- rows = map (map (B.plain . B.text)) rs
+ toplain = B.plain . B.text . T.strip
+ hdrs = map toplain r
+ rows = map (map toplain) rs
aligns = replicate numcols AlignDefault
widths = replicate numcols 0
Right [] -> return $ B.doc mempty
Left e -> throwError $ PandocParsecError s e
-{- from RFC 4180
-
- The ABNF grammar [2] appears as follows:
-
- file = [header CRLF] record *(CRLF record) [CRLF]
-
- header = name *(COMMA name)
-
- record = field *(COMMA field)
-
- name = field
-
- field = (escaped / non-escaped)
-
- escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE
-
- non-escaped = *TEXTDATA
-
- COMMA = %x2C
-
- CR = %x0D ;as per section 6.1 of RFC 2234 [2]
-
- DQUOTE = %x22 ;as per section 6.1 of RFC 2234 [2]
-
- LF = %x0A ;as per section 6.1 of RFC 2234 [2]
-
- CRLF = CR LF ;as per section 6.1 of RFC 2234 [2]
-
- TEXTDATA = %x20-21 / %x23-2B / %x2D-7E
--}
-
-pCSV :: Parser [[Text]]
-pCSV = many pRecord
-
-pRecord :: Parser [Text]
-pRecord = do
- x <- pField
- xs <- many $ pComma >> pField
- () <$ newline <|> (guard (not (T.null x) || not (null xs)) >> eof)
- return (x:xs)
-
-pField :: Parser Text
-pField = pEscaped <|> pUnescaped
-
-pComma :: Parser Char
-pComma = char ','
-
-pUnescaped :: Parser Text
-pUnescaped = T.strip . T.pack <$> many (noneOf "\n\r\",")
-
-pEscaped :: Parser Text
-pEscaped = do
- char '"'
- t <- T.pack <$> many (pDoubledQuote <|> noneOf "\"")
- char '"'
- return t
-
-pDoubledQuote :: Parser Char
-pDoubledQuote = try $ char '"' >> char '"'