RST reader: implement csv-table directive.

Most attributes are supported, including `:file:` and `:url:`. A (probably insufficient) test case has been added. Closes #3533.
author: John MacFarlane <jgm@berkeley.edu> 2017-08-10 12:04:08 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2017-08-10 15:01:14 -0700
commit: dee4cbc8549d782d9c3f2e9072b2c141ea4f18ad (patch)
tree: acc5eb2c01d99c0cea75e2493ff51f09e7916351
parent: a5790dd30893cf7143eb64a46fb137caf131a624 (diff)
download: pandoc-dee4cbc8549d782d9c3f2e9072b2c141ea4f18ad.tar.gz
4 files changed, 120 insertions, 48 deletions
diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs
index 15492ac52..db9226469 100644
--- a/src/Text/Pandoc/CSV.hs
+++ b/src/Text/Pandoc/CSV.hs
@@ -28,7 +28,7 @@ Simple CSV parser.
 -}
 
 module Text.Pandoc.CSV (
-  CSVOptions,
+  CSVOptions(..),
   defaultCSVOptions,
   parseCSV,
   ParseError
@@ -74,7 +74,8 @@ pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts
 pCSVQuotedCell :: CSVOptions -> Parser Text
 pCSVQuotedCell opts = do
   char (csvQuote opts)
-  res <- many (satisfy (\c -> c /= csvQuote opts) <|> escaped opts)
+  res <- many (satisfy (\c -> c /= csvQuote opts &&
+                              Just c /= csvEscape opts) <|> escaped opts)
   char (csvQuote opts)
   return $ T.pack res
 
@@ -86,7 +87,8 @@ escaped opts = do
 
 pCSVUnquotedCell :: CSVOptions -> Parser Text
 pCSVUnquotedCell opts = T.pack <$>
-  many (satisfy $ \c -> c /= csvDelim opts && c /= '\r' && c /= '\n')
+  many (satisfy (\c -> c /= csvDelim opts && c /= '\r' && c /= '\n'
+                  && c /= csvQuote opts))
 
 pCSVDelim :: CSVOptions -> Parser ()
 pCSVDelim opts = do
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 6cf8dbae4..0f594fe1b 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,7 +31,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Conversion from reStructuredText to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.RST ( readRST ) where
-import Control.Monad (guard, liftM, mzero, when, forM_)
+import Control.Monad (guard, liftM, mzero, when, forM_, mplus)
 import Control.Monad.Identity (Identity(..))
 import Control.Monad.Except (throwError)
 import Data.Char (isHexDigit, isSpace, toLower, toUpper)
@@ -44,7 +44,7 @@ import Data.Sequence (ViewR (..), viewr)
 import Text.Pandoc.Builder (fromList, setMeta)
 import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
 import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class (PandocMonad, readFileFromDirs)
+import Text.Pandoc.Class (PandocMonad, readFileFromDirs, fetchItem)
 import Text.Pandoc.CSV (CSVOptions(..), defaultCSVOptions, parseCSV)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -53,15 +53,13 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
+import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Printf (printf)
 import Data.Text (Text)
 import qualified Data.Text as T
 
-import Debug.Trace
-
 -- TODO:
 -- [ ] .. parsed-literal
--- [ ] .. csv-table
 
 -- | Parse reStructuredText string and return Pandoc document.
 readRST :: PandocMonad m
@@ -824,53 +822,66 @@ listTableDirective top fields body = do
           takeCells _ = []
           normWidths ws = map (/ max 1 (sum ws)) ws
 
-  -- TODO
-  -- [ ] delim:
-  -- [ ] quote:
-  -- [ ] keepspace:
-  -- [ ] escape:
-  -- [ ] widths:
-  -- [ ] header-rows:
-  -- [ ] header:
-  -- [ ] url:
-  -- [ ] file:
-  -- [ ] encoding:
 csvTableDirective :: PandocMonad m
                    => String -> [(String, String)] -> String
                    -> RSTParser m Blocks
 csvTableDirective top fields rawcsv = do
-  let res = parseCSV defaultCSVOptions (T.pack rawcsv)
+  let explicitHeader = trim <$> lookup "header" fields
+  let opts = defaultCSVOptions{
+                csvDelim = case trim <$> lookup "delim" fields of
+                                Just "tab" -> '\t'
+                                Just "space" -> ' '
+                                Just [c] -> c
+                                _ -> ','
+              , csvQuote = case trim <$> lookup "quote" fields of
+                                Just [c] -> c
+                                _ -> '"'
+              , csvEscape = case trim <$> lookup "escape" fields of
+                                Just [c] -> Just c
+                                _ -> Nothing
+              , csvKeepSpace = case trim <$> lookup "keepspace" fields of
+                                       Just "true" -> True
+                                       _ -> False
+              }
+  let headerRowsNum = fromMaybe (case explicitHeader of
+                                       Just _  -> 1 :: Int
+                                       Nothing -> 0 :: Int) $
+           lookup "header-rows" fields >>= safeRead
+  rawcsv' <- case trim <$>
+                    lookup "file" fields `mplus` lookup "url" fields of
+                  Just u  -> do
+                    (bs, _) <- fetchItem Nothing u
+                    return $ UTF8.toString bs
+                  Nothing -> return rawcsv
+  let res = parseCSV opts (T.pack $ case explicitHeader of
+                                         Just h -> h ++ "\n" ++ rawcsv'
+                                         Nothing -> rawcsv')
   case res of
        Left e  -> do
          throwError $ PandocParsecError "csv table" e
-       Right rows -> do
-         return $ B.rawBlock "rst" $ show rows
-{-
-  bs <- parseFromString' parseBlocks body
-  title <- parseFromString' (trimInlines . mconcat <$> many inline) top
-  let rows = takeRows $ B.toList bs
-      headerRowsNum = fromMaybe (0 :: Int) $
-         lookup "header-rows" fields >>= safeRead
-      (headerRow,bodyRows,numOfCols) = case rows of
-        x:xs -> if headerRowsNum > 0
-                   then (x, xs, length x)
-                   else ([], rows, length x)
-        _ -> ([],[],0)
-      widths = case trim <$> lookup "widths" fields of
-        Just "auto" -> replicate numOfCols 0
-        Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $
-                           splitBy (`elem` (" ," :: String)) specs
-        _ -> replicate numOfCols 0
-  return $ B.table title
-             (zip (replicate numOfCols AlignDefault) widths)
-             headerRow
-             bodyRows
-    where takeRows [BulletList rows] = map takeCells rows
-          takeRows _ = []
-          takeCells [BulletList cells] = map B.fromList cells
-          takeCells _ = []
-          normWidths ws = map (/ max 1 (sum ws)) ws
--}
+       Right rawrows -> do
+         let parseCell = parseFromString' (plain <|> return mempty) . T.unpack
+         let parseRow = mapM parseCell
+         rows <- mapM parseRow rawrows
+         let (headerRow,bodyRows,numOfCols) =
+              case rows of
+                   x:xs -> if headerRowsNum > 0
+                          then (x, xs, length x)
+                          else ([], rows, length x)
+                   _ -> ([],[],0)
+         title <- parseFromString' (trimInlines . mconcat <$> many inline) top
+         let normWidths ws = map (/ max 1 (sum ws)) ws
+         let widths =
+               case trim <$> lookup "widths" fields of
+                 Just "auto" -> replicate numOfCols 0
+                 Just specs -> normWidths
+                               $ map (fromMaybe (0 :: Double) . safeRead)
+                               $ splitBy (`elem` (" ," :: String)) specs
+                 _ -> replicate numOfCols 0
+         return $ B.table title
+                  (zip (replicate numOfCols AlignDefault) widths)
+                  headerRow
+                  bodyRows
 
 -- TODO:
 --  - Only supports :format: fields with a single format for :raw: roles,
diff --git a/test/command/3533-rst-csv-tables.csv b/test/command/3533-rst-csv-tables.csv
new file mode 100644
index 000000000..efef5e4d5
--- /dev/null
+++ b/test/command/3533-rst-csv-tables.csv
@@ -0,0 +1,4 @@
+"Albatross", 2.99, "On a stick!"
+"Crunchy Frog", 1.49, "If we took the bones out, it wouldn't be
+crunchy, now would it?"
+
diff --git a/test/command/3533-rst-csv-tables.md b/test/command/3533-rst-csv-tables.md
new file mode 100644
index 000000000..0e6ed4fea
--- /dev/null
+++ b/test/command/3533-rst-csv-tables.md
@@ -0,0 +1,55 @@
+```
+% pandoc -f rst -t native
+.. csv-table:: Test
+   :widths: 10, 5, 10
+   :header: Flavor,Price,Slogan
+   :file: command/3533-rst-csv-tables.csv
+^D
+[Table [Str "Test"] [AlignDefault,AlignDefault,AlignDefault] [0.4,0.2,0.4]
+ [[Plain [Str "Flavor"]]
+ ,[Plain [Str "Price"]]
+ ,[Plain [Str "Slogan"]]]
+ [[[Plain [Str "Albatross"]]
+  ,[Plain [Str "2.99"]]
+  ,[Plain [Str "On",Space,Str "a",Space,Str "stick!"]]]
+ ,[[Plain [Str "Crunchy",Space,Str "Frog"]]
+  ,[Plain [Str "1.49"]]
+  ,[Plain [Str "If",Space,Str "we",Space,Str "took",Space,Str "the",Space,Str "bones",Space,Str "out,",Space,Str "it",Space,Str "wouldn't",Space,Str "be",SoftBreak,Str "crunchy,",Space,Str "now",Space,Str "would",Space,Str "it?"]]]]]
+```
+
+```
+% pandoc -f rst -t native
+.. csv-table:: Test
+   :header-rows: 1
+   :quote: '
+   :delim: space
+
+   '' 'a' 'b'
+   'cat''s' 3 4
+   'dog''s' 2 3
+^D
+[Table [Str "Test"] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [[]
+ ,[Plain [Str "a"]]
+ ,[Plain [Str "b"]]]
+ [[[Plain [Str "cat's"]]
+  ,[Plain [Str "3"]]
+  ,[Plain [Str "4"]]]
+ ,[[Plain [Str "dog's"]]
+  ,[Plain [Str "2"]]
+  ,[Plain [Str "3"]]]]]
+```
+
+```
+% pandoc -f rst -t native
+.. csv-table:: Test
+   :escape: \
+
+   "1","\""
+^D
+[Table [Str "Test"] [AlignDefault,AlignDefault] [0.0,0.0]
+ []
+ [[[Plain [Str "1"]]
+  ,[Plain [Str "\""]]]]]
+```
+
author	John MacFarlane <jgm@berkeley.edu>	2017-08-10 12:04:08 -0700
committer	John MacFarlane <jgm@berkeley.edu>	2017-08-10 15:01:14 -0700
commit	dee4cbc8549d782d9c3f2e9072b2c141ea4f18ad (patch)
tree	acc5eb2c01d99c0cea75e2493ff51f09e7916351
parent	a5790dd30893cf7143eb64a46fb137caf131a624 (diff)
download	pandoc-dee4cbc8549d782d9c3f2e9072b2c141ea4f18ad.tar.gz