Merge branch 'master' into textcolor-support

author: schrieveslaach <schrieveslaach@online.de> 2017-06-12 15:52:29 +0200
committer: GitHub <noreply@github.com> 2017-06-12 15:52:29 +0200
commit: 635f299b441e238ccd34e3ad61c5e36f0ca30067 (patch)
tree: 11cfc34402975bad208f9a48d075fe2ace959e70 /src/Text/Pandoc/Readers/RST.hs
parent: 181c56d4003aa83abed23b95a452c4890aa3797c (diff)
parent: 23f3c2d7b4796d1af742a74999ce67924bf2abb3 (diff)
download: pandoc-635f299b441e238ccd34e3ad61c5e36f0ca30067.tar.gz
1 files changed, 111 insertions, 61 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 7564998ff..fb5f6f2d4 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.RST
-   Copyright   : Copyright (C) 2006-2015 John MacFarlane
+   Copyright   : Copyright (C) 2006-2017 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -32,10 +32,11 @@ Conversion from reStructuredText to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.RST ( readRST ) where
 import Control.Monad (guard, liftM, mzero, when)
+import Control.Monad.Identity (Identity(..))
 import Control.Monad.Except (throwError)
 import Data.Char (isHexDigit, isSpace, toLower, toUpper)
-import Data.List (deleteFirstsBy, findIndex, intercalate, isInfixOf, isSuffixOf,
-                  nub, sort, transpose, union)
+import Data.List (deleteFirstsBy, findIndex, intercalate, isInfixOf,
+                  isSuffixOf, nub, sort, transpose, union)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isJust)
 import Data.Monoid ((<>))
@@ -52,20 +53,22 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
 import Text.Printf (printf)
+import Data.Text (Text)
+import qualified Data.Text as T
 
 -- TODO:
 -- [ ] .. parsed-literal
 -- [ ] :widths: attribute in .. table
 -- [ ] .. csv-table
--- [ ] .. list-table
 
 -- | Parse reStructuredText string and return Pandoc document.
 readRST :: PandocMonad m
         => ReaderOptions -- ^ Reader options
-        -> String        -- ^ String to parse (assuming @'\n'@ line endings)
+        -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
         -> m Pandoc
 readRST opts s = do
-  parsed <- (readWithM parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+  parsed <- (readWithM parseRST) def{ stateOptions = opts }
+               (T.unpack s ++ "\n\n")
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -131,7 +134,10 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
        adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author"
                                            $ M.adjust toPlain "date"
                                            $ M.adjust toPlain "title"
-                                           $ M.mapKeys (\k -> if k == "authors" then "author" else k)
+                                           $ M.mapKeys (\k ->
+                                                 if k == "authors"
+                                                    then "author"
+                                                    else k)
                                            $ metamap
        toPlain (MetaBlocks [Para xs]) = MetaInlines xs
        toPlain x                      = x
@@ -193,7 +199,7 @@ parseRST = do
 parseCitation :: PandocMonad m
               => (String, String) -> RSTParser m (Inlines, [Blocks])
 parseCitation (ref, raw) = do
-  contents <- parseFromString parseBlocks raw
+  contents <- parseFromString' parseBlocks raw
   return $ (B.spanWith (ref, ["citation-label"], []) (B.str ref),
            [contents])
 
@@ -243,7 +249,7 @@ fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks])
 fieldListItem minIndent = try $ do
   (name, raw) <- rawFieldListItem minIndent
   term <- parseInlineFromString name
-  contents <- parseFromString parseBlocks raw
+  contents <- parseFromString' parseBlocks raw
   optional blanklines
   return (term, [contents])
 
@@ -442,7 +448,7 @@ blockQuote :: PandocMonad m => RSTParser m Blocks
 blockQuote = do
   raw <- indentedBlock
   -- parse the extracted block, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+  contents <- parseFromString' parseBlocks $ raw ++ "\n\n"
   return $ B.blockQuote contents
 
 {-
@@ -530,7 +536,7 @@ definitionListItem = try $ do
   term <- trimInlines . mconcat <$> many1Till inline endline
   raw <- indentedBlock
   -- parse the extracted block, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ raw ++ "\n"
+  contents <- parseFromString' parseBlocks $ raw ++ "\n"
   return (term, [contents])
 
 definitionList :: PandocMonad m => RSTParser m Blocks
@@ -558,26 +564,16 @@ listLine :: Monad m => Int -> RSTParser m [Char]
 listLine markerLength = try $ do
   notFollowedBy blankline
   indentWith markerLength
-  line <- anyLine
-  return $ line ++ "\n"
-
--- indent by specified number of spaces (or equiv. tabs)
-indentWith :: Monad m => Int -> RSTParser m [Char]
-indentWith num = do
-  tabStop <- getOption readerTabStop
-  if (num < tabStop)
-     then count num  (char ' ')
-     else choice [ try (count num (char ' ')),
-                   (try (char '\t' >> count (num - tabStop) (char ' '))) ]
+  anyLineNewline
 
 -- parse raw text for one list item, excluding start marker and continuations
 rawListItem :: Monad m => RSTParser m Int
             -> RSTParser m (Int, [Char])
 rawListItem start = try $ do
   markerLength <- start
-  firstLine <- anyLine
+  firstLine <- anyLineNewline
   restLines <- many (listLine markerLength)
-  return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+  return (markerLength, firstLine ++ concat restLines)
 
 -- continuation of a list item - indented and separated by blankline or
 -- (in compact lists) endline.
@@ -602,13 +598,17 @@ listItem start = try $ do
   let oldContext = stateParserContext state
   setState $ state {stateParserContext = ListItemState}
   -- parse the extracted block, which may itself contain block elements
-  parsed <- parseFromString parseBlocks $ concat (first:rest) ++ "\n"
+  parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n"
   updateState (\st -> st {stateParserContext = oldContext})
   return $ case B.toList parsed of
-                [Para xs] -> B.singleton $ Plain xs
-                [Para xs, BulletList ys] -> B.fromList [Plain xs, BulletList ys]
-                [Para xs, OrderedList s ys] -> B.fromList [Plain xs, OrderedList s ys]
-                [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys]
+                [Para xs] ->
+                   B.singleton $ Plain xs
+                [Para xs, BulletList ys] ->
+                   B.fromList [Plain xs, BulletList ys]
+                [Para xs, OrderedList s ys] ->
+                   B.fromList [Plain xs, OrderedList s ys]
+                [Para xs, DefinitionList ys] ->
+                   B.fromList [Plain xs, DefinitionList ys]
                 _         -> parsed
 
 orderedList :: PandocMonad m => RSTParser m Blocks
@@ -685,22 +685,23 @@ directive' = do
                           (lengthToDim . filter (not . isSpace))
   case label of
         "table" -> tableDirective top fields body'
+        "list-table" -> listTableDirective top fields body'
         "line-block" -> lineBlockDirective body'
         "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
         "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
-        "container" -> parseFromString parseBlocks body'
+        "container" -> parseFromString' parseBlocks body'
         "replace" -> B.para <$>  -- consumed by substKey
                    parseInlineFromString (trim top)
         "unicode" -> B.para <$>  -- consumed by substKey
                    parseInlineFromString (trim $ unicodeTransform top)
-        "compound" -> parseFromString parseBlocks body'
-        "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body'
-        "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body'
-        "highlights" -> B.blockQuote <$> parseFromString parseBlocks body'
+        "compound" -> parseFromString' parseBlocks body'
+        "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body'
+        "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body'
+        "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body'
         "rubric" -> B.para . B.strong <$> parseInlineFromString top
         _ | label `elem` ["attention","caution","danger","error","hint",
                           "important","note","tip","warning","admonition"] ->
-           do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
+           do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
               let lab = case label of
                              "admonition" -> mempty
                              (l:ls) -> B.divWith ("",["admonition-title"],[])
@@ -713,11 +714,11 @@ directive' = do
                           (trim top ++ if null subtit
                                           then ""
                                           else (":  " ++ subtit))
-              bod <- parseFromString parseBlocks body'
+              bod <- parseFromString' parseBlocks body'
               return $ B.divWith ("",["sidebar"],[]) $ tit <> bod
         "topic" ->
            do tit <- B.para . B.strong <$> parseInlineFromString top
-              bod <- parseFromString parseBlocks body'
+              bod <- parseFromString' parseBlocks body'
               return $ B.divWith ("",["topic"],[]) $ tit <> bod
         "default-role" -> mempty <$ updateState (\s ->
                               s { stateRstDefaultRole =
@@ -733,9 +734,10 @@ directive' = do
         "math" -> return $ B.para $ mconcat $ map B.displayMath
                          $ toChunks $ top ++ "\n\n" ++ body
         "figure" -> do
-           (caption, legend) <- parseFromString extractCaption body'
+           (caption, legend) <- parseFromString' extractCaption body'
            let src = escapeURI $ trim top
-           return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend
+           return $ B.para (B.imageWith (imgAttr "figclass") src "fig:"
+                       caption) <> legend
         "image" -> do
            let src = escapeURI $ trim top
            let alt = B.str $ maybe "image" trim $ lookup "alt" fields
@@ -746,38 +748,74 @@ directive' = do
                                      $ B.imageWith attr src "" alt
                           Nothing -> B.imageWith attr src "" alt
         "class" -> do
-            let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields)
+            let attrs = ("", (splitBy isSpace $ trim top),
+                              map (\(k,v) -> (k, trimr v)) fields)
             --  directive content or the first immediately following element
             children <- case body of
                 "" -> block
-                _  -> parseFromString parseBlocks  body'
+                _  -> parseFromString' parseBlocks  body'
             return $ B.divWith attrs children
         other     -> do
             pos <- getPosition
             logMessage $ SkippedContent (".. " ++ other) pos
-            bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
+            bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
             return $ B.divWith ("",[other],[]) bod
 
 tableDirective :: PandocMonad m
                => String -> [(String, String)] -> String -> RSTParser m Blocks
 tableDirective top _fields body = do
-  bs <- parseFromString parseBlocks body
+  bs <- parseFromString' parseBlocks body
   case B.toList bs of
        [Table _ aligns' widths' header' rows'] -> do
-         title <- parseFromString (trimInlines . mconcat <$> many inline) top
+         title <- parseFromString' (trimInlines . mconcat <$> many inline) top
          -- TODO widths
          -- align is not applicable since we can't represent whole table align
          return $ B.singleton $ Table (B.toList title)
                                   aligns' widths' header' rows'
        _ -> return mempty
 
+
+-- TODO: :stub-columns:.
+-- Only the first row becomes the header even if header-rows: > 1,
+-- since Pandoc doesn't support a table with multiple header rows.
+-- We don't need to parse :align: as it represents the whole table align.
+listTableDirective :: PandocMonad m
+                   => String -> [(String, String)] -> String
+                   -> RSTParser m Blocks
+listTableDirective top fields body = do
+  bs <- parseFromString' parseBlocks body
+  title <- parseFromString' (trimInlines . mconcat <$> many inline) top
+  let rows = takeRows $ B.toList bs
+      headerRowsNum = fromMaybe (0 :: Int) $
+         lookup "header-rows" fields >>= safeRead
+      (headerRow,bodyRows,numOfCols) = case rows of
+        x:xs -> if headerRowsNum > 0
+                   then (x, xs, length x)
+                   else ([], rows, length x)
+        _ -> ([],[],0)
+      widths = case trim <$> lookup "widths" fields of
+        Just "auto" -> replicate numOfCols 0
+        Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $
+                           splitBy (`elem` (" ," :: String)) specs
+        _ -> replicate numOfCols 0
+  return $ B.table title
+             (zip (replicate numOfCols AlignDefault) widths)
+             headerRow
+             bodyRows
+    where takeRows [BulletList rows] = map takeCells rows
+          takeRows _ = []
+          takeCells [BulletList cells] = map B.fromList cells
+          takeCells _ = []
+          normWidths ws = map (/ max 1 (sum ws)) ws
+
 -- TODO:
 --  - Only supports :format: fields with a single format for :raw: roles,
 --    change Text.Pandoc.Definition.Format to fix
-addNewRole :: PandocMonad m => String -> [(String, String)] -> RSTParser m Blocks
+addNewRole :: PandocMonad m
+           => String -> [(String, String)] -> RSTParser m Blocks
 addNewRole roleString fields = do
     pos <- getPosition
-    (role, parentRole) <- parseFromString inheritedRole roleString
+    (role, parentRole) <- parseFromString' inheritedRole roleString
     customRoles <- stateRstCustomRoles <$> getState
     let getBaseRole (r, f, a) roles =
             case M.lookup r roles of
@@ -804,7 +842,8 @@ addNewRole roleString fields = do
             SkippedContent ":format: [because parent of role is not :raw:]" pos
         _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos
     when (parentRole == "raw" && countKeys "format" > 1) $
-        logMessage $ SkippedContent ":format: [after first in definition of role]"
+        logMessage $ SkippedContent
+                  ":format: [after first in definition of role]"
                   pos
     when (parentRole == "code" && countKeys "language" > 1) $
         logMessage $ SkippedContent
@@ -819,7 +858,8 @@ addNewRole roleString fields = do
   where
     countKeys k = length . filter (== k) . map fst $ fields
     inheritedRole =
-        (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span")
+        (,) <$> roleName <*> ((char '(' *> roleName <* char ')')
+                            <|> pure "span")
 
 
 -- Can contain character codes as decimal numbers or
@@ -996,7 +1036,8 @@ substKey = try $ do
              [Para ils] -> return $ B.fromList ils
              _          -> mzero
   let key = toKey $ stripFirstAndLast ref
-  updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s }
+  updateState $ \s -> s{ stateSubstitutions =
+                          M.insert key il $ stateSubstitutions s }
 
 anonymousKey :: Monad m => RSTParser m ()
 anonymousKey = try $ do
@@ -1005,7 +1046,8 @@ anonymousKey = try $ do
   pos <- getPosition
   let key = toKey $ "_" ++ printf "%09d" (sourceLine pos)
   --TODO: parse width, height, class and name attributes
-  updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
+  updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
+                          stateKeys s }
 
 stripTicks :: String -> String
 stripTicks = reverse . stripTick . reverse . stripTick
@@ -1020,7 +1062,8 @@ regularKey = try $ do
   src <- targetURI
   let key = toKey $ stripTicks ref
   --TODO: parse width, height, class and name attributes
-  updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
+  updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
+                          stateKeys s }
 
 headerBlock :: PandocMonad m => RSTParser m [Char]
 headerBlock = do
@@ -1087,7 +1130,7 @@ simpleTableRow indices = do
   let cols = map unlines . transpose $ firstLine : conLines ++
                                  [replicate (length indices) ""
                                    | not (null conLines)]
-  mapM (parseFromString parseBlocks) cols
+  mapM (parseFromString' parseBlocks) cols
 
 simpleTableSplitLine :: [Int] -> String -> [String]
 simpleTableSplitLine indices line =
@@ -1110,7 +1153,7 @@ simpleTableHeader headless = try $ do
   let rawHeads = if headless
                     then replicate (length dashes) ""
                     else simpleTableSplitLine indices rawContent
-  heads <- mapM (parseFromString (mconcat <$> many plain)) $
+  heads <- mapM (parseFromString' (mconcat <$> many plain)) $
              map trim rawHeads
   return (heads, aligns, indices)
 
@@ -1119,8 +1162,12 @@ simpleTable :: PandocMonad m
             => Bool  -- ^ Headerless table
             -> RSTParser m Blocks
 simpleTable headless = do
-  tbl <- tableWith (simpleTableHeader headless) simpleTableRow
-              sep simpleTableFooter
+  let wrapIdFst (a, b, c) = (Identity a, b, c)
+      wrapId = fmap Identity
+  tbl <- runIdentity <$> tableWith
+           (wrapIdFst <$> simpleTableHeader headless)
+           (wrapId <$> simpleTableRow)
+           sep simpleTableFooter
   -- Simple tables get 0s for relative column widths (i.e., use default)
   case B.toList tbl of
        [Table c a _w h l]  -> return $ B.singleton $
@@ -1134,7 +1181,8 @@ simpleTable headless = do
 gridTable :: PandocMonad m
           => Bool -- ^ Headerless table
           -> RSTParser m Blocks
-gridTable headerless = gridTableWith parseBlocks headerless
+gridTable headerless = runIdentity <$>
+  gridTableWith (Identity <$> parseBlocks) headerless
 
 table :: PandocMonad m => RSTParser m Blocks
 table = gridTable False <|> simpleTable False <|>
@@ -1161,7 +1209,7 @@ inline = choice [ note          -- can start with whitespace, so try before ws
                 , symbol ] <?> "inline"
 
 parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
-parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline)
+parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline)
 
 hyphens :: Monad m => RSTParser m Inlines
 hyphens = do
@@ -1220,7 +1268,8 @@ interpretedRole = try $ do
   (role, contents) <- roleBefore <|> roleAfter
   renderRole contents Nothing role nullAttr
 
-renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines
+renderRole :: PandocMonad m
+           => String -> Maybe String -> String -> Attr -> RSTParser m Inlines
 renderRole contents fmt role attr = case role of
     "sup"  -> return $ B.superscript $ B.str contents
     "superscript" -> return $ B.superscript $ B.str contents
@@ -1353,7 +1402,8 @@ referenceLink = try $ do
                         (k:_) -> return k
   ((src,tit), attr) <- lookupKey [] key
   -- if anonymous link, remove key so it won't be used again
-  when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
+  when (isAnonKey key) $ updateState $ \s ->
+                          s{ stateKeys = M.delete key keyTable }
   return $ B.linkWith attr src tit label'
 
 -- We keep a list of oldkeys so we can detect lookup loops.
@@ -1423,7 +1473,7 @@ note = try $ do
       -- Note references inside other notes are allowed in reST, but
       -- not yet in this implementation.
       updateState $ \st -> st{ stateNotes = [] }
-      contents <- parseFromString parseBlocks raw
+      contents <- parseFromString' parseBlocks raw
       let newnotes = if (ref == "*" || ref == "#") -- auto-numbered
                         -- delete the note so the next auto-numbered note
                         -- doesn't get the same contents:
author	schrieveslaach <schrieveslaach@online.de>	2017-06-12 15:52:29 +0200
committer	GitHub <noreply@github.com>	2017-06-12 15:52:29 +0200
commit	635f299b441e238ccd34e3ad61c5e36f0ca30067 (patch)
tree	11cfc34402975bad208f9a48d075fe2ace959e70 /src/Text/Pandoc/Readers/RST.hs
parent	181c56d4003aa83abed23b95a452c4890aa3797c (diff)
parent	23f3c2d7b4796d1af742a74999ce67924bf2abb3 (diff)
download	pandoc-635f299b441e238ccd34e3ad61c5e36f0ca30067.tar.gz