aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/RST.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/RST.hs')
-rw-r--r--src/Text/Pandoc/Readers/RST.hs150
1 files changed, 98 insertions, 52 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index e5eccb116..678eecc52 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
{-
-Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.RST
- Copyright : Copyright (C) 2006-2014 John MacFarlane
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,32 +30,38 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from reStructuredText to 'Pandoc' document.
-}
module Text.Pandoc.Readers.RST (
- readRST
+ readRST,
+ readRSTWithWarnings
) where
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, fromList)
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Options
-import Control.Monad ( when, liftM, guard, mzero, mplus )
+import Control.Monad ( when, liftM, guard, mzero )
import Data.List ( findIndex, intersperse, intercalate,
- transpose, sort, deleteFirstsBy, isSuffixOf )
+ transpose, sort, deleteFirstsBy, isSuffixOf , nub, union)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import Text.Printf ( printf )
-import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>))
+import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>), pure)
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import qualified Text.Pandoc.Builder as B
import Data.Monoid (mconcat, mempty)
import Data.Sequence (viewr, ViewR(..))
-import Data.Char (toLower, isHexDigit)
+import Data.Char (toLower, isHexDigit, isSpace)
+
+import Text.Pandoc.Error
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String])
+readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+
type RSTParser = Parser [Char] ParserState
--
@@ -202,7 +209,7 @@ rawFieldListItem minIndent = try $ do
fieldListItem :: Int -> RSTParser (Inlines, [Blocks])
fieldListItem minIndent = try $ do
(name, raw) <- rawFieldListItem minIndent
- let term = B.str name
+ term <- parseInlineFromString name
contents <- parseFromString parseBlocks raw
optional blanklines
return (term, [contents])
@@ -222,8 +229,7 @@ fieldList = try $ do
lineBlock :: RSTParser Blocks
lineBlock = try $ do
lines' <- lineBlockLines
- lines'' <- mapM (parseFromString
- (trimInlines . mconcat <$> many inline)) lines'
+ lines'' <- mapM parseInlineFromString lines'
return $ B.para (mconcat $ intersperse B.linebreak lines'')
--
@@ -335,6 +341,13 @@ indentedBlock = try $ do
optional blanklines
return $ unlines lns
+quotedBlock :: Parser [Char] st [Char]
+quotedBlock = try $ do
+ quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
+ lns <- many1 $ lookAhead (char quote) >> anyLine
+ optional blanklines
+ return $ unlines lns
+
codeBlockStart :: Parser [Char] st Char
codeBlockStart = string "::" >> blankline >> blankline
@@ -342,7 +355,8 @@ codeBlock :: Parser [Char] st Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
codeBlockBody :: Parser [Char] st Blocks
-codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> indentedBlock
+codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
+ (indentedBlock <|> quotedBlock)
lhsCodeBlock :: RSTParser Blocks
lhsCodeBlock = try $ do
@@ -513,7 +527,6 @@ directive = try $ do
-- TODO: line-block, parsed-literal, table, csv-table, list-table
-- date
-- include
--- class
-- title
directive' :: RSTParser Blocks
directive' = do
@@ -535,39 +548,33 @@ directive' = do
"role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
"container" -> parseFromString parseBlocks body'
"replace" -> B.para <$> -- consumed by substKey
- parseFromString (trimInlines . mconcat <$> many inline)
- (trim top)
+ parseInlineFromString (trim top)
"unicode" -> B.para <$> -- consumed by substKey
- parseFromString (trimInlines . mconcat <$> many inline)
- (trim $ unicodeTransform top)
+ parseInlineFromString (trim $ unicodeTransform top)
"compound" -> parseFromString parseBlocks body'
"pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body'
"epigraph" -> B.blockQuote <$> parseFromString parseBlocks body'
"highlights" -> B.blockQuote <$> parseFromString parseBlocks body'
- "rubric" -> B.para . B.strong <$> parseFromString
- (trimInlines . mconcat <$> many inline) top
+ "rubric" -> B.para . B.strong <$> parseInlineFromString top
_ | label `elem` ["attention","caution","danger","error","hint",
"important","note","tip","warning"] ->
do let tit = B.para $ B.strong $ B.str label
bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
return $ B.blockQuote $ tit <> bod
"admonition" ->
- do tit <- B.para . B.strong <$> parseFromString
- (trimInlines . mconcat <$> many inline) top
+ do tit <- B.para . B.strong <$> parseInlineFromString top
bod <- parseFromString parseBlocks body'
return $ B.blockQuote $ tit <> bod
"sidebar" ->
do let subtit = maybe "" trim $ lookup "subtitle" fields
- tit <- B.para . B.strong <$> parseFromString
- (trimInlines . mconcat <$> many inline)
+ tit <- B.para . B.strong <$> parseInlineFromString
(trim top ++ if null subtit
then ""
else (": " ++ subtit))
bod <- parseFromString parseBlocks body'
return $ B.blockQuote $ tit <> bod
"topic" ->
- do tit <- B.para . B.strong <$> parseFromString
- (trimInlines . mconcat <$> many inline) top
+ do tit <- B.para . B.strong <$> parseInlineFromString top
bod <- parseFromString parseBlocks body'
return $ tit <> bod
"default-role" -> mempty <$ updateState (\s ->
@@ -594,38 +601,69 @@ directive' = do
Just t -> B.link (escapeURI $ trim t) ""
$ B.image src "" alt
Nothing -> B.image src "" alt
- _ -> return mempty
+ "class" -> do
+ let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields)
+ -- directive content or the first immediately following element
+ children <- case body of
+ "" -> block
+ _ -> parseFromString parseBlocks body'
+ return $ B.divWith attrs children
+ other -> do
+ pos <- getPosition
+ addWarning (Just pos) $ "ignoring unknown directive: " ++ other
+ return mempty
-- TODO:
-- - Silently ignores illegal fields
--- - Silently drops classes
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: String -> [(String, String)] -> RSTParser Blocks
addNewRole roleString fields = do
(role, parentRole) <- parseFromString inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
- baseRole <- case M.lookup parentRole customRoles of
- Just (base, _, _) -> return base
- Nothing -> return parentRole
-
- let fmt = if baseRole == "raw" then lookup "format" fields else Nothing
- annotate = maybe id addLanguage $
- if baseRole == "code"
+ let (baseRole, baseFmt, baseAttr) =
+ maybe (parentRole, Nothing, nullAttr) id $
+ M.lookup parentRole customRoles
+ fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
+ annotate :: [String] -> [String]
+ annotate = maybe id (:) $
+ if parentRole == "code"
then lookup "language" fields
else Nothing
+ attr = let (ident, classes, keyValues) = baseAttr
+ -- nub in case role name & language class are the same
+ in (ident, nub . (role :) . annotate $ classes, keyValues)
+
+ -- warn about syntax we ignore
+ flip mapM_ fields $ \(key, _) -> case key of
+ "language" -> when (parentRole /= "code") $ addWarning Nothing $
+ "ignoring :language: field because the parent of role :" ++
+ role ++ ": is :" ++ parentRole ++ ": not :code:"
+ "format" -> when (parentRole /= "raw") $ addWarning Nothing $
+ "ignoring :format: field because the parent of role :" ++
+ role ++ ": is :" ++ parentRole ++ ": not :raw:"
+ _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
+ ": in definition of role :" ++ role ++ ": in"
+ when (parentRole == "raw" && countKeys "format" > 1) $
+ addWarning Nothing $
+ "ignoring :format: fields after the first in the definition of role :"
+ ++ role ++": in"
+ when (parentRole == "code" && countKeys "language" > 1) $
+ addWarning Nothing $
+ "ignoring :language: fields after the first in the definition of role :"
+ ++ role ++": in"
updateState $ \s -> s {
stateRstCustomRoles =
- M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles
+ M.insert role (baseRole, fmt, attr) customRoles
}
return $ B.singleton Null
where
- addLanguage lang (ident, classes, keyValues) =
- (ident, "sourceCode" : lang : classes, keyValues)
+ countKeys k = length . filter (== k) . map fst $ fields
inheritedRole =
- (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')')
+ (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span")
+
-- Can contain character codes as decimal numbers or
-- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
@@ -666,7 +704,7 @@ extractCaption = do
toChunks :: String -> [String]
toChunks = dropWhile null
. map (trim . unlines)
- . splitBy (all (`elem` " \t")) . lines
+ . splitBy (all (`elem` (" \t" :: String))) . lines
codeblock :: Maybe String -> String -> String -> RSTParser Blocks
codeblock numberLines lang body =
@@ -734,7 +772,7 @@ simpleReferenceName' :: Parser [Char] st String
simpleReferenceName' = do
x <- alphaNum
xs <- many $ alphaNum
- <|> (try $ oneOf "-_:+." >> lookAhead alphaNum)
+ <|> (try $ oneOf "-_:+." <* lookAhead alphaNum)
return (x:xs)
simpleReferenceName :: Parser [Char] st Inlines
@@ -917,6 +955,9 @@ inline = choice [ whitespace
, escapedChar
, symbol ] <?> "inline"
+parseInlineFromString :: String -> RSTParser Inlines
+parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline)
+
hyphens :: RSTParser Inlines
hyphens = do
result <- many1 (char '-')
@@ -985,21 +1026,23 @@ renderRole contents fmt role attr = case role of
"RFC" -> return $ rfcLink contents
"pep-reference" -> return $ pepLink contents
"PEP" -> return $ pepLink contents
- "literal" -> return $ B.str contents
+ "literal" -> return $ B.codeWith attr contents
"math" -> return $ B.math contents
"title-reference" -> titleRef contents
"title" -> titleRef contents
"t" -> titleRef contents
- "code" -> return $ B.codeWith attr contents
+ "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
+ "span" -> return $ B.spanWith attr $ B.str contents
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
custom -> do
- customRole <- stateRstCustomRoles <$> getState
- case M.lookup custom customRole of
- Just (_, newFmt, inherit) -> let
- fmtStr = fmt `mplus` newFmt
- (newRole, newAttr) = inherit attr
- in renderRole contents fmtStr newRole newAttr
- Nothing -> return $ B.str contents -- Undefined role
+ customRoles <- stateRstCustomRoles <$> getState
+ case M.lookup custom customRoles of
+ Just (newRole, newFmt, newAttr) ->
+ renderRole contents newFmt newRole newAttr
+ Nothing -> do
+ pos <- getPosition
+ addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in"
+ return $ B.str contents -- Undefined role
where
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
@@ -1008,11 +1051,14 @@ renderRole contents fmt role attr = case role of
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
-roleNameEndingIn :: RSTParser Char -> RSTParser String
-roleNameEndingIn end = many1Till (letter <|> char '-') end
+addClass :: String -> Attr -> Attr
+addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues)
+
+roleName :: RSTParser String
+roleName = many1 (letter <|> char '-')
roleMarker :: RSTParser String
-roleMarker = char ':' *> roleNameEndingIn (char ':')
+roleMarker = char ':' *> roleName <* char ':'
roleBefore :: RSTParser (String,String)
roleBefore = try $ do