diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/RST.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 150 |
1 files changed, 98 insertions, 52 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index e5eccb116..678eecc52 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,32 +30,38 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} module Text.Pandoc.Readers.RST ( - readRST + readRST, + readRSTWithWarnings ) where import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, fromList) import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Options -import Control.Monad ( when, liftM, guard, mzero, mplus ) +import Control.Monad ( when, liftM, guard, mzero ) import Data.List ( findIndex, intersperse, intercalate, - transpose, sort, deleteFirstsBy, isSuffixOf ) + transpose, sort, deleteFirstsBy, isSuffixOf , nub, union) import Data.Maybe (fromMaybe) import qualified Data.Map as M import Text.Printf ( printf ) -import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>)) +import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>), pure) import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import qualified Text.Pandoc.Builder as B import Data.Monoid (mconcat, mempty) import Data.Sequence (viewr, ViewR(..)) -import Data.Char (toLower, isHexDigit) +import Data.Char (toLower, isHexDigit, isSpace) + +import Text.Pandoc.Error -- | Parse reStructuredText string and return Pandoc document. readRST :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") +readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String]) +readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n") + type RSTParser = Parser [Char] ParserState -- @@ -202,7 +209,7 @@ rawFieldListItem minIndent = try $ do fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent - let term = B.str name + term <- parseInlineFromString name contents <- parseFromString parseBlocks raw optional blanklines return (term, [contents]) @@ -222,8 +229,7 @@ fieldList = try $ do lineBlock :: RSTParser Blocks lineBlock = try $ do lines' <- lineBlockLines - lines'' <- mapM (parseFromString - (trimInlines . mconcat <$> many inline)) lines' + lines'' <- mapM parseInlineFromString lines' return $ B.para (mconcat $ intersperse B.linebreak lines'') -- @@ -335,6 +341,13 @@ indentedBlock = try $ do optional blanklines return $ unlines lns +quotedBlock :: Parser [Char] st [Char] +quotedBlock = try $ do + quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" + lns <- many1 $ lookAhead (char quote) >> anyLine + optional blanklines + return $ unlines lns + codeBlockStart :: Parser [Char] st Char codeBlockStart = string "::" >> blankline >> blankline @@ -342,7 +355,8 @@ codeBlock :: Parser [Char] st Blocks codeBlock = try $ codeBlockStart >> codeBlockBody codeBlockBody :: Parser [Char] st Blocks -codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> indentedBlock +codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> + (indentedBlock <|> quotedBlock) lhsCodeBlock :: RSTParser Blocks lhsCodeBlock = try $ do @@ -513,7 +527,6 @@ directive = try $ do -- TODO: line-block, parsed-literal, table, csv-table, list-table -- date -- include --- class -- title directive' :: RSTParser Blocks directive' = do @@ -535,39 +548,33 @@ directive' = do "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields "container" -> parseFromString parseBlocks body' "replace" -> B.para <$> -- consumed by substKey - parseFromString (trimInlines . mconcat <$> many inline) - (trim top) + parseInlineFromString (trim top) "unicode" -> B.para <$> -- consumed by substKey - parseFromString (trimInlines . mconcat <$> many inline) - (trim $ unicodeTransform top) + parseInlineFromString (trim $ unicodeTransform top) "compound" -> parseFromString parseBlocks body' "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body' "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body' "highlights" -> B.blockQuote <$> parseFromString parseBlocks body' - "rubric" -> B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) top + "rubric" -> B.para . B.strong <$> parseInlineFromString top _ | label `elem` ["attention","caution","danger","error","hint", "important","note","tip","warning"] -> do let tit = B.para $ B.strong $ B.str label bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' return $ B.blockQuote $ tit <> bod "admonition" -> - do tit <- B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) top + do tit <- B.para . B.strong <$> parseInlineFromString top bod <- parseFromString parseBlocks body' return $ B.blockQuote $ tit <> bod "sidebar" -> do let subtit = maybe "" trim $ lookup "subtitle" fields - tit <- B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) + tit <- B.para . B.strong <$> parseInlineFromString (trim top ++ if null subtit then "" else (": " ++ subtit)) bod <- parseFromString parseBlocks body' return $ B.blockQuote $ tit <> bod "topic" -> - do tit <- B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) top + do tit <- B.para . B.strong <$> parseInlineFromString top bod <- parseFromString parseBlocks body' return $ tit <> bod "default-role" -> mempty <$ updateState (\s -> @@ -594,38 +601,69 @@ directive' = do Just t -> B.link (escapeURI $ trim t) "" $ B.image src "" alt Nothing -> B.image src "" alt - _ -> return mempty + "class" -> do + let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) + -- directive content or the first immediately following element + children <- case body of + "" -> block + _ -> parseFromString parseBlocks body' + return $ B.divWith attrs children + other -> do + pos <- getPosition + addWarning (Just pos) $ "ignoring unknown directive: " ++ other + return mempty -- TODO: -- - Silently ignores illegal fields --- - Silently drops classes -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: String -> [(String, String)] -> RSTParser Blocks addNewRole roleString fields = do (role, parentRole) <- parseFromString inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState - baseRole <- case M.lookup parentRole customRoles of - Just (base, _, _) -> return base - Nothing -> return parentRole - - let fmt = if baseRole == "raw" then lookup "format" fields else Nothing - annotate = maybe id addLanguage $ - if baseRole == "code" + let (baseRole, baseFmt, baseAttr) = + maybe (parentRole, Nothing, nullAttr) id $ + M.lookup parentRole customRoles + fmt = if parentRole == "raw" then lookup "format" fields else baseFmt + annotate :: [String] -> [String] + annotate = maybe id (:) $ + if parentRole == "code" then lookup "language" fields else Nothing + attr = let (ident, classes, keyValues) = baseAttr + -- nub in case role name & language class are the same + in (ident, nub . (role :) . annotate $ classes, keyValues) + + -- warn about syntax we ignore + flip mapM_ fields $ \(key, _) -> case key of + "language" -> when (parentRole /= "code") $ addWarning Nothing $ + "ignoring :language: field because the parent of role :" ++ + role ++ ": is :" ++ parentRole ++ ": not :code:" + "format" -> when (parentRole /= "raw") $ addWarning Nothing $ + "ignoring :format: field because the parent of role :" ++ + role ++ ": is :" ++ parentRole ++ ": not :raw:" + _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++ + ": in definition of role :" ++ role ++ ": in" + when (parentRole == "raw" && countKeys "format" > 1) $ + addWarning Nothing $ + "ignoring :format: fields after the first in the definition of role :" + ++ role ++": in" + when (parentRole == "code" && countKeys "language" > 1) $ + addWarning Nothing $ + "ignoring :language: fields after the first in the definition of role :" + ++ role ++": in" updateState $ \s -> s { stateRstCustomRoles = - M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles + M.insert role (baseRole, fmt, attr) customRoles } return $ B.singleton Null where - addLanguage lang (ident, classes, keyValues) = - (ident, "sourceCode" : lang : classes, keyValues) + countKeys k = length . filter (== k) . map fst $ fields inheritedRole = - (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')') + (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span") + -- Can contain character codes as decimal numbers or -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u @@ -666,7 +704,7 @@ extractCaption = do toChunks :: String -> [String] toChunks = dropWhile null . map (trim . unlines) - . splitBy (all (`elem` " \t")) . lines + . splitBy (all (`elem` (" \t" :: String))) . lines codeblock :: Maybe String -> String -> String -> RSTParser Blocks codeblock numberLines lang body = @@ -734,7 +772,7 @@ simpleReferenceName' :: Parser [Char] st String simpleReferenceName' = do x <- alphaNum xs <- many $ alphaNum - <|> (try $ oneOf "-_:+." >> lookAhead alphaNum) + <|> (try $ oneOf "-_:+." <* lookAhead alphaNum) return (x:xs) simpleReferenceName :: Parser [Char] st Inlines @@ -917,6 +955,9 @@ inline = choice [ whitespace , escapedChar , symbol ] <?> "inline" +parseInlineFromString :: String -> RSTParser Inlines +parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline) + hyphens :: RSTParser Inlines hyphens = do result <- many1 (char '-') @@ -985,21 +1026,23 @@ renderRole contents fmt role attr = case role of "RFC" -> return $ rfcLink contents "pep-reference" -> return $ pepLink contents "PEP" -> return $ pepLink contents - "literal" -> return $ B.str contents + "literal" -> return $ B.codeWith attr contents "math" -> return $ B.math contents "title-reference" -> titleRef contents "title" -> titleRef contents "t" -> titleRef contents - "code" -> return $ B.codeWith attr contents + "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents + "span" -> return $ B.spanWith attr $ B.str contents "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents custom -> do - customRole <- stateRstCustomRoles <$> getState - case M.lookup custom customRole of - Just (_, newFmt, inherit) -> let - fmtStr = fmt `mplus` newFmt - (newRole, newAttr) = inherit attr - in renderRole contents fmtStr newRole newAttr - Nothing -> return $ B.str contents -- Undefined role + customRoles <- stateRstCustomRoles <$> getState + case M.lookup custom customRoles of + Just (newRole, newFmt, newAttr) -> + renderRole contents newFmt newRole newAttr + Nothing -> do + pos <- getPosition + addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in" + return $ B.str contents -- Undefined role where titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) @@ -1008,11 +1051,14 @@ renderRole contents fmt role attr = case role of where padNo = replicate (4 - length pepNo) '0' ++ pepNo pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" -roleNameEndingIn :: RSTParser Char -> RSTParser String -roleNameEndingIn end = many1Till (letter <|> char '-') end +addClass :: String -> Attr -> Attr +addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues) + +roleName :: RSTParser String +roleName = many1 (letter <|> char '-') roleMarker :: RSTParser String -roleMarker = char ':' *> roleNameEndingIn (char ':') +roleMarker = char ':' *> roleName <* char ':' roleBefore :: RSTParser (String,String) roleBefore = try $ do |