From 581a3514ca266e20e70f05d9ffe314515a0a7bb9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 5 Aug 2018 09:15:06 -0700 Subject: RST reader: improve parsing of inline interpreted text roles. * Use a Span with class "title-reference" for the default title-reference role. * Use B.text to split up contents into Spaces, SoftBreaks, and Strs for title-reference. * Use Code with class "interpreted-text" instead of Span and Str for unknown roles. (The RST writer has also been modified to round-trip this properly.) * Disallow blank lines in interpreted text. * Backslash-escape now works in interpreted text. * Backticks followed by alphanumerics no longer end interpreted text. Closes #4811. --- src/Text/Pandoc/Readers/RST.hs | 40 ++++++++++++++++++++++++++++------------ src/Text/Pandoc/Writers/RST.hs | 9 ++++++--- 2 files changed, 34 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index f9752a83c..576c3b77c 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -37,7 +37,7 @@ import Control.Arrow (second) import Control.Monad (forM_, guard, liftM, mplus, mzero, when) import Control.Monad.Except (throwError) import Control.Monad.Identity (Identity (..)) -import Data.Char (isHexDigit, isSpace, toLower, toUpper) +import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum) import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf, nub, sort, transpose, union) import qualified Data.Map as M @@ -1385,7 +1385,6 @@ strong = B.strong . trimInlines . mconcat <$> -- -- TODO: -- - Classes are silently discarded in addNewRole --- - Lacks sensible implementation for title-reference (which is the default) -- - Allows direct use of the :raw: role, rST only allows inherited use. interpretedRole :: PandocMonad m => RSTParser m Inlines interpretedRole = try $ do @@ -1395,12 +1394,12 @@ interpretedRole = try $ do renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of - "sup" -> return $ B.superscript $ B.str contents - "superscript" -> return $ B.superscript $ B.str contents - "sub" -> return $ B.subscript $ B.str contents - "subscript" -> return $ B.subscript $ B.str contents - "emphasis" -> return $ B.emph $ B.str contents - "strong" -> return $ B.strong $ B.str contents + "sup" -> return $ B.superscript $ treatAsText contents + "superscript" -> return $ B.superscript $ treatAsText contents + "sub" -> return $ B.subscript $ treatAsText contents + "subscript" -> return $ B.subscript $ treatAsText contents + "emphasis" -> return $ B.emph $ treatAsText contents + "strong" -> return $ B.strong $ treatAsText contents "rfc-reference" -> return $ rfcLink contents "RFC" -> return $ rfcLink contents "pep-reference" -> return $ pepLink contents @@ -1411,7 +1410,7 @@ renderRole contents fmt role attr = case role of "title" -> titleRef contents "t" -> titleRef contents "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents - "span" -> return $ B.spanWith attr $ B.str contents + "span" -> return $ B.spanWith attr $ treatAsText contents "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents custom -> do customRoles <- stateRstCustomRoles <$> getState @@ -1419,14 +1418,20 @@ renderRole contents fmt role attr = case role of Just (newRole, newFmt, newAttr) -> renderRole contents newFmt newRole newAttr Nothing -> -- undefined role - return $ B.spanWith ("",[],[("role",role)]) (B.str contents) + return $ B.codeWith ("",["interpreted-text"],[("role",role)]) + contents where - titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour + titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) where padNo = replicate (4 - length pepNo) '0' ++ pepNo pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + treatAsText = B.text . handleEscapes + handleEscapes [] = [] + handleEscapes ('\\':' ':cs) = handleEscapes cs + handleEscapes ('\\':c:cs) = c : handleEscapes cs + handleEscapes (c:cs) = c : handleEscapes cs addClass :: String -> Attr -> Attr addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues) @@ -1450,7 +1455,18 @@ roleAfter = try $ do return (role,contents) unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] -unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar +unmarkedInterpretedText = try $ do + atStart (char '`') + contents <- mconcat <$> (many1 + ( many1 (noneOf "`\\\n") + <|> (char '\\' >> ((\c -> ['\\',c]) <$> noneOf "\n")) + <|> (string "\n" <* notFollowedBy blankline) + <|> try (string "`" <* + notFollowedBy (() <$ roleMarker) <* + lookAhead (satisfy isAlphaNum)) + )) + char '`' + return contents whitespace :: PandocMonad m => RSTParser m Inlines whitespace = B.space <$ skipMany1 spaceChar "whitespace" diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index f355a8f5b..566bcbeef 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -574,15 +574,18 @@ inlineToRST (Quoted DoubleQuote lst) = do else return $ "“" <> contents <> "”" inlineToRST (Cite _ lst) = writeInlines lst +inlineToRST (Code (_,["interpreted-text"],[("role",role)]) str) = do + return $ ":" <> text role <> ":`" <> text str <> "`" inlineToRST (Code _ str) = do opts <- gets stOptions -- we trim the string because the delimiters must adjoin a -- non-space character; see #3496 -- we use :literal: when the code contains backticks, since -- :literal: allows backslash-escapes; see #3974 - return $ if '`' `elem` str - then ":literal:`" <> text (escapeString opts (trim str)) <> "`" - else "``" <> text (trim str) <> "``" + return $ + if '`' `elem` str + then ":literal:`" <> text (escapeString opts (trim str)) <> "`" + else "``" <> text (trim str) <> "``" inlineToRST (Str str) = do opts <- gets stOptions return $ text $ -- cgit v1.2.3