aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2018-08-05 09:15:06 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2018-08-05 09:56:43 -0700
commit581a3514ca266e20e70f05d9ffe314515a0a7bb9 (patch)
tree58f8a9603c3470f9f529107e179d887c9c4690b5
parentf7dc3e7487d80847b376c132acc985ccb9833bc2 (diff)
downloadpandoc-581a3514ca266e20e70f05d9ffe314515a0a7bb9.tar.gz
RST reader: improve parsing of inline interpreted text roles.
* Use a Span with class "title-reference" for the default title-reference role. * Use B.text to split up contents into Spaces, SoftBreaks, and Strs for title-reference. * Use Code with class "interpreted-text" instead of Span and Str for unknown roles. (The RST writer has also been modified to round-trip this properly.) * Disallow blank lines in interpreted text. * Backslash-escape now works in interpreted text. * Backticks followed by alphanumerics no longer end interpreted text. Closes #4811.
-rw-r--r--src/Text/Pandoc/Readers/RST.hs40
-rw-r--r--src/Text/Pandoc/Writers/RST.hs9
-rw-r--r--test/Tests/Readers/RST.hs2
-rw-r--r--test/command/3407.md4
-rw-r--r--test/command/4811.md48
-rw-r--r--test/rst-reader.native2
6 files changed, 86 insertions, 19 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index f9752a83c..576c3b77c 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -37,7 +37,7 @@ import Control.Arrow (second)
import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
import Control.Monad.Except (throwError)
import Control.Monad.Identity (Identity (..))
-import Data.Char (isHexDigit, isSpace, toLower, toUpper)
+import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum)
import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
nub, sort, transpose, union)
import qualified Data.Map as M
@@ -1385,7 +1385,6 @@ strong = B.strong . trimInlines . mconcat <$>
--
-- TODO:
-- - Classes are silently discarded in addNewRole
--- - Lacks sensible implementation for title-reference (which is the default)
-- - Allows direct use of the :raw: role, rST only allows inherited use.
interpretedRole :: PandocMonad m => RSTParser m Inlines
interpretedRole = try $ do
@@ -1395,12 +1394,12 @@ interpretedRole = try $ do
renderRole :: PandocMonad m
=> String -> Maybe String -> String -> Attr -> RSTParser m Inlines
renderRole contents fmt role attr = case role of
- "sup" -> return $ B.superscript $ B.str contents
- "superscript" -> return $ B.superscript $ B.str contents
- "sub" -> return $ B.subscript $ B.str contents
- "subscript" -> return $ B.subscript $ B.str contents
- "emphasis" -> return $ B.emph $ B.str contents
- "strong" -> return $ B.strong $ B.str contents
+ "sup" -> return $ B.superscript $ treatAsText contents
+ "superscript" -> return $ B.superscript $ treatAsText contents
+ "sub" -> return $ B.subscript $ treatAsText contents
+ "subscript" -> return $ B.subscript $ treatAsText contents
+ "emphasis" -> return $ B.emph $ treatAsText contents
+ "strong" -> return $ B.strong $ treatAsText contents
"rfc-reference" -> return $ rfcLink contents
"RFC" -> return $ rfcLink contents
"pep-reference" -> return $ pepLink contents
@@ -1411,7 +1410,7 @@ renderRole contents fmt role attr = case role of
"title" -> titleRef contents
"t" -> titleRef contents
"code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
- "span" -> return $ B.spanWith attr $ B.str contents
+ "span" -> return $ B.spanWith attr $ treatAsText contents
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
custom -> do
customRoles <- stateRstCustomRoles <$> getState
@@ -1419,14 +1418,20 @@ renderRole contents fmt role attr = case role of
Just (newRole, newFmt, newAttr) ->
renderRole contents newFmt newRole newAttr
Nothing -> -- undefined role
- return $ B.spanWith ("",[],[("role",role)]) (B.str contents)
+ return $ B.codeWith ("",["interpreted-text"],[("role",role)])
+ contents
where
- titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
+ titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+ treatAsText = B.text . handleEscapes
+ handleEscapes [] = []
+ handleEscapes ('\\':' ':cs) = handleEscapes cs
+ handleEscapes ('\\':c:cs) = c : handleEscapes cs
+ handleEscapes (c:cs) = c : handleEscapes cs
addClass :: String -> Attr -> Attr
addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues)
@@ -1450,7 +1455,18 @@ roleAfter = try $ do
return (role,contents)
unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
-unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
+unmarkedInterpretedText = try $ do
+ atStart (char '`')
+ contents <- mconcat <$> (many1
+ ( many1 (noneOf "`\\\n")
+ <|> (char '\\' >> ((\c -> ['\\',c]) <$> noneOf "\n"))
+ <|> (string "\n" <* notFollowedBy blankline)
+ <|> try (string "`" <*
+ notFollowedBy (() <$ roleMarker) <*
+ lookAhead (satisfy isAlphaNum))
+ ))
+ char '`'
+ return contents
whitespace :: PandocMonad m => RSTParser m Inlines
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index f355a8f5b..566bcbeef 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -574,15 +574,18 @@ inlineToRST (Quoted DoubleQuote lst) = do
else return $ "“" <> contents <> "”"
inlineToRST (Cite _ lst) =
writeInlines lst
+inlineToRST (Code (_,["interpreted-text"],[("role",role)]) str) = do
+ return $ ":" <> text role <> ":`" <> text str <> "`"
inlineToRST (Code _ str) = do
opts <- gets stOptions
-- we trim the string because the delimiters must adjoin a
-- non-space character; see #3496
-- we use :literal: when the code contains backticks, since
-- :literal: allows backslash-escapes; see #3974
- return $ if '`' `elem` str
- then ":literal:`" <> text (escapeString opts (trim str)) <> "`"
- else "``" <> text (trim str) <> "``"
+ return $
+ if '`' `elem` str
+ then ":literal:`" <> text (escapeString opts (trim str)) <> "`"
+ else "``" <> text (trim str) <> "``"
inlineToRST (Str str) = do
opts <- gets stOptions
return $ text $
diff --git a/test/Tests/Readers/RST.hs b/test/Tests/Readers/RST.hs
index 540c5d45a..8916eed6f 100644
--- a/test/Tests/Readers/RST.hs
+++ b/test/Tests/Readers/RST.hs
@@ -177,7 +177,7 @@ tests = [ "line block with blank line" =:
=: ".. role:: haskell(code)\n.. role:: lhs(haskell)\n\n:lhs:`text`"
=?> para (codeWith ("", ["lhs", "haskell", "sourceCode"], []) "text")
, "unknown role" =: ":unknown:`text`" =?>
- para (spanWith ("",[],[("role","unknown")]) (str "text"))
+ para (codeWith ("",["interpreted-text"],[("role","unknown")]) "text")
]
, testGroup "footnotes"
[ "remove space before note" =: T.unlines
diff --git a/test/command/3407.md b/test/command/3407.md
index 3160d1263..aec253ff5 100644
--- a/test/command/3407.md
+++ b/test/command/3407.md
@@ -1,6 +1,6 @@
```
% pandoc -f native -t rst
-[Para [Span ("",[],[("role","foo")]) [Str "text"]]]
+[Para [Code ("",["interpreted-text"],[("role","foo")]) "text"]]
^D
:foo:`text`
```
@@ -9,5 +9,5 @@
% pandoc -f rst -t native
:foo:`text`
^D
-[Para [Span ("",[],[("role","foo")]) [Str "text"]]]
+[Para [Code ("",["interpreted-text"],[("role","foo")]) "text"]]
```
diff --git a/test/command/4811.md b/test/command/4811.md
new file mode 100644
index 000000000..9c8bea7ce
--- /dev/null
+++ b/test/command/4811.md
@@ -0,0 +1,48 @@
+No blank lines in inline interpreted roles:
+
+```
+% pandoc -f rst -t native
+`no
+
+blank`:myrole:
+^D
+[Para [Str "`no"]
+,Para [Str "blank`:myrole:"]]
+```
+
+Backslash escape behaves properly in interpreted roles:
+
+```
+% pandoc -f rst -t native
+`hi\ there`:sup:
+
+`hi\ there`:code:
+^D
+[Para [Superscript [Str "hithere"]]
+,Para [Code ("",["sourceCode"],[]) "hi\\ there"]]
+```
+
+Backtick followed by alphanumeric doesn't end the span:
+```
+% pandoc -f rst -t native
+`hi`there`:myrole:
+^D
+[Para [Code ("",["interpreted-text"],[("role","myrole")]) "hi`there"]]
+```
+
+Newline is okay, as long as not blank:
+```
+% pandoc -f rst -t native
+`hi
+there`:myrole:
+^D
+[Para [Code ("",["interpreted-text"],[("role","myrole")]) "hi\nthere"]]
+```
+
+Use span for title-reference:
+```
+% pandoc -f rst -t native
+`default`
+^D
+[Para [Span ("",["title-ref"],[]) [Str "default"]]]
+```
diff --git a/test/rst-reader.native b/test/rst-reader.native
index b0e51bd3f..89dde7396 100644
--- a/test/rst-reader.native
+++ b/test/rst-reader.native
@@ -326,7 +326,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
,Para [Math DisplayMath "\\alpha = beta",Math DisplayMath "E = mc^2"]
,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."]
,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."]
-,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."]
+,Para [Str "And",Space,Str "now",Space,Span ("",["title-ref"],[]) [Str "some-invalid-string-3231231"],Space,Str "is",Space,Str "nonsense."]
,Para [Str "And",Space,Str "now",Space,Str "with",Space,RawInline (Format "html") "<b>inline</b> <span id=\"test\">HTML</span>",Str "."]
,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["haskell","sourceCode"],[]) "fmap id [1,2..10]",Str "."]
,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["py","python","indirect","sourceCode"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."]