From be2d7921cbf33a3aa839cd54a0b3ec0a7dfc4a9b Mon Sep 17 00:00:00 2001 From: danse Date: Mon, 23 Apr 2018 08:54:06 +0200 Subject: RST reader: remove support for nested inlines. RST does not allow nested emphasis, links, or other inline constructs. Closes #4581, double parsing of links with URLs as link text. This supersedes the earlier fix for #4581 in 6419819b46c0d69c7024ba8aa4a6381cb311341c. Fixes #4561, a bug parsing with URLs inside emphasis. Closes #4792. --- src/Text/Pandoc/Readers/RST.hs | 32 +++++++++++++++----------------- test/Tests/Readers/RST.hs | 11 +++++++++++ test/command/4581.md | 6 ------ 3 files changed, 26 insertions(+), 23 deletions(-) delete mode 100644 test/command/4581.md diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 2a36ca1f1..f9752a83c 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -45,7 +45,6 @@ import Data.Maybe (fromMaybe, isJust) import Data.Sequence (ViewR (..), viewr) import Data.Text (Text) import qualified Data.Text as T -import Text.Pandoc.Walk (walk) import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, fetchItem, readFileFromDirs) @@ -1314,19 +1313,24 @@ table = gridTable False <|> simpleTable False <|> inline :: PandocMonad m => RSTParser m Inlines inline = choice [ note -- can start with whitespace, so try before ws - , whitespace , link - , str , endline , strong , emph , code , subst , interpretedRole - , smart - , hyphens - , escapedChar - , symbol ] <?> "inline" + , inlineContent ] <?> "inline" + +-- strings, spaces and other characters that can appear either by +-- themselves or within inline markup +inlineContent :: PandocMonad m => RSTParser m Inlines +inlineContent = choice [ whitespace + , str + , smart + , hyphens + , escapedChar + , symbol ] <?> "inline content" parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines parseInlineFromString 
= parseFromString' (trimInlines . mconcat <$> many inline) @@ -1369,11 +1373,11 @@ atStart p = do emph :: PandocMonad m => RSTParser m Inlines emph = B.emph . trimInlines . mconcat <$> - enclosed (atStart $ char '*') (char '*') inline + enclosed (atStart $ char '*') (char '*') inlineContent strong :: PandocMonad m => RSTParser m Inlines strong = B.strong . trimInlines . mconcat <$> - enclosed (atStart $ string "**") (try $ string "**") inline + enclosed (atStart $ string "**") (try $ string "**") inlineContent -- Note, this doesn't precisely implement the complex rule in -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules @@ -1480,8 +1484,8 @@ explicitLink :: PandocMonad m => RSTParser m Inlines explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code - label' <- removeLinks . trimInlines . mconcat <$> - manyTill (notFollowedBy (char '`') >> inline) (char '<') + label' <- trimInlines . mconcat <$> + manyTill (notFollowedBy (char '`') >> inlineContent) (char '<') src <- trim <$> manyTill (noneOf ">\n") (char '>') skipSpaces string "`_" @@ -1495,12 +1499,6 @@ explicitLink = try $ do _ -> return ((src, ""), nullAttr) return $ B.linkWith attr (escapeURI src') tit label'' -removeLinks :: B.Inlines -> B.Inlines -removeLinks = B.fromList . walk (concatMap go) . 
B.toList - where go :: Inline -> [Inline] - go (Link _ lab _) = lab - go x = [x] - citationName :: PandocMonad m => RSTParser m String citationName = do raw <- citationMarker diff --git a/test/Tests/Readers/RST.hs b/test/Tests/Readers/RST.hs index 906ed4ff9..540c5d45a 100644 --- a/test/Tests/Readers/RST.hs +++ b/test/Tests/Readers/RST.hs @@ -188,4 +188,15 @@ tests = [ "line block with blank line" =: ] =?> para ("foo" <> note (para "bar")) ] + , testGroup "inlines" + [ "links can contain an URI without being parsed twice (#4581)" =: + "`http://loc <http://loc>`__" =?> + para (link "http://loc" "" "http://loc") + , "inline markup cannot be nested" =: + "**a*b*c**" =?> + para (strong "a*b*c") + , "bare URI parsing disabled inside emphasis (#4561)" =: + "*http://location*" =?> + para (emph (text "http://location")) + ] ] diff --git a/test/command/4581.md b/test/command/4581.md deleted file mode 100644 index b38ebaaf0..000000000 --- a/test/command/4581.md +++ /dev/null @@ -1,6 +0,0 @@ -``` -% pandoc -f rst -t native -`http://loc <http://loc>`__ -^D -[Para [Link ("",[],[]) [Str "http://loc"] ("http://loc","")]] -``` -- cgit v1.2.3