From be2d7921cbf33a3aa839cd54a0b3ec0a7dfc4a9b Mon Sep 17 00:00:00 2001 From: danse Date: Mon, 23 Apr 2018 08:54:06 +0200 Subject: RST reader: remove support for nested inlines. RST does not allow nested emphasis, links, or other inline constructs. Closes #4581, double parsing of links with URLs as link text. This supersedes the earlier fix for #4581 in 6419819b46c0d69c7024ba8aa4a6381cb311341c. Fixes #4561, a bug parsing with URLs inside emphasis. Closes #4792. --- src/Text/Pandoc/Readers/RST.hs | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 2a36ca1f1..f9752a83c 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -45,7 +45,6 @@ import Data.Maybe (fromMaybe, isJust) import Data.Sequence (ViewR (..), viewr) import Data.Text (Text) import qualified Data.Text as T -import Text.Pandoc.Walk (walk) import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, fetchItem, readFileFromDirs) @@ -1314,19 +1313,24 @@ table = gridTable False <|> simpleTable False <|> inline :: PandocMonad m => RSTParser m Inlines inline = choice [ note -- can start with whitespace, so try before ws - , whitespace , link - , str , endline , strong , emph , code , subst , interpretedRole - , smart - , hyphens - , escapedChar - , symbol ] "inline" + , inlineContent ] "inline" + +-- strings, spaces and other characters that can appear either by +-- themselves or within inline markup +inlineContent :: PandocMonad m => RSTParser m Inlines +inlineContent = choice [ whitespace + , str + , smart + , hyphens + , escapedChar + , symbol ] "inline content" parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) @@ -1369,11 +1373,11 @@ atStart p = do emph :: PandocMonad m => RSTParser m Inlines emph = B.emph . trimInlines . mconcat <$> - enclosed (atStart $ char '*') (char '*') inline + enclosed (atStart $ char '*') (char '*') inlineContent strong :: PandocMonad m => RSTParser m Inlines strong = B.strong . trimInlines . mconcat <$> - enclosed (atStart $ string "**") (try $ string "**") inline + enclosed (atStart $ string "**") (try $ string "**") inlineContent -- Note, this doesn't precisely implement the complex rule in -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules @@ -1480,8 +1484,8 @@ explicitLink :: PandocMonad m => RSTParser m Inlines explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code - label' <- removeLinks . trimInlines . mconcat <$> - manyTill (notFollowedBy (char '`') >> inline) (char '<') + label' <- trimInlines . mconcat <$> + manyTill (notFollowedBy (char '`') >> inlineContent) (char '<') src <- trim <$> manyTill (noneOf ">\n") (char '>') skipSpaces string "`_" @@ -1495,12 +1499,6 @@ explicitLink = try $ do _ -> return ((src, ""), nullAttr) return $ B.linkWith attr (escapeURI src') tit label'' -removeLinks :: B.Inlines -> B.Inlines -removeLinks = B.fromList . walk (concatMap go) . B.toList - where go :: Inline -> [Inline] - go (Link _ lab _) = lab - go x = [x] - citationName :: PandocMonad m => RSTParser m String citationName = do raw <- citationMarker -- cgit v1.2.3