From 7e1370aa874195761aae6e504c66f7111f9a2ccc Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sun, 15 Jul 2007 23:53:22 +0000 Subject: Markdown reader: Added inlinesInBalanced parser combinator to unify treatment of embedded brackets in links and inline footnotes. Note that the solution adopted here causes one of John Gruber's markdown tests to fail: [with_underscore](/url/with_underscore) Here the whole phrase "underscore](/url/with" is treated as emphasized. The previous version of the markdown reader handled this the way Gruber's script handles it, but it ran into trouble on the following: [link with verbatim `]`](/url) where the inner ] was treated as the end of the reference link label. I don't see any good way to handle both cases in the framework of pandoc, so I choose to require an escape in the first example: [with\_underscore](/url/with_underscore) git-svn-id: https://pandoc.googlecode.com/svn/trunk@729 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Readers/Markdown.hs | 53 +++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 29 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index eb771efe6..572288661 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -128,6 +128,24 @@ failUnlessSmart = do state <- getState if stateSmart state then return () else fail "Smart typography feature" +-- | Parse a sequence of inline elements between a string +-- @opener@ and a string @closer@, including inlines +-- between balanced pairs of @opener@ and a @closer@. +inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline] +inlinesInBalanced opener closer = try $ do + let nonOpenerSymbol = try $ do -- succeeds if next inline would be Str opener + res <- inline -- fails if next inline merely begins with opener + if res == Str opener + then pzero + else return ' ' + try (string opener) + result <- manyTill ( (do notFollowedBy nonOpenerSymbol + bal <- inlinesInBalanced opener closer + return $ [Str opener] ++ bal ++ [Str closer]) + <|> (count 1 inline)) + (try (string closer)) + return $ concat result + -- -- document structure -- @@ -915,22 +933,9 @@ endline = try (do -- links -- -rawLabel = try $ do - char labelStart - -- allow for embedded brackets: - raw <- manyTill (do{res <- rawLabel; return ("[" ++ res ++ "]")} <|> - count 1 anyChar) (char labelEnd) - return $ concat raw - -- a reference label for a link -reference = try $ do - raw <- rawLabel - oldInput <- getInput - setInput raw - label <- many inline - setInput oldInput - return (normalizeSpaces label) - +reference = inlinesInBalanced [labelStart] [labelEnd] >>= (return . normalizeSpaces) + -- source for a link, with optional title source = try $ do char srcStart @@ -948,10 +953,9 @@ titleWith startChar endChar = try (do then fail "title must be separated by space and on same or next line" else return () char startChar - tit <- manyTill anyChar (try (do - char endChar - skipSpaces - notFollowedBy (noneOf ")\n"))) + tit <- manyTill anyChar (try (do char endChar + skipSpaces + notFollowedBy (noneOf ")\n"))) return $ decodeEntities tit) title = choice [ titleWith '(' ')', @@ -1011,19 +1015,10 @@ note = try $ do Nothing -> fail "note not found" Just contents -> return (Note contents) -inlinesInBrackets = try $ do - char '[' - results <- many $ count 1 (choice [link, referenceLink, image]) <|> - try (do{res <- inlinesInBrackets; return - ([Str "["] ++ res ++ [Str "]"])}) <|> - (do{notFollowedBy (char ']'); count 1 inline}) - char ']' - return $ concat results - inlineNote = try $ do failIfStrict char noteStart - contents <- inlinesInBrackets + contents <- inlinesInBalanced "[" "]" return (Note [Para contents]) rawLaTeXInline' = do -- cgit v1.2.3