diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-07-15 23:53:22 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-07-15 23:53:22 +0000 |
commit | 7e1370aa874195761aae6e504c66f7111f9a2ccc (patch) | |
tree | f32ed23c4579b850191ae1af01f7a841f02d60b0 /src/Text/Pandoc | |
parent | 8e71c4c388a086efb7ea971fa3ea96d0b4c975fd (diff) | |
download | pandoc-7e1370aa874195761aae6e504c66f7111f9a2ccc.tar.gz |
Markdown reader: Added inlinesInBalanced parser combinator to
unify treatment of embedded brackets in links and inline footnotes.
Note that the solution adopted here causes one of John Gruber's
markdown tests to fail:
[with_underscore](/url/with_underscore)
Here the whole phrase "underscore](/url/with" is treated as
emphasized. The previous version of the markdown reader handled
this the way Gruber's script handles it, but it ran into trouble
on the following:
[link with verbatim `]`](/url)
where the inner ] was treated as the end of the reference link
label. I don't see any good way to handle both cases in the framework
of pandoc, so I choose to require an escape in the first example:
[with\_underscore](/url/with_underscore)
git-svn-id: https://pandoc.googlecode.com/svn/trunk@729 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 53 |
1 files changed, 24 insertions, 29 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index eb771efe6..572288661 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -128,6 +128,24 @@ failUnlessSmart = do state <- getState if stateSmart state then return () else fail "Smart typography feature" +-- | Parse a sequence of inline elements between a string +-- @opener@ and a string @closer@, including inlines +-- between balanced pairs of @opener@ and a @closer@. +inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline] +inlinesInBalanced opener closer = try $ do + let nonOpenerSymbol = try $ do -- succeeds if next inline would be Str opener + res <- inline -- fails if next inline merely begins with opener + if res == Str opener + then pzero + else return ' ' + try (string opener) + result <- manyTill ( (do notFollowedBy nonOpenerSymbol + bal <- inlinesInBalanced opener closer + return $ [Str opener] ++ bal ++ [Str closer]) + <|> (count 1 inline)) + (try (string closer)) + return $ concat result + -- -- document structure -- @@ -915,22 +933,9 @@ endline = try (do -- links -- -rawLabel = try $ do - char labelStart - -- allow for embedded brackets: - raw <- manyTill (do{res <- rawLabel; return ("[" ++ res ++ "]")} <|> - count 1 anyChar) (char labelEnd) - return $ concat raw - -- a reference label for a link -reference = try $ do - raw <- rawLabel - oldInput <- getInput - setInput raw - label <- many inline - setInput oldInput - return (normalizeSpaces label) - +reference = inlinesInBalanced [labelStart] [labelEnd] >>= (return . normalizeSpaces) + -- source for a link, with optional title source = try $ do char srcStart @@ -948,10 +953,9 @@ titleWith startChar endChar = try (do then fail "title must be separated by space and on same or next line" else return () char startChar - tit <- manyTill anyChar (try (do - char endChar - skipSpaces - notFollowedBy (noneOf ")\n"))) + tit <- manyTill anyChar (try (do char endChar + skipSpaces + notFollowedBy (noneOf ")\n"))) return $ decodeEntities tit) title = choice [ titleWith '(' ')', @@ -1011,19 +1015,10 @@ note = try $ do Nothing -> fail "note not found" Just contents -> return (Note contents) -inlinesInBrackets = try $ do - char '[' - results <- many $ count 1 (choice [link, referenceLink, image]) <|> - try (do{res <- inlinesInBrackets; return - ([Str "["] ++ res ++ [Str "]"])}) <|> - (do{notFollowedBy (char ']'); count 1 inline}) - char ']' - return $ concat results - inlineNote = try $ do failIfStrict char noteStart - contents <- inlinesInBrackets + contents <- inlinesInBalanced "[" "]" return (Note [Para contents]) rawLaTeXInline' = do |