aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2013-01-16 11:25:17 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2013-01-16 11:25:17 -0800
commit49820d5b51cb1072d311814b999627c8fec9aa43 (patch)
treef8a498e05d0a931bb3fc252e41077900204b4740
parent517c4b5f314635748b641bf4fcdcba4fb85656d3 (diff)
downloadpandoc-49820d5b51cb1072d311814b999627c8fec9aa43.tar.gz
Implemented Ext_link_attributes in markdown reader.
Also simplified source URL and link title parsers.
-rw-r--r--src/Text/Pandoc/Options.hs2
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs57
2 files changed, 28 insertions, 31 deletions
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 8ebbef0be..f67debf97 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -77,6 +77,7 @@ data Extension =
| Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown
-- iff container has attribute 'markdown'
| Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak
+ | Ext_link_attributes -- ^ MMD style reference link attributes
| Ext_autolink_bare_uris -- ^ Make all absolute URIs into links
| Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters
| Ext_startnum -- ^ Make start number of ordered list significant
@@ -170,6 +171,7 @@ multimarkdownExtensions = Set.fromList
[ Ext_pipe_tables
, Ext_raw_html
, Ext_markdown_attribute
+ , Ext_link_attributes
, Ext_raw_tex
, Ext_tex_math_double_backslash
, Ext_intraword_underscores
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 05f6b084e..1f57d1918 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -144,7 +144,7 @@ atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return ()
litChar :: MarkdownParser Char
litChar = escapedChar'
<|> noneOf "\n"
- <|> (newline >> notFollowedBy blankline >> return ' ')
+ <|> try (newline >> notFollowedBy blankline >> return ' ')
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
@@ -265,6 +265,9 @@ referenceKey = try $ do
manyTill (escapedChar' <|> litChar) (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
+ -- currently we just ignore MMD-style link/image attributes
+ _kvs <- option [] $ guardEnabled Ext_link_attributes
+ >> many (spnl >> keyValAttr)
blanklines
let target = (escapeURI $ trimr src, tit)
st <- getState
@@ -279,11 +282,18 @@ referenceKey = try $ do
referenceTitle :: MarkdownParser String
referenceTitle = try $ do
skipSpaces >> optional newline >> skipSpaces
- tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words)
- <|> do delim <- char '\'' <|> char '"'
- manyTill litChar (try (char delim >> skipSpaces >>
- notFollowedBy (noneOf ")\n")))
- return $ fromEntities tit
+ let parenTit = charsInBalanced '(' ')' litChar
+ fromEntities <$> (quotedTitle '"' <|> quotedTitle '\'' <|> parenTit)
+
+-- A link title in quotes
+quotedTitle :: Char -> MarkdownParser String
+quotedTitle c = try $ do
+ char c
+ notFollowedBy spaces
+ let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum)
+ let regChunk = many1 (noneOf ['\\','\n',c]) <|> count 1 litChar
+ let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c
+ unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder
-- | PHP Markdown Extra style abbreviation key. Currently
-- we just skip them, since Pandoc doesn't have an element for
@@ -1266,7 +1276,7 @@ escapedChar' :: MarkdownParser Char
escapedChar' = try $ do
char '\\'
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
- <|> oneOf "\\`*_{}[]()>#+-.!~"
+ <|> oneOf "\\`*_{}[]()>#+-.!~\""
escapedChar :: MarkdownParser (F Inlines)
escapedChar = do
@@ -1481,37 +1491,22 @@ reference = do notFollowedBy' (string "[^") -- footnote reference
-- source for a link, with optional title
source :: MarkdownParser (String, String)
-source =
- (try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|>
- -- the following is needed for cases like: [ref](/url(a).
- (enclosed (char '(') (char ')') litChar >>= parseFromString source')
-
--- auxiliary function for source
-source' :: MarkdownParser (String, String)
-source' = do
+source = do
+ char '('
skipSpaces
- let nl = char '\n' >>~ notFollowedBy blankline
- let sourceURL = liftM unwords $ many $ try $ do
- notFollowedBy' linkTitle
- skipMany spaceChar
- optional nl
- skipMany spaceChar
- many1 $ escapedChar' <|> satisfy (not . isBlank)
+ let urlChunk = try $ notFollowedBy (oneOf "\"')") >>
+ (charsInBalanced '(' ')' litChar <|> count 1 litChar)
+ let sourceURL = (unwords . words . concat) <$> many urlChunk
let betweenAngles = try $
- char '<' >> manyTill (escapedChar' <|> noneOf ">\n" <|> nl) (char '>')
+ char '<' >> manyTill litChar (char '>')
src <- try betweenAngles <|> sourceURL
- tit <- option "" linkTitle
+ tit <- option "" $ try $ spnl >> linkTitle
skipSpaces
- eof
+ char ')'
return (escapeURI $ trimr src, tit)
linkTitle :: MarkdownParser String
-linkTitle = try $ do
- (many1 spaceChar >> option '\n' newline) <|> newline
- skipSpaces
- delim <- oneOf "'\""
- tit <- manyTill litChar (try (char delim >> skipSpaces >> eof))
- return $ fromEntities tit
+linkTitle = fromEntities <$> (quotedTitle '"' <|> quotedTitle '\'')
link :: MarkdownParser (F Inlines)
link = try $ do