From 49820d5b51cb1072d311814b999627c8fec9aa43 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 16 Jan 2013 11:25:17 -0800 Subject: Implemented Ext_link_attributes in markdown reader. Also simplified source URL and link title parsers. --- src/Text/Pandoc/Options.hs | 2 ++ src/Text/Pandoc/Readers/Markdown.hs | 57 +++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 31 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 8ebbef0be..f67debf97 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -77,6 +77,7 @@ data Extension = | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown -- iff container has attribute 'markdown' | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak + | Ext_link_attributes -- ^ MMD style reference link attributes | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters | Ext_startnum -- ^ Make start number of ordered list significant @@ -170,6 +171,7 @@ multimarkdownExtensions = Set.fromList [ Ext_pipe_tables , Ext_raw_html , Ext_markdown_attribute + , Ext_link_attributes , Ext_raw_tex , Ext_tex_math_double_backslash , Ext_intraword_underscores diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 05f6b084e..1f57d1918 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -144,7 +144,7 @@ atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return () litChar :: MarkdownParser Char litChar = escapedChar' <|> noneOf "\n" - <|> (newline >> notFollowedBy blankline >> return ' ') + <|> try (newline >> notFollowedBy blankline >> return ' ') -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. @@ -265,6 +265,9 @@ referenceKey = try $ do manyTill (escapedChar' <|> litChar) (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle + -- currently we just ignore MMD-style link/image attributes + _kvs <- option [] $ guardEnabled Ext_link_attributes + >> many (spnl >> keyValAttr) blanklines let target = (escapeURI $ trimr src, tit) st <- getState @@ -279,11 +282,18 @@ referenceKey = try $ do referenceTitle :: MarkdownParser String referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces - tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words) - <|> do delim <- char '\'' <|> char '"' - manyTill litChar (try (char delim >> skipSpaces >> - notFollowedBy (noneOf ")\n"))) - return $ fromEntities tit + let parenTit = charsInBalanced '(' ')' litChar + fromEntities <$> (quotedTitle '"' <|> quotedTitle '\'' <|> parenTit) + +-- A link title in quotes +quotedTitle :: Char -> MarkdownParser String +quotedTitle c = try $ do + char c + notFollowedBy spaces + let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum) + let regChunk = many1 (noneOf ['\\','\n',c]) <|> count 1 litChar + let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c + unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for @@ -1266,7 +1276,7 @@ escapedChar' :: MarkdownParser Char escapedChar' = try $ do char '\\' (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) - <|> oneOf "\\`*_{}[]()>#+-.!~" + <|> oneOf "\\`*_{}[]()>#+-.!~\"" escapedChar :: MarkdownParser (F Inlines) escapedChar = do @@ -1481,37 +1491,22 @@ reference = do notFollowedBy' (string "[^") -- footnote reference -- source for a link, with optional title source :: MarkdownParser (String, String) -source = - (try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|> - -- the following is needed for cases like: [ref](/url(a). - (enclosed (char '(') (char ')') litChar >>= parseFromString source') - --- auxiliary function for source -source' :: MarkdownParser (String, String) -source' = do +source = do + char '(' skipSpaces - let nl = char '\n' >>~ notFollowedBy blankline - let sourceURL = liftM unwords $ many $ try $ do - notFollowedBy' linkTitle - skipMany spaceChar - optional nl - skipMany spaceChar - many1 $ escapedChar' <|> satisfy (not . isBlank) + let urlChunk = try $ notFollowedBy (oneOf "\"')") >> + (charsInBalanced '(' ')' litChar <|> count 1 litChar) + let sourceURL = (unwords . words . concat) <$> many urlChunk let betweenAngles = try $ - char '<' >> manyTill (escapedChar' <|> noneOf ">\n" <|> nl) (char '>') + char '<' >> manyTill litChar (char '>') src <- try betweenAngles <|> sourceURL - tit <- option "" linkTitle + tit <- option "" $ try $ spnl >> linkTitle skipSpaces - eof + char ')' return (escapeURI $ trimr src, tit) linkTitle :: MarkdownParser String -linkTitle = try $ do - (many1 spaceChar >> option '\n' newline) <|> newline - skipSpaces - delim <- oneOf "'\"" - tit <- manyTill litChar (try (char delim >> skipSpaces >> eof)) - return $ fromEntities tit +linkTitle = fromEntities <$> (quotedTitle '"' <|> quotedTitle '\'') link :: MarkdownParser (F Inlines) link = try $ do -- cgit v1.2.3