aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Markdown.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs123
1 files changed, 58 insertions, 65 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 33d1a9620..0ea7f9ac5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -60,6 +60,8 @@ import System.FilePath (takeExtension, addExtension)
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match (tagOpen)
import qualified Data.Set as Set
+import Text.Printf (printf)
+import Debug.Trace (trace)
type MarkdownParser = Parser [Char] ParserState
@@ -215,10 +217,10 @@ pandocTitleBlock = try $ do
author' <- author
date' <- date
return $
- ( if B.isNull title' then id else B.setMeta "title" title'
- . if null author' then id else B.setMeta "author" author'
- . if B.isNull date' then id else B.setMeta "date" date' )
- nullMeta
+ (if B.isNull title' then id else B.setMeta "title" title')
+ . (if null author' then id else B.setMeta "author" author')
+ . (if B.isNull date' then id else B.setMeta "date" date')
+ $ nullMeta
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
yamlMetaBlock :: MarkdownParser (F Blocks)
@@ -354,7 +356,7 @@ referenceKey = try $ do
tit <- option "" referenceTitle
-- currently we just ignore MMD-style link/image attributes
_kvs <- option [] $ guardEnabled Ext_link_attributes
- >> many (spnl >> keyValAttr)
+ >> many (try $ spnl >> keyValAttr)
blanklines
let target = (escapeURI $ trimr src, tit)
st <- getState
@@ -440,7 +442,10 @@ parseBlocks :: MarkdownParser (F Blocks)
parseBlocks = mconcat <$> manyTill block eof
block :: MarkdownParser (F Blocks)
-block = choice [ mempty <$ blanklines
+block = do
+ tr <- getOption readerTrace
+ pos <- getPosition
+ res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
, guardEnabled Ext_latex_macros *> (macro >>= return . return)
@@ -465,6 +470,11 @@ block = choice [ mempty <$ blanklines
, para
, plain
] <?> "block"
+ when tr $ do
+ st <- getState
+ trace (printf "line %d: %s" (sourceLine pos)
+ (take 60 $ show $ B.toList $ runF res st)) (return ())
+ return res
--
-- header blocks
@@ -730,9 +740,14 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ())
listLine :: MarkdownParser String
listLine = try $ do
notFollowedBy' (do indentSpaces
- many (spaceChar)
+ many spaceChar
listStart)
- chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline
+ notFollowedBy' $ htmlTag (~== TagClose "div")
+ chunks <- manyTill
+ ( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
+ <|> liftM snd (htmlTag isCommentTag)
+ <|> count 1 anyChar
+ ) newline
return $ concat chunks
-- parse raw text for one list item, excluding start marker and continuations
@@ -759,6 +774,7 @@ listContinuationLine :: MarkdownParser String
listContinuationLine = try $ do
notFollowedBy blankline
notFollowedBy' listStart
+ notFollowedBy' $ htmlTag (~== TagClose "div")
optional indentSpaces
result <- anyLine
return $ result ++ "\n"
@@ -783,8 +799,8 @@ listItem start = try $ do
orderedList :: MarkdownParser (F Blocks)
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListStart
- unless ((style == DefaultStyle || style == Decimal || style == Example) &&
- (delim == DefaultDelim || delim == Period)) $
+ unless (style `elem` [DefaultStyle, Decimal, Example] &&
+ delim `elem` [DefaultDelim, Period]) $
guardEnabled Ext_fancy_lists
when (style == Example) $ guardEnabled Ext_example_lists
items <- fmap sequence $ many1 $ listItem
@@ -896,7 +912,9 @@ plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
--
htmlElement :: MarkdownParser String
-htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
+htmlElement = rawVerbatimBlock
+ <|> strictHtmlBlock
+ <|> liftM snd (htmlTag isBlockTag)
htmlBlock :: MarkdownParser (F Blocks)
htmlBlock = do
@@ -917,8 +935,8 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag)
rawVerbatimBlock :: MarkdownParser String
rawVerbatimBlock = try $ do
- (TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
- t == "pre" || t == "style" || t == "script")
+ (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem
+ ["pre", "style", "script"])
(const True))
contents <- manyTill anyChar (htmlTag (~== TagClose tag))
return $ open ++ contents ++ renderTags [TagClose tag]
@@ -936,6 +954,8 @@ rawHtmlBlocks = do
htmlBlocks <- many1 $ try $ do
s <- rawVerbatimBlock <|> try (
do (t,raw) <- htmlTag isBlockTag
+ guard $ t ~/= TagOpen "div" [] &&
+ t ~/= TagClose "div"
exts <- getOption readerExtensions
-- if open tag, need markdown="1" if
-- markdown_attributes extension is set
@@ -1118,12 +1138,12 @@ multilineTableHeader headless = try $ do
then liftM (map (:[]) . tail .
splitStringByIndices (init indices)) $ lookAhead anyLine
else return $ transpose $ map
- (\ln -> tail $ splitStringByIndices (init indices) ln)
+ (tail . splitStringByIndices (init indices))
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
then replicate (length dashes) ""
- else map unwords rawHeadsList
+ else map (unlines . map trim) rawHeadsList
heads <- fmap sequence $
mapM (parseFromString (mconcat <$> many plain)) $
map trim rawHeads
@@ -1180,7 +1200,7 @@ gridTableHeader headless = try $ do
-- RST does not have a notion of alignments
let rawHeads = if headless
then replicate (length dashes) ""
- else map unwords $ transpose
+ else map (unlines . map trim) $ transpose
$ map (gridTableSplitLine indices) rawContent
heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
return (heads, aligns, indices)
@@ -1366,7 +1386,7 @@ ltSign :: MarkdownParser (F Inlines)
ltSign = do
guardDisabled Ext_raw_html
<|> guardDisabled Ext_markdown_in_html_blocks
- <|> (notFollowedBy' rawHtmlBlocks >> return ())
+ <|> (notFollowedBy' (htmlTag isBlockTag) >> return ())
char '<'
return $ return $ B.str "<"
@@ -1406,39 +1426,6 @@ math :: MarkdownParser (F Inlines)
math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
<|> (return . B.math <$> (mathInline >>= applyMacros'))
-mathDisplay :: MarkdownParser String
-mathDisplay =
- (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
- <|> (guardEnabled Ext_tex_math_single_backslash >>
- mathDisplayWith "\\[" "\\]")
- <|> (guardEnabled Ext_tex_math_double_backslash >>
- mathDisplayWith "\\\\[" "\\\\]")
-
-mathDisplayWith :: String -> String -> MarkdownParser String
-mathDisplayWith op cl = try $ do
- string op
- many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
-
-mathInline :: MarkdownParser String
-mathInline =
- (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
- <|> (guardEnabled Ext_tex_math_single_backslash >>
- mathInlineWith "\\(" "\\)")
- <|> (guardEnabled Ext_tex_math_double_backslash >>
- mathInlineWith "\\\\(" "\\\\)")
-
-mathInlineWith :: String -> String -> MarkdownParser String
-mathInlineWith op cl = try $ do
- string op
- notFollowedBy space
- words' <- many1Till (count 1 (noneOf "\n\\")
- <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
- <|> count 1 newline <* notFollowedBy' blankline
- *> return " ")
- (try $ string cl)
- notFollowedBy digit -- to prevent capture of $5
- return $ concat words'
-
-- Parses material enclosed in *s, **s, _s, or __s.
-- Designed to avoid backtracking.
enclosure :: Char
@@ -1565,16 +1552,14 @@ endline :: MarkdownParser (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
+ -- parse potential list-starts differently if in a list:
+ st <- getState
+ when (stateParserContext st == ListItemState) $ notFollowedBy listStart
guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
guardEnabled Ext_backtick_code_blocks >>
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
- -- parse potential list-starts differently if in a list:
- st <- getState
- when (stateParserContext st == ListItemState) $ do
- notFollowedBy' bulletListStart
- notFollowedBy' anyOrderedListStart
(guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
<|> (return $ return B.space)
@@ -1747,7 +1732,7 @@ spanHtml = try $ do
guardEnabled Ext_markdown_in_html_blocks
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
- let ident = maybe "" id $ lookup "id" attrs
+ let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.spanWith (ident, classes, keyvals) <$> contents
@@ -1755,12 +1740,19 @@ spanHtml = try $ do
divHtml :: MarkdownParser (F Blocks)
divHtml = try $ do
guardEnabled Ext_markdown_in_html_blocks
- (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" [])
- contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div"))
- let ident = maybe "" id $ lookup "id" attrs
- let classes = maybe [] words $ lookup "class" attrs
- let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- return $ B.divWith (ident, classes, keyvals) <$> contents
+ (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
+ bls <- option "" (blankline >> option "" blanklines)
+ contents <- mconcat <$>
+ many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
+ closed <- option False (True <$ htmlTag (~== TagClose "div"))
+ if closed
+ then do
+ let ident = fromMaybe "" $ lookup "id" attrs
+ let classes = maybe [] words $ lookup "class" attrs
+ let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+ return $ B.divWith (ident, classes, keyvals) <$> contents
+ else -- avoid backtracing
+ return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
rawHtmlInline :: MarkdownParser (F Inlines)
rawHtmlInline = do
@@ -1836,9 +1828,10 @@ citeKey = try $ do
guard $ lastStrPos /= Just pos
suppress_author <- option False (char '-' >> return True)
char '@'
- first <- letter
- let internal p = try $ p >>~ lookAhead (letter <|> digit)
- rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/")
+ first <- letter <|> char '_'
+ let regchar = satisfy (\c -> isAlphaNum c || c == '_')
+ let internal p = try $ p >>~ lookAhead regchar
+ rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
let key = first:rest
return (suppress_author, key)