aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Markdown.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs224
1 files changed, 81 insertions, 143 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5515c735b..e1c481311 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -2,7 +2,7 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Markdown
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2017 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -55,11 +55,9 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojis)
-import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (tableWith)
-import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
isCommentTag, isInlineTag, isTextTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
@@ -72,10 +70,11 @@ type MarkdownParser m = ParserT [Char] ParserState m
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: PandocMonad m
=> ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readMarkdown opts s = do
- parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
+ parsed <- (readWithM parseMarkdown) def{ stateOptions = opts }
+ (T.unpack s ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -155,9 +154,11 @@ litChar = escapedChar'
inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines)
inlinesInBalancedBrackets = do
char '['
+ pos <- getPosition
(_, raw) <- withRaw $ charsInBalancedBrackets 1
guard $ not $ null raw
- parseFromString (trimInlinesF . mconcat <$> many inline) (init raw)
+ parseFromString' (setPosition pos >>
+ trimInlinesF . mconcat <$> many inline) (init raw)
charsInBalancedBrackets :: PandocMonad m => Int -> MarkdownParser m ()
charsInBalancedBrackets 0 = return ()
@@ -189,7 +190,7 @@ rawTitleBlockLine = do
titleLine :: PandocMonad m => MarkdownParser m (F Inlines)
titleLine = try $ do
raw <- rawTitleBlockLine
- res <- parseFromString (many inline) raw
+ res <- parseFromString' (many inline) raw
return $ trimInlinesF $ mconcat res
authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines])
@@ -200,12 +201,12 @@ authorsLine = try $ do
(trimInlinesF . mconcat <$> many
(try $ notFollowedBy sep >> inline))
sep
- sequence <$> parseFromString pAuthors raw
+ sequence <$> parseFromString' pAuthors raw
dateLine :: PandocMonad m => MarkdownParser m (F Inlines)
dateLine = try $ do
raw <- rawTitleBlockLine
- res <- parseFromString (many inline) raw
+ res <- parseFromString' (many inline) raw
return $ trimInlinesF $ mconcat res
titleBlock :: PandocMonad m => MarkdownParser m ()
@@ -290,7 +291,7 @@ ignorable t = (T.pack "_") `T.isSuffixOf` t
toMetaValue :: PandocMonad m
=> Text -> MarkdownParser m (F MetaValue)
-toMetaValue x = toMeta <$> parseFromString parseBlocks (T.unpack x)
+toMetaValue x = toMeta <$> parseFromString' parseBlocks (T.unpack x)
where
toMeta p = do
p' <- p
@@ -360,20 +361,20 @@ parseMarkdown = do
optional titleBlock
blocks <- parseBlocks
st <- getState
+ -- check for notes with no corresponding note references
+ let notesUsed = stateNoteRefs st
+ let notesDefined = M.keys (stateNotes' st)
+ mapM_ (\n -> unless (n `Set.member` notesUsed) $ do
+ -- lookup to get sourcepos
+ case M.lookup n (stateNotes' st) of
+ Just (pos, _) -> report (NoteDefinedButNotUsed n pos)
+ Nothing -> error "The impossible happened.") notesDefined
let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
meta <- stateMeta' st
return $ Pandoc meta bs) st
reportLogMessages
(do guardEnabled Ext_east_asian_line_breaks
- return $ bottomUp softBreakFilter doc) <|> return doc
-
-softBreakFilter :: [Inline] -> [Inline]
-softBreakFilter (x:SoftBreak:y:zs) =
- case (stringify x, stringify y) of
- (xs@(_:_), (c:_))
- | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
- _ -> x:SoftBreak:y:zs
-softBreakFilter xs = xs
+ return $ eastAsianLineBreakFilter doc) <|> return doc
referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
referenceKey = try $ do
@@ -392,7 +393,9 @@ referenceKey = try $ do
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
attr <- option nullAttr $ try $
- guardEnabled Ext_link_attributes >> skipSpaces >> attributes
+ do guardEnabled Ext_link_attributes
+ skipSpaces >> optional newline >> skipSpaces
+ attributes
addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
>> many (try $ spnl >> keyValAttr)
blanklines
@@ -402,8 +405,12 @@ referenceKey = try $ do
let oldkeys = stateKeys st
let key = toKey raw
case M.lookup key oldkeys of
- Just _ -> logMessage $ DuplicateLinkReference raw pos
- Nothing -> return ()
+ Just (t,a) | not (t == target && a == attr') ->
+ -- We don't warn on two duplicate keys if the targets are also
+ -- the same. This can happen naturally with --reference-location=block
+ -- or section. See #3701.
+ logMessage $ DuplicateLinkReference raw pos
+ _ -> return ()
updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
return $ return mempty
@@ -464,13 +471,12 @@ noteBlock = try $ do
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
let raw = unlines (first:rest) ++ "\n"
optional blanklines
- parsed <- parseFromString parseBlocks raw
- let newnote = (ref, parsed)
+ parsed <- parseFromString' parseBlocks raw
oldnotes <- stateNotes' <$> getState
- case lookup ref oldnotes of
+ case M.lookup ref oldnotes of
Just _ -> logMessage $ DuplicateNoteReference ref pos
Nothing -> return ()
- updateState $ \s -> s { stateNotes' = newnote : oldnotes }
+ updateState $ \s -> s { stateNotes' = M.insert ref (pos, parsed) oldnotes }
return mempty
--
@@ -614,7 +620,7 @@ hrule = try $ do
--
indentedLine :: PandocMonad m => MarkdownParser m String
-indentedLine = indentSpaces >> anyLine >>= return . (++ "\n")
+indentedLine = indentSpaces >> anyLineNewline
blockDelimiter :: PandocMonad m
=> (Char -> Bool)
@@ -772,7 +778,7 @@ blockQuote :: PandocMonad m => MarkdownParser m (F Blocks)
blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
+ contents <- parseFromString' parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
return $ B.blockQuote <$> contents
--
@@ -868,8 +874,7 @@ listContinuationLine = try $ do
notFollowedBy' listStart
notFollowedByHtmlCloser
optional indentSpaces
- result <- anyLine
- return $ result ++ "\n"
+ anyLineNewline
listItem :: PandocMonad m
=> MarkdownParser m a
@@ -885,7 +890,7 @@ listItem start = try $ do
setState $ state {stateParserContext = ListItemState}
-- parse the extracted block, which may contain various block elements:
let raw = concat (first:continuations)
- contents <- parseFromString parseBlocks raw
+ contents <- parseFromString' parseBlocks raw
updateState (\st -> st {stateParserContext = oldContext})
return contents
@@ -932,8 +937,8 @@ definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Bl
definitionListItem compact = try $ do
rawLine' <- anyLine
raw <- many1 $ defRawBlock compact
- term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
- contents <- mapM (parseFromString parseBlocks . (++"\n")) raw
+ term <- parseFromString' (trimInlinesF . mconcat <$> many inline) rawLine'
+ contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw
optional blanklines
return $ liftM2 (,) term (sequence contents)
@@ -941,7 +946,7 @@ defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String
defRawBlock compact = try $ do
hasBlank <- option False $ blankline >> return True
defListMarker
- firstline <- anyLine
+ firstline <- anyLineNewline
let dline = try
( do notFollowedBy blankline
notFollowedByHtmlCloser
@@ -956,7 +961,7 @@ defRawBlock compact = try $ do
ln <- indentSpaces >> notFollowedBy blankline >> anyLine
lns <- many dline
return $ trailing ++ unlines (ln:lns)
- return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
+ return $ trimr (firstline ++ unlines rawlines ++ cont) ++
if hasBlank || not (null cont) then "\n\n" else ""
definitionList :: PandocMonad m => MarkdownParser m (F Blocks)
@@ -1088,13 +1093,19 @@ rawTeXBlock = do
rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
+ -- we don't want '<td> text' to be a code block:
+ skipMany spaceChar
+ indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
-- try to find closing tag
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
let closer = htmlTag (\x -> x ~== TagClose tagtype)
- contents <- mconcat <$> many (notFollowedBy' closer >> block)
+ let block' = do notFollowedBy' closer
+ atMostSpaces indentlevel
+ block
+ contents <- mconcat <$> many block'
result <-
(closer >>= \(_, rawcloser) -> return (
return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
@@ -1119,7 +1130,7 @@ lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lineBlock = try $ do
guardEnabled Ext_line_blocks
lines' <- lineBlockLines >>=
- mapM (parseFromString (trimInlinesF . mconcat <$> many inline))
+ mapM (parseFromString' (trimInlinesF . mconcat <$> many inline))
return $ B.lineBlock <$> sequence lines'
--
@@ -1162,7 +1173,7 @@ simpleTableHeader headless = try $ do
then replicate (length dashes) ""
else rawHeads
heads <- fmap sequence
- $ mapM (parseFromString (mconcat <$> many plain))
+ $ mapM (parseFromString' (mconcat <$> many plain))
$ map trim rawHeads'
return (heads, aligns, indices)
@@ -1208,7 +1219,7 @@ tableLine :: PandocMonad m
=> [Int]
-> MarkdownParser m (F [Blocks])
tableLine indices = rawTableLine indices >>=
- fmap sequence . mapM (parseFromString (mconcat <$> many plain))
+ fmap sequence . mapM (parseFromString' (mconcat <$> many plain))
-- Parse a multiline table row and return a list of blocks (columns).
multilineRow :: PandocMonad m
@@ -1217,7 +1228,7 @@ multilineRow :: PandocMonad m
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
let cols = map unlines $ transpose colLines
- fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
+ fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
@@ -1275,7 +1286,7 @@ multilineTableHeader headless = try $ do
then replicate (length dashes) ""
else map (unlines . map trim) rawHeadsList
heads <- fmap sequence $
- mapM (parseFromString (mconcat <$> many plain)) $
+ mapM (parseFromString' (mconcat <$> many plain)) $
map trim rawHeads
return (heads, aligns, indices)
@@ -1285,89 +1296,7 @@ multilineTableHeader headless = try $ do
-- ending with a footer (dashed line followed by blank line).
gridTable :: PandocMonad m => Bool -- ^ Headerless table
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
-gridTable headless =
- tableWith (gridTableHeader headless) gridTableRow
- (gridTableSep '-') gridTableFooter
-
-gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line = map removeFinalBar $ tail $
- splitStringByIndices (init indices) $ trimr line
-
-gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment)
-gridPart ch = do
- leftColon <- option False (True <$ char ':')
- dashes <- many1 (char ch)
- rightColon <- option False (True <$ char ':')
- char '+'
- let lengthDashes = length dashes + (if leftColon then 1 else 0) +
- (if rightColon then 1 else 0)
- let alignment = case (leftColon, rightColon) of
- (True, True) -> AlignCenter
- (True, False) -> AlignLeft
- (False, True) -> AlignRight
- (False, False) -> AlignDefault
- return ((lengthDashes, lengthDashes + 1), alignment)
-
-gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
-
-removeFinalBar :: String -> String
-removeFinalBar =
- reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-
--- | Separator between rows of grid table.
-gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char
-gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-
--- | Parse header for a grid table.
-gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table
- -> MarkdownParser m (F [Blocks], [Alignment], [Int])
-gridTableHeader headless = try $ do
- optional blanklines
- dashes <- gridDashedLines '-'
- rawContent <- if headless
- then return []
- else many1 (try (char '|' >> anyLine))
- underDashes <- if headless
- then return dashes
- else gridDashedLines '='
- guard $ length dashes == length underDashes
- let lines' = map (snd . fst) underDashes
- let indices = scanl (+) 0 lines'
- let aligns = map snd underDashes
- let rawHeads = if headless
- then replicate (length underDashes) ""
- else map (unlines . map trim) $ transpose
- $ map (gridTableSplitLine indices) rawContent
- heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
- return (heads, aligns, indices)
-
-gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String]
-gridTableRawLine indices = do
- char '|'
- line <- anyLine
- return (gridTableSplitLine indices line)
-
--- | Parse row of grid table.
-gridTableRow :: PandocMonad m => [Int]
- -> MarkdownParser m (F [Blocks])
-gridTableRow indices = do
- colLines <- many1 (gridTableRawLine indices)
- let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
- transpose colLines
- fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols)
-
-removeOneLeadingSpace :: [String] -> [String]
-removeOneLeadingSpace xs =
- if all startsWithSpace xs
- then map (drop 1) xs
- else xs
- where startsWithSpace "" = True
- startsWithSpace (y:_) = y == ' '
-
--- | Parse footer for a grid table.
-gridTableFooter :: PandocMonad m => MarkdownParser m [Char]
-gridTableFooter = blanklines
+gridTable headless = gridTableWith' parseBlocks headless
pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])
pipeBreak = try $ do
@@ -1414,7 +1343,7 @@ pipeTableRow = try $ do
let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
<|> void (noneOf "|\n\r")
let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>=
- parseFromString pipeTableCell
+ parseFromString' pipeTableCell
cells <- cellContents `sepEndBy1` (char '|')
-- surrounding pipes needed for a one-column table:
guard $ not (length cells == 1 && not openPipe)
@@ -1522,6 +1451,7 @@ inline = choice [ whitespace
, autoLink
, spanHtml
, rawHtmlInline
+ , escapedNewline
, escapedChar
, rawLaTeXInline'
, exampleRef
@@ -1538,16 +1468,20 @@ escapedChar' = try $ do
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
<|> (guardEnabled Ext_angle_brackets_escapable >>
oneOf "\\`*_{}[]()>#+-.!~\"<>")
- <|> (guardEnabled Ext_escaped_line_breaks >> char '\n')
<|> oneOf "\\`*_{}[]()>#+-.!~\""
+escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines)
+escapedNewline = try $ do
+ guardEnabled Ext_escaped_line_breaks
+ char '\\'
+ lookAhead (char '\n') -- don't consume the newline (see #3730)
+ return $ return B.linebreak
+
escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
escapedChar = do
result <- escapedChar'
case result of
' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
- '\n' -> guardEnabled Ext_escaped_line_breaks >>
- return (return B.linebreak) -- "\[newline]" is a linebreak
_ -> return $ return $ B.str [result]
ltSign :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -1629,9 +1563,9 @@ ender c n = try $ do
three :: PandocMonad m => Char -> MarkdownParser m (F Inlines)
three c = do
contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
- (ender c 3 >> return ((B.strong . B.emph) <$> contents))
- <|> (ender c 2 >> one c (B.strong <$> contents))
- <|> (ender c 1 >> two c (B.emph <$> contents))
+ (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents))
+ <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents))
+ <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents))
<|> return (return (B.str [c,c,c]) <> contents)
-- Parse inlines til you hit two c's, and emit strong.
@@ -1639,7 +1573,8 @@ three c = do
two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
two c prefix' = do
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
- (ender c 2 >> return (B.strong <$> (prefix' <> contents)))
+ (ender c 2 >> updateLastStrPos >>
+ return (B.strong <$> (prefix' <> contents)))
<|> return (return (B.str [c,c]) <> (prefix' <> contents))
-- Parse inlines til you hit a c, and emit emph.
@@ -1650,7 +1585,7 @@ one c prefix' = do
<|> try (string [c,c] >>
notFollowedBy (ender c 1) >>
two c mempty) )
- (ender c 1 >> return (B.emph <$> (prefix' <> contents)))
+ (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents)))
<|> return (return (B.str [c]) <> (prefix' <> contents))
strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -1814,15 +1749,17 @@ referenceLink :: PandocMonad m
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
(_,raw') <- option (mempty, "") $
- lookAhead (try (guardEnabled Ext_citations >>
- spnl >> normalCite >> return (mempty, "")))
+ lookAhead (try (do guardEnabled Ext_citations
+ guardDisabled Ext_spaced_reference_links <|> spnl
+ normalCite
+ return (mempty, "")))
<|>
- try (spnl >> reference)
+ try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference)
when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
- parsedRaw <- parseFromString (mconcat <$> many inline) raw'
- fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
+ parsedRaw <- parseFromString' (mconcat <$> many inline) raw'
+ fallback <- parseFromString' (mconcat <$> many inline) $ dropBrackets raw
implicitHeaderRefs <- option False $
True <$ guardEnabled Ext_implicit_header_references
let makeFallback = do
@@ -1887,16 +1824,17 @@ note :: PandocMonad m => MarkdownParser m (F Inlines)
note = try $ do
guardEnabled Ext_footnotes
ref <- noteMarker
+ updateState $ \st -> st{ stateNoteRefs = Set.insert ref (stateNoteRefs st) }
return $ do
notes <- asksF stateNotes'
- case lookup ref notes of
+ case M.lookup ref notes of
Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
- Just contents -> do
+ Just (_pos, contents) -> do
st <- askF
-- process the note in a context that doesn't resolve
-- notes, to avoid infinite looping with notes inside
-- notes:
- let contents' = runF contents st{ stateNotes' = [] }
+ let contents' = runF contents st{ stateNotes' = M.empty }
return $ B.note contents'
inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -2028,7 +1966,7 @@ textualCite = try $ do
let (spaces',raw') = span isSpace raw
spc | null spaces' = mempty
| otherwise = B.space
- lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
+ lab <- parseFromString' (mconcat <$> many inline) $ dropBrackets raw'
fallback <- referenceLink B.linkWith (lab,raw')
return $ do
fallback' <- fallback