aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs943
-rw-r--r--src/Text/Pandoc/Readers/RST.hs4
2 files changed, 555 insertions, 392 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 545f34ca1..79bd21cab 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -36,17 +37,21 @@ import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
import Data.Maybe
import Text.Pandoc.Definition
-import Text.Pandoc.Generic
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Inlines(..), Blocks, trimInlines)
import Text.Pandoc.Options
-import Text.Pandoc.Shared
-import Text.Pandoc.Parsing
+import Text.Pandoc.Shared hiding (compactify)
+import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
import Text.Pandoc.XML ( fromEntities )
-import Control.Monad (when, liftM, guard, mzero, unless )
+import Data.Monoid
+import qualified Data.Sequence as Seq -- TODO leaky abstraction, need better isNull in Builder
+import Control.Applicative ((<$>), (<*), (*>), (<$))
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match (tagOpen)
+import Control.Monad.Reader
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ReaderOptions -- ^ Reader options
@@ -55,6 +60,16 @@ readMarkdown :: ReaderOptions -- ^ Reader options
readMarkdown opts s =
(readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
+type F a = Reader ParserState a
+
+instance Monoid a => Monoid (Reader ParserState a) where
+ mempty = return mempty
+ mappend = liftM2 mappend
+ mconcat = liftM mconcat . sequence
+
+trimInlinesF :: F Inlines -> F Inlines
+trimInlinesF = liftM trimInlines
+
--
-- Constants and data structure definitions
--
@@ -71,7 +86,7 @@ isHruleChar '-' = True
isHruleChar '_' = True
isHruleChar _ = False
-setextHChars :: [Char]
+setextHChars :: String
setextHChars = "=-"
isBlank :: Char -> Bool
@@ -84,13 +99,23 @@ isBlank _ = False
-- auxiliary functions
--
-indentSpaces :: Parser [Char] ParserState [Char]
+isNull :: F Inlines -> Bool
+isNull ils = Seq.null $ unInlines (runReader ils def)
+
+spnl :: Parser [Char] st ()
+spnl = try $ do
+ skipSpaces
+ optional newline
+ skipSpaces
+ notFollowedBy (char '\n')
+
+indentSpaces :: Parser [Char] ParserState String
indentSpaces = try $ do
tabStop <- getOption readerTabStop
count tabStop (char ' ') <|>
string "\t" <?> "indentation"
-nonindentSpaces :: Parser [Char] ParserState [Char]
+nonindentSpaces :: Parser [Char] ParserState String
nonindentSpaces = do
tabStop <- getOption readerTabStop
sps <- many (char ' ')
@@ -114,32 +139,31 @@ litChar = escapedChar'
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
-inlinesInBalancedBrackets :: Parser [Char] ParserState Inline
- -> Parser [Char] ParserState [Inline]
-inlinesInBalancedBrackets parser = try $ do
+inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines)
+inlinesInBalancedBrackets = try $ do
char '['
- result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser
- guard (res == "[")
- bal <- inlinesInBalancedBrackets parser
- return $ [Str "["] ++ bal ++ [Str "]"])
- <|> (count 1 parser))
+ result <- manyTill ( (do lookAhead $ try $ do x <- inline
+ guard (runReader x def == B.str "[")
+ bal <- inlinesInBalancedBrackets
+ return $ (\x -> B.str "[" <> x <> B.str "]") <$> bal)
+ <|> inline)
(char ']')
- return $ concat result
+ return $ mconcat result
--
-- document structure
--
-titleLine :: Parser [Char] ParserState [Inline]
+titleLine :: Parser [Char] ParserState (F Inlines)
titleLine = try $ do
char '%'
skipSpaces
res <- many $ (notFollowedBy newline >> inline)
<|> try (endline >> whitespace)
newline
- return $ normalizeSpaces res
+ return $ trimInlinesF $ mconcat res
-authorsLine :: Parser [Char] ParserState [[Inline]]
+authorsLine :: Parser [Char] ParserState (F [Inlines])
authorsLine = try $ do
char '%'
skipSpaces
@@ -148,21 +172,20 @@ authorsLine = try $ do
(char ';' <|>
try (newline >> notFollowedBy blankline >> spaceChar))
newline
- return $ filter (not . null) $ map normalizeSpaces authors
+ return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors
-dateLine :: Parser [Char] ParserState [Inline]
+dateLine :: Parser [Char] ParserState (F Inlines)
dateLine = try $ do
char '%'
skipSpaces
- date <- manyTill inline newline
- return $ normalizeSpaces date
+ trimInlinesF . mconcat <$> manyTill inline newline
-titleBlock :: Parser [Char] ParserState ([Inline], [[Inline]], [Inline])
+titleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines)
titleBlock = try $ do
guardEnabled Ext_pandoc_title_blocks
- title <- option [] titleLine
- author <- option [] authorsLine
- date <- option [] dateLine
+ title <- option mempty titleLine
+ author <- option (return []) authorsLine
+ date <- option mempty dateLine
optional blanklines
return (title, author, date)
@@ -172,45 +195,22 @@ parseMarkdown = do
updateState $ \state -> state { stateOptions =
let oldOpts = stateOptions state in
oldOpts{ readerParseRaw = True } }
- startPos <- getPosition
- -- go through once just to get list of reference keys and notes
- -- docMinusKeys is the raw document with blanks where the keys/notes were...
- let firstPassParser = referenceKey
- <|> (guardEnabled Ext_footnotes >> noteBlock)
- <|> (guardEnabled Ext_delimited_code_blocks >>
- liftM snd (withRaw codeBlockDelimited))
- <|> lineClump
- docMinusKeys <- liftM concat $ manyTill firstPassParser eof
- setInput docMinusKeys
- setPosition startPos
- st' <- getState
- let reversedNotes = stateNotes st'
- updateState $ \s -> s { stateNotes = reverse reversedNotes }
- -- now parse it for real...
- (title, author, date) <- option ([],[],[]) titleBlock
+ (title, authors, date) <- option (mempty,return [],mempty) titleBlock
blocks <- parseBlocks
- let doc = Pandoc (Meta title author date) $ filter (/= Null) blocks
- -- if there are labeled examples, change references into numbers
- examples <- liftM stateExamples getState
- let handleExampleRef :: Inline -> Inline
- handleExampleRef z@(Str ('@':xs)) =
- case M.lookup xs examples of
- Just n -> Str (show n)
- Nothing -> z
- handleExampleRef z = z
- if M.null examples
- then return doc
- else return $ bottomUp handleExampleRef doc
+ st <- getState
+ return $ B.setTitle (runReader title st)
+ $ B.setAuthors (runReader authors st)
+ $ B.setDate (runReader date st)
+ $ B.doc $ runReader blocks st
--
-- initial pass for references and notes
--
-referenceKey :: Parser [Char] ParserState [Char]
+referenceKey :: Parser [Char] ParserState (F Blocks)
referenceKey = try $ do
- startPos <- getPosition
skipNonindentSpaces
- lab <- reference
+ (_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
let sourceURL = liftM unwords $ many $ try $ do
@@ -218,20 +218,18 @@ referenceKey = try $ do
skipMany spaceChar
optional $ newline >> notFollowedBy blankline
skipMany spaceChar
- notFollowedBy' reference
+ notFollowedBy' (() <$ reference)
many1 $ escapedChar' <|> satisfy (not . isBlank)
let betweenAngles = try $ char '<' >>
manyTill (escapedChar' <|> litChar) (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
blanklines
- endPos <- getPosition
let target = (escapeURI $ removeTrailingSpace src, tit)
st <- getState
- let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = M.insert (toKey lab) target oldkeys }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ let oldkeys = stateKeys' st
+ updateState $ \s -> s { stateKeys' = M.insert (toKey' raw) target oldkeys }
+ return $ return mempty
referenceTitle :: Parser [Char] ParserState String
referenceTitle = try $ do
@@ -242,25 +240,24 @@ referenceTitle = try $ do
notFollowedBy (noneOf ")\n")))
return $ fromEntities tit
-noteMarker :: Parser [Char] ParserState [Char]
+noteMarker :: Parser [Char] ParserState String
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
-rawLine :: Parser [Char] ParserState [Char]
+rawLine :: Parser [Char] ParserState String
rawLine = try $ do
notFollowedBy blankline
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
optional indentSpaces
anyLine
-rawLines :: Parser [Char] ParserState [Char]
+rawLines :: Parser [Char] ParserState String
rawLines = do
first <- anyLine
rest <- many rawLine
return $ unlines (first:rest)
-noteBlock :: Parser [Char] ParserState [Char]
+noteBlock :: Parser [Char] ParserState (F Blocks)
noteBlock = try $ do
- startPos <- getPosition
skipNonindentSpaces
ref <- noteMarker
char ':'
@@ -270,24 +267,21 @@ noteBlock = try $ do
(try (blankline >> indentSpaces >>
notFollowedBy blankline))
optional blanklines
- endPos <- getPosition
- let newnote = (ref, (intercalate "\n" raw) ++ "\n\n")
- st <- getState
- let oldnotes = stateNotes st
- updateState $ \s -> s { stateNotes = newnote : oldnotes }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ parsed <- parseFromString parseBlocks $ unlines raw ++ "\n"
+ let newnote = (ref, parsed)
+ updateState $ \s -> s { stateNotes' = newnote : stateNotes' s }
+ return mempty
--
-- parsing blocks
--
-parseBlocks :: Parser [Char] ParserState [Block]
-parseBlocks = manyTill block eof
+parseBlocks :: Parser [Char] ParserState (F Blocks)
+parseBlocks = mconcat <$> manyTill block eof
-block :: Parser [Char] ParserState Block
+block :: Parser [Char] ParserState (F Blocks)
block = choice [ codeBlockDelimited
- , guardEnabled Ext_latex_macros >> macro
+ , guardEnabled Ext_latex_macros *> (mempty <$ macro)
, header
, table
, codeBlockIndented
@@ -298,46 +292,48 @@ block = choice [ codeBlockDelimited
, orderedList
, definitionList
, rawTeXBlock
- , para
, htmlBlock
+ , noteBlock
+ , referenceKey
+ , para
, plain
- , nullBlock ] <?> "block"
+ ] <?> "block"
--
-- header blocks
--
-header :: Parser [Char] ParserState Block
+header :: Parser [Char] ParserState (F Blocks)
header = setextHeader <|> atxHeader <?> "header"
-atxHeader :: Parser [Char] ParserState Block
+atxHeader :: Parser [Char] ParserState (F Blocks)
atxHeader = try $ do
level <- many1 (char '#') >>= return . length
notFollowedBy (char '.' <|> char ')') -- this would be a list
skipSpaces
- text <- manyTill inline atxClosing >>= return . normalizeSpaces
- return $ Header level text
+ text <- trimInlinesF . mconcat <$> manyTill inline atxClosing
+ return $ B.header level <$> text
-atxClosing :: Parser [Char] st [Char]
+atxClosing :: Parser [Char] st String
atxClosing = try $ skipMany (char '#') >> blanklines
-setextHeader :: Parser [Char] ParserState Block
+setextHeader :: Parser [Char] ParserState (F Blocks)
setextHeader = try $ do
-- This lookahead prevents us from wasting time parsing Inlines
-- unless necessary -- it gives a significant performance boost.
lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
- text <- many1Till inline newline
+ text <- trimInlinesF . mconcat <$> many1Till inline newline
underlineChar <- oneOf setextHChars
many (char underlineChar)
blanklines
let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
- return $ Header level (normalizeSpaces text)
+ return $ B.header level <$> text
--
-- hrule block
--
-hrule :: Parser [Char] st Block
+hrule :: Parser [Char] st (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -345,13 +341,13 @@ hrule = try $ do
skipMany (spaceChar <|> char start)
newline
optional blanklines
- return HorizontalRule
+ return $ return B.horizontalRule
--
-- code blocks
--
-indentedLine :: Parser [Char] ParserState [Char]
+indentedLine :: Parser [Char] ParserState String
indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
blockDelimiter :: (Char -> Bool)
@@ -370,7 +366,7 @@ blockDelimiter f len = try $ do
blankline
return (size, attr, c)
-attributes :: Parser [Char] st ([Char], [[Char]], [([Char], [Char])])
+attributes :: Parser [Char] st (String, [String], [(String, String)])
attributes = try $ do
char '{'
spnl
@@ -382,28 +378,28 @@ attributes = try $ do
| otherwise = firstNonNull xs
return (firstNonNull $ reverse ids, concat classes, concat keyvals)
-attribute :: Parser [Char] st ([Char], [[Char]], [([Char], [Char])])
+attribute :: Parser [Char] st (String, [String], [(String, String)])
attribute = identifierAttr <|> classAttr <|> keyValAttr
-identifier :: Parser [Char] st [Char]
+identifier :: Parser [Char] st String
identifier = do
first <- letter
rest <- many $ alphaNum <|> oneOf "-_:."
return (first:rest)
-identifierAttr :: Parser [Char] st ([Char], [a], [a1])
+identifierAttr :: Parser [Char] st (String, [a], [a1])
identifierAttr = try $ do
char '#'
result <- identifier
return (result,[],[])
-classAttr :: Parser [Char] st ([Char], [[Char]], [a])
+classAttr :: Parser [Char] st (String, [String], [a])
classAttr = try $ do
char '.'
result <- identifier
return ("",[result],[])
-keyValAttr :: Parser [Char] st ([Char], [a], [([Char], [Char])])
+keyValAttr :: Parser [Char] st (String, [a], [(String, String)])
keyValAttr = try $ do
key <- identifier
char '='
@@ -412,15 +408,15 @@ keyValAttr = try $ do
<|> many nonspaceChar
return ("",[],[(key,val)])
-codeBlockDelimited :: Parser [Char] ParserState Block
+codeBlockDelimited :: Parser [Char] ParserState (F Blocks)
codeBlockDelimited = try $ do
guardEnabled Ext_delimited_code_blocks
(size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing
contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
blanklines
- return $ CodeBlock attr $ intercalate "\n" contents
+ return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
-codeBlockIndented :: Parser [Char] ParserState Block
+codeBlockIndented :: Parser [Char] ParserState (F Blocks)
codeBlockIndented = do
contents <- many1 (indentedLine <|>
try (do b <- blanklines
@@ -428,16 +424,16 @@ codeBlockIndented = do
return $ b ++ l))
optional blanklines
classes <- getOption readerIndentedCodeClasses
- return $ CodeBlock ("", classes, []) $
+ return $ return $ B.codeBlockWith ("", classes, []) $
stripTrailingNewlines $ concat contents
-lhsCodeBlock :: Parser [Char] ParserState Block
+lhsCodeBlock :: Parser [Char] ParserState (F Blocks)
lhsCodeBlock = do
failUnlessLHS
- liftM (CodeBlock ("",["sourceCode","literate","haskell"],[]))
- (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)
- <|> liftM (CodeBlock ("",["sourceCode","haskell"],[]))
- lhsCodeBlockInverseBird
+ (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ (lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
+ <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
+ lhsCodeBlockInverseBird)
lhsCodeBlockLaTeX :: Parser [Char] ParserState String
lhsCodeBlockLaTeX = try $ do
@@ -465,14 +461,13 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ intercalate "\n" lns'
-birdTrackLine :: Char -> Parser [Char] st [Char]
+birdTrackLine :: Char -> Parser [Char] st String
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
when (c == '<') $ notFollowedBy letter
manyTill anyChar newline
-
--
-- block quotes
--
@@ -480,7 +475,7 @@ birdTrackLine c = try $ do
emailBlockQuoteStart :: Parser [Char] ParserState Char
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ')
-emailBlockQuote :: Parser [Char] ParserState [[Char]]
+emailBlockQuote :: Parser [Char] ParserState [String]
emailBlockQuote = try $ do
emailBlockQuoteStart
raw <- sepBy (many (nonEndline <|>
@@ -491,12 +486,12 @@ emailBlockQuote = try $ do
optional blanklines
return raw
-blockQuote :: Parser [Char] ParserState Block
+blockQuote :: Parser [Char] ParserState (F Blocks)
blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
- return $ BlockQuote contents
+ return $ B.blockQuote <$> contents
--
-- list blocks
@@ -506,7 +501,7 @@ bulletListStart :: Parser [Char] ParserState ()
bulletListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
skipNonindentSpaces
- notFollowedBy' hrule -- because hrules start out just like lists
+ notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
satisfy isBulletListMarker
spaceChar
skipSpaces
@@ -516,26 +511,25 @@ anyOrderedListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
- state <- getState
- if readerStrict (stateOptions state)
- then do many1 digit
- char '.'
- spaceChar
- return (1, DefaultStyle, DefaultDelim)
- else do (num, style, delim) <- anyOrderedListMarker
- -- if it could be an abbreviated first name, insist on more than one space
- if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
- num `elem` [1, 5, 10, 50, 100, 500, 1000]))
- then char '\t' <|> (try $ char ' ' >> spaceChar)
- else spaceChar
- skipSpaces
- return (num, style, delim)
+ (guardDisabled Ext_fancy_lists >>
+ do many1 digit
+ char '.'
+ spaceChar
+ return (1, DefaultStyle, DefaultDelim))
+ <|> do (num, style, delim) <- anyOrderedListMarker
+ -- if it could be an abbreviated first name, insist on more than one space
+ if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
+ num `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then char '\t' <|> (try $ char ' ' >> spaceChar)
+ else spaceChar
+ skipSpaces
+ return (num, style, delim)
listStart :: Parser [Char] ParserState ()
listStart = bulletListStart <|> (anyOrderedListStart >> return ())
-- parse a line of a list item (start = parser for beginning of list item)
-listLine :: Parser [Char] ParserState [Char]
+listLine :: Parser [Char] ParserState String
listLine = try $ do
notFollowedBy blankline
notFollowedBy' (do indentSpaces
@@ -546,7 +540,7 @@ listLine = try $ do
-- parse raw text for one list item, excluding start marker and continuations
rawListItem :: Parser [Char] ParserState a
- -> Parser [Char] ParserState [Char]
+ -> Parser [Char] ParserState String
rawListItem start = try $ do
start
first <- listLine
@@ -557,14 +551,14 @@ rawListItem start = try $ do
-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
-listContinuation :: Parser [Char] ParserState [Char]
+listContinuation :: Parser [Char] ParserState String
listContinuation = try $ do
lookAhead indentSpaces
result <- many1 listContinuationLine
blanks <- many blankline
return $ concat result ++ blanks
-listContinuationLine :: Parser [Char] ParserState [Char]
+listContinuationLine :: Parser [Char] ParserState String
listContinuationLine = try $ do
notFollowedBy blankline
notFollowedBy' listStart
@@ -573,7 +567,7 @@ listContinuationLine = try $ do
return $ result ++ "\n"
listItem :: Parser [Char] ParserState a
- -> Parser [Char] ParserState [Block]
+ -> Parser [Char] ParserState (F Blocks)
listItem start = try $ do
first <- rawListItem start
continuations <- many listContinuation
@@ -589,23 +583,39 @@ listItem start = try $ do
updateState (\st -> st {stateParserContext = oldContext})
return contents
-orderedList :: Parser [Char] ParserState Block
+orderedList :: Parser [Char] ParserState (F Blocks)
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListStart
unless ((style == DefaultStyle || style == Decimal || style == Example) &&
(delim == DefaultDelim || delim == Period)) $
guardEnabled Ext_fancy_lists
when (style == Example) $ guardEnabled Ext_example_lists
- items <- many1 $ listItem $ try $
- do optional newline -- if preceded by a Plain block in a list context
- skipNonindentSpaces
- orderedListMarker style delim
+ items <- fmap sequence $ many1 $ listItem
+ ( try $ do
+ optional newline -- if preceded by Plain block in a list
+ skipNonindentSpaces
+ orderedListMarker style delim )
start' <- option 1 $ guardEnabled Ext_startnum >> return start
- return $ OrderedList (start', style, delim) $ compactify items
-
-bulletList :: Parser [Char] ParserState Block
-bulletList =
- many1 (listItem bulletListStart) >>= return . BulletList . compactify
+ return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
+
+-- | Change final list item from @Para@ to @Plain@ if the list contains
+-- no other @Para@ blocks. (From Shared, modified for Blocks rather than [Block].)
+compactify :: [Blocks] -- ^ List of list items (each a list of blocks)
+ -> [Blocks]
+compactify [] = []
+compactify items =
+ let (others, final) = (init items, last items)
+ in case reverse (B.toList final) of
+ (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
+ -- if this is only Para, change to Plain
+ [_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
+ _ -> items
+ _ -> items
+
+bulletList :: Parser [Char] ParserState (F Blocks)
+bulletList = do
+ items <- fmap sequence $ many1 $ listItem bulletListStart
+ return $ B.bulletList <$> fmap compactify items
-- definition lists
@@ -620,12 +630,12 @@ defListMarker = do
else mzero
return ()
-definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState (F (Inlines, [Blocks]))
definitionListItem = try $ do
guardEnabled Ext_definition_lists
-- first, see if this has any chance of being a definition list:
lookAhead (anyLine >> optional blankline >> defListMarker)
- term <- manyTill inline newline
+ term <- trimInlinesF . mconcat <$> manyTill inline newline
optional blankline
raw <- many1 defRawBlock
state <- getState
@@ -633,9 +643,9 @@ definitionListItem = try $ do
-- parse the extracted block, which may contain various block elements:
contents <- mapM (parseFromString parseBlocks) raw
updateState (\st -> st {stateParserContext = oldContext})
- return ((normalizeSpaces term), contents)
+ return $ liftM2 (,) term (sequence contents)
-defRawBlock :: Parser [Char] ParserState [Char]
+defRawBlock :: Parser [Char] ParserState String
defRawBlock = try $ do
defListMarker
firstline <- anyLine
@@ -647,58 +657,63 @@ defRawBlock = try $ do
return $ unlines lns ++ trl
return $ firstline ++ "\n" ++ unlines rawlines ++ trailing ++ cont
-definitionList :: Parser [Char] ParserState Block
+definitionList :: Parser [Char] ParserState (F Blocks)
definitionList = do
- items <- many1 definitionListItem
- -- "compactify" the definition list:
- let defs = map snd items
- let defBlocks = reverse $ concat $ concat defs
- let isPara (Para _) = True
+ items <- fmap sequence $ many1 definitionListItem
+ return $ B.definitionList <$> fmap compactifyDL items
+
+compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
+compactifyDL items =
+ let defs = concatMap snd items
+ defBlocks = reverse $ concatMap B.toList defs
+ isPara (Para _) = True
isPara _ = False
- let items' = case take 1 defBlocks of
- [Para x] -> if not $ any isPara (drop 1 defBlocks)
- then let (t,ds) = last items
- lastDef = last ds
- ds' = init ds ++
- [init lastDef ++ [Plain x]]
- in init items ++ [(t, ds')]
- else items
- _ -> items
- return $ DefinitionList items'
+ in case defBlocks of
+ (Para x:_) -> if not $ any isPara (drop 1 defBlocks)
+ then let (t,ds) = last items
+ lastDef = B.toList $ last ds
+ ds' = init ds ++
+ [B.fromList $ init lastDef ++ [Plain x]]
+ in init items ++ [(t, ds')]
+ else items
+ _ -> items
--
-- paragraph block
--
+{-
isHtmlOrBlank :: Inline -> Bool
isHtmlOrBlank (RawInline "html" _) = True
isHtmlOrBlank (Space) = True
isHtmlOrBlank (LineBreak) = True
isHtmlOrBlank _ = False
+-}
-para :: Parser [Char] ParserState Block
+para :: Parser [Char] ParserState (F Blocks)
para = try $ do
- result <- liftM normalizeSpaces $ many1 inline
- guard $ not . all isHtmlOrBlank $ result
- option (Plain result) $ try $ do
+ result <- trimInlinesF . mconcat <$> many1 inline
+ -- TODO remove this if not really needed? and remove isHtmlOrBlank
+ -- guard $ not $ F.all isHtmlOrBlank result
+ option (B.plain <$> result) $ try $ do
newline
- (blanklines >> return Null)
+ (blanklines >> return mempty)
<|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
<|> (guardDisabled Ext_blank_before_header >> lookAhead header)
- return $ Para result
+ return $ B.para <$> result
-plain :: Parser [Char] ParserState Block
-plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces
+plain :: Parser [Char] ParserState (F Blocks)
+plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline <* spaces
--
-- raw html
--
-htmlElement :: Parser [Char] ParserState [Char]
+htmlElement :: Parser [Char] ParserState String
htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
-htmlBlock :: Parser [Char] ParserState Block
-htmlBlock = RawBlock "html" `fmap`
+htmlBlock :: Parser [Char] ParserState (F Blocks)
+htmlBlock = return . B.rawBlock "html" <$>
((guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)
<|> htmlBlock')
@@ -709,7 +724,7 @@ htmlBlock' = try $ do
finalNewlines <- many newline
return $ first ++ finalSpace ++ finalNewlines
-strictHtmlBlock :: Parser [Char] ParserState [Char]
+strictHtmlBlock :: Parser [Char] ParserState String
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
rawVerbatimBlock :: Parser [Char] ParserState String
@@ -720,13 +735,13 @@ rawVerbatimBlock = try $ do
contents <- manyTill anyChar (htmlTag (~== TagClose tag))
return $ open ++ contents ++ renderTags [TagClose tag]
-rawTeXBlock :: Parser [Char] ParserState Block
+rawTeXBlock :: Parser [Char] ParserState (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
- result <- liftM (RawBlock "latex") rawLaTeXBlock
- <|> liftM (RawBlock "context") rawConTeXtEnvironment
+ result <- (B.rawBlock "latex" <$> rawLaTeXBlock)
+ <|> (B.rawBlock "context" <$> rawConTeXtEnvironment)
spaces
- return result
+ return $ return result
rawHtmlBlocks :: Parser [Char] ParserState String
rawHtmlBlocks = do
@@ -760,7 +775,7 @@ dashedLine ch = do
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
simpleTableHeader :: Bool -- ^ Headerless table
- -> Parser [Char] ParserState ([[Block]], [Alignment], [Int])
+ -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -779,12 +794,32 @@ simpleTableHeader headless = try $ do
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
- heads <- mapM (parseFromString (many plain)) $
- map removeLeadingTrailingSpace rawHeads'
+ heads <- fmap sequence
+ $ mapM (parseFromString (mconcat <$> many plain))
+ $ map removeLeadingTrailingSpace rawHeads'
return (heads, aligns, indices)
+-- Returns an alignment type for a table, based on a list of strings
+-- (the rows of the column header) and a number (the length of the
+-- dashed line under the rows.
+alignType :: [String]
+ -> Int
+ -> Alignment
+alignType [] _ = AlignDefault
+alignType strLst len =
+ let nonempties = filter (not . null) $ map removeTrailingSpace strLst
+ (leftSpace, rightSpace) =
+ case sortBy (comparing length) nonempties of
+ (x:_) -> (head x `elem` " \t", length x < len)
+ [] -> (False, False)
+ in case (leftSpace, rightSpace) of
+ (True, False) -> AlignRight
+ (False, True) -> AlignLeft
+ (True, True) -> AlignCenter
+ (False, False) -> AlignDefault
+
-- Parse a table footer - dashed lines followed by blank line.
-tableFooter :: Parser [Char] ParserState [Char]
+tableFooter :: Parser [Char] ParserState String
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
-- Parse a table separator - dashed line.
@@ -802,49 +837,49 @@ rawTableLine indices = do
-- Parse a table line and return a list of lists of blocks (columns).
tableLine :: [Int]
- -> Parser [Char] ParserState [[Block]]
-tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
+ -> Parser [Char] ParserState (F [Blocks])
+tableLine indices = rawTableLine indices >>=
+ fmap sequence . mapM (parseFromString (mconcat <$> many plain))
-- Parse a multiline table row and return a list of blocks (columns).
multilineRow :: [Int]
- -> Parser [Char] ParserState [[Block]]
+ -> Parser [Char] ParserState (F [Blocks])
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
let cols = map unlines $ transpose colLines
- mapM (parseFromString (many plain)) cols
+ fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
-tableCaption :: Parser [Char] ParserState [Inline]
+tableCaption :: Parser [Char] ParserState (F Inlines)
tableCaption = try $ do
guardEnabled Ext_table_captions
skipNonindentSpaces
string ":" <|> string "Table:"
- result <- many1 inline
- blanklines
- return $ normalizeSpaces result
+ trimInlinesF . mconcat <$> many1 inline <* blanklines
-- Parse a simple table with '---' header and one line per row.
simpleTable :: Bool -- ^ Headerless table
- -> Parser [Char] ParserState Block
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine
+ (aligns, _widths, heads', lines') <-
+ tableWith (simpleTableHeader headless) tableLine
(return ())
(if headless then tableFooter else tableFooter <|> blanklines)
-- Simple tables get 0s for relative column widths (i.e., use default)
- return $ Table c a (replicate (length a) 0) h l
+ return (aligns, replicate (length aligns) 0, heads', lines')
-- Parse a multiline table: starts with row of '-' on top, then header
-- (which may be multiline), then the rows,
-- which may be multiline, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
multilineTable :: Bool -- ^ Headerless table
- -> Parser [Char] ParserState Block
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
multilineTable headless =
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
multilineTableHeader :: Bool -- ^ Headerless table
- -> Parser [Char] ParserState ([[Block]], [Alignment], [Int])
+ -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
multilineTableHeader headless = try $ do
if headless
then return '\n'
@@ -868,70 +903,142 @@ multilineTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else map (intercalate " ") rawHeadsList
- heads <- mapM (parseFromString (many plain)) $
+ heads <- fmap sequence $
+ mapM (parseFromString (mconcat <$> many plain)) $
map removeLeadingTrailingSpace rawHeads
return (heads, aligns, indices)
--- Returns an alignment type for a table, based on a list of strings
--- (the rows of the column header) and a number (the length of the
--- dashed line under the rows.
-alignType :: [String]
- -> Int
- -> Alignment
-alignType [] _ = AlignDefault
-alignType strLst len =
- let nonempties = filter (not . null) $ map removeTrailingSpace strLst
- (leftSpace, rightSpace) =
- case sortBy (comparing length) nonempties of
- (x:_) -> (head x `elem` " \t", length x < len)
- [] -> (False, False)
- in case (leftSpace, rightSpace) of
- (True, False) -> AlignRight
- (False, True) -> AlignLeft
- (True, True) -> AlignCenter
- (False, False) -> AlignDefault
-
+-- Parse a grid table: starts with row of '-' on top, then header
+-- (which may be grid), then the rows,
+-- which may be grid, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
gridTable :: Bool -- ^ Headerless table
- -> Parser [Char] ParserState Block
-gridTable = gridTableWith block
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
+gridTable headless =
+ tableWith (gridTableHeader headless) gridTableRow
+ (gridTableSep '-') gridTableFooter
+
+gridTableSplitLine :: [Int] -> String -> [String]
+gridTableSplitLine indices line = map removeFinalBar $ tail $
+ splitStringByIndices (init indices) $ removeTrailingSpace line
+
+gridPart :: Char -> Parser [Char] st (Int, Int)
+gridPart ch = do
+ dashes <- many1 (char ch)
+ char '+'
+ return (length dashes, length dashes + 1)
+
+gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
+
+removeFinalBar :: String -> String
+removeFinalBar =
+ reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
+
+-- | Separator between rows of grid table.
+gridTableSep :: Char -> Parser [Char] ParserState Char
+gridTableSep ch = try $ gridDashedLines ch >> return '\n'
+
+-- | Parse header for a grid table.
+gridTableHeader :: Bool -- ^ Headerless table
+ -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
+gridTableHeader headless = try $ do
+ optional blanklines
+ dashes <- gridDashedLines '-'
+ rawContent <- if headless
+ then return $ repeat ""
+ else many1
+ (notFollowedBy (gridTableSep '=') >> char '|' >>
+ many1Till anyChar newline)
+ if headless
+ then return ()
+ else gridTableSep '=' >> return ()
+ let lines' = map snd dashes
+ let indices = scanl (+) 0 lines'
+ let aligns = replicate (length lines') AlignDefault
+ -- RST does not have a notion of alignments
+ let rawHeads = if headless
+ then replicate (length dashes) ""
+ else map (intercalate " ") $ transpose
+ $ map (gridTableSplitLine indices) rawContent
+ heads <- fmap sequence $ mapM (parseFromString block) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
+
+gridTableRawLine :: [Int] -> Parser [Char] ParserState [String]
+gridTableRawLine indices = do
+ char '|'
+ line <- many1Till anyChar newline
+ return (gridTableSplitLine indices line)
+
+-- | Parse row of grid table.
+gridTableRow :: [Int]
+ -> Parser [Char] ParserState (F [Blocks])
+gridTableRow indices = do
+ colLines <- many1 (gridTableRawLine indices)
+ let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
+ transpose colLines
+ fmap compactify <$> fmap sequence (mapM (parseFromString block) cols)
+
+removeOneLeadingSpace :: [String] -> [String]
+removeOneLeadingSpace xs =
+ if all startsWithSpace xs
+ then map (drop 1) xs
+ else xs
+ where startsWithSpace "" = True
+ startsWithSpace (y:_) = y == ' '
+
+-- | Parse footer for a grid table.
+gridTableFooter :: Parser [Char] ParserState [Char]
+gridTableFooter = blanklines
pipeTable :: Bool -- ^ Headerless table
- -> Parser [Char] ParserState Block
-pipeTable headless = tableWith (pipeTableHeader headless)
- (\_ -> pipeTableRow) (return ()) blanklines
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
+pipeTable headless =
+ tableWith (pipeTableHeader headless)
+ (\_ -> pipeTableRow) (return ()) blanklines
-- | Parse header for an pipe table.
pipeTableHeader :: Bool -- ^ Headerless table
- -> Parser [Char] ParserState ([[Block]], [Alignment], [Int])
+ -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
pipeTableHeader headless = do
try $ do
heads <- if headless
- then return $ repeat []
+ then return $ return $ repeat mempty
else pipeTableRow
aligns <- nonindentSpaces >> optional (char '|') >>
pipeTableHeaderPart `sepBy1` sepPipe
optional (char '|')
newline
let cols = length aligns
- return (take cols heads, aligns, [])
+ let heads' = if headless
+ then return (replicate cols mempty)
+ else heads
+ return (heads', aligns, [])
sepPipe :: Parser [Char] ParserState ()
sepPipe = try $ char '|' >> notFollowedBy blankline
-pipeTableRow :: Parser [Char] ParserState [[Block]]
+pipeTableRow :: Parser [Char] ParserState (F [Blocks])
pipeTableRow = do
nonindentSpaces
optional (char '|')
- let cell = many (notFollowedBy (blankline <|> char '|') >> inline)
+ let cell = mconcat <$>
+ many (notFollowedBy (blankline <|> char '|') >> inline)
first <- cell
sepPipe
rest <- cell `sepBy1` sepPipe
optional (char '|')
blankline
- return $ map (\ils ->
- if null ils
- then []
- else [Plain $ normalizeSpaces ils]) (first:rest)
+ let cells = sequence (first:rest)
+ return $ do
+ cells' <- cells
+ return $ map
+ (\ils ->
+ case trimInlines ils of
+ -- TODO leaky abstraction:
+ ils' | Seq.null (unInlines ils') -> mempty
+ | otherwise -> B.plain $ ils') cells'
pipeTableHeaderPart :: Parser [Char] st Alignment
pipeTableHeaderPart = do
@@ -949,33 +1056,54 @@ pipeTableHeaderPart = do
scanForPipe :: Parser [Char] st ()
scanForPipe = lookAhead (manyTill (satisfy (/='\n')) (char '|')) >> return ()
-table :: Parser [Char] ParserState Block
+-- | Parse a table using 'headerParser', 'rowParser',
+-- 'lineParser', and 'footerParser'. Variant of the version in
+-- Text.Pandoc.Parsing.
+tableWith :: Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
+ -> ([Int] -> Parser [Char] ParserState (F [Blocks]))
+ -> Parser [Char] ParserState sep
+ -> Parser [Char] ParserState end
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
+tableWith headerParser rowParser lineParser footerParser = try $ do
+ (heads, aligns, indices) <- headerParser
+ lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
+ footerParser
+ numColumns <- getOption readerColumns
+ let widths = if (indices == [])
+ then replicate (length aligns) 0.0
+ else widthsFromIndices numColumns indices
+ return $ (aligns, widths, heads, lines')
+
+table :: Parser [Char] ParserState (F Blocks)
table = try $ do
- frontCaption <- option [] tableCaption
- Table _ aligns widths heads lines' <-
- try (guardEnabled Ext_pipe_tables >> scanForPipe >>
- (pipeTable True <|> pipeTable False)) <|>
- try (guardEnabled Ext_multiline_tables >>
- (multilineTable False <|> simpleTable True)) <|>
- try (guardEnabled Ext_simple_tables >>
- (simpleTable False <|> multilineTable True)) <|>
- try (guardEnabled Ext_grid_tables >>
+ frontCaption <- option Nothing (Just <$> tableCaption)
+ (aligns, widths, heads, lns) <-
+ try (guardEnabled Ext_pipe_tables >> scanForPipe >>
+ (pipeTable True <|> pipeTable False)) <|>
+ try (guardEnabled Ext_multiline_tables >>
+ multilineTable False) <|>
+ try (guardEnabled Ext_simple_tables >>
+ (simpleTable True <|> simpleTable False)) <|>
+ try (guardEnabled Ext_multiline_tables >>
+ multilineTable True) <|>
+ try (guardEnabled Ext_grid_tables >>
(gridTable False <|> gridTable True)) <?> "table"
optional blanklines
- caption <- if null frontCaption
- then option [] tableCaption
- else return frontCaption
- return $ Table caption aligns widths heads lines'
+ caption <- case frontCaption of
+ Nothing -> option (return mempty) tableCaption
+ Just c -> return c
+ return $ do
+ caption' <- caption
+ heads' <- heads
+ lns' <- lns
+ return $ B.table caption' (zip aligns widths) heads' lns'
--
-- inline
--
-inline :: Parser [Char] ParserState Inline
-inline = choice inlineParsers <?> "inline"
-
-inlineParsers :: [Parser [Char] ParserState Inline]
-inlineParsers = [ whitespace
+inline :: Parser [Char] ParserState (F Inlines)
+inline = choice [ whitespace
, str
, endline
, code
@@ -983,8 +1111,8 @@ inlineParsers = [ whitespace
, strong
, emph
, note
- , link
, cite
+ , link
, image
, math
, strikeout
@@ -996,10 +1124,11 @@ inlineParsers = [ whitespace
, escapedChar
, rawLaTeXInline'
, exampleRef
- , smartPunctuation inline
- , charRef
+ , smart
+ , return . B.singleton <$> charRef
, symbol
- , ltSign ]
+ , ltSign
+ ] <?> "inline"
escapedChar' :: Parser [Char] ParserState Char
escapedChar' = try $ do
@@ -1007,41 +1136,43 @@ escapedChar' = try $ do
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
<|> oneOf "\\`*_{}[]()>#+-.!~"
-escapedChar :: Parser [Char] ParserState Inline
+escapedChar :: Parser [Char] ParserState (F Inlines)
escapedChar = do
result <- escapedChar'
case result of
- ' ' -> return $ Str "\160" -- "\ " is a nonbreaking space
+ ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
'\n' -> guardEnabled Ext_escaped_line_breaks >>
- return LineBreak -- "\[newline]" is a linebreak
- _ -> return $ Str [result]
+ return (return B.linebreak) -- "\[newline]" is a linebreak
+ _ -> return $ return $ B.str [result]
-ltSign :: Parser [Char] ParserState Inline
+ltSign :: Parser [Char] ParserState (F Inlines)
ltSign = do
guardDisabled Ext_markdown_in_html_blocks
<|> (notFollowedBy' rawHtmlBlocks >> return ())
char '<'
- return $ Str ['<']
+ return $ return $ B.str "<"
-exampleRef :: Parser [Char] ParserState Inline
+exampleRef :: Parser [Char] ParserState (F Inlines)
exampleRef = try $ do
guardEnabled Ext_example_lists
char '@'
lab <- many1 (alphaNum <|> oneOf "-_")
- -- We just return a Str. These are replaced with numbers
- -- later. See the end of parseMarkdown.
- return $ Str $ '@' : lab
+ return $ do
+ st <- ask
+ return $ case M.lookup lab (stateExamples st) of
+ Just n -> B.str (show n)
+ Nothing -> B.str ('@':lab)
-symbol :: Parser [Char] ParserState Inline
+symbol :: Parser [Char] ParserState (F Inlines)
symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
- notFollowedBy' rawTeXBlock
+ notFollowedBy' (() <$ rawTeXBlock)
char '\\')
- return $ Str [result]
+ return $ return $ B.str [result]
-- parses inline code, between n `s and n `s
-code :: Parser [Char] ParserState Inline
+code :: Parser [Char] ParserState (F Inlines)
code = try $ do
starts <- many1 (char '`')
skipSpaces
@@ -1051,20 +1182,20 @@ code = try $ do
notFollowedBy (char '`')))
attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >>
optional whitespace >> attributes)
- return $ Code attr $ removeLeadingTrailingSpace $ concat result
+ return $ return $ B.codeWith attr $ removeLeadingTrailingSpace $ concat result
-mathWord :: Parser [Char] st [Char]
+mathWord :: Parser [Char] st String
mathWord = liftM concat $ many1 mathChunk
-mathChunk :: Parser [Char] st [Char]
+mathChunk :: Parser [Char] st String
mathChunk = do char '\\'
c <- anyChar
return ['\\',c]
<|> many1 (satisfy $ \c -> not (isBlank c || c == '\\' || c == '$'))
-math :: Parser [Char] ParserState Inline
-math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath)
- <|> (mathInline >>= applyMacros' >>= return . Math InlineMath)
+math :: Parser [Char] ParserState (F Inlines)
+math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
+ <|> (return . B.math <$> (mathInline >>= applyMacros'))
mathDisplay :: Parser [Char] ParserState String
mathDisplay = try $ do
@@ -1084,21 +1215,21 @@ mathInline = try $ do
-- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row
-- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub
-fours :: Parser [Char] st Inline
+fours :: Parser [Char] st (F Inlines)
fours = try $ do
x <- char '*' <|> char '_' <|> char '~' <|> char '^'
count 2 $ satisfy (==x)
rest <- many1 (satisfy (==x))
- return $ Str (x:x:x:rest)
+ return $ return $ B.str (x:x:x:rest)
-- | Parses a list of inlines between start and end delimiters.
inlinesBetween :: (Show b)
=> Parser [Char] ParserState a
-> Parser [Char] ParserState b
- -> Parser [Char] ParserState [Inline]
+ -> Parser [Char] ParserState (F Inlines)
inlinesBetween start end =
- normalizeSpaces `liftM` try (start >> many1Till inner end)
- where inner = innerSpace <|> (notFollowedBy' whitespace >> inline)
+ (trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
+ where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace >>~ notFollowedBy' end
-- This is used to prevent exponential blowups for things like:
@@ -1113,55 +1244,57 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
-emph :: Parser [Char] ParserState Inline
-emph = Emph `fmap` nested
+emph :: Parser [Char] ParserState (F Inlines)
+emph = fmap B.emph <$> nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
where starStart = char '*' >> lookAhead nonspaceChar
- starEnd = notFollowedBy' strong >> char '*'
+ starEnd = notFollowedBy' (() <$ strong) >> char '*'
ulStart = char '_' >> lookAhead nonspaceChar
- ulEnd = notFollowedBy' strong >> char '_'
+ ulEnd = notFollowedBy' (() <$ strong) >> char '_'
-strong :: Parser [Char] ParserState Inline
-strong = Strong `liftM` nested
+strong :: Parser [Char] ParserState (F Inlines)
+strong = fmap B.strong <$> nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
where starStart = string "**" >> lookAhead nonspaceChar
starEnd = try $ string "**"
ulStart = string "__" >> lookAhead nonspaceChar
ulEnd = try $ string "__"
-strikeout :: Parser [Char] ParserState Inline
-strikeout = Strikeout `liftM`
+strikeout :: Parser [Char] ParserState (F Inlines)
+strikeout = fmap B.strikeout <$>
(guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
where strikeStart = string "~~" >> lookAhead nonspaceChar
>> notFollowedBy (char '~')
strikeEnd = try $ string "~~"
-superscript :: Parser [Char] ParserState Inline
-superscript = guardEnabled Ext_superscript >> enclosed (char '^') (char '^')
- (notFollowedBy spaceChar >> inline) >>= -- may not contain Space
- return . Superscript
+superscript :: Parser [Char] ParserState (F Inlines)
+superscript = fmap B.superscript <$> try (do
+ guardEnabled Ext_superscript
+ char '^'
+ mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
-subscript :: Parser [Char] ParserState Inline
-subscript = guardEnabled Ext_subscript >> enclosed (char '~') (char '~')
- (notFollowedBy spaceChar >> inline) >>= -- may not contain Space
- return . Subscript
+subscript :: Parser [Char] ParserState (F Inlines)
+subscript = fmap B.subscript <$> try (do
+ guardEnabled Ext_subscript
+ char '~'
+ mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
-whitespace :: Parser [Char] ParserState Inline
-whitespace = spaceChar >>
- ( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak))
- <|> (skipMany spaceChar >> return Space) ) <?> "whitespace"
+whitespace :: Parser [Char] ParserState (F Inlines)
+whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
+ where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
+ regsp = skipMany spaceChar >> return B.space
nonEndline :: Parser [Char] st Char
nonEndline = satisfy (/='\n')
-str :: Parser [Char] ParserState Inline
+str :: Parser [Char] ParserState (F Inlines)
str = do
- smart <- (readerSmart . stateOptions) `fmap` getState
+ isSmart <- readerSmart . stateOptions <$> getState
a <- alphaNum
as <- many $ alphaNum
<|> (guardEnabled Ext_intraword_underscores >>
try (char '_' >>~ lookAhead alphaNum))
- <|> if smart
+ <|> if isSmart
then (try $ satisfy (\c -> c == '\'' || c == '\x2019') >>
lookAhead alphaNum >> return '\x2019')
-- for things like l'aide
@@ -1170,15 +1303,16 @@ str = do
updateState $ \s -> s{ stateLastStrPos = Just pos }
let result = a:as
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
- if smart
+ if isSmart
then case likelyAbbrev result of
- [] -> return $ Str result
+ [] -> return $ return $ B.str result
xs -> choice (map (\x ->
try (string x >> oneOf " \n" >>
lookAhead alphaNum >>
- return (Str $ result ++ spacesToNbr x ++ "\160"))) xs)
- <|> (return $ Str result)
- else return $ Str result
+ return (return $ B.str
+ $ result ++ spacesToNbr x ++ "\160"))) xs)
+ <|> (return $ return $ B.str result)
+ else return $ return $ B.str result
-- | if the string matches the beginning of an abbreviation (before
-- the first period, return strings that would finish the abbreviation.
@@ -1193,7 +1327,7 @@ likelyAbbrev x =
in map snd $ filter (\(y,_) -> y == x) abbrPairs
-- an endline character that can be treated as a space, not a structural break
-endline :: Parser [Char] ParserState Inline
+endline :: Parser [Char] ParserState (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
@@ -1204,27 +1338,26 @@ endline = try $ do
when (stateParserContext st == ListItemState) $ do
notFollowedBy' bulletListStart
notFollowedBy' anyOrderedListStart
- return Space
+ return $ return B.space
--
-- links
--
-- a reference label for a link
-reference :: Parser [Char] ParserState [Inline]
+reference :: Parser [Char] ParserState (F Inlines, String)
reference = do notFollowedBy' (string "[^") -- footnote reference
- result <- inlinesInBalancedBrackets inline
- return $ normalizeSpaces result
+ withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
-- source for a link, with optional title
-source :: Parser [Char] ParserState (String, [Char])
+source :: Parser [Char] ParserState (String, String)
source =
(try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|>
-- the following is needed for cases like: [ref](/url(a).
(enclosed (char '(') (char ')') litChar >>= parseFromString source')
-- auxiliary function for source
-source' :: Parser [Char] ParserState (String, [Char])
+source' :: Parser [Char] ParserState (String, String)
source' = do
skipSpaces
let nl = char '\n' >>~ notFollowedBy blankline
@@ -1250,75 +1383,86 @@ linkTitle = try $ do
tit <- manyTill litChar (try (char delim >> skipSpaces >> eof))
return $ fromEntities tit
-link :: Parser [Char] ParserState Inline
+link :: Parser [Char] ParserState (F Inlines)
link = try $ do
- lab <- reference
- (src, tit) <- source <|> referenceLink lab
- return $ Link (delinkify lab) (src, tit)
-
-delinkify :: [Inline] -> [Inline]
-delinkify = bottomUp $ concatMap go
- where go (Link lab _) = lab
- go x = [x]
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (lab,raw) <- reference
+ setState $ st{ stateAllowLinks = True }
+ regLink B.link lab <|> referenceLink B.link (lab,raw)
+
+regLink :: (String -> String -> Inlines -> Inlines)
+ -> F Inlines -> Parser [Char] ParserState (F Inlines)
+regLink constructor lab = try $ do
+ (src, tit) <- source
+ return $ constructor src tit <$> lab
-- a link like [this][ref] or [this][] or [this]
-referenceLink :: [Inline]
- -> Parser [Char] ParserState (String, [Char])
-referenceLink lab = do
- ref <- option [] (try (optional (char ' ') >>
- optional (newline >> skipSpaces) >> reference))
- let ref' = if null ref then lab else ref
- state <- getState
- case lookupKeySrc (stateKeys state) (toKey ref') of
- Nothing -> fail "no corresponding key"
- Just target -> return target
-
-autoLink :: Parser [Char] ParserState Inline
+referenceLink :: (String -> String -> Inlines -> Inlines)
+ -> (F Inlines, String) -> Parser [Char] ParserState (F Inlines)
+referenceLink constructor (lab, raw) = do
+ raw' <- try (optional (char ' ') >>
+ optional (newline >> skipSpaces) >>
+ (snd <$> reference)) <|> return ""
+ let key = toKey' $ if raw' == "[]" || raw' == "" then raw else raw'
+ let dropRB (']':xs) = xs
+ dropRB xs = xs
+ let dropLB ('[':xs) = xs
+ dropLB xs = xs
+ let dropBrackets = reverse . dropRB . reverse . dropLB
+ fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
+ return $ do
+ keys <- asks stateKeys'
+ case M.lookup key keys of
+ Nothing -> (\x -> B.str "[" <> x <> B.str "]" <> B.str raw') <$> fallback
+ Just (src,tit) -> constructor src tit <$> lab
+
+autoLink :: Parser [Char] ParserState (F Inlines)
autoLink = try $ do
char '<'
(orig, src) <- uri <|> emailAddress
char '>'
(guardEnabled Ext_autolink_code_spans >>
- return (Link [Code ("",["url"],[]) orig] (src, "")))
- <|> return (Link [Str orig] (src, ""))
+ return (return $ B.link src "" (B.codeWith ("",["url"],[]) orig)))
+ <|> return (return $ B.link src "" (B.str orig))
-image :: Parser [Char] ParserState Inline
+image :: Parser [Char] ParserState (F Inlines)
image = try $ do
char '!'
- lab <- reference
- (src, tit) <- source <|> referenceLink lab
- return $ Image lab (src,tit)
+ (lab,raw) <- reference
+ regLink B.image lab <|> referenceLink B.image (lab,raw)
-note :: Parser [Char] ParserState Inline
+note :: Parser [Char] ParserState (F Inlines)
note = try $ do
guardEnabled Ext_footnotes
ref <- noteMarker
- state <- getState
- let notes = stateNotes state
- case lookup ref notes of
- Nothing -> fail "note not found"
- Just raw -> do
- -- We temporarily empty the note list while parsing the note,
- -- so that we don't get infinite loops with notes inside notes...
- -- Note references inside other notes do not work.
- updateState $ \st -> st{ stateNotes = [] }
- contents <- parseFromString parseBlocks raw
- updateState $ \st -> st{ stateNotes = notes }
- return $ Note contents
-
-inlineNote :: Parser [Char] ParserState Inline
+ return $ do
+ notes <- asks stateNotes'
+ case lookup ref notes of
+ Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
+ Just contents -> do
+ st <- ask
+ -- process the note in a context that doesn't resolve
+ -- notes, to avoid infinite looping with notes inside
+ -- notes:
+ let contents' = runReader contents st{ stateNotes' = [] }
+ return $ B.note contents'
+
+inlineNote :: Parser [Char] ParserState (F Inlines)
inlineNote = try $ do
guardEnabled Ext_inline_notes
char '^'
- contents <- inlinesInBalancedBrackets inline
- return $ Note [Para contents]
+ contents <- inlinesInBalancedBrackets
+ return $ B.note . B.para <$> contents
-rawLaTeXInline' :: Parser [Char] ParserState Inline
+rawLaTeXInline' :: Parser [Char] ParserState (F Inlines)
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
RawInline _ s <- rawLaTeXInline
- return $ RawInline "tex" s -- "tex" because it might be context or latex
+ return $ return $ B.rawInline "tex" s
+ -- "tex" because it might be context or latex
rawConTeXtEnvironment :: Parser [Char] st String
rawConTeXtEnvironment = try $ do
@@ -1336,31 +1480,25 @@ inBrackets parser = do
char ']'
return $ "[" ++ contents ++ "]"
-rawHtmlInline :: Parser [Char] ParserState Inline
+rawHtmlInline :: Parser [Char] ParserState (F Inlines)
rawHtmlInline = do
mdInHtml <- option False $
guardEnabled Ext_markdown_in_html_blocks >> return True
(_,result) <- if mdInHtml
then htmlTag isInlineTag
else htmlTag (not . isTextTag)
- return $ RawInline "html" result
+ return $ return $ B.rawInline "html" result
-- Citations
-cite :: Parser [Char] ParserState Inline
+cite :: Parser [Char] ParserState (F Inlines)
cite = do
guardEnabled Ext_citations
+ getOption readerCitations >>= guard . not . null
citations <- textualCite <|> normalCite
- return $ Cite citations []
-
-spnl :: Parser [Char] st ()
-spnl = try $ do
- skipSpaces
- optional newline
- skipSpaces
- notFollowedBy (char '\n')
+ return $ flip B.cite mempty <$> citations
-textualCite :: Parser [Char] ParserState [Citation]
+textualCite :: Parser [Char] ParserState (F [Citation])
textualCite = try $ do
(_, key) <- citeKey
let first = Citation{ citationId = key
@@ -1370,22 +1508,25 @@ textualCite = try $ do
, citationNoteNum = 0
, citationHash = 0
}
- rest <- option [] $ try $ spnl >> normalCite
- if null rest
- then option [first] $ bareloc first
- else return $ first : rest
+ mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite
+ case mbrest of
+ Just rest -> return $ (first:) <$> rest
+ Nothing -> option (return [first]) $ bareloc first
-bareloc :: Citation -> Parser [Char] ParserState [Citation]
+bareloc :: Citation -> Parser [Char] ParserState (F [Citation])
bareloc c = try $ do
spnl
char '['
suff <- suffix
- rest <- option [] $ try $ char ';' >> citeList
+ rest <- option (return []) $ try $ char ';' >> citeList
spnl
char ']'
- return $ c{ citationSuffix = suff } : rest
+ return $ do
+ suff' <- suff
+ rest' <- rest
+ return $ c{ citationSuffix = B.toList suff' } : rest'
-normalCite :: Parser [Char] ParserState [Citation]
+normalCite :: Parser [Char] ParserState (F [Citation])
normalCite = try $ do
char '['
spnl
@@ -1406,30 +1547,33 @@ citeKey = try $ do
guard $ key `elem` citations'
return (suppress_author, key)
-suffix :: Parser [Char] ParserState [Inline]
+suffix :: Parser [Char] ParserState (F Inlines)
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
spnl
- rest <- liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline
+ rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
return $ if hasSpace
- then Space : rest
+ then (B.space <>) <$> rest
else rest
-prefix :: Parser [Char] ParserState [Inline]
-prefix = liftM normalizeSpaces $
+prefix :: Parser [Char] ParserState (F Inlines)
+prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
-citeList :: Parser [Char] ParserState [Citation]
-citeList = sepBy1 citation (try $ char ';' >> spnl)
+citeList :: Parser [Char] ParserState (F [Citation])
+citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
-citation :: Parser [Char] ParserState Citation
+citation :: Parser [Char] ParserState (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
- return $ Citation{ citationId = key
- , citationPrefix = pref
- , citationSuffix = suff
+ return $ do
+ x <- pref
+ y <- suff
+ return $ Citation{ citationId = key
+ , citationPrefix = B.toList x
+ , citationSuffix = B.toList y
, citationMode = if suppress_author
then SuppressAuthor
else NormalCitation
@@ -1437,3 +1581,22 @@ citation = try $ do
, citationHash = 0
}
+smart :: Parser [Char] ParserState (F Inlines)
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses])
+
+singleQuoted :: Parser [Char] ParserState (F Inlines)
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ fmap B.singleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline singleQuoteEnd
+
+doubleQuoted :: Parser [Char] ParserState (F Inlines)
+doubleQuoted = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $
+ fmap B.doubleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline doubleQuoteEnd
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 939de08e9..39a04d286 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -407,7 +407,7 @@ mathBlockMultiline = try $ do
lhsCodeBlock :: Parser [Char] ParserState Block
lhsCodeBlock = try $ do
- failUnlessLHS
+ getOption readerLiterateHaskell >>= guard
optional codeBlockStart
pos <- getPosition
when (sourceColumn pos /= 1) $ fail "Not in first column"
@@ -776,7 +776,7 @@ simpleTable headless = do
gridTable :: Bool -- ^ Headerless table
-> Parser [Char] ParserState Block
-gridTable = gridTableWith block
+gridTable = gridTableWith parseBlocks
table :: Parser [Char] ParserState Block
table = gridTable False <|> simpleTable False <|>