aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs14
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs109
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs34
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs1423
-rw-r--r--src/Text/Pandoc/Readers/Native.hs33
-rw-r--r--src/Text/Pandoc/Readers/RST.hs316
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs2
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs204
8 files changed, 1216 insertions, 919 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 62f7c61a0..685fa1ee4 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,6 +1,6 @@
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
import Data.Char (toUpper, isDigit)
-import Text.Pandoc.Parsing (ParserState(..))
+import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder
import Text.XML.Light
@@ -455,13 +455,13 @@ List of all DocBook tags, with [x] indicating implemented,
[x] tocfront - An entry in a table of contents for a front matter component
[x] toclevel1 - A top-level entry within a table of contents entry for a
chapter-like component
-[x] toclevel2 - A second-level entry within a table of contents entry for a
+[x] toclevel2 - A second-level entry within a table of contents entry for a
chapter-like component
-[x] toclevel3 - A third-level entry within a table of contents entry for a
+[x] toclevel3 - A third-level entry within a table of contents entry for a
chapter-like component
-[x] toclevel4 - A fourth-level entry within a table of contents entry for a
+[x] toclevel4 - A fourth-level entry within a table of contents entry for a
chapter-like component
-[x] toclevel5 - A fifth-level entry within a table of contents entry for a
+[x] toclevel5 - A fifth-level entry within a table of contents entry for a
chapter-like component
[x] tocpart - An entry in a table of contents for a part of a book
[ ] token - A unit of information
@@ -503,7 +503,7 @@ data DBState = DBState{ dbSectionLevel :: Int
, dbBook :: Bool
} deriving Show
-readDocBook :: ParserState -> String -> Pandoc
+readDocBook :: ReaderOptions -> String -> Pandoc
readDocBook _ inp = setTitle (dbDocTitle st')
$ setAuthors (dbDocAuthors st')
$ setDate (dbDocDate st')
@@ -574,7 +574,7 @@ addToStart toadd bs =
(Para xs : rest) -> para (toadd <> fromList xs) <> fromList rest
_ -> bs
--- function that is used by both mediaobject (in parseBlock)
+-- function that is used by both mediaobject (in parseBlock)
-- and inlinemediaobject (in parseInline)
getImage :: Element -> DB Inlines
getImage e = do
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 536bddd39..e5c310ffc 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -19,10 +19,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.HTML
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
+ Stability : alpha
Portability : portable
Conversion of HTML to 'Pandoc' document.
@@ -36,18 +36,17 @@ module Text.Pandoc.Readers.HTML ( readHtml
, isCommentTag
) where
-import Text.ParserCombinators.Parsec
-import Text.ParserCombinators.Parsec.Pos
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
import Text.Pandoc.Builder (text, toList)
import Text.Pandoc.Shared
+import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe, isJust )
import Data.List ( intercalate )
-import Data.Char ( isDigit, toLower )
-import Control.Monad ( liftM, guard, when )
+import Data.Char ( isDigit )
+import Control.Monad ( liftM, guard, when, mzero )
isSpace :: Char -> Bool
isSpace ' ' = True
@@ -56,11 +55,11 @@ isSpace '\n' = True
isSpace _ = False
-- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: ParserState -- ^ Parser state
+readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
-readHtml st inp = Pandoc meta blocks
- where blocks = readWith parseBody st rest
+readHtml opts inp = Pandoc meta blocks
+ where blocks = readWith parseBody def{ stateOptions = opts } rest
tags = canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
hasHeader = any (~== TagOpen "head" []) tags
@@ -68,7 +67,7 @@ readHtml st inp = Pandoc meta blocks
then parseHeader tags
else (Meta [] [] [], tags)
-type TagParser = GenParser (Tag String) ParserState
+type TagParser = Parser [Tag String] ParserState
-- TODO - fix this - not every header has a title tag
parseHeader :: [Tag String] -> (Meta, [Tag String])
@@ -96,18 +95,6 @@ block = choice
, pRawHtmlBlock
]
--- repeated in SelfContained -- consolidate eventually
-renderTags' :: [Tag String] -> String
-renderTags' = renderTagsOptions
- renderOptions{ optMinimize = \x ->
- let y = map toLower x
- in y == "hr" || y == "br" ||
- y == "img" || y == "meta" ||
- y == "link"
- , optRawTag = \x ->
- let y = map toLower x
- in y == "script" || y == "style" }
-
pList :: TagParser [Block]
pList = pBulletList <|> pOrderedList <|> pDefinitionList
@@ -126,25 +113,22 @@ pBulletList = try $ do
pOrderedList :: TagParser [Block]
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
- st <- getState
- let (start, style) = if stateStrict st
- then (1, DefaultStyle)
- else (sta', sty')
- where sta = fromMaybe "1" $
- lookup "start" attribs
- sta' = if all isDigit sta
- then read sta
- else 1
- sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ -> DefaultStyle
+ let (start, style) = (sta', sty')
+ where sta = fromMaybe "1" $
+ lookup "start" attribs
+ sta' = if all isDigit sta
+ then read sta
+ else 1
+ sty = fromMaybe (fromMaybe "" $
+ lookup "style" attribs) $
+ lookup "class" attribs
+ sty' = case sty of
+ "lower-roman" -> LowerRoman
+ "upper-roman" -> UpperRoman
+ "lower-alpha" -> LowerAlpha
+ "upper-alpha" -> UpperAlpha
+ "decimal" -> Decimal
+ _ -> DefaultStyle
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (t ~== TagClose "ol"))
@@ -196,8 +180,8 @@ pRawTag = do
pRawHtmlBlock :: TagParser [Block]
pRawHtmlBlock = do
raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
- state <- getState
- if stateParseRaw state && not (null raw)
+ parseRaw <- getOption readerParseRaw
+ if parseRaw && not (null raw)
then return [RawBlock "html" raw]
else return []
@@ -235,7 +219,7 @@ pSimpleTable = try $ do
rows <- pOptInTag "tbody"
$ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
skipMany pBlank
- TagClose _ <- pSatisfy (~== TagClose "table")
+ TagClose _ <- pSatisfy (~== TagClose "table")
let cols = maximum $ map length rows
let aligns = replicate cols AlignLeft
let widths = replicate cols 0
@@ -281,10 +265,7 @@ pCodeBlock = try $ do
let attribsId = fromMaybe "" $ lookup "id" attr
let attribsClasses = words $ fromMaybe "" $ lookup "class" attr
let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
- st <- getState
- let attribs = if stateStrict st
- then ("",[],[])
- else (attribsId, attribsClasses, attribsKV)
+ let attribs = (attribsId, attribsClasses, attribsKV)
return [CodeBlock attribs result]
inline :: TagParser [Inline]
@@ -310,7 +291,7 @@ pLocation = do
pSat :: (Tag String -> Bool) -> TagParser (Tag String)
pSat f = do
pos <- getPosition
- token show (const pos) (\x -> if f x then Just x else Nothing)
+ token show (const pos) (\x -> if f x then Just x else Nothing)
pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String)
pSatisfy f = try $ optional pLocation >> pSat f
@@ -332,14 +313,13 @@ pStrong :: TagParser [Inline]
pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong
pSuperscript :: TagParser [Inline]
-pSuperscript = failIfStrict >> pInlinesInTags "sup" Superscript
+pSuperscript = pInlinesInTags "sup" Superscript
pSubscript :: TagParser [Inline]
-pSubscript = failIfStrict >> pInlinesInTags "sub" Subscript
+pSubscript = pInlinesInTags "sub" Subscript
pStrikeout :: TagParser [Inline]
pStrikeout = do
- failIfStrict
pInlinesInTags "s" Strikeout <|>
pInlinesInTags "strike" Strikeout <|>
pInlinesInTags "del" Strikeout <|>
@@ -381,8 +361,8 @@ pCode = try $ do
pRawHtmlInline :: TagParser [Inline]
pRawHtmlInline = do
result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag
- state <- getState
- if stateParseRaw state
+ parseRaw <- getOption readerParseRaw
+ if parseRaw
then return [RawInline "html" $ renderTags' [result]]
else return []
@@ -417,7 +397,7 @@ pCloses tagtype = try $ do
(TagClose "ul") | tagtype == "li" -> return ()
(TagClose "ol") | tagtype == "li" -> return ()
(TagClose "dl") | tagtype == "li" -> return ()
- _ -> pzero
+ _ -> mzero
pTagText :: TagParser [Inline]
pTagText = try $ do
@@ -432,11 +412,11 @@ pBlank = try $ do
(TagText str) <- pSatisfy isTagText
guard $ all isSpace str
-pTagContents :: GenParser Char ParserState Inline
+pTagContents :: Parser [Char] ParserState Inline
pTagContents =
pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
-pStr :: GenParser Char ParserState Inline
+pStr :: Parser [Char] ParserState Inline
pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
@@ -455,13 +435,13 @@ isSpecial '\8220' = True
isSpecial '\8221' = True
isSpecial _ = False
-pSymbol :: GenParser Char ParserState Inline
+pSymbol :: Parser [Char] ParserState Inline
pSymbol = satisfy isSpecial >>= return . Str . (:[])
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
-pBad :: GenParser Char ParserState Inline
+pBad :: Parser [Char] ParserState Inline
pBad = do
c <- satisfy isBad
let c' = case c of
@@ -495,7 +475,7 @@ pBad = do
_ -> '?'
return $ Str [c']
-pSpace :: GenParser Char ParserState Inline
+pSpace :: Parser [Char] ParserState Inline
pSpace = many1 (satisfy isSpace) >> return Space
--
@@ -593,20 +573,19 @@ _ `closes` _ = False
--- parsers for use in markdown, textile readers
-- | Matches a stretch of HTML in balanced tags.
-htmlInBalanced :: (Tag String -> Bool) -> GenParser Char ParserState String
+htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String
htmlInBalanced f = try $ do
(TagOpen t _, tag) <- htmlTag f
guard $ '/' `notElem` tag -- not a self-closing tag
- let nonTagChunk = many1 $ satisfy (/= '<')
let stopper = htmlTag (~== TagClose t)
let anytag = liftM snd $ htmlTag (const True)
contents <- many $ notFollowedBy' stopper >>
- (nonTagChunk <|> htmlInBalanced (const True) <|> anytag)
+ (htmlInBalanced f <|> anytag <|> count 1 anyChar)
endtag <- liftM snd stopper
return $ tag ++ concat contents ++ endtag
-- | Matches a tag meeting a certain condition.
-htmlTag :: (Tag String -> Bool) -> GenParser Char ParserState (Tag String, String)
+htmlTag :: (Tag String -> Bool) -> Parser [Char] ParserState (Tag String, String)
htmlTag f = try $ do
lookAhead (char '<')
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
@@ -617,7 +596,7 @@ htmlTag f = try $ do
count (length s + 4) anyChar
skipMany (satisfy (/='>'))
char '>'
- return (next, "<!--" ++ s ++ "-->")
+ return (next, "<!--" ++ s ++ "-->")
_ -> do
rendered <- manyTill anyChar (char '>')
return (next, rendered ++ ">")
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 3178945e4..4a5a14d6a 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -33,10 +33,10 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
handleIncludes
) where
-import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional)
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Parsing
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding ((<|>), many, optional, space)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Char ( chr, ord )
import Control.Monad
@@ -47,12 +47,13 @@ import Data.Monoid
import System.FilePath (replaceExtension)
import Data.List (intercalate)
import qualified Data.Map as M
+import qualified Control.Exception as E
-- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: ParserState -- ^ Parser state, including options for parser
+readLaTeX :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
-readLaTeX = readWith parseLaTeX
+readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts }
parseLaTeX :: LP Pandoc
parseLaTeX = do
@@ -64,7 +65,7 @@ parseLaTeX = do
let date' = stateDate st
return $ Pandoc (Meta title' authors' date') $ toList bs
-type LP = GenParser Char ParserState
+type LP = Parser [Char] ParserState
anyControlSeq :: LP String
anyControlSeq = do
@@ -186,7 +187,7 @@ inline = (mempty <$ comment)
<|> (mathInline $ char '$' *> mathChars <* char '$')
<|> (superscript <$> (char '^' *> tok))
<|> (subscript <$> (char '_' *> tok))
- <|> (failUnlessLHS *> char '|' *> doLHSverb)
+ <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
<|> (str <$> count 1 tildeEscape)
<|> (str <$> string "]")
<|> (str <$> string "#") -- TODO print warning?
@@ -230,14 +231,14 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawInline "latex" . (contseq ++) . snd) <$>
- (getState >>= guard . stateParseRaw >> (withRaw optargs))
+ (getOption readerParseRaw >>= guard >> (withRaw optargs))
ignoreBlocks :: String -> (String, LP Blocks)
ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
- (getState >>= guard . stateParseRaw >> (withRaw optargs))
+ (getOption readerParseRaw >>= guard >> (withRaw optargs))
blockCommands :: M.Map String (LP Blocks)
blockCommands = M.fromList $
@@ -321,7 +322,7 @@ inlineCommand :: LP Inlines
inlineCommand = try $ do
name <- anyControlSeq
guard $ not $ isBlockCommand name
- parseRaw <- stateParseRaw `fmap` getState
+ parseRaw <- getOption readerParseRaw
star <- option "" (string "*")
let name' = name ++ star
let rawargs = withRaw (skipopts *> option "" dimenarg
@@ -336,7 +337,7 @@ inlineCommand = try $ do
Nothing -> raw
unlessParseRaw :: LP ()
-unlessParseRaw = getState >>= guard . not . stateParseRaw
+unlessParseRaw = getOption readerParseRaw >>= guard . not
isBlockCommand :: String -> Bool
isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
@@ -660,7 +661,7 @@ environment = do
rawEnv :: String -> LP Blocks
rawEnv name = do
let addBegin x = "\\begin{" ++ name ++ "}" ++ x
- parseRaw <- stateParseRaw `fmap` getState
+ parseRaw <- getOption readerParseRaw
if parseRaw
then (rawBlock "latex" . addBegin) <$>
(withRaw (env name blocks) >>= applyMacros' . snd)
@@ -671,8 +672,9 @@ handleIncludes :: String -> IO String
handleIncludes [] = return []
handleIncludes ('\\':xs) =
case runParser include defaultParserState "input" ('\\':xs) of
- Right (fs, rest) -> do let getfile f = catch (UTF8.readFile f)
- (\_ -> return "")
+ Right (fs, rest) -> do let getfile f = E.catch (UTF8.readFile f)
+ (\e -> let _ = (e :: E.SomeException)
+ in return "")
yss <- mapM getfile fs
(intercalate "\n" yss ++) `fmap`
handleIncludes rest
@@ -713,10 +715,10 @@ verbatimEnv = do
rest <- getInput
return (r,rest)
-rawLaTeXBlock :: GenParser Char ParserState String
+rawLaTeXBlock :: Parser [Char] ParserState String
rawLaTeXBlock = snd <$> withRaw (environment <|> blockCommand)
-rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline :: Parser [Char] ParserState Inline
rawLaTeXInline = do
(res, raw) <- withRaw inlineCommand
if res == mempty
@@ -735,7 +737,7 @@ environments = M.fromList
, ("itemize", bulletList <$> listenv "itemize" (many item))
, ("description", definitionList <$> listenv "description" (many descItem))
, ("enumerate", ordered_list)
- , ("code", failUnlessLHS *>
+ , ("code", guardEnabled Ext_literate_haskell *>
(codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
verbEnv "code"))
, ("verbatim", codeBlock <$> (verbEnv "verbatim"))
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 51a727996..2407e137c 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,4 +1,6 @@
{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE FlexibleInstances, TypeSynonymInstances,
+ GeneralizedNewtypeDeriving #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Markdown
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -33,26 +35,34 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
import Data.List ( transpose, sortBy, findIndex, intercalate )
import qualified Data.Map as M
import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum )
+import Data.Char ( isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
-import Text.Pandoc.Generic
-import Text.Pandoc.Shared
-import Text.Pandoc.Parsing
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import Text.Pandoc.Options
+import Text.Pandoc.Shared hiding (compactify)
+import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
import Text.Pandoc.XML ( fromEntities )
-import Text.ParserCombinators.Parsec
-import Control.Monad (when, liftM, guard, mzero)
+import Data.Monoid (mconcat, mempty)
+import Control.Applicative ((<$>), (<*), (*>), (<$))
+import Control.Monad
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match (tagOpen)
+import qualified Data.Set as Set
-- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: ParserState -- ^ Parser state, including options for parser
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+readMarkdown :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
-readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n")
+readMarkdown opts s =
+ (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
+
+trimInlinesF :: F Inlines -> F Inlines
+trimInlinesF = liftM trimInlines
--
-- Constants and data structure definitions
@@ -70,7 +80,7 @@ isHruleChar '-' = True
isHruleChar '_' = True
isHruleChar _ = False
-setextHChars :: [Char]
+setextHChars :: String
setextHChars = "=-"
isBlank :: Char -> Bool
@@ -83,71 +93,72 @@ isBlank _ = False
-- auxiliary functions
--
-indentSpaces :: GenParser Char ParserState [Char]
+isNull :: F Inlines -> Bool
+isNull ils = B.isNull $ runF ils def
+
+spnl :: Parser [Char] st ()
+spnl = try $ do
+ skipSpaces
+ optional newline
+ skipSpaces
+ notFollowedBy (char '\n')
+
+indentSpaces :: Parser [Char] ParserState String
indentSpaces = try $ do
- state <- getState
- let tabStop = stateTabStop state
+ tabStop <- getOption readerTabStop
count tabStop (char ' ') <|>
string "\t" <?> "indentation"
-nonindentSpaces :: GenParser Char ParserState [Char]
+nonindentSpaces :: Parser [Char] ParserState String
nonindentSpaces = do
- state <- getState
- let tabStop = stateTabStop state
+ tabStop <- getOption readerTabStop
sps <- many (char ' ')
- if length sps < tabStop
+ if length sps < tabStop
then return sps
else unexpected "indented line"
-skipNonindentSpaces :: GenParser Char ParserState ()
+skipNonindentSpaces :: Parser [Char] ParserState ()
skipNonindentSpaces = do
- state <- getState
- atMostSpaces (stateTabStop state - 1)
+ tabStop <- getOption readerTabStop
+ atMostSpaces (tabStop - 1)
-atMostSpaces :: Int -> GenParser Char ParserState ()
+atMostSpaces :: Int -> Parser [Char] ParserState ()
atMostSpaces 0 = notFollowedBy (char ' ')
atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return ()
-litChar :: GenParser Char ParserState Char
+litChar :: Parser [Char] ParserState Char
litChar = escapedChar'
<|> noneOf "\n"
<|> (newline >> notFollowedBy blankline >> return ' ')
--- | Fail unless we're at beginning of a line.
-failUnlessBeginningOfLine :: GenParser tok st ()
-failUnlessBeginningOfLine = do
- pos <- getPosition
- if sourceColumn pos == 1 then return () else fail "not beginning of line"
-
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
-inlinesInBalancedBrackets :: GenParser Char ParserState Inline
- -> GenParser Char ParserState [Inline]
-inlinesInBalancedBrackets parser = try $ do
+inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines)
+inlinesInBalancedBrackets = try $ do
char '['
- result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser
- guard (res == "[")
- bal <- inlinesInBalancedBrackets parser
- return $ [Str "["] ++ bal ++ [Str "]"])
- <|> (count 1 parser))
+ result <- manyTill ( (do lookAhead $ try $ do x <- inline
+ guard (runF x def == B.str "[")
+ bal <- inlinesInBalancedBrackets
+ return $ (\x -> B.str "[" <> x <> B.str "]") <$> bal)
+ <|> inline)
(char ']')
- return $ concat result
+ return $ mconcat result
--
-- document structure
--
-titleLine :: GenParser Char ParserState [Inline]
+titleLine :: Parser [Char] ParserState (F Inlines)
titleLine = try $ do
char '%'
skipSpaces
res <- many $ (notFollowedBy newline >> inline)
<|> try (endline >> whitespace)
newline
- return $ normalizeSpaces res
+ return $ trimInlinesF $ mconcat res
-authorsLine :: GenParser Char ParserState [[Inline]]
-authorsLine = try $ do
+authorsLine :: Parser [Char] ParserState (F [Inlines])
+authorsLine = try $ do
char '%'
skipSpaces
authors <- sepEndBy (many (notFollowedBy (satisfy $ \c ->
@@ -155,67 +166,63 @@ authorsLine = try $ do
(char ';' <|>
try (newline >> notFollowedBy blankline >> spaceChar))
newline
- return $ filter (not . null) $ map normalizeSpaces authors
+ return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors
-dateLine :: GenParser Char ParserState [Inline]
+dateLine :: Parser [Char] ParserState (F Inlines)
dateLine = try $ do
char '%'
skipSpaces
- date <- manyTill inline newline
- return $ normalizeSpaces date
-
-titleBlock :: GenParser Char ParserState ([Inline], [[Inline]], [Inline])
-titleBlock = try $ do
- failIfStrict
- title <- option [] titleLine
- author <- option [] authorsLine
- date <- option [] dateLine
+ trimInlinesF . mconcat <$> manyTill inline newline
+
+titleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines)
+titleBlock = pandocTitleBlock <|> mmdTitleBlock
+
+pandocTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines)
+pandocTitleBlock = try $ do
+ guardEnabled Ext_pandoc_title_block
+ title <- option mempty titleLine
+ author <- option (return []) authorsLine
+ date <- option mempty dateLine
optional blanklines
return (title, author, date)
-parseMarkdown :: GenParser Char ParserState Pandoc
+mmdTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines)
+mmdTitleBlock = try $ do
+ guardEnabled Ext_mmd_title_block
+ kvPairs <- many1 kvPair
+ blanklines
+ let title = maybe mempty return $ lookup "title" kvPairs
+ let author = maybe mempty (\x -> return [x]) $ lookup "author" kvPairs
+ let date = maybe mempty return $ lookup "date" kvPairs
+ return (title, author, date)
+
+kvPair :: Parser [Char] ParserState (String, Inlines)
+kvPair = try $ do
+ key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
+ val <- manyTill anyChar
+ (try $ newline >> lookAhead (blankline <|> nonspaceChar))
+ let key' = concat $ words $ map toLower key
+ let val' = trimInlines $ B.text val
+ return (key',val')
+
+parseMarkdown :: Parser [Char] ParserState Pandoc
parseMarkdown = do
-- markdown allows raw HTML
- updateState (\state -> state { stateParseRaw = True })
- startPos <- getPosition
- -- go through once just to get list of reference keys and notes
- -- docMinusKeys is the raw document with blanks where the keys/notes were...
- st <- getState
- let firstPassParser = referenceKey
- <|> (if stateStrict st then pzero else noteBlock)
- <|> liftM snd (withRaw codeBlockDelimited)
- <|> lineClump
- docMinusKeys <- liftM concat $ manyTill firstPassParser eof
- setInput docMinusKeys
- setPosition startPos
- st' <- getState
- let reversedNotes = stateNotes st'
- updateState $ \s -> s { stateNotes = reverse reversedNotes }
- -- now parse it for real...
- (title, author, date) <- option ([],[],[]) titleBlock
+ updateState $ \state -> state { stateOptions =
+ let oldOpts = stateOptions state in
+ oldOpts{ readerParseRaw = True } }
+ (title, authors, date) <- option (mempty,return [],mempty) titleBlock
blocks <- parseBlocks
- let doc = Pandoc (Meta title author date) $ filter (/= Null) blocks
- -- if there are labeled examples, change references into numbers
- examples <- liftM stateExamples getState
- let handleExampleRef :: Inline -> Inline
- handleExampleRef z@(Str ('@':xs)) =
- case M.lookup xs examples of
- Just n -> Str (show n)
- Nothing -> z
- handleExampleRef z = z
- if M.null examples
- then return doc
- else return $ bottomUp handleExampleRef doc
-
---
--- initial pass for references and notes
---
+ st <- getState
+ return $ B.setTitle (runF title st)
+ $ B.setAuthors (runF authors st)
+ $ B.setDate (runF date st)
+ $ B.doc $ runF blocks st
-referenceKey :: GenParser Char ParserState [Char]
+referenceKey :: Parser [Char] ParserState (F Blocks)
referenceKey = try $ do
- startPos <- getPosition
skipNonindentSpaces
- lab <- reference
+ (_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
let sourceURL = liftM unwords $ many $ try $ do
@@ -223,22 +230,20 @@ referenceKey = try $ do
skipMany spaceChar
optional $ newline >> notFollowedBy blankline
skipMany spaceChar
- notFollowedBy' reference
+ notFollowedBy' (() <$ reference)
many1 $ escapedChar' <|> satisfy (not . isBlank)
let betweenAngles = try $ char '<' >>
manyTill (escapedChar' <|> litChar) (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
blanklines
- endPos <- getPosition
let target = (escapeURI $ removeTrailingSpace src, tit)
st <- getState
let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = M.insert (toKey lab) target oldkeys }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ updateState $ \s -> s { stateKeys = M.insert (toKey raw) target oldkeys }
+ return $ return mempty
-referenceTitle :: GenParser Char ParserState String
+referenceTitle :: Parser [Char] ParserState String
referenceTitle = try $ do
skipSpaces >> optional newline >> skipSpaces
tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words)
@@ -247,25 +252,38 @@ referenceTitle = try $ do
notFollowedBy (noneOf ")\n")))
return $ fromEntities tit
-noteMarker :: GenParser Char ParserState [Char]
+-- | PHP Markdown Extra style abbreviation key. Currently
+-- we just skip them, since Pandoc doesn't have an element for
+-- an abbreviation.
+abbrevKey :: Parser [Char] ParserState (F Blocks)
+abbrevKey = do
+ guardEnabled Ext_abbreviations
+ try $ do
+ char '*'
+ reference
+ char ':'
+ skipMany (satisfy (/= '\n'))
+ blanklines
+ return $ return mempty
+
+noteMarker :: Parser [Char] ParserState String
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
-rawLine :: GenParser Char ParserState [Char]
+rawLine :: Parser [Char] ParserState String
rawLine = try $ do
notFollowedBy blankline
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
optional indentSpaces
anyLine
-rawLines :: GenParser Char ParserState [Char]
+rawLines :: Parser [Char] ParserState String
rawLines = do
first <- anyLine
rest <- many rawLine
return $ unlines (first:rest)
-noteBlock :: GenParser Char ParserState [Char]
+noteBlock :: Parser [Char] ParserState (F Blocks)
noteBlock = try $ do
- startPos <- getPosition
skipNonindentSpaces
ref <- noteMarker
char ':'
@@ -275,87 +293,75 @@ noteBlock = try $ do
(try (blankline >> indentSpaces >>
notFollowedBy blankline))
optional blanklines
- endPos <- getPosition
- let newnote = (ref, (intercalate "\n" raw) ++ "\n\n")
- st <- getState
- let oldnotes = stateNotes st
- updateState $ \s -> s { stateNotes = newnote : oldnotes }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ parsed <- parseFromString parseBlocks $ unlines raw ++ "\n"
+ let newnote = (ref, parsed)
+ updateState $ \s -> s { stateNotes' = newnote : stateNotes' s }
+ return mempty
--
-- parsing blocks
--
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = manyTill block eof
-
-block :: GenParser Char ParserState Block
-block = do
- st <- getState
- choice (if stateStrict st
- then [ header
- , codeBlockIndented
- , blockQuote
- , hrule
- , bulletList
- , orderedList
- , htmlBlock
- , para
- , plain
- , nullBlock ]
- else [ codeBlockDelimited
- , macro
- , header
- , table
- , codeBlockIndented
- , lhsCodeBlock
- , blockQuote
- , hrule
- , bulletList
- , orderedList
- , definitionList
- , rawTeXBlock
- , para
- , rawHtmlBlocks
- , plain
- , nullBlock ]) <?> "block"
+parseBlocks :: Parser [Char] ParserState (F Blocks)
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: Parser [Char] ParserState (F Blocks)
+block = choice [ codeBlockFenced
+ , codeBlockBackticks
+ , guardEnabled Ext_latex_macros *> (mempty <$ macro)
+ , header
+ , rawTeXBlock
+ , htmlBlock
+ , table
+ , codeBlockIndented
+ , lhsCodeBlock
+ , blockQuote
+ , hrule
+ , bulletList
+ , orderedList
+ , definitionList
+ , noteBlock
+ , referenceKey
+ , abbrevKey
+ , para
+ , plain
+ ] <?> "block"
--
-- header blocks
--
-header :: GenParser Char ParserState Block
+header :: Parser [Char] ParserState (F Blocks)
header = setextHeader <|> atxHeader <?> "header"
-atxHeader :: GenParser Char ParserState Block
+atxHeader :: Parser [Char] ParserState (F Blocks)
atxHeader = try $ do
level <- many1 (char '#') >>= return . length
notFollowedBy (char '.' <|> char ')') -- this would be a list
skipSpaces
- text <- manyTill inline atxClosing >>= return . normalizeSpaces
- return $ Header level text
+ text <- trimInlinesF . mconcat <$> manyTill inline atxClosing
+ return $ B.header level <$> text
-atxClosing :: GenParser Char st [Char]
+atxClosing :: Parser [Char] st String
atxClosing = try $ skipMany (char '#') >> blanklines
-setextHeader :: GenParser Char ParserState Block
+setextHeader :: Parser [Char] ParserState (F Blocks)
setextHeader = try $ do
-- This lookahead prevents us from wasting time parsing Inlines
-- unless necessary -- it gives a significant performance boost.
lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
- text <- many1Till inline newline
+ text <- trimInlinesF . mconcat <$> many1Till inline newline
underlineChar <- oneOf setextHChars
many (char underlineChar)
blanklines
let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
- return $ Header level (normalizeSpaces text)
+ return $ B.header level <$> text
--
-- hrule block
--
-hrule :: GenParser Char st Block
+hrule :: Parser [Char] st (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -363,32 +369,26 @@ hrule = try $ do
skipMany (spaceChar <|> char start)
newline
optional blanklines
- return HorizontalRule
+ return $ return B.horizontalRule
--
-- code blocks
--
-indentedLine :: GenParser Char ParserState [Char]
+indentedLine :: Parser [Char] ParserState String
indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
blockDelimiter :: (Char -> Bool)
-> Maybe Int
- -> GenParser Char st (Int, (String, [String], [(String, String)]), Char)
+ -> Parser [Char] st Int
blockDelimiter f len = try $ do
c <- lookAhead (satisfy f)
- size <- case len of
- Just l -> count l (char c) >> many (char c) >> return l
- Nothing -> count 3 (char c) >> many (char c) >>=
- return . (+ 3) . length
- many spaceChar
- attr <- option ([],[],[])
- $ attributes -- ~~~ {.ruby}
- <|> (many1 alphaNum >>= \x -> return ([],[x],[])) -- github variant ```ruby
- blankline
- return (size, attr, c)
+ case len of
+ Just l -> count l (char c) >> many (char c) >> return l
+ Nothing -> count 3 (char c) >> many (char c) >>=
+ return . (+ 3) . length
-attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
+attributes :: Parser [Char] st (String, [String], [(String, String)])
attributes = try $ do
char '{'
spnl
@@ -400,28 +400,28 @@ attributes = try $ do
| otherwise = firstNonNull xs
return (firstNonNull $ reverse ids, concat classes, concat keyvals)
-attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
+attribute :: Parser [Char] st (String, [String], [(String, String)])
attribute = identifierAttr <|> classAttr <|> keyValAttr
-identifier :: GenParser Char st [Char]
+identifier :: Parser [Char] st String
identifier = do
first <- letter
rest <- many $ alphaNum <|> oneOf "-_:."
return (first:rest)
-identifierAttr :: GenParser Char st ([Char], [a], [a1])
+identifierAttr :: Parser [Char] st (String, [a], [a1])
identifierAttr = try $ do
char '#'
result <- identifier
return (result,[],[])
-classAttr :: GenParser Char st ([Char], [[Char]], [a])
+classAttr :: Parser [Char] st (String, [String], [a])
classAttr = try $ do
char '.'
result <- identifier
return ("",[result],[])
-keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])])
+keyValAttr :: Parser [Char] st (String, [a], [(String, String)])
keyValAttr = try $ do
key <- identifier
char '='
@@ -430,33 +430,49 @@ keyValAttr = try $ do
<|> many nonspaceChar
return ("",[],[(key,val)])
-codeBlockDelimited :: GenParser Char st Block
-codeBlockDelimited = try $ do
- (size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing
- contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
+codeBlockFenced :: Parser [Char] ParserState (F Blocks)
+codeBlockFenced = try $ do
+ guardEnabled Ext_fenced_code_blocks
+ size <- blockDelimiter (=='~') Nothing
+ skipMany spaceChar
+ attr <- option ([],[],[]) $
+ guardEnabled Ext_fenced_code_attributes >> attributes
+ blankline
+ contents <- manyTill anyLine (blockDelimiter (=='~') (Just size))
+ blanklines
+ return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
+
+codeBlockBackticks :: Parser [Char] ParserState (F Blocks)
+codeBlockBackticks = try $ do
+ guardEnabled Ext_backtick_code_blocks
+ blockDelimiter (=='`') (Just 3)
+ skipMany spaceChar
+ cls <- many1 alphaNum
+ blankline
+ contents <- manyTill anyLine $ blockDelimiter (=='`') (Just 3)
blanklines
- return $ CodeBlock attr $ intercalate "\n" contents
+ return $ return $ B.codeBlockWith ("",[cls],[]) $ intercalate "\n" contents
-codeBlockIndented :: GenParser Char ParserState Block
+codeBlockIndented :: Parser [Char] ParserState (F Blocks)
codeBlockIndented = do
- contents <- many1 (indentedLine <|>
+ contents <- many1 (indentedLine <|>
try (do b <- blanklines
l <- indentedLine
return $ b ++ l))
optional blanklines
- st <- getState
- return $ CodeBlock ("", stateIndentedCodeClasses st, []) $
+ classes <- getOption readerIndentedCodeClasses
+ return $ return $ B.codeBlockWith ("", classes, []) $
stripTrailingNewlines $ concat contents
-lhsCodeBlock :: GenParser Char ParserState Block
+lhsCodeBlock :: Parser [Char] ParserState (F Blocks)
lhsCodeBlock = do
- failUnlessLHS
- liftM (CodeBlock ("",["sourceCode","literate","haskell"],[]))
- (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)
- <|> liftM (CodeBlock ("",["sourceCode","haskell"],[]))
- lhsCodeBlockInverseBird
+ guardEnabled Ext_literate_haskell
+ (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ (lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
+ <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
+ lhsCodeBlockInverseBird)
-lhsCodeBlockLaTeX :: GenParser Char ParserState String
+lhsCodeBlockLaTeX :: Parser [Char] ParserState String
lhsCodeBlockLaTeX = try $ do
string "\\begin{code}"
manyTill spaceChar newline
@@ -464,13 +480,13 @@ lhsCodeBlockLaTeX = try $ do
blanklines
return $ stripTrailingNewlines contents
-lhsCodeBlockBird :: GenParser Char ParserState String
+lhsCodeBlockBird :: Parser [Char] ParserState String
lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
-lhsCodeBlockInverseBird :: GenParser Char ParserState String
+lhsCodeBlockInverseBird :: Parser [Char] ParserState String
lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
-lhsCodeBlockBirdWith :: Char -> GenParser Char ParserState String
+lhsCodeBlockBirdWith :: Char -> Parser [Char] ParserState String
lhsCodeBlockBirdWith c = try $ do
pos <- getPosition
when (sourceColumn pos /= 1) $ fail "Not in first column"
@@ -482,25 +498,24 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ intercalate "\n" lns'
-birdTrackLine :: Char -> GenParser Char st [Char]
+birdTrackLine :: Char -> Parser [Char] st String
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
when (c == '<') $ notFollowedBy letter
manyTill anyChar newline
-
--
-- block quotes
--
-emailBlockQuoteStart :: GenParser Char ParserState Char
+emailBlockQuoteStart :: Parser [Char] ParserState Char
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ')
-emailBlockQuote :: GenParser Char ParserState [[Char]]
+emailBlockQuote :: Parser [Char] ParserState [String]
emailBlockQuote = try $ do
emailBlockQuoteStart
- raw <- sepBy (many (nonEndline <|>
+ raw <- sepBy (many (nonEndline <|>
(try (endline >> notFollowedBy emailBlockQuoteStart >>
return '\n'))))
(try (newline >> emailBlockQuoteStart))
@@ -508,51 +523,50 @@ emailBlockQuote = try $ do
optional blanklines
return raw
-blockQuote :: GenParser Char ParserState Block
-blockQuote = do
+blockQuote :: Parser [Char] ParserState (F Blocks)
+blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
- return $ BlockQuote contents
-
+ return $ B.blockQuote <$> contents
+
--
-- list blocks
--
-bulletListStart :: GenParser Char ParserState ()
+bulletListStart :: Parser [Char] ParserState ()
bulletListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
skipNonindentSpaces
- notFollowedBy' hrule -- because hrules start out just like lists
+ notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
satisfy isBulletListMarker
spaceChar
skipSpaces
-anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim)
+anyOrderedListStart :: Parser [Char] ParserState (Int, ListNumberStyle, ListNumberDelim)
anyOrderedListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
- state <- getState
- if stateStrict state
- then do many1 digit
- char '.'
- spaceChar
- return (1, DefaultStyle, DefaultDelim)
- else do (num, style, delim) <- anyOrderedListMarker
- -- if it could be an abbreviated first name, insist on more than one space
- if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
- num `elem` [1, 5, 10, 50, 100, 500, 1000]))
- then char '\t' <|> (try $ char ' ' >> spaceChar)
- else spaceChar
- skipSpaces
- return (num, style, delim)
-
-listStart :: GenParser Char ParserState ()
+ (guardDisabled Ext_fancy_lists >>
+ do many1 digit
+ char '.'
+ spaceChar
+ return (1, DefaultStyle, DefaultDelim))
+ <|> do (num, style, delim) <- anyOrderedListMarker
+ -- if it could be an abbreviated first name, insist on more than one space
+ if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
+ num `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then char '\t' <|> (try $ char ' ' >> spaceChar)
+ else spaceChar
+ skipSpaces
+ return (num, style, delim)
+
+listStart :: Parser [Char] ParserState ()
listStart = bulletListStart <|> (anyOrderedListStart >> return ())
-- parse a line of a list item (start = parser for beginning of list item)
-listLine :: GenParser Char ParserState [Char]
+listLine :: Parser [Char] ParserState String
listLine = try $ do
notFollowedBy blankline
notFollowedBy' (do indentSpaces
@@ -562,8 +576,8 @@ listLine = try $ do
return $ concat chunks ++ "\n"
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: GenParser Char ParserState a
- -> GenParser Char ParserState [Char]
+rawListItem :: Parser [Char] ParserState a
+ -> Parser [Char] ParserState String
rawListItem start = try $ do
start
first <- listLine
@@ -571,17 +585,17 @@ rawListItem start = try $ do
blanks <- many blankline
return $ concat (first:rest) ++ blanks
--- continuation of a list item - indented and separated by blankline
+-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
-listContinuation :: GenParser Char ParserState [Char]
+listContinuation :: Parser [Char] ParserState String
listContinuation = try $ do
lookAhead indentSpaces
result <- many1 listContinuationLine
blanks <- many blankline
return $ concat result ++ blanks
-listContinuationLine :: GenParser Char ParserState [Char]
+listContinuationLine :: Parser [Char] ParserState String
listContinuationLine = try $ do
notFollowedBy blankline
notFollowedBy' listStart
@@ -589,8 +603,8 @@ listContinuationLine = try $ do
result <- manyTill anyChar newline
return $ result ++ "\n"
-listItem :: GenParser Char ParserState a
- -> GenParser Char ParserState [Block]
+listItem :: Parser [Char] ParserState a
+ -> Parser [Char] ParserState (F Blocks)
listItem start = try $ do
first <- rawListItem start
continuations <- many listContinuation
@@ -606,38 +620,59 @@ listItem start = try $ do
updateState (\st -> st {stateParserContext = oldContext})
return contents
-orderedList :: GenParser Char ParserState Block
+orderedList :: Parser [Char] ParserState (F Blocks)
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListStart
- items <- many1 $ listItem $ try $
- do optional newline -- if preceded by a Plain block in a list context
- skipNonindentSpaces
- orderedListMarker style delim
- return $ OrderedList (start, style, delim) $ compactify items
-
-bulletList :: GenParser Char ParserState Block
-bulletList =
- many1 (listItem bulletListStart) >>= return . BulletList . compactify
+ unless ((style == DefaultStyle || style == Decimal || style == Example) &&
+ (delim == DefaultDelim || delim == Period)) $
+ guardEnabled Ext_fancy_lists
+ when (style == Example) $ guardEnabled Ext_example_lists
+ items <- fmap sequence $ many1 $ listItem
+ ( try $ do
+ optional newline -- if preceded by Plain block in a list
+ skipNonindentSpaces
+ orderedListMarker style delim )
+ start' <- option 1 $ guardEnabled Ext_startnum >> return start
+ return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
+
+-- | Change final list item from @Para@ to @Plain@ if the list contains
+-- no other @Para@ blocks. (From Shared, modified for Blocks rather than [Block].)
+compactify :: [Blocks] -- ^ List of list items (each a list of blocks)
+ -> [Blocks]
+compactify [] = []
+compactify items =
+ let (others, final) = (init items, last items)
+ in case reverse (B.toList final) of
+ (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
+ -- if this is only Para, change to Plain
+ [_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
+ _ -> items
+ _ -> items
+
+bulletList :: Parser [Char] ParserState (F Blocks)
+bulletList = do
+ items <- fmap sequence $ many1 $ listItem bulletListStart
+ return $ B.bulletList <$> fmap compactify items
-- definition lists
-defListMarker :: GenParser Char ParserState ()
+defListMarker :: Parser [Char] ParserState ()
defListMarker = do
sps <- nonindentSpaces
char ':' <|> char '~'
- st <- getState
- let tabStop = stateTabStop st
+ tabStop <- getOption readerTabStop
let remaining = tabStop - (length sps + 1)
if remaining > 0
then count remaining (char ' ') <|> string "\t"
- else pzero
+ else mzero
return ()
-definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState (F (Inlines, [Blocks]))
definitionListItem = try $ do
+ guardEnabled Ext_definition_lists
-- first, see if this has any chance of being a definition list:
lookAhead (anyLine >> optional blankline >> defListMarker)
- term <- manyTill inline newline
+ term <- trimInlinesF . mconcat <$> manyTill inline newline
optional blankline
raw <- many1 defRawBlock
state <- getState
@@ -645,9 +680,9 @@ definitionListItem = try $ do
-- parse the extracted block, which may contain various block elements:
contents <- mapM (parseFromString parseBlocks) raw
updateState (\st -> st {stateParserContext = oldContext})
- return ((normalizeSpaces term), contents)
+ return $ liftM2 (,) term (sequence contents)
-defRawBlock :: GenParser Char ParserState [Char]
+defRawBlock :: Parser [Char] ParserState String
defRawBlock = try $ do
defListMarker
firstline <- anyLine
@@ -659,119 +694,149 @@ defRawBlock = try $ do
return $ unlines lns ++ trl
return $ firstline ++ "\n" ++ unlines rawlines ++ trailing ++ cont
-definitionList :: GenParser Char ParserState Block
+definitionList :: Parser [Char] ParserState (F Blocks)
definitionList = do
- items <- many1 definitionListItem
- -- "compactify" the definition list:
- let defs = map snd items
- let defBlocks = reverse $ concat $ concat defs
- let isPara (Para _) = True
+ items <- fmap sequence $ many1 definitionListItem
+ return $ B.definitionList <$> fmap compactifyDL items
+
+compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
+compactifyDL items =
+ let defs = concatMap snd items
+ defBlocks = reverse $ concatMap B.toList defs
+ isPara (Para _) = True
isPara _ = False
- let items' = case take 1 defBlocks of
- [Para x] -> if not $ any isPara (drop 1 defBlocks)
- then let (t,ds) = last items
- lastDef = last ds
- ds' = init ds ++
- [init lastDef ++ [Plain x]]
- in init items ++ [(t, ds')]
- else items
- _ -> items
- return $ DefinitionList items'
+ in case defBlocks of
+ (Para x:_) -> if not $ any isPara (drop 1 defBlocks)
+ then let (t,ds) = last items
+ lastDef = B.toList $ last ds
+ ds' = init ds ++
+ [B.fromList $ init lastDef ++ [Plain x]]
+ in init items ++ [(t, ds')]
+ else items
+ _ -> items
--
-- paragraph block
--
+{-
isHtmlOrBlank :: Inline -> Bool
isHtmlOrBlank (RawInline "html" _) = True
isHtmlOrBlank (Space) = True
isHtmlOrBlank (LineBreak) = True
isHtmlOrBlank _ = False
+-}
-para :: GenParser Char ParserState Block
-para = try $ do
- result <- liftM normalizeSpaces $ many1 inline
- guard $ not . all isHtmlOrBlank $ result
- option (Plain result) $ try $ do
+para :: Parser [Char] ParserState (F Blocks)
+para = try $ do
+ result <- trimInlinesF . mconcat <$> many1 inline
+ -- TODO remove this if not really needed? and remove isHtmlOrBlank
+ -- guard $ not $ F.all isHtmlOrBlank result
+ option (B.plain <$> result) $ try $ do
newline
- blanklines <|>
- (getState >>= guard . stateStrict >>
- lookAhead (blockQuote <|> header) >> return "")
- return $ Para result
+ (blanklines >> return mempty)
+ <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
+ <|> (guardDisabled Ext_blank_before_header >> lookAhead header)
+ return $ B.para <$> result
-plain :: GenParser Char ParserState Block
-plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces
+plain :: Parser [Char] ParserState (F Blocks)
+plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline <* spaces
---
+--
-- raw html
--
-htmlElement :: GenParser Char ParserState [Char]
+htmlElement :: Parser [Char] ParserState String
htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
-htmlBlock :: GenParser Char ParserState Block
-htmlBlock = try $ do
- failUnlessBeginningOfLine
+htmlBlock :: Parser [Char] ParserState (F Blocks)
+htmlBlock = do
+ guardEnabled Ext_raw_html
+ res <- (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)
+ <|> htmlBlock'
+ return $ return $ B.rawBlock "html" res
+
+htmlBlock' :: Parser [Char] ParserState String
+htmlBlock' = try $ do
first <- htmlElement
finalSpace <- many spaceChar
finalNewlines <- many newline
- return $ RawBlock "html" $ first ++ finalSpace ++ finalNewlines
+ return $ first ++ finalSpace ++ finalNewlines
-strictHtmlBlock :: GenParser Char ParserState [Char]
-strictHtmlBlock = do
- failUnlessBeginningOfLine
- htmlInBalanced (not . isInlineTag)
+strictHtmlBlock :: Parser [Char] ParserState String
+strictHtmlBlock = htmlInBalanced (not . isInlineTag)
-rawVerbatimBlock :: GenParser Char ParserState String
+rawVerbatimBlock :: Parser [Char] ParserState String
rawVerbatimBlock = try $ do
(TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
- t == "pre" || t == "style" || t == "script")
- (const True))
+ t == "pre" || t == "style" || t == "script")
+ (const True))
contents <- manyTill anyChar (htmlTag (~== TagClose tag))
return $ open ++ contents ++ renderTags [TagClose tag]
-rawTeXBlock :: GenParser Char ParserState Block
+rawTeXBlock :: Parser [Char] ParserState (F Blocks)
rawTeXBlock = do
- failIfStrict
- result <- liftM (RawBlock "latex") rawLaTeXBlock
- <|> liftM (RawBlock "context") rawConTeXtEnvironment
+ guardEnabled Ext_raw_tex
+ result <- (B.rawBlock "latex" <$> rawLaTeXBlock)
+ <|> (B.rawBlock "context" <$> rawConTeXtEnvironment)
spaces
- return result
+ return $ return result
-rawHtmlBlocks :: GenParser Char ParserState Block
+rawHtmlBlocks :: Parser [Char] ParserState String
rawHtmlBlocks = do
- htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|>
- liftM snd (htmlTag isBlockTag)
- sps <- do sp1 <- many spaceChar
- sp2 <- option "" (blankline >> return "\n")
- sp3 <- many spaceChar
- sp4 <- option "" blanklines
- return $ sp1 ++ sp2 ++ sp3 ++ sp4
- -- note: we want raw html to be able to
- -- precede a code block, when separated
- -- by a blank line
- return $ blk ++ sps
+ htmlBlocks <- many1 $ try $ do
+ s <- rawVerbatimBlock <|> try (
+ do (t,raw) <- htmlTag isBlockTag
+ exts <- getOption readerExtensions
+ -- if open tag, need markdown="1" if
+ -- markdown_attributes extension is set
+ case t of
+ TagOpen _ as
+ | Ext_markdown_attribute `Set.member`
+ exts ->
+ if "markdown" `notElem`
+ map fst as
+ then mzero
+ else return $
+ stripMarkdownAttribute raw
+ | otherwise -> return raw
+ _ -> return raw )
+ sps <- do sp1 <- many spaceChar
+ sp2 <- option "" (blankline >> return "\n")
+ sp3 <- many spaceChar
+ sp4 <- option "" blanklines
+ return $ sp1 ++ sp2 ++ sp3 ++ sp4
+ -- note: we want raw html to be able to
+ -- precede a code block, when separated
+ -- by a blank line
+ return $ s ++ sps
let combined = concat htmlBlocks
- let combined' = if last combined == '\n' then init combined else combined
- return $ RawBlock "html" combined'
+ return $ if last combined == '\n' then init combined else combined
+
+-- remove markdown="1" attribute
+stripMarkdownAttribute :: String -> String
+stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
+ where filterAttrib (TagOpen t as) = TagOpen t
+ [(k,v) | (k,v) <- as, k /= "markdown"]
+ filterAttrib x = x
--
-- Tables
---
+--
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
-dashedLine :: Char
- -> GenParser Char st (Int, Int)
+dashedLine :: Char
+ -> Parser [Char] st (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
return $ (length dashes, length $ dashes ++ sp)
--- Parse a table header with dashed lines of '-' preceded by
+-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
-simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
+simpleTableHeader :: Bool -- ^ Headerless table
+ -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -784,84 +849,104 @@ simpleTableHeader headless = try $ do
-- If no header, calculate alignment on basis of first row of text
rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
if headless
- then lookAhead anyLine
+ then lookAhead anyLine
else return rawContent
let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
let rawHeads' = if headless
then replicate (length dashes) ""
- else rawHeads
- heads <- mapM (parseFromString (many plain)) $
- map removeLeadingTrailingSpace rawHeads'
+ else rawHeads
+ heads <- fmap sequence
+ $ mapM (parseFromString (mconcat <$> many plain))
+ $ map removeLeadingTrailingSpace rawHeads'
return (heads, aligns, indices)
+-- Returns an alignment type for a table, based on a list of strings
+-- (the rows of the column header) and a number (the length of the
+-- dashed line under the rows.
+alignType :: [String]
+ -> Int
+ -> Alignment
+alignType [] _ = AlignDefault
+alignType strLst len =
+ let nonempties = filter (not . null) $ map removeTrailingSpace strLst
+ (leftSpace, rightSpace) =
+ case sortBy (comparing length) nonempties of
+ (x:_) -> (head x `elem` " \t", length x < len)
+ [] -> (False, False)
+ in case (leftSpace, rightSpace) of
+ (True, False) -> AlignRight
+ (False, True) -> AlignLeft
+ (True, True) -> AlignCenter
+ (False, False) -> AlignDefault
+
-- Parse a table footer - dashed lines followed by blank line.
-tableFooter :: GenParser Char ParserState [Char]
+tableFooter :: Parser [Char] ParserState String
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
-- Parse a table separator - dashed line.
-tableSep :: GenParser Char ParserState Char
+tableSep :: Parser [Char] ParserState Char
tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
-- Parse a raw line and split it into chunks by indices.
rawTableLine :: [Int]
- -> GenParser Char ParserState [String]
+ -> Parser [Char] ParserState [String]
rawTableLine indices = do
notFollowedBy' (blanklines <|> tableFooter)
line <- many1Till anyChar newline
- return $ map removeLeadingTrailingSpace $ tail $
+ return $ map removeLeadingTrailingSpace $ tail $
splitStringByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
tableLine :: [Int]
- -> GenParser Char ParserState [[Block]]
-tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
+ -> Parser [Char] ParserState (F [Blocks])
+tableLine indices = rawTableLine indices >>=
+ fmap sequence . mapM (parseFromString (mconcat <$> many plain))
-- Parse a multiline table row and return a list of blocks (columns).
multilineRow :: [Int]
- -> GenParser Char ParserState [[Block]]
+ -> Parser [Char] ParserState (F [Blocks])
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
let cols = map unlines $ transpose colLines
- mapM (parseFromString (many plain)) cols
+ fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
-tableCaption :: GenParser Char ParserState [Inline]
+tableCaption :: Parser [Char] ParserState (F Inlines)
tableCaption = try $ do
+ guardEnabled Ext_table_captions
skipNonindentSpaces
string ":" <|> string "Table:"
- result <- many1 inline
- blanklines
- return $ normalizeSpaces result
+ trimInlinesF . mconcat <$> many1 inline <* blanklines
-- Parse a simple table with '---' header and one line per row.
simpleTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine
+ (aligns, _widths, heads', lines') <-
+ tableWith (simpleTableHeader headless) tableLine
(return ())
(if headless then tableFooter else tableFooter <|> blanklines)
- tableCaption
-- Simple tables get 0s for relative column widths (i.e., use default)
- return $ Table c a (replicate (length a) 0) h l
+ return (aligns, replicate (length aligns) 0, heads', lines')
-- Parse a multiline table: starts with row of '-' on top, then header
-- (which may be multiline), then the rows,
-- which may be multiline, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
multilineTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
multilineTable headless =
- tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption
+ tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
multilineTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
+ -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
multilineTableHeader headless = try $ do
if headless
then return '\n'
else tableSep >>~ notFollowedBy blankline
rawContent <- if headless
- then return $ repeat ""
+ then return $ repeat ""
else many1
(notFollowedBy tableSep >> many1Till anyChar newline)
initSp <- nonindentSpaces
@@ -872,54 +957,206 @@ multilineTableHeader headless = try $ do
rawHeadsList <- if headless
then liftM (map (:[]) . tail .
splitStringByIndices (init indices)) $ lookAhead anyLine
- else return $ transpose $ map
+ else return $ transpose $ map
(\ln -> tail $ splitStringByIndices (init indices) ln)
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
then replicate (length dashes) ""
else map (intercalate " ") rawHeadsList
- heads <- mapM (parseFromString (many plain)) $
+ heads <- fmap sequence $
+ mapM (parseFromString (mconcat <$> many plain)) $
map removeLeadingTrailingSpace rawHeads
return (heads, aligns, indices)
--- Returns an alignment type for a table, based on a list of strings
--- (the rows of the column header) and a number (the length of the
--- dashed line under the rows.
-alignType :: [String]
- -> Int
- -> Alignment
-alignType [] _ = AlignDefault
-alignType strLst len =
- let nonempties = filter (not . null) $ map removeTrailingSpace strLst
- (leftSpace, rightSpace) =
- case sortBy (comparing length) nonempties of
- (x:_) -> (head x `elem` " \t", length x < len)
- [] -> (False, False)
- in case (leftSpace, rightSpace) of
- (True, False) -> AlignRight
- (False, True) -> AlignLeft
- (True, True) -> AlignCenter
- (False, False) -> AlignDefault
-
+-- Parse a grid table: starts with row of '-' on top, then header
+-- (which may be grid), then the rows,
+-- which may be grid, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
gridTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
-gridTable = gridTableWith block tableCaption
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
+gridTable headless =
+ tableWith (gridTableHeader headless) gridTableRow
+ (gridTableSep '-') gridTableFooter
+
+gridTableSplitLine :: [Int] -> String -> [String]
+gridTableSplitLine indices line = map removeFinalBar $ tail $
+ splitStringByIndices (init indices) $ removeTrailingSpace line
+
+gridPart :: Char -> Parser [Char] st (Int, Int)
+gridPart ch = do
+ dashes <- many1 (char ch)
+ char '+'
+ return (length dashes, length dashes + 1)
-table :: GenParser Char ParserState Block
-table = multilineTable False <|> simpleTable True <|>
- simpleTable False <|> multilineTable True <|>
- gridTable False <|> gridTable True <?> "table"
+gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
+
+removeFinalBar :: String -> String
+removeFinalBar =
+ reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
+
+-- | Separator between rows of grid table.
+gridTableSep :: Char -> Parser [Char] ParserState Char
+gridTableSep ch = try $ gridDashedLines ch >> return '\n'
+
+-- | Parse header for a grid table.
+gridTableHeader :: Bool -- ^ Headerless table
+ -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
+gridTableHeader headless = try $ do
+ optional blanklines
+ dashes <- gridDashedLines '-'
+ rawContent <- if headless
+ then return $ repeat ""
+ else many1
+ (notFollowedBy (gridTableSep '=') >> char '|' >>
+ many1Till anyChar newline)
+ if headless
+ then return ()
+ else gridTableSep '=' >> return ()
+ let lines' = map snd dashes
+ let indices = scanl (+) 0 lines'
+ let aligns = replicate (length lines') AlignDefault
+ -- RST does not have a notion of alignments
+ let rawHeads = if headless
+ then replicate (length dashes) ""
+ else map (intercalate " ") $ transpose
+ $ map (gridTableSplitLine indices) rawContent
+ heads <- fmap sequence $ mapM (parseFromString block) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
---
+gridTableRawLine :: [Int] -> Parser [Char] ParserState [String]
+gridTableRawLine indices = do
+ char '|'
+ line <- many1Till anyChar newline
+ return (gridTableSplitLine indices line)
+
+-- | Parse row of grid table.
+gridTableRow :: [Int]
+ -> Parser [Char] ParserState (F [Blocks])
+gridTableRow indices = do
+ colLines <- many1 (gridTableRawLine indices)
+ let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
+ transpose colLines
+ fmap compactify <$> fmap sequence (mapM (parseFromString block) cols)
+
+removeOneLeadingSpace :: [String] -> [String]
+removeOneLeadingSpace xs =
+ if all startsWithSpace xs
+ then map (drop 1) xs
+ else xs
+ where startsWithSpace "" = True
+ startsWithSpace (y:_) = y == ' '
+
+-- | Parse footer for a grid table.
+gridTableFooter :: Parser [Char] ParserState [Char]
+gridTableFooter = blanklines
+
+pipeTable :: Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
+pipeTable = try $ do
+ let pipeBreak = nonindentSpaces *> optional (char '|') *>
+ pipeTableHeaderPart `sepBy1` sepPipe <*
+ optional (char '|') <* blankline
+ (heads,aligns) <- try ( pipeBreak >>= \als ->
+ return (return $ replicate (length als) mempty, als))
+ <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als ->
+
+ return (row, als) )
+ lines' <- sequence <$> many1 pipeTableRow
+ blanklines
+ let widths = replicate (length aligns) 0.0
+ return $ (aligns, widths, heads, lines')
+
+sepPipe :: Parser [Char] ParserState ()
+sepPipe = try $ do
+ char '|' <|> char '+'
+ notFollowedBy blankline
+
+-- parse a row, also returning probable alignments for org-table cells
+pipeTableRow :: Parser [Char] ParserState (F [Blocks])
+pipeTableRow = do
+ nonindentSpaces
+ optional (char '|')
+ let cell = mconcat <$>
+ many (notFollowedBy (blankline <|> char '|') >> inline)
+ first <- cell
+ sepPipe
+ rest <- cell `sepBy1` sepPipe
+ optional (char '|')
+ blankline
+ let cells = sequence (first:rest)
+ return $ do
+ cells' <- cells
+ return $ map
+ (\ils ->
+ case trimInlines ils of
+ ils' | B.isNull ils' -> mempty
+ | otherwise -> B.plain $ ils') cells'
+
+pipeTableHeaderPart :: Parser [Char] st Alignment
+pipeTableHeaderPart = do
+ left <- optionMaybe (char ':')
+ many1 (char '-')
+ right <- optionMaybe (char ':')
+ return $
+ case (left,right) of
+ (Nothing,Nothing) -> AlignDefault
+ (Just _,Nothing) -> AlignLeft
+ (Nothing,Just _) -> AlignRight
+ (Just _,Just _) -> AlignCenter
+
+-- Succeed only if current line contains a pipe.
+scanForPipe :: Parser [Char] st ()
+scanForPipe = lookAhead (manyTill (satisfy (/='\n')) (char '|')) >> return ()
+
+-- | Parse a table using 'headerParser', 'rowParser',
+-- 'lineParser', and 'footerParser'. Variant of the version in
+-- Text.Pandoc.Parsing.
+tableWith :: Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
+ -> ([Int] -> Parser [Char] ParserState (F [Blocks]))
+ -> Parser [Char] ParserState sep
+ -> Parser [Char] ParserState end
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
+tableWith headerParser rowParser lineParser footerParser = try $ do
+ (heads, aligns, indices) <- headerParser
+ lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
+ footerParser
+ numColumns <- getOption readerColumns
+ let widths = if (indices == [])
+ then replicate (length aligns) 0.0
+ else widthsFromIndices numColumns indices
+ return $ (aligns, widths, heads, lines')
+
+table :: Parser [Char] ParserState (F Blocks)
+table = try $ do
+ frontCaption <- option Nothing (Just <$> tableCaption)
+ (aligns, widths, heads, lns) <-
+ try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
+ try (guardEnabled Ext_multiline_tables >>
+ multilineTable False) <|>
+ try (guardEnabled Ext_simple_tables >>
+ (simpleTable True <|> simpleTable False)) <|>
+ try (guardEnabled Ext_multiline_tables >>
+ multilineTable True) <|>
+ try (guardEnabled Ext_grid_tables >>
+ (gridTable False <|> gridTable True)) <?> "table"
+ optional blanklines
+ caption <- case frontCaption of
+ Nothing -> option (return mempty) tableCaption
+ Just c -> return c
+ return $ do
+ caption' <- caption
+ heads' <- heads
+ lns' <- lns
+ return $ B.table caption' (zip aligns widths) heads' lns'
+
+--
-- inline
--
-inline :: GenParser Char ParserState Inline
-inline = choice inlineParsers <?> "inline"
-
-inlineParsers :: [GenParser Char ParserState Inline]
-inlineParsers = [ whitespace
+inline :: Parser [Char] ParserState (F Inlines)
+inline = choice [ whitespace
, str
, endline
, code
@@ -927,8 +1164,8 @@ inlineParsers = [ whitespace
, strong
, emph
, note
- , link
, cite
+ , link
, image
, math
, strikeout
@@ -940,115 +1177,127 @@ inlineParsers = [ whitespace
, escapedChar
, rawLaTeXInline'
, exampleRef
- , smartPunctuation inline
- , charRef
+ , smart
+ , return . B.singleton <$> charRef
, symbol
- , ltSign ]
+ , ltSign
+ ] <?> "inline"
-escapedChar' :: GenParser Char ParserState Char
+escapedChar' :: Parser [Char] ParserState Char
escapedChar' = try $ do
char '\\'
- state <- getState
- if stateStrict state
- then oneOf "\\`*_{}[]()>#+-.!~"
- else satisfy (not . isAlphaNum)
+ (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
+ <|> oneOf "\\`*_{}[]()>#+-.!~"
-escapedChar :: GenParser Char ParserState Inline
+escapedChar :: Parser [Char] ParserState (F Inlines)
escapedChar = do
result <- escapedChar'
- return $ case result of
- ' ' -> Str "\160" -- "\ " is a nonbreaking space
- '\n' -> LineBreak -- "\[newline]" is a linebreak
- _ -> Str [result]
+ case result of
+ ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
+ '\n' -> guardEnabled Ext_escaped_line_breaks >>
+ return (return B.linebreak) -- "\[newline]" is a linebreak
+ _ -> return $ return $ B.str [result]
-ltSign :: GenParser Char ParserState Inline
+ltSign :: Parser [Char] ParserState (F Inlines)
ltSign = do
- st <- getState
- if stateStrict st
- then char '<'
- else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
- return $ Str ['<']
+ guardDisabled Ext_raw_html
+ <|> guardDisabled Ext_markdown_in_html_blocks
+ <|> (notFollowedBy' rawHtmlBlocks >> return ())
+ char '<'
+ return $ return $ B.str "<"
-exampleRef :: GenParser Char ParserState Inline
+exampleRef :: Parser [Char] ParserState (F Inlines)
exampleRef = try $ do
+ guardEnabled Ext_example_lists
char '@'
lab <- many1 (alphaNum <|> oneOf "-_")
- -- We just return a Str. These are replaced with numbers
- -- later. See the end of parseMarkdown.
- return $ Str $ '@' : lab
-
-symbol :: GenParser Char ParserState Inline
-symbol = do
+ return $ do
+ st <- askF
+ return $ case M.lookup lab (stateExamples st) of
+ Just n -> B.str (show n)
+ Nothing -> B.str ('@':lab)
+
+symbol :: Parser [Char] ParserState (F Inlines)
+symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
- notFollowedBy' rawTeXBlock
+ notFollowedBy' (() <$ rawTeXBlock)
char '\\')
- return $ Str [result]
+ return $ return $ B.str [result]
-- parses inline code, between n `s and n `s
-code :: GenParser Char ParserState Inline
-code = try $ do
+code :: Parser [Char] ParserState (F Inlines)
+code = try $ do
starts <- many1 (char '`')
skipSpaces
result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " "))
- (try (skipSpaces >> count (length starts) (char '`') >>
+ (try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
- attr <- option ([],[],[]) (try $ optional whitespace >> attributes)
- return $ Code attr $ removeLeadingTrailingSpace $ concat result
-
-mathWord :: GenParser Char st [Char]
-mathWord = liftM concat $ many1 mathChunk
-
-mathChunk :: GenParser Char st [Char]
-mathChunk = do char '\\'
- c <- anyChar
- return ['\\',c]
- <|> many1 (satisfy $ \c -> not (isBlank c || c == '\\' || c == '$'))
-
-math :: GenParser Char ParserState Inline
-math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath)
- <|> (mathInline >>= applyMacros' >>= return . Math InlineMath)
-
-mathDisplay :: GenParser Char ParserState String
-mathDisplay = try $ do
- failIfStrict
- string "$$"
- many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$")
-
-mathInline :: GenParser Char ParserState String
-mathInline = try $ do
- failIfStrict
- char '$'
+ attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >>
+ optional whitespace >> attributes)
+ return $ return $ B.codeWith attr $ removeLeadingTrailingSpace $ concat result
+
+math :: Parser [Char] ParserState (F Inlines)
+math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
+ <|> (return . B.math <$> (mathInline >>= applyMacros'))
+
+mathDisplay :: Parser [Char] ParserState String
+mathDisplay =
+ (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
+ <|> (guardEnabled Ext_tex_math_single_backslash >>
+ mathDisplayWith "\\[" "\\]")
+ <|> (guardEnabled Ext_tex_math_double_backslash >>
+ mathDisplayWith "\\\\[" "\\\\]")
+
+mathDisplayWith :: String -> String -> Parser [Char] ParserState String
+mathDisplayWith op cl = try $ do
+ string op
+ many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
+
+mathInline :: Parser [Char] ParserState String
+mathInline =
+ (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
+ <|> (guardEnabled Ext_tex_math_single_backslash >>
+ mathInlineWith "\\(" "\\)")
+ <|> (guardEnabled Ext_tex_math_double_backslash >>
+ mathInlineWith "\\\\(" "\\\\)")
+
+mathInlineWith :: String -> String -> Parser [Char] ParserState String
+mathInlineWith op cl = try $ do
+ string op
notFollowedBy space
- words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline)))
- char '$'
- notFollowedBy digit
- return $ intercalate " " words'
+ words' <- many1Till (count 1 (noneOf "\n\\")
+ <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
+ <|> count 1 newline <* notFollowedBy' blankline
+ *> return " ")
+ (try $ string cl)
+ notFollowedBy digit -- to prevent capture of $5
+ return $ concat words'
-- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row
-- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub
-fours :: GenParser Char st Inline
+fours :: Parser [Char] st (F Inlines)
fours = try $ do
x <- char '*' <|> char '_' <|> char '~' <|> char '^'
count 2 $ satisfy (==x)
rest <- many1 (satisfy (==x))
- return $ Str (x:x:x:rest)
+ return $ return $ B.str (x:x:x:rest)
-- | Parses a list of inlines between start and end delimiters.
inlinesBetween :: (Show b)
- => GenParser Char ParserState a
- -> GenParser Char ParserState b
- -> GenParser Char ParserState [Inline]
+ => Parser [Char] ParserState a
+ -> Parser [Char] ParserState b
+ -> Parser [Char] ParserState (F Inlines)
inlinesBetween start end =
- normalizeSpaces `liftM` try (start >> many1Till inner end)
- where inner = innerSpace <|> (notFollowedBy' whitespace >> inline)
+ (trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
+ where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace >>~ notFollowedBy' end
-- This is used to prevent exponential blowups for things like:
-- a**a*a**a*a**a*a**a*a**a*a**a*a**
-nested :: GenParser Char ParserState a
- -> GenParser Char ParserState a
+nested :: Parser [Char] ParserState a
+ -> Parser [Char] ParserState a
nested p = do
nestlevel <- stateMaxNestingLevel `fmap` getState
guard $ nestlevel > 0
@@ -1057,54 +1306,57 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
-emph :: GenParser Char ParserState Inline
-emph = Emph `fmap` nested
+emph :: Parser [Char] ParserState (F Inlines)
+emph = fmap B.emph <$> nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
where starStart = char '*' >> lookAhead nonspaceChar
- starEnd = notFollowedBy' strong >> char '*'
+ starEnd = notFollowedBy' (() <$ strong) >> char '*'
ulStart = char '_' >> lookAhead nonspaceChar
- ulEnd = notFollowedBy' strong >> char '_'
+ ulEnd = notFollowedBy' (() <$ strong) >> char '_'
-strong :: GenParser Char ParserState Inline
-strong = Strong `liftM` nested
+strong :: Parser [Char] ParserState (F Inlines)
+strong = fmap B.strong <$> nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
where starStart = string "**" >> lookAhead nonspaceChar
starEnd = try $ string "**"
ulStart = string "__" >> lookAhead nonspaceChar
ulEnd = try $ string "__"
-strikeout :: GenParser Char ParserState Inline
-strikeout = Strikeout `liftM`
- (failIfStrict >> inlinesBetween strikeStart strikeEnd)
+strikeout :: Parser [Char] ParserState (F Inlines)
+strikeout = fmap B.strikeout <$>
+ (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
where strikeStart = string "~~" >> lookAhead nonspaceChar
>> notFollowedBy (char '~')
strikeEnd = try $ string "~~"
-superscript :: GenParser Char ParserState Inline
-superscript = failIfStrict >> enclosed (char '^') (char '^')
- (notFollowedBy spaceChar >> inline) >>= -- may not contain Space
- return . Superscript
+superscript :: Parser [Char] ParserState (F Inlines)
+superscript = fmap B.superscript <$> try (do
+ guardEnabled Ext_superscript
+ char '^'
+ mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
-subscript :: GenParser Char ParserState Inline
-subscript = failIfStrict >> enclosed (char '~') (char '~')
- (notFollowedBy spaceChar >> inline) >>= -- may not contain Space
- return . Subscript
+subscript :: Parser [Char] ParserState (F Inlines)
+subscript = fmap B.subscript <$> try (do
+ guardEnabled Ext_subscript
+ char '~'
+ mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
-whitespace :: GenParser Char ParserState Inline
-whitespace = spaceChar >>
- ( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak))
- <|> (skipMany spaceChar >> return Space) ) <?> "whitespace"
+whitespace :: Parser [Char] ParserState (F Inlines)
+whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
+ where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
+ regsp = skipMany spaceChar >> return B.space
-nonEndline :: GenParser Char st Char
+nonEndline :: Parser [Char] st Char
nonEndline = satisfy (/='\n')
-str :: GenParser Char ParserState Inline
+str :: Parser [Char] ParserState (F Inlines)
str = do
- smart <- stateSmart `fmap` getState
+ isSmart <- readerSmart . stateOptions <$> getState
a <- alphaNum
as <- many $ alphaNum
- <|> (try $ char '_' >>~ lookAhead alphaNum)
- <|> if smart
+ <|> (guardEnabled Ext_intraword_underscores >>
+ try (char '_' >>~ lookAhead alphaNum))
+ <|> if isSmart
then (try $ satisfy (\c -> c == '\'' || c == '\x2019') >>
lookAhead alphaNum >> return '\x2019')
-- for things like l'aide
@@ -1113,15 +1365,16 @@ str = do
updateState $ \s -> s{ stateLastStrPos = Just pos }
let result = a:as
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
- if smart
+ if isSmart
then case likelyAbbrev result of
- [] -> return $ Str result
+ [] -> return $ return $ B.str result
xs -> choice (map (\x ->
try (string x >> oneOf " \n" >>
lookAhead alphaNum >>
- return (Str $ result ++ spacesToNbr x ++ "\160"))) xs)
- <|> (return $ Str result)
- else return $ Str result
+ return (return $ B.str
+ $ result ++ spacesToNbr x ++ "\160"))) xs)
+ <|> (return $ return $ B.str result)
+ else return $ return $ B.str result
-- | if the string matches the beginning of an abbreviation (before
-- the first period, return strings that would finish the abbreviation.
@@ -1136,39 +1389,38 @@ likelyAbbrev x =
in map snd $ filter (\(y,_) -> y == x) abbrPairs
-- an endline character that can be treated as a space, not a structural break
-endline :: GenParser Char ParserState Inline
+endline :: Parser [Char] ParserState (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
- st <- getState
- when (stateStrict st) $ do
- notFollowedBy emailBlockQuoteStart
- notFollowedBy (char '#') -- atx header
+ guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
+ guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
-- parse potential list-starts differently if in a list:
+ st <- getState
when (stateParserContext st == ListItemState) $ do
notFollowedBy' bulletListStart
notFollowedBy' anyOrderedListStart
- return Space
+ (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
+ <|> (return $ return B.space)
--
-- links
--
-- a reference label for a link
-reference :: GenParser Char ParserState [Inline]
+reference :: Parser [Char] ParserState (F Inlines, String)
reference = do notFollowedBy' (string "[^") -- footnote reference
- result <- inlinesInBalancedBrackets inline
- return $ normalizeSpaces result
+ withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
-- source for a link, with optional title
-source :: GenParser Char ParserState (String, [Char])
+source :: Parser [Char] ParserState (String, String)
source =
(try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|>
-- the following is needed for cases like: [ref](/url(a).
(enclosed (char '(') (char ')') litChar >>= parseFromString source')
-- auxiliary function for source
-source' :: GenParser Char ParserState (String, [Char])
+source' :: Parser [Char] ParserState (String, String)
source' = do
skipSpaces
let nl = char '\n' >>~ notFollowedBy blankline
@@ -1186,7 +1438,7 @@ source' = do
eof
return (escapeURI $ removeTrailingSpace src, tit)
-linkTitle :: GenParser Char ParserState String
+linkTitle :: Parser [Char] ParserState String
linkTitle = try $ do
(many1 spaceChar >> option '\n' newline) <|> newline
skipSpaces
@@ -1194,78 +1446,88 @@ linkTitle = try $ do
tit <- manyTill litChar (try (char delim >> skipSpaces >> eof))
return $ fromEntities tit
-link :: GenParser Char ParserState Inline
+link :: Parser [Char] ParserState (F Inlines)
link = try $ do
- lab <- reference
- (src, tit) <- source <|> referenceLink lab
- return $ Link (delinkify lab) (src, tit)
-
-delinkify :: [Inline] -> [Inline]
-delinkify = bottomUp $ concatMap go
- where go (Link lab _) = lab
- go x = [x]
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (lab,raw) <- reference
+ setState $ st{ stateAllowLinks = True }
+ regLink B.link lab <|> referenceLink B.link (lab,raw)
+
+regLink :: (String -> String -> Inlines -> Inlines)
+ -> F Inlines -> Parser [Char] ParserState (F Inlines)
+regLink constructor lab = try $ do
+ (src, tit) <- source
+ return $ constructor src tit <$> lab
-- a link like [this][ref] or [this][] or [this]
-referenceLink :: [Inline]
- -> GenParser Char ParserState (String, [Char])
-referenceLink lab = do
- ref <- option [] (try (optional (char ' ') >>
- optional (newline >> skipSpaces) >> reference))
- let ref' = if null ref then lab else ref
- state <- getState
- case lookupKeySrc (stateKeys state) (toKey ref') of
- Nothing -> fail "no corresponding key"
- Just target -> return target
-
-autoLink :: GenParser Char ParserState Inline
+referenceLink :: (String -> String -> Inlines -> Inlines)
+ -> (F Inlines, String) -> Parser [Char] ParserState (F Inlines)
+referenceLink constructor (lab, raw) = do
+ raw' <- try (optional (char ' ') >>
+ optional (newline >> skipSpaces) >>
+ (snd <$> reference)) <|> return ""
+ let key = toKey $ if raw' == "[]" || raw' == "" then raw else raw'
+ let dropRB (']':xs) = xs
+ dropRB xs = xs
+ let dropLB ('[':xs) = xs
+ dropLB xs = xs
+ let dropBrackets = reverse . dropRB . reverse . dropLB
+ fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
+ return $ do
+ keys <- asksF stateKeys
+ case M.lookup key keys of
+ Nothing -> (\x -> B.str "[" <> x <> B.str "]" <> B.str raw') <$> fallback
+ Just (src,tit) -> constructor src tit <$> lab
+
+autoLink :: Parser [Char] ParserState (F Inlines)
autoLink = try $ do
char '<'
(orig, src) <- uri <|> emailAddress
char '>'
- st <- getState
- return $ if stateStrict st
- then Link [Str orig] (src, "")
- else Link [Code ("",["url"],[]) orig] (src, "")
+ (guardEnabled Ext_autolink_code_spans >>
+ return (return $ B.link src "" (B.codeWith ("",["url"],[]) orig)))
+ <|> return (return $ B.link src "" (B.str orig))
-image :: GenParser Char ParserState Inline
+image :: Parser [Char] ParserState (F Inlines)
image = try $ do
char '!'
- lab <- reference
- (src, tit) <- source <|> referenceLink lab
- return $ Image lab (src,tit)
+ (lab,raw) <- reference
+ regLink B.image lab <|> referenceLink B.image (lab,raw)
-note :: GenParser Char ParserState Inline
+note :: Parser [Char] ParserState (F Inlines)
note = try $ do
- failIfStrict
+ guardEnabled Ext_footnotes
ref <- noteMarker
- state <- getState
- let notes = stateNotes state
- case lookup ref notes of
- Nothing -> fail "note not found"
- Just raw -> do
- -- We temporarily empty the note list while parsing the note,
- -- so that we don't get infinite loops with notes inside notes...
- -- Note references inside other notes do not work.
- updateState $ \st -> st{ stateNotes = [] }
- contents <- parseFromString parseBlocks raw
- updateState $ \st -> st{ stateNotes = notes }
- return $ Note contents
-
-inlineNote :: GenParser Char ParserState Inline
+ return $ do
+ notes <- asksF stateNotes'
+ case lookup ref notes of
+ Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
+ Just contents -> do
+ st <- askF
+ -- process the note in a context that doesn't resolve
+ -- notes, to avoid infinite looping with notes inside
+ -- notes:
+ let contents' = runF contents st{ stateNotes' = [] }
+ return $ B.note contents'
+
+inlineNote :: Parser [Char] ParserState (F Inlines)
inlineNote = try $ do
- failIfStrict
+ guardEnabled Ext_inline_notes
char '^'
- contents <- inlinesInBalancedBrackets inline
- return $ Note [Para contents]
+ contents <- inlinesInBalancedBrackets
+ return $ B.note . B.para <$> contents
-rawLaTeXInline' :: GenParser Char ParserState Inline
+rawLaTeXInline' :: Parser [Char] ParserState (F Inlines)
rawLaTeXInline' = try $ do
- failIfStrict
+ guardEnabled Ext_raw_tex
lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
RawInline _ s <- rawLaTeXInline
- return $ RawInline "tex" s -- "tex" because it might be context or latex
+ return $ return $ B.rawInline "tex" s
+ -- "tex" because it might be context or latex
-rawConTeXtEnvironment :: GenParser Char st String
+rawConTeXtEnvironment :: Parser [Char] st String
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1274,37 +1536,33 @@ rawConTeXtEnvironment = try $ do
(try $ string "\\stop" >> string completion)
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
-inBrackets :: (GenParser Char st Char) -> GenParser Char st String
+inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String
inBrackets parser = do
char '['
contents <- many parser
char ']'
return $ "[" ++ contents ++ "]"
-rawHtmlInline :: GenParser Char ParserState Inline
+rawHtmlInline :: Parser [Char] ParserState (F Inlines)
rawHtmlInline = do
- st <- getState
- (_,result) <- if stateStrict st
- then htmlTag (not . isTextTag)
- else htmlTag isInlineTag
- return $ RawInline "html" result
+ guardEnabled Ext_raw_html
+ mdInHtml <- option False $
+ guardEnabled Ext_markdown_in_html_blocks >> return True
+ (_,result) <- if mdInHtml
+ then htmlTag isInlineTag
+ else htmlTag (not . isTextTag)
+ return $ return $ B.rawInline "html" result
-- Citations
-cite :: GenParser Char ParserState Inline
+cite :: Parser [Char] ParserState (F Inlines)
cite = do
- failIfStrict
+ guardEnabled Ext_citations
+ getOption readerCitations >>= guard . not . null
citations <- textualCite <|> normalCite
- return $ Cite citations []
+ return $ flip B.cite mempty <$> citations
-spnl :: GenParser Char st ()
-spnl = try $ do
- skipSpaces
- optional newline
- skipSpaces
- notFollowedBy (char '\n')
-
-textualCite :: GenParser Char ParserState [Citation]
+textualCite :: Parser [Char] ParserState (F [Citation])
textualCite = try $ do
(_, key) <- citeKey
let first = Citation{ citationId = key
@@ -1314,22 +1572,25 @@ textualCite = try $ do
, citationNoteNum = 0
, citationHash = 0
}
- rest <- option [] $ try $ spnl >> normalCite
- if null rest
- then option [first] $ bareloc first
- else return $ first : rest
+ mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite
+ case mbrest of
+ Just rest -> return $ (first:) <$> rest
+ Nothing -> option (return [first]) $ bareloc first
-bareloc :: Citation -> GenParser Char ParserState [Citation]
+bareloc :: Citation -> Parser [Char] ParserState (F [Citation])
bareloc c = try $ do
spnl
char '['
suff <- suffix
- rest <- option [] $ try $ char ';' >> citeList
+ rest <- option (return []) $ try $ char ';' >> citeList
spnl
char ']'
- return $ c{ citationSuffix = suff } : rest
+ return $ do
+ suff' <- suff
+ rest' <- rest
+ return $ c{ citationSuffix = B.toList suff' } : rest'
-normalCite :: GenParser Char ParserState [Citation]
+normalCite :: Parser [Char] ParserState (F [Citation])
normalCite = try $ do
char '['
spnl
@@ -1338,7 +1599,7 @@ normalCite = try $ do
char ']'
return citations
-citeKey :: GenParser Char ParserState (Bool, String)
+citeKey :: Parser [Char] ParserState (Bool, String)
citeKey = try $ do
suppress_author <- option False (char '-' >> return True)
char '@'
@@ -1346,34 +1607,37 @@ citeKey = try $ do
let internal p = try $ p >>~ lookAhead (letter <|> digit)
rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_?<>~")
let key = first:rest
- st <- getState
- guard $ key `elem` stateCitations st
+ citations' <- getOption readerCitations
+ guard $ key `elem` citations'
return (suppress_author, key)
-suffix :: GenParser Char ParserState [Inline]
+suffix :: Parser [Char] ParserState (F Inlines)
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
spnl
- rest <- liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline
+ rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
return $ if hasSpace
- then Space : rest
+ then (B.space <>) <$> rest
else rest
-prefix :: GenParser Char ParserState [Inline]
-prefix = liftM normalizeSpaces $
+prefix :: Parser [Char] ParserState (F Inlines)
+prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
-citeList :: GenParser Char ParserState [Citation]
-citeList = sepBy1 citation (try $ char ';' >> spnl)
+citeList :: Parser [Char] ParserState (F [Citation])
+citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
-citation :: GenParser Char ParserState Citation
+citation :: Parser [Char] ParserState (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
- return $ Citation{ citationId = key
- , citationPrefix = pref
- , citationSuffix = suff
+ return $ do
+ x <- pref
+ y <- suff
+ return $ Citation{ citationId = key
+ , citationPrefix = B.toList x
+ , citationSuffix = B.toList y
, citationMode = if suppress_author
then SuppressAuthor
else NormalCitation
@@ -1381,3 +1645,22 @@ citation = try $ do
, citationHash = 0
}
+smart :: Parser [Char] ParserState (F Inlines)
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses])
+
+singleQuoted :: Parser [Char] ParserState (F Inlines)
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ fmap B.singleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline singleQuoteEnd
+
+doubleQuoted :: Parser [Char] ParserState (F Inlines)
+doubleQuoted = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $
+ fmap B.doubleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline doubleQuoteEnd
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 2c6fcc6e6..a0e5a0635 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Native
Copyright : Copyright (C) 2011 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -31,6 +31,7 @@ Conversion of a string representation of a pandoc type (@Pandoc@,
module Text.Pandoc.Readers.Native ( readNative ) where
import Text.Pandoc.Definition
+import Text.Pandoc.Shared (safeRead)
nullMeta :: Meta
nullMeta = Meta{ docTitle = []
@@ -51,31 +52,31 @@ nullMeta = Meta{ docTitle = []
readNative :: String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
readNative s =
- case reads s of
- (d,_):_ -> d
- [] -> Pandoc nullMeta $ readBlocks s
+ case safeRead s of
+ Just d -> d
+ Nothing -> Pandoc nullMeta $ readBlocks s
readBlocks :: String -> [Block]
readBlocks s =
- case reads s of
- (d,_):_ -> d
- [] -> [readBlock s]
+ case safeRead s of
+ Just d -> d
+ Nothing -> [readBlock s]
readBlock :: String -> Block
readBlock s =
- case reads s of
- (d,_):_ -> d
- [] -> Plain $ readInlines s
+ case safeRead s of
+ Just d -> d
+ Nothing -> Plain $ readInlines s
readInlines :: String -> [Inline]
readInlines s =
- case reads s of
- (d,_):_ -> d
- [] -> [readInline s]
+ case safeRead s of
+ Just d -> d
+ Nothing -> [readInline s]
readInline :: String -> Inline
readInline s =
- case reads s of
- (d,_):_ -> d
- [] -> error "Cannot parse document"
+ case safeRead s of
+ Just d -> d
+ Nothing -> error "Cannot parse document"
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index d1010a736..9fb976903 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Readers.RST
+ Module : Text.Pandoc.Readers.RST
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -27,24 +27,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from reStructuredText to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.RST (
+module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
-import Text.ParserCombinators.Parsec
-import Control.Monad ( when, liftM )
+import Text.Pandoc.Options
+import Control.Monad ( when, liftM, guard, mzero )
import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy )
import qualified Data.Map as M
import Text.Printf ( printf )
import Data.Maybe ( catMaybes )
-- | Parse reStructuredText string and return Pandoc document.
-readRST :: ParserState -- ^ Parser state, including options for parser
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+readRST :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
-readRST state s = (readWith parseRST) state (s ++ "\n\n")
+readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n")
--
-- Constants and data structure definitions
@@ -58,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\\`|*_<>$:[]()-.\"'\8216\8217\8220\8221"
+specialChars = "\\`|*_<>$:/[]{}()-.\"'\8216\8217\8220\8221"
--
-- parsing documents
@@ -71,14 +71,14 @@ isHeader _ _ = False
-- | Promote all headers in a list of blocks. (Part of
-- title transformation for RST.)
promoteHeaders :: Int -> [Block] -> [Block]
-promoteHeaders num ((Header level text):rest) =
+promoteHeaders num ((Header level text):rest) =
(Header (level - num) text):(promoteHeaders num rest)
promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
promoteHeaders _ [] = []
-- | If list of blocks starts with a header (or a header and subheader)
-- of level that are not found elsewhere, return it as a title and
--- promote all the other headers.
+-- promote all the other headers.
titleTransform :: [Block] -- ^ list of blocks
-> ([Block], [Inline]) -- ^ modified list of blocks, title
titleTransform ((Header 1 head1):(Header 2 head2):rest) |
@@ -89,7 +89,7 @@ titleTransform ((Header 1 head1):rest) |
(promoteHeaders 1 rest, head1)
titleTransform blocks = (blocks, [])
-parseRST :: GenParser Char ParserState Pandoc
+parseRST :: Parser [Char] ParserState Pandoc
parseRST = do
optional blanklines -- skip blank lines at beginning of file
startPos <- getPosition
@@ -103,12 +103,13 @@ parseRST = do
let reversedNotes = stateNotes st'
updateState $ \s -> s { stateNotes = reverse reversedNotes }
-- now parse it for real...
- blocks <- parseBlocks
+ blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
- state <- getState
- let (blocks'', title) = if stateStandalone state
+ standalone <- getOption readerStandalone
+ let (blocks'', title) = if standalone
then titleTransform blocks'
else (blocks', [])
+ state <- getState
let authors = stateAuthors state
let date = stateDate state
let title' = if (null title) then (stateTitle state) else title
@@ -118,10 +119,10 @@ parseRST = do
-- parsing blocks
--
-parseBlocks :: GenParser Char ParserState [Block]
+parseBlocks :: Parser [Char] ParserState [Block]
parseBlocks = manyTill block eof
-block :: GenParser Char ParserState Block
+block :: Parser [Char] ParserState Block
block = choice [ codeBlock
, rawBlock
, blockQuote
@@ -146,7 +147,7 @@ block = choice [ codeBlock
-- field list
--
-rawFieldListItem :: String -> GenParser Char ParserState (String, String)
+rawFieldListItem :: String -> Parser [Char] ParserState (String, String)
rawFieldListItem indent = try $ do
string indent
char ':'
@@ -160,7 +161,7 @@ rawFieldListItem indent = try $ do
return (name, raw)
fieldListItem :: String
- -> GenParser Char ParserState (Maybe ([Inline], [[Block]]))
+ -> Parser [Char] ParserState (Maybe ([Inline], [[Block]]))
fieldListItem indent = try $ do
(name, raw) <- rawFieldListItem indent
let term = [Str name]
@@ -187,7 +188,7 @@ extractContents [Plain auth] = auth
extractContents [Para auth] = auth
extractContents _ = []
-fieldList :: GenParser Char ParserState Block
+fieldList :: Parser [Char] ParserState Block
fieldList = try $ do
indent <- lookAhead $ many spaceChar
items <- many1 $ fieldListItem indent
@@ -199,7 +200,7 @@ fieldList = try $ do
-- line block
--
-lineBlockLine :: GenParser Char ParserState [Inline]
+lineBlockLine :: Parser [Char] ParserState [Inline]
lineBlockLine = try $ do
char '|'
char ' ' <|> lookAhead (char '\n')
@@ -210,7 +211,7 @@ lineBlockLine = try $ do
then normalizeSpaces line
else Str white : normalizeSpaces line
-lineBlock :: GenParser Char ParserState Block
+lineBlock :: Parser [Char] ParserState Block
lineBlock = try $ do
lines' <- many1 lineBlockLine
blanklines
@@ -220,14 +221,14 @@ lineBlock = try $ do
-- paragraph block
--
-para :: GenParser Char ParserState Block
+para :: Parser [Char] ParserState Block
para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
-codeBlockStart :: GenParser Char st Char
+codeBlockStart :: Parser [Char] st Char
codeBlockStart = string "::" >> blankline >> blankline
-- paragraph that ends in a :: starting a code block
-paraBeforeCodeBlock :: GenParser Char ParserState Block
+paraBeforeCodeBlock :: Parser [Char] ParserState Block
paraBeforeCodeBlock = try $ do
result <- many1 (notFollowedBy' codeBlockStart >> inline)
lookAhead (string "::")
@@ -236,21 +237,21 @@ paraBeforeCodeBlock = try $ do
else (normalizeSpaces result) ++ [Str ":"]
-- regular paragraph
-paraNormal :: GenParser Char ParserState Block
-paraNormal = try $ do
+paraNormal :: Parser [Char] ParserState Block
+paraNormal = try $ do
result <- many1 inline
newline
blanklines
return $ Para $ normalizeSpaces result
-plain :: GenParser Char ParserState Block
-plain = many1 inline >>= return . Plain . normalizeSpaces
+plain :: Parser [Char] ParserState Block
+plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- image block
--
-imageBlock :: GenParser Char ParserState Block
+imageBlock :: Parser [Char] ParserState Block
imageBlock = try $ do
string ".. image:: "
src <- manyTill anyChar newline
@@ -265,11 +266,11 @@ imageBlock = try $ do
-- header blocks
--
-header :: GenParser Char ParserState Block
+header :: Parser [Char] ParserState Block
header = doubleHeader <|> singleHeader <?> "header"
-- a header with lines on top and bottom
-doubleHeader :: GenParser Char ParserState Block
+doubleHeader :: Parser [Char] ParserState Block
doubleHeader = try $ do
c <- oneOf underlineChars
rest <- many (char c) -- the top line
@@ -283,7 +284,7 @@ doubleHeader = try $ do
blankline -- spaces and newline
count lenTop (char c) -- the bottom line
blanklines
- -- check to see if we've had this kind of header before.
+ -- check to see if we've had this kind of header before.
-- if so, get appropriate level. if not, add to list.
state <- getState
let headerTable = stateHeaderTable state
@@ -294,8 +295,8 @@ doubleHeader = try $ do
return $ Header level (normalizeSpaces txt)
-- a header with line on the bottom only
-singleHeader :: GenParser Char ParserState Block
-singleHeader = try $ do
+singleHeader :: Parser [Char] ParserState Block
+singleHeader = try $ do
notFollowedBy' whitespace
txt <- many1 (do {notFollowedBy blankline; inline})
pos <- getPosition
@@ -317,7 +318,7 @@ singleHeader = try $ do
-- hrule block
--
-hrule :: GenParser Char st Block
+hrule :: Parser [Char] st Block
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@@ -331,14 +332,14 @@ hrule = try $ do
--
-- read a line indented by a given string
-indentedLine :: String -> GenParser Char st [Char]
+indentedLine :: String -> Parser [Char] st [Char]
indentedLine indents = try $ do
string indents
manyTill anyChar newline
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
-indentedBlock :: GenParser Char st [Char]
+indentedBlock :: Parser [Char] st [Char]
indentedBlock = try $ do
indents <- lookAhead $ many1 spaceChar
lns <- many1 $ try $ do b <- option "" blanklines
@@ -347,7 +348,7 @@ indentedBlock = try $ do
optional blanklines
return $ unlines lns
-codeBlock :: GenParser Char st Block
+codeBlock :: Parser [Char] st Block
codeBlock = try $ do
codeBlockStart
result <- indentedBlock
@@ -355,7 +356,7 @@ codeBlock = try $ do
-- | The 'code-block' directive (from Sphinx) that allows a language to be
-- specified.
-customCodeBlock :: GenParser Char st Block
+customCodeBlock :: Parser [Char] st Block
customCodeBlock = try $ do
string ".. code-block:: "
language <- manyTill anyChar newline
@@ -364,7 +365,7 @@ customCodeBlock = try $ do
return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result
-figureBlock :: GenParser Char ParserState Block
+figureBlock :: Parser [Char] ParserState Block
figureBlock = try $ do
string ".. figure::"
src <- removeLeadingTrailingSpace `fmap` manyTill anyChar newline
@@ -372,24 +373,24 @@ figureBlock = try $ do
caption <- parseFromString extractCaption body
return $ Para [Image caption (src,"")]
-extractCaption :: GenParser Char ParserState [Inline]
+extractCaption :: Parser [Char] ParserState [Inline]
extractCaption = try $ do
manyTill anyLine blanklines
many inline
-- | The 'math' directive (from Sphinx) for display math.
-mathBlock :: GenParser Char st Block
+mathBlock :: Parser [Char] st Block
mathBlock = try $ do
string ".. math::"
mathBlockMultiline <|> mathBlockOneLine
-mathBlockOneLine :: GenParser Char st Block
+mathBlockOneLine :: Parser [Char] st Block
mathBlockOneLine = try $ do
result <- manyTill anyChar newline
blanklines
return $ Para [Math DisplayMath $ removeLeadingTrailingSpace result]
-mathBlockMultiline :: GenParser Char st Block
+mathBlockMultiline :: Parser [Char] st Block
mathBlockMultiline = try $ do
blanklines
result <- indentedBlock
@@ -404,9 +405,9 @@ mathBlockMultiline = try $ do
$ filter (not . null) $ splitBy null lns'
return $ Para $ map (Math DisplayMath) eqs
-lhsCodeBlock :: GenParser Char ParserState Block
+lhsCodeBlock :: Parser [Char] ParserState Block
lhsCodeBlock = try $ do
- failUnlessLHS
+ guardEnabled Ext_literate_haskell
optional codeBlockStart
pos <- getPosition
when (sourceColumn pos /= 1) $ fail "Not in first column"
@@ -418,7 +419,7 @@ lhsCodeBlock = try $ do
blanklines
return $ CodeBlock ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns'
-birdTrackLine :: GenParser Char st [Char]
+birdTrackLine :: Parser [Char] st [Char]
birdTrackLine = do
char '>'
manyTill anyChar newline
@@ -427,7 +428,7 @@ birdTrackLine = do
-- raw html/latex/etc
--
-rawBlock :: GenParser Char st Block
+rawBlock :: Parser [Char] st Block
rawBlock = try $ do
string ".. raw:: "
lang <- many1 (letter <|> digit)
@@ -439,7 +440,7 @@ rawBlock = try $ do
-- block quotes
--
-blockQuote :: GenParser Char ParserState Block
+blockQuote :: Parser [Char] ParserState Block
blockQuote = do
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
@@ -450,10 +451,10 @@ blockQuote = do
-- list blocks
--
-list :: GenParser Char ParserState Block
+list :: Parser [Char] ParserState Block
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
definitionListItem = try $ do
-- avoid capturing a directive or comment
notFollowedBy (try $ char '.' >> char '.')
@@ -463,11 +464,11 @@ definitionListItem = try $ do
contents <- parseFromString parseBlocks $ raw ++ "\n"
return (normalizeSpaces term, [contents])
-definitionList :: GenParser Char ParserState Block
+definitionList :: Parser [Char] ParserState Block
definitionList = many1 definitionListItem >>= return . DefinitionList
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: GenParser Char st Int
+bulletListStart :: Parser [Char] st Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
@@ -477,14 +478,14 @@ bulletListStart = try $ do
-- parses ordered list start and returns its length (inc following whitespace)
orderedListStart :: ListNumberStyle
-> ListNumberDelim
- -> GenParser Char ParserState Int
+ -> Parser [Char] ParserState Int
orderedListStart style delim = try $ do
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
white <- many1 spaceChar
return $ markerLen + length white
-- parse a line of a list item
-listLine :: Int -> GenParser Char ParserState [Char]
+listLine :: Int -> Parser [Char] ParserState [Char]
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
@@ -492,36 +493,35 @@ listLine markerLength = try $ do
return $ line ++ "\n"
-- indent by specified number of spaces (or equiv. tabs)
-indentWith :: Int -> GenParser Char ParserState [Char]
+indentWith :: Int -> Parser [Char] ParserState [Char]
indentWith num = do
- state <- getState
- let tabStop = stateTabStop state
+ tabStop <- getOption readerTabStop
if (num < tabStop)
then count num (char ' ')
- else choice [ try (count num (char ' ')),
- (try (char '\t' >> count (num - tabStop) (char ' '))) ]
+ else choice [ try (count num (char ' ')),
+ (try (char '\t' >> count (num - tabStop) (char ' '))) ]
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: GenParser Char ParserState Int
- -> GenParser Char ParserState (Int, [Char])
+rawListItem :: Parser [Char] ParserState Int
+ -> Parser [Char] ParserState (Int, [Char])
rawListItem start = try $ do
markerLength <- start
firstLine <- manyTill anyChar newline
restLines <- many (listLine markerLength)
return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
--- continuation of a list item - indented and separated by blankline or
--- (in compact lists) endline.
+-- continuation of a list item - indented and separated by blankline or
+-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: Int -> GenParser Char ParserState [Char]
+listContinuation :: Int -> Parser [Char] ParserState [Char]
listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
return $ blanks ++ concat result
-listItem :: GenParser Char ParserState Int
- -> GenParser Char ParserState [Block]
-listItem start = try $ do
+listItem :: Parser [Char] ParserState Int
+ -> Parser [Char] ParserState [Block]
+listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
blanks <- choice [ try (many blankline >>~ lookAhead start),
@@ -537,22 +537,22 @@ listItem start = try $ do
updateState (\st -> st {stateParserContext = oldContext})
return parsed
-orderedList :: GenParser Char ParserState Block
+orderedList :: Parser [Char] ParserState Block
orderedList = try $ do
(start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify items
return $ OrderedList (start, style, delim) items'
-bulletList :: GenParser Char ParserState Block
-bulletList = many1 (listItem bulletListStart) >>=
+bulletList :: Parser [Char] ParserState Block
+bulletList = many1 (listItem bulletListStart) >>=
return . BulletList . compactify
--
-- default-role block
--
-defaultRoleBlock :: GenParser Char ParserState Block
+defaultRoleBlock :: Parser [Char] ParserState Block
defaultRoleBlock = try $ do
string ".. default-role::"
-- doesn't enforce any restrictions on the role name; embedded spaces shouldn't be allowed, for one
@@ -570,7 +570,7 @@ defaultRoleBlock = try $ do
-- unknown directive (e.g. comment)
--
-unknownDirective :: GenParser Char st Block
+unknownDirective :: Parser [Char] st Block
unknownDirective = try $ do
string ".."
notFollowedBy (noneOf " \t\n")
@@ -582,7 +582,7 @@ unknownDirective = try $ do
--- note block
---
-noteBlock :: GenParser Char ParserState [Char]
+noteBlock :: Parser [Char] ParserState [Char]
noteBlock = try $ do
startPos <- getPosition
string ".."
@@ -601,7 +601,7 @@ noteBlock = try $ do
-- return blanks so line count isn't affected
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-noteMarker :: GenParser Char ParserState [Char]
+noteMarker :: Parser [Char] ParserState [Char]
noteMarker = do
char '['
res <- many1 digit
@@ -614,13 +614,13 @@ noteMarker = do
-- reference key
--
-quotedReferenceName :: GenParser Char ParserState [Inline]
+quotedReferenceName :: Parser [Char] ParserState [Inline]
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code!
- label' <- many1Till inline (char '`')
+ label' <- many1Till inline (char '`')
return label'
-unquotedReferenceName :: GenParser Char ParserState [Inline]
+unquotedReferenceName :: Parser [Char] ParserState [Inline]
unquotedReferenceName = try $ do
label' <- many1Till inline (lookAhead $ char ':')
return label'
@@ -629,24 +629,24 @@ unquotedReferenceName = try $ do
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
-simpleReferenceName' :: GenParser Char st String
+simpleReferenceName' :: Parser [Char] st String
simpleReferenceName' = do
x <- alphaNum
xs <- many $ alphaNum
<|> (try $ oneOf "-_:+." >> lookAhead alphaNum)
return (x:xs)
-simpleReferenceName :: GenParser Char st [Inline]
+simpleReferenceName :: Parser [Char] st [Inline]
simpleReferenceName = do
raw <- simpleReferenceName'
return [Str raw]
-referenceName :: GenParser Char ParserState [Inline]
+referenceName :: Parser [Char] ParserState [Inline]
referenceName = quotedReferenceName <|>
(try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
unquotedReferenceName
-referenceKey :: GenParser Char ParserState [Char]
+referenceKey :: Parser [Char] ParserState [Char]
referenceKey = do
startPos <- getPosition
(key, target) <- choice [imageKey, anonymousKey, regularKey]
@@ -658,38 +658,43 @@ referenceKey = do
-- return enough blanks to replace key
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-targetURI :: GenParser Char st [Char]
+targetURI :: Parser [Char] st [Char]
targetURI = do
skipSpaces
optional newline
- contents <- many1 (try (many spaceChar >> newline >>
+ contents <- many1 (try (many spaceChar >> newline >>
many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
blanklines
return $ escapeURI $ removeLeadingTrailingSpace $ contents
-imageKey :: GenParser Char ParserState (Key, Target)
+imageKey :: Parser [Char] ParserState (Key, Target)
imageKey = try $ do
string ".. |"
- ref <- manyTill inline (char '|')
+ (_,ref) <- withRaw (manyTill inline (char '|'))
skipSpaces
string "image::"
src <- targetURI
- return (toKey (normalizeSpaces ref), (src, ""))
+ return (toKey $ init ref, (src, ""))
-anonymousKey :: GenParser Char st (Key, Target)
+anonymousKey :: Parser [Char] st (Key, Target)
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
pos <- getPosition
- return (toKey [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, ""))
+ return (toKey $ "_" ++ printf "%09d" (sourceLine pos), (src, ""))
-regularKey :: GenParser Char ParserState (Key, Target)
+stripTicks :: String -> String
+stripTicks = reverse . stripTick . reverse . stripTick
+ where stripTick ('`':xs) = xs
+ stripTick xs = xs
+
+regularKey :: Parser [Char] ParserState (Key, Target)
regularKey = try $ do
string ".. _"
- ref <- referenceName
+ (_,ref) <- withRaw referenceName
char ':'
src <- targetURI
- return (toKey (normalizeSpaces ref), (src, ""))
+ return (toKey $ stripTicks ref, (src, ""))
--
-- tables
@@ -702,37 +707,37 @@ regularKey = try $ do
-- Simple tables TODO:
-- - column spans
-- - multiline support
--- - ensure that rightmost column span does not need to reach end
+-- - ensure that rightmost column span does not need to reach end
-- - require at least 2 columns
--
-- Grid tables TODO:
-- - column spans
-dashedLine :: Char -> GenParser Char st (Int, Int)
+dashedLine :: Char -> Parser [Char] st (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
-simpleDashedLines :: Char -> GenParser Char st [(Int,Int)]
+simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
-simpleTableSep :: Char -> GenParser Char ParserState Char
+simpleTableSep :: Char -> Parser [Char] ParserState Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-- Parse a table footer
-simpleTableFooter :: GenParser Char ParserState [Char]
+simpleTableFooter :: Parser [Char] ParserState [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-- Parse a raw line and split it into chunks by indices.
-simpleTableRawLine :: [Int] -> GenParser Char ParserState [String]
+simpleTableRawLine :: [Int] -> Parser [Char] ParserState [String]
simpleTableRawLine indices = do
line <- many1Till anyChar newline
return (simpleTableSplitLine indices line)
-- Parse a table row and return a list of blocks (columns).
-simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]]
+simpleTableRow :: [Int] -> Parser [Char] ParserState [[Block]]
simpleTableRow indices = do
notFollowedBy' simpleTableFooter
firstLine <- simpleTableRawLine indices
@@ -745,8 +750,8 @@ simpleTableSplitLine indices line =
map removeLeadingTrailingSpace
$ tail $ splitByIndices (init indices) line
-simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
+simpleTableHeader :: Bool -- ^ Headerless table
+ -> Parser [Char] ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
@@ -766,28 +771,28 @@ simpleTableHeader headless = try $ do
-- Parse a simple table.
simpleTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
+ -> Parser [Char] ParserState Block
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return [])
+ Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
-- Simple tables get 0s for relative column widths (i.e., use default)
return $ Table c a (replicate (length a) 0) h l
where
sep = return () -- optional (simpleTableSep '-')
gridTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
-gridTable = gridTableWith block (return [])
+ -> Parser [Char] ParserState Block
+gridTable = gridTableWith parseBlocks
-table :: GenParser Char ParserState Block
+table :: Parser [Char] ParserState Block
table = gridTable False <|> simpleTable False <|>
gridTable True <|> simpleTable True <?> "table"
- --
+ --
-- inline
--
-inline :: GenParser Char ParserState Inline
+inline :: Parser [Char] ParserState Inline
inline = choice [ whitespace
, link
, str
@@ -805,44 +810,53 @@ inline = choice [ whitespace
, escapedChar
, symbol ] <?> "inline"
-hyphens :: GenParser Char ParserState Inline
+hyphens :: Parser [Char] ParserState Inline
hyphens = do
result <- many1 (char '-')
- option Space endline
+ option Space endline
-- don't want to treat endline after hyphen or dash as a space
return $ Str result
-escapedChar :: GenParser Char st Inline
+escapedChar :: Parser [Char] st Inline
escapedChar = do c <- escaped anyChar
return $ if c == ' ' -- '\ ' is null in RST
then Str ""
else Str [c]
-symbol :: GenParser Char ParserState Inline
-symbol = do
+symbol :: Parser [Char] ParserState Inline
+symbol = do
result <- oneOf specialChars
return $ Str [result]
-- parses inline code, between codeStart and codeEnd
-code :: GenParser Char ParserState Inline
-code = try $ do
+code :: Parser [Char] ParserState Inline
+code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
return $ Code nullAttr
$ removeLeadingTrailingSpace $ intercalate " " $ lines result
-emph :: GenParser Char ParserState Inline
-emph = enclosed (char '*') (char '*') inline >>=
+-- succeeds only if we're not right after a str (ie. in middle of word)
+atStart :: Parser [Char] ParserState a -> Parser [Char] ParserState a
+atStart p = do
+ pos <- getPosition
+ st <- getState
+ -- single quote start can't be right after str
+ guard $ stateLastStrPos st /= Just pos
+ p
+
+emph :: Parser [Char] ParserState Inline
+emph = enclosed (atStart $ char '*') (char '*') inline >>=
return . Emph . normalizeSpaces
-strong :: GenParser Char ParserState Inline
-strong = enclosed (string "**") (try $ string "**") inline >>=
+strong :: Parser [Char] ParserState Inline
+strong = enclosed (atStart $ string "**") (try $ string "**") inline >>=
return . Strong . normalizeSpaces
-- Parses inline interpreted text which is required to have the given role.
-- This decision is based on the role marker (if present),
-- and the current default interpreted text role.
-interpreted :: [Char] -> GenParser Char ParserState [Char]
+interpreted :: [Char] -> Parser [Char] ParserState [Char]
interpreted role = try $ do
state <- getState
if role == stateRstDefaultRole state
@@ -856,30 +870,30 @@ interpreted role = try $ do
-- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
-- but it should be good enough for most purposes
unmarkedInterpretedText = do
- result <- enclosed (char '`') (char '`') anyChar
+ result <- enclosed (atStart $ char '`') (char '`') anyChar
return result
-superscript :: GenParser Char ParserState Inline
+superscript :: Parser [Char] ParserState Inline
superscript = interpreted "sup" >>= \x -> return (Superscript [Str x])
-subscript :: GenParser Char ParserState Inline
+subscript :: Parser [Char] ParserState Inline
subscript = interpreted "sub" >>= \x -> return (Subscript [Str x])
-math :: GenParser Char ParserState Inline
+math :: Parser [Char] ParserState Inline
math = interpreted "math" >>= \x -> return (Math InlineMath x)
-whitespace :: GenParser Char ParserState Inline
+whitespace :: Parser [Char] ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
-str :: GenParser Char ParserState Inline
+str :: Parser [Char] ParserState Inline
str = do
- result <- many1 (noneOf (specialChars ++ "\t\n "))
- pos <- getPosition
- updateState $ \s -> s{ stateLastStrPos = Just pos }
+ let strChar = noneOf ("\t\n " ++ specialChars)
+ result <- many1 strChar
+ updateLastStrPos
return $ Str result
-- an endline character that can be treated as a space, not a structural break
-endline :: GenParser Char ParserState Inline
+endline :: Parser [Char] ParserState Inline
endline = try $ do
newline
notFollowedBy blankline
@@ -895,14 +909,14 @@ endline = try $ do
-- links
--
-link :: GenParser Char ParserState Inline
+link :: Parser [Char] ParserState Inline
link = choice [explicitLink, referenceLink, autoLink] <?> "link"
-explicitLink :: GenParser Char ParserState Inline
+explicitLink :: Parser [Char] ParserState Inline
explicitLink = try $ do
char '`'
notFollowedBy (char '`') -- `` marks start of inline code
- label' <- manyTill (notFollowedBy (char '`') >> inline)
+ label' <- manyTill (notFollowedBy (char '`') >> inline)
(try (spaces >> char '<'))
src <- manyTill (noneOf ">\n") (char '>')
skipSpaces
@@ -910,53 +924,53 @@ explicitLink = try $ do
return $ Link (normalizeSpaces label')
(escapeURI $ removeLeadingTrailingSpace src, "")
-referenceLink :: GenParser Char ParserState Inline
+referenceLink :: Parser [Char] ParserState Inline
referenceLink = try $ do
- label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
+ (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) >>~
+ char '_'
state <- getState
let keyTable = stateKeys state
- let isAnonKey x = case fromKey x of
- [Str ('_':_)] -> True
- _ -> False
- key <- option (toKey label') $
+ let isAnonKey (Key ('_':_)) = True
+ isAnonKey _ = False
+ key <- option (toKey $ stripTicks ref) $
do char '_'
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
if null anonKeys
- then pzero
+ then mzero
else return (head anonKeys)
- (src,tit) <- case lookupKeySrc keyTable key of
+ (src,tit) <- case M.lookup key keyTable of
Nothing -> fail "no corresponding key"
Just target -> return target
-- if anonymous link, remove key so it won't be used again
when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
- return $ Link (normalizeSpaces label') (src, tit)
+ return $ Link (normalizeSpaces label') (src, tit)
-autoURI :: GenParser Char ParserState Inline
+autoURI :: Parser [Char] ParserState Inline
autoURI = do
(orig, src) <- uri
return $ Link [Str orig] (src, "")
-autoEmail :: GenParser Char ParserState Inline
+autoEmail :: Parser [Char] ParserState Inline
autoEmail = do
(orig, src) <- emailAddress
return $ Link [Str orig] (src, "")
-autoLink :: GenParser Char ParserState Inline
+autoLink :: Parser [Char] ParserState Inline
autoLink = autoURI <|> autoEmail
-- For now, we assume that all substitution references are for images.
-image :: GenParser Char ParserState Inline
+image :: Parser [Char] ParserState Inline
image = try $ do
char '|'
- ref <- manyTill inline (char '|')
+ (alt,ref) <- withRaw (manyTill inline (char '|'))
state <- getState
let keyTable = stateKeys state
- (src,tit) <- case lookupKeySrc keyTable (toKey ref) of
+ (src,tit) <- case M.lookup (toKey $ init ref) keyTable of
Nothing -> fail "no corresponding key"
Just target -> return target
- return $ Image (normalizeSpaces ref) (src, tit)
+ return $ Image (normalizeSpaces alt) (src, tit)
-note :: GenParser Char ParserState Inline
+note :: Parser [Char] ParserState Inline
note = try $ do
ref <- noteMarker
char '_'
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 67dfe6753..fe49a992e 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.TeXMath
Copyright : Copyright (C) 2007-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 348900d38..89f281ae8 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Textile
Copyright : Copyright (C) 2010-2012 Paul Rivier and John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : Paul Rivier <paul*rivier#demotera*com>
Stability : alpha
@@ -56,29 +56,34 @@ TODO : refactor common patterns across readers :
module Text.Pandoc.Readers.Textile ( readTextile) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
-import Text.ParserCombinators.Parsec
import Text.HTML.TagSoup.Match
import Data.Char ( digitToInt, isUpper )
import Control.Monad ( guard, liftM )
import Control.Applicative ((<$>), (*>), (<*))
-- | Parse a Textile text and return a Pandoc document.
-readTextile :: ParserState -- ^ Parser state, including options for parser
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
-readTextile state s =
- (readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n")
+readTextile :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Pandoc
+readTextile opts s =
+ (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n")
-- | Generate a Pandoc ADT from a textile document
-parseTextile :: GenParser Char ParserState Pandoc
+parseTextile :: Parser [Char] ParserState Pandoc
parseTextile = do
-- textile allows raw HTML and does smart punctuation by default
- updateState (\state -> state { stateParseRaw = True, stateSmart = True })
+ oldOpts <- stateOptions `fmap` getState
+ updateState $ \state -> state{ stateOptions =
+ oldOpts{ readerSmart = True
+ , readerParseRaw = True
+ , readerOldDashes = True
+ } }
many blankline
startPos <- getPosition
-- go through once just to get list of reference keys and notes
@@ -93,10 +98,10 @@ parseTextile = do
blocks <- parseBlocks
return $ Pandoc (Meta [] [] []) blocks -- FIXME
-noteMarker :: GenParser Char ParserState [Char]
+noteMarker :: Parser [Char] ParserState [Char]
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
-noteBlock :: GenParser Char ParserState [Char]
+noteBlock :: Parser [Char] ParserState [Char]
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
@@ -111,11 +116,11 @@ noteBlock = try $ do
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-- | Parse document blocks
-parseBlocks :: GenParser Char ParserState [Block]
+parseBlocks :: Parser [Char] ParserState [Block]
parseBlocks = manyTill block eof
-- | Block parsers list tried in definition order
-blockParsers :: [GenParser Char ParserState Block]
+blockParsers :: [Parser [Char] ParserState Block]
blockParsers = [ codeBlock
, header
, blockQuote
@@ -128,20 +133,20 @@ blockParsers = [ codeBlock
, nullBlock ]
-- | Any block in the order of definition of blockParsers
-block :: GenParser Char ParserState Block
+block :: Parser [Char] ParserState Block
block = choice blockParsers <?> "block"
-codeBlock :: GenParser Char ParserState Block
+codeBlock :: Parser [Char] ParserState Block
codeBlock = codeBlockBc <|> codeBlockPre
-codeBlockBc :: GenParser Char ParserState Block
+codeBlockBc :: Parser [Char] ParserState Block
codeBlockBc = try $ do
string "bc. "
contents <- manyTill anyLine blanklines
return $ CodeBlock ("",[],[]) $ unlines contents
-- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockPre :: GenParser Char ParserState Block
+codeBlockPre :: Parser [Char] ParserState Block
codeBlockPre = try $ do
htmlTag (tagOpen (=="pre") null)
result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak)
@@ -156,7 +161,7 @@ codeBlockPre = try $ do
return $ CodeBlock ("",[],[]) result'''
-- | Header of the form "hN. content" with N in 1..6
-header :: GenParser Char ParserState Block
+header :: Parser [Char] ParserState Block
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
@@ -165,14 +170,14 @@ header = try $ do
return $ Header level name
-- | Blockquote of the form "bq. content"
-blockQuote :: GenParser Char ParserState Block
+blockQuote :: Parser [Char] ParserState Block
blockQuote = try $ do
string "bq" >> optional attributes >> char '.' >> whitespace
BlockQuote . singleton <$> para
-- Horizontal rule
-hrule :: GenParser Char st Block
+hrule :: Parser [Char] st Block
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@@ -187,39 +192,39 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
-anyList :: GenParser Char ParserState Block
+anyList :: Parser [Char] ParserState Block
anyList = try $ ( (anyListAtDepth 1) <* blanklines )
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
-anyListAtDepth :: Int -> GenParser Char ParserState Block
+anyListAtDepth :: Int -> Parser [Char] ParserState Block
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: Int -> GenParser Char ParserState Block
+bulletListAtDepth :: Int -> Parser [Char] ParserState Block
bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
-bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block]
+bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
-orderedListAtDepth :: Int -> GenParser Char ParserState Block
+orderedListAtDepth :: Int -> Parser [Char] ParserState Block
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return (OrderedList (1, DefaultStyle, DefaultDelim) items)
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
-orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block]
+orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
-genericListItemAtDepth :: Char -> Int -> GenParser Char ParserState [Block]
+genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block]
genericListItemAtDepth c depth = try $ do
count depth (char c) >> optional attributes >> whitespace
p <- inlines
@@ -227,22 +232,22 @@ genericListItemAtDepth c depth = try $ do
return ((Plain p):sublist)
-- | A definition list is a set of consecutive definition items
-definitionList :: GenParser Char ParserState Block
+definitionList :: Parser [Char] ParserState Block
definitionList = try $ DefinitionList <$> many1 definitionListItem
-
+
-- | A definition list item in textile begins with '- ', followed by
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
-definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
definitionListItem = try $ do
string "- "
term <- many1Till inline (try (whitespace >> string ":="))
- def <- inlineDef <|> multilineDef
- return (term, def)
- where inlineDef :: GenParser Char ParserState [[Block]]
+ def' <- inlineDef <|> multilineDef
+ return (term, def')
+ where inlineDef :: Parser [Char] ParserState [[Block]]
inlineDef = liftM (\d -> [[Plain d]]) $ try (whitespace >> inlines)
- multilineDef :: GenParser Char ParserState [[Block]]
+ multilineDef :: Parser [Char] ParserState [[Block]]
multilineDef = try $ do
optional whitespace >> newline
s <- many1Till anyChar (try (string "=:" >> newline))
@@ -252,76 +257,76 @@ definitionListItem = try $ do
-- | This terminates a block such as a paragraph. Because of raw html
-- blocks support, we have to lookAhead for a rawHtmlBlock.
-blockBreak :: GenParser Char ParserState ()
+blockBreak :: Parser [Char] ParserState ()
blockBreak = try (newline >> blanklines >> return ()) <|>
(lookAhead rawHtmlBlock >> return ())
-- raw content
-- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: GenParser Char ParserState Block
+rawHtmlBlock :: Parser [Char] ParserState Block
rawHtmlBlock = try $ do
(_,b) <- htmlTag isBlockTag
optional blanklines
return $ RawBlock "html" b
-- | Raw block of LaTeX content
-rawLaTeXBlock' :: GenParser Char ParserState Block
+rawLaTeXBlock' :: Parser [Char] ParserState Block
rawLaTeXBlock' = do
- failIfStrict
+ guardEnabled Ext_raw_tex
RawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
-para :: GenParser Char ParserState Block
+para :: Parser [Char] ParserState Block
para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak
-- Tables
-
+
-- | A table cell spans until a pipe |
-tableCell :: GenParser Char ParserState TableCell
+tableCell :: Parser [Char] ParserState TableCell
tableCell = do
c <- many1 (noneOf "|\n")
content <- parseFromString (many1 inline) c
return $ [ Plain $ normalizeSpaces content ]
-- | A table row is made of many table cells
-tableRow :: GenParser Char ParserState [TableCell]
+tableRow :: Parser [Char] ParserState [TableCell]
tableRow = try $ ( char '|' *> (endBy1 tableCell (char '|')) <* newline)
-- | Many table rows
-tableRows :: GenParser Char ParserState [[TableCell]]
+tableRows :: Parser [Char] ParserState [[TableCell]]
tableRows = many1 tableRow
-- | Table headers are made of cells separated by a tag "|_."
-tableHeaders :: GenParser Char ParserState [TableCell]
+tableHeaders :: Parser [Char] ParserState [TableCell]
tableHeaders = let separator = (try $ string "|_.") in
try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline )
-
+
-- | A table with an optional header. Current implementation can
-- handle tables with and without header, but will parse cells
-- alignment attributes as content.
-table :: GenParser Char ParserState Block
+table :: Parser [Char] ParserState Block
table = try $ do
headers <- option [] tableHeaders
rows <- tableRows
blanklines
let nbOfCols = max (length headers) (length $ head rows)
- return $ Table []
+ return $ Table []
(replicate nbOfCols AlignDefault)
(replicate nbOfCols 0.0)
headers
rows
-
+
-- | Blocks like 'p' and 'table' do not need explicit block tag.
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: String -- ^ block tag name
- -> GenParser Char ParserState Block -- ^ implicit block
- -> GenParser Char ParserState Block
+ -> Parser [Char] ParserState Block -- ^ implicit block
+ -> Parser [Char] ParserState Block
maybeExplicitBlock name blk = try $ do
- optional $ try $ string name >> optional attributes >> char '.' >>
+ optional $ try $ string name >> optional attributes >> char '.' >>
((try whitespace) <|> endline)
blk
@@ -333,15 +338,15 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
-inline :: GenParser Char ParserState Inline
+inline :: Parser [Char] ParserState Inline
inline = choice inlineParsers <?> "inline"
-- | List of consecutive inlines before a newline
-inlines :: GenParser Char ParserState [Inline]
+inlines :: Parser [Char] ParserState [Inline]
inlines = manyTill inline newline
-- | Inline parsers tried in order
-inlineParsers :: [GenParser Char ParserState Inline]
+inlineParsers :: [Parser [Char] ParserState Inline]
inlineParsers = [ autoLink
, str
, whitespace
@@ -362,7 +367,7 @@ inlineParsers = [ autoLink
]
-- | Inline markups
-inlineMarkup :: GenParser Char ParserState Inline
+inlineMarkup :: Parser [Char] ParserState Inline
inlineMarkup = choice [ simpleInline (string "??") (Cite [])
, simpleInline (string "**") Strong
, simpleInline (string "__") Emph
@@ -375,29 +380,29 @@ inlineMarkup = choice [ simpleInline (string "??") (Cite [])
]
-- | Trademark, registered, copyright
-mark :: GenParser Char st Inline
+mark :: Parser [Char] st Inline
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
-reg :: GenParser Char st Inline
+reg :: Parser [Char] st Inline
reg = do
oneOf "Rr"
char ')'
return $ Str "\174"
-tm :: GenParser Char st Inline
+tm :: Parser [Char] st Inline
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ Str "\8482"
-copy :: GenParser Char st Inline
+copy :: Parser [Char] st Inline
copy = do
oneOf "Cc"
char ')'
return $ Str "\169"
-note :: GenParser Char ParserState Inline
+note :: Parser [Char] ParserState Inline
note = try $ do
ref <- (char '[' *> many1 digit <* char ']')
notes <- stateNotes <$> getState
@@ -405,7 +410,7 @@ note = try $ do
Nothing -> fail "note not found"
Just raw -> liftM Note $ parseFromString parseBlocks raw
--- | Special chars
+-- | Special chars
markupChars :: [Char]
markupChars = "\\[]*#_@~-+^|%="
@@ -421,17 +426,17 @@ wordBoundaries :: [Char]
wordBoundaries = markupChars ++ stringBreakers
-- | Parse a hyphened sequence of words
-hyphenedWords :: GenParser Char ParserState String
+hyphenedWords :: Parser [Char] ParserState String
hyphenedWords = try $ do
hd <- noneOf wordBoundaries
- tl <- many ( (noneOf wordBoundaries) <|>
+ tl <- many ( (noneOf wordBoundaries) <|>
try (oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) )
let wd = hd:tl
- option wd $ try $
+ option wd $ try $
(\r -> concat [wd, "-", r]) <$> (char '-' *> hyphenedWords)
-- | Any string
-str :: GenParser Char ParserState Inline
+str :: Parser [Char] ParserState Inline
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediatly
@@ -444,44 +449,57 @@ str = do
return $ Str fullStr
-- | Textile allows HTML span infos, we discard them
-htmlSpan :: GenParser Char ParserState Inline
+htmlSpan :: Parser [Char] ParserState Inline
htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') )
-- | Some number of space chars
-whitespace :: GenParser Char ParserState Inline
+whitespace :: Parser [Char] ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
-endline :: GenParser Char ParserState Inline
+endline :: Parser [Char] ParserState Inline
endline = try $ do
newline >> notFollowedBy blankline
return LineBreak
-rawHtmlInline :: GenParser Char ParserState Inline
+rawHtmlInline :: Parser [Char] ParserState Inline
rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag
-
--- | Raw LaTeX Inline
-rawLaTeXInline' :: GenParser Char ParserState Inline
+
+-- | Raw LaTeX Inline
+rawLaTeXInline' :: Parser [Char] ParserState Inline
rawLaTeXInline' = try $ do
- failIfStrict
+ guardEnabled Ext_raw_tex
rawLaTeXInline
--- | Textile standard link syntax is "label":target
-link :: GenParser Char ParserState Inline
-link = try $ do
+-- | Textile standard link syntax is "label":target. But we
+-- can also have ["label":target].
+link :: Parser [Char] ParserState Inline
+link = linkB <|> linkNoB
+
+linkNoB :: Parser [Char] ParserState Inline
+linkNoB = try $ do
+ name <- surrounded (char '"') inline
+ char ':'
+ let stopChars = "!.,;:"
+ url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline)))
+ return $ Link name (url, "")
+
+linkB :: Parser [Char] ParserState Inline
+linkB = try $ do
+ char '['
name <- surrounded (char '"') inline
char ':'
- url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;,:" >> (space <|> newline))))
+ url <- manyTill nonspaceChar (char ']')
return $ Link name (url, "")
-- | Detect plain links to http or email.
-autoLink :: GenParser Char ParserState Inline
+autoLink :: Parser [Char] ParserState Inline
autoLink = do
(orig, src) <- (try uri <|> try emailAddress)
return $ Link [Str orig] (src, "")
-- | image embedding
-image :: GenParser Char ParserState Inline
+image :: Parser [Char] ParserState Inline
image = try $ do
char '!' >> notFollowedBy space
src <- manyTill anyChar (lookAhead $ oneOf "!(")
@@ -489,49 +507,49 @@ image = try $ do
char '!'
return $ Image [Str alt] (src, alt)
-escapedInline :: GenParser Char ParserState Inline
+escapedInline :: Parser [Char] ParserState Inline
escapedInline = escapedEqs <|> escapedTag
-escapedEqs :: GenParser Char ParserState Inline
+escapedEqs :: Parser [Char] ParserState Inline
escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "=="))
-- | literal text escaped btw <notextile> tags
-escapedTag :: GenParser Char ParserState Inline
+escapedTag :: Parser [Char] ParserState Inline
escapedTag = Str <$>
(try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
-symbol :: GenParser Char ParserState Inline
+symbol :: Parser [Char] ParserState Inline
symbol = Str . singleton <$> oneOf wordBoundaries
-- | Inline code
-code :: GenParser Char ParserState Inline
+code :: Parser [Char] ParserState Inline
code = code1 <|> code2
-code1 :: GenParser Char ParserState Inline
+code1 :: Parser [Char] ParserState Inline
code1 = Code nullAttr <$> surrounded (char '@') anyChar
-code2 :: GenParser Char ParserState Inline
+code2 :: Parser [Char] ParserState Inline
code2 = do
htmlTag (tagOpen (=="tt") null)
Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
-attributes :: GenParser Char ParserState String
+attributes :: Parser [Char] ParserState String
attributes = choice [ enclosed (char '(') (char ')') anyChar,
enclosed (char '{') (char '}') anyChar,
enclosed (char '[') (char ']') anyChar]
-- | Parses material surrounded by a parser.
-surrounded :: GenParser Char st t -- ^ surrounding parser
- -> GenParser Char st a -- ^ content parser (to be used repeatedly)
- -> GenParser Char st [a]
+surrounded :: Parser [Char] st t -- ^ surrounding parser
+ -> Parser [Char] st a -- ^ content parser (to be used repeatedly)
+ -> Parser [Char] st [a]
surrounded border = enclosed border (try border)
-- | Inlines are most of the time of the same form
-simpleInline :: GenParser Char ParserState t -- ^ surrounding parser
+simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser
-> ([Inline] -> Inline) -- ^ Inline constructor
- -> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly)
+ -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly)
simpleInline border construct = surrounded border (inlineWithAttribute) >>=
return . construct . normalizeSpaces
where inlineWithAttribute = (try $ optional attributes) >> inline