diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 109 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 34 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 1423 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Native.hs | 33 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 316 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/TeXMath.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 204 |
8 files changed, 1216 insertions, 919 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 62f7c61a0..685fa1ee4 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,6 +1,6 @@ module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Data.Char (toUpper, isDigit) -import Text.Pandoc.Parsing (ParserState(..)) +import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder import Text.XML.Light @@ -455,13 +455,13 @@ List of all DocBook tags, with [x] indicating implemented, [x] tocfront - An entry in a table of contents for a front matter component [x] toclevel1 - A top-level entry within a table of contents entry for a chapter-like component -[x] toclevel2 - A second-level entry within a table of contents entry for a +[x] toclevel2 - A second-level entry within a table of contents entry for a chapter-like component -[x] toclevel3 - A third-level entry within a table of contents entry for a +[x] toclevel3 - A third-level entry within a table of contents entry for a chapter-like component -[x] toclevel4 - A fourth-level entry within a table of contents entry for a +[x] toclevel4 - A fourth-level entry within a table of contents entry for a chapter-like component -[x] toclevel5 - A fifth-level entry within a table of contents entry for a +[x] toclevel5 - A fifth-level entry within a table of contents entry for a chapter-like component [x] tocpart - An entry in a table of contents for a part of a book [ ] token - A unit of information @@ -503,7 +503,7 @@ data DBState = DBState{ dbSectionLevel :: Int , dbBook :: Bool } deriving Show -readDocBook :: ParserState -> String -> Pandoc +readDocBook :: ReaderOptions -> String -> Pandoc readDocBook _ inp = setTitle (dbDocTitle st') $ setAuthors (dbDocAuthors st') $ setDate (dbDocDate st') @@ -574,7 +574,7 @@ addToStart toadd bs = (Para xs : rest) -> para (toadd <> fromList xs) <> fromList rest _ -> bs --- function that is used by both mediaobject (in parseBlock) +-- function that is used by both mediaobject (in parseBlock) -- and inlinemediaobject (in parseInline) getImage :: Element -> DB Inlines getImage e = do diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 536bddd39..e5c310ffc 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -19,10 +19,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.HTML Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable Conversion of HTML to 'Pandoc' document. @@ -36,18 +36,17 @@ module Text.Pandoc.Readers.HTML ( readHtml , isCommentTag ) where -import Text.ParserCombinators.Parsec -import Text.ParserCombinators.Parsec.Pos import Text.HTML.TagSoup import Text.HTML.TagSoup.Match import Text.Pandoc.Definition import Text.Pandoc.Builder (text, toList) import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) -import Data.Char ( isDigit, toLower ) -import Control.Monad ( liftM, guard, when ) +import Data.Char ( isDigit ) +import Control.Monad ( liftM, guard, when, mzero ) isSpace :: Char -> Bool isSpace ' ' = True @@ -56,11 +55,11 @@ isSpace '\n' = True isSpace _ = False -- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: ParserState -- ^ Parser state +readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) -> Pandoc -readHtml st inp = Pandoc meta blocks - where blocks = readWith parseBody st rest +readHtml opts inp = Pandoc meta blocks + where blocks = readWith parseBody def{ stateOptions = opts } rest tags = canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp hasHeader = any (~== TagOpen "head" []) tags @@ -68,7 +67,7 @@ readHtml st inp = Pandoc meta blocks then parseHeader tags else (Meta [] [] [], tags) -type TagParser = GenParser (Tag String) ParserState +type TagParser = Parser [Tag String] ParserState -- TODO - fix this - not every header has a title tag parseHeader :: [Tag String] -> (Meta, [Tag String]) @@ -96,18 +95,6 @@ block = choice , pRawHtmlBlock ] --- repeated in SelfContained -- consolidate eventually -renderTags' :: [Tag String] -> String -renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } - pList :: TagParser [Block] pList = pBulletList <|> pOrderedList <|> pDefinitionList @@ -126,25 +113,22 @@ pBulletList = try $ do pOrderedList :: TagParser [Block] pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) - st <- getState - let (start, style) = if stateStrict st - then (1, DefaultStyle) - else (sta', sty') - where sta = fromMaybe "1" $ - lookup "start" attribs - sta' = if all isDigit sta - then read sta - else 1 - sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> DefaultStyle + let (start, style) = (sta', sty') + where sta = fromMaybe "1" $ + lookup "start" attribs + sta' = if all isDigit sta + then read sta + else 1 + sty = fromMaybe (fromMaybe "" $ + lookup "style" attribs) $ + lookup "class" attribs + sty' = case sty of + "lower-roman" -> LowerRoman + "upper-roman" -> UpperRoman + "lower-alpha" -> LowerAlpha + "upper-alpha" -> UpperAlpha + "decimal" -> Decimal + _ -> DefaultStyle let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && not (t ~== TagClose "ol")) @@ -196,8 +180,8 @@ pRawTag = do pRawHtmlBlock :: TagParser [Block] pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag - state <- getState - if stateParseRaw state && not (null raw) + parseRaw <- getOption readerParseRaw + if parseRaw && not (null raw) then return [RawBlock "html" raw] else return [] @@ -235,7 +219,7 @@ pSimpleTable = try $ do rows <- pOptInTag "tbody" $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") skipMany pBlank - TagClose _ <- pSatisfy (~== TagClose "table") + TagClose _ <- pSatisfy (~== TagClose "table") let cols = maximum $ map length rows let aligns = replicate cols AlignLeft let widths = replicate cols 0 @@ -281,10 +265,7 @@ pCodeBlock = try $ do let attribsId = fromMaybe "" $ lookup "id" attr let attribsClasses = words $ fromMaybe "" $ lookup "class" attr let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - st <- getState - let attribs = if stateStrict st - then ("",[],[]) - else (attribsId, attribsClasses, attribsKV) + let attribs = (attribsId, attribsClasses, attribsKV) return [CodeBlock attribs result] inline :: TagParser [Inline] @@ -310,7 +291,7 @@ pLocation = do pSat :: (Tag String -> Bool) -> TagParser (Tag String) pSat f = do pos <- getPosition - token show (const pos) (\x -> if f x then Just x else Nothing) + token show (const pos) (\x -> if f x then Just x else Nothing) pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String) pSatisfy f = try $ optional pLocation >> pSat f @@ -332,14 +313,13 @@ pStrong :: TagParser [Inline] pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong pSuperscript :: TagParser [Inline] -pSuperscript = failIfStrict >> pInlinesInTags "sup" Superscript +pSuperscript = pInlinesInTags "sup" Superscript pSubscript :: TagParser [Inline] -pSubscript = failIfStrict >> pInlinesInTags "sub" Subscript +pSubscript = pInlinesInTags "sub" Subscript pStrikeout :: TagParser [Inline] pStrikeout = do - failIfStrict pInlinesInTags "s" Strikeout <|> pInlinesInTags "strike" Strikeout <|> pInlinesInTags "del" Strikeout <|> @@ -381,8 +361,8 @@ pCode = try $ do pRawHtmlInline :: TagParser [Inline] pRawHtmlInline = do result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag - state <- getState - if stateParseRaw state + parseRaw <- getOption readerParseRaw + if parseRaw then return [RawInline "html" $ renderTags' [result]] else return [] @@ -417,7 +397,7 @@ pCloses tagtype = try $ do (TagClose "ul") | tagtype == "li" -> return () (TagClose "ol") | tagtype == "li" -> return () (TagClose "dl") | tagtype == "li" -> return () - _ -> pzero + _ -> mzero pTagText :: TagParser [Inline] pTagText = try $ do @@ -432,11 +412,11 @@ pBlank = try $ do (TagText str) <- pSatisfy isTagText guard $ all isSpace str -pTagContents :: GenParser Char ParserState Inline +pTagContents :: Parser [Char] ParserState Inline pTagContents = pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad -pStr :: GenParser Char ParserState Inline +pStr :: Parser [Char] ParserState Inline pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) @@ -455,13 +435,13 @@ isSpecial '\8220' = True isSpecial '\8221' = True isSpecial _ = False -pSymbol :: GenParser Char ParserState Inline +pSymbol :: Parser [Char] ParserState Inline pSymbol = satisfy isSpecial >>= return . Str . (:[]) isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML -pBad :: GenParser Char ParserState Inline +pBad :: Parser [Char] ParserState Inline pBad = do c <- satisfy isBad let c' = case c of @@ -495,7 +475,7 @@ pBad = do _ -> '?' return $ Str [c'] -pSpace :: GenParser Char ParserState Inline +pSpace :: Parser [Char] ParserState Inline pSpace = many1 (satisfy isSpace) >> return Space -- @@ -593,20 +573,19 @@ _ `closes` _ = False --- parsers for use in markdown, textile readers -- | Matches a stretch of HTML in balanced tags. -htmlInBalanced :: (Tag String -> Bool) -> GenParser Char ParserState String +htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String htmlInBalanced f = try $ do (TagOpen t _, tag) <- htmlTag f guard $ '/' `notElem` tag -- not a self-closing tag - let nonTagChunk = many1 $ satisfy (/= '<') let stopper = htmlTag (~== TagClose t) let anytag = liftM snd $ htmlTag (const True) contents <- many $ notFollowedBy' stopper >> - (nonTagChunk <|> htmlInBalanced (const True) <|> anytag) + (htmlInBalanced f <|> anytag <|> count 1 anyChar) endtag <- liftM snd stopper return $ tag ++ concat contents ++ endtag -- | Matches a tag meeting a certain condition. -htmlTag :: (Tag String -> Bool) -> GenParser Char ParserState (Tag String, String) +htmlTag :: (Tag String -> Bool) -> Parser [Char] ParserState (Tag String, String) htmlTag f = try $ do lookAhead (char '<') (next : _) <- getInput >>= return . canonicalizeTags . parseTags @@ -617,7 +596,7 @@ htmlTag f = try $ do count (length s + 4) anyChar skipMany (satisfy (/='>')) char '>' - return (next, "<!--" ++ s ++ "-->") + return (next, "<!--" ++ s ++ "-->") _ -> do rendered <- manyTill anyChar (char '>') return (next, rendered ++ ">") diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 3178945e4..4a5a14d6a 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -33,10 +33,10 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, handleIncludes ) where -import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional) import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Parsing +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad @@ -47,12 +47,13 @@ import Data.Monoid import System.FilePath (replaceExtension) import Data.List (intercalate) import qualified Data.Map as M +import qualified Control.Exception as E -- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: ParserState -- ^ Parser state, including options for parser +readLaTeX :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) -> Pandoc -readLaTeX = readWith parseLaTeX +readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts } parseLaTeX :: LP Pandoc parseLaTeX = do @@ -64,7 +65,7 @@ parseLaTeX = do let date' = stateDate st return $ Pandoc (Meta title' authors' date') $ toList bs -type LP = GenParser Char ParserState +type LP = Parser [Char] ParserState anyControlSeq :: LP String anyControlSeq = do @@ -186,7 +187,7 @@ inline = (mempty <$ comment) <|> (mathInline $ char '$' *> mathChars <* char '$') <|> (superscript <$> (char '^' *> tok)) <|> (subscript <$> (char '_' *> tok)) - <|> (failUnlessLHS *> char '|' *> doLHSverb) + <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) <|> (str <$> count 1 tildeEscape) <|> (str <$> string "]") <|> (str <$> string "#") -- TODO print warning? @@ -230,14 +231,14 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawInline "latex" . (contseq ++) . snd) <$> - (getState >>= guard . stateParseRaw >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> (withRaw optargs)) ignoreBlocks :: String -> (String, LP Blocks) ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawBlock "latex" . (contseq ++) . snd) <$> - (getState >>= guard . stateParseRaw >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> (withRaw optargs)) blockCommands :: M.Map String (LP Blocks) blockCommands = M.fromList $ @@ -321,7 +322,7 @@ inlineCommand :: LP Inlines inlineCommand = try $ do name <- anyControlSeq guard $ not $ isBlockCommand name - parseRaw <- stateParseRaw `fmap` getState + parseRaw <- getOption readerParseRaw star <- option "" (string "*") let name' = name ++ star let rawargs = withRaw (skipopts *> option "" dimenarg @@ -336,7 +337,7 @@ inlineCommand = try $ do Nothing -> raw unlessParseRaw :: LP () -unlessParseRaw = getState >>= guard . not . stateParseRaw +unlessParseRaw = getOption readerParseRaw >>= guard . not isBlockCommand :: String -> Bool isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands @@ -660,7 +661,7 @@ environment = do rawEnv :: String -> LP Blocks rawEnv name = do let addBegin x = "\\begin{" ++ name ++ "}" ++ x - parseRaw <- stateParseRaw `fmap` getState + parseRaw <- getOption readerParseRaw if parseRaw then (rawBlock "latex" . addBegin) <$> (withRaw (env name blocks) >>= applyMacros' . snd) @@ -671,8 +672,9 @@ handleIncludes :: String -> IO String handleIncludes [] = return [] handleIncludes ('\\':xs) = case runParser include defaultParserState "input" ('\\':xs) of - Right (fs, rest) -> do let getfile f = catch (UTF8.readFile f) - (\_ -> return "") + Right (fs, rest) -> do let getfile f = E.catch (UTF8.readFile f) + (\e -> let _ = (e :: E.SomeException) + in return "") yss <- mapM getfile fs (intercalate "\n" yss ++) `fmap` handleIncludes rest @@ -713,10 +715,10 @@ verbatimEnv = do rest <- getInput return (r,rest) -rawLaTeXBlock :: GenParser Char ParserState String +rawLaTeXBlock :: Parser [Char] ParserState String rawLaTeXBlock = snd <$> withRaw (environment <|> blockCommand) -rawLaTeXInline :: GenParser Char ParserState Inline +rawLaTeXInline :: Parser [Char] ParserState Inline rawLaTeXInline = do (res, raw) <- withRaw inlineCommand if res == mempty @@ -735,7 +737,7 @@ environments = M.fromList , ("itemize", bulletList <$> listenv "itemize" (many item)) , ("description", definitionList <$> listenv "description" (many descItem)) , ("enumerate", ordered_list) - , ("code", failUnlessLHS *> + , ("code", guardEnabled Ext_literate_haskell *> (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code")) , ("verbatim", codeBlock <$> (verbEnv "verbatim")) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 51a727996..2407e137c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,6 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE FlexibleInstances, TypeSynonymInstances, + GeneralizedNewtypeDeriving #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -33,26 +35,34 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where import Data.List ( transpose, sortBy, findIndex, intercalate ) import qualified Data.Map as M import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum ) +import Data.Char ( isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition -import Text.Pandoc.Generic -import Text.Pandoc.Shared -import Text.Pandoc.Parsing +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Options +import Text.Pandoc.Shared hiding (compactify) +import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) -import Text.ParserCombinators.Parsec -import Control.Monad (when, liftM, guard, mzero) +import Data.Monoid (mconcat, mempty) +import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Monad import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) +import qualified Data.Set as Set -- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse (assuming @'\n'@ line endings) +readMarkdown :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n") +readMarkdown opts s = + (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") + +trimInlinesF :: F Inlines -> F Inlines +trimInlinesF = liftM trimInlines -- -- Constants and data structure definitions @@ -70,7 +80,7 @@ isHruleChar '-' = True isHruleChar '_' = True isHruleChar _ = False -setextHChars :: [Char] +setextHChars :: String setextHChars = "=-" isBlank :: Char -> Bool @@ -83,71 +93,72 @@ isBlank _ = False -- auxiliary functions -- -indentSpaces :: GenParser Char ParserState [Char] +isNull :: F Inlines -> Bool +isNull ils = B.isNull $ runF ils def + +spnl :: Parser [Char] st () +spnl = try $ do + skipSpaces + optional newline + skipSpaces + notFollowedBy (char '\n') + +indentSpaces :: Parser [Char] ParserState String indentSpaces = try $ do - state <- getState - let tabStop = stateTabStop state + tabStop <- getOption readerTabStop count tabStop (char ' ') <|> string "\t" <?> "indentation" -nonindentSpaces :: GenParser Char ParserState [Char] +nonindentSpaces :: Parser [Char] ParserState String nonindentSpaces = do - state <- getState - let tabStop = stateTabStop state + tabStop <- getOption readerTabStop sps <- many (char ' ') - if length sps < tabStop + if length sps < tabStop then return sps else unexpected "indented line" -skipNonindentSpaces :: GenParser Char ParserState () +skipNonindentSpaces :: Parser [Char] ParserState () skipNonindentSpaces = do - state <- getState - atMostSpaces (stateTabStop state - 1) + tabStop <- getOption readerTabStop + atMostSpaces (tabStop - 1) -atMostSpaces :: Int -> GenParser Char ParserState () +atMostSpaces :: Int -> Parser [Char] ParserState () atMostSpaces 0 = notFollowedBy (char ' ') atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return () -litChar :: GenParser Char ParserState Char +litChar :: Parser [Char] ParserState Char litChar = escapedChar' <|> noneOf "\n" <|> (newline >> notFollowedBy blankline >> return ' ') --- | Fail unless we're at beginning of a line. -failUnlessBeginningOfLine :: GenParser tok st () -failUnlessBeginningOfLine = do - pos <- getPosition - if sourceColumn pos == 1 then return () else fail "not beginning of line" - -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: GenParser Char ParserState Inline - -> GenParser Char ParserState [Inline] -inlinesInBalancedBrackets parser = try $ do +inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines) +inlinesInBalancedBrackets = try $ do char '[' - result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser - guard (res == "[") - bal <- inlinesInBalancedBrackets parser - return $ [Str "["] ++ bal ++ [Str "]"]) - <|> (count 1 parser)) + result <- manyTill ( (do lookAhead $ try $ do x <- inline + guard (runF x def == B.str "[") + bal <- inlinesInBalancedBrackets + return $ (\x -> B.str "[" <> x <> B.str "]") <$> bal) + <|> inline) (char ']') - return $ concat result + return $ mconcat result -- -- document structure -- -titleLine :: GenParser Char ParserState [Inline] +titleLine :: Parser [Char] ParserState (F Inlines) titleLine = try $ do char '%' skipSpaces res <- many $ (notFollowedBy newline >> inline) <|> try (endline >> whitespace) newline - return $ normalizeSpaces res + return $ trimInlinesF $ mconcat res -authorsLine :: GenParser Char ParserState [[Inline]] -authorsLine = try $ do +authorsLine :: Parser [Char] ParserState (F [Inlines]) +authorsLine = try $ do char '%' skipSpaces authors <- sepEndBy (many (notFollowedBy (satisfy $ \c -> @@ -155,67 +166,63 @@ authorsLine = try $ do (char ';' <|> try (newline >> notFollowedBy blankline >> spaceChar)) newline - return $ filter (not . null) $ map normalizeSpaces authors + return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors -dateLine :: GenParser Char ParserState [Inline] +dateLine :: Parser [Char] ParserState (F Inlines) dateLine = try $ do char '%' skipSpaces - date <- manyTill inline newline - return $ normalizeSpaces date - -titleBlock :: GenParser Char ParserState ([Inline], [[Inline]], [Inline]) -titleBlock = try $ do - failIfStrict - title <- option [] titleLine - author <- option [] authorsLine - date <- option [] dateLine + trimInlinesF . mconcat <$> manyTill inline newline + +titleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +titleBlock = pandocTitleBlock <|> mmdTitleBlock + +pandocTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +pandocTitleBlock = try $ do + guardEnabled Ext_pandoc_title_block + title <- option mempty titleLine + author <- option (return []) authorsLine + date <- option mempty dateLine optional blanklines return (title, author, date) -parseMarkdown :: GenParser Char ParserState Pandoc +mmdTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +mmdTitleBlock = try $ do + guardEnabled Ext_mmd_title_block + kvPairs <- many1 kvPair + blanklines + let title = maybe mempty return $ lookup "title" kvPairs + let author = maybe mempty (\x -> return [x]) $ lookup "author" kvPairs + let date = maybe mempty return $ lookup "date" kvPairs + return (title, author, date) + +kvPair :: Parser [Char] ParserState (String, Inlines) +kvPair = try $ do + key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') + val <- manyTill anyChar + (try $ newline >> lookAhead (blankline <|> nonspaceChar)) + let key' = concat $ words $ map toLower key + let val' = trimInlines $ B.text val + return (key',val') + +parseMarkdown :: Parser [Char] ParserState Pandoc parseMarkdown = do -- markdown allows raw HTML - updateState (\state -> state { stateParseRaw = True }) - startPos <- getPosition - -- go through once just to get list of reference keys and notes - -- docMinusKeys is the raw document with blanks where the keys/notes were... - st <- getState - let firstPassParser = referenceKey - <|> (if stateStrict st then pzero else noteBlock) - <|> liftM snd (withRaw codeBlockDelimited) - <|> lineClump - docMinusKeys <- liftM concat $ manyTill firstPassParser eof - setInput docMinusKeys - setPosition startPos - st' <- getState - let reversedNotes = stateNotes st' - updateState $ \s -> s { stateNotes = reverse reversedNotes } - -- now parse it for real... - (title, author, date) <- option ([],[],[]) titleBlock + updateState $ \state -> state { stateOptions = + let oldOpts = stateOptions state in + oldOpts{ readerParseRaw = True } } + (title, authors, date) <- option (mempty,return [],mempty) titleBlock blocks <- parseBlocks - let doc = Pandoc (Meta title author date) $ filter (/= Null) blocks - -- if there are labeled examples, change references into numbers - examples <- liftM stateExamples getState - let handleExampleRef :: Inline -> Inline - handleExampleRef z@(Str ('@':xs)) = - case M.lookup xs examples of - Just n -> Str (show n) - Nothing -> z - handleExampleRef z = z - if M.null examples - then return doc - else return $ bottomUp handleExampleRef doc - --- --- initial pass for references and notes --- + st <- getState + return $ B.setTitle (runF title st) + $ B.setAuthors (runF authors st) + $ B.setDate (runF date st) + $ B.doc $ runF blocks st -referenceKey :: GenParser Char ParserState [Char] +referenceKey :: Parser [Char] ParserState (F Blocks) referenceKey = try $ do - startPos <- getPosition skipNonindentSpaces - lab <- reference + (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') let sourceURL = liftM unwords $ many $ try $ do @@ -223,22 +230,20 @@ referenceKey = try $ do skipMany spaceChar optional $ newline >> notFollowedBy blankline skipMany spaceChar - notFollowedBy' reference + notFollowedBy' (() <$ reference) many1 $ escapedChar' <|> satisfy (not . isBlank) let betweenAngles = try $ char '<' >> manyTill (escapedChar' <|> litChar) (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle blanklines - endPos <- getPosition let target = (escapeURI $ removeTrailingSpace src, tit) st <- getState let oldkeys = stateKeys st - updateState $ \s -> s { stateKeys = M.insert (toKey lab) target oldkeys } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + updateState $ \s -> s { stateKeys = M.insert (toKey raw) target oldkeys } + return $ return mempty -referenceTitle :: GenParser Char ParserState String +referenceTitle :: Parser [Char] ParserState String referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words) @@ -247,25 +252,38 @@ referenceTitle = try $ do notFollowedBy (noneOf ")\n"))) return $ fromEntities tit -noteMarker :: GenParser Char ParserState [Char] +-- | PHP Markdown Extra style abbreviation key. Currently +-- we just skip them, since Pandoc doesn't have an element for +-- an abbreviation. +abbrevKey :: Parser [Char] ParserState (F Blocks) +abbrevKey = do + guardEnabled Ext_abbreviations + try $ do + char '*' + reference + char ':' + skipMany (satisfy (/= '\n')) + blanklines + return $ return mempty + +noteMarker :: Parser [Char] ParserState String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') -rawLine :: GenParser Char ParserState [Char] +rawLine :: Parser [Char] ParserState String rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: GenParser Char ParserState [Char] +rawLines :: Parser [Char] ParserState String rawLines = do first <- anyLine rest <- many rawLine return $ unlines (first:rest) -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parser [Char] ParserState (F Blocks) noteBlock = try $ do - startPos <- getPosition skipNonindentSpaces ref <- noteMarker char ':' @@ -275,87 +293,75 @@ noteBlock = try $ do (try (blankline >> indentSpaces >> notFollowedBy blankline)) optional blanklines - endPos <- getPosition - let newnote = (ref, (intercalate "\n" raw) ++ "\n\n") - st <- getState - let oldnotes = stateNotes st - updateState $ \s -> s { stateNotes = newnote : oldnotes } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + parsed <- parseFromString parseBlocks $ unlines raw ++ "\n" + let newnote = (ref, parsed) + updateState $ \s -> s { stateNotes' = newnote : stateNotes' s } + return mempty -- -- parsing blocks -- -parseBlocks :: GenParser Char ParserState [Block] -parseBlocks = manyTill block eof - -block :: GenParser Char ParserState Block -block = do - st <- getState - choice (if stateStrict st - then [ header - , codeBlockIndented - , blockQuote - , hrule - , bulletList - , orderedList - , htmlBlock - , para - , plain - , nullBlock ] - else [ codeBlockDelimited - , macro - , header - , table - , codeBlockIndented - , lhsCodeBlock - , blockQuote - , hrule - , bulletList - , orderedList - , definitionList - , rawTeXBlock - , para - , rawHtmlBlocks - , plain - , nullBlock ]) <?> "block" +parseBlocks :: Parser [Char] ParserState (F Blocks) +parseBlocks = mconcat <$> manyTill block eof + +block :: Parser [Char] ParserState (F Blocks) +block = choice [ codeBlockFenced + , codeBlockBackticks + , guardEnabled Ext_latex_macros *> (mempty <$ macro) + , header + , rawTeXBlock + , htmlBlock + , table + , codeBlockIndented + , lhsCodeBlock + , blockQuote + , hrule + , bulletList + , orderedList + , definitionList + , noteBlock + , referenceKey + , abbrevKey + , para + , plain + ] <?> "block" -- -- header blocks -- -header :: GenParser Char ParserState Block +header :: Parser [Char] ParserState (F Blocks) header = setextHeader <|> atxHeader <?> "header" -atxHeader :: GenParser Char ParserState Block +atxHeader :: Parser [Char] ParserState (F Blocks) atxHeader = try $ do level <- many1 (char '#') >>= return . length notFollowedBy (char '.' <|> char ')') -- this would be a list skipSpaces - text <- manyTill inline atxClosing >>= return . normalizeSpaces - return $ Header level text + text <- trimInlinesF . mconcat <$> manyTill inline atxClosing + return $ B.header level <$> text -atxClosing :: GenParser Char st [Char] +atxClosing :: Parser [Char] st String atxClosing = try $ skipMany (char '#') >> blanklines -setextHeader :: GenParser Char ParserState Block +setextHeader :: Parser [Char] ParserState (F Blocks) setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- many1Till inline newline + text <- trimInlinesF . mconcat <$> many1Till inline newline underlineChar <- oneOf setextHChars many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - return $ Header level (normalizeSpaces text) + return $ B.header level <$> text -- -- hrule block -- -hrule :: GenParser Char st Block +hrule :: Parser [Char] st (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -363,32 +369,26 @@ hrule = try $ do skipMany (spaceChar <|> char start) newline optional blanklines - return HorizontalRule + return $ return B.horizontalRule -- -- code blocks -- -indentedLine :: GenParser Char ParserState [Char] +indentedLine :: Parser [Char] ParserState String indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") blockDelimiter :: (Char -> Bool) -> Maybe Int - -> GenParser Char st (Int, (String, [String], [(String, String)]), Char) + -> Parser [Char] st Int blockDelimiter f len = try $ do c <- lookAhead (satisfy f) - size <- case len of - Just l -> count l (char c) >> many (char c) >> return l - Nothing -> count 3 (char c) >> many (char c) >>= - return . (+ 3) . length - many spaceChar - attr <- option ([],[],[]) - $ attributes -- ~~~ {.ruby} - <|> (many1 alphaNum >>= \x -> return ([],[x],[])) -- github variant ```ruby - blankline - return (size, attr, c) + case len of + Just l -> count l (char c) >> many (char c) >> return l + Nothing -> count 3 (char c) >> many (char c) >>= + return . (+ 3) . length -attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) +attributes :: Parser [Char] st (String, [String], [(String, String)]) attributes = try $ do char '{' spnl @@ -400,28 +400,28 @@ attributes = try $ do | otherwise = firstNonNull xs return (firstNonNull $ reverse ids, concat classes, concat keyvals) -attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) +attribute :: Parser [Char] st (String, [String], [(String, String)]) attribute = identifierAttr <|> classAttr <|> keyValAttr -identifier :: GenParser Char st [Char] +identifier :: Parser [Char] st String identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." return (first:rest) -identifierAttr :: GenParser Char st ([Char], [a], [a1]) +identifierAttr :: Parser [Char] st (String, [a], [a1]) identifierAttr = try $ do char '#' result <- identifier return (result,[],[]) -classAttr :: GenParser Char st ([Char], [[Char]], [a]) +classAttr :: Parser [Char] st (String, [String], [a]) classAttr = try $ do char '.' result <- identifier return ("",[result],[]) -keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])]) +keyValAttr :: Parser [Char] st (String, [a], [(String, String)]) keyValAttr = try $ do key <- identifier char '=' @@ -430,33 +430,49 @@ keyValAttr = try $ do <|> many nonspaceChar return ("",[],[(key,val)]) -codeBlockDelimited :: GenParser Char st Block -codeBlockDelimited = try $ do - (size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing - contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) +codeBlockFenced :: Parser [Char] ParserState (F Blocks) +codeBlockFenced = try $ do + guardEnabled Ext_fenced_code_blocks + size <- blockDelimiter (=='~') Nothing + skipMany spaceChar + attr <- option ([],[],[]) $ + guardEnabled Ext_fenced_code_attributes >> attributes + blankline + contents <- manyTill anyLine (blockDelimiter (=='~') (Just size)) + blanklines + return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + +codeBlockBackticks :: Parser [Char] ParserState (F Blocks) +codeBlockBackticks = try $ do + guardEnabled Ext_backtick_code_blocks + blockDelimiter (=='`') (Just 3) + skipMany spaceChar + cls <- many1 alphaNum + blankline + contents <- manyTill anyLine $ blockDelimiter (=='`') (Just 3) blanklines - return $ CodeBlock attr $ intercalate "\n" contents + return $ return $ B.codeBlockWith ("",[cls],[]) $ intercalate "\n" contents -codeBlockIndented :: GenParser Char ParserState Block +codeBlockIndented :: Parser [Char] ParserState (F Blocks) codeBlockIndented = do - contents <- many1 (indentedLine <|> + contents <- many1 (indentedLine <|> try (do b <- blanklines l <- indentedLine return $ b ++ l)) optional blanklines - st <- getState - return $ CodeBlock ("", stateIndentedCodeClasses st, []) $ + classes <- getOption readerIndentedCodeClasses + return $ return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock :: Parser [Char] ParserState (F Blocks) lhsCodeBlock = do - failUnlessLHS - liftM (CodeBlock ("",["sourceCode","literate","haskell"],[])) - (lhsCodeBlockBird <|> lhsCodeBlockLaTeX) - <|> liftM (CodeBlock ("",["sourceCode","haskell"],[])) - lhsCodeBlockInverseBird + guardEnabled Ext_literate_haskell + (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)) + <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> + lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: GenParser Char ParserState String +lhsCodeBlockLaTeX :: Parser [Char] ParserState String lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline @@ -464,13 +480,13 @@ lhsCodeBlockLaTeX = try $ do blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: GenParser Char ParserState String +lhsCodeBlockBird :: Parser [Char] ParserState String lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: GenParser Char ParserState String +lhsCodeBlockInverseBird :: Parser [Char] ParserState String lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: Char -> GenParser Char ParserState String +lhsCodeBlockBirdWith :: Char -> Parser [Char] ParserState String lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -482,25 +498,24 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Char -> GenParser Char st [Char] +birdTrackLine :: Char -> Parser [Char] st String birdTrackLine c = try $ do char c -- allow html tags on left margin: when (c == '<') $ notFollowedBy letter manyTill anyChar newline - -- -- block quotes -- -emailBlockQuoteStart :: GenParser Char ParserState Char +emailBlockQuoteStart :: Parser [Char] ParserState Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ') -emailBlockQuote :: GenParser Char ParserState [[Char]] +emailBlockQuote :: Parser [Char] ParserState [String] emailBlockQuote = try $ do emailBlockQuoteStart - raw <- sepBy (many (nonEndline <|> + raw <- sepBy (many (nonEndline <|> (try (endline >> notFollowedBy emailBlockQuoteStart >> return '\n')))) (try (newline >> emailBlockQuoteStart)) @@ -508,51 +523,50 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: GenParser Char ParserState Block -blockQuote = do +blockQuote :: Parser [Char] ParserState (F Blocks) +blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" - return $ BlockQuote contents - + return $ B.blockQuote <$> contents + -- -- list blocks -- -bulletListStart :: GenParser Char ParserState () +bulletListStart :: Parser [Char] ParserState () bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces - notFollowedBy' hrule -- because hrules start out just like lists + notFollowedBy' (() <$ hrule) -- because hrules start out just like lists satisfy isBulletListMarker spaceChar skipSpaces -anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) +anyOrderedListStart :: Parser [Char] ParserState (Int, ListNumberStyle, ListNumberDelim) anyOrderedListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number - state <- getState - if stateStrict state - then do many1 digit - char '.' - spaceChar - return (1, DefaultStyle, DefaultDelim) - else do (num, style, delim) <- anyOrderedListMarker - -- if it could be an abbreviated first name, insist on more than one space - if delim == Period && (style == UpperAlpha || (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000])) - then char '\t' <|> (try $ char ' ' >> spaceChar) - else spaceChar - skipSpaces - return (num, style, delim) - -listStart :: GenParser Char ParserState () + (guardDisabled Ext_fancy_lists >> + do many1 digit + char '.' + spaceChar + return (1, DefaultStyle, DefaultDelim)) + <|> do (num, style, delim) <- anyOrderedListMarker + -- if it could be an abbreviated first name, insist on more than one space + if delim == Period && (style == UpperAlpha || (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000])) + then char '\t' <|> (try $ char ' ' >> spaceChar) + else spaceChar + skipSpaces + return (num, style, delim) + +listStart :: Parser [Char] ParserState () listStart = bulletListStart <|> (anyOrderedListStart >> return ()) -- parse a line of a list item (start = parser for beginning of list item) -listLine :: GenParser Char ParserState [Char] +listLine :: Parser [Char] ParserState String listLine = try $ do notFollowedBy blankline notFollowedBy' (do indentSpaces @@ -562,8 +576,8 @@ listLine = try $ do return $ concat chunks ++ "\n" -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState a - -> GenParser Char ParserState [Char] +rawListItem :: Parser [Char] ParserState a + -> Parser [Char] ParserState String rawListItem start = try $ do start first <- listLine @@ -571,17 +585,17 @@ rawListItem start = try $ do blanks <- many blankline return $ concat (first:rest) ++ blanks --- continuation of a list item - indented and separated by blankline +-- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: GenParser Char ParserState [Char] +listContinuation :: Parser [Char] ParserState String listContinuation = try $ do lookAhead indentSpaces result <- many1 listContinuationLine blanks <- many blankline return $ concat result ++ blanks -listContinuationLine :: GenParser Char ParserState [Char] +listContinuationLine :: Parser [Char] ParserState String listContinuationLine = try $ do notFollowedBy blankline notFollowedBy' listStart @@ -589,8 +603,8 @@ listContinuationLine = try $ do result <- manyTill anyChar newline return $ result ++ "\n" -listItem :: GenParser Char ParserState a - -> GenParser Char ParserState [Block] +listItem :: Parser [Char] ParserState a + -> Parser [Char] ParserState (F Blocks) listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -606,38 +620,59 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: GenParser Char ParserState Block +orderedList :: Parser [Char] ParserState (F Blocks) orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart - items <- many1 $ listItem $ try $ - do optional newline -- if preceded by a Plain block in a list context - skipNonindentSpaces - orderedListMarker style delim - return $ OrderedList (start, style, delim) $ compactify items - -bulletList :: GenParser Char ParserState Block -bulletList = - many1 (listItem bulletListStart) >>= return . BulletList . compactify + unless ((style == DefaultStyle || style == Decimal || style == Example) && + (delim == DefaultDelim || delim == Period)) $ + guardEnabled Ext_fancy_lists + when (style == Example) $ guardEnabled Ext_example_lists + items <- fmap sequence $ many1 $ listItem + ( try $ do + optional newline -- if preceded by Plain block in a list + skipNonindentSpaces + orderedListMarker style delim ) + start' <- option 1 $ guardEnabled Ext_startnum >> return start + return $ B.orderedListWith (start', style, delim) <$> fmap compactify items + +-- | Change final list item from @Para@ to @Plain@ if the list contains +-- no other @Para@ blocks. (From Shared, modified for Blocks rather than [Block].) +compactify :: [Blocks] -- ^ List of list items (each a list of blocks) + -> [Blocks] +compactify [] = [] +compactify items = + let (others, final) = (init items, last items) + in case reverse (B.toList final) of + (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of + -- if this is only Para, change to Plain + [_] -> others ++ [B.fromList (reverse $ Plain a : xs)] + _ -> items + _ -> items + +bulletList :: Parser [Char] ParserState (F Blocks) +bulletList = do + items <- fmap sequence $ many1 $ listItem bulletListStart + return $ B.bulletList <$> fmap compactify items -- definition lists -defListMarker :: GenParser Char ParserState () +defListMarker :: Parser [Char] ParserState () defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' - st <- getState - let tabStop = stateTabStop st + tabStop <- getOption readerTabStop let remaining = tabStop - (length sps + 1) if remaining > 0 then count remaining (char ' ') <|> string "\t" - else pzero + else mzero return () -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState (F (Inlines, [Blocks])) definitionListItem = try $ do + guardEnabled Ext_definition_lists -- first, see if this has any chance of being a definition list: lookAhead (anyLine >> optional blankline >> defListMarker) - term <- manyTill inline newline + term <- trimInlinesF . mconcat <$> manyTill inline newline optional blankline raw <- many1 defRawBlock state <- getState @@ -645,9 +680,9 @@ definitionListItem = try $ do -- parse the extracted block, which may contain various block elements: contents <- mapM (parseFromString parseBlocks) raw updateState (\st -> st {stateParserContext = oldContext}) - return ((normalizeSpaces term), contents) + return $ liftM2 (,) term (sequence contents) -defRawBlock :: GenParser Char ParserState [Char] +defRawBlock :: Parser [Char] ParserState String defRawBlock = try $ do defListMarker firstline <- anyLine @@ -659,119 +694,149 @@ defRawBlock = try $ do return $ unlines lns ++ trl return $ firstline ++ "\n" ++ unlines rawlines ++ trailing ++ cont -definitionList :: GenParser Char ParserState Block +definitionList :: Parser [Char] ParserState (F Blocks) definitionList = do - items <- many1 definitionListItem - -- "compactify" the definition list: - let defs = map snd items - let defBlocks = reverse $ concat $ concat defs - let isPara (Para _) = True + items <- fmap sequence $ many1 definitionListItem + return $ B.definitionList <$> fmap compactifyDL items + +compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] +compactifyDL items = + let defs = concatMap snd items + defBlocks = reverse $ concatMap B.toList defs + isPara (Para _) = True isPara _ = False - let items' = case take 1 defBlocks of - [Para x] -> if not $ any isPara (drop 1 defBlocks) - then let (t,ds) = last items - lastDef = last ds - ds' = init ds ++ - [init lastDef ++ [Plain x]] - in init items ++ [(t, ds')] - else items - _ -> items - return $ DefinitionList items' + in case defBlocks of + (Para x:_) -> if not $ any isPara (drop 1 defBlocks) + then let (t,ds) = last items + lastDef = B.toList $ last ds + ds' = init ds ++ + [B.fromList $ init lastDef ++ [Plain x]] + in init items ++ [(t, ds')] + else items + _ -> items -- -- paragraph block -- +{- isHtmlOrBlank :: Inline -> Bool isHtmlOrBlank (RawInline "html" _) = True isHtmlOrBlank (Space) = True isHtmlOrBlank (LineBreak) = True isHtmlOrBlank _ = False +-} -para :: GenParser Char ParserState Block -para = try $ do - result <- liftM normalizeSpaces $ many1 inline - guard $ not . all isHtmlOrBlank $ result - option (Plain result) $ try $ do +para :: Parser [Char] ParserState (F Blocks) +para = try $ do + result <- trimInlinesF . mconcat <$> many1 inline + -- TODO remove this if not really needed? and remove isHtmlOrBlank + -- guard $ not $ F.all isHtmlOrBlank result + option (B.plain <$> result) $ try $ do newline - blanklines <|> - (getState >>= guard . stateStrict >> - lookAhead (blockQuote <|> header) >> return "") - return $ Para result + (blanklines >> return mempty) + <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote) + <|> (guardDisabled Ext_blank_before_header >> lookAhead header) + return $ B.para <$> result -plain :: GenParser Char ParserState Block -plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces +plain :: Parser [Char] ParserState (F Blocks) +plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline <* spaces --- +-- -- raw html -- -htmlElement :: GenParser Char ParserState [Char] +htmlElement :: Parser [Char] ParserState String htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: GenParser Char ParserState Block -htmlBlock = try $ do - failUnlessBeginningOfLine +htmlBlock :: Parser [Char] ParserState (F Blocks) +htmlBlock = do + guardEnabled Ext_raw_html + res <- (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks) + <|> htmlBlock' + return $ return $ B.rawBlock "html" res + +htmlBlock' :: Parser [Char] ParserState String +htmlBlock' = try $ do first <- htmlElement finalSpace <- many spaceChar finalNewlines <- many newline - return $ RawBlock "html" $ first ++ finalSpace ++ finalNewlines + return $ first ++ finalSpace ++ finalNewlines -strictHtmlBlock :: GenParser Char ParserState [Char] -strictHtmlBlock = do - failUnlessBeginningOfLine - htmlInBalanced (not . isInlineTag) +strictHtmlBlock :: Parser [Char] ParserState String +strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: GenParser Char ParserState String +rawVerbatimBlock :: Parser [Char] ParserState String rawVerbatimBlock = try $ do (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> - t == "pre" || t == "style" || t == "script") - (const True)) + t == "pre" || t == "style" || t == "script") + (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags [TagClose tag] -rawTeXBlock :: GenParser Char ParserState Block +rawTeXBlock :: Parser [Char] ParserState (F Blocks) rawTeXBlock = do - failIfStrict - result <- liftM (RawBlock "latex") rawLaTeXBlock - <|> liftM (RawBlock "context") rawConTeXtEnvironment + guardEnabled Ext_raw_tex + result <- (B.rawBlock "latex" <$> rawLaTeXBlock) + <|> (B.rawBlock "context" <$> rawConTeXtEnvironment) spaces - return result + return $ return result -rawHtmlBlocks :: GenParser Char ParserState Block +rawHtmlBlocks :: Parser [Char] ParserState String rawHtmlBlocks = do - htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|> - liftM snd (htmlTag isBlockTag) - sps <- do sp1 <- many spaceChar - sp2 <- option "" (blankline >> return "\n") - sp3 <- many spaceChar - sp4 <- option "" blanklines - return $ sp1 ++ sp2 ++ sp3 ++ sp4 - -- note: we want raw html to be able to - -- precede a code block, when separated - -- by a blank line - return $ blk ++ sps + htmlBlocks <- many1 $ try $ do + s <- rawVerbatimBlock <|> try ( + do (t,raw) <- htmlTag isBlockTag + exts <- getOption readerExtensions + -- if open tag, need markdown="1" if + -- markdown_attributes extension is set + case t of + TagOpen _ as + | Ext_markdown_attribute `Set.member` + exts -> + if "markdown" `notElem` + map fst as + then mzero + else return $ + stripMarkdownAttribute raw + | otherwise -> return raw + _ -> return raw ) + sps <- do sp1 <- many spaceChar + sp2 <- option "" (blankline >> return "\n") + sp3 <- many spaceChar + sp4 <- option "" blanklines + return $ sp1 ++ sp2 ++ sp3 ++ sp4 + -- note: we want raw html to be able to + -- precede a code block, when separated + -- by a blank line + return $ s ++ sps let combined = concat htmlBlocks - let combined' = if last combined == '\n' then init combined else combined - return $ RawBlock "html" combined' + return $ if last combined == '\n' then init combined else combined + +-- remove markdown="1" attribute +stripMarkdownAttribute :: String -> String +stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s + where filterAttrib (TagOpen t as) = TagOpen t + [(k,v) | (k,v) <- as, k /= "markdown"] + filterAttrib x = x -- -- Tables --- +-- -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine :: Char - -> GenParser Char st (Int, Int) +dashedLine :: Char + -> Parser [Char] st (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar return $ (length dashes, length $ dashes ++ sp) --- Parse a table header with dashed lines of '-' preceded by +-- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. -simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) +simpleTableHeader :: Bool -- ^ Headerless table + -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -784,84 +849,104 @@ simpleTableHeader headless = try $ do -- If no header, calculate alignment on basis of first row of text rawHeads <- liftM (tail . splitStringByIndices (init indices)) $ if headless - then lookAhead anyLine + then lookAhead anyLine else return rawContent let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths let rawHeads' = if headless then replicate (length dashes) "" - else rawHeads - heads <- mapM (parseFromString (many plain)) $ - map removeLeadingTrailingSpace rawHeads' + else rawHeads + heads <- fmap sequence + $ mapM (parseFromString (mconcat <$> many plain)) + $ map removeLeadingTrailingSpace rawHeads' return (heads, aligns, indices) +-- Returns an alignment type for a table, based on a list of strings +-- (the rows of the column header) and a number (the length of the +-- dashed line under the rows. +alignType :: [String] + -> Int + -> Alignment +alignType [] _ = AlignDefault +alignType strLst len = + let nonempties = filter (not . null) $ map removeTrailingSpace strLst + (leftSpace, rightSpace) = + case sortBy (comparing length) nonempties of + (x:_) -> (head x `elem` " \t", length x < len) + [] -> (False, False) + in case (leftSpace, rightSpace) of + (True, False) -> AlignRight + (False, True) -> AlignLeft + (True, True) -> AlignCenter + (False, False) -> AlignDefault + -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: GenParser Char ParserState [Char] +tableFooter :: Parser [Char] ParserState String tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep :: GenParser Char ParserState Char +tableSep :: Parser [Char] ParserState Char tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. rawTableLine :: [Int] - -> GenParser Char ParserState [String] + -> Parser [Char] ParserState [String] rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline - return $ map removeLeadingTrailingSpace $ tail $ + return $ map removeLeadingTrailingSpace $ tail $ splitStringByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). tableLine :: [Int] - -> GenParser Char ParserState [[Block]] -tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) + -> Parser [Char] ParserState (F [Blocks]) +tableLine indices = rawTableLine indices >>= + fmap sequence . mapM (parseFromString (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: [Int] - -> GenParser Char ParserState [[Block]] + -> Parser [Char] ParserState (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - mapM (parseFromString (many plain)) cols + fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: GenParser Char ParserState [Inline] +tableCaption :: Parser [Char] ParserState (F Inlines) tableCaption = try $ do + guardEnabled Ext_table_captions skipNonindentSpaces string ":" <|> string "Table:" - result <- many1 inline - blanklines - return $ normalizeSpaces result + trimInlinesF . mconcat <$> many1 inline <* blanklines -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine + (aligns, _widths, heads', lines') <- + tableWith (simpleTableHeader headless) tableLine (return ()) (if headless then tableFooter else tableFooter <|> blanklines) - tableCaption -- Simple tables get 0s for relative column widths (i.e., use default) - return $ Table c a (replicate (length a) 0) h l + return (aligns, replicate (length aligns) 0, heads', lines') -- Parse a multiline table: starts with row of '-' on top, then header -- (which may be multiline), then the rows, -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). multilineTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) multilineTable headless = - tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption + tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter multilineTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) + -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do if headless then return '\n' else tableSep >>~ notFollowedBy blankline rawContent <- if headless - then return $ repeat "" + then return $ repeat "" else many1 (notFollowedBy tableSep >> many1Till anyChar newline) initSp <- nonindentSpaces @@ -872,54 +957,206 @@ multilineTableHeader headless = try $ do rawHeadsList <- if headless then liftM (map (:[]) . tail . splitStringByIndices (init indices)) $ lookAhead anyLine - else return $ transpose $ map + else return $ transpose $ map (\ln -> tail $ splitStringByIndices (init indices) ln) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless then replicate (length dashes) "" else map (intercalate " ") rawHeadsList - heads <- mapM (parseFromString (many plain)) $ + heads <- fmap sequence $ + mapM (parseFromString (mconcat <$> many plain)) $ map removeLeadingTrailingSpace rawHeads return (heads, aligns, indices) --- Returns an alignment type for a table, based on a list of strings --- (the rows of the column header) and a number (the length of the --- dashed line under the rows. -alignType :: [String] - -> Int - -> Alignment -alignType [] _ = AlignDefault -alignType strLst len = - let nonempties = filter (not . null) $ map removeTrailingSpace strLst - (leftSpace, rightSpace) = - case sortBy (comparing length) nonempties of - (x:_) -> (head x `elem` " \t", length x < len) - [] -> (False, False) - in case (leftSpace, rightSpace) of - (True, False) -> AlignRight - (False, True) -> AlignLeft - (True, True) -> AlignCenter - (False, False) -> AlignDefault - +-- Parse a grid table: starts with row of '-' on top, then header +-- (which may be grid), then the rows, +-- which may be grid, separated by blank lines, and +-- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTable = gridTableWith block tableCaption + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +gridTable headless = + tableWith (gridTableHeader headless) gridTableRow + (gridTableSep '-') gridTableFooter + +gridTableSplitLine :: [Int] -> String -> [String] +gridTableSplitLine indices line = map removeFinalBar $ tail $ + splitStringByIndices (init indices) $ removeTrailingSpace line + +gridPart :: Char -> Parser [Char] st (Int, Int) +gridPart ch = do + dashes <- many1 (char ch) + char '+' + return (length dashes, length dashes + 1) -table :: GenParser Char ParserState Block -table = multilineTable False <|> simpleTable True <|> - simpleTable False <|> multilineTable True <|> - gridTable False <|> gridTable True <?> "table" +gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] +gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline + +removeFinalBar :: String -> String +removeFinalBar = + reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse + +-- | Separator between rows of grid table. +gridTableSep :: Char -> Parser [Char] ParserState Char +gridTableSep ch = try $ gridDashedLines ch >> return '\n' + +-- | Parse header for a grid table. +gridTableHeader :: Bool -- ^ Headerless table + -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) +gridTableHeader headless = try $ do + optional blanklines + dashes <- gridDashedLines '-' + rawContent <- if headless + then return $ repeat "" + else many1 + (notFollowedBy (gridTableSep '=') >> char '|' >> + many1Till anyChar newline) + if headless + then return () + else gridTableSep '=' >> return () + let lines' = map snd dashes + let indices = scanl (+) 0 lines' + let aligns = replicate (length lines') AlignDefault + -- RST does not have a notion of alignments + let rawHeads = if headless + then replicate (length dashes) "" + else map (intercalate " ") $ transpose + $ map (gridTableSplitLine indices) rawContent + heads <- fmap sequence $ mapM (parseFromString block) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) --- +gridTableRawLine :: [Int] -> Parser [Char] ParserState [String] +gridTableRawLine indices = do + char '|' + line <- many1Till anyChar newline + return (gridTableSplitLine indices line) + +-- | Parse row of grid table. +gridTableRow :: [Int] + -> Parser [Char] ParserState (F [Blocks]) +gridTableRow indices = do + colLines <- many1 (gridTableRawLine indices) + let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ + transpose colLines + fmap compactify <$> fmap sequence (mapM (parseFromString block) cols) + +removeOneLeadingSpace :: [String] -> [String] +removeOneLeadingSpace xs = + if all startsWithSpace xs + then map (drop 1) xs + else xs + where startsWithSpace "" = True + startsWithSpace (y:_) = y == ' ' + +-- | Parse footer for a grid table. +gridTableFooter :: Parser [Char] ParserState [Char] +gridTableFooter = blanklines + +pipeTable :: Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +pipeTable = try $ do + let pipeBreak = nonindentSpaces *> optional (char '|') *> + pipeTableHeaderPart `sepBy1` sepPipe <* + optional (char '|') <* blankline + (heads,aligns) <- try ( pipeBreak >>= \als -> + return (return $ replicate (length als) mempty, als)) + <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> + + return (row, als) ) + lines' <- sequence <$> many1 pipeTableRow + blanklines + let widths = replicate (length aligns) 0.0 + return $ (aligns, widths, heads, lines') + +sepPipe :: Parser [Char] ParserState () +sepPipe = try $ do + char '|' <|> char '+' + notFollowedBy blankline + +-- parse a row, also returning probable alignments for org-table cells +pipeTableRow :: Parser [Char] ParserState (F [Blocks]) +pipeTableRow = do + nonindentSpaces + optional (char '|') + let cell = mconcat <$> + many (notFollowedBy (blankline <|> char '|') >> inline) + first <- cell + sepPipe + rest <- cell `sepBy1` sepPipe + optional (char '|') + blankline + let cells = sequence (first:rest) + return $ do + cells' <- cells + return $ map + (\ils -> + case trimInlines ils of + ils' | B.isNull ils' -> mempty + | otherwise -> B.plain $ ils') cells' + +pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart = do + left <- optionMaybe (char ':') + many1 (char '-') + right <- optionMaybe (char ':') + return $ + case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter + +-- Succeed only if current line contains a pipe. +scanForPipe :: Parser [Char] st () +scanForPipe = lookAhead (manyTill (satisfy (/='\n')) (char '|')) >> return () + +-- | Parse a table using 'headerParser', 'rowParser', +-- 'lineParser', and 'footerParser'. Variant of the version in +-- Text.Pandoc.Parsing. +tableWith :: Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) + -> ([Int] -> Parser [Char] ParserState (F [Blocks])) + -> Parser [Char] ParserState sep + -> Parser [Char] ParserState end + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +tableWith headerParser rowParser lineParser footerParser = try $ do + (heads, aligns, indices) <- headerParser + lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser + footerParser + numColumns <- getOption readerColumns + let widths = if (indices == []) + then replicate (length aligns) 0.0 + else widthsFromIndices numColumns indices + return $ (aligns, widths, heads, lines') + +table :: Parser [Char] ParserState (F Blocks) +table = try $ do + frontCaption <- option Nothing (Just <$> tableCaption) + (aligns, widths, heads, lns) <- + try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|> + try (guardEnabled Ext_multiline_tables >> + multilineTable False) <|> + try (guardEnabled Ext_simple_tables >> + (simpleTable True <|> simpleTable False)) <|> + try (guardEnabled Ext_multiline_tables >> + multilineTable True) <|> + try (guardEnabled Ext_grid_tables >> + (gridTable False <|> gridTable True)) <?> "table" + optional blanklines + caption <- case frontCaption of + Nothing -> option (return mempty) tableCaption + Just c -> return c + return $ do + caption' <- caption + heads' <- heads + lns' <- lns + return $ B.table caption' (zip aligns widths) heads' lns' + +-- -- inline -- -inline :: GenParser Char ParserState Inline -inline = choice inlineParsers <?> "inline" - -inlineParsers :: [GenParser Char ParserState Inline] -inlineParsers = [ whitespace +inline :: Parser [Char] ParserState (F Inlines) +inline = choice [ whitespace , str , endline , code @@ -927,8 +1164,8 @@ inlineParsers = [ whitespace , strong , emph , note - , link , cite + , link , image , math , strikeout @@ -940,115 +1177,127 @@ inlineParsers = [ whitespace , escapedChar , rawLaTeXInline' , exampleRef - , smartPunctuation inline - , charRef + , smart + , return . B.singleton <$> charRef , symbol - , ltSign ] + , ltSign + ] <?> "inline" -escapedChar' :: GenParser Char ParserState Char +escapedChar' :: Parser [Char] ParserState Char escapedChar' = try $ do char '\\' - state <- getState - if stateStrict state - then oneOf "\\`*_{}[]()>#+-.!~" - else satisfy (not . isAlphaNum) + (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) + <|> oneOf "\\`*_{}[]()>#+-.!~" -escapedChar :: GenParser Char ParserState Inline +escapedChar :: Parser [Char] ParserState (F Inlines) escapedChar = do result <- escapedChar' - return $ case result of - ' ' -> Str "\160" -- "\ " is a nonbreaking space - '\n' -> LineBreak -- "\[newline]" is a linebreak - _ -> Str [result] + case result of + ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space + '\n' -> guardEnabled Ext_escaped_line_breaks >> + return (return B.linebreak) -- "\[newline]" is a linebreak + _ -> return $ return $ B.str [result] -ltSign :: GenParser Char ParserState Inline +ltSign :: Parser [Char] ParserState (F Inlines) ltSign = do - st <- getState - if stateStrict st - then char '<' - else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html - return $ Str ['<'] + guardDisabled Ext_raw_html + <|> guardDisabled Ext_markdown_in_html_blocks + <|> (notFollowedBy' rawHtmlBlocks >> return ()) + char '<' + return $ return $ B.str "<" -exampleRef :: GenParser Char ParserState Inline +exampleRef :: Parser [Char] ParserState (F Inlines) exampleRef = try $ do + guardEnabled Ext_example_lists char '@' lab <- many1 (alphaNum <|> oneOf "-_") - -- We just return a Str. These are replaced with numbers - -- later. See the end of parseMarkdown. - return $ Str $ '@' : lab - -symbol :: GenParser Char ParserState Inline -symbol = do + return $ do + st <- askF + return $ case M.lookup lab (stateExamples st) of + Just n -> B.str (show n) + Nothing -> B.str ('@':lab) + +symbol :: Parser [Char] ParserState (F Inlines) +symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' - notFollowedBy' rawTeXBlock + notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ Str [result] + return $ return $ B.str [result] -- parses inline code, between n `s and n `s -code :: GenParser Char ParserState Inline -code = try $ do +code :: Parser [Char] ParserState (F Inlines) +code = try $ do starts <- many1 (char '`') skipSpaces result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " ")) - (try (skipSpaces >> count (length starts) (char '`') >> + (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) - attr <- option ([],[],[]) (try $ optional whitespace >> attributes) - return $ Code attr $ removeLeadingTrailingSpace $ concat result - -mathWord :: GenParser Char st [Char] -mathWord = liftM concat $ many1 mathChunk - -mathChunk :: GenParser Char st [Char] -mathChunk = do char '\\' - c <- anyChar - return ['\\',c] - <|> many1 (satisfy $ \c -> not (isBlank c || c == '\\' || c == '$')) - -math :: GenParser Char ParserState Inline -math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath) - <|> (mathInline >>= applyMacros' >>= return . Math InlineMath) - -mathDisplay :: GenParser Char ParserState String -mathDisplay = try $ do - failIfStrict - string "$$" - many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$") - -mathInline :: GenParser Char ParserState String -mathInline = try $ do - failIfStrict - char '$' + attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >> + optional whitespace >> attributes) + return $ return $ B.codeWith attr $ removeLeadingTrailingSpace $ concat result + +math :: Parser [Char] ParserState (F Inlines) +math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) + <|> (return . B.math <$> (mathInline >>= applyMacros')) + +mathDisplay :: Parser [Char] ParserState String +mathDisplay = + (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathDisplayWith "\\[" "\\]") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathDisplayWith "\\\\[" "\\\\]") + +mathDisplayWith :: String -> String -> Parser [Char] ParserState String +mathDisplayWith op cl = try $ do + string op + many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) + +mathInline :: Parser [Char] ParserState String +mathInline = + (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathInlineWith "\\(" "\\)") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathInlineWith "\\\\(" "\\\\)") + +mathInlineWith :: String -> String -> Parser [Char] ParserState String +mathInlineWith op cl = try $ do + string op notFollowedBy space - words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline))) - char '$' - notFollowedBy digit - return $ intercalate " " words' + words' <- many1Till (count 1 (noneOf "\n\\") + <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> count 1 newline <* notFollowedBy' blankline + *> return " ") + (try $ string cl) + notFollowedBy digit -- to prevent capture of $5 + return $ concat words' -- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row -- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub -fours :: GenParser Char st Inline +fours :: Parser [Char] st (F Inlines) fours = try $ do x <- char '*' <|> char '_' <|> char '~' <|> char '^' count 2 $ satisfy (==x) rest <- many1 (satisfy (==x)) - return $ Str (x:x:x:rest) + return $ return $ B.str (x:x:x:rest) -- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (Show b) - => GenParser Char ParserState a - -> GenParser Char ParserState b - -> GenParser Char ParserState [Inline] + => Parser [Char] ParserState a + -> Parser [Char] ParserState b + -> Parser [Char] ParserState (F Inlines) inlinesBetween start end = - normalizeSpaces `liftM` try (start >> many1Till inner end) - where inner = innerSpace <|> (notFollowedBy' whitespace >> inline) + (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) + where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace >>~ notFollowedBy' end -- This is used to prevent exponential blowups for things like: -- a**a*a**a*a**a*a**a*a**a*a**a*a** -nested :: GenParser Char ParserState a - -> GenParser Char ParserState a +nested :: Parser [Char] ParserState a + -> Parser [Char] ParserState a nested p = do nestlevel <- stateMaxNestingLevel `fmap` getState guard $ nestlevel > 0 @@ -1057,54 +1306,57 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -emph :: GenParser Char ParserState Inline -emph = Emph `fmap` nested +emph :: Parser [Char] ParserState (F Inlines) +emph = fmap B.emph <$> nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = char '*' >> lookAhead nonspaceChar - starEnd = notFollowedBy' strong >> char '*' + starEnd = notFollowedBy' (() <$ strong) >> char '*' ulStart = char '_' >> lookAhead nonspaceChar - ulEnd = notFollowedBy' strong >> char '_' + ulEnd = notFollowedBy' (() <$ strong) >> char '_' -strong :: GenParser Char ParserState Inline -strong = Strong `liftM` nested +strong :: Parser [Char] ParserState (F Inlines) +strong = fmap B.strong <$> nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = string "**" >> lookAhead nonspaceChar starEnd = try $ string "**" ulStart = string "__" >> lookAhead nonspaceChar ulEnd = try $ string "__" -strikeout :: GenParser Char ParserState Inline -strikeout = Strikeout `liftM` - (failIfStrict >> inlinesBetween strikeStart strikeEnd) +strikeout :: Parser [Char] ParserState (F Inlines) +strikeout = fmap B.strikeout <$> + (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: GenParser Char ParserState Inline -superscript = failIfStrict >> enclosed (char '^') (char '^') - (notFollowedBy spaceChar >> inline) >>= -- may not contain Space - return . Superscript +superscript :: Parser [Char] ParserState (F Inlines) +superscript = fmap B.superscript <$> try (do + guardEnabled Ext_superscript + char '^' + mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: GenParser Char ParserState Inline -subscript = failIfStrict >> enclosed (char '~') (char '~') - (notFollowedBy spaceChar >> inline) >>= -- may not contain Space - return . Subscript +subscript :: Parser [Char] ParserState (F Inlines) +subscript = fmap B.subscript <$> try (do + guardEnabled Ext_subscript + char '~' + mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: GenParser Char ParserState Inline -whitespace = spaceChar >> - ( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak)) - <|> (skipMany spaceChar >> return Space) ) <?> "whitespace" +whitespace :: Parser [Char] ParserState (F Inlines) +whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" + where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) + regsp = skipMany spaceChar >> return B.space -nonEndline :: GenParser Char st Char +nonEndline :: Parser [Char] st Char nonEndline = satisfy (/='\n') -str :: GenParser Char ParserState Inline +str :: Parser [Char] ParserState (F Inlines) str = do - smart <- stateSmart `fmap` getState + isSmart <- readerSmart . stateOptions <$> getState a <- alphaNum as <- many $ alphaNum - <|> (try $ char '_' >>~ lookAhead alphaNum) - <|> if smart + <|> (guardEnabled Ext_intraword_underscores >> + try (char '_' >>~ lookAhead alphaNum)) + <|> if isSmart then (try $ satisfy (\c -> c == '\'' || c == '\x2019') >> lookAhead alphaNum >> return '\x2019') -- for things like l'aide @@ -1113,15 +1365,16 @@ str = do updateState $ \s -> s{ stateLastStrPos = Just pos } let result = a:as let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) - if smart + if isSmart then case likelyAbbrev result of - [] -> return $ Str result + [] -> return $ return $ B.str result xs -> choice (map (\x -> try (string x >> oneOf " \n" >> lookAhead alphaNum >> - return (Str $ result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ Str result) - else return $ Str result + return (return $ B.str + $ result ++ spacesToNbr x ++ "\160"))) xs) + <|> (return $ return $ B.str result) + else return $ return $ B.str result -- | if the string matches the beginning of an abbreviation (before -- the first period, return strings that would finish the abbreviation. @@ -1136,39 +1389,38 @@ likelyAbbrev x = in map snd $ filter (\(y,_) -> y == x) abbrPairs -- an endline character that can be treated as a space, not a structural break -endline :: GenParser Char ParserState Inline +endline :: Parser [Char] ParserState (F Inlines) endline = try $ do newline notFollowedBy blankline - st <- getState - when (stateStrict st) $ do - notFollowedBy emailBlockQuoteStart - notFollowedBy (char '#') -- atx header + guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart + guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header -- parse potential list-starts differently if in a list: + st <- getState when (stateParserContext st == ListItemState) $ do notFollowedBy' bulletListStart notFollowedBy' anyOrderedListStart - return Space + (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) + <|> (return $ return B.space) -- -- links -- -- a reference label for a link -reference :: GenParser Char ParserState [Inline] +reference :: Parser [Char] ParserState (F Inlines, String) reference = do notFollowedBy' (string "[^") -- footnote reference - result <- inlinesInBalancedBrackets inline - return $ normalizeSpaces result + withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -- source for a link, with optional title -source :: GenParser Char ParserState (String, [Char]) +source :: Parser [Char] ParserState (String, String) source = (try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|> -- the following is needed for cases like: [ref](/url(a). (enclosed (char '(') (char ')') litChar >>= parseFromString source') -- auxiliary function for source -source' :: GenParser Char ParserState (String, [Char]) +source' :: Parser [Char] ParserState (String, String) source' = do skipSpaces let nl = char '\n' >>~ notFollowedBy blankline @@ -1186,7 +1438,7 @@ source' = do eof return (escapeURI $ removeTrailingSpace src, tit) -linkTitle :: GenParser Char ParserState String +linkTitle :: Parser [Char] ParserState String linkTitle = try $ do (many1 spaceChar >> option '\n' newline) <|> newline skipSpaces @@ -1194,78 +1446,88 @@ linkTitle = try $ do tit <- manyTill litChar (try (char delim >> skipSpaces >> eof)) return $ fromEntities tit -link :: GenParser Char ParserState Inline +link :: Parser [Char] ParserState (F Inlines) link = try $ do - lab <- reference - (src, tit) <- source <|> referenceLink lab - return $ Link (delinkify lab) (src, tit) - -delinkify :: [Inline] -> [Inline] -delinkify = bottomUp $ concatMap go - where go (Link lab _) = lab - go x = [x] + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (lab,raw) <- reference + setState $ st{ stateAllowLinks = True } + regLink B.link lab <|> referenceLink B.link (lab,raw) + +regLink :: (String -> String -> Inlines -> Inlines) + -> F Inlines -> Parser [Char] ParserState (F Inlines) +regLink constructor lab = try $ do + (src, tit) <- source + return $ constructor src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: [Inline] - -> GenParser Char ParserState (String, [Char]) -referenceLink lab = do - ref <- option [] (try (optional (char ' ') >> - optional (newline >> skipSpaces) >> reference)) - let ref' = if null ref then lab else ref - state <- getState - case lookupKeySrc (stateKeys state) (toKey ref') of - Nothing -> fail "no corresponding key" - Just target -> return target - -autoLink :: GenParser Char ParserState Inline +referenceLink :: (String -> String -> Inlines -> Inlines) + -> (F Inlines, String) -> Parser [Char] ParserState (F Inlines) +referenceLink constructor (lab, raw) = do + raw' <- try (optional (char ' ') >> + optional (newline >> skipSpaces) >> + (snd <$> reference)) <|> return "" + let key = toKey $ if raw' == "[]" || raw' == "" then raw else raw' + let dropRB (']':xs) = xs + dropRB xs = xs + let dropLB ('[':xs) = xs + dropLB xs = xs + let dropBrackets = reverse . dropRB . reverse . dropLB + fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + return $ do + keys <- asksF stateKeys + case M.lookup key keys of + Nothing -> (\x -> B.str "[" <> x <> B.str "]" <> B.str raw') <$> fallback + Just (src,tit) -> constructor src tit <$> lab + +autoLink :: Parser [Char] ParserState (F Inlines) autoLink = try $ do char '<' (orig, src) <- uri <|> emailAddress char '>' - st <- getState - return $ if stateStrict st - then Link [Str orig] (src, "") - else Link [Code ("",["url"],[]) orig] (src, "") + (guardEnabled Ext_autolink_code_spans >> + return (return $ B.link src "" (B.codeWith ("",["url"],[]) orig))) + <|> return (return $ B.link src "" (B.str orig)) -image :: GenParser Char ParserState Inline +image :: Parser [Char] ParserState (F Inlines) image = try $ do char '!' - lab <- reference - (src, tit) <- source <|> referenceLink lab - return $ Image lab (src,tit) + (lab,raw) <- reference + regLink B.image lab <|> referenceLink B.image (lab,raw) -note :: GenParser Char ParserState Inline +note :: Parser [Char] ParserState (F Inlines) note = try $ do - failIfStrict + guardEnabled Ext_footnotes ref <- noteMarker - state <- getState - let notes = stateNotes state - case lookup ref notes of - Nothing -> fail "note not found" - Just raw -> do - -- We temporarily empty the note list while parsing the note, - -- so that we don't get infinite loops with notes inside notes... - -- Note references inside other notes do not work. - updateState $ \st -> st{ stateNotes = [] } - contents <- parseFromString parseBlocks raw - updateState $ \st -> st{ stateNotes = notes } - return $ Note contents - -inlineNote :: GenParser Char ParserState Inline + return $ do + notes <- asksF stateNotes' + case lookup ref notes of + Nothing -> return $ B.str $ "[^" ++ ref ++ "]" + Just contents -> do + st <- askF + -- process the note in a context that doesn't resolve + -- notes, to avoid infinite looping with notes inside + -- notes: + let contents' = runF contents st{ stateNotes' = [] } + return $ B.note contents' + +inlineNote :: Parser [Char] ParserState (F Inlines) inlineNote = try $ do - failIfStrict + guardEnabled Ext_inline_notes char '^' - contents <- inlinesInBalancedBrackets inline - return $ Note [Para contents] + contents <- inlinesInBalancedBrackets + return $ B.note . B.para <$> contents -rawLaTeXInline' :: GenParser Char ParserState Inline +rawLaTeXInline' :: Parser [Char] ParserState (F Inlines) rawLaTeXInline' = try $ do - failIfStrict + guardEnabled Ext_raw_tex lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env RawInline _ s <- rawLaTeXInline - return $ RawInline "tex" s -- "tex" because it might be context or latex + return $ return $ B.rawInline "tex" s + -- "tex" because it might be context or latex -rawConTeXtEnvironment :: GenParser Char st String +rawConTeXtEnvironment :: Parser [Char] st String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1274,37 +1536,33 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: (GenParser Char st Char) -> GenParser Char st String +inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline :: Parser [Char] ParserState (F Inlines) rawHtmlInline = do - st <- getState - (_,result) <- if stateStrict st - then htmlTag (not . isTextTag) - else htmlTag isInlineTag - return $ RawInline "html" result + guardEnabled Ext_raw_html + mdInHtml <- option False $ + guardEnabled Ext_markdown_in_html_blocks >> return True + (_,result) <- if mdInHtml + then htmlTag isInlineTag + else htmlTag (not . isTextTag) + return $ return $ B.rawInline "html" result -- Citations -cite :: GenParser Char ParserState Inline +cite :: Parser [Char] ParserState (F Inlines) cite = do - failIfStrict + guardEnabled Ext_citations + getOption readerCitations >>= guard . not . null citations <- textualCite <|> normalCite - return $ Cite citations [] + return $ flip B.cite mempty <$> citations -spnl :: GenParser Char st () -spnl = try $ do - skipSpaces - optional newline - skipSpaces - notFollowedBy (char '\n') - -textualCite :: GenParser Char ParserState [Citation] +textualCite :: Parser [Char] ParserState (F [Citation]) textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1314,22 +1572,25 @@ textualCite = try $ do , citationNoteNum = 0 , citationHash = 0 } - rest <- option [] $ try $ spnl >> normalCite - if null rest - then option [first] $ bareloc first - else return $ first : rest + mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite + case mbrest of + Just rest -> return $ (first:) <$> rest + Nothing -> option (return [first]) $ bareloc first -bareloc :: Citation -> GenParser Char ParserState [Citation] +bareloc :: Citation -> Parser [Char] ParserState (F [Citation]) bareloc c = try $ do spnl char '[' suff <- suffix - rest <- option [] $ try $ char ';' >> citeList + rest <- option (return []) $ try $ char ';' >> citeList spnl char ']' - return $ c{ citationSuffix = suff } : rest + return $ do + suff' <- suff + rest' <- rest + return $ c{ citationSuffix = B.toList suff' } : rest' -normalCite :: GenParser Char ParserState [Citation] +normalCite :: Parser [Char] ParserState (F [Citation]) normalCite = try $ do char '[' spnl @@ -1338,7 +1599,7 @@ normalCite = try $ do char ']' return citations -citeKey :: GenParser Char ParserState (Bool, String) +citeKey :: Parser [Char] ParserState (Bool, String) citeKey = try $ do suppress_author <- option False (char '-' >> return True) char '@' @@ -1346,34 +1607,37 @@ citeKey = try $ do let internal p = try $ p >>~ lookAhead (letter <|> digit) rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_?<>~") let key = first:rest - st <- getState - guard $ key `elem` stateCitations st + citations' <- getOption readerCitations + guard $ key `elem` citations' return (suppress_author, key) -suffix :: GenParser Char ParserState [Inline] +suffix :: Parser [Char] ParserState (F Inlines) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl - rest <- liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline + rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) return $ if hasSpace - then Space : rest + then (B.space <>) <$> rest else rest -prefix :: GenParser Char ParserState [Inline] -prefix = liftM normalizeSpaces $ +prefix :: Parser [Char] ParserState (F Inlines) +prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) -citeList :: GenParser Char ParserState [Citation] -citeList = sepBy1 citation (try $ char ';' >> spnl) +citeList :: Parser [Char] ParserState (F [Citation]) +citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) -citation :: GenParser Char ParserState Citation +citation :: Parser [Char] ParserState (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ Citation{ citationId = key - , citationPrefix = pref - , citationSuffix = suff + return $ do + x <- pref + y <- suff + return $ Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y , citationMode = if suppress_author then SuppressAuthor else NormalCitation @@ -1381,3 +1645,22 @@ citation = try $ do , citationHash = 0 } +smart :: Parser [Char] ParserState (F Inlines) +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses]) + +singleQuoted :: Parser [Char] ParserState (F Inlines) +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + fmap B.singleQuoted . trimInlinesF . mconcat <$> + many1Till inline singleQuoteEnd + +doubleQuoted :: Parser [Char] ParserState (F Inlines) +doubleQuoted = try $ do + doubleQuoteStart + withQuoteContext InDoubleQuote $ + fmap B.doubleQuoted . trimInlinesF . mconcat <$> + many1Till inline doubleQuoteEnd diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 2c6fcc6e6..a0e5a0635 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Native Copyright : Copyright (C) 2011 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -31,6 +31,7 @@ Conversion of a string representation of a pandoc type (@Pandoc@, module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition +import Text.Pandoc.Shared (safeRead) nullMeta :: Meta nullMeta = Meta{ docTitle = [] @@ -51,31 +52,31 @@ nullMeta = Meta{ docTitle = [] readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc readNative s = - case reads s of - (d,_):_ -> d - [] -> Pandoc nullMeta $ readBlocks s + case safeRead s of + Just d -> d + Nothing -> Pandoc nullMeta $ readBlocks s readBlocks :: String -> [Block] readBlocks s = - case reads s of - (d,_):_ -> d - [] -> [readBlock s] + case safeRead s of + Just d -> d + Nothing -> [readBlock s] readBlock :: String -> Block readBlock s = - case reads s of - (d,_):_ -> d - [] -> Plain $ readInlines s + case safeRead s of + Just d -> d + Nothing -> Plain $ readInlines s readInlines :: String -> [Inline] readInlines s = - case reads s of - (d,_):_ -> d - [] -> [readInline s] + case safeRead s of + Just d -> d + Nothing -> [readInline s] readInline :: String -> Inline readInline s = - case reads s of - (d,_):_ -> d - [] -> error "Cannot parse document" + case safeRead s of + Just d -> d + Nothing -> error "Cannot parse document" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index d1010a736..9fb976903 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.RST + Module : Text.Pandoc.Readers.RST Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -27,24 +27,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} -module Text.Pandoc.Readers.RST ( +module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing -import Text.ParserCombinators.Parsec -import Control.Monad ( when, liftM ) +import Text.Pandoc.Options +import Control.Monad ( when, liftM, guard, mzero ) import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy ) import qualified Data.Map as M import Text.Printf ( printf ) import Data.Maybe ( catMaybes ) -- | Parse reStructuredText string and return Pandoc document. -readRST :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse (assuming @'\n'@ line endings) +readRST :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readRST state s = (readWith parseRST) state (s ++ "\n\n") +readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") -- -- Constants and data structure definitions @@ -58,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\`|*_<>$:[]()-.\"'\8216\8217\8220\8221" +specialChars = "\\`|*_<>$:/[]{}()-.\"'\8216\8217\8220\8221" -- -- parsing documents @@ -71,14 +71,14 @@ isHeader _ _ = False -- | Promote all headers in a list of blocks. (Part of -- title transformation for RST.) promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level text):rest) = +promoteHeaders num ((Header level text):rest) = (Header (level - num) text):(promoteHeaders num rest) promoteHeaders num (other:rest) = other:(promoteHeaders num rest) promoteHeaders _ [] = [] -- | If list of blocks starts with a header (or a header and subheader) -- of level that are not found elsewhere, return it as a title and --- promote all the other headers. +-- promote all the other headers. titleTransform :: [Block] -- ^ list of blocks -> ([Block], [Inline]) -- ^ modified list of blocks, title titleTransform ((Header 1 head1):(Header 2 head2):rest) | @@ -89,7 +89,7 @@ titleTransform ((Header 1 head1):rest) | (promoteHeaders 1 rest, head1) titleTransform blocks = (blocks, []) -parseRST :: GenParser Char ParserState Pandoc +parseRST :: Parser [Char] ParserState Pandoc parseRST = do optional blanklines -- skip blank lines at beginning of file startPos <- getPosition @@ -103,12 +103,13 @@ parseRST = do let reversedNotes = stateNotes st' updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... - blocks <- parseBlocks + blocks <- parseBlocks let blocks' = filter (/= Null) blocks - state <- getState - let (blocks'', title) = if stateStandalone state + standalone <- getOption readerStandalone + let (blocks'', title) = if standalone then titleTransform blocks' else (blocks', []) + state <- getState let authors = stateAuthors state let date = stateDate state let title' = if (null title) then (stateTitle state) else title @@ -118,10 +119,10 @@ parseRST = do -- parsing blocks -- -parseBlocks :: GenParser Char ParserState [Block] +parseBlocks :: Parser [Char] ParserState [Block] parseBlocks = manyTill block eof -block :: GenParser Char ParserState Block +block :: Parser [Char] ParserState Block block = choice [ codeBlock , rawBlock , blockQuote @@ -146,7 +147,7 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: String -> GenParser Char ParserState (String, String) +rawFieldListItem :: String -> Parser [Char] ParserState (String, String) rawFieldListItem indent = try $ do string indent char ':' @@ -160,7 +161,7 @@ rawFieldListItem indent = try $ do return (name, raw) fieldListItem :: String - -> GenParser Char ParserState (Maybe ([Inline], [[Block]])) + -> Parser [Char] ParserState (Maybe ([Inline], [[Block]])) fieldListItem indent = try $ do (name, raw) <- rawFieldListItem indent let term = [Str name] @@ -187,7 +188,7 @@ extractContents [Plain auth] = auth extractContents [Para auth] = auth extractContents _ = [] -fieldList :: GenParser Char ParserState Block +fieldList :: Parser [Char] ParserState Block fieldList = try $ do indent <- lookAhead $ many spaceChar items <- many1 $ fieldListItem indent @@ -199,7 +200,7 @@ fieldList = try $ do -- line block -- -lineBlockLine :: GenParser Char ParserState [Inline] +lineBlockLine :: Parser [Char] ParserState [Inline] lineBlockLine = try $ do char '|' char ' ' <|> lookAhead (char '\n') @@ -210,7 +211,7 @@ lineBlockLine = try $ do then normalizeSpaces line else Str white : normalizeSpaces line -lineBlock :: GenParser Char ParserState Block +lineBlock :: Parser [Char] ParserState Block lineBlock = try $ do lines' <- many1 lineBlockLine blanklines @@ -220,14 +221,14 @@ lineBlock = try $ do -- paragraph block -- -para :: GenParser Char ParserState Block +para :: Parser [Char] ParserState Block para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph" -codeBlockStart :: GenParser Char st Char +codeBlockStart :: Parser [Char] st Char codeBlockStart = string "::" >> blankline >> blankline -- paragraph that ends in a :: starting a code block -paraBeforeCodeBlock :: GenParser Char ParserState Block +paraBeforeCodeBlock :: Parser [Char] ParserState Block paraBeforeCodeBlock = try $ do result <- many1 (notFollowedBy' codeBlockStart >> inline) lookAhead (string "::") @@ -236,21 +237,21 @@ paraBeforeCodeBlock = try $ do else (normalizeSpaces result) ++ [Str ":"] -- regular paragraph -paraNormal :: GenParser Char ParserState Block -paraNormal = try $ do +paraNormal :: Parser [Char] ParserState Block +paraNormal = try $ do result <- many1 inline newline blanklines return $ Para $ normalizeSpaces result -plain :: GenParser Char ParserState Block -plain = many1 inline >>= return . Plain . normalizeSpaces +plain :: Parser [Char] ParserState Block +plain = many1 inline >>= return . Plain . normalizeSpaces -- -- image block -- -imageBlock :: GenParser Char ParserState Block +imageBlock :: Parser [Char] ParserState Block imageBlock = try $ do string ".. image:: " src <- manyTill anyChar newline @@ -265,11 +266,11 @@ imageBlock = try $ do -- header blocks -- -header :: GenParser Char ParserState Block +header :: Parser [Char] ParserState Block header = doubleHeader <|> singleHeader <?> "header" -- a header with lines on top and bottom -doubleHeader :: GenParser Char ParserState Block +doubleHeader :: Parser [Char] ParserState Block doubleHeader = try $ do c <- oneOf underlineChars rest <- many (char c) -- the top line @@ -283,7 +284,7 @@ doubleHeader = try $ do blankline -- spaces and newline count lenTop (char c) -- the bottom line blanklines - -- check to see if we've had this kind of header before. + -- check to see if we've had this kind of header before. -- if so, get appropriate level. if not, add to list. state <- getState let headerTable = stateHeaderTable state @@ -294,8 +295,8 @@ doubleHeader = try $ do return $ Header level (normalizeSpaces txt) -- a header with line on the bottom only -singleHeader :: GenParser Char ParserState Block -singleHeader = try $ do +singleHeader :: Parser [Char] ParserState Block +singleHeader = try $ do notFollowedBy' whitespace txt <- many1 (do {notFollowedBy blankline; inline}) pos <- getPosition @@ -317,7 +318,7 @@ singleHeader = try $ do -- hrule block -- -hrule :: GenParser Char st Block +hrule :: Parser [Char] st Block hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -331,14 +332,14 @@ hrule = try $ do -- -- read a line indented by a given string -indentedLine :: String -> GenParser Char st [Char] +indentedLine :: String -> Parser [Char] st [Char] indentedLine indents = try $ do string indents manyTill anyChar newline -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. -indentedBlock :: GenParser Char st [Char] +indentedBlock :: Parser [Char] st [Char] indentedBlock = try $ do indents <- lookAhead $ many1 spaceChar lns <- many1 $ try $ do b <- option "" blanklines @@ -347,7 +348,7 @@ indentedBlock = try $ do optional blanklines return $ unlines lns -codeBlock :: GenParser Char st Block +codeBlock :: Parser [Char] st Block codeBlock = try $ do codeBlockStart result <- indentedBlock @@ -355,7 +356,7 @@ codeBlock = try $ do -- | The 'code-block' directive (from Sphinx) that allows a language to be -- specified. -customCodeBlock :: GenParser Char st Block +customCodeBlock :: Parser [Char] st Block customCodeBlock = try $ do string ".. code-block:: " language <- manyTill anyChar newline @@ -364,7 +365,7 @@ customCodeBlock = try $ do return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result -figureBlock :: GenParser Char ParserState Block +figureBlock :: Parser [Char] ParserState Block figureBlock = try $ do string ".. figure::" src <- removeLeadingTrailingSpace `fmap` manyTill anyChar newline @@ -372,24 +373,24 @@ figureBlock = try $ do caption <- parseFromString extractCaption body return $ Para [Image caption (src,"")] -extractCaption :: GenParser Char ParserState [Inline] +extractCaption :: Parser [Char] ParserState [Inline] extractCaption = try $ do manyTill anyLine blanklines many inline -- | The 'math' directive (from Sphinx) for display math. -mathBlock :: GenParser Char st Block +mathBlock :: Parser [Char] st Block mathBlock = try $ do string ".. math::" mathBlockMultiline <|> mathBlockOneLine -mathBlockOneLine :: GenParser Char st Block +mathBlockOneLine :: Parser [Char] st Block mathBlockOneLine = try $ do result <- manyTill anyChar newline blanklines return $ Para [Math DisplayMath $ removeLeadingTrailingSpace result] -mathBlockMultiline :: GenParser Char st Block +mathBlockMultiline :: Parser [Char] st Block mathBlockMultiline = try $ do blanklines result <- indentedBlock @@ -404,9 +405,9 @@ mathBlockMultiline = try $ do $ filter (not . null) $ splitBy null lns' return $ Para $ map (Math DisplayMath) eqs -lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock :: Parser [Char] ParserState Block lhsCodeBlock = try $ do - failUnlessLHS + guardEnabled Ext_literate_haskell optional codeBlockStart pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -418,7 +419,7 @@ lhsCodeBlock = try $ do blanklines return $ CodeBlock ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns' -birdTrackLine :: GenParser Char st [Char] +birdTrackLine :: Parser [Char] st [Char] birdTrackLine = do char '>' manyTill anyChar newline @@ -427,7 +428,7 @@ birdTrackLine = do -- raw html/latex/etc -- -rawBlock :: GenParser Char st Block +rawBlock :: Parser [Char] st Block rawBlock = try $ do string ".. raw:: " lang <- many1 (letter <|> digit) @@ -439,7 +440,7 @@ rawBlock = try $ do -- block quotes -- -blockQuote :: GenParser Char ParserState Block +blockQuote :: Parser [Char] ParserState Block blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: @@ -450,10 +451,10 @@ blockQuote = do -- list blocks -- -list :: GenParser Char ParserState Block +list :: Parser [Char] ParserState Block list = choice [ bulletList, orderedList, definitionList ] <?> "list" -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]]) definitionListItem = try $ do -- avoid capturing a directive or comment notFollowedBy (try $ char '.' >> char '.') @@ -463,11 +464,11 @@ definitionListItem = try $ do contents <- parseFromString parseBlocks $ raw ++ "\n" return (normalizeSpaces term, [contents]) -definitionList :: GenParser Char ParserState Block +definitionList :: Parser [Char] ParserState Block definitionList = many1 definitionListItem >>= return . DefinitionList -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: GenParser Char st Int +bulletListStart :: Parser [Char] st Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -477,14 +478,14 @@ bulletListStart = try $ do -- parses ordered list start and returns its length (inc following whitespace) orderedListStart :: ListNumberStyle -> ListNumberDelim - -> GenParser Char ParserState Int + -> Parser [Char] ParserState Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) white <- many1 spaceChar return $ markerLen + length white -- parse a line of a list item -listLine :: Int -> GenParser Char ParserState [Char] +listLine :: Int -> Parser [Char] ParserState [Char] listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength @@ -492,36 +493,35 @@ listLine markerLength = try $ do return $ line ++ "\n" -- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> GenParser Char ParserState [Char] +indentWith :: Int -> Parser [Char] ParserState [Char] indentWith num = do - state <- getState - let tabStop = stateTabStop state + tabStop <- getOption readerTabStop if (num < tabStop) then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] + else choice [ try (count num (char ' ')), + (try (char '\t' >> count (num - tabStop) (char ' '))) ] -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState Int - -> GenParser Char ParserState (Int, [Char]) +rawListItem :: Parser [Char] ParserState Int + -> Parser [Char] ParserState (Int, [Char]) rawListItem start = try $ do markerLength <- start firstLine <- manyTill anyChar newline restLines <- many (listLine markerLength) return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) --- continuation of a list item - indented and separated by blankline or --- (in compact lists) endline. +-- continuation of a list item - indented and separated by blankline or +-- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Int -> GenParser Char ParserState [Char] +listContinuation :: Int -> Parser [Char] ParserState [Char] listContinuation markerLength = try $ do blanks <- many1 blankline result <- many1 (listLine markerLength) return $ blanks ++ concat result -listItem :: GenParser Char ParserState Int - -> GenParser Char ParserState [Block] -listItem start = try $ do +listItem :: Parser [Char] ParserState Int + -> Parser [Char] ParserState [Block] +listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) blanks <- choice [ try (many blankline >>~ lookAhead start), @@ -537,22 +537,22 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return parsed -orderedList :: GenParser Char ParserState Block +orderedList :: Parser [Char] ParserState Block orderedList = try $ do (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) items <- many1 (listItem (orderedListStart style delim)) let items' = compactify items return $ OrderedList (start, style, delim) items' -bulletList :: GenParser Char ParserState Block -bulletList = many1 (listItem bulletListStart) >>= +bulletList :: Parser [Char] ParserState Block +bulletList = many1 (listItem bulletListStart) >>= return . BulletList . compactify -- -- default-role block -- -defaultRoleBlock :: GenParser Char ParserState Block +defaultRoleBlock :: Parser [Char] ParserState Block defaultRoleBlock = try $ do string ".. default-role::" -- doesn't enforce any restrictions on the role name; embedded spaces shouldn't be allowed, for one @@ -570,7 +570,7 @@ defaultRoleBlock = try $ do -- unknown directive (e.g. comment) -- -unknownDirective :: GenParser Char st Block +unknownDirective :: Parser [Char] st Block unknownDirective = try $ do string ".." notFollowedBy (noneOf " \t\n") @@ -582,7 +582,7 @@ unknownDirective = try $ do --- note block --- -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parser [Char] ParserState [Char] noteBlock = try $ do startPos <- getPosition string ".." @@ -601,7 +601,7 @@ noteBlock = try $ do -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -noteMarker :: GenParser Char ParserState [Char] +noteMarker :: Parser [Char] ParserState [Char] noteMarker = do char '[' res <- many1 digit @@ -614,13 +614,13 @@ noteMarker = do -- reference key -- -quotedReferenceName :: GenParser Char ParserState [Inline] +quotedReferenceName :: Parser [Char] ParserState [Inline] quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - label' <- many1Till inline (char '`') + label' <- many1Till inline (char '`') return label' -unquotedReferenceName :: GenParser Char ParserState [Inline] +unquotedReferenceName :: Parser [Char] ParserState [Inline] unquotedReferenceName = try $ do label' <- many1Till inline (lookAhead $ char ':') return label' @@ -629,24 +629,24 @@ unquotedReferenceName = try $ do -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName' :: GenParser Char st String +simpleReferenceName' :: Parser [Char] st String simpleReferenceName' = do x <- alphaNum xs <- many $ alphaNum <|> (try $ oneOf "-_:+." >> lookAhead alphaNum) return (x:xs) -simpleReferenceName :: GenParser Char st [Inline] +simpleReferenceName :: Parser [Char] st [Inline] simpleReferenceName = do raw <- simpleReferenceName' return [Str raw] -referenceName :: GenParser Char ParserState [Inline] +referenceName :: Parser [Char] ParserState [Inline] referenceName = quotedReferenceName <|> (try $ simpleReferenceName >>~ lookAhead (char ':')) <|> unquotedReferenceName -referenceKey :: GenParser Char ParserState [Char] +referenceKey :: Parser [Char] ParserState [Char] referenceKey = do startPos <- getPosition (key, target) <- choice [imageKey, anonymousKey, regularKey] @@ -658,38 +658,43 @@ referenceKey = do -- return enough blanks to replace key return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -targetURI :: GenParser Char st [Char] +targetURI :: Parser [Char] st [Char] targetURI = do skipSpaces optional newline - contents <- many1 (try (many spaceChar >> newline >> + contents <- many1 (try (many spaceChar >> newline >> many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") blanklines return $ escapeURI $ removeLeadingTrailingSpace $ contents -imageKey :: GenParser Char ParserState (Key, Target) +imageKey :: Parser [Char] ParserState (Key, Target) imageKey = try $ do string ".. |" - ref <- manyTill inline (char '|') + (_,ref) <- withRaw (manyTill inline (char '|')) skipSpaces string "image::" src <- targetURI - return (toKey (normalizeSpaces ref), (src, "")) + return (toKey $ init ref, (src, "")) -anonymousKey :: GenParser Char st (Key, Target) +anonymousKey :: Parser [Char] st (Key, Target) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition - return (toKey [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, "")) + return (toKey $ "_" ++ printf "%09d" (sourceLine pos), (src, "")) -regularKey :: GenParser Char ParserState (Key, Target) +stripTicks :: String -> String +stripTicks = reverse . stripTick . reverse . stripTick + where stripTick ('`':xs) = xs + stripTick xs = xs + +regularKey :: Parser [Char] ParserState (Key, Target) regularKey = try $ do string ".. _" - ref <- referenceName + (_,ref) <- withRaw referenceName char ':' src <- targetURI - return (toKey (normalizeSpaces ref), (src, "")) + return (toKey $ stripTicks ref, (src, "")) -- -- tables @@ -702,37 +707,37 @@ regularKey = try $ do -- Simple tables TODO: -- - column spans -- - multiline support --- - ensure that rightmost column span does not need to reach end +-- - ensure that rightmost column span does not need to reach end -- - require at least 2 columns -- -- Grid tables TODO: -- - column spans -dashedLine :: Char -> GenParser Char st (Int, Int) +dashedLine :: Char -> Parser [Char] st (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Char -> GenParser Char st [(Int,Int)] +simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator -simpleTableSep :: Char -> GenParser Char ParserState Char +simpleTableSep :: Char -> Parser [Char] ParserState Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: GenParser Char ParserState [Char] +simpleTableFooter :: Parser [Char] ParserState [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: [Int] -> GenParser Char ParserState [String] +simpleTableRawLine :: [Int] -> Parser [Char] ParserState [String] simpleTableRawLine indices = do line <- many1Till anyChar newline return (simpleTableSplitLine indices line) -- Parse a table row and return a list of blocks (columns). -simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]] +simpleTableRow :: [Int] -> Parser [Char] ParserState [[Block]] simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices @@ -745,8 +750,8 @@ simpleTableSplitLine indices line = map removeLeadingTrailingSpace $ tail $ splitByIndices (init indices) line -simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) +simpleTableHeader :: Bool -- ^ Headerless table + -> Parser [Char] ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -766,28 +771,28 @@ simpleTableHeader headless = try $ do -- Parse a simple table. simpleTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parser [Char] ParserState Block simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return []) + Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter -- Simple tables get 0s for relative column widths (i.e., use default) return $ Table c a (replicate (length a) 0) h l where sep = return () -- optional (simpleTableSep '-') gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTable = gridTableWith block (return []) + -> Parser [Char] ParserState Block +gridTable = gridTableWith parseBlocks -table :: GenParser Char ParserState Block +table :: Parser [Char] ParserState Block table = gridTable False <|> simpleTable False <|> gridTable True <|> simpleTable True <?> "table" - -- + -- -- inline -- -inline :: GenParser Char ParserState Inline +inline :: Parser [Char] ParserState Inline inline = choice [ whitespace , link , str @@ -805,44 +810,53 @@ inline = choice [ whitespace , escapedChar , symbol ] <?> "inline" -hyphens :: GenParser Char ParserState Inline +hyphens :: Parser [Char] ParserState Inline hyphens = do result <- many1 (char '-') - option Space endline + option Space endline -- don't want to treat endline after hyphen or dash as a space return $ Str result -escapedChar :: GenParser Char st Inline +escapedChar :: Parser [Char] st Inline escapedChar = do c <- escaped anyChar return $ if c == ' ' -- '\ ' is null in RST then Str "" else Str [c] -symbol :: GenParser Char ParserState Inline -symbol = do +symbol :: Parser [Char] ParserState Inline +symbol = do result <- oneOf specialChars return $ Str [result] -- parses inline code, between codeStart and codeEnd -code :: GenParser Char ParserState Inline -code = try $ do +code :: Parser [Char] ParserState Inline +code = try $ do string "``" result <- manyTill anyChar (try (string "``")) return $ Code nullAttr $ removeLeadingTrailingSpace $ intercalate " " $ lines result -emph :: GenParser Char ParserState Inline -emph = enclosed (char '*') (char '*') inline >>= +-- succeeds only if we're not right after a str (ie. in middle of word) +atStart :: Parser [Char] ParserState a -> Parser [Char] ParserState a +atStart p = do + pos <- getPosition + st <- getState + -- single quote start can't be right after str + guard $ stateLastStrPos st /= Just pos + p + +emph :: Parser [Char] ParserState Inline +emph = enclosed (atStart $ char '*') (char '*') inline >>= return . Emph . normalizeSpaces -strong :: GenParser Char ParserState Inline -strong = enclosed (string "**") (try $ string "**") inline >>= +strong :: Parser [Char] ParserState Inline +strong = enclosed (atStart $ string "**") (try $ string "**") inline >>= return . Strong . normalizeSpaces -- Parses inline interpreted text which is required to have the given role. -- This decision is based on the role marker (if present), -- and the current default interpreted text role. -interpreted :: [Char] -> GenParser Char ParserState [Char] +interpreted :: [Char] -> Parser [Char] ParserState [Char] interpreted role = try $ do state <- getState if role == stateRstDefaultRole state @@ -856,30 +870,30 @@ interpreted role = try $ do -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules -- but it should be good enough for most purposes unmarkedInterpretedText = do - result <- enclosed (char '`') (char '`') anyChar + result <- enclosed (atStart $ char '`') (char '`') anyChar return result -superscript :: GenParser Char ParserState Inline +superscript :: Parser [Char] ParserState Inline superscript = interpreted "sup" >>= \x -> return (Superscript [Str x]) -subscript :: GenParser Char ParserState Inline +subscript :: Parser [Char] ParserState Inline subscript = interpreted "sub" >>= \x -> return (Subscript [Str x]) -math :: GenParser Char ParserState Inline +math :: Parser [Char] ParserState Inline math = interpreted "math" >>= \x -> return (Math InlineMath x) -whitespace :: GenParser Char ParserState Inline +whitespace :: Parser [Char] ParserState Inline whitespace = many1 spaceChar >> return Space <?> "whitespace" -str :: GenParser Char ParserState Inline +str :: Parser [Char] ParserState Inline str = do - result <- many1 (noneOf (specialChars ++ "\t\n ")) - pos <- getPosition - updateState $ \s -> s{ stateLastStrPos = Just pos } + let strChar = noneOf ("\t\n " ++ specialChars) + result <- many1 strChar + updateLastStrPos return $ Str result -- an endline character that can be treated as a space, not a structural break -endline :: GenParser Char ParserState Inline +endline :: Parser [Char] ParserState Inline endline = try $ do newline notFollowedBy blankline @@ -895,14 +909,14 @@ endline = try $ do -- links -- -link :: GenParser Char ParserState Inline +link :: Parser [Char] ParserState Inline link = choice [explicitLink, referenceLink, autoLink] <?> "link" -explicitLink :: GenParser Char ParserState Inline +explicitLink :: Parser [Char] ParserState Inline explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code - label' <- manyTill (notFollowedBy (char '`') >> inline) + label' <- manyTill (notFollowedBy (char '`') >> inline) (try (spaces >> char '<')) src <- manyTill (noneOf ">\n") (char '>') skipSpaces @@ -910,53 +924,53 @@ explicitLink = try $ do return $ Link (normalizeSpaces label') (escapeURI $ removeLeadingTrailingSpace src, "") -referenceLink :: GenParser Char ParserState Inline +referenceLink :: Parser [Char] ParserState Inline referenceLink = try $ do - label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' + (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) >>~ + char '_' state <- getState let keyTable = stateKeys state - let isAnonKey x = case fromKey x of - [Str ('_':_)] -> True - _ -> False - key <- option (toKey label') $ + let isAnonKey (Key ('_':_)) = True + isAnonKey _ = False + key <- option (toKey $ stripTicks ref) $ do char '_' let anonKeys = sort $ filter isAnonKey $ M.keys keyTable if null anonKeys - then pzero + then mzero else return (head anonKeys) - (src,tit) <- case lookupKeySrc keyTable key of + (src,tit) <- case M.lookup key keyTable of Nothing -> fail "no corresponding key" Just target -> return target -- if anonymous link, remove key so it won't be used again when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } - return $ Link (normalizeSpaces label') (src, tit) + return $ Link (normalizeSpaces label') (src, tit) -autoURI :: GenParser Char ParserState Inline +autoURI :: Parser [Char] ParserState Inline autoURI = do (orig, src) <- uri return $ Link [Str orig] (src, "") -autoEmail :: GenParser Char ParserState Inline +autoEmail :: Parser [Char] ParserState Inline autoEmail = do (orig, src) <- emailAddress return $ Link [Str orig] (src, "") -autoLink :: GenParser Char ParserState Inline +autoLink :: Parser [Char] ParserState Inline autoLink = autoURI <|> autoEmail -- For now, we assume that all substitution references are for images. -image :: GenParser Char ParserState Inline +image :: Parser [Char] ParserState Inline image = try $ do char '|' - ref <- manyTill inline (char '|') + (alt,ref) <- withRaw (manyTill inline (char '|')) state <- getState let keyTable = stateKeys state - (src,tit) <- case lookupKeySrc keyTable (toKey ref) of + (src,tit) <- case M.lookup (toKey $ init ref) keyTable of Nothing -> fail "no corresponding key" Just target -> return target - return $ Image (normalizeSpaces ref) (src, tit) + return $ Image (normalizeSpaces alt) (src, tit) -note :: GenParser Char ParserState Inline +note :: Parser [Char] ParserState Inline note = try $ do ref <- noteMarker char '_' diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 67dfe6753..fe49a992e 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.TeXMath Copyright : Copyright (C) 2007-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 348900d38..89f281ae8 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Textile Copyright : Copyright (C) 2010-2012 Paul Rivier and John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : Paul Rivier <paul*rivier#demotera*com> Stability : alpha @@ -56,29 +56,34 @@ TODO : refactor common patterns across readers : module Text.Pandoc.Readers.Textile ( readTextile) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) -import Text.ParserCombinators.Parsec import Text.HTML.TagSoup.Match import Data.Char ( digitToInt, isUpper ) import Control.Monad ( guard, liftM ) import Control.Applicative ((<$>), (*>), (<*)) -- | Parse a Textile text and return a Pandoc document. -readTextile :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readTextile state s = - (readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n") +readTextile :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readTextile opts s = + (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n") -- | Generate a Pandoc ADT from a textile document -parseTextile :: GenParser Char ParserState Pandoc +parseTextile :: Parser [Char] ParserState Pandoc parseTextile = do -- textile allows raw HTML and does smart punctuation by default - updateState (\state -> state { stateParseRaw = True, stateSmart = True }) + oldOpts <- stateOptions `fmap` getState + updateState $ \state -> state{ stateOptions = + oldOpts{ readerSmart = True + , readerParseRaw = True + , readerOldDashes = True + } } many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes @@ -93,10 +98,10 @@ parseTextile = do blocks <- parseBlocks return $ Pandoc (Meta [] [] []) blocks -- FIXME -noteMarker :: GenParser Char ParserState [Char] +noteMarker :: Parser [Char] ParserState [Char] noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parser [Char] ParserState [Char] noteBlock = try $ do startPos <- getPosition ref <- noteMarker @@ -111,11 +116,11 @@ noteBlock = try $ do return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -- | Parse document blocks -parseBlocks :: GenParser Char ParserState [Block] +parseBlocks :: Parser [Char] ParserState [Block] parseBlocks = manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: [GenParser Char ParserState Block] +blockParsers :: [Parser [Char] ParserState Block] blockParsers = [ codeBlock , header , blockQuote @@ -128,20 +133,20 @@ blockParsers = [ codeBlock , nullBlock ] -- | Any block in the order of definition of blockParsers -block :: GenParser Char ParserState Block +block :: Parser [Char] ParserState Block block = choice blockParsers <?> "block" -codeBlock :: GenParser Char ParserState Block +codeBlock :: Parser [Char] ParserState Block codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: GenParser Char ParserState Block +codeBlockBc :: Parser [Char] ParserState Block codeBlockBc = try $ do string "bc. " contents <- manyTill anyLine blanklines return $ CodeBlock ("",[],[]) $ unlines contents -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: GenParser Char ParserState Block +codeBlockPre :: Parser [Char] ParserState Block codeBlockPre = try $ do htmlTag (tagOpen (=="pre") null) result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak) @@ -156,7 +161,7 @@ codeBlockPre = try $ do return $ CodeBlock ("",[],[]) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: GenParser Char ParserState Block +header :: Parser [Char] ParserState Block header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -165,14 +170,14 @@ header = try $ do return $ Header level name -- | Blockquote of the form "bq. content" -blockQuote :: GenParser Char ParserState Block +blockQuote :: Parser [Char] ParserState Block blockQuote = try $ do string "bq" >> optional attributes >> char '.' >> whitespace BlockQuote . singleton <$> para -- Horizontal rule -hrule :: GenParser Char st Block +hrule :: Parser [Char] st Block hrule = try $ do skipSpaces start <- oneOf "-*" @@ -187,39 +192,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: GenParser Char ParserState Block +anyList :: Parser [Char] ParserState Block anyList = try $ ( (anyListAtDepth 1) <* blanklines ) -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: Int -> GenParser Char ParserState Block +anyListAtDepth :: Int -> Parser [Char] ParserState Block anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: Int -> GenParser Char ParserState Block +bulletListAtDepth :: Int -> Parser [Char] ParserState Block bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block] +bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block] bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: Int -> GenParser Char ParserState Block +orderedListAtDepth :: Int -> Parser [Char] ParserState Block orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return (OrderedList (1, DefaultStyle, DefaultDelim) items) -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block] +orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block] orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: Char -> Int -> GenParser Char ParserState [Block] +genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block] genericListItemAtDepth c depth = try $ do count depth (char c) >> optional attributes >> whitespace p <- inlines @@ -227,22 +232,22 @@ genericListItemAtDepth c depth = try $ do return ((Plain p):sublist) -- | A definition list is a set of consecutive definition items -definitionList :: GenParser Char ParserState Block +definitionList :: Parser [Char] ParserState Block definitionList = try $ DefinitionList <$> many1 definitionListItem - + -- | A definition list item in textile begins with '- ', followed by -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]]) definitionListItem = try $ do string "- " term <- many1Till inline (try (whitespace >> string ":=")) - def <- inlineDef <|> multilineDef - return (term, def) - where inlineDef :: GenParser Char ParserState [[Block]] + def' <- inlineDef <|> multilineDef + return (term, def') + where inlineDef :: Parser [Char] ParserState [[Block]] inlineDef = liftM (\d -> [[Plain d]]) $ try (whitespace >> inlines) - multilineDef :: GenParser Char ParserState [[Block]] + multilineDef :: Parser [Char] ParserState [[Block]] multilineDef = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) @@ -252,76 +257,76 @@ definitionListItem = try $ do -- | This terminates a block such as a paragraph. Because of raw html -- blocks support, we have to lookAhead for a rawHtmlBlock. -blockBreak :: GenParser Char ParserState () +blockBreak :: Parser [Char] ParserState () blockBreak = try (newline >> blanklines >> return ()) <|> (lookAhead rawHtmlBlock >> return ()) -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: GenParser Char ParserState Block +rawHtmlBlock :: Parser [Char] ParserState Block rawHtmlBlock = try $ do (_,b) <- htmlTag isBlockTag optional blanklines return $ RawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: GenParser Char ParserState Block +rawLaTeXBlock' :: Parser [Char] ParserState Block rawLaTeXBlock' = do - failIfStrict + guardEnabled Ext_raw_tex RawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: GenParser Char ParserState Block +para :: Parser [Char] ParserState Block para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak -- Tables - + -- | A table cell spans until a pipe | -tableCell :: GenParser Char ParserState TableCell +tableCell :: Parser [Char] ParserState TableCell tableCell = do c <- many1 (noneOf "|\n") content <- parseFromString (many1 inline) c return $ [ Plain $ normalizeSpaces content ] -- | A table row is made of many table cells -tableRow :: GenParser Char ParserState [TableCell] +tableRow :: Parser [Char] ParserState [TableCell] tableRow = try $ ( char '|' *> (endBy1 tableCell (char '|')) <* newline) -- | Many table rows -tableRows :: GenParser Char ParserState [[TableCell]] +tableRows :: Parser [Char] ParserState [[TableCell]] tableRows = many1 tableRow -- | Table headers are made of cells separated by a tag "|_." -tableHeaders :: GenParser Char ParserState [TableCell] +tableHeaders :: Parser [Char] ParserState [TableCell] tableHeaders = let separator = (try $ string "|_.") in try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline ) - + -- | A table with an optional header. Current implementation can -- handle tables with and without header, but will parse cells -- alignment attributes as content. -table :: GenParser Char ParserState Block +table :: Parser [Char] ParserState Block table = try $ do headers <- option [] tableHeaders rows <- tableRows blanklines let nbOfCols = max (length headers) (length $ head rows) - return $ Table [] + return $ Table [] (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0) headers rows - + -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: String -- ^ block tag name - -> GenParser Char ParserState Block -- ^ implicit block - -> GenParser Char ParserState Block + -> Parser [Char] ParserState Block -- ^ implicit block + -> Parser [Char] ParserState Block maybeExplicitBlock name blk = try $ do - optional $ try $ string name >> optional attributes >> char '.' >> + optional $ try $ string name >> optional attributes >> char '.' >> ((try whitespace) <|> endline) blk @@ -333,15 +338,15 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: GenParser Char ParserState Inline +inline :: Parser [Char] ParserState Inline inline = choice inlineParsers <?> "inline" -- | List of consecutive inlines before a newline -inlines :: GenParser Char ParserState [Inline] +inlines :: Parser [Char] ParserState [Inline] inlines = manyTill inline newline -- | Inline parsers tried in order -inlineParsers :: [GenParser Char ParserState Inline] +inlineParsers :: [Parser [Char] ParserState Inline] inlineParsers = [ autoLink , str , whitespace @@ -362,7 +367,7 @@ inlineParsers = [ autoLink ] -- | Inline markups -inlineMarkup :: GenParser Char ParserState Inline +inlineMarkup :: Parser [Char] ParserState Inline inlineMarkup = choice [ simpleInline (string "??") (Cite []) , simpleInline (string "**") Strong , simpleInline (string "__") Emph @@ -375,29 +380,29 @@ inlineMarkup = choice [ simpleInline (string "??") (Cite []) ] -- | Trademark, registered, copyright -mark :: GenParser Char st Inline +mark :: Parser [Char] st Inline mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: GenParser Char st Inline +reg :: Parser [Char] st Inline reg = do oneOf "Rr" char ')' return $ Str "\174" -tm :: GenParser Char st Inline +tm :: Parser [Char] st Inline tm = do oneOf "Tt" oneOf "Mm" char ')' return $ Str "\8482" -copy :: GenParser Char st Inline +copy :: Parser [Char] st Inline copy = do oneOf "Cc" char ')' return $ Str "\169" -note :: GenParser Char ParserState Inline +note :: Parser [Char] ParserState Inline note = try $ do ref <- (char '[' *> many1 digit <* char ']') notes <- stateNotes <$> getState @@ -405,7 +410,7 @@ note = try $ do Nothing -> fail "note not found" Just raw -> liftM Note $ parseFromString parseBlocks raw --- | Special chars +-- | Special chars markupChars :: [Char] markupChars = "\\[]*#_@~-+^|%=" @@ -421,17 +426,17 @@ wordBoundaries :: [Char] wordBoundaries = markupChars ++ stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: GenParser Char ParserState String +hyphenedWords :: Parser [Char] ParserState String hyphenedWords = try $ do hd <- noneOf wordBoundaries - tl <- many ( (noneOf wordBoundaries) <|> + tl <- many ( (noneOf wordBoundaries) <|> try (oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) ) let wd = hd:tl - option wd $ try $ + option wd $ try $ (\r -> concat [wd, "-", r]) <$> (char '-' *> hyphenedWords) -- | Any string -str :: GenParser Char ParserState Inline +str :: Parser [Char] ParserState Inline str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediatly @@ -444,44 +449,57 @@ str = do return $ Str fullStr -- | Textile allows HTML span infos, we discard them -htmlSpan :: GenParser Char ParserState Inline +htmlSpan :: Parser [Char] ParserState Inline htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') ) -- | Some number of space chars -whitespace :: GenParser Char ParserState Inline +whitespace :: Parser [Char] ParserState Inline whitespace = many1 spaceChar >> return Space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: GenParser Char ParserState Inline +endline :: Parser [Char] ParserState Inline endline = try $ do newline >> notFollowedBy blankline return LineBreak -rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline :: Parser [Char] ParserState Inline rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag - --- | Raw LaTeX Inline -rawLaTeXInline' :: GenParser Char ParserState Inline + +-- | Raw LaTeX Inline +rawLaTeXInline' :: Parser [Char] ParserState Inline rawLaTeXInline' = try $ do - failIfStrict + guardEnabled Ext_raw_tex rawLaTeXInline --- | Textile standard link syntax is "label":target -link :: GenParser Char ParserState Inline -link = try $ do +-- | Textile standard link syntax is "label":target. But we +-- can also have ["label":target]. +link :: Parser [Char] ParserState Inline +link = linkB <|> linkNoB + +linkNoB :: Parser [Char] ParserState Inline +linkNoB = try $ do + name <- surrounded (char '"') inline + char ':' + let stopChars = "!.,;:" + url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline))) + return $ Link name (url, "") + +linkB :: Parser [Char] ParserState Inline +linkB = try $ do + char '[' name <- surrounded (char '"') inline char ':' - url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;,:" >> (space <|> newline)))) + url <- manyTill nonspaceChar (char ']') return $ Link name (url, "") -- | Detect plain links to http or email. -autoLink :: GenParser Char ParserState Inline +autoLink :: Parser [Char] ParserState Inline autoLink = do (orig, src) <- (try uri <|> try emailAddress) return $ Link [Str orig] (src, "") -- | image embedding -image :: GenParser Char ParserState Inline +image :: Parser [Char] ParserState Inline image = try $ do char '!' >> notFollowedBy space src <- manyTill anyChar (lookAhead $ oneOf "!(") @@ -489,49 +507,49 @@ image = try $ do char '!' return $ Image [Str alt] (src, alt) -escapedInline :: GenParser Char ParserState Inline +escapedInline :: Parser [Char] ParserState Inline escapedInline = escapedEqs <|> escapedTag -escapedEqs :: GenParser Char ParserState Inline +escapedEqs :: Parser [Char] ParserState Inline escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: GenParser Char ParserState Inline +escapedTag :: Parser [Char] ParserState Inline escapedTag = Str <$> (try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: GenParser Char ParserState Inline +symbol :: Parser [Char] ParserState Inline symbol = Str . singleton <$> oneOf wordBoundaries -- | Inline code -code :: GenParser Char ParserState Inline +code :: Parser [Char] ParserState Inline code = code1 <|> code2 -code1 :: GenParser Char ParserState Inline +code1 :: Parser [Char] ParserState Inline code1 = Code nullAttr <$> surrounded (char '@') anyChar -code2 :: GenParser Char ParserState Inline +code2 :: Parser [Char] ParserState Inline code2 = do htmlTag (tagOpen (=="tt") null) Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: GenParser Char ParserState String +attributes :: Parser [Char] ParserState String attributes = choice [ enclosed (char '(') (char ')') anyChar, enclosed (char '{') (char '}') anyChar, enclosed (char '[') (char ']') anyChar] -- | Parses material surrounded by a parser. -surrounded :: GenParser Char st t -- ^ surrounding parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] +surrounded :: Parser [Char] st t -- ^ surrounding parser + -> Parser [Char] st a -- ^ content parser (to be used repeatedly) + -> Parser [Char] st [a] surrounded border = enclosed border (try border) -- | Inlines are most of the time of the same form -simpleInline :: GenParser Char ParserState t -- ^ surrounding parser +simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> ([Inline] -> Inline) -- ^ Inline constructor - -> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly) + -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly) simpleInline border construct = surrounded border (inlineWithAttribute) >>= return . construct . normalizeSpaces where inlineWithAttribute = (try $ optional attributes) >> inline |