diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2008-07-11 16:33:21 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2008-07-11 16:33:21 +0000 |
commit | 752adcd45aaabef53fe3ae8677a0547b31411765 (patch) | |
tree | 33e0433750fa37d19626830c26771781185cbf98 /Text | |
parent | 45044ff536f9f5156cb238366d90a8b03042d201 (diff) | |
download | pandoc-752adcd45aaabef53fe3ae8677a0547b31411765.tar.gz |
Added type signatures and fixed other -Wall warnings in Markdown reader.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1301 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Text')
-rw-r--r-- | Text/Pandoc/Readers/Markdown.hs | 217 |
1 files changed, 182 insertions, 35 deletions
diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs index ef75a85f4..f702a6e33 100644 --- a/Text/Pandoc/Readers/Markdown.hs +++ b/Text/Pandoc/Readers/Markdown.hs @@ -47,30 +47,40 @@ import Text.ParserCombinators.Parsec -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ParserState -> String -> Pandoc -readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n") +readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n") -- -- Constants and data structure definitions -- +spaceChars :: [Char] spaceChars = " \t" + +bulletListMarkers :: [Char] bulletListMarkers = "*+-" + +hruleChars :: [Char] hruleChars = "*-_" + +setextHChars :: [Char] setextHChars = "=-" -- treat these as potentially non-text when parsing inline: +specialChars :: [Char] specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221" -- -- auxiliary functions -- +indentSpaces :: GenParser Char ParserState [Char] indentSpaces = try $ do state <- getState let tabStop = stateTabStop state try (count tabStop (char ' ')) <|> (many (char ' ') >> string "\t") <?> "indentation" +nonindentSpaces :: GenParser Char ParserState [Char] nonindentSpaces = do state <- getState let tabStop = stateTabStop state @@ -80,11 +90,13 @@ nonindentSpaces = do else unexpected "indented line" -- | Fail unless we're at beginning of a line. +failUnlessBeginningOfLine :: GenParser tok st () failUnlessBeginningOfLine = do pos <- getPosition if sourceColumn pos == 1 then return () else fail "not beginning of line" -- | Fail unless we're in "smart typography" mode. +failUnlessSmart :: GenParser tok ParserState () failUnlessSmart = do state <- getState if stateSmart state then return () else fail "Smart typography feature" @@ -109,8 +121,10 @@ inlinesInBalancedBrackets parser = try $ do -- document structure -- +titleLine :: GenParser Char ParserState [Inline] titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline +authorsLine :: GenParser Char st [String] authorsLine = try $ do char '%' skipSpaces @@ -118,6 +132,7 @@ authorsLine = try $ do newline return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors +dateLine :: GenParser Char st String dateLine = try $ do char '%' skipSpaces @@ -125,6 +140,7 @@ dateLine = try $ do newline return $ decodeCharacterReferences $ removeTrailingSpace date +titleBlock :: GenParser Char ParserState ([Inline], [String], [Char]) titleBlock = try $ do failIfStrict title <- option [] titleLine @@ -133,6 +149,7 @@ titleBlock = try $ do optional blanklines return (title, author, date) +parseMarkdown :: GenParser Char ParserState Pandoc parseMarkdown = do -- markdown allows raw HTML updateState (\state -> state { stateParseRaw = True }) @@ -149,9 +166,9 @@ parseMarkdown = do then return () else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= return . concat - st <- getState - let reversedNotes = stateNotes st - updateState $ \st -> st { stateNotes = reverse reversedNotes } + st' <- getState + let reversedNotes = stateNotes st' + updateState $ \s -> s { stateNotes = reverse reversedNotes } setInput docMinusNotes setPosition startPos -- now parse it for real... @@ -163,10 +180,11 @@ parseMarkdown = do -- initial pass for references and notes -- +referenceKey :: GenParser Char ParserState [Char] referenceKey = try $ do startPos <- getPosition nonindentSpaces - label <- reference + lab <- reference char ':' skipSpaces optional (char '<') @@ -175,13 +193,14 @@ referenceKey = try $ do tit <- option "" referenceTitle blanklines endPos <- getPosition - let newkey = (label, (removeTrailingSpace src, tit)) + let newkey = (lab, (removeTrailingSpace src, tit)) st <- getState let oldkeys = stateKeys st - updateState $ \st -> st { stateKeys = newkey : oldkeys } + updateState $ \s -> s { stateKeys = newkey : oldkeys } -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' +referenceTitle :: GenParser Char st String referenceTitle = try $ do (many1 spaceChar >> option '\n' newline) <|> newline skipSpaces @@ -191,8 +210,10 @@ referenceTitle = try $ do notFollowedBy (noneOf ")\n"))) return $ decodeCharacterReferences tit +noteMarker :: GenParser Char st [Char] noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']') +rawLine :: GenParser Char ParserState [Char] rawLine = do notFollowedBy blankline notFollowedBy' noteMarker @@ -200,8 +221,10 @@ rawLine = do end <- option "" (newline >> optional indentSpaces >> return "\n") return $ contents ++ end +rawLines :: GenParser Char ParserState [Char] rawLines = many1 rawLine >>= return . concat +noteBlock :: GenParser Char ParserState [Char] noteBlock = try $ do startPos <- getPosition ref <- noteMarker @@ -216,7 +239,7 @@ noteBlock = try $ do let newnote = (ref, contents) st <- getState let oldnotes = stateNotes st - updateState $ \st -> st { stateNotes = newnote : oldnotes } + updateState $ \s -> s { stateNotes = newnote : oldnotes } -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' @@ -224,8 +247,10 @@ noteBlock = try $ do -- parsing blocks -- +parseBlocks :: GenParser Char ParserState [Block] parseBlocks = manyTill block eof +block :: GenParser Char ParserState Block block = do st <- getState choice (if stateStrict st @@ -258,8 +283,10 @@ block = do -- header blocks -- +header :: GenParser Char ParserState Block header = atxHeader <|> setextHeader <?> "header" +atxHeader :: GenParser Char ParserState Block atxHeader = try $ do level <- many1 (char '#') >>= return . length notFollowedBy (char '.' <|> char ')') -- this would be a list @@ -267,8 +294,10 @@ atxHeader = try $ do text <- manyTill inline atxClosing >>= return . normalizeSpaces return $ Header level text +atxClosing :: GenParser Char st [Char] atxClosing = try $ skipMany (char '#') >> blanklines +setextHeader :: GenParser Char ParserState Block setextHeader = try $ do text <- many1Till inline newline underlineChar <- oneOf setextHChars @@ -281,6 +310,7 @@ setextHeader = try $ do -- hrule block -- +hrule :: GenParser Char st Block hrule = try $ do skipSpaces start <- oneOf hruleChars @@ -294,8 +324,11 @@ hrule = try $ do -- code blocks -- +indentedLine :: GenParser Char ParserState [Char] indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") +codeBlockDelimiter :: Maybe Int + -> GenParser Char st (Int, ([Char], [[Char]], [([Char], [Char])])) codeBlockDelimiter len = try $ do size <- case len of Just l -> count l (char '~') >> many (char '~') >> return l @@ -306,32 +339,38 @@ codeBlockDelimiter len = try $ do blankline return (size, attr) +attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) attributes = try $ do char '{' many spaceChar attrs <- many (attribute >>~ many spaceChar) char '}' let (ids, classes, keyvals) = unzip3 attrs - let id = if null ids then "" else head ids - return (id, concat classes, concat keyvals) + let id' = if null ids then "" else head ids + return (id', concat classes, concat keyvals) +attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) attribute = identifierAttr <|> classAttr <|> keyValAttr +identifier :: GenParser Char st [Char] identifier = do first <- letter rest <- many alphaNum return (first:rest) +identifierAttr :: GenParser Char st ([Char], [a], [a1]) identifierAttr = try $ do char '#' result <- identifier return (result,[],[]) +classAttr :: GenParser Char st ([Char], [[Char]], [a]) classAttr = try $ do char '.' result <- identifier return ("",[result],[]) +keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])]) keyValAttr = try $ do key <- identifier char '=' @@ -339,12 +378,14 @@ keyValAttr = try $ do val <- manyTill (noneOf "\n") (char '"') return ("",[],[(key,val)]) +codeBlockDelimited :: GenParser Char st Block codeBlockDelimited = try $ do (size, attr) <- codeBlockDelimiter Nothing contents <- manyTill anyLine (codeBlockDelimiter (Just size)) blanklines return $ CodeBlock attr $ joinWithSep "\n" contents +codeBlockIndented :: GenParser Char ParserState Block codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -357,8 +398,10 @@ codeBlockIndented = do -- block quotes -- +emailBlockQuoteStart :: GenParser Char ParserState Char emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') +emailBlockQuote :: GenParser Char ParserState [[Char]] emailBlockQuote = try $ do emailBlockQuoteStart raw <- sepBy (many (nonEndline <|> @@ -369,6 +412,7 @@ emailBlockQuote = try $ do optional blanklines return raw +blockQuote :: GenParser Char ParserState Block blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: @@ -379,6 +423,7 @@ blockQuote = do -- list blocks -- +bulletListStart :: GenParser Char ParserState () bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context nonindentSpaces @@ -387,6 +432,7 @@ bulletListStart = try $ do spaceChar skipSpaces +anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) anyOrderedListStart = try $ do optional newline -- if preceded by a Plain block in a list context nonindentSpaces @@ -399,6 +445,9 @@ anyOrderedListStart = try $ do return (1, DefaultStyle, DefaultDelim) else anyOrderedListMarker >>~ spaceChar +orderedListStart :: ListNumberStyle + -> ListNumberDelim + -> GenParser Char ParserState () orderedListStart style delim = try $ do optional newline -- if preceded by a Plain block in a list context nonindentSpaces @@ -415,6 +464,8 @@ orderedListStart style delim = try $ do skipSpaces -- parse a line of a list item (start = parser for beginning of list item) +listLine :: GenParser Char ParserState () + -> GenParser Char ParserState [Char] listLine start = try $ do notFollowedBy' start notFollowedBy blankline @@ -425,6 +476,8 @@ listLine start = try $ do return $ line ++ "\n" -- parse raw text for one list item, excluding start marker and continuations +rawListItem :: GenParser Char ParserState () + -> GenParser Char ParserState [Char] rawListItem start = try $ do start result <- many1 (listLine start) @@ -434,12 +487,15 @@ rawListItem start = try $ do -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations +listContinuation :: GenParser Char ParserState () -> GenParser Char ParserState [Char] listContinuation start = try $ do lookAhead indentSpaces result <- many1 (listContinuationLine start) blanks <- many blankline return $ concat result ++ blanks +listContinuationLine :: GenParser Char ParserState () + -> GenParser Char ParserState [Char] listContinuationLine start = try $ do notFollowedBy blankline notFollowedBy' start @@ -447,6 +503,8 @@ listContinuationLine start = try $ do result <- manyTill anyChar newline return $ result ++ "\n" +listItem :: GenParser Char ParserState () + -> GenParser Char ParserState [Block] listItem start = try $ do first <- rawListItem start continuations <- many (listContinuation start) @@ -462,16 +520,19 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents +orderedList :: GenParser Char ParserState Block orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart items <- many1 (listItem (orderedListStart style delim)) return $ OrderedList (start, style, delim) $ compactify items +bulletList :: GenParser Char ParserState Block bulletList = many1 (listItem bulletListStart) >>= return . BulletList . compactify -- definition lists +definitionListItem :: GenParser Char ParserState ([Inline], [Block]) definitionListItem = try $ do notFollowedBy blankline notFollowedBy' indentSpaces @@ -486,6 +547,7 @@ definitionListItem = try $ do updateState (\st -> st {stateParserContext = oldContext}) return ((normalizeSpaces term), contents) +defRawBlock :: GenParser Char ParserState [Char] defRawBlock = try $ do char ':' state <- getState @@ -496,6 +558,7 @@ defRawBlock = try $ do trailing <- option "" blanklines return $ firstline ++ "\n" ++ unlines rawlines ++ trailing +definitionList :: GenParser Char ParserState Block definitionList = do items <- many1 definitionListItem let (terms, defs) = unzip items @@ -507,11 +570,13 @@ definitionList = do -- paragraph block -- +isHtmlOrBlank :: Inline -> Bool isHtmlOrBlank (HtmlInline _) = True -isHtmlOrBlank (Space) = True -isHtmlOrBlank (LineBreak) = True -isHtmlOrBlank _ = False +isHtmlOrBlank (Space) = True +isHtmlOrBlank (LineBreak) = True +isHtmlOrBlank _ = False +para :: GenParser Char ParserState Block para = try $ do result <- many1 inline if all isHtmlOrBlank result @@ -524,14 +589,17 @@ para = try $ do else pzero return $ Para $ normalizeSpaces result +plain :: GenParser Char ParserState Block plain = many1 inline >>= return . Plain . normalizeSpaces -- -- raw html -- +htmlElement :: GenParser Char ParserState [Char] htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element" +htmlBlock :: GenParser Char ParserState Block htmlBlock = try $ do failUnlessBeginningOfLine first <- htmlElement @@ -540,9 +608,11 @@ htmlBlock = try $ do return $ RawHtml $ first ++ finalSpace ++ finalNewlines -- True if tag is self-closing +isSelfClosing :: [Char] -> Bool isSelfClosing tag = isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag +strictHtmlBlock :: GenParser Char ParserState [Char] strictHtmlBlock = try $ do tag <- anyHtmlBlockTag let tag' = extractTagType tag @@ -553,6 +623,7 @@ strictHtmlBlock = try $ do end <- htmlEndTag tag' return $ tag ++ concat contents ++ end +rawHtmlBlocks :: GenParser Char ParserState Block rawHtmlBlocks = do htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock sps <- do sp1 <- many spaceChar @@ -574,6 +645,8 @@ rawHtmlBlocks = do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. +dashedLine :: Char + -> GenParser Char st (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -581,24 +654,29 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one line of text. +simpleTableHeader :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) simpleTableHeader = try $ do rawContent <- anyLine initSp <- nonindentSpaces dashes <- many1 (dashedLine '-') newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines + let (lengths, lines') = unzip dashes + let indices = scanl (+) (length initSp) lines' let rawHeads = tail $ splitByIndices (init indices) rawContent let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths return (rawHeads, aligns, indices) -- Parse a table footer - dashed lines followed by blank line. +tableFooter :: GenParser Char ParserState [Char] tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. +tableSep :: GenParser Char ParserState String tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" -- Parse a raw line and split it into chunks by indices. +rawTableLine :: [Int] + -> GenParser Char ParserState [String] rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline @@ -606,9 +684,13 @@ rawTableLine indices = do splitByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). +tableLine :: [Int] + -> GenParser Char ParserState [[Block]] tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) -- Parse a multiline table row and return a list of blocks (columns). +multilineRow :: [Int] + -> GenParser Char ParserState [[Block]] multilineRow indices = do colLines <- many1 (rawTableLine indices) optional blanklines @@ -631,6 +713,7 @@ widthsFromIndices numColumns indices = -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. +tableCaption :: GenParser Char ParserState [Inline] tableCaption = try $ do nonindentSpaces string "Table:" @@ -639,33 +722,40 @@ tableCaption = try $ do return $ normalizeSpaces result -- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. +tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> ([Int] -> GenParser Char ParserState [[Block]]) + -> GenParser Char ParserState end + -> GenParser Char ParserState Block tableWith headerParser lineParser footerParser = try $ do (rawHeads, aligns, indices) <- headerParser - lines <- many1Till (lineParser indices) footerParser + lines' <- many1Till (lineParser indices) footerParser caption <- option [] tableCaption heads <- mapM (parseFromString (many plain)) rawHeads state <- getState let numColumns = stateColumns state let widths = widthsFromIndices numColumns indices - return $ Table caption aligns widths heads lines + return $ Table caption aligns widths heads lines' -- Parse a simple table with '---' header and one line per row. +simpleTable :: GenParser Char ParserState Block simpleTable = tableWith simpleTableHeader tableLine blanklines -- Parse a multiline table: starts with row of '-' on top, then header -- (which may be multiline), then the rows, -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). +multilineTable :: GenParser Char ParserState Block multilineTable = tableWith multilineTableHeader multilineRow tableFooter +multilineTableHeader :: GenParser Char ParserState ([String], [Alignment], [Int]) multilineTableHeader = try $ do tableSep rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) initSp <- nonindentSpaces dashes <- many1 (dashedLine '-') newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines + let (lengths, lines') = unzip dashes + let indices = scanl (+) (length initSp) lines' let rawHeadsList = transpose $ map (\ln -> tail $ splitByIndices (init indices) ln) rawContent @@ -676,27 +766,32 @@ multilineTableHeader = try $ do -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the -- dashed line under the rows. -alignType :: [String] -> Int -> Alignment -alignType [] len = AlignDefault +alignType :: [String] + -> Int + -> Alignment +alignType [] _ = AlignDefault alignType strLst len = - let str = head $ sortBy (comparing length) $ + let s = head $ sortBy (comparing length) $ map removeTrailingSpace strLst - leftSpace = if null str then False else (str !! 0) `elem` " \t" - rightSpace = length str < len || (str !! (len - 1)) `elem` " \t" + leftSpace = if null s then False else (s !! 0) `elem` " \t" + rightSpace = length s < len || (s !! (len - 1)) `elem` " \t" in case (leftSpace, rightSpace) of (True, False) -> AlignRight (False, True) -> AlignLeft (True, True) -> AlignCenter (False, False) -> AlignDefault +table :: GenParser Char ParserState Block table = simpleTable <|> multilineTable <?> "table" -- -- inline -- +inline :: GenParser Char ParserState Inline inline = choice inlineParsers <?> "inline" +inlineParsers :: [GenParser Char ParserState Inline] inlineParsers = [ abbrev , str , smartPunctuation @@ -721,13 +816,16 @@ inlineParsers = [ abbrev , symbol , ltSign ] +inlineNonLink :: GenParser Char ParserState Inline inlineNonLink = (choice $ map (\parser -> try (parser >>= failIfLink)) inlineParsers) <?> "inline (non-link)" +failIfLink :: Inline -> GenParser tok st Inline failIfLink (Link _ _) = pzero -failIfLink elt = return elt +failIfLink elt = return elt +escapedChar :: GenParser Char ParserState Inline escapedChar = do char '\\' state <- getState @@ -739,6 +837,7 @@ escapedChar = do else result return $ Str [result'] +ltSign :: GenParser Char ParserState Inline ltSign = do st <- getState if stateStrict st @@ -746,13 +845,16 @@ ltSign = do else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html return $ Str ['<'] +specialCharsMinusLt :: [Char] specialCharsMinusLt = filter (/= '<') specialChars +symbol :: GenParser Char ParserState Inline symbol = do result <- oneOf specialCharsMinusLt return $ Str [result] -- parses inline code, between n `s and n `s +code :: GenParser Char ParserState Inline code = try $ do starts <- many1 (char '`') skipSpaces @@ -762,43 +864,52 @@ code = try $ do notFollowedBy (char '`'))) return $ Code $ removeLeadingTrailingSpace $ concat result +mathWord :: GenParser Char st [Char] mathWord = many1 ((noneOf " \t\n\\$") <|> (try (char '\\') >>~ notFollowedBy (char '$'))) +math :: GenParser Char ParserState Inline math = try $ do failIfStrict char '$' notFollowedBy space - words <- sepBy1 mathWord (many1 space) + words' <- sepBy1 mathWord (many1 space) char '$' - return $ Math $ joinWithSep " " words + return $ Math $ joinWithSep " " words' +emph :: GenParser Char ParserState Inline emph = ((enclosed (char '*') (notFollowedBy' strong >> char '*') inline) <|> (enclosed (char '_') (notFollowedBy' strong >> char '_' >> notFollowedBy alphaNum) inline)) >>= return . Emph . normalizeSpaces +strong :: GenParser Char ParserState Inline strong = ((enclosed (string "**") (try $ string "**") inline) <|> (enclosed (string "__") (try $ string "__") inline)) >>= return . Strong . normalizeSpaces +strikeout :: GenParser Char ParserState Inline strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>= return . Strikeout . normalizeSpaces +superscript :: GenParser Char ParserState Inline superscript = failIfStrict >> enclosed (char '^') (char '^') (notFollowedBy' whitespace >> inline) >>= -- may not contain Space return . Superscript +subscript :: GenParser Char ParserState Inline subscript = failIfStrict >> enclosed (char '~') (char '~') (notFollowedBy' whitespace >> inline) >>= -- may not contain Space return . Subscript +abbrev :: GenParser Char ParserState Inline abbrev = failUnlessSmart >> (assumedAbbrev <|> knownAbbrev) >>= return . Str . (++ ".\160") -- an string of letters followed by a period that does not end a sentence -- is assumed to be an abbreviation. It is assumed that sentences don't -- start with lowercase letters or numerals. +assumedAbbrev :: GenParser Char ParserState [Char] assumedAbbrev = try $ do result <- many1 $ satisfy isAlpha string ". " @@ -807,20 +918,27 @@ assumedAbbrev = try $ do -- these strings are treated as abbreviations even if they are followed -- by a capital letter (such as a name). +knownAbbrev :: GenParser Char ParserState [Char] knownAbbrev = try $ do result <- oneOfStrings [ "Mr", "Mrs", "Ms", "Capt", "Dr", "Prof", "Gen", "Gov", "e.g", "i.e", "Sgt", "St", "vol", "vs", "Sen", "Rep", "Pres", "Hon", "Rev" ] string ". " return result - + +smartPunctuation :: GenParser Char ParserState Inline smartPunctuation = failUnlessSmart >> choice [ quoted, apostrophe, dash, ellipses ] +apostrophe :: GenParser Char ParserState Inline apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe +quoted :: GenParser Char ParserState Inline quoted = doubleQuoted <|> singleQuoted +withQuoteContext :: QuoteContext + -> (GenParser Char ParserState Inline) + -> GenParser Char ParserState Inline withQuoteContext context parser = do oldState <- getState let oldQuoteContext = stateQuoteContext oldState @@ -830,22 +948,26 @@ withQuoteContext context parser = do setState newState { stateQuoteContext = oldQuoteContext } return result +singleQuoted :: GenParser Char ParserState Inline singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= return . Quoted SingleQuote . normalizeSpaces +doubleQuoted :: GenParser Char ParserState Inline doubleQuoted = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= return . Quoted DoubleQuote . normalizeSpaces +failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () failIfInQuoteContext context = do st <- getState if stateQuoteContext st == context then fail "already inside quotes" else return () +singleQuoteStart :: GenParser Char ParserState Char singleQuoteStart = do failIfInQuoteContext InSingleQuote char '\8216' <|> @@ -856,11 +978,13 @@ singleQuoteStart = do -- possess/contraction return '\'') +singleQuoteEnd :: GenParser Char st Char singleQuoteEnd = try $ do char '\8217' <|> char '\'' notFollowedBy alphaNum return '\'' +doubleQuoteStart :: GenParser Char ParserState Char doubleQuoteStart = do failIfInQuoteContext InDoubleQuote char '\8220' <|> @@ -868,29 +992,39 @@ doubleQuoteStart = do notFollowedBy (oneOf " \t\n") return '"') +doubleQuoteEnd :: GenParser Char st Char doubleQuoteEnd = char '\8221' <|> char '"' +ellipses :: GenParser Char st Inline ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses +dash :: GenParser Char st Inline dash = enDash <|> emDash +enDash :: GenParser Char st Inline enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash +emDash :: GenParser Char st Inline emDash = oneOfStrings ["---", "--"] >> return EmDash +whitespace :: GenParser Char ParserState Inline whitespace = do sps <- many1 (oneOf spaceChars) if length sps >= 2 then option Space (endline >> return LineBreak) else return Space <?> "whitespace" +nonEndline :: GenParser Char st Char nonEndline = satisfy (/='\n') +strChar :: GenParser Char st Char strChar = noneOf (specialChars ++ spaceChars ++ "\n") +str :: GenParser Char st Inline str = many1 strChar >>= return . Str -- an endline character that can be treated as a space, not a structural break +endline :: GenParser Char ParserState Inline endline = try $ do newline notFollowedBy blankline @@ -911,11 +1045,13 @@ endline = try $ do -- -- a reference label for a link +reference :: GenParser Char ParserState [Inline] reference = do notFollowedBy' (string "[^") -- footnote reference result <- inlinesInBalancedBrackets inlineNonLink return $ normalizeSpaces result -- source for a link, with optional title +source :: GenParser Char st (String, [Char]) source = (try $ charsInBalanced '(' ')' >>= parseFromString source') <|> -- the following is needed for cases like: [ref](/url(a). @@ -923,6 +1059,7 @@ source = parseFromString source') -- auxiliary function for source +source' :: GenParser Char st (String, [Char]) source' = do skipSpaces src <- try (char '<' >> @@ -934,6 +1071,7 @@ source' = do eof return (removeTrailingSpace src, tit) +linkTitle :: GenParser Char st String linkTitle = try $ do (many1 spaceChar >> option '\n' newline) <|> newline skipSpaces @@ -942,24 +1080,28 @@ linkTitle = try $ do (try (char delim >> skipSpaces >> eof)) return $ decodeCharacterReferences tit +link :: GenParser Char ParserState Inline link = try $ do - label <- reference - src <- source <|> referenceLink label + lab <- reference + src <- source <|> referenceLink lab sanitize <- getState >>= return . stateSanitizeHTML if sanitize && unsanitaryURI (fst src) then fail "Unsanitary URI" - else return $ Link label src + else return $ Link lab src -- a link like [this][ref] or [this][] or [this] -referenceLink label = do +referenceLink :: [Inline] + -> GenParser Char ParserState (String, [Char]) +referenceLink lab = do ref <- option [] (try (optional (char ' ') >> optional (newline >> skipSpaces) >> reference)) - let ref' = if null ref then label else ref + let ref' = if null ref then lab else ref state <- getState case lookupKeySrc (stateKeys state) ref' of Nothing -> fail "no corresponding key" Just target -> return target +autoLink :: GenParser Char ParserState Inline autoLink = try $ do char '<' src <- uri <|> (emailAddress >>= (return . ("mailto:" ++))) @@ -975,11 +1117,13 @@ autoLink = try $ do then Link [Str src'] (src, "") else Link [Code src'] (src, "") +image :: GenParser Char ParserState Inline image = try $ do char '!' - (Link label src) <- link - return $ Image label src + (Link lab src) <- link + return $ Image lab src +note :: GenParser Char ParserState Inline note = try $ do failIfStrict ref <- noteMarker @@ -989,14 +1133,17 @@ note = try $ do Nothing -> fail "note not found" Just contents -> return $ Note contents +inlineNote :: GenParser Char ParserState Inline inlineNote = try $ do failIfStrict char '^' contents <- inlinesInBalancedBrackets inline return $ Note [Para contents] +rawLaTeXInline' :: GenParser Char ParserState Inline rawLaTeXInline' = failIfStrict >> rawLaTeXInline +rawHtmlInline' :: GenParser Char ParserState Inline rawHtmlInline' = do st <- getState result <- if stateStrict st |