From 9a67a486c2dc98d14d9687ceb4b01befa09114df Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Thu, 29 Nov 2007 02:02:34 +0000 Subject: Moved everything from src into the top-level directory. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1104 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Readers/Markdown.hs | 916 ------------------------------------ 1 file changed, 916 deletions(-) delete mode 100644 src/Text/Pandoc/Readers/Markdown.hs (limited to 'src/Text/Pandoc/Readers/Markdown.hs') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs deleted file mode 100644 index ded9f2136..000000000 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ /dev/null @@ -1,916 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of markdown-formatted plain text to 'Pandoc' document. --} -module Text.Pandoc.Readers.Markdown ( - readMarkdown - ) where - -import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex ) -import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum ) -import Data.Maybe ( fromMaybe ) -import Network.URI ( isURI ) -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) -import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, - anyHtmlInlineTag, anyHtmlTag, - anyHtmlEndTag, htmlEndTag, extractTagType, - htmlBlockElement ) -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) -import Text.ParserCombinators.Parsec - --- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: ParserState -> String -> Pandoc -readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n") - --- --- Constants and data structure definitions --- - -spaceChars = " \t" -bulletListMarkers = "*+-" -hruleChars = "*-_" -setextHChars = "=-" - --- treat these as potentially non-text when parsing inline: -specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221" - --- --- auxiliary functions --- - -indentSpaces = try $ do - state <- getState - let tabStop = stateTabStop state - try (count tabStop (char ' ')) <|> - (many (char ' ') >> string "\t") "indentation" - -nonindentSpaces = do - state <- getState - let tabStop = stateTabStop state - sps <- many (char ' ') - if length sps < tabStop - then return sps - else unexpected "indented line" - --- | Fail unless we're at beginning of a line. -failUnlessBeginningOfLine = do - pos <- getPosition - if sourceColumn pos == 1 then return () else fail "not beginning of line" - --- | Fail unless we're in "smart typography" mode. -failUnlessSmart = do - state <- getState - if stateSmart state then return () else fail "Smart typography feature" - --- | Parse an inline Str element with a given content. -inlineString str = try $ do - (Str res) <- inline - if res == str then return res else fail $ "unexpected Str content" - --- | Parse a sequence of inline elements between a string --- @opener@ and a string @closer@, including inlines --- between balanced pairs of @opener@ and a @closer@. -inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline] -inlinesInBalanced opener closer = try $ do - string opener - result <- manyTill ( (do lookAhead (inlineString opener) - -- because it might be a link... - bal <- inlinesInBalanced opener closer - return $ [Str opener] ++ bal ++ [Str closer]) - <|> (count 1 inline)) - (try (string closer)) - return $ concat result - --- --- document structure --- - -titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline - -authorsLine = try $ do - char '%' - skipSpaces - authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") - newline - return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors - -dateLine = try $ do - char '%' - skipSpaces - date <- many (noneOf "\n") - newline - return $ decodeCharacterReferences $ removeTrailingSpace date - -titleBlock = try $ do - failIfStrict - title <- option [] titleLine - author <- option [] authorsLine - date <- option "" dateLine - optional blanklines - return (title, author, date) - -parseMarkdown = do - -- markdown allows raw HTML - updateState (\state -> state { stateParseRaw = True }) - startPos <- getPosition - -- go through once just to get list of reference keys - -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= - return . concat - setInput docMinusKeys - setPosition startPos - st <- getState - -- go through again for notes unless strict... - if stateStrict st - then return () - else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= - return . concat - st <- getState - let reversedNotes = stateNotes st - updateState $ \st -> st { stateNotes = reverse reversedNotes } - setInput docMinusNotes - setPosition startPos - -- now parse it for real... - (title, author, date) <- option ([],[],"") titleBlock - blocks <- parseBlocks - return $ Pandoc (Meta title author date) $ filter (/= Null) blocks - --- --- initial pass for references and notes --- - -referenceKey = try $ do - startPos <- getPosition - nonindentSpaces - label <- reference - char ':' - skipSpaces - optional (char '<') - src <- many (noneOf "> \n\t") - optional (char '>') - tit <- option "" referenceTitle - blanklines - endPos <- getPosition - let newkey = (label, (removeTrailingSpace src, tit)) - st <- getState - let oldkeys = stateKeys st - updateState $ \st -> st { stateKeys = newkey : oldkeys } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - -referenceTitle = try $ do - (many1 spaceChar >> option '\n' newline) <|> newline - skipSpaces - tit <- (charsInBalanced '(' ')' >>= return . unwords . words) - <|> do delim <- char '\'' <|> char '"' - manyTill anyChar (try (char delim >> skipSpaces >> - notFollowedBy (noneOf ")\n"))) - return $ decodeCharacterReferences tit - -noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']') - -rawLine = do - notFollowedBy blankline - notFollowedBy' noteMarker - contents <- many1 nonEndline - end <- option "" (newline >> optional indentSpaces >> return "\n") - return $ contents ++ end - -rawLines = many1 rawLine >>= return . concat - -noteBlock = try $ do - startPos <- getPosition - ref <- noteMarker - char ':' - optional blankline - optional indentSpaces - raw <- sepBy rawLines (try (blankline >> indentSpaces)) - optional blanklines - endPos <- getPosition - -- parse the extracted text, which may contain various block elements: - contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n" - let newnote = (ref, contents) - st <- getState - let oldnotes = stateNotes st - updateState $ \st -> st { stateNotes = newnote : oldnotes } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - --- --- parsing blocks --- - -parseBlocks = manyTill block eof - -block = choice [ header - , table - , codeBlock - , hrule - , list - , blockQuote - , htmlBlock - , rawLaTeXEnvironment' - , para - , plain - , nullBlock ] "block" - --- --- header blocks --- - -header = atxHeader <|> setextHeader "header" - -atxHeader = try $ do - level <- many1 (char '#') >>= return . length - notFollowedBy (char '.' <|> char ')') -- this would be a list - skipSpaces - text <- manyTill inline atxClosing >>= return . normalizeSpaces - return $ Header level text - -atxClosing = try $ skipMany (char '#') >> blanklines - -setextHeader = try $ do - text <- many1Till inline newline - underlineChar <- oneOf setextHChars - many (char underlineChar) - blanklines - let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - return $ Header level (normalizeSpaces text) - --- --- hrule block --- - -hrule = try $ do - skipSpaces - start <- oneOf hruleChars - count 2 (skipSpaces >> char start) - skipMany (skipSpaces >> char start) - newline - optional blanklines - return HorizontalRule - --- --- code blocks --- - -indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") - -codeBlock = do - contents <- many1 (indentedLine <|> - try (do b <- blanklines - l <- indentedLine - return $ b ++ l)) - optional blanklines - return $ CodeBlock $ stripTrailingNewlines $ concat contents - --- --- block quotes --- - -emacsBoxQuote = try $ do - failIfStrict - string ",----" - manyTill anyChar newline - raw <- manyTill - (try (char '|' >> optional (char ' ') >> manyTill anyChar newline)) - (try (string "`----")) - blanklines - return raw - -emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') - -emailBlockQuote = try $ do - emailBlockQuoteStart - raw <- sepBy (many (nonEndline <|> - (try (endline >> notFollowedBy emailBlockQuoteStart >> - return '\n')))) - (try (newline >> emailBlockQuoteStart)) - newline <|> (eof >> return '\n') - optional blanklines - return raw - -blockQuote = do - raw <- emailBlockQuote <|> emacsBoxQuote - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n" - return $ BlockQuote contents - --- --- list blocks --- - -list = choice [ bulletList, orderedList, definitionList ] "list" - -bulletListStart = try $ do - optional newline -- if preceded by a Plain block in a list context - nonindentSpaces - notFollowedBy' hrule -- because hrules start out just like lists - oneOf bulletListMarkers - spaceChar - skipSpaces - -anyOrderedListStart = try $ do - optional newline -- if preceded by a Plain block in a list context - nonindentSpaces - notFollowedBy $ string "p." >> spaceChar >> digit -- page number - state <- getState - if stateStrict state - then do many1 digit - char '.' - spaceChar - return (1, DefaultStyle, DefaultDelim) - else anyOrderedListMarker >>~ spaceChar - -orderedListStart style delim = try $ do - optional newline -- if preceded by a Plain block in a list context - nonindentSpaces - state <- getState - num <- if stateStrict state - then do many1 digit - char '.' - return 1 - else orderedListMarker style delim - if delim == Period && (style == UpperAlpha || (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000])) - then char '\t' <|> (spaceChar >> spaceChar) - else spaceChar - skipSpaces - --- parse a line of a list item (start = parser for beginning of list item) -listLine start = try $ do - notFollowedBy' start - notFollowedBy blankline - notFollowedBy' (do indentSpaces - many (spaceChar) - bulletListStart <|> (anyOrderedListStart >> return ())) - line <- manyTill anyChar newline - return $ line ++ "\n" - --- parse raw text for one list item, excluding start marker and continuations -rawListItem start = try $ do - start - result <- many1 (listLine start) - blanks <- many blankline - return $ concat result ++ blanks - --- continuation of a list item - indented and separated by blankline --- or (in compact lists) endline. --- note: nested lists are parsed as continuations -listContinuation start = try $ do - lookAhead indentSpaces - result <- many1 (listContinuationLine start) - blanks <- many blankline - return $ concat result ++ blanks - -listContinuationLine start = try $ do - notFollowedBy blankline - notFollowedBy' start - optional indentSpaces - result <- manyTill anyChar newline - return $ result ++ "\n" - -listItem start = try $ do - first <- rawListItem start - continuations <- many (listContinuation start) - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let oldContext = stateParserContext state - setState $ state {stateParserContext = ListItemState} - -- parse the extracted block, which may contain various block elements: - let raw = concat (first:continuations) - contents <- parseFromString parseBlocks raw - updateState (\st -> st {stateParserContext = oldContext}) - return contents - -orderedList = try $ do - (start, style, delim) <- lookAhead anyOrderedListStart - items <- many1 (listItem (orderedListStart style delim)) - return $ OrderedList (start, style, delim) $ compactify items - -bulletList = many1 (listItem bulletListStart) >>= - return . BulletList . compactify - --- definition lists - -definitionListItem = try $ do - notFollowedBy blankline - notFollowedBy' indentSpaces - -- first, see if this has any chance of being a definition list: - lookAhead (anyLine >> char ':') - term <- manyTill inline newline - raw <- many1 defRawBlock - state <- getState - let oldContext = stateParserContext state - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ concat raw - updateState (\st -> st {stateParserContext = oldContext}) - return ((normalizeSpaces term), contents) - -defRawBlock = try $ do - char ':' - state <- getState - let tabStop = stateTabStop state - try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t") - firstline <- anyLine - rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine) - trailing <- option "" blanklines - return $ firstline ++ "\n" ++ unlines rawlines ++ trailing - -definitionList = do - failIfStrict - items <- many1 definitionListItem - let (terms, defs) = unzip items - let defs' = compactify defs - let items' = zip terms defs' - return $ DefinitionList items' - --- --- paragraph block --- - -para = try $ do - result <- many1 inline - newline - blanklines <|> do st <- getState - if stateStrict st - then lookAhead (blockQuote <|> header) >> return "" - else lookAhead emacsBoxQuote >> return "" - return $ Para $ normalizeSpaces result - -plain = many1 inline >>= return . Plain . normalizeSpaces - --- --- raw html --- - -htmlElement = strictHtmlBlock <|> htmlBlockElement "html element" - -htmlBlock = do - st <- getState - if stateStrict st - then try $ do failUnlessBeginningOfLine - first <- htmlElement - finalSpace <- many (oneOf spaceChars) - finalNewlines <- many newline - return $ RawHtml $ first ++ finalSpace ++ finalNewlines - else rawHtmlBlocks - --- True if tag is self-closing -isSelfClosing tag = - isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag - -strictHtmlBlock = try $ do - tag <- anyHtmlBlockTag - let tag' = extractTagType tag - if isSelfClosing tag || tag' == "hr" - then return tag - else do contents <- many (notFollowedBy' (htmlEndTag tag') >> - (htmlElement <|> (count 1 anyChar))) - end <- htmlEndTag tag' - return $ tag ++ concat contents ++ end - -rawHtmlBlocks = do - htmlBlocks <- many1 rawHtmlBlock - let combined = concatMap (\(RawHtml str) -> str) htmlBlocks - let combined' = if not (null combined) && last combined == '\n' - then init combined -- strip extra newline - else combined - return $ RawHtml combined' - --- --- LaTeX --- - -rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment - --- --- Tables --- - --- Parse a dashed line with optional trailing spaces; return its length --- and the length including trailing space. -dashedLine ch = do - dashes <- many1 (char ch) - sp <- many spaceChar - return $ (length dashes, length $ dashes ++ sp) - --- Parse a table header with dashed lines of '-' preceded by --- one line of text. -simpleTableHeader = try $ do - rawContent <- anyLine - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines - let rawHeads = tail $ splitByIndices (init indices) rawContent - let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths - return (rawHeads, aligns, indices) - --- Parse a table footer - dashed lines followed by blank line. -tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines - --- Parse a table separator - dashed line. -tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" - --- Parse a raw line and split it into chunks by indices. -rawTableLine indices = do - notFollowedBy' (blanklines <|> tableFooter) - line <- many1Till anyChar newline - return $ map removeLeadingTrailingSpace $ tail $ - splitByIndices (init indices) line - --- Parse a table line and return a list of lists of blocks (columns). -tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) - --- Parse a multiline table row and return a list of blocks (columns). -multilineRow indices = do - colLines <- many1 (rawTableLine indices) - optional blanklines - let cols = map unlines $ transpose colLines - mapM (parseFromString (many plain)) cols - --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Float] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths = zipWith (-) indices (0:indices) - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - --- Parses a table caption: inlines beginning with 'Table:' --- and followed by blank lines. -tableCaption = try $ do - nonindentSpaces - string "Table:" - result <- many1 inline - blanklines - return $ normalizeSpaces result - --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith headerParser lineParser footerParser = try $ do - (rawHeads, aligns, indices) <- headerParser - lines <- many1Till (lineParser indices) footerParser - caption <- option [] tableCaption - heads <- mapM (parseFromString (many plain)) rawHeads - state <- getState - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table caption aligns widths heads lines - --- Parse a simple table with '---' header and one line per row. -simpleTable = tableWith simpleTableHeader tableLine blanklines - --- Parse a multiline table: starts with row of '-' on top, then header --- (which may be multiline), then the rows, --- which may be multiline, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). -multilineTable = tableWith multilineTableHeader multilineRow tableFooter - -multilineTableHeader = try $ do - tableSep - rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines - let rawHeadsList = transpose $ map - (\ln -> tail $ splitByIndices (init indices) ln) - rawContent - let rawHeads = map (joinWithSep " ") rawHeadsList - let aligns = zipWith alignType rawHeadsList lengths - return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) - --- Returns an alignment type for a table, based on a list of strings --- (the rows of the column header) and a number (the length of the --- dashed line under the rows. -alignType :: [String] -> Int -> Alignment -alignType [] len = AlignDefault -alignType strLst len = - let str = head $ sortBy (comparing length) $ - map removeTrailingSpace strLst - leftSpace = if null str then False else (str !! 0) `elem` " \t" - rightSpace = length str < len || (str !! (len - 1)) `elem` " \t" - in case (leftSpace, rightSpace) of - (True, False) -> AlignRight - (False, True) -> AlignLeft - (True, True) -> AlignCenter - (False, False) -> AlignDefault - -table = failIfStrict >> (simpleTable <|> multilineTable) "table" - --- --- inline --- - -inline = choice [ str - , smartPunctuation - , whitespace - , endline - , code - , charRef - , strong - , emph - , note - , inlineNote - , link - , image - , math - , strikeout - , superscript - , subscript - , autoLink - , rawHtmlInline' - , rawLaTeXInline' - , escapedChar - , symbol - , ltSign ] "inline" - -escapedChar = do - char '\\' - state <- getState - result <- option '\\' $ if stateStrict state - then oneOf "\\`*_{}[]()>#+-.!~" - else satisfy (not . isAlphaNum) - return $ Str [result] - -ltSign = do - st <- getState - if stateStrict st - then char '<' - else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html - return $ Str ['<'] - -specialCharsMinusLt = filter (/= '<') specialChars - -symbol = do - result <- oneOf specialCharsMinusLt - return $ Str [result] - --- parses inline code, between n `s and n `s -code = try $ do - starts <- many1 (char '`') - skipSpaces - result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> - (char '\n' >> return " ")) - (try (skipSpaces >> count (length starts) (char '`') >> - notFollowedBy (char '`'))) - return $ Code $ removeLeadingTrailingSpace $ concat result - -mathWord = many1 ((noneOf " \t\n\\$") <|> - (try (char '\\') >>~ notFollowedBy (char '$'))) - -math = try $ do - failIfStrict - char '$' - notFollowedBy space - words <- sepBy1 mathWord (many1 space) - char '$' - return $ TeX ("$" ++ (joinWithSep " " words) ++ "$") - -emph = ((enclosed (char '*') (char '*') inline) <|> - (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>= - return . Emph . normalizeSpaces - -strong = ((enclosed (string "**") (try $ string "**") inline) <|> - (enclosed (string "__") (try $ string "__") inline)) >>= - return . Strong . normalizeSpaces - -strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>= - return . Strikeout . normalizeSpaces - -superscript = failIfStrict >> enclosed (char '^') (char '^') - (notFollowedBy' whitespace >> inline) >>= -- may not contain Space - return . Superscript - -subscript = failIfStrict >> enclosed (char '~') (char '~') - (notFollowedBy' whitespace >> inline) >>= -- may not contain Space - return . Subscript - -smartPunctuation = failUnlessSmart >> - choice [ quoted, apostrophe, dash, ellipses ] - -apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe - -quoted = doubleQuoted <|> singleQuoted - -withQuoteContext context parser = do - oldState <- getState - let oldQuoteContext = stateQuoteContext oldState - setState oldState { stateQuoteContext = context } - result <- parser - newState <- getState - setState newState { stateQuoteContext = oldQuoteContext } - return result - -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= - return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted = try $ do - doubleQuoteStart - withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= - return . Quoted DoubleQuote . normalizeSpaces - -failIfInQuoteContext context = do - st <- getState - if stateQuoteContext st == context - then fail "already inside quotes" - else return () - -singleQuoteStart = do - failIfInQuoteContext InSingleQuote - char '\8216' <|> - (try $ do char '\'' - notFollowedBy (oneOf ")!],.;:-? \t\n") - notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) - -- possess/contraction - return '\'') - -singleQuoteEnd = try $ do - char '\8217' <|> char '\'' - notFollowedBy alphaNum - return '\'' - -doubleQuoteStart = do - failIfInQuoteContext InDoubleQuote - char '\8220' <|> - (try $ do char '"' - notFollowedBy (oneOf " \t\n") - return '"') - -doubleQuoteEnd = char '\8221' <|> char '"' - -ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses - -dash = enDash <|> emDash - -enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash - -emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >> - skipSpaces >> return EmDash - -whitespace = do - sps <- many1 (oneOf spaceChars) - if length sps >= 2 - then option Space (endline >> return LineBreak) - else return Space "whitespace" - -nonEndline = satisfy (/='\n') - -strChar = noneOf (specialChars ++ spaceChars ++ "\n") - -str = many1 strChar >>= return . Str - --- an endline character that can be treated as a space, not a structural break -endline = try $ do - newline - notFollowedBy blankline - st <- getState - if stateStrict st - then do notFollowedBy emailBlockQuoteStart - notFollowedBy (char '#') -- atx header - else return () - -- parse potential list-starts differently if in a list: - if stateParserContext st == ListItemState - then notFollowedBy' (bulletListStart <|> - (anyOrderedListStart >> return ())) - else return () - return Space - --- --- links --- - --- a reference label for a link -reference = notFollowedBy' (string "[^") >> -- footnote reference - inlinesInBalanced "[" "]" >>= (return . normalizeSpaces) - --- source for a link, with optional title -source = try $ do - char '(' - optional (char '<') - src <- many (noneOf ")> \t\n") - optional (char '>') - tit <- option "" linkTitle - skipSpaces - char ')' - return (removeTrailingSpace src, tit) - -linkTitle = try $ do - (many1 spaceChar >> option '\n' newline) <|> newline - skipSpaces - delim <- char '\'' <|> char '"' - tit <- manyTill anyChar (try (char delim >> skipSpaces >> - notFollowedBy (noneOf ")\n"))) - return $ decodeCharacterReferences tit - -link = try $ do - label <- reference - src <- source <|> referenceLink label - return $ Link label src - --- a link like [this][ref] or [this][] or [this] -referenceLink label = do - ref <- option [] (try (optional (char ' ') >> - optional (newline >> skipSpaces) >> reference)) - let ref' = if null ref then label else ref - state <- getState - case lookupKeySrc (stateKeys state) ref' of - Nothing -> fail "no corresponding key" - Just target -> return target - -emailAddress = try $ do - name <- many1 (alphaNum <|> char '+') - char '@' - first <- many1 alphaNum - rest <- many1 (char '.' >> many1 alphaNum) - return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest) - -uri = try $ do - str <- many1 (noneOf "\n\t >") - if isURI str - then return str - else fail "not a URI" - -autoLink = try $ do - char '<' - src <- uri <|> emailAddress - char '>' - let src' = if "mailto:" `isPrefixOf` src - then drop 7 src - else src - st <- getState - return $ if stateStrict st - then Link [Str src'] (src, "") - else Link [Code src'] (src, "") - -image = try $ do - char '!' - (Link label src) <- link - return $ Image label src - -note = try $ do - failIfStrict - ref <- noteMarker - state <- getState - let notes = stateNotes state - case lookup ref notes of - Nothing -> fail "note not found" - Just contents -> return $ Note contents - -inlineNote = try $ do - failIfStrict - char '^' - contents <- inlinesInBalanced "[" "]" - return $ Note [Para contents] - -rawLaTeXInline' = failIfStrict >> rawLaTeXInline - -rawHtmlInline' = do - st <- getState - result <- choice $ if stateStrict st - then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] - else [htmlBlockElement, anyHtmlInlineTag] - return $ HtmlInline result - -- cgit v1.2.3