diff options
Diffstat (limited to 'Text')
-rw-r--r-- | Text/Pandoc/Readers/RST.hs | 116 |
1 files changed, 93 insertions, 23 deletions
diff --git a/Text/Pandoc/Readers/RST.hs b/Text/Pandoc/Readers/RST.hs index 76cfc8aa0..08e55f97d 100644 --- a/Text/Pandoc/Readers/RST.hs +++ b/Text/Pandoc/Readers/RST.hs @@ -37,27 +37,29 @@ import Data.List ( findIndex, delete ) -- | Parse reStructuredText string and return Pandoc document. readRST :: ParserState -> String -> Pandoc -readRST state str = (readWith parseRST) state (str ++ "\n\n") +readRST state s = (readWith parseRST) state (s ++ "\n\n") -- -- Constants and data structure definitions --- +bulletListMarkers :: [Char] bulletListMarkers = "*+-" + +underlineChars :: [Char] underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~" -- treat these as potentially non-text when parsing inline: +specialChars :: [Char] specialChars = "\\`|*_<>$:[-" -- -- parsing documents -- -isAnonKey (ref, src) = ref == [Str "_"] - isHeader :: Int -> Block -> Bool isHeader n (Header x _) = x == n -isHeader _ _ = False +isHeader _ _ = False -- | Promote all headers in a list of blocks. (Part of -- title transformation for RST.) @@ -65,7 +67,7 @@ promoteHeaders :: Int -> [Block] -> [Block] promoteHeaders num ((Header level text):rest) = (Header (level - num) text):(promoteHeaders num rest) promoteHeaders num (other:rest) = other:(promoteHeaders num rest) -promoteHeaders num [] = [] +promoteHeaders _ [] = [] -- | If list of blocks starts with a header (or a header and subheader) -- of level that are not found elsewhere, return it as a title and @@ -82,6 +84,7 @@ titleTransform ((Header 1 head1):rest) = -- title, no subtitle else ((promoteHeaders 1 rest), head1) titleTransform blocks = (blocks, []) +parseRST :: GenParser Char ParserState Pandoc parseRST = do startPos <- getPosition -- go through once just to get list of reference keys @@ -91,7 +94,7 @@ parseRST = do setPosition startPos st <- getState let reversedKeys = stateKeys st - updateState $ \st -> st { stateKeys = reverse reversedKeys } + updateState $ \s -> s { stateKeys = reverse reversedKeys } -- now parse it for real... blocks <- parseBlocks let blocks' = filter (/= Null) blocks @@ -108,8 +111,10 @@ parseRST = do -- parsing blocks -- +parseBlocks :: GenParser Char ParserState [Block] parseBlocks = manyTill block eof +block :: GenParser Char ParserState Block block = choice [ codeBlock , rawHtmlBlock , rawLaTeXBlock @@ -129,6 +134,7 @@ block = choice [ codeBlock -- field list -- +fieldListItem :: String -> GenParser Char st ([Char], [Char]) fieldListItem indent = try $ do string indent char ':' @@ -140,13 +146,14 @@ fieldListItem indent = try $ do indentedBlock return (name, joinWithSep " " (first:(lines rest))) +fieldList :: GenParser Char ParserState Block fieldList = try $ do indent <- lookAhead $ many (oneOf " \t") items <- many1 $ fieldListItem indent blanklines let authors = case lookup "Authors" items of Just auth -> [auth] - Nothing -> map snd (filter (\(x,y) -> x == "Author") items) + Nothing -> map snd (filter (\(x,_) -> x == "Author") items) if null authors then return () else updateState $ \st -> st {stateAuthors = authors} @@ -157,7 +164,7 @@ fieldList = try $ do Just tit -> parseFromString (many inline) tit >>= \t -> updateState $ \st -> st {stateTitle = t} Nothing -> return () - let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && + let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") && (x /= "Date") && (x /= "Title")) items if null remaining then return Null @@ -170,26 +177,31 @@ fieldList = try $ do -- line block -- +lineBlockLine :: GenParser Char ParserState [Inline] lineBlockLine = try $ do string "| " white <- many (oneOf " \t") line <- manyTill inline newline return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak] +lineBlock :: GenParser Char ParserState Block lineBlock = try $ do - lines <- many1 lineBlockLine + lines' <- many1 lineBlockLine blanklines - return $ Para (concat lines) + return $ Para (concat lines') -- -- paragraph block -- +para :: GenParser Char ParserState Block para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph" +codeBlockStart :: GenParser Char st Char codeBlockStart = string "::" >> blankline >> blankline -- paragraph that ends in a :: starting a code block +paraBeforeCodeBlock :: GenParser Char ParserState Block paraBeforeCodeBlock = try $ do result <- many1 (notFollowedBy' codeBlockStart >> inline) lookAhead (string "::") @@ -198,18 +210,21 @@ paraBeforeCodeBlock = try $ do else (normalizeSpaces result) ++ [Str ":"] -- regular paragraph +paraNormal :: GenParser Char ParserState Block paraNormal = try $ do result <- many1 inline newline blanklines return $ Para $ normalizeSpaces result +plain :: GenParser Char ParserState Block plain = many1 inline >>= return . Plain . normalizeSpaces -- -- image block -- +imageBlock :: GenParser Char st Block imageBlock = try $ do string ".. image:: " src <- manyTill anyChar newline @@ -223,9 +238,11 @@ imageBlock = try $ do -- header blocks -- +header :: GenParser Char ParserState Block header = doubleHeader <|> singleHeader <?> "header" -- a header with lines on top and bottom +doubleHeader :: GenParser Char ParserState Block doubleHeader = try $ do c <- oneOf underlineChars rest <- many (char c) -- the top line @@ -250,6 +267,7 @@ doubleHeader = try $ do return $ Header level (normalizeSpaces txt) -- a header with line on the bottom only +singleHeader :: GenParser Char ParserState Block singleHeader = try $ do notFollowedBy' whitespace txt <- many1 (do {notFollowedBy blankline; inline}) @@ -257,7 +275,7 @@ singleHeader = try $ do let len = (sourceColumn pos) - 1 blankline c <- oneOf underlineChars - rest <- count (len - 1) (char c) + count (len - 1) (char c) many (char c) blanklines state <- getState @@ -272,6 +290,7 @@ singleHeader = try $ do -- hrule block -- +hrule :: GenParser Char st Block hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -285,6 +304,7 @@ hrule = try $ do -- -- read a line indented by a given string +indentedLine :: String -> GenParser Char st [Char] indentedLine indents = try $ do string indents result <- manyTill anyChar newline @@ -292,6 +312,7 @@ indentedLine indents = try $ do -- two or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. +indentedBlock :: GenParser Char st [Char] indentedBlock = do indents <- lookAhead $ many1 (oneOf " \t") lns <- many $ choice $ [ indentedLine indents, @@ -301,6 +322,7 @@ indentedBlock = do optional blanklines return $ concat lns +codeBlock :: GenParser Char st Block codeBlock = try $ do codeBlockStart result <- indentedBlock @@ -310,6 +332,7 @@ codeBlock = try $ do -- raw html -- +rawHtmlBlock :: GenParser Char st Block rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> indentedBlock >>= return . RawHtml @@ -317,6 +340,7 @@ rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> -- raw latex -- +rawLaTeXBlock :: GenParser Char st Block rawLaTeXBlock = try $ do string ".. raw:: latex" blanklines @@ -327,6 +351,7 @@ rawLaTeXBlock = try $ do -- block quotes -- +blockQuote :: GenParser Char ParserState Block blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: @@ -337,8 +362,10 @@ blockQuote = do -- list blocks -- +list :: GenParser Char ParserState Block list = choice [ bulletList, orderedList, definitionList ] <?> "list" +definitionListItem :: GenParser Char ParserState ([Inline], [Block]) definitionListItem = try $ do term <- many1Till inline endline raw <- indentedBlock @@ -346,9 +373,11 @@ definitionListItem = try $ do contents <- parseFromString parseBlocks $ raw ++ "\n\n" return (normalizeSpaces term, contents) +definitionList :: GenParser Char ParserState Block definitionList = many1 definitionListItem >>= return . DefinitionList -- parses bullet list start and returns its length (inc. following whitespace) +bulletListStart :: GenParser Char st Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -356,12 +385,16 @@ bulletListStart = try $ do return $ length (marker:white) -- parses ordered list start and returns its length (inc following whitespace) +orderedListStart :: ListNumberStyle + -> ListNumberDelim + -> GenParser Char st Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) white <- many1 spaceChar return $ markerLen + length white -- parse a line of a list item +listLine :: Int -> GenParser Char ParserState [Char] listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength @@ -369,6 +402,7 @@ listLine markerLength = try $ do return $ line ++ "\n" -- indent by specified number of spaces (or equiv. tabs) +indentWith :: Int -> GenParser Char ParserState [Char] indentWith num = do state <- getState let tabStop = stateTabStop state @@ -378,6 +412,8 @@ indentWith num = do (try (char '\t' >> count (num - tabStop) (char ' '))) ] -- parse raw text for one list item, excluding start marker and continuations +rawListItem :: GenParser Char ParserState Int + -> GenParser Char ParserState (Int, [Char]) rawListItem start = try $ do markerLength <- start firstLine <- manyTill anyChar newline @@ -387,11 +423,14 @@ rawListItem start = try $ do -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. +listContinuation :: Int -> GenParser Char ParserState [Char] listContinuation markerLength = try $ do blanks <- many1 blankline result <- many1 (listLine markerLength) return $ blanks ++ concat result +listItem :: GenParser Char ParserState Int + -> GenParser Char ParserState [Block] listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) @@ -408,12 +447,14 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return parsed +orderedList :: GenParser Char ParserState Block orderedList = try $ do (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) items <- many1 (listItem (orderedListStart style delim)) let items' = compactify items return $ OrderedList (start, style, delim) items' +bulletList :: GenParser Char ParserState Block bulletList = many1 (listItem bulletListStart) >>= return . BulletList . compactify @@ -421,6 +462,7 @@ bulletList = many1 (listItem bulletListStart) >>= -- unknown directive (e.g. comment) -- +unknownDirective :: GenParser Char st Block unknownDirective = try $ do string ".. " manyTill anyChar newline @@ -433,37 +475,44 @@ unknownDirective = try $ do -- reference key -- +quotedReferenceName :: GenParser Char ParserState [Inline] quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - label <- many1Till inline (char '`') - return label + label' <- many1Till inline (char '`') + return label' +unquotedReferenceName :: GenParser Char ParserState [Inline] unquotedReferenceName = try $ do - label <- many1Till inline (lookAhead $ char ':') - return label + label' <- many1Till inline (lookAhead $ char ':') + return label' +isolated :: Char -> GenParser Char st Char isolated ch = try $ char ch >>~ notFollowedBy (char ch) +simpleReferenceName :: GenParser Char st [Inline] simpleReferenceName = do raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|> (try $ char '_' >>~ lookAhead alphaNum)) return [Str raw] +referenceName :: GenParser Char ParserState [Inline] referenceName = quotedReferenceName <|> (try $ simpleReferenceName >>~ lookAhead (char ':')) <|> unquotedReferenceName +referenceKey :: GenParser Char ParserState [Char] referenceKey = do startPos <- getPosition key <- choice [imageKey, anonymousKey, regularKey] st <- getState let oldkeys = stateKeys st - updateState $ \st -> st { stateKeys = key : oldkeys } + updateState $ \s -> s { stateKeys = key : oldkeys } optional blanklines endPos <- getPosition -- return enough blanks to replace key return $ replicate (sourceLine endPos - sourceLine startPos) '\n' +targetURI :: GenParser Char st [Char] targetURI = do skipSpaces optional newline @@ -472,6 +521,7 @@ targetURI = do blanklines return contents +imageKey :: GenParser Char ParserState ([Inline], (String, [Char])) imageKey = try $ do string ".. |" ref <- manyTill inline (char '|') @@ -480,12 +530,13 @@ imageKey = try $ do src <- targetURI return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) +anonymousKey :: GenParser Char st ([Inline], (String, [Char])) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI - state <- getState return ([Str "_"], (removeLeadingTrailingSpace src, "")) +regularKey :: GenParser Char ParserState ([Inline], (String, [Char])) regularKey = try $ do string ".. _" ref <- referenceName @@ -497,6 +548,7 @@ regularKey = try $ do -- inline -- +inline :: GenParser Char ParserState Inline inline = choice [ link , str , whitespace @@ -511,46 +563,57 @@ inline = choice [ link , escapedChar , symbol ] <?> "inline" +hyphens :: GenParser Char ParserState Inline hyphens = do result <- many1 (char '-') option Space endline -- don't want to treat endline after hyphen or dash as a space return $ Str result +escapedChar :: GenParser Char st Inline escapedChar = escaped anyChar +symbol :: GenParser Char ParserState Inline symbol = do result <- oneOf specialChars return $ Str [result] -- parses inline code, between codeStart and codeEnd +code :: GenParser Char ParserState Inline code = try $ do string "``" result <- manyTill anyChar (try (string "``")) return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result +emph :: GenParser Char ParserState Inline emph = enclosed (char '*') (char '*') inline >>= return . Emph . normalizeSpaces +strong :: GenParser Char ParserState Inline strong = enclosed (string "**") (try $ string "**") inline >>= return . Strong . normalizeSpaces +interpreted :: [Char] -> GenParser Char st [Inline] interpreted role = try $ do optional $ try $ string "\\ " result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar - nextChar <- lookAhead anyChar try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "") return [Str result] +superscript :: GenParser Char ParserState Inline superscript = interpreted "sup" >>= (return . Superscript) +subscript :: GenParser Char ParserState Inline subscript = interpreted "sub" >>= (return . Subscript) +whitespace :: GenParser Char ParserState Inline whitespace = many1 spaceChar >> return Space <?> "whitespace" +str :: GenParser Char ParserState Inline str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str -- an endline character that can be treated as a space, not a structural break +endline :: GenParser Char ParserState Inline endline = try $ do newline notFollowedBy blankline @@ -566,21 +629,24 @@ endline = try $ do -- links -- +link :: GenParser Char ParserState Inline link = choice [explicitLink, referenceLink, autoLink] <?> "link" +explicitLink :: GenParser Char ParserState Inline explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code - label <- manyTill (notFollowedBy (char '`') >> inline) + label' <- manyTill (notFollowedBy (char '`') >> inline) (try (spaces >> char '<')) src <- manyTill (noneOf ">\n ") (char '>') skipSpaces string "`_" - return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "") + return $ Link (normalizeSpaces label') (removeLeadingTrailingSpace src, "") +referenceLink :: GenParser Char ParserState Inline referenceLink = try $ do - label <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' - key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link + label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' + key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link state <- getState let keyTable = stateKeys state src <- case lookupKeySrc keyTable key of @@ -591,19 +657,23 @@ referenceLink = try $ do then delete ([Str "_"], src) keyTable -- remove first anon key else keyTable setState $ state { stateKeys = keyTable' } - return $ Link (normalizeSpaces label) src + return $ Link (normalizeSpaces label') src +autoURI :: GenParser Char ParserState Inline autoURI = do src <- uri return $ Link [Str src] (src, "") +autoEmail :: GenParser Char ParserState Inline autoEmail = do src <- emailAddress return $ Link [Str src] ("mailto:" ++ src, "") +autoLink :: GenParser Char ParserState Inline autoLink = autoURI <|> autoEmail -- For now, we assume that all substitution references are for images. +image :: GenParser Char ParserState Inline image = try $ do char '|' ref <- manyTill inline (char '|') |