diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/RST.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 640 |
1 files changed, 0 insertions, 640 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs deleted file mode 100644 index 1239eb688..000000000 --- a/src/Text/Pandoc/Readers/RST.hs +++ /dev/null @@ -1,640 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion from reStructuredText to 'Pandoc' document. --} -module Text.Pandoc.Readers.RST ( - readRST - ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.ParserCombinators.Parsec -import Data.List ( findIndex, delete ) - --- | Parse reStructuredText string and return Pandoc document. -readRST :: ParserState -> String -> Pandoc -readRST state str = (readWith parseRST) state (str ++ "\n\n") - --- --- Constants and data structure definitions ---- - -bulletListMarkers = "*+-" -underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~" - --- treat these as potentially non-text when parsing inline: -specialChars = "\\`|*_<>$:[-" - --- --- parsing documents --- - -isAnonKey (ref, src) = ref == [Str "_"] - -isHeader :: Int -> Block -> Bool -isHeader n (Header x _) = x == n -isHeader _ _ = False - --- | Promote all headers in a list of blocks. (Part of --- title transformation for RST.) -promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level text):rest) = - (Header (level - num) text):(promoteHeaders num rest) -promoteHeaders num (other:rest) = other:(promoteHeaders num rest) -promoteHeaders num [] = [] - --- | If list of blocks starts with a header (or a header and subheader) --- of level that are not found elsewhere, return it as a title and --- promote all the other headers. -titleTransform :: [Block] -- ^ list of blocks - -> ([Block], [Inline]) -- ^ modified list of blocks, title -titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle - if (any (isHeader 1) rest) || (any (isHeader 2) rest) - then ((Header 1 head1):(Header 2 head2):rest, []) - else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) -titleTransform ((Header 1 head1):rest) = -- title, no subtitle - if (any (isHeader 1) rest) - then ((Header 1 head1):rest, []) - else ((promoteHeaders 1 rest), head1) -titleTransform blocks = (blocks, []) - -parseRST = do - startPos <- getPosition - -- go through once just to get list of reference keys - -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat - setInput docMinusKeys - setPosition startPos - st <- getState - let reversedKeys = stateKeys st - updateState $ \st -> st { stateKeys = reverse reversedKeys } - -- now parse it for real... - blocks <- parseBlocks - let blocks' = filter (/= Null) blocks - state <- getState - let (blocks'', title) = if stateStandalone state - then titleTransform blocks' - else (blocks', []) - let authors = stateAuthors state - let date = stateDate state - let title' = if (null title) then (stateTitle state) else title - return $ Pandoc (Meta title' authors date) blocks'' - --- --- parsing blocks --- - -parseBlocks = manyTill block eof - -block = choice [ codeBlock - , rawHtmlBlock - , rawLaTeXBlock - , fieldList - , blockQuote - , imageBlock - , unknownDirective - , header - , hrule - , list - , lineBlock - , para - , plain - , nullBlock ] <?> "block" - --- --- field list --- - -fieldListItem indent = try $ do - string indent - char ':' - name <- many1 alphaNum - string ": " - skipSpaces - first <- manyTill anyChar newline - rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >> - indentedBlock - return (name, joinWithSep " " (first:(lines rest))) - -fieldList = try $ do - indent <- lookAhead $ many (oneOf " \t") - items <- many1 $ fieldListItem indent - blanklines - let authors = case lookup "Authors" items of - Just auth -> [auth] - Nothing -> map snd (filter (\(x,y) -> x == "Author") items) - if null authors - then return () - else updateState $ \st -> st {stateAuthors = authors} - case (lookup "Date" items) of - Just dat -> updateState $ \st -> st {stateDate = dat} - Nothing -> return () - case (lookup "Title" items) of - Just tit -> parseFromString (many inline) tit >>= - \t -> updateState $ \st -> st {stateTitle = t} - Nothing -> return () - let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && - (x /= "Date") && (x /= "Title")) items - if null remaining - then return Null - else do terms <- mapM (return . (:[]) . Str . fst) remaining - defs <- mapM (parseFromString (many block) . snd) - remaining - return $ DefinitionList $ zip terms defs - --- --- line block --- - -lineBlockLine = try $ do - string "| " - white <- many (oneOf " \t") - line <- manyTill inline newline - return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak] - -lineBlock = try $ do - lines <- many1 lineBlockLine - blanklines - return $ Para (concat lines) - --- --- paragraph block --- - -para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph" - -codeBlockStart = string "::" >> blankline >> blankline - --- paragraph that ends in a :: starting a code block -paraBeforeCodeBlock = try $ do - result <- many1 (notFollowedBy' codeBlockStart >> inline) - lookAhead (string "::") - return $ Para $ if last result == Space - then normalizeSpaces result - else (normalizeSpaces result) ++ [Str ":"] - --- regular paragraph -paraNormal = try $ do - result <- many1 inline - newline - blanklines - return $ Para $ normalizeSpaces result - -plain = many1 inline >>= return . Plain . normalizeSpaces - --- --- image block --- - -imageBlock = try $ do - string ".. image:: " - src <- manyTill anyChar newline - fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t") - many1 $ fieldListItem indent - optional blanklines - case lookup "alt" fields of - Just alt -> return $ Plain [Image [Str alt] (src, alt)] - Nothing -> return $ Plain [Image [Str "image"] (src, "")] --- --- header blocks --- - -header = doubleHeader <|> singleHeader <?> "header" - --- a header with lines on top and bottom -doubleHeader = try $ do - c <- oneOf underlineChars - rest <- many (char c) -- the top line - let lenTop = length (c:rest) - skipSpaces - newline - txt <- many1 (notFollowedBy blankline >> inline) - pos <- getPosition - let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else return () - blankline -- spaces and newline - count lenTop (char c) -- the bottom line - blanklines - -- check to see if we've had this kind of header before. - -- if so, get appropriate level. if not, add to list. - state <- getState - let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - return $ Header level (normalizeSpaces txt) - --- a header with line on the bottom only -singleHeader = try $ do - notFollowedBy' whitespace - txt <- many1 (do {notFollowedBy blankline; inline}) - pos <- getPosition - let len = (sourceColumn pos) - 1 - blankline - c <- oneOf underlineChars - rest <- count (len - 1) (char c) - many (char c) - blanklines - state <- getState - let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - return $ Header level (normalizeSpaces txt) - --- --- hrule block --- - -hrule = try $ do - chr <- oneOf underlineChars - count 3 (char chr) - skipMany (char chr) - blankline - blanklines - return HorizontalRule - --- --- code blocks --- - --- read a line indented by a given string -indentedLine indents = try $ do - string indents - result <- manyTill anyChar newline - return $ result ++ "\n" - --- two or more indented lines, possibly separated by blank lines. --- any amount of indentation will work. -indentedBlock = do - indents <- lookAhead $ many1 (oneOf " \t") - lns <- many $ choice $ [ indentedLine indents, - try $ do b <- blanklines - l <- indentedLine indents - return (b ++ l) ] - optional blanklines - return $ concat lns - -codeBlock = try $ do - codeBlockStart - result <- indentedBlock - return $ CodeBlock $ stripTrailingNewlines result - --- --- raw html --- - -rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> - indentedBlock >>= return . RawHtml - --- --- raw latex --- - -rawLaTeXBlock = try $ do - string ".. raw:: latex" - blanklines - result <- indentedBlock - return $ Para [(TeX result)] - --- --- block quotes --- - -blockQuote = do - raw <- indentedBlock - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" - return $ BlockQuote contents - --- --- list blocks --- - -list = choice [ bulletList, orderedList, definitionList ] <?> "list" - -definitionListItem = try $ do - term <- many1Till inline endline - raw <- indentedBlock - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" - return (normalizeSpaces term, contents) - -definitionList = many1 definitionListItem >>= return . DefinitionList - --- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart = try $ do - notFollowedBy' hrule -- because hrules start out just like lists - marker <- oneOf bulletListMarkers - white <- many1 spaceChar - return $ length (marker:white) - --- parses ordered list start and returns its length (inc following whitespace) -orderedListStart style delim = try $ do - (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) - white <- many1 spaceChar - return $ markerLen + length white - --- parse a line of a list item -listLine markerLength = try $ do - notFollowedBy blankline - indentWith markerLength - line <- manyTill anyChar newline - return $ line ++ "\n" - --- indent by specified number of spaces (or equiv. tabs) -indentWith num = do - state <- getState - let tabStop = stateTabStop state - if (num < tabStop) - then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] - --- parse raw text for one list item, excluding start marker and continuations -rawListItem start = do - markerLength <- start - firstLine <- manyTill anyChar newline - restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) - --- continuation of a list item - indented and separated by blankline or --- (in compact lists) endline. --- Note: nested lists are parsed as continuations. -listContinuation markerLength = try $ do - blanks <- many1 blankline - result <- many1 (listLine markerLength) - return $ blanks ++ concat result - -listItem start = try $ do - (markerLength, first) <- rawListItem start - rest <- many (listContinuation markerLength) - blanks <- choice [ try (many blankline >>~ lookAhead start), - many1 blankline ] -- whole list must end with blank. - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let oldContext = stateParserContext state - setState $ state {stateParserContext = ListItemState} - -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks - updateState (\st -> st {stateParserContext = oldContext}) - return parsed - -orderedList = do - (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) - items <- many1 (listItem (orderedListStart style delim)) - let items' = compactify items - return $ OrderedList (start, style, delim) items' - -bulletList = many1 (listItem bulletListStart) >>= - return . BulletList . compactify - --- --- unknown directive (e.g. comment) --- - -unknownDirective = try $ do - string ".. " - manyTill anyChar newline - many (string " :" >> many1 (noneOf "\n:") >> char ':' >> - many1 (noneOf "\n") >> newline) - optional blanklines - return Null - --- --- reference key --- - -referenceKey = do - startPos <- getPosition - key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] - st <- getState - let oldkeys = stateKeys st - updateState $ \st -> st { stateKeys = key : oldkeys } - optional blanklines - endPos <- getPosition - -- return enough blanks to replace key - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - -targetURI = do - skipSpaces - optional newline - contents <- many1 (try (many spaceChar >> newline >> - many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") - blanklines - return contents - -imageKey = try $ do - string ".. |" - ref <- manyTill inline (char '|') - skipSpaces - string "image::" - src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - -anonymousKey = try $ do - oneOfStrings [".. __:", "__"] - src <- targetURI - state <- getState - return ([Str "_"], (removeLeadingTrailingSpace src, "")) - -regularKeyQuoted = try $ do - string ".. _`" - ref <- manyTill inline (char '`') - char ':' - src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - -regularKey = try $ do - string ".. _" - ref <- manyTill inline (char ':') - src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - - -- - -- inline - -- - -inline = choice [ link - , str - , whitespace - , endline - , strong - , emph - , code - , image - , hyphens - , superscript - , subscript - , escapedChar - , symbol ] <?> "inline" - -hyphens = do - result <- many1 (char '-') - option Space endline - -- don't want to treat endline after hyphen or dash as a space - return $ Str result - -escapedChar = escaped anyChar - -symbol = do - result <- oneOf specialChars - return $ Str [result] - --- parses inline code, between codeStart and codeEnd -code = try $ do - string "``" - result <- manyTill anyChar (try (string "``")) - return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result - -emph = enclosed (char '*') (char '*') inline >>= - return . Emph . normalizeSpaces - -strong = enclosed (string "**") (try $ string "**") inline >>= - return . Strong . normalizeSpaces - -interpreted role = try $ do - optional $ try $ string "\\ " - result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar - nextChar <- lookAhead anyChar - try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "") - return [Str result] - -superscript = interpreted "sup" >>= (return . Superscript) - -subscript = interpreted "sub" >>= (return . Subscript) - -whitespace = many1 spaceChar >> return Space <?> "whitespace" - -str = notFollowedBy' oneWordReference >> - many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str - --- an endline character that can be treated as a space, not a structural break -endline = try $ do - newline - notFollowedBy blankline - -- parse potential list-starts at beginning of line differently in a list: - st <- getState - if (stateParserContext st) == ListItemState - then notFollowedBy (anyOrderedListMarker >> spaceChar) >> - notFollowedBy' bulletListStart - else return () - return Space - --- --- links --- - -link = choice [explicitLink, referenceLink, autoLink] <?> "link" - -explicitLink = try $ do - char '`' - notFollowedBy (char '`') -- `` is marks start of inline code - label <- manyTill inline (try (do {spaces; char '<'})) - src <- manyTill (noneOf ">\n ") (char '>') - skipSpaces - string "`_" - return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "") - -reference = try $ do - char '`' - notFollowedBy (char '`') - label <- many1Till inline (char '`') - char '_' - return label - -oneWordReference = do - raw <- many1 alphaNum - char '_' - notFollowedBy alphaNum -- because this_is_not a link - return [Str raw] - -referenceLink = try $ do - label <- reference <|> oneWordReference - key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link - state <- getState - let keyTable = stateKeys state - src <- case lookupKeySrc keyTable key of - Nothing -> fail "no corresponding key" - Just target -> return target - -- if anonymous link, remove first anon key so it won't be used again - let keyTable' = if (key == [Str "_"]) -- anonymous link? - then delete ([Str "_"], src) keyTable -- remove first anon key - else keyTable - setState $ state { stateKeys = keyTable' } - return $ Link (normalizeSpaces label) src - -uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", - "mailto:", "news:", "telnet:" ] - -uri = try $ do - scheme <- uriScheme - identifier <- many1 (noneOf " \t\n") - return $ scheme ++ identifier - -autoURI = do - src <- uri - return $ Link [Str src] (src, "") - -emailChar = alphaNum <|> oneOf "-+_." - -emailAddress = try $ do - firstLetter <- alphaNum - restAddr <- many emailChar - let addr = firstLetter:restAddr - char '@' - dom <- domain - return $ addr ++ '@':dom - -domainChar = alphaNum <|> char '-' - -domain = do - first <- many1 domainChar - dom <- many1 (try (do{ char '.'; many1 domainChar })) - return $ joinWithSep "." (first:dom) - -autoEmail = do - src <- emailAddress - return $ Link [Str src] ("mailto:" ++ src, "") - -autoLink = autoURI <|> autoEmail - --- For now, we assume that all substitution references are for images. -image = try $ do - char '|' - ref <- manyTill inline (char '|') - state <- getState - let keyTable = stateKeys state - src <- case lookupKeySrc keyTable ref of - Nothing -> fail "no corresponding key" - Just target -> return target - return $ Image (normalizeSpaces ref) src - |