aboutsummaryrefslogtreecommitdiff
path: root/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2008-07-15 00:54:43 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2008-07-15 00:54:43 +0000
commit7701a87a1a15bf531a0571cdee0d6df15b19ed77 (patch)
tree6cc4a3ad9ff20118bb19a35716e7cf9b0380e8a2 /Text/Pandoc/Readers
parente9668bc988b2be0034632e0da0c061feecb28336 (diff)
downloadpandoc-7701a87a1a15bf531a0571cdee0d6df15b19ed77.tar.gz
Code cleanup - RST reader.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1324 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Text/Pandoc/Readers')
-rw-r--r--Text/Pandoc/Readers/RST.hs116
1 files changed, 93 insertions, 23 deletions
diff --git a/Text/Pandoc/Readers/RST.hs b/Text/Pandoc/Readers/RST.hs
index 76cfc8aa0..08e55f97d 100644
--- a/Text/Pandoc/Readers/RST.hs
+++ b/Text/Pandoc/Readers/RST.hs
@@ -37,27 +37,29 @@ import Data.List ( findIndex, delete )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -> String -> Pandoc
-readRST state str = (readWith parseRST) state (str ++ "\n\n")
+readRST state s = (readWith parseRST) state (s ++ "\n\n")
--
-- Constants and data structure definitions
---
+bulletListMarkers :: [Char]
bulletListMarkers = "*+-"
+
+underlineChars :: [Char]
underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
-- treat these as potentially non-text when parsing inline:
+specialChars :: [Char]
specialChars = "\\`|*_<>$:[-"
--
-- parsing documents
--
-isAnonKey (ref, src) = ref == [Str "_"]
-
isHeader :: Int -> Block -> Bool
isHeader n (Header x _) = x == n
-isHeader _ _ = False
+isHeader _ _ = False
-- | Promote all headers in a list of blocks. (Part of
-- title transformation for RST.)
@@ -65,7 +67,7 @@ promoteHeaders :: Int -> [Block] -> [Block]
promoteHeaders num ((Header level text):rest) =
(Header (level - num) text):(promoteHeaders num rest)
promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
-promoteHeaders num [] = []
+promoteHeaders _ [] = []
-- | If list of blocks starts with a header (or a header and subheader)
-- of level that are not found elsewhere, return it as a title and
@@ -82,6 +84,7 @@ titleTransform ((Header 1 head1):rest) = -- title, no subtitle
else ((promoteHeaders 1 rest), head1)
titleTransform blocks = (blocks, [])
+parseRST :: GenParser Char ParserState Pandoc
parseRST = do
startPos <- getPosition
-- go through once just to get list of reference keys
@@ -91,7 +94,7 @@ parseRST = do
setPosition startPos
st <- getState
let reversedKeys = stateKeys st
- updateState $ \st -> st { stateKeys = reverse reversedKeys }
+ updateState $ \s -> s { stateKeys = reverse reversedKeys }
-- now parse it for real...
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
@@ -108,8 +111,10 @@ parseRST = do
-- parsing blocks
--
+parseBlocks :: GenParser Char ParserState [Block]
parseBlocks = manyTill block eof
+block :: GenParser Char ParserState Block
block = choice [ codeBlock
, rawHtmlBlock
, rawLaTeXBlock
@@ -129,6 +134,7 @@ block = choice [ codeBlock
-- field list
--
+fieldListItem :: String -> GenParser Char st ([Char], [Char])
fieldListItem indent = try $ do
string indent
char ':'
@@ -140,13 +146,14 @@ fieldListItem indent = try $ do
indentedBlock
return (name, joinWithSep " " (first:(lines rest)))
+fieldList :: GenParser Char ParserState Block
fieldList = try $ do
indent <- lookAhead $ many (oneOf " \t")
items <- many1 $ fieldListItem indent
blanklines
let authors = case lookup "Authors" items of
Just auth -> [auth]
- Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
+ Nothing -> map snd (filter (\(x,_) -> x == "Author") items)
if null authors
then return ()
else updateState $ \st -> st {stateAuthors = authors}
@@ -157,7 +164,7 @@ fieldList = try $ do
Just tit -> parseFromString (many inline) tit >>=
\t -> updateState $ \st -> st {stateTitle = t}
Nothing -> return ()
- let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") &&
+ let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") &&
(x /= "Date") && (x /= "Title")) items
if null remaining
then return Null
@@ -170,26 +177,31 @@ fieldList = try $ do
-- line block
--
+lineBlockLine :: GenParser Char ParserState [Inline]
lineBlockLine = try $ do
string "| "
white <- many (oneOf " \t")
line <- manyTill inline newline
return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
+lineBlock :: GenParser Char ParserState Block
lineBlock = try $ do
- lines <- many1 lineBlockLine
+ lines' <- many1 lineBlockLine
blanklines
- return $ Para (concat lines)
+ return $ Para (concat lines')
--
-- paragraph block
--
+para :: GenParser Char ParserState Block
para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
+codeBlockStart :: GenParser Char st Char
codeBlockStart = string "::" >> blankline >> blankline
-- paragraph that ends in a :: starting a code block
+paraBeforeCodeBlock :: GenParser Char ParserState Block
paraBeforeCodeBlock = try $ do
result <- many1 (notFollowedBy' codeBlockStart >> inline)
lookAhead (string "::")
@@ -198,18 +210,21 @@ paraBeforeCodeBlock = try $ do
else (normalizeSpaces result) ++ [Str ":"]
-- regular paragraph
+paraNormal :: GenParser Char ParserState Block
paraNormal = try $ do
result <- many1 inline
newline
blanklines
return $ Para $ normalizeSpaces result
+plain :: GenParser Char ParserState Block
plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- image block
--
+imageBlock :: GenParser Char st Block
imageBlock = try $ do
string ".. image:: "
src <- manyTill anyChar newline
@@ -223,9 +238,11 @@ imageBlock = try $ do
-- header blocks
--
+header :: GenParser Char ParserState Block
header = doubleHeader <|> singleHeader <?> "header"
-- a header with lines on top and bottom
+doubleHeader :: GenParser Char ParserState Block
doubleHeader = try $ do
c <- oneOf underlineChars
rest <- many (char c) -- the top line
@@ -250,6 +267,7 @@ doubleHeader = try $ do
return $ Header level (normalizeSpaces txt)
-- a header with line on the bottom only
+singleHeader :: GenParser Char ParserState Block
singleHeader = try $ do
notFollowedBy' whitespace
txt <- many1 (do {notFollowedBy blankline; inline})
@@ -257,7 +275,7 @@ singleHeader = try $ do
let len = (sourceColumn pos) - 1
blankline
c <- oneOf underlineChars
- rest <- count (len - 1) (char c)
+ count (len - 1) (char c)
many (char c)
blanklines
state <- getState
@@ -272,6 +290,7 @@ singleHeader = try $ do
-- hrule block
--
+hrule :: GenParser Char st Block
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@@ -285,6 +304,7 @@ hrule = try $ do
--
-- read a line indented by a given string
+indentedLine :: String -> GenParser Char st [Char]
indentedLine indents = try $ do
string indents
result <- manyTill anyChar newline
@@ -292,6 +312,7 @@ indentedLine indents = try $ do
-- two or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
+indentedBlock :: GenParser Char st [Char]
indentedBlock = do
indents <- lookAhead $ many1 (oneOf " \t")
lns <- many $ choice $ [ indentedLine indents,
@@ -301,6 +322,7 @@ indentedBlock = do
optional blanklines
return $ concat lns
+codeBlock :: GenParser Char st Block
codeBlock = try $ do
codeBlockStart
result <- indentedBlock
@@ -310,6 +332,7 @@ codeBlock = try $ do
-- raw html
--
+rawHtmlBlock :: GenParser Char st Block
rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
indentedBlock >>= return . RawHtml
@@ -317,6 +340,7 @@ rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
-- raw latex
--
+rawLaTeXBlock :: GenParser Char st Block
rawLaTeXBlock = try $ do
string ".. raw:: latex"
blanklines
@@ -327,6 +351,7 @@ rawLaTeXBlock = try $ do
-- block quotes
--
+blockQuote :: GenParser Char ParserState Block
blockQuote = do
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
@@ -337,8 +362,10 @@ blockQuote = do
-- list blocks
--
+list :: GenParser Char ParserState Block
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+definitionListItem :: GenParser Char ParserState ([Inline], [Block])
definitionListItem = try $ do
term <- many1Till inline endline
raw <- indentedBlock
@@ -346,9 +373,11 @@ definitionListItem = try $ do
contents <- parseFromString parseBlocks $ raw ++ "\n\n"
return (normalizeSpaces term, contents)
+definitionList :: GenParser Char ParserState Block
definitionList = many1 definitionListItem >>= return . DefinitionList
-- parses bullet list start and returns its length (inc. following whitespace)
+bulletListStart :: GenParser Char st Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
@@ -356,12 +385,16 @@ bulletListStart = try $ do
return $ length (marker:white)
-- parses ordered list start and returns its length (inc following whitespace)
+orderedListStart :: ListNumberStyle
+ -> ListNumberDelim
+ -> GenParser Char st Int
orderedListStart style delim = try $ do
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
white <- many1 spaceChar
return $ markerLen + length white
-- parse a line of a list item
+listLine :: Int -> GenParser Char ParserState [Char]
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
@@ -369,6 +402,7 @@ listLine markerLength = try $ do
return $ line ++ "\n"
-- indent by specified number of spaces (or equiv. tabs)
+indentWith :: Int -> GenParser Char ParserState [Char]
indentWith num = do
state <- getState
let tabStop = stateTabStop state
@@ -378,6 +412,8 @@ indentWith num = do
(try (char '\t' >> count (num - tabStop) (char ' '))) ]
-- parse raw text for one list item, excluding start marker and continuations
+rawListItem :: GenParser Char ParserState Int
+ -> GenParser Char ParserState (Int, [Char])
rawListItem start = try $ do
markerLength <- start
firstLine <- manyTill anyChar newline
@@ -387,11 +423,14 @@ rawListItem start = try $ do
-- continuation of a list item - indented and separated by blankline or
-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
+listContinuation :: Int -> GenParser Char ParserState [Char]
listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
return $ blanks ++ concat result
+listItem :: GenParser Char ParserState Int
+ -> GenParser Char ParserState [Block]
listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
@@ -408,12 +447,14 @@ listItem start = try $ do
updateState (\st -> st {stateParserContext = oldContext})
return parsed
+orderedList :: GenParser Char ParserState Block
orderedList = try $ do
(start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify items
return $ OrderedList (start, style, delim) items'
+bulletList :: GenParser Char ParserState Block
bulletList = many1 (listItem bulletListStart) >>=
return . BulletList . compactify
@@ -421,6 +462,7 @@ bulletList = many1 (listItem bulletListStart) >>=
-- unknown directive (e.g. comment)
--
+unknownDirective :: GenParser Char st Block
unknownDirective = try $ do
string ".. "
manyTill anyChar newline
@@ -433,37 +475,44 @@ unknownDirective = try $ do
-- reference key
--
+quotedReferenceName :: GenParser Char ParserState [Inline]
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code!
- label <- many1Till inline (char '`')
- return label
+ label' <- many1Till inline (char '`')
+ return label'
+unquotedReferenceName :: GenParser Char ParserState [Inline]
unquotedReferenceName = try $ do
- label <- many1Till inline (lookAhead $ char ':')
- return label
+ label' <- many1Till inline (lookAhead $ char ':')
+ return label'
+isolated :: Char -> GenParser Char st Char
isolated ch = try $ char ch >>~ notFollowedBy (char ch)
+simpleReferenceName :: GenParser Char st [Inline]
simpleReferenceName = do
raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|>
(try $ char '_' >>~ lookAhead alphaNum))
return [Str raw]
+referenceName :: GenParser Char ParserState [Inline]
referenceName = quotedReferenceName <|>
(try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
unquotedReferenceName
+referenceKey :: GenParser Char ParserState [Char]
referenceKey = do
startPos <- getPosition
key <- choice [imageKey, anonymousKey, regularKey]
st <- getState
let oldkeys = stateKeys st
- updateState $ \st -> st { stateKeys = key : oldkeys }
+ updateState $ \s -> s { stateKeys = key : oldkeys }
optional blanklines
endPos <- getPosition
-- return enough blanks to replace key
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+targetURI :: GenParser Char st [Char]
targetURI = do
skipSpaces
optional newline
@@ -472,6 +521,7 @@ targetURI = do
blanklines
return contents
+imageKey :: GenParser Char ParserState ([Inline], (String, [Char]))
imageKey = try $ do
string ".. |"
ref <- manyTill inline (char '|')
@@ -480,12 +530,13 @@ imageKey = try $ do
src <- targetURI
return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+anonymousKey :: GenParser Char st ([Inline], (String, [Char]))
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
- state <- getState
return ([Str "_"], (removeLeadingTrailingSpace src, ""))
+regularKey :: GenParser Char ParserState ([Inline], (String, [Char]))
regularKey = try $ do
string ".. _"
ref <- referenceName
@@ -497,6 +548,7 @@ regularKey = try $ do
-- inline
--
+inline :: GenParser Char ParserState Inline
inline = choice [ link
, str
, whitespace
@@ -511,46 +563,57 @@ inline = choice [ link
, escapedChar
, symbol ] <?> "inline"
+hyphens :: GenParser Char ParserState Inline
hyphens = do
result <- many1 (char '-')
option Space endline
-- don't want to treat endline after hyphen or dash as a space
return $ Str result
+escapedChar :: GenParser Char st Inline
escapedChar = escaped anyChar
+symbol :: GenParser Char ParserState Inline
symbol = do
result <- oneOf specialChars
return $ Str [result]
-- parses inline code, between codeStart and codeEnd
+code :: GenParser Char ParserState Inline
code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
+emph :: GenParser Char ParserState Inline
emph = enclosed (char '*') (char '*') inline >>=
return . Emph . normalizeSpaces
+strong :: GenParser Char ParserState Inline
strong = enclosed (string "**") (try $ string "**") inline >>=
return . Strong . normalizeSpaces
+interpreted :: [Char] -> GenParser Char st [Inline]
interpreted role = try $ do
optional $ try $ string "\\ "
result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
- nextChar <- lookAhead anyChar
try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
return [Str result]
+superscript :: GenParser Char ParserState Inline
superscript = interpreted "sup" >>= (return . Superscript)
+subscript :: GenParser Char ParserState Inline
subscript = interpreted "sub" >>= (return . Subscript)
+whitespace :: GenParser Char ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
+str :: GenParser Char ParserState Inline
str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
-- an endline character that can be treated as a space, not a structural break
+endline :: GenParser Char ParserState Inline
endline = try $ do
newline
notFollowedBy blankline
@@ -566,21 +629,24 @@ endline = try $ do
-- links
--
+link :: GenParser Char ParserState Inline
link = choice [explicitLink, referenceLink, autoLink] <?> "link"
+explicitLink :: GenParser Char ParserState Inline
explicitLink = try $ do
char '`'
notFollowedBy (char '`') -- `` marks start of inline code
- label <- manyTill (notFollowedBy (char '`') >> inline)
+ label' <- manyTill (notFollowedBy (char '`') >> inline)
(try (spaces >> char '<'))
src <- manyTill (noneOf ">\n ") (char '>')
skipSpaces
string "`_"
- return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "")
+ return $ Link (normalizeSpaces label') (removeLeadingTrailingSpace src, "")
+referenceLink :: GenParser Char ParserState Inline
referenceLink = try $ do
- label <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
- key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link
+ label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
+ key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link
state <- getState
let keyTable = stateKeys state
src <- case lookupKeySrc keyTable key of
@@ -591,19 +657,23 @@ referenceLink = try $ do
then delete ([Str "_"], src) keyTable -- remove first anon key
else keyTable
setState $ state { stateKeys = keyTable' }
- return $ Link (normalizeSpaces label) src
+ return $ Link (normalizeSpaces label') src
+autoURI :: GenParser Char ParserState Inline
autoURI = do
src <- uri
return $ Link [Str src] (src, "")
+autoEmail :: GenParser Char ParserState Inline
autoEmail = do
src <- emailAddress
return $ Link [Str src] ("mailto:" ++ src, "")
+autoLink :: GenParser Char ParserState Inline
autoLink = autoURI <|> autoEmail
-- For now, we assume that all substitution references are for images.
+image :: GenParser Char ParserState Inline
image = try $ do
char '|'
ref <- manyTill inline (char '|')