aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs19
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs23
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs217
-rw-r--r--src/Text/Pandoc/Readers/RST.hs130
4 files changed, 172 insertions, 217 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 96244e58f..803fc91c5 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -45,7 +45,7 @@ import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Entities ( characterEntity, decodeEntities )
-import Maybe ( fromMaybe )
+import Data.Maybe ( fromMaybe )
import Data.List ( intersect, takeWhile, dropWhile )
import Data.Char ( toUpper, toLower, isAlphaNum )
@@ -267,9 +267,7 @@ parseHtml = do
option "" (htmlEndTag "html")
many anyChar -- ignore anything after </html>
eof
- state <- getState
- let keyBlocks = stateKeyBlocks state
- return (Pandoc (Meta title authors date) (blocks ++ (reverse keyBlocks)))
+ return (Pandoc (Meta title authors date) blocks)
--
-- parsing blocks
@@ -456,11 +454,7 @@ link = try $ do
Nothing -> fail "no href"
let title = fromMaybe "" (extractAttribute "title" attributes)
label <- inlinesTilEnd "a"
- state <- getState
- ref <- if stateInlineLinks state
- then return (Src url title)
- else generateReference url title
- return $ Link (normalizeSpaces label) ref
+ return $ Link (normalizeSpaces label) (url, title)
image = try $ do
(tag, attributes) <- htmlTag "img"
@@ -469,8 +463,5 @@ image = try $ do
Nothing -> fail "no src"
let title = fromMaybe "" (extractAttribute "title" attributes)
let alt = fromMaybe "" (extractAttribute "alt" attributes)
- state <- getState
- ref <- if stateInlineLinks state
- then return (Src url title)
- else generateReference url title
- return $ Image [Str alt] ref
+ return $ Image [Str alt] (url, title)
+
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 33c4a75ee..b0062ceff 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -37,8 +37,8 @@ import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Maybe ( fromMaybe )
-import Char ( chr )
+import Data.Maybe ( fromMaybe )
+import Data.Char ( chr )
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ParserState -- ^ Parser state, including options for parser
@@ -135,14 +135,11 @@ parseLaTeX = do
spaces
eof
state <- getState
- let keyBlocks = stateKeyBlocks state
- let noteBlocks = stateNoteBlocks state
let blocks' = filter (/= Null) blocks
let title' = stateTitle state
let authors' = stateAuthors state
let date' = stateDate state
- return (Pandoc (Meta title' authors' date')
- (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks)))
+ return (Pandoc (Meta title' authors' date') blocks')
--
-- parsing blocks
@@ -618,15 +615,15 @@ link = try (do
url <- manyTill anyChar (char '}')
char '{'
label <- manyTill inline (char '}')
- return (Link (normalizeSpaces label) (Src url "")))
+ return (Link (normalizeSpaces label) (url, "")))
image = try (do
("includegraphics", _, args) <- command
let args' = filter isArg args -- filter out options
let src = if null args' then
- Src "" ""
+ ("", "")
else
- Src (stripFirstAndLast (head args')) ""
+ (stripFirstAndLast (head args'), "")
return (Image [Str "image"] src))
footnote = try (do
@@ -640,13 +637,7 @@ footnote = try (do
setInput $ contents'
blocks <- parseBlocks
setInput rest
- state <- getState
- let notes = stateNoteBlocks state
- let nextRef = case notes of
- [] -> "1"
- (Note ref body):rest -> (show ((read ref) + 1))
- setState (state { stateNoteBlocks = (Note nextRef blocks):notes })
- return (NoteRef nextRef))
+ return (Note blocks))
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a2e84e8c2..353dd45dd 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown (
readMarkdown
) where
-import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect )
+import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect, lookup )
import Data.Char ( isAlphaNum )
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
@@ -160,28 +160,72 @@ titleBlock = try (do
option "" blanklines
return (title, author, date))
--- | Returns the number assigned to a Note block
-numberOfNote :: Block -> Int
-numberOfNote (Note ref _) = (read ref)
-numberOfNote _ = 0
-
parseMarkdown = do
- updateState (\state -> state { stateParseRaw = True })
- -- need to parse raw HTML, since markdown allows it
+ updateState (\state -> state { stateParseRaw = True }) -- parse raw HTML: markdown allows it
(title, author, date) <- option ([],[],"") titleBlock
-- go through once just to get list of reference keys
- keysUsed <- lookAhead $ (do {manyTill (referenceKey <|> (do{anyLine; return Null})) eof;
- newState <- getState;
- return $ stateKeysUsed newState})
- updateState (\st -> st { stateKeysUsed = keysUsed })
+ refs <- manyTill (noteBlock <|> referenceKey <|> (do l <- lineClump
+ return (LineClump l))) eof
+ let keys = map (\(KeyBlock label target) -> (label, target)) $
+ filter isKeyBlock refs
+ let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $
+ filter isNoteBlock refs
+ let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
+ setInput $ concat rawlines -- with note blocks and keys stripped out
+ updateState (\state -> state { stateKeys = keys, stateNotes = notes })
blocks <- parseBlocks -- go through again, for real
let blocks' = filter (/= Null) blocks
- state <- getState
- let keys = reverse $ stateKeyBlocks state
- let notes = reverse $ stateNoteBlocks state
- let sortedNotes = sortBy (\x y -> compare (numberOfNote x)
- (numberOfNote y)) notes
- return (Pandoc (Meta title author date) (blocks' ++ sortedNotes ++ keys))
+ return (Pandoc (Meta title author date) blocks')
+
+--
+-- initial pass for references
+--
+
+referenceKey = try $ do
+ nonindentSpaces
+ label <- reference
+ char labelSep
+ skipSpaces
+ option ' ' (char autoLinkStart)
+ src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars))
+ option ' ' (char autoLinkEnd)
+ tit <- option "" title
+ blanklines
+ return $ KeyBlock label (removeTrailingSpace src, tit)
+
+noteMarker = try (do
+ char labelStart
+ char noteStart
+ manyTill (noneOf " \t\n") (char labelEnd))
+
+rawLine = try (do
+ notFollowedBy' blankline
+ notFollowedBy' noteMarker
+ contents <- many1 nonEndline
+ end <- option "" (do
+ newline
+ option "" (try indentSpaces)
+ return "\n")
+ return (contents ++ end))
+
+rawLines = do
+ lines <- many1 rawLine
+ return (concat lines)
+
+noteBlock = try $ do
+ failIfStrict
+ ref <- noteMarker
+ char ':'
+ option ' ' (try blankline)
+ option "" (try indentSpaces)
+ raw <- sepBy rawLines (try (do {blankline; indentSpaces}))
+ option "" blanklines
+ -- parse the extracted text, which may contain various block elements:
+ rest <- getInput
+ setInput $ (joinWithSep "\n" raw) ++ "\n\n"
+ contents <- parseBlocks
+ setInput rest
+ return (NoteBlock ref contents)
--
-- parsing blocks
@@ -189,9 +233,17 @@ parseMarkdown = do
parseBlocks = manyTill block eof
-block = choice [ header, table, codeBlock, note, referenceKey, hrule, list,
- blockQuote, htmlBlock, rawLaTeXEnvironment', para,
- plain, nullBlock ] <?> "block"
+block = choice [ header
+ , table
+ , codeBlock
+ , hrule
+ , list
+ , blockQuote
+ , htmlBlock
+ , rawLaTeXEnvironment'
+ , para
+ , plain
+ , nullBlock ] <?> "block"
--
-- header blocks
@@ -262,45 +314,6 @@ codeBlock = do
return (CodeBlock (stripTrailingNewlines result))
--
--- note block
---
-
-rawLine = try (do
- notFollowedBy' blankline
- notFollowedBy' noteMarker
- contents <- many1 nonEndline
- end <- option "" (do
- newline
- option "" (try indentSpaces)
- return "\n")
- return (contents ++ end))
-
-rawLines = do
- lines <- many1 rawLine
- return (concat lines)
-
-note = try (do
- failIfStrict
- ref <- noteMarker
- char ':'
- skipSpaces
- skipEndline
- raw <- sepBy rawLines (try (do {blankline; indentSpaces}))
- option "" blanklines
- -- parse the extracted text, which may contain various block elements:
- rest <- getInput
- setInput $ (joinWithSep "\n" raw) ++ "\n\n"
- contents <- parseBlocks
- setInput rest
- state <- getState
- let identifiers = stateNoteIdentifiers state
- case (findIndex (== ref) identifiers) of
- Just n -> updateState (\s -> s {stateNoteBlocks =
- (Note (show (n+1)) contents):(stateNoteBlocks s)})
- Nothing -> updateState id
- return Null)
-
---
-- block quotes
--
@@ -535,25 +548,6 @@ rawHtmlBlocks = try (do
else combined
return (RawHtml combined'))
---
--- reference key
---
-
-referenceKey = try (do
- nonindentSpaces
- label <- reference
- char labelSep
- skipSpaces
- option ' ' (char autoLinkStart)
- src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars))
- option ' ' (char autoLinkEnd)
- tit <- option "" title
- blanklines
- state <- getState
- let keysUsed = stateKeysUsed state
- setState state { stateKeysUsed = (label:keysUsed) }
- return $ Key label (Src (removeTrailingSpace src) tit))
-
--
-- LaTeX
--
@@ -713,7 +707,7 @@ table = do
inline = choice [ rawLaTeXInline'
, escapedChar
, entity
- , noteRef
+ , note
, inlineNote
, link
, referenceLink
@@ -933,7 +927,7 @@ reference = try $ do
return (normalizeSpaces label)
-- source for a link, with optional title
-source = try (do
+source = try $ do
char srcStart
option ' ' (char autoLinkStart)
src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners))
@@ -941,7 +935,7 @@ source = try (do
tit <- option "" title
skipSpaces
char srcEnd
- return (Src (removeTrailingSpace src) tit))
+ return (removeTrailingSpace src, tit)
titleWith startChar endChar = try (do
skipSpaces
@@ -965,30 +959,18 @@ explicitLink = try (do
src <- source
return (Link label src))
-referenceLink = choice [referenceLinkDouble, referenceLinkSingle]
-
--- a link like [this][ref]
-referenceLinkDouble = try (do
+-- a link like [this][ref] or [this][] or [this]
+referenceLink = try $ do
label <- reference
- skipSpaces
- option ' ' newline
- skipSpaces
- ref <- reference
+ ref <- option [] (try (do skipSpaces
+ option ' ' newline
+ skipSpaces
+ reference))
let ref' = if null ref then label else ref
state <- getState
- if ref' `elem` (stateKeysUsed state)
- then return ()
- else fail "no corresponding key"
- return (Link label (Ref ref')))
-
--- a link like [this]
-referenceLinkSingle = try (do
- label <- reference
- state <- getState
- if label `elem` (stateKeysUsed state)
- then return ()
- else fail "no corresponding key"
- return (Link label (Ref label)))
+ case lookupKeySrc (stateKeys state) ref' of
+ Nothing -> fail "no corresponding key"
+ Just target -> return (Link label target)
autoLink = autoLinkEmail <|> autoLinkRegular
@@ -999,7 +981,7 @@ autoLinkEmail = try $ do
domain <- sepBy1 (many1 (noneOf "/:.@<> \t\n")) (char '.')
let src = name ++ "@" ++ (joinWithSep "." domain)
char autoLinkEnd
- return $ Link [Str src] (Src ("mailto:" ++ src) "")
+ return $ Link [Str src] (("mailto:" ++ src), "")
-- a link <http://like.this.com>
autoLinkRegular = try $ do
@@ -1007,39 +989,28 @@ autoLinkRegular = try $ do
prot <- oneOfStrings ["http:", "ftp:", "mailto:"]
rest <- many1Till (noneOf " \t\n<>") (char autoLinkEnd)
let src = prot ++ rest
- return $ Link [Str src] (Src src "")
+ return $ Link [Str src] (src, "")
image = try (do
char imageStart
(Link label src) <- link
return (Image label src))
-noteMarker = try (do
- char labelStart
- char noteStart
- manyTill (noneOf " \t\n") (char labelEnd))
-
-noteRef = try (do
+note = try $ do
failIfStrict
ref <- noteMarker
state <- getState
- let identifiers = (stateNoteIdentifiers state) ++ [ref]
- setState state {stateNoteIdentifiers = identifiers}
- return (NoteRef (show (length identifiers))))
+ let notes = stateNotes state
+ case lookup ref notes of
+ Nothing -> fail "note not found"
+ Just contents -> return (Note contents)
-inlineNote = try (do
+inlineNote = try $ do
failIfStrict
char noteStart
char labelStart
contents <- manyTill inline (char labelEnd)
- state <- getState
- let identifiers = stateNoteIdentifiers state
- let ref = show $ (length identifiers) + 1
- let noteBlocks = stateNoteBlocks state
- setState state {stateNoteIdentifiers = (identifiers ++ [ref]),
- stateNoteBlocks =
- (Note ref [Para contents]):noteBlocks}
- return (NoteRef ref))
+ return (Note [Para contents])
rawLaTeXInline' = do
failIfStrict
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a3de0a2ea..d2143af38 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -37,8 +37,8 @@ import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
import Data.Maybe ( fromMaybe )
-import List ( findIndex )
-import Char ( toUpper )
+import Data.List ( findIndex, delete )
+import Data.Char ( toUpper )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -> String -> Pandoc
@@ -62,11 +62,7 @@ specialChars = "\\`|*_<>$:[-"
-- parsing documents
--
-isAnonKeyBlock block = case block of
- (Key [Str "_"] str) -> True
- otherwise -> False
-
-isNotAnonKeyBlock block = not (isAnonKeyBlock block)
+isAnonKey (ref, src) = (ref == [Str "_"])
isHeader1 :: Block -> Bool
isHeader1 (Header 1 _) = True
@@ -101,20 +97,22 @@ titleTransform blocks = (blocks, [])
parseRST = do
-- first pass: get anonymous keys
- keyBlocks <- lookAhead $ manyTill (anonymousKey <|> (do{anyLine; return Null})) eof
- let anonymousKeys = filter (/= Null) keyBlocks
- -- run parser again to fill in anonymous links...
- updateState (\st -> st { stateKeyBlocks = anonymousKeys })
- state <- getState
+ refs <- manyTill (referenceKey <|> (do l <- lineClump
+ return (LineClump l))) eof
+ let keys = map (\(KeyBlock label target) -> (label, target)) $
+ filter isKeyBlock refs
+ let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
+ setInput $ concat rawlines -- with keys stripped out
+ updateState (\state -> state { stateKeys = keys })
blocks <- parseBlocks
- let blocks' = filter isNotAnonKeyBlock blocks
+ let blocks' = filter (/= Null) blocks
+ state <- getState
let (blocks'', title) = if stateStandalone state
then titleTransform blocks'
else (blocks', [])
- state' <- getState
- let authors = stateAuthors state'
- let date = stateDate state'
- let title' = if (null title) then (stateTitle state') else title
+ let authors = stateAuthors state
+ let date = stateDate state
+ let title' = if (null title) then (stateTitle state) else title
return (Pandoc (Meta title' authors date) blocks'')
--
@@ -124,7 +122,7 @@ parseRST = do
parseBlocks = manyTill block eof
block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote,
- referenceKey, imageBlock, unknownDirective, header,
+ imageBlock, unknownDirective, header,
hrule, list, fieldList, lineBlock, para, plain,
nullBlock ] <?> "block"
@@ -221,7 +219,7 @@ plain = do
imageBlock = try (do
string ".. image:: "
src <- manyTill anyChar newline
- return (Plain [Image [Str "image"] (Src src "")]))
+ return (Plain [Image [Str "image"] (src, "")]))
--
-- header blocks
@@ -492,43 +490,43 @@ unknownDirective = try (do
-- reference key
--
-referenceKey = choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
+referenceKey = do
+ result <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
+ option "" blanklines
+ return result
-imageKey = try (do
+imageKey = try $ do
string ".. |"
ref <- manyTill inline (char '|')
skipSpaces
string "image::"
src <- manyTill anyChar newline
- return (Key (normalizeSpaces ref)
- (Src (removeLeadingTrailingSpace src) "")))
+ return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "")
-anonymousKey = try (do
+anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
skipSpaces
option ' ' newline
src <- manyTill anyChar newline
state <- getState
- return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) "")))
+ return $ KeyBlock [Str "_"] (removeLeadingTrailingSpace src, "")
-regularKeyQuoted = try (do
+regularKeyQuoted = try $ do
string ".. _`"
ref <- manyTill inline (char '`')
char ':'
skipSpaces
option ' ' newline
src <- manyTill anyChar newline
- return (Key (normalizeSpaces ref)
- (Src (removeLeadingTrailingSpace src) "")))
+ return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "")
-regularKey = try (do
+regularKey = try $ do
string ".. _"
ref <- manyTill inline (char ':')
skipSpaces
option ' ' newline
src <- manyTill anyChar newline
- return (Key (normalizeSpaces ref)
- (Src (removeLeadingTrailingSpace src) "")))
+ return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "")
--
-- inline
@@ -577,7 +575,7 @@ tabchar = do
return (Str "\t")
str = do
- notFollowedBy' oneWordReferenceLink
+ notFollowedBy' oneWordReference
result <- many1 (noneOf (specialChars ++ "\t\n "))
return (Str result)
@@ -596,46 +594,44 @@ endline = try (do
-- links
--
-link = choice [explicitLink, referenceLink, autoLink,
- oneWordReferenceLink] <?> "link"
+link = choice [explicitLink, referenceLink, autoLink] <?> "link"
-explicitLink = try (do
+explicitLink = try $ do
char '`'
notFollowedBy (char '`') -- `` is marks start of inline code
label <- manyTill inline (try (do {spaces; char '<'}))
src <- manyTill (noneOf ">\n ") (char '>')
skipSpaces
string "`_"
- return (Link (normalizeSpaces label)
- (Src (removeLeadingTrailingSpace src) "")))
+ return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "")
-anonymousLinkEnding = try (do
- char '_'
- state <- getState
- let anonKeys = stateKeyBlocks state
- -- if there's a list of anon key refs (from previous pass), pop one off.
- -- otherwise return an anon key ref for the next pass to take care of...
- case anonKeys of
- (Key [Str "_"] src):rest ->
- do
- setState (state { stateKeyBlocks = rest })
- return src
- otherwise -> return (Ref [Str "_"]))
-
-referenceLink = try (do
+reference = try $ do
char '`'
notFollowedBy (char '`')
- label <- manyTill inline (char '`')
+ label <- many1Till inline (char '`')
char '_'
- src <- option (Ref []) anonymousLinkEnding
- return (Link (normalizeSpaces label) src))
+ return label
-oneWordReferenceLink = try (do
- label <- many1 alphaNum
+oneWordReference = do
+ raw <- many1 alphaNum
char '_'
- src <- option (Ref []) anonymousLinkEnding
notFollowedBy alphaNum -- because this_is_not a link
- return (Link [Str label] src))
+ return [Str raw]
+
+referenceLink = try $ do
+ label <- reference <|> oneWordReference
+ key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link
+ state <- getState
+ let keyTable = stateKeys state
+ src <- case lookupKeySrc keyTable key of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ -- if anonymous link, remove first anon key so it won't be used again
+ let keyTable' = if (key == [Str "_"]) -- anonymous link?
+ then delete ([Str "_"], src) keyTable -- remove first anon key
+ else keyTable
+ setState $ state { stateKeys = keyTable' }
+ return $ Link (normalizeSpaces label) src
uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
"mailto:", "news:", "telnet:" ]
@@ -645,9 +641,9 @@ uri = try (do
identifier <- many1 (noneOf " \t\n")
return (scheme ++ identifier))
-autoURI = try (do
+autoURI = try $ do
src <- uri
- return (Link [Str src] (Src src "")))
+ return $ Link [Str src] (src, "")
emailChar = alphaNum <|> oneOf "-+_."
@@ -666,14 +662,20 @@ domain = try (do
dom <- many1 (try (do{ char '.'; many1 domainChar }))
return (joinWithSep "." (first:dom)))
-autoEmail = try (do
+autoEmail = try $ do
src <- emailAddress
- return (Link [Str src] (Src ("mailto:" ++ src) "")))
+ return $ Link [Str src] ("mailto:" ++ src, "")
autoLink = autoURI <|> autoEmail
-- For now, we assume that all substitution references are for images.
-image = try (do
+image = try $ do
char '|'
ref <- manyTill inline (char '|')
- return (Image (normalizeSpaces ref) (Ref ref)))
+ state <- getState
+ let keyTable = stateKeys state
+ src <- case lookupKeySrc keyTable ref of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ return (Image (normalizeSpaces ref) src)
+