aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Markdown.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs55
1 files changed, 27 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c262a9a90..6ba19d7a1 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -29,7 +29,7 @@ Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
-import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate )
+import Data.List ( transpose, sortBy, findIndex, intercalate )
import qualified Data.Map as M
import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
@@ -39,14 +39,14 @@ import Text.Pandoc.Generic
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
-import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
- anyHtmlInlineTag, anyHtmlTag,
- anyHtmlEndTag, htmlEndTag, extractTagType,
- htmlBlockElement, htmlComment )
+import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
+ isTextTag, isCommentTag )
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.ParserCombinators.Parsec
import Control.Monad (when, liftM, guard)
import Text.TeXMath.Macros (applyMacros, Macro, pMacroDefinition)
+import Text.HTML.TagSoup
+import Text.HTML.TagSoup.Match (tagOpen)
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ParserState -- ^ Parser state, including options for parser
@@ -532,7 +532,7 @@ listLine = try $ do
notFollowedBy' (do indentSpaces
many (spaceChar)
listStart)
- chunks <- manyTill (htmlComment <|> count 1 anyChar) newline
+ chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline
return $ concat chunks ++ "\n"
-- parse raw text for one list item, excluding start marker and continuations
@@ -676,7 +676,7 @@ plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces
--
htmlElement :: GenParser Char ParserState [Char]
-htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
+htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
htmlBlock :: GenParser Char ParserState Block
htmlBlock = try $ do
@@ -686,25 +686,23 @@ htmlBlock = try $ do
finalNewlines <- many newline
return $ RawHtml $ first ++ finalSpace ++ finalNewlines
--- True if tag is self-closing
-isSelfClosing :: [Char] -> Bool
-isSelfClosing tag =
- isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
-
strictHtmlBlock :: GenParser Char ParserState [Char]
-strictHtmlBlock = try $ do
- tag <- anyHtmlBlockTag
- let tag' = extractTagType tag
- if isSelfClosing tag || tag' == "hr"
- then return tag
- else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
- (htmlElement <|> (count 1 anyChar)))
- end <- htmlEndTag tag'
- return $ tag ++ concat contents ++ end
+strictHtmlBlock = do
+ failUnlessBeginningOfLine
+ htmlInBalanced (not . isInlineTag)
+
+rawVerbatimBlock :: GenParser Char ParserState String
+rawVerbatimBlock = try $ do
+ (TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
+ t == "pre" || t == "style" || t == "script")
+ (const True))
+ contents <- manyTill anyChar (htmlTag (~== TagClose tag))
+ return $ open ++ contents ++ renderTags [TagClose tag]
rawHtmlBlocks :: GenParser Char ParserState Block
rawHtmlBlocks = do
- htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock
+ htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|>
+ liftM snd (htmlTag isBlockTag)
sps <- do sp1 <- many spaceChar
sp2 <- option "" (blankline >> return "\n")
sp3 <- many spaceChar
@@ -921,7 +919,7 @@ inlineParsers = [ str
, subscript
, inlineNote -- after superscript because of ^[link](/foo)^
, autoLink
- , rawHtmlInline'
+ , rawHtmlInline
, rawLaTeXInline'
, escapedChar
, exampleRef
@@ -1221,12 +1219,12 @@ inBrackets parser = do
char ']'
return $ "[" ++ contents ++ "]"
-rawHtmlInline' :: GenParser Char ParserState Inline
-rawHtmlInline' = do
+rawHtmlInline :: GenParser Char ParserState Inline
+rawHtmlInline = do
st <- getState
- result <- if stateStrict st
- then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
- else choice [htmlComment, anyHtmlInlineTag]
+ (_,result) <- if stateStrict st
+ then htmlTag (not . isTextTag)
+ else htmlTag isInlineTag
return $ HtmlInline result
-- Citations
@@ -1315,3 +1313,4 @@ citation = try $ do
, citationNoteNum = 0
, citationHash = 0
}
+