diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 55 |
1 files changed, 27 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index c262a9a90..6ba19d7a1 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -29,7 +29,7 @@ Conversion of markdown-formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) +import Data.List ( transpose, sortBy, findIndex, intercalate ) import qualified Data.Map as M import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) @@ -39,14 +39,14 @@ import Text.Pandoc.Generic import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' ) -import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, - anyHtmlInlineTag, anyHtmlTag, - anyHtmlEndTag, htmlEndTag, extractTagType, - htmlBlockElement, htmlComment ) +import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, + isTextTag, isCommentTag ) import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.ParserCombinators.Parsec import Control.Monad (when, liftM, guard) import Text.TeXMath.Macros (applyMacros, Macro, pMacroDefinition) +import Text.HTML.TagSoup +import Text.HTML.TagSoup.Match (tagOpen) -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ParserState -- ^ Parser state, including options for parser @@ -532,7 +532,7 @@ listLine = try $ do notFollowedBy' (do indentSpaces many (spaceChar) listStart) - chunks <- manyTill (htmlComment <|> count 1 anyChar) newline + chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline return $ concat chunks ++ "\n" -- parse raw text for one list item, excluding start marker and continuations @@ -676,7 +676,7 @@ plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces -- htmlElement :: GenParser Char ParserState [Char] -htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element" +htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) htmlBlock :: GenParser Char ParserState Block htmlBlock = try $ do @@ -686,25 +686,23 @@ htmlBlock = try $ do finalNewlines <- many newline return $ RawHtml $ first ++ finalSpace ++ finalNewlines --- True if tag is self-closing -isSelfClosing :: [Char] -> Bool -isSelfClosing tag = - isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag - strictHtmlBlock :: GenParser Char ParserState [Char] -strictHtmlBlock = try $ do - tag <- anyHtmlBlockTag - let tag' = extractTagType tag - if isSelfClosing tag || tag' == "hr" - then return tag - else do contents <- many (notFollowedBy' (htmlEndTag tag') >> - (htmlElement <|> (count 1 anyChar))) - end <- htmlEndTag tag' - return $ tag ++ concat contents ++ end +strictHtmlBlock = do + failUnlessBeginningOfLine + htmlInBalanced (not . isInlineTag) + +rawVerbatimBlock :: GenParser Char ParserState String +rawVerbatimBlock = try $ do + (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> + t == "pre" || t == "style" || t == "script") + (const True)) + contents <- manyTill anyChar (htmlTag (~== TagClose tag)) + return $ open ++ contents ++ renderTags [TagClose tag] rawHtmlBlocks :: GenParser Char ParserState Block rawHtmlBlocks = do - htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock + htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|> + liftM snd (htmlTag isBlockTag) sps <- do sp1 <- many spaceChar sp2 <- option "" (blankline >> return "\n") sp3 <- many spaceChar @@ -921,7 +919,7 @@ inlineParsers = [ str , subscript , inlineNote -- after superscript because of ^[link](/foo)^ , autoLink - , rawHtmlInline' + , rawHtmlInline , rawLaTeXInline' , escapedChar , exampleRef @@ -1221,12 +1219,12 @@ inBrackets parser = do char ']' return $ "[" ++ contents ++ "]" -rawHtmlInline' :: GenParser Char ParserState Inline -rawHtmlInline' = do +rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline = do st <- getState - result <- if stateStrict st - then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] - else choice [htmlComment, anyHtmlInlineTag] + (_,result) <- if stateStrict st + then htmlTag (not . isTextTag) + else htmlTag isInlineTag return $ HtmlInline result -- Citations @@ -1315,3 +1313,4 @@ citation = try $ do , citationNoteNum = 0 , citationHash = 0 } + |