diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 54 |
1 files changed, 39 insertions, 15 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index ae8f0438e..0cbdf72b0 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -78,14 +78,14 @@ parseBody :: TagParser [Block] parseBody = liftM concat $ manyTill block eof block :: TagParser [Block] -block = optional pLocation >> - choice [ - pPara +block = choice + [ pPara , pHeader , pBlockQuote , pCodeBlock , pList , pHrule + , pSimpleTable , pPlain , pRawHtmlBlock ] @@ -195,6 +195,27 @@ pHrule = do pSelfClosing (=="hr") (const True) return [HorizontalRule] +pSimpleTable :: TagParser [Block] +pSimpleTable = try $ do + TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) + skipMany pBlank + head' <- option [] $ pInTags "th" pTd + rows <- many1 $ try $ + skipMany pBlank >> pInTags "tr" pTd + skipMany pBlank + TagClose _ <- pSatisfy (~== TagClose "table") + let cols = maximum $ map length rows + let aligns = replicate cols AlignLeft + let widths = replicate cols 0 + return [Table [] aligns widths head' rows] + +pTd :: TagParser [TableCell] +pTd = try $ do + skipMany pBlank + res <- pInTags "td" pPlain + skipMany pBlank + return [res] + pBlockQuote :: TagParser [Block] pBlockQuote = do contents <- pInTags "blockquote" block @@ -235,9 +256,8 @@ pCodeBlock = try $ do return [CodeBlock attribs result] inline :: TagParser [Inline] -inline = choice [ - pLocation - , pTagText +inline = choice + [ pTagText , pEmph , pStrong , pSuperscript @@ -250,17 +270,19 @@ inline = choice [ , pRawHtmlInline ] -pLocation :: TagParser [a] +pLocation :: TagParser () pLocation = do - (TagPosition r c) <- pSatisfy isTagPosition + (TagPosition r c) <- pSat isTagPosition setPosition $ newPos "input" r c - return [] -pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String) -pSatisfy f = do +pSat :: (Tag String -> Bool) -> TagParser (Tag String) +pSat f = do pos <- getPosition token show (const pos) (\x -> if f x then Just x else Nothing) +pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String) +pSatisfy f = try $ optional pLocation >> pSat f + pAnyTag :: TagParser (Tag String) pAnyTag = pSatisfy (const True) @@ -268,7 +290,7 @@ pSelfClosing :: (String -> Bool) -> ([Attribute String] -> Bool) -> TagParser (Tag String) pSelfClosing f g = do open <- pSatisfy (tagOpen f g) - optional $ try $ pLocation >> pSatisfy (tagClose f) + optional $ pSatisfy (tagClose f) return open pEmph :: TagParser [Inline] @@ -342,7 +364,6 @@ pInTags tagtype parser = try $ do pCloses :: String -> TagParser () pCloses tagtype = try $ do - optional pLocation t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag case t of (TagClose t') | t' == tagtype -> pAnyTag >> return () @@ -360,6 +381,11 @@ pTagText = try $ do Left _ -> fail $ "Could not parse `" ++ str ++ "'" Right result -> return result +pBlank :: TagParser () +pBlank = try $ do + (TagText str) <- pSatisfy isTagText + guard $ all isSpace str + pTagContents :: GenParser Char ParserState Inline pTagContents = pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol @@ -433,10 +459,8 @@ _ `closes` "html" = False "a" `closes` "a" = True "li" `closes` "li" = True "th" `closes` t | t `elem` ["th","td"] = True -"td" `closes` t | t `elem` ["th","td"] = True "tr" `closes` t | t `elem` ["th","td","tr"] = True "dt" `closes` t | t `elem` ["dt","dd"] = True -"dd" `closes` t | t `elem` ["dt","dd"] = True "hr" `closes` "p" = True "p" `closes` "p" = True "meta" `closes` "meta" = True |