From 34cf0162512f2f75805b2b2ff947026bc9f67b2c Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Tue, 12 Aug 2014 23:03:24 +0100 Subject: EPUB Reader: Ignore title pages --- src/Text/Pandoc/Readers/HTML.hs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 1789b865f..cee7ea300 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -131,6 +131,7 @@ block = do , eSwitch B.para block , mempty <$ eFootnote , mempty <$ eTOC + , mempty <$ eTitlePage , pPara , pHeader , pBlockQuote @@ -334,9 +335,13 @@ headerLevel tagtype = do <|> return level - - - +eTitlePage :: TagParser () +eTitlePage = try $ do + let isTitlePage as = maybe False (isInfixOf "titlepage") (lookup "type" as) + let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section") + isTitlePage + TagOpen tag _ <- lookAhead $ pSatisfy groupTag + () <$ pInTags tag block pHeader :: TagParser Blocks pHeader = try $ do @@ -922,13 +927,14 @@ instance HasLastStrPosition HTMLState where sectioningContent :: [String] sectioningContent = ["article", "aside", "nav", "section"] -{- + groupingContent :: [String] groupingContent = ["p", "hr", "pre", "blockquote", "ol" , "ul", "li", "dl", "dt", "dt", "dd" , "figure", "figcaption", "div", "main"] +{- types :: [(String, ([String], Int))] types = -- Document divisions -- cgit v1.2.3