diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML/Parsing.hs | 17 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML/Table.hs | 13 |
2 files changed, 18 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs index bd8d7c96c..a8cdf1de2 100644 --- a/src/Text/Pandoc/Readers/HTML/Parsing.hs +++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs @@ -30,11 +30,11 @@ module Text.Pandoc.Readers.HTML.Parsing ) where -import Control.Monad (guard, void, mzero) +import Control.Monad (void, mzero, mplus) import Data.Maybe (fromMaybe) import Data.Text (Text) import Text.HTML.TagSoup - ( Attribute, Tag (..), isTagText, isTagPosition, isTagOpen, isTagClose, (~==) ) + ( Attribute, Tag (..), isTagPosition, isTagOpen, isTagClose, (~==) ) import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition (Attr) import Text.Pandoc.Parsing @@ -118,9 +118,11 @@ pCloses tagtype = try $ do _ -> mzero pBlank :: PandocMonad m => TagParser m () -pBlank = try $ do - (TagText str) <- pSatisfy isTagText - guard $ T.all isSpace str +pBlank = void $ pSatisfy isBlank + where + isBlank (TagText t) = T.all isSpace t + isBlank (TagComment _) = True + isBlank _ = False pLocation :: PandocMonad m => TagParser m () pLocation = do @@ -218,9 +220,10 @@ maybeFromAttrib _ _ = Nothing mkAttr :: [(Text, Text)] -> Attr mkAttr attr = (attribsId, attribsClasses, attribsKV) - where attribsId = fromMaybe "" $ lookup "id" attr + where attribsId = fromMaybe "" $ lookup "id" attr `mplus` lookup "name" attr attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes - attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr + attribsKV = filter (\(k,_) -> k /= "class" && k /= "id" && k /= "name") + attr epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr toAttr :: [(Text, Text)] -> Attr diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index 6e62e12f5..b23a2abc8 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -16,7 +16,7 @@ HTML table parser. module Text.Pandoc.Readers.HTML.Table (pTable) where import Control.Applicative ((<|>)) -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, isJust) import Data.Either (lefts, rights) import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) @@ -27,12 +27,13 @@ import Text.Pandoc.Definition import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Parsing ( eof, lookAhead, many, many1, manyTill, option, optional - , optionMaybe, skipMany, try) + , optionMaybe, skipMany, try ) import Text.Pandoc.Readers.HTML.Parsing import Text.Pandoc.Readers.HTML.Types (TagParser) import Text.Pandoc.Shared (onlySimpleTableCells, safeRead) import qualified Data.Text as T import qualified Text.Pandoc.Builder as B +import Control.Monad (guard) -- | Parses a @<col>@ element, returning the column's width. -- An Either value is used: Left i means a "relative length" with @@ -183,11 +184,13 @@ pTableBody :: PandocMonad m -> TagParser m TableBody pTableBody block = try $ do skipMany pBlank - attribs <- option [] $ getAttribs <$> pSatisfy (matchTagOpen "tbody" []) - <* skipMany pBlank + mbattribs <- option Nothing $ Just . getAttribs <$> + pSatisfy (matchTagOpen "tbody" []) <* skipMany pBlank bodyheads <- many (pHeaderRow block) - (rowheads, rows) <- unzip <$> many1 (pRow block <* skipMany pBlank) + (rowheads, rows) <- unzip <$> many (pRow block <* skipMany pBlank) optional $ pSatisfy (matchTagClose "tbody") + guard $ isJust mbattribs || not (null bodyheads && null rows) + let attribs = fromMaybe [] mbattribs return $ TableBody (toAttr attribs) (foldr max 0 rowheads) bodyheads rows where getAttribs (TagOpen _ attribs) = attribs |