aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/HTML
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML')
-rw-r--r--src/Text/Pandoc/Readers/HTML/Parsing.hs17
-rw-r--r--src/Text/Pandoc/Readers/HTML/Table.hs13
2 files changed, 18 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index bd8d7c96c..a8cdf1de2 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -30,11 +30,11 @@ module Text.Pandoc.Readers.HTML.Parsing
)
where
-import Control.Monad (guard, void, mzero)
+import Control.Monad (void, mzero, mplus)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Text.HTML.TagSoup
- ( Attribute, Tag (..), isTagText, isTagPosition, isTagOpen, isTagClose, (~==) )
+ ( Attribute, Tag (..), isTagPosition, isTagOpen, isTagClose, (~==) )
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition (Attr)
import Text.Pandoc.Parsing
@@ -118,9 +118,11 @@ pCloses tagtype = try $ do
_ -> mzero
pBlank :: PandocMonad m => TagParser m ()
-pBlank = try $ do
- (TagText str) <- pSatisfy isTagText
- guard $ T.all isSpace str
+pBlank = void $ pSatisfy isBlank
+ where
+ isBlank (TagText t) = T.all isSpace t
+ isBlank (TagComment _) = True
+ isBlank _ = False
pLocation :: PandocMonad m => TagParser m ()
pLocation = do
@@ -218,9 +220,10 @@ maybeFromAttrib _ _ = Nothing
mkAttr :: [(Text, Text)] -> Attr
mkAttr attr = (attribsId, attribsClasses, attribsKV)
- where attribsId = fromMaybe "" $ lookup "id" attr
+ where attribsId = fromMaybe "" $ lookup "id" attr `mplus` lookup "name" attr
attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes
- attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+ attribsKV = filter (\(k,_) -> k /= "class" && k /= "id" && k /= "name")
+ attr
epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr
toAttr :: [(Text, Text)] -> Attr
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 6e62e12f5..b23a2abc8 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -16,7 +16,7 @@ HTML table parser.
module Text.Pandoc.Readers.HTML.Table (pTable) where
import Control.Applicative ((<|>))
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, isJust)
import Data.Either (lefts, rights)
import Data.List.NonEmpty (nonEmpty)
import Data.Text (Text)
@@ -27,12 +27,13 @@ import Text.Pandoc.Definition
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Parsing
( eof, lookAhead, many, many1, manyTill, option, optional
- , optionMaybe, skipMany, try)
+ , optionMaybe, skipMany, try )
import Text.Pandoc.Readers.HTML.Parsing
import Text.Pandoc.Readers.HTML.Types (TagParser)
import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
+import Control.Monad (guard)
-- | Parses a @<col>@ element, returning the column's width.
-- An Either value is used: Left i means a "relative length" with
@@ -183,11 +184,13 @@ pTableBody :: PandocMonad m
-> TagParser m TableBody
pTableBody block = try $ do
skipMany pBlank
- attribs <- option [] $ getAttribs <$> pSatisfy (matchTagOpen "tbody" [])
- <* skipMany pBlank
+ mbattribs <- option Nothing $ Just . getAttribs <$>
+ pSatisfy (matchTagOpen "tbody" []) <* skipMany pBlank
bodyheads <- many (pHeaderRow block)
- (rowheads, rows) <- unzip <$> many1 (pRow block <* skipMany pBlank)
+ (rowheads, rows) <- unzip <$> many (pRow block <* skipMany pBlank)
optional $ pSatisfy (matchTagClose "tbody")
+ guard $ isJust mbattribs || not (null bodyheads && null rows)
+ let attribs = fromMaybe [] mbattribs
return $ TableBody (toAttr attribs) (foldr max 0 rowheads) bodyheads rows
where
getAttribs (TagOpen _ attribs) = attribs