diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 34 |
1 files changed, 17 insertions, 17 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 8b66d2d3d..a34e2fb5c 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -66,7 +66,7 @@ import Control.Monad.Reader (Reader,ask, asks, local, runReader) import Network.URI (isURI) import Text.Pandoc.Error import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) - +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Parsec.Error @@ -601,16 +601,8 @@ pLineBreak = do return B.linebreak pLink :: TagParser Inlines -pLink = pRelLink <|> pAnchor - -pAnchor :: TagParser Inlines -pAnchor = try $ do - tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "id")) - return $ B.spanWith (fromAttrib "id" tag , [], []) mempty - -pRelLink :: TagParser Inlines -pRelLink = try $ do - tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href")) +pLink = try $ do + tag <- pSatisfy $ tagOpenLit "a" (const True) mbBaseHref <- baseHref <$> getState let url' = fromAttrib "href" tag let url = case (isURI url', mbBaseHref) of @@ -618,11 +610,9 @@ pRelLink = try $ do _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag - let spanC = case uid of - [] -> id - s -> B.spanWith (s, [], []) + let cls = words $ fromAttrib "class" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - return $ spanC $ B.link (escapeURI url) title lab + return $ B.linkWith (uid, cls, []) (escapeURI url) title lab pImage :: TagParser Inlines pImage = do @@ -634,7 +624,13 @@ pImage = do _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag - return $ B.image (escapeURI url) title (B.text alt) + let uid = fromAttrib "id" tag + let cls = words $ fromAttrib "class" tag + let getAtt k = case fromAttrib k tag of + "" -> [] + v -> [(k, v)] + let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"] + return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) pCode :: TagParser Inlines pCode = try $ do @@ -803,7 +799,10 @@ pBad = do return $ B.str [c'] pSpace :: InlinesParser Inlines -pSpace = many1 (satisfy isSpace) >> return B.space +pSpace = many1 (satisfy isSpace) >>= \xs -> + if '\n' `elem` xs + then return B.softbreak + else return B.space -- -- Constants @@ -948,6 +947,7 @@ htmlTag f = try $ do parseOptions{ optTagWarning = True } inp guard $ f next case next of + TagWarning _ -> fail "encountered TagWarning" TagComment s | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar |