diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2021-11-24 11:01:21 -0800 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2021-11-24 11:01:55 -0800 | 
| commit | 6072bdcec95df9f537b22fb7df4a5f8ea7958189 (patch) | |
| tree | 6234b23cf7e8e24cb4b2535aee2ad505bb230a8e /src/Text/Pandoc | |
| parent | a8638894ab698cc0e49757a2732e383b652834bc (diff) | |
| download | pandoc-6072bdcec95df9f537b22fb7df4a5f8ea7958189.tar.gz | |
HTML reader: parse attributes on links and images.
Closes #6970.
Diffstat (limited to 'src/Text/Pandoc')
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 14 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML/Parsing.hs | 7 | 
2 files changed, 10 insertions, 11 deletions
| diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index c78faebbd..8aa2646b2 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -643,7 +643,7 @@ pQ = do    case lookup "cite" attrs of      Just url -> do        let uid = fromMaybe mempty $ -                   lookup "name" attrs <> lookup "id" attrs +                   lookup "name" attrs <|> lookup "id" attrs        let cls = maybe [] T.words $ lookup "class" attrs        url' <- canonicalizeUrl url        makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')]) @@ -705,20 +705,18 @@ pLineBreak = do  pLink :: PandocMonad m => TagParser m Inlines  pLink = try $ do -  tag <- pSatisfy $ tagOpenLit "a" (const True) +  tag@(TagOpen _ attr') <- pSatisfy $ tagOpenLit "a" (const True)    let title = fromAttrib "title" tag -  -- take id from id attribute if present, otherwise name -  let uid = fromMaybe (fromAttrib "name" tag) $ -               maybeFromAttrib "id" tag -  let cls = T.words $ fromAttrib "class" tag +  let attr = toAttr $ filter (\(k,_) -> k /= "title" && k /= "href") attr'    lab <- mconcat <$> manyTill inline (pCloses "a")    -- check for href; if href, then a link, otherwise a span    case maybeFromAttrib "href" tag of         Nothing   -> -         return $ extractSpaces (B.spanWith (uid, cls, [])) lab +         return $ extractSpaces (B.spanWith attr) lab         Just url' -> do           url <- canonicalizeUrl url' -         return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab +         return $ extractSpaces +                   (B.linkWith attr (escapeURI url) title) lab  pImage :: PandocMonad m => TagParser m Inlines  pImage = do diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs index 27a23aa69..a8cdf1de2 100644 --- a/src/Text/Pandoc/Readers/HTML/Parsing.hs +++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs @@ -30,7 +30,7 @@ module Text.Pandoc.Readers.HTML.Parsing    )  where -import Control.Monad (void, mzero) +import Control.Monad (void, mzero, mplus)  import Data.Maybe (fromMaybe)  import Data.Text (Text)  import Text.HTML.TagSoup @@ -220,9 +220,10 @@ maybeFromAttrib _ _ = Nothing  mkAttr :: [(Text, Text)] -> Attr  mkAttr attr = (attribsId, attribsClasses, attribsKV) -  where attribsId = fromMaybe "" $ lookup "id" attr +  where attribsId = fromMaybe "" $ lookup "id" attr `mplus` lookup "name" attr          attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes -        attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr +        attribsKV = filter (\(k,_) -> k /= "class" && k /= "id" && k /= "name") +                           attr          epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr  toAttr :: [(Text, Text)] -> Attr | 
