aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-11-24 11:01:21 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-11-24 11:01:55 -0800
commit6072bdcec95df9f537b22fb7df4a5f8ea7958189 (patch)
tree6234b23cf7e8e24cb4b2535aee2ad505bb230a8e /src
parenta8638894ab698cc0e49757a2732e383b652834bc (diff)
downloadpandoc-6072bdcec95df9f537b22fb7df4a5f8ea7958189.tar.gz
HTML reader: parse attributes on links and images.
Closes #6970.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs14
-rw-r--r--src/Text/Pandoc/Readers/HTML/Parsing.hs7
2 files changed, 10 insertions, 11 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c78faebbd..8aa2646b2 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -643,7 +643,7 @@ pQ = do
case lookup "cite" attrs of
Just url -> do
let uid = fromMaybe mempty $
- lookup "name" attrs <> lookup "id" attrs
+ lookup "name" attrs <|> lookup "id" attrs
let cls = maybe [] T.words $ lookup "class" attrs
url' <- canonicalizeUrl url
makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')])
@@ -705,20 +705,18 @@ pLineBreak = do
pLink :: PandocMonad m => TagParser m Inlines
pLink = try $ do
- tag <- pSatisfy $ tagOpenLit "a" (const True)
+ tag@(TagOpen _ attr') <- pSatisfy $ tagOpenLit "a" (const True)
let title = fromAttrib "title" tag
- -- take id from id attribute if present, otherwise name
- let uid = fromMaybe (fromAttrib "name" tag) $
- maybeFromAttrib "id" tag
- let cls = T.words $ fromAttrib "class" tag
+ let attr = toAttr $ filter (\(k,_) -> k /= "title" && k /= "href") attr'
lab <- mconcat <$> manyTill inline (pCloses "a")
-- check for href; if href, then a link, otherwise a span
case maybeFromAttrib "href" tag of
Nothing ->
- return $ extractSpaces (B.spanWith (uid, cls, [])) lab
+ return $ extractSpaces (B.spanWith attr) lab
Just url' -> do
url <- canonicalizeUrl url'
- return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab
+ return $ extractSpaces
+ (B.linkWith attr (escapeURI url) title) lab
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index 27a23aa69..a8cdf1de2 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -30,7 +30,7 @@ module Text.Pandoc.Readers.HTML.Parsing
)
where
-import Control.Monad (void, mzero)
+import Control.Monad (void, mzero, mplus)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Text.HTML.TagSoup
@@ -220,9 +220,10 @@ maybeFromAttrib _ _ = Nothing
mkAttr :: [(Text, Text)] -> Attr
mkAttr attr = (attribsId, attribsClasses, attribsKV)
- where attribsId = fromMaybe "" $ lookup "id" attr
+ where attribsId = fromMaybe "" $ lookup "id" attr `mplus` lookup "name" attr
attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes
- attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+ attribsKV = filter (\(k,_) -> k /= "class" && k /= "id" && k /= "name")
+ attr
epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr
toAttr :: [(Text, Text)] -> Attr