aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2016-11-13 22:41:11 +0100
committerJohn MacFarlane <jgm@berkeley.edu>2016-11-13 22:41:11 +0100
commit50f0cfcc1a96a418b5da9539f80499758ac207c7 (patch)
treeaa89993c1b0329866fb911793a14283dc17f3ee8
parent3de6b97b9fa043c0682b230213393b2db5867a30 (diff)
downloadpandoc-50f0cfcc1a96a418b5da9539f80499758ac207c7.tar.gz
HTML reader: only treat "a" element as link if it has href.
Otherwise treat as span. Closes #3226.
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs26
-rw-r--r--tests/Tests/Readers/HTML.hs4
2 files changed, 23 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 10a2976e5..e2fc97fbf 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -599,19 +599,31 @@ pLineBreak = do
pSelfClosing (=="br") (const True)
return B.linebreak
+-- Unlike fromAttrib from tagsoup, this distinguishes
+-- between a missing attribute and an attribute with empty content.
+maybeFromAttrib :: String -> Tag String -> Maybe String
+maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs
+maybeFromAttrib _ _ = Nothing
+
pLink :: TagParser Inlines
pLink = try $ do
tag <- pSatisfy $ tagOpenLit "a" (const True)
- mbBaseHref <- baseHref <$> getState
- let url' = fromAttrib "href" tag
- let url = case (parseURIReference url', mbBaseHref) of
- (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
- _ -> url'
let title = fromAttrib "title" tag
- let uid = fromAttrib "id" tag
+ -- take id from id attribute if present, otherwise name
+ let uid = maybe (fromAttrib "name" tag) id $ maybeFromAttrib "id" tag
let cls = words $ fromAttrib "class" tag
lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
- return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
+ -- check for href; if href, then a link, otherwise a span
+ case maybeFromAttrib "href" tag of
+ Nothing ->
+ return $ B.spanWith (uid, cls, []) lab
+ Just url' -> do
+ mbBaseHref <- baseHref <$> getState
+ let url = case (parseURIReference url', mbBaseHref) of
+ (Just rel, Just bs) ->
+ show (rel `nonStrictRelativeTo` bs)
+ _ -> url'
+ return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
pImage :: TagParser Inlines
pImage = do
diff --git a/tests/Tests/Readers/HTML.hs b/tests/Tests/Readers/HTML.hs
index 09041bfd1..1426a8bea 100644
--- a/tests/Tests/Readers/HTML.hs
+++ b/tests/Tests/Readers/HTML.hs
@@ -26,4 +26,8 @@ tests = [ testGroup "base tag"
"<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"http://example.com/stickman.gif\" alt=\"Stickman\"></head>" =?>
plain (image "http://example.com/stickman.gif" "" (text "Stickman"))
]
+ , testGroup "anchors"
+ [ test html "anchor without href" $ "<a name=\"anchor\"/>" =?>
+ plain (spanWith ("anchor",[],[]) mempty)
+ ]
]