diff options
author | John MacFarlane <jgm@berkeley.edu> | 2016-11-13 22:41:11 +0100 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2016-11-13 22:41:11 +0100 |
commit | 50f0cfcc1a96a418b5da9539f80499758ac207c7 (patch) | |
tree | aa89993c1b0329866fb911793a14283dc17f3ee8 | |
parent | 3de6b97b9fa043c0682b230213393b2db5867a30 (diff) | |
download | pandoc-50f0cfcc1a96a418b5da9539f80499758ac207c7.tar.gz |
HTML reader: only treat "a" element as link if it has href.
Otherwise treat as span.
Closes #3226.
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 26 | ||||
-rw-r--r-- | tests/Tests/Readers/HTML.hs | 4 |
2 files changed, 23 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 10a2976e5..e2fc97fbf 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -599,19 +599,31 @@ pLineBreak = do pSelfClosing (=="br") (const True) return B.linebreak +-- Unlike fromAttrib from tagsoup, this distinguishes +-- between a missing attribute and an attribute with empty content. +maybeFromAttrib :: String -> Tag String -> Maybe String +maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs +maybeFromAttrib _ _ = Nothing + pLink :: TagParser Inlines pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) - mbBaseHref <- baseHref <$> getState - let url' = fromAttrib "href" tag - let url = case (parseURIReference url', mbBaseHref) of - (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) - _ -> url' let title = fromAttrib "title" tag - let uid = fromAttrib "id" tag + -- take id from id attribute if present, otherwise name + let uid = maybe (fromAttrib "name" tag) id $ maybeFromAttrib "id" tag let cls = words $ fromAttrib "class" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - return $ B.linkWith (uid, cls, []) (escapeURI url) title lab + -- check for href; if href, then a link, otherwise a span + case maybeFromAttrib "href" tag of + Nothing -> + return $ B.spanWith (uid, cls, []) lab + Just url' -> do + mbBaseHref <- baseHref <$> getState + let url = case (parseURIReference url', mbBaseHref) of + (Just rel, Just bs) -> + show (rel `nonStrictRelativeTo` bs) + _ -> url' + return $ B.linkWith (uid, cls, []) (escapeURI url) title lab pImage :: TagParser Inlines pImage = do diff --git a/tests/Tests/Readers/HTML.hs b/tests/Tests/Readers/HTML.hs index 09041bfd1..1426a8bea 100644 --- a/tests/Tests/Readers/HTML.hs +++ b/tests/Tests/Readers/HTML.hs @@ -26,4 +26,8 @@ tests = [ testGroup "base tag" "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"http://example.com/stickman.gif\" alt=\"Stickman\"></head>" =?> plain (image "http://example.com/stickman.gif" "" (text "Stickman")) ] + , testGroup "anchors" + [ test html "anchor without href" $ "<a name=\"anchor\"/>" =?> + plain (spanWith ("anchor",[],[]) mempty) + ] ] |