about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 28
-rw-r--r-- tests/Tests/Readers/HTML.hs 5
2 files changed, 15 insertions, 18 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 69df13aac..959a2d16f 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -63,7 +63,7 @@ import Debug.Trace (trace)
import Text.TeXMath (readMathML, writeTeX)
import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
-import Network.URI (isURI)
+import Network.URI (URI, parseURIReference, nonStrictRelativeTo)
import Text.Pandoc.Error
import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Pandoc.Compat.Monoid ((<>))
@@ -103,7 +103,7 @@ data HTMLState =
HTMLState
{ parserState :: ParserState,
noteTable :: [(String, Blocks)],
- baseHref :: Maybe String,
+ baseHref :: Maybe URI,
identifiers :: Set.Set String,
headerMap :: M.Map Inlines String
}
@@ -145,15 +145,9 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag
return mempty
pBaseTag = do
bt <- pSatisfy (~== TagOpen "base" [])
- let baseH = fromAttrib "href" bt
- if null baseH
- then return mempty
- else do
- let baseH' = case reverse baseH of
- '/':_ -> baseH
- _ -> baseH ++ "/"
- updateState $ \st -> st{ baseHref = Just baseH' }
- return mempty
+ updateState $ \st -> st{ baseHref =
+ parseURIReference $ fromAttrib "href" bt }
+ return mempty
block :: TagParser Blocks
block = do
@@ -610,9 +604,9 @@ pLink = try $ do
tag <- pSatisfy $ tagOpenLit "a" (const True)
mbBaseHref <- baseHref <$> getState
let url' = fromAttrib "href" tag
- let url = case (isURI url', mbBaseHref) of
- (False, Just h) -> h ++ url'
- _ -> url'
+ let url = case (parseURIReference url', mbBaseHref) of
+ (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
+ _ -> url'
let title = fromAttrib "title" tag
let uid = fromAttrib "id" tag
let cls = words $ fromAttrib "class" tag
@@ -624,9 +618,9 @@ pImage = do
tag <- pSelfClosing (=="img") (isJust . lookup "src")
mbBaseHref <- baseHref <$> getState
let url' = fromAttrib "src" tag
- let url = case (isURI url', mbBaseHref) of
- (False, Just h) -> h ++ url'
- _ -> url'
+ let url = case (parseURIReference url', mbBaseHref) of
+ (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
+ _ -> url'
let title = fromAttrib "title" tag
let alt = fromAttrib "alt" tag
let uid = fromAttrib "id" tag
diff --git a/tests/Tests/Readers/HTML.hs b/tests/Tests/Readers/HTML.hs
index 2eb87a2f3..ff27b8aed 100644
--- a/tests/Tests/Readers/HTML.hs
+++ b/tests/Tests/Readers/HTML.hs
@@ -15,11 +15,14 @@ html = handleError . readHtml def
tests :: [Test]
tests = [ testGroup "base tag"
[ test html "simple" $
- "<head><base href=\"http://www.w3schools.com/images\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?>
+ "<head><base href=\"http://www.w3schools.com/images/foo\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?>
plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman"))
, test html "slash at end of base" $
"<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?>
plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman"))
+ , test html "slash at beginning of href" $
+ "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"/stickman.gif\" alt=\"Stickman\"></head>" =?>
+ plain (image "http://www.w3schools.com/stickman.gif" "" (text "Stickman"))
, test html "absolute URL" $
"<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"http://example.com/stickman.gif\" alt=\"Stickman\"></head>" =?>
plain (image "http://example.com/stickman.gif" "" (text "Stickman"))