diff options
author | John MacFarlane <jgm@berkeley.edu> | 2016-03-10 19:59:55 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2016-03-10 19:59:55 -0800 |
commit | a485c42d78d8bc819f7ad1bef137d54a324c5ea9 (patch) | |
tree | fc8c7494dd39b38de4526204a45a64099db67610 /src | |
parent | 06a57b27a1367e7954efa38fde13376a7334e870 (diff) | |
download | pandoc-a485c42d78d8bc819f7ad1bef137d54a324c5ea9.tar.gz |
Fixed behavior of base tag.
+ If the base path does not end with slash, the last component
will be replaced. E.g. base = `http://example.com/foo`
combines with `bar.html` to give `http://example.com/bar.html`.
+ If the href begins with a slash, the whole path of the base
is replaced. E.g. base = `http://example.com/foo/` combines
with `/bar.html` to give `http://example.com/bar.html`.
Closes #2777.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 28 |
1 files changed, 11 insertions, 17 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 69df13aac..959a2d16f 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -63,7 +63,7 @@ import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) -import Network.URI (isURI) +import Network.URI (URI, parseURIReference, nonStrictRelativeTo) import Text.Pandoc.Error import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Pandoc.Compat.Monoid ((<>)) @@ -103,7 +103,7 @@ data HTMLState = HTMLState { parserState :: ParserState, noteTable :: [(String, Blocks)], - baseHref :: Maybe String, + baseHref :: Maybe URI, identifiers :: Set.Set String, headerMap :: M.Map Inlines String } @@ -145,15 +145,9 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag return mempty pBaseTag = do bt <- pSatisfy (~== TagOpen "base" []) - let baseH = fromAttrib "href" bt - if null baseH - then return mempty - else do - let baseH' = case reverse baseH of - '/':_ -> baseH - _ -> baseH ++ "/" - updateState $ \st -> st{ baseHref = Just baseH' } - return mempty + updateState $ \st -> st{ baseHref = + parseURIReference $ fromAttrib "href" bt } + return mempty block :: TagParser Blocks block = do @@ -610,9 +604,9 @@ pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) mbBaseHref <- baseHref <$> getState let url' = fromAttrib "href" tag - let url = case (isURI url', mbBaseHref) of - (False, Just h) -> h ++ url' - _ -> url' + let url = case (parseURIReference url', mbBaseHref) of + (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag let cls = words $ fromAttrib "class" tag @@ -624,9 +618,9 @@ pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") mbBaseHref <- baseHref <$> getState let url' = fromAttrib "src" tag - let url = case (isURI url', mbBaseHref) of - (False, Just h) -> h ++ url' - _ -> url' + let url = case (parseURIReference url', mbBaseHref) of + (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag let uid = fromAttrib "id" tag |