diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-05-13 20:39:01 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-05-13 20:53:19 -0700 |
commit | e06810499e69b23f08fc1f8fb7e774e7b11be713 (patch) | |
tree | 917f1d47d7b0f3f6ee8987455273dce4ed7ca2c4 /src/Text/Pandoc/Readers | |
parent | 75cfa7b4624bfbf06c8a47761620bd756ddab254 (diff) | |
download | pandoc-e06810499e69b23f08fc1f8fb7e774e7b11be713.tar.gz |
HTML reader: Support base tag.
We only support the href attribute, as there's no place for
"target" in the Pandoc document model for links.
Added HTML reader test module, with tests for this feature.
Closes #1751.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 35 |
1 file changed, 28 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index df2d37fac..c766bb4ee 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -62,7 +62,7 @@ import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) - +import Network.URI (isURI) import Text.Pandoc.Error import Text.Parsec.Error @@ -74,7 +74,8 @@ readHtml :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readHtml opts inp = mapLeft (ParseFailure . getError) . flip runReader def $ - runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags + runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing) + "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -98,7 +99,8 @@ replaceNotes' x = return x data HTMLState = HTMLState { parserState :: ParserState, - noteTable :: [(String, Blocks)] + noteTable :: [(String, Blocks)], + baseHref :: Maybe String } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -120,7 +122,7 @@ pBody :: TagParser Blocks pBody = pInTags "body" block pHead :: TagParser Blocks -pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . 
trimInlines setTitle t = mempty <$ (updateState $ B.setMeta "title" t) pMetaTag = do @@ -132,6 +134,17 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) let content = fromAttrib "content" mt updateState $ B.setMeta name (B.text content) return mempty + pBaseTag = do + bt <- pSatisfy (~== TagOpen "base" []) + let baseH = fromAttrib "href" bt + if null baseH + then return mempty + else do + let baseH' = case reverse baseH of + '/':_ -> baseH + _ -> baseH ++ "/" + updateState $ \st -> st{ baseHref = Just baseH' } + return mempty block :: TagParser Blocks block = do @@ -566,7 +579,11 @@ pAnchor = try $ do pRelLink :: TagParser Inlines pRelLink = try $ do tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href")) - let url = fromAttrib "href" tag + mbBaseHref <- baseHref <$> getState + let url' = fromAttrib "href" tag + let url = case (isURI url', mbBaseHref) of + (False, Just h) -> h ++ url' + _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag let spanC = case uid of @@ -578,7 +595,11 @@ pRelLink = try $ do pImage :: TagParser Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") - let url = fromAttrib "src" tag + mbBaseHref <- baseHref <$> getState + let url' = fromAttrib "src" tag + let url = case (isURI url', mbBaseHref) of + (False, Just h) -> h ++ url' + _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag return $ B.image (escapeURI url) title (B.text alt) @@ -945,7 +966,7 @@ instance HasReaderOptions HTMLState where extractReaderOptions = extractReaderOptions . parserState instance Default HTMLState where - def = HTMLState def [] + def = HTMLState def [] Nothing instance HasMeta HTMLState where setMeta s b st = st {parserState = setMeta s b $ parserState st} |