aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/HTML.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs411
1 files changed, 222 insertions, 189 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index eb78979a3..fdf4f28e0 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -5,7 +5,7 @@
{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.HTML
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -19,21 +19,20 @@ module Text.Pandoc.Readers.HTML ( readHtml
, htmlInBalanced
, isInlineTag
, isBlockTag
- , NamedTag(..)
, isTextTag
, isCommentTag
) where
import Control.Applicative ((<|>))
-import Control.Arrow (first)
import Control.Monad (guard, msum, mzero, unless, void)
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
import Data.ByteString.Base64 (encode)
import Data.Char (isAlphaNum, isLetter)
import Data.Default (Default (..), def)
import Data.Foldable (for_)
import Data.List.Split (splitWhen)
+import Data.List (foldl')
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.Monoid (First (..))
@@ -62,21 +61,22 @@ import Text.Pandoc.Options (
ReaderOptions (readerExtensions, readerStripComments),
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
-import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
- extractSpaces, htmlSpanLikeElements, safeRead, tshow)
+import Text.Pandoc.Shared (
+ addMetaField, blocksToInlines', escapeURI, extractSpaces,
+ htmlSpanLikeElements, renderTags', safeRead, tshow)
import Text.Pandoc.Walk
import Text.Parsec.Error
import Text.TeXMath (readMathML, writeTeX)
-- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
+readHtml :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assumes @'\n'@ line endings)
+ -> a -- ^ Input to parse
-> m Pandoc
readHtml opts inp = do
- let tags = stripPrefixes . canonicalizeTags $
+ let tags = stripPrefixes $ canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True }
- (crFilter inp)
+ (sourcesToText $ toSources inp)
parseDoc = do
blocks <- fixPlains False . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
@@ -95,6 +95,15 @@ readHtml opts inp = do
Right doc -> return doc
Left err -> throwError $ PandocParseError $ T.pack $ getError err
+-- Strip namespace prefixes on tags (not attributes)
+stripPrefixes :: [Tag Text] -> [Tag Text]
+stripPrefixes = map stripPrefix
+
+stripPrefix :: Tag Text -> Tag Text
+stripPrefix (TagOpen s as) = TagOpen (T.takeWhileEnd (/=':') s) as
+stripPrefix (TagClose s) = TagClose (T.takeWhileEnd (/=':') s)
+stripPrefix x = x
+
replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
replaceNotes bs = do
st <- getState
@@ -112,14 +121,18 @@ setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
setInPlain = local (\s -> s {inPlain = True})
pHtml :: PandocMonad m => TagParser m Blocks
-pHtml = try $ do
+pHtml = do
(TagOpen "html" attr) <- lookAhead pAny
- for_ (lookup "lang" attr) $
+ for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
updateState . B.setMeta "lang" . B.text
pInTags "html" block
pBody :: PandocMonad m => TagParser m Blocks
-pBody = pInTags "body" block
+pBody = do
+ (TagOpen "body" attr) <- lookAhead pAny
+ for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
+ updateState . B.setMeta "lang" . B.text
+ pInTags "body" block
pHead :: PandocMonad m => TagParser m Blocks
pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
@@ -145,32 +158,65 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
return mempty
block :: PandocMonad m => TagParser m Blocks
-block = do
- res <- choice
- [ eSection
- , eSwitch B.para block
- , mempty <$ eFootnote
- , mempty <$ eTOC
- , mempty <$ eTitlePage
- , pPara
- , pHeader
- , pBlockQuote
- , pCodeBlock
- , pList
- , pHrule
- , pTable block
- , pHtml
- , pHead
- , pBody
- , pLineBlock
- , pDiv
- , pPlain
- , pFigure
- , pIframe
- , pRawHtmlBlock
- ]
- trace (T.take 60 $ tshow $ B.toList res)
- return res
+block = ((do
+ tag <- lookAhead (pSatisfy isBlockTag)
+ exts <- getOption readerExtensions
+ case tag of
+ TagOpen name attr ->
+ let type' = fromMaybe "" $
+ lookup "type" attr <|> lookup "epub:type" attr
+ epubExts = extensionEnabled Ext_epub_html_exts exts
+ in
+ case name of
+ _ | name `elem` sectioningContent
+ , epubExts
+ , "chapter" `T.isInfixOf` type'
+ -> eSection
+ _ | epubExts
+ , type' `elem` ["footnote", "rearnote"]
+ -> mempty <$ eFootnote
+ _ | epubExts
+ , type' == "toc"
+ -> mempty <$ eTOC
+ _ | "titlepage" `T.isInfixOf` type'
+ , name `elem` ("section" : groupingContent)
+ -> mempty <$ eTitlePage
+ "p" -> pPara
+ "h1" -> pHeader
+ "h2" -> pHeader
+ "h3" -> pHeader
+ "h4" -> pHeader
+ "h5" -> pHeader
+ "h6" -> pHeader
+ "blockquote" -> pBlockQuote
+ "pre" -> pCodeBlock
+ "ul" -> pBulletList
+ "ol" -> pOrderedList
+ "dl" -> pDefinitionList
+ "table" -> pTable block
+ "hr" -> pHrule
+ "html" -> pHtml
+ "head" -> pHead
+ "body" -> pBody
+ "div"
+ | extensionEnabled Ext_line_blocks exts
+ , Just "line-block" <- lookup "class" attr
+ -> pLineBlock
+ | otherwise
+ -> pDiv
+ "section" -> pDiv
+ "header" -> pDiv
+ "main" -> pDiv
+ "figure" -> pFigure
+ "iframe" -> pIframe
+ "style" -> pRawHtmlBlock
+ "textarea" -> pRawHtmlBlock
+ "switch"
+ | epubExts
+ -> eSwitch B.para block
+ _ -> mzero
+ _ -> mzero) <|> pPlain <|> pRawHtmlBlock) >>= \res ->
+ res <$ trace (T.take 60 $ tshow $ B.toList res)
namespaces :: PandocMonad m => [(Text, TagParser m Inlines)]
namespaces = [(mathMLNamespace, pMath True)]
@@ -243,9 +289,6 @@ eTOC = try $ do
guard $ (lookup "type" attr <|> lookup "epub:type" attr) == Just "toc"
void (pInTags tag block)
-pList :: PandocMonad m => TagParser m Blocks
-pList = pBulletList <|> pOrderedList <|> pDefinitionList
-
pBulletList :: PandocMonad m => TagParser m Blocks
pBulletList = try $ do
pSatisfy (matchTagOpen "ul" [])
@@ -319,7 +362,10 @@ pDefListItem = try $ do
terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
skipMany nonItem
- let term = foldl1 (\x y -> x <> B.linebreak <> y) $ map trimInlines terms
+ let term = foldl' (\x y -> if null x
+ then trimInlines y
+ else x <> B.linebreak <> trimInlines y)
+ mempty terms
return (term, map (fixPlains True) defs)
fixPlains :: Bool -> Blocks -> Blocks
@@ -356,13 +402,16 @@ pLineBlock = try $ do
B.toList ils
return $ B.lineBlock lns
+isDivLike :: Text -> Bool
+isDivLike "div" = True
+isDivLike "section" = True
+isDivLike "header" = True
+isDivLike "main" = True
+isDivLike _ = False
+
pDiv :: PandocMonad m => TagParser m Blocks
pDiv = try $ do
guardEnabled Ext_native_divs
- let isDivLike "div" = True
- isDivLike "section" = True
- isDivLike "main" = True
- isDivLike _ = False
TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
let (ident, classes, kvs) = toAttr attr'
contents <- pInTags tag block
@@ -380,11 +429,17 @@ pIframe = try $ do
tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
pCloses "iframe" <|> eof
url <- canonicalizeUrl $ fromAttrib "src" tag
- (bs, _) <- openURL url
- let inp = UTF8.toText bs
- opts <- readerOpts <$> getState
- Pandoc _ contents <- readHtml opts inp
- return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
+ if T.null url
+ then ignore $ renderTags' [tag, TagClose "iframe"]
+ else catchError
+ (do (bs, _) <- openURL url
+ let inp = UTF8.toText bs
+ opts <- readerOpts <$> getState
+ Pandoc _ contents <- readHtml opts inp
+ return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
+ (\e -> do
+ logMessage $ CouldNotFetchResource url (renderError e)
+ ignore $ renderTags' [tag, TagClose "iframe"])
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do
@@ -446,17 +501,13 @@ pHeader = try $ do
tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
(const True)
let attr = toStringAttr attr'
- let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text)
- [("class","title")]
level <- headerLevel tagtype
contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
let ident = fromMaybe "" $ lookup "id" attr
let classes = maybe [] T.words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
attr'' <- registerHeader (ident, classes, keyvals) contents
- return $ if bodyTitle
- then mempty -- skip a representation of the title in the body
- else B.headerWith attr'' level contents
+ return $ B.headerWith attr'' level contents
pHrule :: PandocMonad m => TagParser m Blocks
pHrule = do
@@ -506,7 +557,18 @@ pFigure = try $ do
pCodeBlock :: PandocMonad m => TagParser m Blocks
pCodeBlock = try $ do
TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
- let attr = toAttr attr'
+ -- if the `pre` has no attributes, try if it is followed by a `code`
+ -- element and use those attributes if possible.
+ attr <- case attr' of
+ _:_ -> pure (toAttr attr')
+ [] -> option nullAttr $ do
+ TagOpen _ codeAttr <- pSatisfy (matchTagOpen "code" [])
+ pure $ toAttr
+ [ (k, v') | (k, v) <- codeAttr
+ -- strip language from class
+ , let v' = if k == "class"
+ then fromMaybe v (T.stripPrefix "language-" v)
+ else v ]
contents <- manyTill pAny (pCloses "pre" <|> eof)
let rawText = T.concat $ map tagToText contents
-- drop leading newline if any
@@ -525,31 +587,47 @@ tagToText (TagOpen "br" _) = "\n"
tagToText _ = ""
inline :: PandocMonad m => TagParser m Inlines
-inline = choice
- [ eNoteref
- , eSwitch id inline
- , pTagText
- , pQ
- , pEmph
- , pStrong
- , pSuperscript
- , pSubscript
- , pSpanLike
- , pSmall
- , pStrikeout
- , pUnderline
- , pLineBreak
- , pLink
- , pImage
- , pSvg
- , pBdo
- , pCode
- , pCodeWithClass [("samp","sample"),("var","variable")]
- , pSpan
- , pMath False
- , pScriptMath
- , pRawHtmlInline
- ]
+inline = pTagText <|> do
+ tag <- lookAhead (pSatisfy isInlineTag)
+ exts <- getOption readerExtensions
+ case tag of
+ TagOpen name attr ->
+ case name of
+ "a" | extensionEnabled Ext_epub_html_exts exts
+ , Just "noteref" <- lookup "type" attr <|> lookup "epub:type" attr
+ , Just ('#',_) <- lookup "href" attr >>= T.uncons
+ -> eNoteref
+ | otherwise -> pLink
+ "switch" -> eSwitch id inline
+ "q" -> pQ
+ "em" -> pEmph
+ "i" -> pEmph
+ "strong" -> pStrong
+ "b" -> pStrong
+ "sup" -> pSuperscript
+ "sub" -> pSubscript
+ "small" -> pSmall
+ "s" -> pStrikeout
+ "strike" -> pStrikeout
+ "del" -> pStrikeout
+ "u" -> pUnderline
+ "ins" -> pUnderline
+ "br" -> pLineBreak
+ "img" -> pImage
+ "svg" -> pSvg
+ "bdo" -> pBdo
+ "code" -> pCode
+ "samp" -> pCodeWithClass "samp" "sample"
+ "var" -> pCodeWithClass "var" "variable"
+ "span" -> pSpan
+ "math" -> pMath False
+ "script"
+ | Just x <- lookup "type" attr
+ , "math/tex" `T.isPrefixOf` x -> pScriptMath
+ _ | name `elem` htmlSpanLikeElements -> pSpanLike
+ _ -> pRawHtmlInline
+ TagText _ -> pTagText
+ _ -> pRawHtmlInline
pSelfClosing :: PandocMonad m
=> (Text -> Bool) -> ([Attribute Text] -> Bool)
@@ -560,27 +638,25 @@ pSelfClosing f g = do
return open
pQ :: PandocMonad m => TagParser m Inlines
-pQ = choice $ map try [citedQuote, normalQuote]
- where citedQuote = do
- tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
-
- url <- canonicalizeUrl $ fromAttrib "cite" tag
- let uid = fromMaybe (fromAttrib "name" tag) $
- maybeFromAttrib "id" tag
- let cls = T.words $ fromAttrib "class" tag
-
- makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
- normalQuote = do
- pSatisfy $ tagOpenLit "q" (const True)
- makeQuote id
- makeQuote wrapper = do
- ctx <- asks quoteContext
- let (constructor, innerContext) = case ctx of
- InDoubleQuote -> (B.singleQuoted, InSingleQuote)
- _ -> (B.doubleQuoted, InDoubleQuote)
-
- content <- withQuoteContext innerContext (mconcat <$> manyTill inline (pCloses "q"))
- return $ extractSpaces (constructor . wrapper) content
+pQ = do
+ TagOpen _ attrs <- pSatisfy $ tagOpenLit "q" (const True)
+ case lookup "cite" attrs of
+ Just url -> do
+ let uid = fromMaybe mempty $
+ lookup "name" attrs <> lookup "id" attrs
+ let cls = maybe [] T.words $ lookup "class" attrs
+ url' <- canonicalizeUrl url
+ makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')])
+ Nothing -> makeQuote id
+ where
+ makeQuote wrapper = do
+ ctx <- asks quoteContext
+ let (constructor, innerContext) = case ctx of
+ InDoubleQuote -> (B.singleQuoted, InSingleQuote)
+ _ -> (B.doubleQuoted, InDoubleQuote)
+ content <- withQuoteContext innerContext
+ (mconcat <$> manyTill inline (pCloses "q"))
+ return $ extractSpaces (constructor . wrapper) content
pEmph :: PandocMonad m => TagParser m Inlines
pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
@@ -646,17 +722,12 @@ pLink = try $ do
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
- tag <- pSelfClosing (=="img") (isJust . lookup "src")
+ tag@(TagOpen _ attr') <- pSelfClosing (=="img") (isJust . lookup "src")
url <- canonicalizeUrl $ fromAttrib "src" tag
let title = fromAttrib "title" tag
let alt = fromAttrib "alt" tag
- let uid = fromAttrib "id" tag
- let cls = T.words $ fromAttrib "class" tag
- let getAtt k = case fromAttrib k tag of
- "" -> []
- v -> [(k, v)]
- let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
- return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
+ let attr = toAttr $ filter (\(k,_) -> k /= "alt" && k /= "title" && k /= "src") attr'
+ return $ B.imageWith attr (escapeURI url) title (B.text alt)
pSvg :: PandocMonad m => TagParser m Inlines
pSvg = do
@@ -671,13 +742,12 @@ pSvg = do
UTF8.toText (encode $ UTF8.fromText rawText)
return $ B.imageWith (ident,cls,[]) svgData mempty mempty
-pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines
-pCodeWithClass elemToClass = try $ do
- let tagTest = flip elem . fmap fst $ elemToClass
- TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True)
+pCodeWithClass :: PandocMonad m => Text -> Text -> TagParser m Inlines
+pCodeWithClass name class' = try $ do
+ TagOpen open attr' <- pSatisfy $ tagOpen (== name) (const True)
result <- manyTill pAny (pCloses open)
let (ids,cs,kvs) = toAttr attr'
- cs' = maybe cs (:cs) . lookup open $ elemToClass
+ cs' = class' : cs
return . B.codeWith (ids,cs',kvs) .
T.unwords . T.lines . innerText $ result
@@ -764,17 +834,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
pTagText :: PandocMonad m => TagParser m Inlines
pTagText = try $ do
+ pos <- getPosition
(TagText str) <- pSatisfy isTagText
st <- getState
qu <- ask
parsed <- lift $ lift $
- flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+ flip runReaderT qu $ runParserT (many pTagContents) st "text"
+ (Sources [(pos, str)])
case parsed of
Left _ -> throwError $ PandocParseError $
"Could not parse `" <> str <> "'"
Right result -> return $ mconcat result
-type InlinesParser m = HTMLParser m Text
+type InlinesParser m = HTMLParser m Sources
pTagContents :: PandocMonad m => InlinesParser m Inlines
pTagContents =
@@ -868,27 +940,23 @@ pSpace = many1 (satisfy isSpace) >>= \xs ->
then return B.softbreak
else return B.space
-class NamedTag a where
- getTagName :: a -> Maybe Text
-
-instance NamedTag (Tag Text) where
- getTagName (TagOpen t _) = Just t
- getTagName (TagClose t) = Just t
- getTagName _ = Nothing
-
-instance NamedTag (Tag String) where
- getTagName (TagOpen t _) = Just (T.pack t)
- getTagName (TagClose t) = Just (T.pack t)
- getTagName _ = Nothing
-
-isInlineTag :: NamedTag (Tag a) => Tag a -> Bool
-isInlineTag t =
- isCommentTag t || case getTagName t of
- Nothing -> False
- Just x -> x `Set.notMember` blockTags ||
- T.take 1 x == "?" -- processing instr.
-
-isBlockTag :: NamedTag (Tag a) => Tag a -> Bool
+getTagName :: Tag Text -> Maybe Text
+getTagName (TagOpen t _) = Just t
+getTagName (TagClose t) = Just t
+getTagName _ = Nothing
+
+isInlineTag :: Tag Text -> Bool
+isInlineTag t = isCommentTag t || case t of
+ TagOpen "script" _ -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
+ TagClose "script" -> True
+ TagOpen name _ -> isInlineTagName name
+ TagClose name -> isInlineTagName name
+ _ -> False
+ where isInlineTagName x =
+ x `Set.notMember` blockTags ||
+ T.take 1 x == "?" -- processing instr.
+
+isBlockTag :: Tag Text -> Bool
isBlockTag t = isBlockTagName || isTagComment t
where isBlockTagName =
case getTagName t of
@@ -899,10 +967,10 @@ isBlockTag t = isBlockTagName || isTagComment t
|| x `Set.member` eitherBlockOrInline
Nothing -> False
-isTextTag :: Tag a -> Bool
+isTextTag :: Tag Text -> Bool
isTextTag = tagText (const True)
-isCommentTag :: Tag a -> Bool
+isCommentTag :: Tag Text -> Bool
isCommentTag = tagComment (const True)
--- parsers for use in markdown, textile readers
@@ -910,13 +978,14 @@ isCommentTag = tagComment (const True)
-- | Matches a stretch of HTML in balanced tags.
htmlInBalanced :: Monad m
=> (Tag Text -> Bool)
- -> ParserT Text st m Text
+ -> ParserT Sources st m Text
htmlInBalanced f = try $ do
lookAhead (char '<')
- inp <- getInput
- let ts = canonicalizeTags $
- parseTagsOptions parseOptions{ optTagWarning = True,
- optTagPosition = True } inp
+ sources <- getInput
+ let ts = canonicalizeTags
+ $ parseTagsOptions parseOptions{ optTagWarning = True,
+ optTagPosition = True }
+ $ sourcesToText sources
case ts of
(TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
guard $ f t
@@ -951,22 +1020,24 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
go n (t:ts') = (t :) <$> go n ts'
go _ [] = mzero
-hasTagWarning :: [Tag a] -> Bool
+hasTagWarning :: [Tag Text] -> Bool
hasTagWarning (TagWarning _:_) = True
hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
htmlTag :: (HasReaderOptions st, Monad m)
=> (Tag Text -> Bool)
- -> ParserT Text st m (Tag Text, Text)
+ -> ParserT Sources st m (Tag Text, Text)
htmlTag f = try $ do
lookAhead (char '<')
startpos <- getPosition
- inp <- getInput
+ sources <- getInput
+ let inp = sourcesToText sources
let ts = canonicalizeTags $ parseTagsOptions
parseOptions{ optTagWarning = False
, optTagPosition = True }
- (inp <> " ") -- add space to ensure that
+ (inp <> " ")
+ -- add space to ensure that
-- we get a TagPosition after the tag
(next, ln, col) <- case ts of
(TagPosition{} : next : TagPosition ln col : _)
@@ -1024,21 +1095,6 @@ htmlTag f = try $ do
handleTag tagname
_ -> mzero
--- Strip namespace prefixes
-stripPrefixes :: [Tag Text] -> [Tag Text]
-stripPrefixes = map stripPrefix
-
-stripPrefix :: Tag Text -> Tag Text
-stripPrefix (TagOpen s as) =
- TagOpen (stripPrefix' s) (map (first stripPrefix') as)
-stripPrefix (TagClose s) = TagClose (stripPrefix' s)
-stripPrefix x = x
-
-stripPrefix' :: Text -> Text
-stripPrefix' s =
- if T.null t then s else T.drop 1 t
- where (_, t) = T.span (/= ':') s
-
-- Utilities
-- | Adjusts a url according to the document's base URL.
@@ -1048,26 +1104,3 @@ canonicalizeUrl url = do
return $ case (parseURIReference (T.unpack url), mbBaseHref) of
(Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
_ -> url
-
--- For now we need a special version here; the one in Shared has String type
-renderTags' :: [Tag Text] -> Text
-renderTags' = renderTagsOptions
- renderOptions{ optMinimize = matchTags ["hr", "br", "img",
- "meta", "link"]
- , optRawTag = matchTags ["script", "style"] }
- where matchTags tags = flip elem tags . T.toLower
-
-
--- EPUB Specific
---
---
-{-
-
-types :: [(String, ([String], Int))]
-types = -- Document divisions
- map (\s -> (s, (["section", "body"], 0)))
- ["volume", "part", "chapter", "division"]
- <> -- Document section and components
- [
- ("abstract", ([], 0))]
--}