diff options
Diffstat (limited to 'src/Hakyll/Web/Html.hs')
-rw-r--r-- | src/Hakyll/Web/Html.hs | 184 |
1 files changed, 0 insertions, 184 deletions
diff --git a/src/Hakyll/Web/Html.hs b/src/Hakyll/Web/Html.hs deleted file mode 100644 index 6b7ec88..0000000 --- a/src/Hakyll/Web/Html.hs +++ /dev/null @@ -1,184 +0,0 @@ --------------------------------------------------------------------------------- --- | Provides utilities to manipulate HTML pages -module Hakyll.Web.Html - ( -- * Generic - withTags - - -- * Headers - , demoteHeaders - - -- * Url manipulation - , getUrls - , withUrls - , toUrl - , toSiteRoot - , isExternal - - -- * Stripping/escaping - , stripTags - , escapeHtml - ) where - - --------------------------------------------------------------------------------- -import Data.Char (digitToInt, intToDigit, - isDigit, toLower) -import Data.List (isPrefixOf) -import qualified Data.Set as S -import System.FilePath.Posix (joinPath, splitPath, - takeDirectory) -import Text.Blaze.Html (toHtml) -import Text.Blaze.Html.Renderer.String (renderHtml) -import qualified Text.HTML.TagSoup as TS -import Network.URI (isUnreserved, escapeURIString) - - --------------------------------------------------------------------------------- --- | Map over all tags in the document -withTags :: (TS.Tag String -> TS.Tag String) -> String -> String -withTags f = renderTags' . map f . parseTags' - - --------------------------------------------------------------------------------- --- | Map every @h1@ to an @h2@, @h2@ to @h3@, etc. -demoteHeaders :: String -> String -demoteHeaders = withTags $ \tag -> case tag of - TS.TagOpen t a -> TS.TagOpen (demote t) a - TS.TagClose t -> TS.TagClose (demote t) - t -> t - where - demote t@['h', n] - | isDigit n = ['h', intToDigit (min 6 $ digitToInt n + 1)] - | otherwise = t - demote t = t - - --------------------------------------------------------------------------------- -isUrlAttribute :: String -> Bool -isUrlAttribute = (`elem` ["src", "href", "data", "poster"]) - - --------------------------------------------------------------------------------- -getUrls :: [TS.Tag String] -> [String] -getUrls tags = [v | TS.TagOpen _ as <- tags, (k, v) <- as, isUrlAttribute k] - - --------------------------------------------------------------------------------- --- | Apply a function to each URL on a webpage -withUrls :: (String -> String) -> String -> String -withUrls f = withTags tag - where - tag (TS.TagOpen s a) = TS.TagOpen s $ map attr a - tag x = x - attr (k, v) = (k, if isUrlAttribute k then f v else v) - - --------------------------------------------------------------------------------- --- | Customized TagSoup renderer. The default TagSoup renderer escape CSS --- within style tags, and doesn't properly minimize. -renderTags' :: [TS.Tag String] -> String -renderTags' = TS.renderTagsOptions TS.RenderOptions - { TS.optRawTag = (`elem` ["script", "style"]) . map toLower - , TS.optMinimize = (`S.member` minimize) . map toLower - , TS.optEscape = id - } - where - -- A list of elements which must be minimized - minimize = S.fromList - [ "area", "br", "col", "embed", "hr", "img", "input", "meta", "link" - , "param" - ] - - --------------------------------------------------------------------------------- --- | Customized TagSoup parser: do not decode any entities. -parseTags' :: String -> [TS.Tag String] -parseTags' = TS.parseTagsOptions (TS.parseOptions :: TS.ParseOptions String) - { TS.optEntityData = \(str, b) -> [TS.TagText $ "&" ++ str ++ [';' | b]] - , TS.optEntityAttrib = \(str, b) -> ("&" ++ str ++ [';' | b], []) - } - - --------------------------------------------------------------------------------- --- | Convert a filepath to an URL starting from the site root --- --- Example: --- --- > toUrl "foo/bar.html" --- --- Result: --- --- > "/foo/bar.html" --- --- This also sanitizes the URL, e.g. converting spaces into '%20' -toUrl :: FilePath -> String -toUrl url = case url of - ('/' : xs) -> '/' : sanitize xs - xs -> '/' : sanitize xs - where - -- Everything but unreserved characters should be escaped as we are - -- sanitising the path therefore reserved characters which have a - -- meaning in URI does not appear. Special casing for `/`, because it has - -- a special meaning in FilePath as well as in URI. - sanitize = escapeURIString (\c -> c == '/' || isUnreserved c) - - --------------------------------------------------------------------------------- --- | Get the relative url to the site root, for a given (absolute) url -toSiteRoot :: String -> String -toSiteRoot = emptyException . joinPath . map parent - . filter relevant . splitPath . takeDirectory - where - parent = const ".." - emptyException [] = "." - emptyException x = x - relevant "." = False - relevant "/" = False - relevant "./" = False - relevant _ = True - - --------------------------------------------------------------------------------- --- | Check if an URL links to an external HTTP(S) source -isExternal :: String -> Bool -isExternal url = any (flip isPrefixOf url) ["http://", "https://", "//"] - - --------------------------------------------------------------------------------- --- | Strip all HTML tags from a string --- --- Example: --- --- > stripTags "<p>foo</p>" --- --- Result: --- --- > "foo" --- --- This also works for incomplete tags --- --- Example: --- --- > stripTags "<p>foo</p" --- --- Result: --- --- > "foo" -stripTags :: String -> String -stripTags [] = [] -stripTags ('<' : xs) = stripTags $ drop 1 $ dropWhile (/= '>') xs -stripTags (x : xs) = x : stripTags xs - - --------------------------------------------------------------------------------- --- | HTML-escape a string --- --- Example: --- --- > escapeHtml "Me & Dean" --- --- Result: --- --- > "Me & Dean" -escapeHtml :: String -> String -escapeHtml = renderHtml . toHtml |