diff options
author | Jasper Van der Jeugt <m@jaspervdj.be> | 2017-06-19 11:57:23 +0200 |
---|---|---|
committer | Jasper Van der Jeugt <m@jaspervdj.be> | 2017-06-19 11:57:23 +0200 |
commit | 67ecff7ad383640bc73d64edc2506c7cc648a134 (patch) | |
tree | 6d328e43c3ab86c29a2d775fabaa23618c16fb51 /lib/Hakyll/Web/Html.hs | |
parent | 2df3209bafa08e6b77ee4a8598fc503269513527 (diff) | |
download | hakyll-67ecff7ad383640bc73d64edc2506c7cc648a134.tar.gz |
Move src/ to lib/, put Init.hs in src/
Diffstat (limited to 'lib/Hakyll/Web/Html.hs')
-rw-r--r-- | lib/Hakyll/Web/Html.hs | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/lib/Hakyll/Web/Html.hs b/lib/Hakyll/Web/Html.hs new file mode 100644 index 0000000..6b7ec88 --- /dev/null +++ b/lib/Hakyll/Web/Html.hs @@ -0,0 +1,184 @@ +-------------------------------------------------------------------------------- +-- | Provides utilities to manipulate HTML pages +module Hakyll.Web.Html + ( -- * Generic + withTags + + -- * Headers + , demoteHeaders + + -- * Url manipulation + , getUrls + , withUrls + , toUrl + , toSiteRoot + , isExternal + + -- * Stripping/escaping + , stripTags + , escapeHtml + ) where + + +-------------------------------------------------------------------------------- +import Data.Char (digitToInt, intToDigit, + isDigit, toLower) +import Data.List (isPrefixOf) +import qualified Data.Set as S +import System.FilePath.Posix (joinPath, splitPath, + takeDirectory) +import Text.Blaze.Html (toHtml) +import Text.Blaze.Html.Renderer.String (renderHtml) +import qualified Text.HTML.TagSoup as TS +import Network.URI (isUnreserved, escapeURIString) + + +-------------------------------------------------------------------------------- +-- | Map over all tags in the document +withTags :: (TS.Tag String -> TS.Tag String) -> String -> String +withTags f = renderTags' . map f . parseTags' + + +-------------------------------------------------------------------------------- +-- | Map every @h1@ to an @h2@, @h2@ to @h3@, etc. +demoteHeaders :: String -> String +demoteHeaders = withTags $ \tag -> case tag of + TS.TagOpen t a -> TS.TagOpen (demote t) a + TS.TagClose t -> TS.TagClose (demote t) + t -> t + where + demote t@['h', n] + | isDigit n = ['h', intToDigit (min 6 $ digitToInt n + 1)] + | otherwise = t + demote t = t + + +-------------------------------------------------------------------------------- +isUrlAttribute :: String -> Bool +isUrlAttribute = (`elem` ["src", "href", "data", "poster"]) + + +-------------------------------------------------------------------------------- +getUrls :: [TS.Tag String] -> [String] +getUrls tags = [v | TS.TagOpen _ as <- tags, (k, v) <- as, isUrlAttribute k] + + +-------------------------------------------------------------------------------- +-- | Apply a function to each URL on a webpage +withUrls :: (String -> String) -> String -> String +withUrls f = withTags tag + where + tag (TS.TagOpen s a) = TS.TagOpen s $ map attr a + tag x = x + attr (k, v) = (k, if isUrlAttribute k then f v else v) + + +-------------------------------------------------------------------------------- +-- | Customized TagSoup renderer. The default TagSoup renderer escape CSS +-- within style tags, and doesn't properly minimize. +renderTags' :: [TS.Tag String] -> String +renderTags' = TS.renderTagsOptions TS.RenderOptions + { TS.optRawTag = (`elem` ["script", "style"]) . map toLower + , TS.optMinimize = (`S.member` minimize) . map toLower + , TS.optEscape = id + } + where + -- A list of elements which must be minimized + minimize = S.fromList + [ "area", "br", "col", "embed", "hr", "img", "input", "meta", "link" + , "param" + ] + + +-------------------------------------------------------------------------------- +-- | Customized TagSoup parser: do not decode any entities. +parseTags' :: String -> [TS.Tag String] +parseTags' = TS.parseTagsOptions (TS.parseOptions :: TS.ParseOptions String) + { TS.optEntityData = \(str, b) -> [TS.TagText $ "&" ++ str ++ [';' | b]] + , TS.optEntityAttrib = \(str, b) -> ("&" ++ str ++ [';' | b], []) + } + + +-------------------------------------------------------------------------------- +-- | Convert a filepath to an URL starting from the site root +-- +-- Example: +-- +-- > toUrl "foo/bar.html" +-- +-- Result: +-- +-- > "/foo/bar.html" +-- +-- This also sanitizes the URL, e.g. converting spaces into '%20' +toUrl :: FilePath -> String +toUrl url = case url of + ('/' : xs) -> '/' : sanitize xs + xs -> '/' : sanitize xs + where + -- Everything but unreserved characters should be escaped as we are + -- sanitising the path therefore reserved characters which have a + -- meaning in URI does not appear. Special casing for `/`, because it has + -- a special meaning in FilePath as well as in URI. + sanitize = escapeURIString (\c -> c == '/' || isUnreserved c) + + +-------------------------------------------------------------------------------- +-- | Get the relative url to the site root, for a given (absolute) url +toSiteRoot :: String -> String +toSiteRoot = emptyException . joinPath . map parent + . filter relevant . splitPath . takeDirectory + where + parent = const ".." + emptyException [] = "." + emptyException x = x + relevant "." = False + relevant "/" = False + relevant "./" = False + relevant _ = True + + +-------------------------------------------------------------------------------- +-- | Check if an URL links to an external HTTP(S) source +isExternal :: String -> Bool +isExternal url = any (flip isPrefixOf url) ["http://", "https://", "//"] + + +-------------------------------------------------------------------------------- +-- | Strip all HTML tags from a string +-- +-- Example: +-- +-- > stripTags "<p>foo</p>" +-- +-- Result: +-- +-- > "foo" +-- +-- This also works for incomplete tags +-- +-- Example: +-- +-- > stripTags "<p>foo</p" +-- +-- Result: +-- +-- > "foo" +stripTags :: String -> String +stripTags [] = [] +stripTags ('<' : xs) = stripTags $ drop 1 $ dropWhile (/= '>') xs +stripTags (x : xs) = x : stripTags xs + + +-------------------------------------------------------------------------------- +-- | HTML-escape a string +-- +-- Example: +-- +-- > escapeHtml "Me & Dean" +-- +-- Result: +-- +-- > "Me & Dean" +escapeHtml :: String -> String +escapeHtml = renderHtml . toHtml |