summaryrefslogtreecommitdiff
path: root/lib/Hakyll/Web/Html.hs
diff options
context:
space:
mode:
authorJasper Van der Jeugt <m@jaspervdj.be>2017-06-19 11:57:23 +0200
committerJasper Van der Jeugt <m@jaspervdj.be>2017-06-19 11:57:23 +0200
commit67ecff7ad383640bc73d64edc2506c7cc648a134 (patch)
tree6d328e43c3ab86c29a2d775fabaa23618c16fb51 /lib/Hakyll/Web/Html.hs
parent2df3209bafa08e6b77ee4a8598fc503269513527 (diff)
downloadhakyll-67ecff7ad383640bc73d64edc2506c7cc648a134.tar.gz
Move src/ to lib/, put Init.hs in src/
Diffstat (limited to 'lib/Hakyll/Web/Html.hs')
-rw-r--r--lib/Hakyll/Web/Html.hs184
1 files changed, 184 insertions, 0 deletions
diff --git a/lib/Hakyll/Web/Html.hs b/lib/Hakyll/Web/Html.hs
new file mode 100644
index 0000000..6b7ec88
--- /dev/null
+++ b/lib/Hakyll/Web/Html.hs
@@ -0,0 +1,184 @@
+--------------------------------------------------------------------------------
+-- | Provides utilities to manipulate HTML pages
+module Hakyll.Web.Html
+ ( -- * Generic
+ withTags
+
+ -- * Headers
+ , demoteHeaders
+
+ -- * Url manipulation
+ , getUrls
+ , withUrls
+ , toUrl
+ , toSiteRoot
+ , isExternal
+
+ -- * Stripping/escaping
+ , stripTags
+ , escapeHtml
+ ) where
+
+
+--------------------------------------------------------------------------------
+import Data.Char (digitToInt, intToDigit,
+ isDigit, toLower)
+import Data.List (isPrefixOf)
+import qualified Data.Set as S
+import System.FilePath.Posix (joinPath, splitPath,
+ takeDirectory)
+import Text.Blaze.Html (toHtml)
+import Text.Blaze.Html.Renderer.String (renderHtml)
+import qualified Text.HTML.TagSoup as TS
+import Network.URI (isUnreserved, escapeURIString)
+
+
+--------------------------------------------------------------------------------
+-- | Map over all tags in the document
+withTags :: (TS.Tag String -> TS.Tag String) -> String -> String
+withTags f = renderTags' . map f . parseTags'
+
+
+--------------------------------------------------------------------------------
+-- | Map every @h1@ to an @h2@, @h2@ to @h3@, etc.
+demoteHeaders :: String -> String
+demoteHeaders = withTags $ \tag -> case tag of
+ TS.TagOpen t a -> TS.TagOpen (demote t) a
+ TS.TagClose t -> TS.TagClose (demote t)
+ t -> t
+ where
+ demote t@['h', n]
+ | isDigit n = ['h', intToDigit (min 6 $ digitToInt n + 1)]
+ | otherwise = t
+ demote t = t
+
+
+--------------------------------------------------------------------------------
+isUrlAttribute :: String -> Bool
+isUrlAttribute = (`elem` ["src", "href", "data", "poster"])
+
+
+--------------------------------------------------------------------------------
+getUrls :: [TS.Tag String] -> [String]
+getUrls tags = [v | TS.TagOpen _ as <- tags, (k, v) <- as, isUrlAttribute k]
+
+
+--------------------------------------------------------------------------------
+-- | Apply a function to each URL on a webpage
+withUrls :: (String -> String) -> String -> String
+withUrls f = withTags tag
+ where
+ tag (TS.TagOpen s a) = TS.TagOpen s $ map attr a
+ tag x = x
+ attr (k, v) = (k, if isUrlAttribute k then f v else v)
+
+
+--------------------------------------------------------------------------------
+-- | Customized TagSoup renderer. The default TagSoup renderer escape CSS
+-- within style tags, and doesn't properly minimize.
+renderTags' :: [TS.Tag String] -> String
+renderTags' = TS.renderTagsOptions TS.RenderOptions
+ { TS.optRawTag = (`elem` ["script", "style"]) . map toLower
+ , TS.optMinimize = (`S.member` minimize) . map toLower
+ , TS.optEscape = id
+ }
+ where
+ -- A list of elements which must be minimized
+ minimize = S.fromList
+ [ "area", "br", "col", "embed", "hr", "img", "input", "meta", "link"
+ , "param"
+ ]
+
+
+--------------------------------------------------------------------------------
+-- | Customized TagSoup parser: do not decode any entities.
+parseTags' :: String -> [TS.Tag String]
+parseTags' = TS.parseTagsOptions (TS.parseOptions :: TS.ParseOptions String)
+ { TS.optEntityData = \(str, b) -> [TS.TagText $ "&" ++ str ++ [';' | b]]
+ , TS.optEntityAttrib = \(str, b) -> ("&" ++ str ++ [';' | b], [])
+ }
+
+
+--------------------------------------------------------------------------------
+-- | Convert a filepath to an URL starting from the site root
+--
+-- Example:
+--
+-- > toUrl "foo/bar.html"
+--
+-- Result:
+--
+-- > "/foo/bar.html"
+--
+-- This also sanitizes the URL, e.g. converting spaces into '%20'
+toUrl :: FilePath -> String
+toUrl url = case url of
+ ('/' : xs) -> '/' : sanitize xs
+ xs -> '/' : sanitize xs
+ where
+ -- Everything but unreserved characters should be escaped as we are
+ -- sanitising the path therefore reserved characters which have a
+ -- meaning in URI does not appear. Special casing for `/`, because it has
+ -- a special meaning in FilePath as well as in URI.
+ sanitize = escapeURIString (\c -> c == '/' || isUnreserved c)
+
+
+--------------------------------------------------------------------------------
+-- | Get the relative url to the site root, for a given (absolute) url
+toSiteRoot :: String -> String
+toSiteRoot = emptyException . joinPath . map parent
+ . filter relevant . splitPath . takeDirectory
+ where
+ parent = const ".."
+ emptyException [] = "."
+ emptyException x = x
+ relevant "." = False
+ relevant "/" = False
+ relevant "./" = False
+ relevant _ = True
+
+
+--------------------------------------------------------------------------------
+-- | Check if an URL links to an external HTTP(S) source
+isExternal :: String -> Bool
+isExternal url = any (flip isPrefixOf url) ["http://", "https://", "//"]
+
+
+--------------------------------------------------------------------------------
+-- | Strip all HTML tags from a string
+--
+-- Example:
+--
+-- > stripTags "<p>foo</p>"
+--
+-- Result:
+--
+-- > "foo"
+--
+-- This also works for incomplete tags
+--
+-- Example:
+--
+-- > stripTags "<p>foo</p"
+--
+-- Result:
+--
+-- > "foo"
+stripTags :: String -> String
+stripTags [] = []
+stripTags ('<' : xs) = stripTags $ drop 1 $ dropWhile (/= '>') xs
+stripTags (x : xs) = x : stripTags xs
+
+
+--------------------------------------------------------------------------------
+-- | HTML-escape a string
+--
+-- Example:
+--
+-- > escapeHtml "Me & Dean"
+--
+-- Result:
+--
+-- > "Me &amp; Dean"
+escapeHtml :: String -> String
+escapeHtml = renderHtml . toHtml