summaryrefslogtreecommitdiff
path: root/src/Hakyll/Web/Html.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Hakyll/Web/Html.hs')
-rw-r--r--src/Hakyll/Web/Html.hs184
1 files changed, 0 insertions, 184 deletions
diff --git a/src/Hakyll/Web/Html.hs b/src/Hakyll/Web/Html.hs
deleted file mode 100644
index 6b7ec88..0000000
--- a/src/Hakyll/Web/Html.hs
+++ /dev/null
@@ -1,184 +0,0 @@
---------------------------------------------------------------------------------
--- | Provides utilities to manipulate HTML pages
-module Hakyll.Web.Html
- ( -- * Generic
- withTags
-
- -- * Headers
- , demoteHeaders
-
- -- * Url manipulation
- , getUrls
- , withUrls
- , toUrl
- , toSiteRoot
- , isExternal
-
- -- * Stripping/escaping
- , stripTags
- , escapeHtml
- ) where
-
-
---------------------------------------------------------------------------------
-import Data.Char (digitToInt, intToDigit,
- isDigit, toLower)
-import Data.List (isPrefixOf)
-import qualified Data.Set as S
-import System.FilePath.Posix (joinPath, splitPath,
- takeDirectory)
-import Text.Blaze.Html (toHtml)
-import Text.Blaze.Html.Renderer.String (renderHtml)
-import qualified Text.HTML.TagSoup as TS
-import Network.URI (isUnreserved, escapeURIString)
-
-
---------------------------------------------------------------------------------
--- | Map over all tags in the document
-withTags :: (TS.Tag String -> TS.Tag String) -> String -> String
-withTags f = renderTags' . map f . parseTags'
-
-
---------------------------------------------------------------------------------
--- | Map every @h1@ to an @h2@, @h2@ to @h3@, etc.
-demoteHeaders :: String -> String
-demoteHeaders = withTags $ \tag -> case tag of
- TS.TagOpen t a -> TS.TagOpen (demote t) a
- TS.TagClose t -> TS.TagClose (demote t)
- t -> t
- where
- demote t@['h', n]
- | isDigit n = ['h', intToDigit (min 6 $ digitToInt n + 1)]
- | otherwise = t
- demote t = t
-
-
---------------------------------------------------------------------------------
-isUrlAttribute :: String -> Bool
-isUrlAttribute = (`elem` ["src", "href", "data", "poster"])
-
-
---------------------------------------------------------------------------------
-getUrls :: [TS.Tag String] -> [String]
-getUrls tags = [v | TS.TagOpen _ as <- tags, (k, v) <- as, isUrlAttribute k]
-
-
---------------------------------------------------------------------------------
--- | Apply a function to each URL on a webpage
-withUrls :: (String -> String) -> String -> String
-withUrls f = withTags tag
- where
- tag (TS.TagOpen s a) = TS.TagOpen s $ map attr a
- tag x = x
- attr (k, v) = (k, if isUrlAttribute k then f v else v)
-
-
---------------------------------------------------------------------------------
--- | Customized TagSoup renderer. The default TagSoup renderer escape CSS
--- within style tags, and doesn't properly minimize.
-renderTags' :: [TS.Tag String] -> String
-renderTags' = TS.renderTagsOptions TS.RenderOptions
- { TS.optRawTag = (`elem` ["script", "style"]) . map toLower
- , TS.optMinimize = (`S.member` minimize) . map toLower
- , TS.optEscape = id
- }
- where
- -- A list of elements which must be minimized
- minimize = S.fromList
- [ "area", "br", "col", "embed", "hr", "img", "input", "meta", "link"
- , "param"
- ]
-
-
---------------------------------------------------------------------------------
--- | Customized TagSoup parser: do not decode any entities.
-parseTags' :: String -> [TS.Tag String]
-parseTags' = TS.parseTagsOptions (TS.parseOptions :: TS.ParseOptions String)
- { TS.optEntityData = \(str, b) -> [TS.TagText $ "&" ++ str ++ [';' | b]]
- , TS.optEntityAttrib = \(str, b) -> ("&" ++ str ++ [';' | b], [])
- }
-
-
---------------------------------------------------------------------------------
--- | Convert a filepath to an URL starting from the site root
---
--- Example:
---
--- > toUrl "foo/bar.html"
---
--- Result:
---
--- > "/foo/bar.html"
---
--- This also sanitizes the URL, e.g. converting spaces into '%20'
-toUrl :: FilePath -> String
-toUrl url = case url of
- ('/' : xs) -> '/' : sanitize xs
- xs -> '/' : sanitize xs
- where
- -- Everything but unreserved characters should be escaped as we are
- -- sanitising the path therefore reserved characters which have a
- -- meaning in URI does not appear. Special casing for `/`, because it has
- -- a special meaning in FilePath as well as in URI.
- sanitize = escapeURIString (\c -> c == '/' || isUnreserved c)
-
-
---------------------------------------------------------------------------------
--- | Get the relative url to the site root, for a given (absolute) url
-toSiteRoot :: String -> String
-toSiteRoot = emptyException . joinPath . map parent
- . filter relevant . splitPath . takeDirectory
- where
- parent = const ".."
- emptyException [] = "."
- emptyException x = x
- relevant "." = False
- relevant "/" = False
- relevant "./" = False
- relevant _ = True
-
-
---------------------------------------------------------------------------------
--- | Check if an URL links to an external HTTP(S) source
-isExternal :: String -> Bool
-isExternal url = any (flip isPrefixOf url) ["http://", "https://", "//"]
-
-
---------------------------------------------------------------------------------
--- | Strip all HTML tags from a string
---
--- Example:
---
--- > stripTags "<p>foo</p>"
---
--- Result:
---
--- > "foo"
---
--- This also works for incomplete tags
---
--- Example:
---
--- > stripTags "<p>foo</p"
---
--- Result:
---
--- > "foo"
-stripTags :: String -> String
-stripTags [] = []
-stripTags ('<' : xs) = stripTags $ drop 1 $ dropWhile (/= '>') xs
-stripTags (x : xs) = x : stripTags xs
-
-
---------------------------------------------------------------------------------
--- | HTML-escape a string
---
--- Example:
---
--- > escapeHtml "Me & Dean"
---
--- Result:
---
--- > "Me &amp; Dean"
-escapeHtml :: String -> String
-escapeHtml = renderHtml . toHtml