aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <fiddlosopher@gmail.com>2011-11-19 00:20:00 -0800
committerJohn MacFarlane <fiddlosopher@gmail.com>2011-11-19 00:20:00 -0800
commit24d8961d05c4be7f99349b6d66fcbfc5c61e3653 (patch)
tree9a72a550b319c7b07d328975d6461beadddb40d3 /src/Text/Pandoc
parentbea0fdf6e6d3f6dab000a6a4b6a0f74776348b98 (diff)
downloadpandoc-24d8961d05c4be7f99349b6d66fcbfc5c61e3653.tar.gz
Added Text.Pandoc.Offline module.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Offline.hs239
1 files changed, 239 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Offline.hs b/src/Text/Pandoc/Offline.hs
new file mode 100644
index 000000000..727026887
--- /dev/null
+++ b/src/Text/Pandoc/Offline.hs
@@ -0,0 +1,239 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Offline
+ Copyright : Copyright (C) 2011 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for converting an HTML file into one that can be viewed
+offline, by incorporating linked images, CSS, and scripts into
+the HTML using data URIs.
+-}
+module Text.Pandoc.Offline ( offline ) where
+import Text.HTML.TagSoup
+import Network.URI (isAbsoluteURI, parseURI, escapeURIString)
+import Network.Browser
+import Network.HTTP
+import Data.ByteString.Base64
+import System.IO
+import qualified Data.ByteString.Char8 as B
+import Data.ByteString (ByteString)
+import Data.ByteString.UTF8 (toString, fromString)
+import System.FilePath (takeExtension, dropExtension)
+import Data.Char (toLower, isAscii, isAlphaNum)
+import Codec.Compression.GZip as Gzip
+import qualified Data.ByteString.Lazy as L
+
+getItem :: String -> IO ByteString
+getItem f =
+ if isAbsoluteURI f
+ then openURL f
+ else B.readFile f
+
+openURL :: String -> IO ByteString
+openURL u = getResponseBody =<< simpleHTTP (getReq u)
+ where getReq u = case parseURI u of
+ Nothing -> error $ "Could not parse URI: " ++ u
+ Just u' -> mkRequest GET u'
+
+mimeTypeFor :: String -> ByteString
+mimeTypeFor s = case lookup s mimetypes of
+ Nothing -> error $ "Could not find mime type for " ++ s
+ Just x -> x
+ where mimetypes = [ -- taken from MissingH
+ (".a", "application/octet-stream"),
+ (".ai", "application/postscript"),
+ (".aif", "audio/x-aiff"),
+ (".aifc", "audio/x-aiff"),
+ (".aiff", "audio/x-aiff"),
+ (".au", "audio/basic"),
+ (".avi", "video/x-msvideo"),
+ (".bat", "text/plain"),
+ (".bcpio", "application/x-bcpio"),
+ (".bin", "application/octet-stream"),
+ (".bmp", "image/x-ms-bmp"),
+ (".c", "text/plain"),
+ (".cdf", "application/x-netcdf"),
+ (".cpio", "application/x-cpio"),
+ (".csh", "application/x-csh"),
+ (".css", "text/css"),
+ (".dll", "application/octet-stream"),
+ (".doc", "application/msword"),
+ (".dot", "application/msword"),
+ (".dvi", "application/x-dvi"),
+ (".eml", "message/rfc822"),
+ (".eps", "application/postscript"),
+ (".etx", "text/x-setext"),
+ (".exe", "application/octet-stream"),
+ (".gif", "image/gif"),
+ (".gtar", "application/x-gtar"),
+ (".h", "text/plain"),
+ (".hdf", "application/x-hdf"),
+ (".htm", "text/html"),
+ (".html", "text/html"),
+ (".ief", "image/ief"),
+ (".jpe", "image/jpeg"),
+ (".jpeg", "image/jpeg"),
+ (".jpg", "image/jpeg"),
+ (".js", "application/x-javascript"),
+ (".ksh", "text/plain"),
+ (".latex", "application/x-latex"),
+ (".m1v", "video/mpeg"),
+ (".man", "application/x-troff-man"),
+ (".me", "application/x-troff-me"),
+ (".mht", "message/rfc822"),
+ (".mhtml", "message/rfc822"),
+ (".mif", "application/x-mif"),
+ (".mov", "video/quicktime"),
+ (".movie", "video/x-sgi-movie"),
+ (".mp2", "audio/mpeg"),
+ (".mp3", "audio/mpeg"),
+ (".mpa", "video/mpeg"),
+ (".mpe", "video/mpeg"),
+ (".mpeg", "video/mpeg"),
+ (".mpg", "video/mpeg"),
+ (".ms", "application/x-troff-ms"),
+ (".nc", "application/x-netcdf"),
+ (".nws", "message/rfc822"),
+ (".o", "application/octet-stream"),
+ (".obj", "application/octet-stream"),
+ (".oda", "application/oda"),
+ (".p12", "application/x-pkcs12"),
+ (".p7c", "application/pkcs7-mime"),
+ (".pbm", "image/x-portable-bitmap"),
+ (".pdf", "application/pdf"),
+ (".pfx", "application/x-pkcs12"),
+ (".pgm", "image/x-portable-graymap"),
+ (".pl", "text/plain"),
+ (".png", "image/png"),
+ (".pnm", "image/x-portable-anymap"),
+ (".pot", "application/vnd.ms-powerpoint"),
+ (".ppa", "application/vnd.ms-powerpoint"),
+ (".ppm", "image/x-portable-pixmap"),
+ (".pps", "application/vnd.ms-powerpoint"),
+ (".ppt", "application/vnd.ms-powerpoint"),
+ (".ps", "application/postscript"),
+ (".pwz", "application/vnd.ms-powerpoint"),
+ (".py", "text/x-python"),
+ (".pyc", "application/x-python-code"),
+ (".pyo", "application/x-python-code"),
+ (".qt", "video/quicktime"),
+ (".ra", "audio/x-pn-realaudio"),
+ (".ram", "application/x-pn-realaudio"),
+ (".ras", "image/x-cmu-raster"),
+ (".rdf", "application/xml"),
+ (".rgb", "image/x-rgb"),
+ (".roff", "application/x-troff"),
+ (".rtx", "text/richtext"),
+ (".sgm", "text/x-sgml"),
+ (".sgml", "text/x-sgml"),
+ (".sh", "application/x-sh"),
+ (".shar", "application/x-shar"),
+ (".snd", "audio/basic"),
+ (".so", "application/octet-stream"),
+ (".src", "application/x-wais-source"),
+ (".sv4cpio", "application/x-sv4cpio"),
+ (".sv4crc", "application/x-sv4crc"),
+ (".swf", "application/x-shockwave-flash"),
+ (".t", "application/x-troff"),
+ (".tar", "application/x-tar"),
+ (".tcl", "application/x-tcl"),
+ (".tex", "application/x-tex"),
+ (".texi", "application/x-texinfo"),
+ (".texinfo", "application/x-texinfo"),
+ (".tif", "image/tiff"),
+ (".tiff", "image/tiff"),
+ (".tr", "application/x-troff"),
+ (".tsv", "text/tab-separated-values"),
+ (".txt", "text/plain"),
+ (".ustar", "application/x-ustar"),
+ (".vcf", "text/x-vcard"),
+ (".wav", "audio/x-wav"),
+ (".wiz", "application/msword"),
+ (".xbm", "image/x-xbitmap"),
+ (".xlb", "application/vnd.ms-excel"),
+ (".xls", "application/vnd.ms-excel"),
+ (".xml", "text/xml"),
+ (".xpm", "image/x-xpixmap"),
+ (".xsl", "application/xml"),
+ (".xwd", "image/x-xwindowdump"),
+ (".zip", "application/zip"),
+ (".jpg", "image/jpg"),
+ (".mid", "audio/midi"),
+ (".midi", "audio/midi"),
+ (".pct", "image/pict"),
+ (".pic", "image/pict"),
+ (".pict", "image/pict"),
+ (".rtf", "application/rtf"),
+ (".xul", "text/xul")
+ ]
+
+isOk c = isAscii c && isAlphaNum c
+
+convertTag :: Tag ByteString -> IO (Tag ByteString)
+convertTag t@(TagOpen "img" as) =
+ case fromAttrib "src" t of
+ src | not (B.null src) -> do
+ (raw, mime) <- getRaw t src
+ let enc = "data:" `B.append` mime `B.append` ";base64," `B.append` encode raw
+ return $ TagOpen "img" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+ _ -> return t
+convertTag t@(TagOpen "script" as) =
+ case fromAttrib "src" t of
+ src | not (B.null src) -> do
+ (raw, mime) <- getRaw t src
+ let enc = "data:" `B.append` mime `B.append` "," `B.append`
+ (fromString $ escapeURIString isOk $ toString raw)
+ return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+ _ -> return t
+convertTag t@(TagOpen "link" as) =
+ case fromAttrib "href" t of
+ src | not (B.null src) -> do
+ (raw, mime) <- getRaw t src
+ let enc = "data:" `B.append` mime `B.append` "," `B.append`
+ (fromString $ escapeURIString isOk $ toString raw)
+ return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
+ _ -> return t
+convertTag t = return t
+
+getRaw :: Tag ByteString -> ByteString -> IO (ByteString, ByteString)
+getRaw t src = do
+ let src' = toString src
+ let ext = map toLower $ takeExtension src'
+ let (ext',decompress) = if ext == ".gz"
+ then (takeExtension $ dropExtension src', B.concat . L.toChunks . Gzip.decompress . L.fromChunks . (:[]))
+ else (ext, id)
+ let mime = case fromAttrib "type" t of
+ x | not (B.null x) -> x
+ _ -> mimeTypeFor ext'
+ raw <- getItem src'
+ return (decompress raw, mime)
+
+offline :: ByteString -> IO ByteString
+offline inp = do
+ let tags = parseTags inp
+ out <- mapM convertTag tags
+ return $ renderTagsOptions renderOptions{ optMinimize = (\t -> t == "br"
+ || t == "img" || t == "meta" || t == "link" ) } out
+