aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/HTML.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Writers/HTML.hs')
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs197
1 files changed, 197 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
new file mode 100644
index 000000000..9eecf2761
--- /dev/null
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -0,0 +1,197 @@
+-- | Converts Pandoc to HTML.
+module Text.Pandoc.Writers.HTML (
+ writeHtml
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Html ( stringToHtmlString )
+import Text.Regex ( mkRegex )
+import Numeric ( showHex )
+import Char ( ord )
+import List ( isPrefixOf )
+
+-- | Convert Pandoc document to string in HTML format.
+writeHtml :: WriterOptions -> Pandoc -> String
+writeHtml options (Pandoc (Meta title authors date) blocks) =
+ let titlePrefix = writerTitlePrefix options in
+ let topTitle = if not (null titlePrefix) then
+ [Str titlePrefix] ++ (if not (null title) then [Str " - "] ++ title else [])
+ else
+ title in
+ let head = if (writerStandalone options) then
+ htmlHeader options (Meta topTitle authors date)
+ else
+ ""
+ titleBlocks = if (writerStandalone options) && (not (null title)) &&
+ (not (writerS5 options)) then
+ [RawHtml "<h1 class=\"title\">", Plain title, RawHtml "</h1>\n"]
+ else
+ []
+ foot = if (writerStandalone options) then "</body>\n</html>\n" else ""
+ body = (writerIncludeBefore options) ++
+ concatMap (blockToHtml options) (replaceReferenceLinks (titleBlocks ++ blocks)) ++
+ (writerIncludeAfter options) in
+ head ++ body ++ foot
+
+-- | Obfuscate a "mailto:" link using Javascript.
+obfuscateLink :: WriterOptions -> [Inline] -> String -> String
+obfuscateLink options text src =
+ let text' = inlineListToHtml options text in
+ let linkText = if src == ("mailto:" ++ text') then "e" else "'" ++ text' ++ "'"
+ altText = if src == ("mailto:" ++ text') then "\\1 [at] \\2" else text' ++ " (\\1 [at] \\2)" in
+ gsub "mailto:([^@]*)@(.*)" ("<script type=\"text/javascript\">h='\\2';n='\\1';e=n+'@'+h;document.write('<a href=\"mailto:'+e+'\">'+" ++ linkText ++ "+'<\\/a>');</script><noscript>" ++ altText ++ "</noscript>") src
+
+-- | Obfuscate character as entity.
+obfuscateChar :: Char -> String
+obfuscateChar char = let num = ord char in
+ let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in
+ "&#" ++ numstr ++ ";"
+
+-- | Escape string, preserving character entities and quote.
+stringToHtml :: String -> String
+stringToHtml str = escapePreservingRegex stringToHtmlString (mkRegex "\"|(&[[:alnum:]]*;)") str
+
+-- | Escape string as in 'stringToHtml' but add smartypants filter.
+stringToSmartHtml :: String -> String
+stringToSmartHtml =
+ let escapeDoubleQuotes =
+ gsub "(\"|&quot;|'')" "&rdquo;" . -- rest are right quotes
+ gsub "([[:space:]])(\"|&quot;)" "\\1&ldquo;" . -- never right quo after space
+ gsub "(\"|&quot;|``)('|`|&lsquo;)([^[:punct:][:space:]])" "&ldquo;&lsquo;\\3" . -- "'word left
+ gsub "(\"|&quot;|``)([^[:punct:][:space:]])" "&ldquo;\\2" -- "word left
+ escapeSingleQuotes =
+ gsub "'" "&rsquo;" . -- otherwise right
+ gsub "([[:space:]])'" "\\1&lsquo;" . -- never right quo after space
+ gsub "`" "&lsquo;" . -- ` is left
+ gsub "([^[:punct:][:space:]])'" "\\1&rsquo;" . -- word' right
+ gsub "('|`)(\"|&quot;|&ldquo;|``)" "&lsquo;&ldquo;" . -- '"word left
+ gsub "^('|`)([^[:punct:][:space:]])" "&lsquo;\\2" . -- 'word left
+ gsub "([^[:punct:][:space:]])'(s|S)" "\\1&rsquo;\\2" . -- possessive
+ gsub "([[:space:]])'([^[:punct:][:space:]])" "\\1&lsquo;\\2" . -- 'word left
+ gsub "'([0-9][0-9](s|S))" "&rsquo;\\1" -- '80s - decade abbrevs.
+ escapeDashes = gsub " ?-- ?" "&mdash;" .
+ gsub " ?--- ?" "&mdash;" .
+ gsub "([0-9])--?([0-9])" "\\1&ndash;\\2"
+ escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "&hellip;" in
+ escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . escapeEllipses . stringToHtml
+
+-- | Escape code string as needed for HTML.
+codeStringToHtml :: String -> String
+codeStringToHtml [] = []
+codeStringToHtml (x:xs) = case x of
+ '&' -> "&amp;" ++ codeStringToHtml xs
+ '<' -> "&lt;" ++ codeStringToHtml xs
+ _ -> x:(codeStringToHtml xs)
+
+-- | Escape string to HTML appropriate for attributes
+attributeStringToHtml :: String -> String
+attributeStringToHtml = gsub "\"" "&quot;"
+
+-- | Returns an HTML header with appropriate bibliographic information.
+htmlHeader :: WriterOptions -> Meta -> String
+htmlHeader options (Meta title authors date) =
+ let titletext = if (null title) then
+ ""
+ else
+ "<title>" ++ (inlineListToHtml options title) ++ "</title>\n"
+ authortext = if (null authors) then
+ ""
+ else
+ "<meta name=\"author\" content=\"" ++
+ (joinWithSep ", " (map stringToHtml authors)) ++ "\" />\n"
+ datetext = if (date == "") then
+ ""
+ else
+ "<meta name=\"date\" content=\"" ++ (stringToHtml date) ++ "\" />\n" in
+ (writerHeader options) ++ authortext ++ datetext ++ titletext ++ "</head>\n<body>\n"
+
+-- | Convert Pandoc block element to HTML.
+blockToHtml :: WriterOptions -> Block -> String
+blockToHtml options Blank = "\n"
+blockToHtml options Null = ""
+blockToHtml options (Plain lst) = inlineListToHtml options lst
+blockToHtml options (Para lst) = "<p>" ++ (inlineListToHtml options lst) ++ "</p>\n"
+blockToHtml options (BlockQuote blocks) =
+ if (writerS5 options) then -- in S5, treat list in blockquote specially
+ -- if default is incremental, make it nonincremental; otherwise incremental
+ let inc = not (writerIncremental options) in
+ case blocks of
+ [BulletList lst] -> blockToHtml (options {writerIncremental = inc}) (BulletList lst)
+ [OrderedList lst] -> blockToHtml (options {writerIncremental = inc}) (OrderedList lst)
+ otherwise -> "<blockquote>\n" ++ (concatMap (blockToHtml options) blocks) ++
+ "</blockquote>\n"
+ else
+ "<blockquote>\n" ++ (concatMap (blockToHtml options) blocks) ++ "</blockquote>\n"
+blockToHtml options (Note ref lst) =
+ let marker = "<span class=\"pandocNoteMarker\"><a name=\"note_" ++ ref ++
+ "\" href=\"#ref_" ++ ref ++ "\">(" ++ ref ++ ")</a></span> " in
+ let contents = (concatMap (blockToHtml options) lst) in
+ let contents' = case contents of
+ ('<':'p':'>':rest) -> "<p class=\"first\">" ++ marker ++ rest ++ "\n"
+ otherwise -> marker ++ contents ++ "\n" in
+ "<div class=\"pandocNote\">\n" ++ contents' ++ "</div>\n"
+blockToHtml options (Key _ _) = ""
+blockToHtml options (CodeBlock str) = "<pre><code>" ++ (codeStringToHtml str) ++
+ "</code></pre>\n"
+blockToHtml options (RawHtml str) = str
+blockToHtml options (BulletList lst) =
+ let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in
+ "<ul" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++ "</ul>\n"
+blockToHtml options (OrderedList lst) =
+ let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in
+ "<ol" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++ "</ol>\n"
+blockToHtml options HorizontalRule = "<hr />\n"
+blockToHtml options (Header level lst) = if ((level > 0) && (level <= 6)) then
+ "<h" ++ (show level) ++ ">" ++
+ (inlineListToHtml options lst) ++
+ "</h" ++ (show level) ++ ">\n"
+ else
+ "<p>" ++ (inlineListToHtml options lst) ++ "</p>\n"
+listItemToHtml options list = "<li>" ++ (concatMap (blockToHtml options) list) ++ "</li>\n"
+
+-- | Convert list of Pandoc inline elements to HTML.
+inlineListToHtml :: WriterOptions -> [Inline] -> String
+inlineListToHtml options lst =
+ -- consolidate adjacent Str and Space elements for more intelligent
+ -- smartypants filtering
+ let lst' = consolidateList lst in
+ concatMap (inlineToHtml options) lst'
+
+-- | Convert Pandoc inline element to HTML.
+inlineToHtml :: WriterOptions -> Inline -> String
+inlineToHtml options (Emph lst) = "<em>" ++ (inlineListToHtml options lst) ++ "</em>"
+inlineToHtml options (Strong lst) = "<strong>" ++ (inlineListToHtml options lst) ++ "</strong>"
+inlineToHtml options (Code str) = "<code>" ++ (codeStringToHtml str) ++ "</code>"
+inlineToHtml options (Str str) = if (writerSmartypants options) then
+ stringToSmartHtml str
+ else
+ stringToHtml str
+inlineToHtml options (TeX str) = (codeStringToHtml str)
+inlineToHtml options (HtmlInline str) = str
+inlineToHtml options (LineBreak) = "<br />\n"
+inlineToHtml options Space = " "
+inlineToHtml options (Link text (Src src tit)) =
+ let title = attributeStringToHtml tit in
+ if (isPrefixOf "mailto:" src) then
+ obfuscateLink options text src
+ else
+ "<a href=\"" ++ (codeStringToHtml src) ++ "\"" ++
+ (if tit /= "" then " title=\"" ++ title ++ "\">" else ">") ++
+ (inlineListToHtml options text) ++ "</a>"
+inlineToHtml options (Link text (Ref [])) = "[" ++ (inlineListToHtml options text) ++ "]"
+inlineToHtml options (Link text (Ref ref)) = "[" ++ (inlineListToHtml options text) ++ "][" ++
+ (inlineListToHtml options ref) ++ "]" -- this is what markdown does, for better or worse
+inlineToHtml options (Image alt (Src source tit)) =
+ let title = attributeStringToHtml tit
+ alternate = inlineListToHtml options alt in
+ "<img src=\"" ++ source ++ "\"" ++
+ (if tit /= "" then " title=\"" ++ title ++ "\"" else "") ++
+ (if alternate /= "" then " alt=\"" ++ alternate ++ "\"" else "") ++ ">"
+inlineToHtml options (Image alternate (Ref [])) =
+ "![" ++ (inlineListToHtml options alternate) ++ "]"
+inlineToHtml options (Image alternate (Ref ref)) =
+ "![" ++ (inlineListToHtml options alternate) ++ "][" ++ (inlineListToHtml options ref) ++ "]"
+inlineToHtml options (NoteRef ref) =
+ "<span class=\"pandocNoteRef\"><a name=\"ref_" ++ ref ++ "\" href=\"#note_" ++ ref ++
+ "\">(" ++ ref ++ ")</a></span>"
+