{- Copyright (C) 2006 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Writers.HTML Copyright : Copyright (C) 2006 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml, stringToSmartHtml, stringToHtml ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Html ( stringToHtmlString ) import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, partition ) -- | Convert Pandoc document to string in HTML format. writeHtml :: WriterOptions -> Pandoc -> String writeHtml options (Pandoc (Meta title authors date) blocks) = let titlePrefix = writerTitlePrefix options in let topTitle = if not (null titlePrefix) then [Str titlePrefix] ++ (if not (null title) then [Str " - "] ++ title else []) else title in let head = if (writerStandalone options) then htmlHeader options (Meta topTitle authors date) else "" titleBlocks = if (writerStandalone options) && (not (null title)) && (not (writerS5 options)) then [RawHtml "

", Plain title, RawHtml "

\n"] else [] foot = if (writerStandalone options) then "\n\n" else "" blocks' = replaceReferenceLinks (titleBlocks ++ blocks) (noteBlocks, blocks'') = partition isNoteBlock blocks' body = (writerIncludeBefore options) ++ concatMap (blockToHtml options) blocks'' ++ footnoteSection options noteBlocks ++ (writerIncludeAfter options) in head ++ body ++ foot -- | Convert list of Note blocks to a footnote
. -- Assumes notes are sorted. footnoteSection :: WriterOptions -> [Block] -> String footnoteSection options notes = if null notes then "" else "
\n
\n
    \n" ++ concatMap (blockToHtml options) notes ++ "
\n
\n" -- | Obfuscate a "mailto:" link using Javascript. obfuscateLink :: WriterOptions -> [Inline] -> String -> String obfuscateLink options text src = let emailRegex = mkRegex "mailto:*([^@]*)@(.*)" text' = inlineListToHtml options text src' = map toLower src in case (matchRegex emailRegex src') of (Just [name, domain]) -> let domain' = gsub "\\." " dot " domain at' = obfuscateChar '@' in let linkText = if src' == ("mailto:" ++ text') then "e" else "'" ++ text' ++ "'" altText = if src' == ("mailto:" ++ text') then name ++ " at " ++ domain' else text' ++ " (" ++ name ++ " at " ++ domain' ++ ")" in if writerStrictMarkdown options then "" ++ obfuscateString text' ++ "" else "" _ -> "" ++ text' ++ "" -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String obfuscateChar char = let num = ord char in let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in "&#" ++ numstr ++ ";" -- | Obfuscate string using entities. obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar -- | Escape string, preserving character entities and quote. stringToHtml :: String -> String stringToHtml str = escapePreservingRegex stringToHtmlString (mkRegex "\"|(&[[:alnum:]]*;)") str -- | Escape string as in 'stringToHtml' but add smart typography filter. stringToSmartHtml :: String -> String stringToSmartHtml = let escapeDoubleQuotes = gsub "(\"|")" "”" . -- rest are right quotes gsub "(\"|")(&r[sd]quo;)" "”\\2" . -- never left quo before right quo gsub "(&l[sd]quo;)(\"|")" "\\2“" . -- never right quo after left quo gsub "([ \t])(\"|")" "\\1“" . -- never right quo after space gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left gsub "(\"|")('|`|‘)" "”’" . -- right if it got through last filter gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" . -- "'word left gsub "``" "“" . gsub "''" "”" escapeSingleQuotes = gsub "'" "’" . -- otherwise right gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo gsub "([ \t])'" "\\1‘" . -- never right quo after space gsub "`" "‘" . -- ` is left gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. escapeDashes = gsub " ?-- ?" "—" . gsub " ?--- ?" "—" . gsub "([0-9])--?([0-9])" "\\1–\\2" escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" in escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . escapeEllipses . stringToHtml -- | Escape code string as needed for HTML. codeStringToHtml :: String -> String codeStringToHtml [] = [] codeStringToHtml (x:xs) = case x of '&' -> "&" ++ codeStringToHtml xs '<' -> "<" ++ codeStringToHtml xs _ -> x:(codeStringToHtml xs) -- | Escape string to HTML appropriate for attributes attributeStringToHtml :: String -> String attributeStringToHtml = gsub "\"" """ -- | Returns an HTML header with appropriate bibliographic information. htmlHeader :: WriterOptions -> Meta -> String htmlHeader options (Meta title authors date) = let titletext = "" ++ (inlineListToHtml options title) ++ "\n" authortext = if (null authors) then "" else "\n" datetext = if (date == "") then "" else "\n" in (writerHeader options) ++ authortext ++ datetext ++ titletext ++ "\n\n" -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> String blockToHtml options Blank = "\n" blockToHtml options Null = "" blockToHtml options (Plain lst) = inlineListToHtml options lst blockToHtml options (Para lst) = "

" ++ (inlineListToHtml options lst) ++ "

\n" blockToHtml options (BlockQuote blocks) = if (writerS5 options) then -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; -- otherwise incremental let inc = not (writerIncremental options) in case blocks of [BulletList lst] -> blockToHtml (options {writerIncremental = inc}) (BulletList lst) [OrderedList lst] -> blockToHtml (options {writerIncremental = inc}) (OrderedList lst) otherwise -> "
\n" ++ (concatMap (blockToHtml options) blocks) ++ "
\n" else "
\n" ++ (concatMap (blockToHtml options) blocks) ++ "
\n" blockToHtml options (Note ref lst) = let contents = (concatMap (blockToHtml options) lst) in "
  • " ++ contents ++ "
  • \n" blockToHtml options (Key _ _) = "" blockToHtml options (CodeBlock str) = "
    " ++ (codeStringToHtml str) ++ "\n
    \n" blockToHtml options (RawHtml str) = str blockToHtml options (BulletList lst) = let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in "\n" ++ (concatMap (listItemToHtml options) lst) ++ "\n" blockToHtml options (OrderedList lst) = let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in "\n" ++ (concatMap (listItemToHtml options) lst) ++ "\n" blockToHtml options HorizontalRule = "
    \n" blockToHtml options (Header level lst) = let contents = inlineListToHtml options lst in if ((level > 0) && (level <= 6)) then "" ++ contents ++ "\n" else "

    " ++ contents ++ "

    \n" listItemToHtml options list = "
  • " ++ (concatMap (blockToHtml options) list) ++ "
  • \n" -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> String inlineListToHtml options lst = -- consolidate adjacent Str and Space elements for more intelligent -- smart typography filtering let lst' = consolidateList lst in concatMap (inlineToHtml options) lst' -- | Convert Pandoc inline element to HTML. inlineToHtml :: WriterOptions -> Inline -> String inlineToHtml options (Emph lst) = "" ++ (inlineListToHtml options lst) ++ "" inlineToHtml options (Strong lst) = "" ++ (inlineListToHtml options lst) ++ "" inlineToHtml options (Code str) = "" ++ (codeStringToHtml str) ++ "" inlineToHtml options (Str str) = if (writerSmart options) then stringToSmartHtml str else stringToHtml str inlineToHtml options (TeX str) = (codeStringToHtml str) inlineToHtml options (HtmlInline str) = str inlineToHtml options (LineBreak) = "
    \n" inlineToHtml options Space = " " inlineToHtml options (Link text (Src src tit)) = let title = attributeStringToHtml tit in if (isPrefixOf "mailto:" src) then obfuscateLink options text src else "" else ">") ++ (inlineListToHtml options text) ++ "" inlineToHtml options (Link text (Ref ref)) = "[" ++ (inlineListToHtml options text) ++ "][" ++ (inlineListToHtml options ref) ++ "]" -- this is what markdown does, for better or worse inlineToHtml options (Image alt (Src source tit)) = let title = attributeStringToHtml tit alternate = inlineListToHtml options alt in "\""" inlineToHtml options (Image alternate (Ref ref)) = "![" ++ (inlineListToHtml options alternate) ++ "][" ++ (inlineListToHtml options ref) ++ "]" inlineToHtml options (NoteRef ref) = "" ++ ref ++ ""