{-# OPTIONS_GHC -fno-warn-deprecations #-} {- Copyright (C) 2006-8 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Writers.HTML Copyright : Copyright (C) 2006-8 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where import Text.Pandoc.Definition import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.Pandoc.Shared import Text.Pandoc.Templates import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlightHtml ) import Text.Pandoc.XML (stripTags, escapeStringForXML) import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.Maybe ( catMaybes ) import Control.Monad.State import Text.XHtml.Transitional hiding ( stringToHtml ) data WriterState = WriterState { stNotes :: [Html] -- ^ List of notes , stMath :: Bool -- ^ Math is used in document , stHighlighting :: Bool -- ^ Syntax highlighting is used , stSecNum :: [Int] -- ^ Number of current section } deriving Show defaultWriterState :: WriterState defaultWriterState = WriterState {stNotes= [], stMath = False, stHighlighting = False, stSecNum = []} -- Helpers to render HTML with the appropriate function. renderFragment :: (HTML html) => WriterOptions -> html -> String renderFragment opts = if writerWrapText opts then renderHtmlFragment else showHtmlFragment -- | Modified version of Text.XHtml's stringToHtml. -- Use unicode characters wherever possible. stringToHtml :: String -> Html stringToHtml = primHtml . escapeStringForXML -- | Convert Pandoc document to Html string. writeHtmlString :: WriterOptions -> Pandoc -> String writeHtmlString opts d = let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d) defaultWriterState in if writerStandalone opts then inTemplate opts tit auths date toc body' newvars else renderFragment opts body' -- | Convert Pandoc document to Html structure. writeHtml :: WriterOptions -> Pandoc -> Html writeHtml opts d = let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d) defaultWriterState in if writerStandalone opts then inTemplate opts tit auths date toc body' newvars else body' -- result is (title, authors, date, toc, body, new variables) pandocToHtml :: WriterOptions -> Pandoc -> State WriterState (Html, [Html], Html, Html, Html, [(String,String)]) pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do let standalone = writerStandalone opts tit <- if standalone then inlineListToHtml opts title' else return noHtml auths <- if standalone then mapM (inlineListToHtml opts) authors' else return [] date <- if standalone then inlineListToHtml opts date' else return noHtml let sects = hierarchicalize blocks toc <- if writerTableOfContents opts then tableOfContents opts sects else return noHtml blocks' <- liftM toHtmlFromList $ mapM (elementToHtml opts) sects st <- get let notes = reverse (stNotes st) let before = primHtml $ writerIncludeBefore opts let after = primHtml $ writerIncludeAfter opts let thebody = before +++ blocks' +++ footnoteSection notes +++ after let math = if stMath st then case writerHTMLMathMethod opts of LaTeXMathML (Just url) -> script ! [src url, thetype "text/javascript"] $ noHtml JsMath (Just url) -> script ! [src url, thetype "text/javascript"] $ noHtml _ -> case lookup "latexmathml-script" (writerVariables opts) of Just s -> script ! [thetype "text/javascript"] << primHtml s Nothing -> noHtml else noHtml let newvars = [("highlighting","yes") | stHighlighting st] ++ [("math", renderHtmlFragment math) | stMath st] return (tit, auths, date, toc, thebody, newvars) inTemplate :: TemplateTarget a => WriterOptions -> Html -> [Html] -> Html -> Html -> Html -> [(String,String)] -> a inTemplate opts tit auths date toc body' newvars = let renderedTit = showHtmlFragment tit topTitle' = stripTags renderedTit authors = map (stripTags . showHtmlFragment) auths date' = stripTags $ showHtmlFragment date variables = writerVariables opts ++ newvars context = variables ++ [ ("body", renderHtmlFragment body') , ("pagetitle", topTitle') , ("toc", renderHtmlFragment toc) , ("title", renderHtmlFragment tit) , ("date", date') ] ++ [ ("author", a) | a <- authors ] in renderTemplate context $ writerTemplate opts -- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix prefixedId :: WriterOptions -> String -> HtmlAttr prefixedId opts s = identifier $ writerIdentifierPrefix opts ++ s -- | Construct table of contents from list of elements. tableOfContents :: WriterOptions -> [Element] -> State WriterState Html tableOfContents _ [] = return noHtml tableOfContents opts sects = do let opts' = opts { writerIgnoreNotes = True } contents <- mapM (elementToListItem opts') sects let tocList = catMaybes contents return $ thediv ! [prefixedId opts' "TOC"] $ if null tocList then noHtml else unordList tocList -- | Convert section number to string showSecNum :: [Int] -> String showSecNum = concat . intersperse "." . map show -- | Converts an Element to a list item for a table of contents, -- retrieving the appropriate identifier from state. elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html) elementToListItem _ (Blk _) = return Nothing elementToListItem opts (Sec _ num id' headerText subsecs) = do let sectnum = if writerNumberSections opts then (thespan ! [theclass "toc-section-number"] << showSecNum num) +++ stringToHtml " " else noHtml txt <- liftM (sectnum +++) $ inlineListToHtml opts headerText subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes let subList = if null subHeads then noHtml else unordList subHeads return $ Just $ (anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ id')] $ txt) +++ subList -- | Convert an Element to Html. elementToHtml :: WriterOptions -> Element -> State WriterState Html elementToHtml opts (Blk block) = blockToHtml opts block elementToHtml opts (Sec level num id' title' elements) = do innerContents <- mapM (elementToHtml opts) elements modify $ \st -> st{stSecNum = num} -- update section number header' <- blockToHtml opts (Header level title') return $ if writerS5 opts || (writerStrictMarkdown opts && not (writerTableOfContents opts)) -- S5 gets confused by the extra divs around sections then toHtmlFromList (header' : innerContents) else thediv ! [prefixedId opts id'] << (header' : innerContents) -- | Convert list of Note blocks to a footnote
. -- Assumes notes are sorted. footnoteSection :: [Html] -> Html footnoteSection notes = if null notes then noHtml else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes) -- | Parse a mailto link; return Just (name, domain) or Nothing. parseMailto :: String -> Maybe (String, String) parseMailto ('m':'a':'i':'l':'t':'o':':':addr) = let (name', rest) = span (/='@') addr domain = drop 1 rest in Just (name', domain) parseMailto _ = Nothing -- | Obfuscate a "mailto:" link. obfuscateLink :: WriterOptions -> String -> String -> Html obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation = anchor ! [href s] << txt obfuscateLink opts txt s = let meth = writerEmailObfuscation opts s' = map toLower s in case parseMailto s' of (Just (name', domain)) -> let domain' = substitute "." " dot " domain at' = obfuscateChar '@' (linkText, altText) = if txt == drop 7 s' -- autolink then ("''+e+''", name' ++ " at " ++ domain') else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")") in case meth of ReferenceObfuscation -> -- need to use primHtml or &'s are escaped to & in URL primHtml $ "" ++ (obfuscateString txt) ++ "" JavascriptObfuscation -> (script ! [thetype "text/javascript"] $ primHtml ("\n\n")) +++ noscript (primHtml $ obfuscateString altText) _ -> error $ "Unknown obfuscation method: " ++ show meth _ -> anchor ! [href s] $ stringToHtml txt -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String obfuscateChar char = let num = ord char numstr = if even num then show num else "x" ++ showHex num "" in "&#" ++ numstr ++ ";" -- | Obfuscate string using entities. obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar . decodeCharacterReferences -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> State WriterState Html blockToHtml _ Null = return $ noHtml blockToHtml opts (Plain lst) = inlineListToHtml opts lst blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) blockToHtml _ (RawHtml str) = return $ primHtml str blockToHtml _ (HorizontalRule) = return $ hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do let classes' = if writerLiterateHaskell opts then classes else filter (/= "literate") classes case highlightHtml (id',classes',keyvals) rawCode of Left _ -> -- change leading newlines into
tags, because some -- browsers ignore leading newlines in pre blocks let (leadingBreaks, rawCode') = span (=='\n') rawCode attrs = [theclass (unwords classes') | not (null classes')] ++ [prefixedId opts id' | not (null id')] ++ map (\(x,y) -> strAttr x y) keyvals in return $ pre ! attrs $ thecode << (replicate (length leadingBreaks) br +++ [stringToHtml $ rawCode' ++ "\n"]) Right h -> modify (\st -> st{ stHighlighting = True }) >> return h blockToHtml opts (BlockQuote blocks) = -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; -- otherwise incremental if writerS5 opts then let inc = not (writerIncremental opts) in case blocks of [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) (BulletList lst) [OrderedList attribs lst] -> blockToHtml (opts {writerIncremental = inc}) (OrderedList attribs lst) _ -> blockListToHtml opts blocks >>= (return . blockquote) else blockListToHtml opts blocks >>= (return . blockquote) blockToHtml opts (Header level lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts then (thespan ! [theclass "header-section-number"] << showSecNum secnum) +++ stringToHtml " " +++ contents else contents let contents'' = if writerTableOfContents opts then anchor ! [href $ "#" ++ writerIdentifierPrefix opts ++ "TOC"] $ contents' else contents' return $ case level of 1 -> h1 contents'' 2 -> h2 contents'' 3 -> h3 contents'' 4 -> h4 contents'' 5 -> h5 contents'' 6 -> h6 contents'' _ -> paragraph contents'' blockToHtml opts (BulletList lst) = do contents <- mapM (blockListToHtml opts) lst let attribs = if writerIncremental opts then [theclass "incremental"] else [] return $ unordList ! attribs $ contents blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (blockListToHtml opts) lst let numstyle' = camelCaseToHyphenated $ show numstyle let attribs = (if writerIncremental opts then [theclass "incremental"] else []) ++ (if startnum /= 1 then [start startnum] else []) ++ (if numstyle /= DefaultStyle then [thestyle $ "list-style-type: " ++ numstyle' ++ ";"] else []) return $ ordList ! attribs $ contents blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- liftM (dterm <<) $ inlineListToHtml opts term defs' <- mapM (liftM (ddef <<) . blockListToHtml opts) defs return $ term' : defs') lst let attribs = if writerIncremental opts then [theclass "incremental"] else [] return $ dlist ! attribs << concat contents blockToHtml opts (Table capt aligns widths headers rows') = do let alignStrings = map alignmentToString aligns captionDoc <- if null capt then return noHtml else inlineListToHtml opts capt >>= return . caption let zeros = replicate (length headers) (0.0 :: Double) let (rownums, rows'') = if all null headers then ([1..], rows') else ([0..], (headers : rows')) body' <- mapM (tableRowToHtml opts alignStrings) $ zip3 rownums (widths : repeat zeros) rows'' return $ table $ captionDoc +++ body' tableRowToHtml :: WriterOptions -> [String] -> (Int, [Double], [[Block]]) -> State WriterState Html tableRowToHtml opts alignStrings (rownum, widths, cols') = do let mkcell = if rownum == 0 then th else td let rowclass = case rownum of 0 -> "header" x | x `rem` 2 == 1 -> "odd" _ -> "even" cols'' <- sequence $ zipWith3 (\alignment columnwidth item -> tableItemToHtml opts mkcell alignment columnwidth item) alignStrings widths cols' return $ tr ! [theclass rowclass] $ toHtmlFromList cols'' alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" AlignCenter -> "center" AlignDefault -> "left" tableItemToHtml :: WriterOptions -> (Html -> Html) -> [Char] -> Double -> [Block] -> State WriterState Html tableItemToHtml opts tag' align' width' item = do contents <- blockListToHtml opts item let attrib = [align align'] ++ if width' /= 0 then [thestyle ("width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;")] else [] return $ tag' ! attrib $ contents blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html blockListToHtml opts lst = mapM (blockToHtml opts) lst >>= return . toHtmlFromList -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . toHtmlFromList -- | Convert Pandoc inline element to HTML. inlineToHtml :: WriterOptions -> Inline -> State WriterState Html inlineToHtml opts inline = case inline of (Str str) -> return $ stringToHtml str (Space) -> return $ stringToHtml " " (LineBreak) -> return br (EmDash) -> return $ stringToHtml "—" (EnDash) -> return $ stringToHtml "–" (Ellipses) -> return $ stringToHtml "…" (Apostrophe) -> return $ stringToHtml "’" (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize (Strong lst) -> inlineListToHtml opts lst >>= return . strong (Code str) -> return $ thecode << str (Strikeout lst) -> inlineListToHtml opts lst >>= return . (thespan ! [thestyle "text-decoration: line-through;"]) (SmallCaps lst) -> inlineListToHtml opts lst >>= return . (thespan ! [thestyle "font-variant: small-caps;"]) (Superscript lst) -> inlineListToHtml opts lst >>= return . sup (Subscript lst) -> inlineListToHtml opts lst >>= return . sub (Quoted quoteType lst) -> let (leftQuote, rightQuote) = case quoteType of SingleQuote -> (stringToHtml "‘", stringToHtml "’") DoubleQuote -> (stringToHtml "“", stringToHtml "”") in do contents <- inlineListToHtml opts lst return $ leftQuote +++ contents +++ rightQuote (Math t str) -> modify (\st -> st {stMath = True}) >> (case writerHTMLMathMethod opts of LaTeXMathML _ -> -- putting LaTeXMathML in container with class "LaTeX" prevents -- non-math elements on the page from being treated as math by -- the javascript return $ thespan ! [theclass "LaTeX"] $ if t == InlineMath then primHtml ("$" ++ str ++ "$") else primHtml ("$$" ++ str ++ "$$") JsMath _ -> return $ if t == InlineMath then thespan ! [theclass "math"] $ primHtml str else thediv ! [theclass "math"] $ primHtml str MimeTeX url -> return $ image ! [src (url ++ "?" ++ str), alt str, title str] GladTeX -> return $ primHtml $ "" ++ str ++ "" PlainMath -> inlineListToHtml opts (readTeXMath str) >>= return . (thespan ! [theclass "math"]) ) (TeX str) -> case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ primHtml str _ -> return noHtml (HtmlInline str) -> return $ primHtml str (Link [Code str] (s,_)) | "mailto:" `isPrefixOf` s -> return $ obfuscateLink opts str s (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt return $ obfuscateLink opts (show linkText) s (Link txt (s,tit)) -> do linkText <- inlineListToHtml opts txt return $ anchor ! ([href s] ++ if null tit then [] else [title tit]) $ linkText (Image txt (s,tit)) -> do alternate <- inlineListToHtml opts txt let alternate' = renderFragment opts alternate let attributes = [src s] ++ (if null tit then [] else [title tit]) ++ if null txt then [] else [alt alternate'] return $ image ! attributes -- note: null title included, as in Markdown.pl (Note contents) -> do st <- get let notes = stNotes st let number = (length notes) + 1 let ref = show number htmlContents <- blockListToNote opts ref contents -- push contents onto front of notes put $ st {stNotes = (htmlContents:notes)} return $ sup << anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref), theclass "footnoteRef", prefixedId opts ("fnref" ++ ref)] << ref (Cite _ il) -> inlineListToHtml opts il blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html blockListToNote opts ref blocks = -- If last block is Para or Plain, include the backlink at the end of -- that block. Otherwise, insert a new Plain block with the backlink. let backlink = [HtmlInline $ " "] blocks' = if null blocks then [] else let lastBlock = last blocks otherBlocks = init blocks in case lastBlock of (Para lst) -> otherBlocks ++ [Para (lst ++ backlink)] (Plain lst) -> otherBlocks ++ [Plain (lst ++ backlink)] _ -> otherBlocks ++ [lastBlock, Plain backlink] in do contents <- blockListToHtml opts blocks' return $ li ! [prefixedId opts ("fn" ++ ref)] $ contents