1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
|
{-
Copyright (C) 2006 John MacFarlane <jgm at berkeley dot edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Writers.HTML
Copyright : Copyright (C) 2006 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm at berkeley dot edu>
Stability : alpha
Portability : portable
Conversion of 'Pandoc' documents to HTML.
-}
module Text.Pandoc.Writers.HTML (
writeHtml,
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Entities ( encodeEntities )
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
import Data.List ( isPrefixOf, partition )
import Text.PrettyPrint.HughesPJ hiding ( Str )
-- | Convert Pandoc document to string in HTML format.
writeHtml :: WriterOptions -> Pandoc -> String
writeHtml opts (Pandoc (Meta title authors date) blocks) =
let titlePrefix = writerTitlePrefix opts in
let topTitle = if not (null titlePrefix)
then [Str titlePrefix] ++ (if not (null title)
then [Str " - "] ++ title
else [])
else title in
let head = if (writerStandalone opts)
then htmlHeader opts (Meta topTitle authors date)
else empty
titleBlocks = if (writerStandalone opts) && (not (null title)) &&
(not (writerS5 opts))
then [RawHtml "<h1 class=\"title\">", Plain title,
RawHtml "</h1>"]
else []
foot = if (writerStandalone opts)
then text "</body>\n</html>"
else empty
blocks' = replaceReferenceLinks (titleBlocks ++ blocks)
(noteBlocks, blocks'') = partition isNoteBlock blocks'
before = writerIncludeBefore opts
after = writerIncludeAfter opts
body = (if null before then empty else text before) $$
vcat (map (blockToHtml opts) blocks'') $$
footnoteSection opts noteBlocks $$
(if null after then empty else text after) in
render $ head $$ body $$ foot $$ text ""
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
footnoteSection :: WriterOptions -> [Block] -> Doc
footnoteSection opts notes =
if null notes
then empty
else inTags True "div" [("class","footnotes")] $
selfClosingTag "hr" [] $$ (inTagsIndented "ol"
(vcat $ map (blockToHtml opts) notes))
-- | Obfuscate a "mailto:" link using Javascript.
obfuscateLink :: WriterOptions -> [Inline] -> String -> Doc
obfuscateLink opts txt src =
let emailRegex = mkRegex "mailto:*([^@]*)@(.*)"
text' = render $ inlineListToHtml opts txt
src' = map toLower src in
case (matchRegex emailRegex src') of
(Just [name, domain]) ->
let domain' = substitute "." " dot " domain
at' = obfuscateChar '@' in
let linkText = if src' == ("mailto:" ++ text')
then "e"
else "'" ++ text' ++ "'"
altText = if src' == ("mailto:" ++ text')
then name ++ " at " ++ domain'
else text' ++ " (" ++ name ++ " at " ++
domain' ++ ")" in
if writerStrictMarkdown opts
then inTags False "a" [("href", obfuscateString src')] $
text $ obfuscateString text'
else inTags False "script" [("type", "text/javascript")]
(text ("\n<!--\nh='" ++
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
obfuscateString name ++ "';e=n+a+h;\n" ++
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) <>
inTagsSimple "noscript" (text (obfuscateString altText))
_ -> inTags False "a" [("href", src)] (text text') -- malformed email
-- | Obfuscate character as entity.
obfuscateChar :: Char -> String
obfuscateChar char =
let num = ord char in
let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in
"&#" ++ numstr ++ ";"
-- | Obfuscate string using entities.
obfuscateString :: String -> String
obfuscateString = concatMap obfuscateChar
-- | Return an HTML header with appropriate bibliographic information.
htmlHeader :: WriterOptions -> Meta -> Doc
htmlHeader opts (Meta title authors date) =
let titletext = inTagsSimple "title" (wrap opts title)
authortext = if (null authors)
then empty
else selfClosingTag "meta" [("name", "author"),
("content",
joinWithSep ", " (map stringToSGML authors))]
datetext = if (date == "")
then empty
else selfClosingTag "meta" [("name", "date"),
("content", stringToSGML date)] in
text (writerHeader opts) $$ authortext $$ datetext $$ titletext $$
text "</head>\n<body>"
-- | Take list of inline elements and return wrapped doc.
wrap :: WriterOptions -> [Inline] -> Doc
wrap opts lst = fsep $ map (inlineListToHtml opts) (splitBy Space lst)
-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> Doc
blockToHtml opts Blank = text ""
blockToHtml opts Null = empty
blockToHtml opts (Plain lst) = wrap opts lst
blockToHtml opts (Para lst) = inTagsIndented "p" $ wrap opts lst
blockToHtml opts (BlockQuote blocks) =
if (writerS5 opts)
then -- in S5, treat list in blockquote specially
-- if default is incremental, make it nonincremental;
-- otherwise incremental
let inc = not (writerIncremental opts) in
case blocks of
[BulletList lst] -> blockToHtml (opts {writerIncremental =
inc}) (BulletList lst)
[OrderedList lst] -> blockToHtml (opts {writerIncremental =
inc}) (OrderedList lst)
otherwise -> inTagsIndented "blockquote" $
vcat $ map (blockToHtml opts) blocks
else inTagsIndented "blockquote" $ vcat $ map (blockToHtml opts) blocks
blockToHtml opts (Note ref lst) =
let contents = (vcat $ map (blockToHtml opts) lst) in
inTags True "li" [("id", "fn" ++ ref)] $
contents <> inTags False "a" [("href", "#fnref" ++ ref),
("class", "footnoteBacklink"),
("title", "Jump back to footnote " ++ ref)]
(text "↩")
blockToHtml opts (Key _ _) = empty
blockToHtml opts (CodeBlock str) =
text "<pre><code>" <> text (escapeSGML str) <> text "\n</code></pre>"
blockToHtml opts (RawHtml str) = text str
blockToHtml opts (BulletList lst) =
let attribs = if (writerIncremental opts)
then [("class","incremental")]
else [] in
inTags True "ul" attribs $ vcat $ map (listItemToHtml opts) lst
blockToHtml opts (OrderedList lst) =
let attribs = if (writerIncremental opts)
then [("class","incremental")]
else [] in
inTags True "ol" attribs $ vcat $ map (listItemToHtml opts) lst
blockToHtml opts HorizontalRule = selfClosingTag "hr" []
blockToHtml opts (Header level lst) =
let contents = wrap opts lst in
if ((level > 0) && (level <= 6))
then inTagsSimple ("h" ++ show level) contents
else inTagsSimple "p" contents
blockToHtml opts (Table caption aligns widths headers rows) =
let alignStrings = map alignmentToString aligns
captionDoc = if null caption
then empty
else inTagsSimple "caption"
(inlineListToHtml opts caption) in
inTagsIndented "table" $ captionDoc $$
(colHeadsToHtml opts alignStrings widths headers) $$
(vcat $ map (tableRowToHtml opts alignStrings) rows)
colHeadsToHtml opts alignStrings widths headers =
let heads = zipWith3
(\align width item -> tableItemToHtml opts "th" align width item)
alignStrings widths headers in
inTagsIndented "tr" $ vcat heads
alignmentToString alignment = case alignment of
AlignLeft -> "left"
AlignRight -> "right"
AlignCenter -> "center"
AlignDefault -> "left"
tableRowToHtml opts aligns cols =
inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToHtml opts "td") aligns (repeat 0) cols
tableItemToHtml opts tag align width item =
let attrib = [("align", align)] ++
if (width /= 0)
then [("style", "{width: " ++
show (truncate (100*width)) ++ "%;}")]
else [] in
inTags False tag attrib $ vcat $ map (blockToHtml opts) item
listItemToHtml :: WriterOptions -> [Block] -> Doc
listItemToHtml opts list =
inTagsSimple "li" $ vcat $ map (blockToHtml opts) list
-- | Convert list of Pandoc inline elements to HTML.
inlineListToHtml :: WriterOptions -> [Inline] -> Doc
inlineListToHtml opts lst = hcat (map (inlineToHtml opts) lst)
-- | Convert Pandoc inline element to HTML.
inlineToHtml :: WriterOptions -> Inline -> Doc
inlineToHtml opts (Emph lst) =
inTagsSimple "em" (inlineListToHtml opts lst)
inlineToHtml opts (Strong lst) =
inTagsSimple "strong" (inlineListToHtml opts lst)
inlineToHtml opts (Code str) =
inTagsSimple "code" $ text (escapeSGML str)
inlineToHtml opts (Quoted SingleQuote lst) =
text "‘" <> (inlineListToHtml opts lst) <> text "’"
inlineToHtml opts (Quoted DoubleQuote lst) =
text "“" <> (inlineListToHtml opts lst) <> text "”"
inlineToHtml opts EmDash = text "—"
inlineToHtml opts EnDash = text "–"
inlineToHtml opts Ellipses = text "…"
inlineToHtml opts Apostrophe = text "’"
inlineToHtml opts (Str str) = text $ stringToSGML str
inlineToHtml opts (TeX str) = text $ escapeSGML str
inlineToHtml opts (HtmlInline str) = text str
inlineToHtml opts (LineBreak) = selfClosingTag "br" []
inlineToHtml opts Space = space
inlineToHtml opts (Link txt (Src src tit)) =
let title = stringToSGML tit in
if (isPrefixOf "mailto:" src)
then obfuscateLink opts txt src
else inTags False "a" ([("href", escapeSGML src)] ++
if null tit then [] else [("title", title)])
(inlineListToHtml opts txt)
inlineToHtml opts (Link txt (Ref ref)) =
char '[' <> (inlineListToHtml opts txt) <> text "][" <>
(inlineListToHtml opts ref) <> char ']'
-- this is what markdown does, for better or worse
inlineToHtml opts (Image alt (Src source tit)) =
let title = stringToSGML tit
alternate = render $ inlineListToHtml opts alt in
selfClosingTag "img" $ [("src", source)] ++
(if null alternate then [] else [("alt", alternate)]) ++
[("title", title)] -- note: null title is included, as in Markdown.pl
inlineToHtml opts (Image alternate (Ref ref)) =
text "![" <> (inlineListToHtml opts alternate) <> text "][" <>
(inlineListToHtml opts ref) <> char ']'
inlineToHtml opts (NoteRef ref) =
inTags False "sup" [("class", "footnoteRef"), ("id", "fnref" ++ ref)]
(inTags False "a" [("href", "#fn" ++ ref)] $ text ref)
|