aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/HTML.hs
blob: 4c869ac21593245e9efac95367ca34bf84aa1ff5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
{-
Copyright (C) 2006 John MacFarlane <jgm at berkeley dot edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.Writers.HTML 
   Copyright   : Copyright (C) 2006 John MacFarlane
   License     : GNU GPL, version 2 or above 

   Maintainer  : John MacFarlane <jgm at berkeley dot edu>
   Stability   : alpha
   Portability : portable

Conversion of 'Pandoc' documents to HTML.
-}
module Text.Pandoc.Writers.HTML ( 
                                 writeHtml,
                                ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Entities ( encodeEntities )
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
import Data.List ( isPrefixOf, partition )
import Text.PrettyPrint.HughesPJ hiding ( Str )

-- | Convert Pandoc document to string in HTML format.
writeHtml :: WriterOptions -> Pandoc -> String
writeHtml opts (Pandoc (Meta title authors date) blocks) = 
  let titlePrefix = writerTitlePrefix opts in
  let topTitle = if not (null titlePrefix)
                    then [Str titlePrefix] ++ (if not (null title) 
                                                  then [Str " - "] ++ title
                                                  else [])
                    else title in
  let head = if (writerStandalone opts)
                then htmlHeader opts (Meta topTitle authors date)
                else empty 
      titleBlocks = if (writerStandalone opts) && (not (null title)) && 
                    (not (writerS5 opts))
                       then [RawHtml "<h1 class=\"title\">", Plain title, 
                             RawHtml "</h1>"]
                       else []
      foot = if (writerStandalone opts) 
               then text "</body>\n</html>"
               else empty 
      blocks' = replaceReferenceLinks (titleBlocks ++ blocks)
      (noteBlocks, blocks'') = partition isNoteBlock blocks' 
      before = writerIncludeBefore opts
      after = writerIncludeAfter opts
      body = (if null before then empty else text before) $$
             vcat (map (blockToHtml opts) blocks'') $$
             footnoteSection opts noteBlocks $$
             (if null after then empty else text after) in
  render $ head $$ body $$ foot $$ text ""

-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
footnoteSection :: WriterOptions -> [Block] -> Doc
footnoteSection opts notes =
  if null notes 
    then empty
    else inTags True "div" [("class","footnotes")] $
         selfClosingTag "hr" [] $$ (inTagsIndented "ol" 
         (vcat $ map (blockToHtml opts) notes))

-- | Obfuscate a "mailto:" link using Javascript.
obfuscateLink :: WriterOptions -> [Inline] -> String -> Doc
obfuscateLink opts txt src =
  let emailRegex = mkRegex "mailto:*([^@]*)@(.*)"
      text' = render $ inlineListToHtml opts txt 
      src' = map toLower src in
  case (matchRegex emailRegex src') of
    (Just [name, domain]) ->
      let domain' = gsub "\\." " dot " domain
          at' = obfuscateChar '@' in
      let linkText = if src' == ("mailto:" ++ text')
                        then "e"
                        else "'" ++ text' ++ "'" 
          altText  = if src' == ("mailto:" ++ text')
                        then name ++ " at " ++ domain'
                        else text' ++ " (" ++ name ++ " at " ++ 
                             domain' ++ ")" in 
      if writerStrictMarkdown opts
        then inTags False "a" [("href", obfuscateString src')] $
             text $ obfuscateString text'
        else inTags False "script" [("type", "text/javascript")] 
             (text ("\n<!--\nh='" ++ 
             obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ 
             obfuscateString name ++ "';e=n+a+h;\n" ++
             "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++ 
             linkText  ++ "+'<\\/'+'a'+'>');\n// -->\n")) <> 
             inTagsSimple "noscript" (text (obfuscateString altText))
    _ -> inTags False "a" [("href", src)]  (text text') -- malformed email

-- | Obfuscate character as entity.
obfuscateChar :: Char -> String
obfuscateChar char = 
  let num = ord char in
  let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in
  "&#" ++ numstr ++ ";"

-- | Obfuscate string using entities.
obfuscateString :: String -> String
obfuscateString = concatMap obfuscateChar

-- | Return an HTML header with appropriate bibliographic information.
htmlHeader :: WriterOptions -> Meta -> Doc
htmlHeader opts (Meta title authors date) = 
  let titletext = inTagsSimple "title" (wrap opts title)
      authortext = if (null authors) 
                      then empty 
                      else selfClosingTag "meta" [("name", "author"), 
                           ("content", 
                            joinWithSep ", " (map stringToSGML authors))]  
      datetext = if (date == "")
                    then empty 
                    else selfClosingTag "meta" [("name", "date"),
                         ("content", stringToSGML date)] in
  text (writerHeader opts) $$ authortext $$ datetext $$ titletext $$ 
  text "</head>\n<body>"

-- | Take list of inline elements and return wrapped doc.
wrap :: WriterOptions -> [Inline] -> Doc
wrap opts lst = fsep $ map (inlineListToHtml opts) (splitBy Space lst)

-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> Doc
blockToHtml opts Blank = text ""
blockToHtml opts Null = empty
blockToHtml opts (Plain lst) = wrap opts lst 
blockToHtml opts (Para lst) = inTagsIndented "p" $ wrap opts lst
blockToHtml opts (BlockQuote blocks) = 
  if (writerS5 opts)
     then  -- in S5, treat list in blockquote specially
           -- if default is incremental, make it nonincremental; 
           -- otherwise incremental
           let inc = not (writerIncremental opts) in
           case blocks of 
              [BulletList lst] -> blockToHtml (opts {writerIncremental = 
                                                        inc}) (BulletList lst)
              [OrderedList lst] -> blockToHtml (opts {writerIncremental =
                                                       inc}) (OrderedList lst)
              otherwise         -> inTagsIndented "blockquote" $
                                   vcat $ map (blockToHtml opts) blocks
     else inTagsIndented "blockquote" $ vcat $ map (blockToHtml opts) blocks
blockToHtml opts (Note ref lst) = 
  let contents = (vcat $ map (blockToHtml opts) lst) in
  inTags True "li" [("id", "fn" ++ ref)] $
  contents <> inTags False "a" [("href", "#fnref" ++ ref), 
                                ("class", "footnoteBacklink"), 
                                ("title", "Jump back to footnote " ++ ref)] 
                     (text "&#8617;")
blockToHtml opts (Key _ _) = empty
blockToHtml opts (CodeBlock str) = 
  text "<pre><code>" <> text (escapeSGML str) <> text "\n</code></pre>"
blockToHtml opts (RawHtml str) = text str 
blockToHtml opts (BulletList lst) = 
  let attribs = if (writerIncremental opts)
                   then [("class","incremental")]
                   else [] in
  inTags True "ul" attribs $ vcat $ map (listItemToHtml opts) lst 
blockToHtml opts (OrderedList lst) = 
  let attribs = if (writerIncremental opts)
                   then [("class","incremental")]
                   else [] in
  inTags True "ol" attribs $ vcat $ map (listItemToHtml opts) lst 
blockToHtml opts HorizontalRule = selfClosingTag "hr" []
blockToHtml opts (Header level lst) = 
  let contents = wrap opts lst in
  if ((level > 0) && (level <= 6))
      then inTagsSimple ("h" ++ show level) contents 
      else inTagsSimple "p" contents 

listItemToHtml :: WriterOptions -> [Block] -> Doc
listItemToHtml opts list = 
  inTagsSimple "li" $ vcat $ map (blockToHtml opts) list

-- | Convert list of Pandoc inline elements to HTML.
inlineListToHtml :: WriterOptions -> [Inline] -> Doc
inlineListToHtml opts lst = hcat (map (inlineToHtml opts) lst)

-- | Convert Pandoc inline element to HTML.
inlineToHtml :: WriterOptions -> Inline -> Doc
inlineToHtml opts (Emph lst) = 
  inTagsSimple "em" (inlineListToHtml opts lst)
inlineToHtml opts (Strong lst) = 
  inTagsSimple "strong" (inlineListToHtml opts lst)
inlineToHtml opts (Code str) =  
  inTagsSimple "code" $ text (escapeSGML str)
inlineToHtml opts (Quoted SingleQuote lst) =
  text "&lsquo;" <> (inlineListToHtml opts lst) <> text "&rsquo;"
inlineToHtml opts (Quoted DoubleQuote lst) =
  text "&ldquo;" <> (inlineListToHtml opts lst) <> text "&rdquo;"
inlineToHtml opts EmDash = text "&mdash;"
inlineToHtml opts EnDash = text "&ndash;"
inlineToHtml opts Ellipses = text "&hellip;"
inlineToHtml opts Apostrophe = text "&rsquo;"
inlineToHtml opts (Str str) = text $ stringToSGML str
inlineToHtml opts (TeX str) = text $ escapeSGML str
inlineToHtml opts (HtmlInline str) = text str
inlineToHtml opts (LineBreak) = selfClosingTag "br" []
inlineToHtml opts Space = space
inlineToHtml opts (Link txt (Src src tit)) = 
  let title = stringToSGML tit in
  if (isPrefixOf "mailto:" src)
     then obfuscateLink opts txt src 
     else inTags False "a" ([("href", escapeSGML src)] ++ 
          if null tit then [] else [("title", title)]) 
          (inlineListToHtml opts txt)
inlineToHtml opts (Link txt (Ref ref)) = 
  char '[' <> (inlineListToHtml opts txt) <> text "][" <> 
  (inlineListToHtml opts ref) <> char ']'
  -- this is what markdown does, for better or worse
inlineToHtml opts (Image alt (Src source tit)) = 
  let title = stringToSGML tit
      alternate = render $ inlineListToHtml opts alt in 
  selfClosingTag "img" $ [("src", source)] ++  
  (if null tit then [] else [("title", title)]) ++ 
  (if null alternate then [] else [("alt", alternate)])  
inlineToHtml opts (Image alternate (Ref ref)) = 
  text "![" <> (inlineListToHtml opts alternate) <> text "][" <> 
  (inlineListToHtml opts ref) <> char ']'
inlineToHtml opts (NoteRef ref) = 
  inTags False "sup" [("class", "footnoteRef"), ("id", "fnref" ++ ref)]
  (inTags False "a" [("href", "#fn" ++ ref)] $ text ref)