aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/HTML.hs
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-27 03:04:40 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-27 03:04:40 +0000
commitd06417125dd4d8cb177abd2d472c0c1cad4c49be (patch)
treea3a6d7991f01e4184b678a5fcce023947d015451 /src/Text/Pandoc/Writers/HTML.hs
parentf2de08864ee9870147839389554b9353631d21f8 (diff)
downloadpandoc-d06417125dd4d8cb177abd2d472c0c1cad4c49be.tar.gz
Changes in entity handling:
+ Entities are parsed (and unicode characters returned) in both Markdown and HTML readers. + Parsers characterEntity, namedEntity, decimalEntity, hexEntity added to Entities.hs; these parse a string and return a unicode character. + Changed 'entity' parser in HTML reader to use the 'characterEntity' parser from Entities.hs. + Added new 'entity' parser to Markdown reader, and added '&' as a special character. Adjusted test suite accordingly since now we get 'Str "AT",Str "&",Str "T"' instead of 'Str "AT&T".. + stringToSGML moved to Entities.hs. escapeSGML removed as redundant, given encodeEntities. + stringToSGML, encodeEntities, and specialCharToEntity are given a boolean parameter that causes only numerical entities to be used. This is used in the docbook writer. The HTML writer uses named entities where possible, but not all docbook-consumers know about the named entities without special instructions, so it seems safer to use numerical entities there. + decodeEntities is rewritten in a way that avoids Text.Regex, using the new parsers. + charToEntity and charToNumericalEntity added to Entities.hs. + Moved specialCharToEntity from Shared.hs to Entities.hs. + Removed unneeded 'decodeEntities' from 'str' parser in HTML and Markdown readers. + Removed sgmlHexEntity, sgmlDecimalEntity, sgmlNamedEntity, and sgmlCharacterEntity from Shared.hs. + Modified Docbook writer so that it doesn't rely on Text.Regex for detecting "mailto" links. git-svn-id: https://pandoc.googlecode.com/svn/trunk@515 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Writers/HTML.hs')
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs20
1 files changed, 10 insertions, 10 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 5465e125d..7c89d6352 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -32,7 +32,7 @@ module Text.Pandoc.Writers.HTML (
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( encodeEntities )
+import Text.Pandoc.Entities ( encodeEntities, stringToSGML )
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
@@ -127,11 +127,11 @@ htmlHeader opts (Meta title authors date) =
then empty
else selfClosingTag "meta" [("name", "author"),
("content",
- joinWithSep ", " (map stringToSGML authors))]
+ joinWithSep ", " (map (stringToSGML False) authors))]
datetext = if (date == "")
then empty
else selfClosingTag "meta" [("name", "date"),
- ("content", stringToSGML date)] in
+ ("content", stringToSGML False date)] in
text (writerHeader opts) $$ authortext $$ datetext $$ titletext $$
text "</head>\n<body>"
@@ -168,7 +168,7 @@ blockToHtml opts (Note ref lst) =
(text "&#8617;")
blockToHtml opts (Key _ _) = empty
blockToHtml opts (CodeBlock str) =
- text "<pre><code>" <> text (escapeSGML str) <> text "\n</code></pre>"
+ text "<pre><code>" <> text (encodeEntities False str) <> text "\n</code></pre>"
blockToHtml opts (RawHtml str) = text str
blockToHtml opts (BulletList lst) =
let attribs = if (writerIncremental opts)
@@ -234,7 +234,7 @@ inlineToHtml opts (Emph lst) =
inlineToHtml opts (Strong lst) =
inTagsSimple "strong" (inlineListToHtml opts lst)
inlineToHtml opts (Code str) =
- inTagsSimple "code" $ text (escapeSGML str)
+ inTagsSimple "code" $ text (encodeEntities False str)
inlineToHtml opts (Quoted SingleQuote lst) =
text "&lsquo;" <> (inlineListToHtml opts lst) <> text "&rsquo;"
inlineToHtml opts (Quoted DoubleQuote lst) =
@@ -243,16 +243,16 @@ inlineToHtml opts EmDash = text "&mdash;"
inlineToHtml opts EnDash = text "&ndash;"
inlineToHtml opts Ellipses = text "&hellip;"
inlineToHtml opts Apostrophe = text "&rsquo;"
-inlineToHtml opts (Str str) = text $ stringToSGML str
-inlineToHtml opts (TeX str) = text $ escapeSGML str
+inlineToHtml opts (Str str) = text $ stringToSGML False str
+inlineToHtml opts (TeX str) = text $ encodeEntities False str
inlineToHtml opts (HtmlInline str) = text str
inlineToHtml opts (LineBreak) = selfClosingTag "br" []
inlineToHtml opts Space = space
inlineToHtml opts (Link txt (Src src tit)) =
- let title = stringToSGML tit in
+ let title = stringToSGML False tit in
if (isPrefixOf "mailto:" src)
then obfuscateLink opts txt src
- else inTags False "a" ([("href", escapeSGML src)] ++
+ else inTags False "a" ([("href", encodeEntities False src)] ++
if null tit then [] else [("title", title)])
(inlineListToHtml opts txt)
inlineToHtml opts (Link txt (Ref ref)) =
@@ -260,7 +260,7 @@ inlineToHtml opts (Link txt (Ref ref)) =
(inlineListToHtml opts ref) <> char ']'
-- this is what markdown does, for better or worse
inlineToHtml opts (Image alt (Src source tit)) =
- let title = stringToSGML tit
+ let title = stringToSGML False tit
alternate = render $ inlineListToHtml opts alt in
selfClosingTag "img" $ [("src", source)] ++
(if null alternate then [] else [("alt", alternate)]) ++