From d06417125dd4d8cb177abd2d472c0c1cad4c49be Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sat, 27 Jan 2007 03:04:40 +0000 Subject: Changes in entity handling: + Entities are parsed (and unicode characters returned) in both Markdown and HTML readers. + Parsers characterEntity, namedEntity, decimalEntity, hexEntity added to Entities.hs; these parse a string and return a unicode character. + Changed 'entity' parser in HTML reader to use the 'characterEntity' parser from Entities.hs. + Added new 'entity' parser to Markdown reader, and added '&' as a special character. Adjusted test suite accordingly since now we get 'Str "AT",Str "&",Str "T"' instead of 'Str "AT&T".. + stringToSGML moved to Entities.hs. escapeSGML removed as redundant, given encodeEntities. + stringToSGML, encodeEntities, and specialCharToEntity are given a boolean parameter that causes only numerical entities to be used. This is used in the docbook writer. The HTML writer uses named entities where possible, but not all docbook-consumers know about the named entities without special instructions, so it seems safer to use numerical entities there. + decodeEntities is rewritten in a way that avoids Text.Regex, using the new parsers. + charToEntity and charToNumericalEntity added to Entities.hs. + Moved specialCharToEntity from Shared.hs to Entities.hs. + Removed unneeded 'decodeEntities' from 'str' parser in HTML and Markdown readers. + Removed sgmlHexEntity, sgmlDecimalEntity, sgmlNamedEntity, and sgmlCharacterEntity from Shared.hs. + Modified Docbook writer so that it doesn't rely on Text.Regex for detecting "mailto" links. git-svn-id: https://pandoc.googlecode.com/svn/trunk@515 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Writers/HTML.hs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/Text/Pandoc/Writers/HTML.hs') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 5465e125d..7c89d6352 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -32,7 +32,7 @@ module Text.Pandoc.Writers.HTML ( ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Entities ( encodeEntities ) +import Text.Pandoc.Entities ( encodeEntities, stringToSGML ) import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) @@ -127,11 +127,11 @@ htmlHeader opts (Meta title authors date) = then empty else selfClosingTag "meta" [("name", "author"), ("content", - joinWithSep ", " (map stringToSGML authors))] + joinWithSep ", " (map (stringToSGML False) authors))] datetext = if (date == "") then empty else selfClosingTag "meta" [("name", "date"), - ("content", stringToSGML date)] in + ("content", stringToSGML False date)] in text (writerHeader opts) $$ authortext $$ datetext $$ titletext $$ text "\n" @@ -168,7 +168,7 @@ blockToHtml opts (Note ref lst) = (text "↩") blockToHtml opts (Key _ _) = empty blockToHtml opts (CodeBlock str) = - text "
" <> text (escapeSGML str) <> text "\n
" + text "
" <> text (encodeEntities False str) <> text "\n
" blockToHtml opts (RawHtml str) = text str blockToHtml opts (BulletList lst) = let attribs = if (writerIncremental opts) @@ -234,7 +234,7 @@ inlineToHtml opts (Emph lst) = inlineToHtml opts (Strong lst) = inTagsSimple "strong" (inlineListToHtml opts lst) inlineToHtml opts (Code str) = - inTagsSimple "code" $ text (escapeSGML str) + inTagsSimple "code" $ text (encodeEntities False str) inlineToHtml opts (Quoted SingleQuote lst) = text "‘" <> (inlineListToHtml opts lst) <> text "’" inlineToHtml opts (Quoted DoubleQuote lst) = @@ -243,16 +243,16 @@ inlineToHtml opts EmDash = text "—" inlineToHtml opts EnDash = text "–" inlineToHtml opts Ellipses = text "…" inlineToHtml opts Apostrophe = text "’" -inlineToHtml opts (Str str) = text $ stringToSGML str -inlineToHtml opts (TeX str) = text $ escapeSGML str +inlineToHtml opts (Str str) = text $ stringToSGML False str +inlineToHtml opts (TeX str) = text $ encodeEntities False str inlineToHtml opts (HtmlInline str) = text str inlineToHtml opts (LineBreak) = selfClosingTag "br" [] inlineToHtml opts Space = space inlineToHtml opts (Link txt (Src src tit)) = - let title = stringToSGML tit in + let title = stringToSGML False tit in if (isPrefixOf "mailto:" src) then obfuscateLink opts txt src - else inTags False "a" ([("href", escapeSGML src)] ++ + else inTags False "a" ([("href", encodeEntities False src)] ++ if null tit then [] else [("title", title)]) (inlineListToHtml opts txt) inlineToHtml opts (Link txt (Ref ref)) = @@ -260,7 +260,7 @@ inlineToHtml opts (Link txt (Ref ref)) = (inlineListToHtml opts ref) <> char ']' -- this is what markdown does, for better or worse inlineToHtml opts (Image alt (Src source tit)) = - let title = stringToSGML tit + let title = stringToSGML False tit alternate = render $ inlineListToHtml opts alt in selfClosingTag "img" $ [("src", source)] ++ (if null alternate then [] else [("alt", alternate)]) ++ -- cgit v1.2.3