From dc6925542c6aa60078c370e7e356b42ea216b1b7 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sun, 28 Jan 2007 00:04:43 +0000 Subject: + Simplified entity handling by removing stringToSGML from Entities.hs. It is no longer needed now that all entities are processed in the markdown and HTML readers. All calls to stringToSGML have been replaced by calls to encodeEntities. + Since inTag's attribute handling already encodes entities, calls to encodeEntities are no longer needed for attribute values, so they've been removed. + The HTML and Markdown readers now call decodeEntities on all raw strings (e.g. authors, dates, link titles), to ensure that no unprocessed entities are included in the native representation of the document. (In the HTML reader, most of this work is done by a change in extractAttributeName.) + The result is a small speed improvement (around 5% on my benchmark) and cleaner code. git-svn-id: https://pandoc.googlecode.com/svn/trunk@519 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Writers/Docbook.hs | 14 +++++++------- src/Text/Pandoc/Writers/HTML.hs | 18 ++++++++---------- 2 files changed, 15 insertions(+), 17 deletions(-) (limited to 'src/Text/Pandoc/Writers') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 7e50f8ede..405b2978a 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -32,7 +32,7 @@ module Text.Pandoc.Writers.Docbook ( ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Entities ( encodeEntities, stringToSGML ) +import Text.Pandoc.Entities ( encodeEntities ) import Data.Char ( toLower, ord ) import Data.List ( isPrefixOf, partition, drop ) import Text.PrettyPrint.HughesPJ hiding ( Str ) @@ -64,8 +64,8 @@ authorToDocbook name = inTagsIndented "author" $ then -- last name first let (lastname, rest) = break (==',') name firstname = removeLeadingSpace rest in - inTagsSimple "firstname" (text $ stringToSGML firstname) <> - inTagsSimple "surname" (text $ stringToSGML lastname) + inTagsSimple "firstname" (text $ encodeEntities firstname) <> + inTagsSimple "surname" (text $ encodeEntities lastname) else -- last name last let namewords = words name lengthname = length namewords @@ -73,8 +73,8 @@ authorToDocbook name = inTagsIndented "author" $ 0 -> ("","") 1 -> ("", name) n -> (joinWithSep " " (take (n-1) namewords), last namewords) in - inTagsSimple "firstname" (text $ stringToSGML firstname) $$ - inTagsSimple "surname" (text $ stringToSGML lastname) + inTagsSimple "firstname" (text $ encodeEntities firstname) $$ + inTagsSimple "surname" (text $ encodeEntities lastname) -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String @@ -86,7 +86,7 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) = then inTagsIndented "articleinfo" $ (inTagsSimple "title" (wrap opts title)) $$ (vcat (map authorToDocbook authors)) $$ - (inTagsSimple "date" (text $ stringToSGML date)) + (inTagsSimple "date" (text $ encodeEntities date)) else empty blocks' = replaceReferenceLinks blocks (noteBlocks, blocks'') = partition isNoteBlock blocks' @@ -227,7 +227,7 @@ inlineToDocbook opts (Image alt (Src src tit)) = then empty else inTagsIndented "objectinfo" $ inTagsIndented "title" - (text $ stringToSGML tit) in + (text $ encodeEntities tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 0f2a2b5dc..8a654e3c9 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -32,7 +32,7 @@ module Text.Pandoc.Writers.HTML ( ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Entities ( encodeEntities, stringToSGML ) +import Text.Pandoc.Entities ( encodeEntities ) import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) @@ -127,11 +127,11 @@ htmlHeader opts (Meta title authors date) = then empty else selfClosingTag "meta" [("name", "author"), ("content", - joinWithSep ", " (map stringToSGML authors))] + joinWithSep ", " (map encodeEntities authors))] datetext = if (date == "") then empty else selfClosingTag "meta" [("name", "date"), - ("content", stringToSGML date)] in + ("content", encodeEntities date)] in text (writerHeader opts) $$ authortext $$ datetext $$ titletext $$ text "\n" @@ -248,20 +248,18 @@ inlineToHtml opts (TeX str) = text $ encodeEntities str inlineToHtml opts (HtmlInline str) = text str inlineToHtml opts (LineBreak) = selfClosingTag "br" [] inlineToHtml opts Space = space -inlineToHtml opts (Link txt (Src src tit)) = - let title = stringToSGML tit in +inlineToHtml opts (Link txt (Src src title)) = if (isPrefixOf "mailto:" src) then obfuscateLink opts txt src - else inTags False "a" ([("href", encodeEntities src)] ++ - if null tit then [] else [("title", title)]) + else inTags False "a" ([("href", src)] ++ + if null title then [] else [("title", title)]) (inlineListToHtml opts txt) inlineToHtml opts (Link txt (Ref ref)) = char '[' <> (inlineListToHtml opts txt) <> text "][" <> (inlineListToHtml opts ref) <> char ']' -- this is what markdown does, for better or worse -inlineToHtml opts (Image alt (Src source tit)) = - let title = stringToSGML tit - alternate = render $ inlineListToHtml opts alt in +inlineToHtml opts (Image alt (Src source title)) = + let alternate = render $ inlineListToHtml opts alt in selfClosingTag "img" $ [("src", source)] ++ (if null alternate then [] else [("alt", alternate)]) ++ [("title", title)] -- note: null title is included, as in Markdown.pl -- cgit v1.2.3