From 030d94e1c3cd4be0ab9d7c16fccfa973cedb5d38 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Thu, 4 Jan 2007 22:52:16 +0000 Subject: Refactored SGML escaping functions and "in tag" functions to Text/Shared/Pandoc. (escapeSGML, stringToSGML, inTag, inTagSimple, inTagIndented, selfClosingTag) These can be used by both the HTML and Docbook writers. git-svn-id: https://pandoc.googlecode.com/svn/trunk@417 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Writers/HTML.hs | 78 ++++++----------------------------------- 1 file changed, 10 insertions(+), 68 deletions(-) (limited to 'src/Text/Pandoc/Writers/HTML.hs') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 8de1de43f..b42d78eb0 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -29,12 +29,10 @@ Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml, - stringToSmartHtml, - stringToHtml ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Html ( stringToHtmlString ) +import Text.Pandoc.Entities ( encodeEntities ) import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) @@ -115,61 +113,6 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar --- | Escape string, preserving character entities and quote. -stringToHtml :: String -> String -stringToHtml str = escapePreservingRegex stringToHtmlString - (mkRegex "\"|(&[[:alnum:]]*;)") str - --- | Escape string as in 'stringToHtml' but add smart typography filter. -stringToSmartHtml :: String -> String -stringToSmartHtml = - let escapeDoubleQuotes = - gsub "(\"|")" "”" . -- rest are right quotes - gsub "(\"|")(&r[sd]quo;)" "”\\2" . - -- never left quo before right quo - gsub "(&l[sd]quo;)(\"|")" "\\2“" . - -- never right quo after left quo - gsub "([ \t])(\"|")" "\\1“" . - -- never right quo after space - gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left - gsub "(\"|")('|`|‘)" "”’" . - -- right if it got through last filter - gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" . - -- "'word left - gsub "``" "“" . - gsub "''" "”" - escapeSingleQuotes = - gsub "'" "’" . -- otherwise right - gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo - gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo - gsub "([ \t])'" "\\1‘" . -- never right quo after space - gsub "`" "‘" . -- ` is left - gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right - gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left - gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left - gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive - gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left - gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. - escapeDashes = - gsub " ?-- ?" "—" . - gsub " ?--- ?" "—" . - gsub "([0-9])--?([0-9])" "\\1–\\2" - escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" in - escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . - escapeEllipses . stringToHtml - --- | Escape code string as needed for HTML. -codeStringToHtml :: String -> String -codeStringToHtml [] = [] -codeStringToHtml (x:xs) = case x of - '&' -> "&" ++ codeStringToHtml xs - '<' -> "<" ++ codeStringToHtml xs - _ -> x:(codeStringToHtml xs) - --- | Escape string to HTML appropriate for attributes -attributeStringToHtml :: String -> String -attributeStringToHtml = gsub "\"" """ - -- | Returns an HTML header with appropriate bibliographic information. htmlHeader :: WriterOptions -> Meta -> String htmlHeader options (Meta title authors date) = @@ -178,12 +121,12 @@ htmlHeader options (Meta title authors date) = authortext = if (null authors) then "" else "\n" datetext = if (date == "") then "" else "\n" in + (stringToSGML options date) ++ "\" />\n" in (writerHeader options) ++ authortext ++ datetext ++ titletext ++ "\n\n" @@ -216,7 +159,7 @@ blockToHtml options (Note ref lst) = "\">↩\n" blockToHtml options (Key _ _) = "" blockToHtml options (CodeBlock str) = - "
" ++ (codeStringToHtml str) ++ "\n
\n" + "
" ++ (escapeSGML str) ++ "\n
\n" blockToHtml options (RawHtml str) = str blockToHtml options (BulletList lst) = let attribs = if (writerIncremental options) @@ -255,18 +198,17 @@ inlineToHtml options (Emph lst) = inlineToHtml options (Strong lst) = "" ++ (inlineListToHtml options lst) ++ "" inlineToHtml options (Code str) = - "" ++ (codeStringToHtml str) ++ "" -inlineToHtml options (Str str) = - if (writerSmart options) then stringToSmartHtml str else stringToHtml str -inlineToHtml options (TeX str) = (codeStringToHtml str) + "" ++ (escapeSGML str) ++ "" +inlineToHtml options (Str str) = stringToSGML options str +inlineToHtml options (TeX str) = (escapeSGML str) inlineToHtml options (HtmlInline str) = str inlineToHtml options (LineBreak) = "
\n" inlineToHtml options Space = " " inlineToHtml options (Link text (Src src tit)) = - let title = attributeStringToHtml tit in + let title = stringToSGML options tit in if (isPrefixOf "mailto:" src) then obfuscateLink options text src - else "" else ">") ++ (inlineListToHtml options text) ++ "" inlineToHtml options (Link text (Ref ref)) = @@ -274,7 +216,7 @@ inlineToHtml options (Link text (Ref ref)) = (inlineListToHtml options ref) ++ "]" -- this is what markdown does, for better or worse inlineToHtml options (Image alt (Src source tit)) = - let title = attributeStringToHtml tit + let title = stringToSGML options tit alternate = inlineListToHtml options alt in "