aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-04 22:52:16 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-04 22:52:16 +0000
commit030d94e1c3cd4be0ab9d7c16fccfa973cedb5d38 (patch)
tree325452ca83818bc0776e257f44342776f9bff78d /src/Text
parent24f3710e0911fa8ebe6070ef83c6206e54a46f1a (diff)
downloadpandoc-030d94e1c3cd4be0ab9d7c16fccfa973cedb5d38.tar.gz
Refactored SGML escaping functions and "in tag" functions to
Text/Shared/Pandoc. (escapeSGML, stringToSGML, inTag, inTagSimple, inTagIndented, selfClosingTag) These can be used by both the HTML and Docbook writers. git-svn-id: https://pandoc.googlecode.com/svn/trunk@417 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Shared.hs98
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs196
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs78
3 files changed, 186 insertions, 186 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d4687b10e..d1d40ac23 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -28,6 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Utility functions and definitions used by the various Pandoc modules.
-}
module Text.Pandoc.Shared (
+ -- * List processing
+ splitBy,
-- * Text processing
gsub,
joinWithSep,
@@ -52,7 +54,6 @@ module Text.Pandoc.Shared (
-- * Pandoc block list processing
consolidateList,
isNoteBlock,
- splitBy,
normalizeSpaces,
compactify,
generateReference,
@@ -62,12 +63,21 @@ module Text.Pandoc.Shared (
lookupKeySrc,
refsMatch,
replaceReferenceLinks,
- replaceRefLinksBlockList
+ replaceRefLinksBlockList,
+ -- * SGML
+ escapeSGML,
+ stringToSGML,
+ inTags,
+ selfClosingTag,
+ inTagsSimple,
+ inTagsIndented
) where
import Text.Pandoc.Definition
import Text.ParserCombinators.Parsec
-import Text.Pandoc.Entities ( decodeEntities )
+import Text.Pandoc.Entities ( decodeEntities, encodeEntities )
import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex )
+import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>), ($$), nest, Doc,
+ isEmpty )
import Char ( toLower )
import List ( find, groupBy )
@@ -507,3 +517,85 @@ replaceRefLinksInline keytable (Emph lst) =
replaceRefLinksInline keytable (Strong lst) =
Strong (map (replaceRefLinksInline keytable) lst)
replaceRefLinksInline keytable other = other
+
+-- | Escape string, preserving character entities and quote, and adding
+-- smart typography if specified.
+stringToSGML :: WriterOptions -> String -> String
+stringToSGML options =
+ let escapeDoubleQuotes =
+ gsub "(\"|&quot;)" "&rdquo;" . -- rest are right quotes
+ gsub "(\"|&quot;)(&r[sd]quo;)" "&rdquo;\\2" .
+ -- never left quo before right quo
+ gsub "(&l[sd]quo;)(\"|&quot;)" "\\2&ldquo;" .
+ -- never right quo after left quo
+ gsub "([ \t])(\"|&quot;)" "\\1&ldquo;" .
+ -- never right quo after space
+ gsub "(\"|&quot;)([^,.;:!?^) \t-])" "&ldquo;\\2" . -- "word left
+ gsub "(\"|&quot;)('|`|&lsquo;)" "&rdquo;&rsquo;" .
+ -- right if it got through last filter
+ gsub "(\"|&quot;)('|`|&lsquo;)([^,.;:!?^) \t-])" "&ldquo;&lsquo;\\3" .
+ -- "'word left
+ gsub "``" "&ldquo;" .
+ gsub "''" "&rdquo;"
+ escapeSingleQuotes =
+ gsub "'" "&rsquo;" . -- otherwise right
+ gsub "'(&r[sd]quo;)" "&rsquo;\\1" . -- never left quo before right quo
+ gsub "(&l[sd]quo;)'" "\\1&lsquo;" . -- never right quo after left quo
+ gsub "([ \t])'" "\\1&lsquo;" . -- never right quo after space
+ gsub "`" "&lsquo;" . -- ` is left
+ gsub "([^,.;:!?^) \t-])'" "\\1&rsquo;" . -- word' right
+ gsub "^('|`)([^,.;:!?^) \t-])" "&lsquo;\\2" . -- 'word left
+ gsub "('|`)(\"|&quot;|&ldquo;|``)" "&lsquo;&ldquo;" . -- '"word left
+ gsub "([^,.;:!?^) \t-])'(s|S)" "\\1&rsquo;\\2" . -- possessive
+ gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1&lsquo;\\2" . -- 'word left
+ gsub "'([0-9][0-9](s|S))" "&rsquo;\\1" -- '80s - decade abbrevs.
+ escapeDashes =
+ gsub " ?-- ?" "&mdash;" .
+ gsub " ?--- ?" "&mdash;" .
+ gsub "([0-9])--?([0-9])" "\\1&ndash;\\2"
+ escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "&hellip;"
+ smartFilter = escapeSingleQuotes . escapeDoubleQuotes . escapeDashes .
+ escapeEllipses in
+ encodeEntities . (if (writerSmart options) then smartFilter else id) .
+ (escapePreservingRegex escapeSGML (mkRegex "&[[:alnum:]]*;"))
+
+-- | Escape string as needed for HTML. Entity references are not preserved.
+escapeSGML :: String -> String
+escapeSGML [] = []
+escapeSGML (x:xs) = case x of
+ '&' -> "&amp;" ++ escapeSGML xs
+ '<' -> "&lt;" ++ escapeSGML xs
+ '>' -> "&gt;" ++ escapeSGML xs
+ '"' -> "&quot;" ++ escapeSGML xs
+ _ -> x:(escapeSGML xs)
+
+-- | Return a text object with a string of formatted SGML attributes.
+attributeList :: WriterOptions -> [(String, String)] -> Doc
+attributeList options =
+ text . concatMap (\(a, b) -> " " ++ stringToSGML options a ++ "=\"" ++
+ stringToSGML options b ++ "\"")
+
+-- | Put the supplied contents between start and end tags of tagType,
+-- with specified attributes and (if specified) indentation.
+inTags:: Bool -> WriterOptions -> String -> [(String, String)] -> Doc -> Doc
+inTags isIndented options tagType attribs contents =
+ let openTag = PP.char '<' <> text tagType <> attributeList options attribs <>
+ PP.char '>'
+ closeTag = text "</" <> text tagType <> PP.char '>' in
+ if isIndented
+ then openTag $$ nest 2 contents $$ closeTag
+ else openTag <> contents <> closeTag
+
+-- | Return a self-closing tag of tagType with specified attributes
+selfClosingTag :: WriterOptions -> String -> [(String, String)] -> Doc
+selfClosingTag options tagType attribs =
+ PP.char '<' <> text tagType <> attributeList options attribs <> text " />"
+
+-- | Put the supplied contents between start and end tags of tagType.
+inTagsSimple :: WriterOptions -> String -> Doc -> Doc
+inTagsSimple options tagType = inTags False options tagType []
+
+-- | Put the supplied contents in indented block btw start and end tags.
+inTagsIndented :: WriterOptions -> String -> Doc -> Doc
+inTagsIndented options tagType = inTags True options tagType []
+
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 86dbbf6db..0fa4a1d98 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -32,9 +32,7 @@ module Text.Pandoc.Writers.Docbook (
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Writers.HTML ( stringToSmartHtml, stringToHtml )
import Text.Pandoc.Entities ( encodeEntities )
-import Text.Html ( stringToHtmlString )
import Text.Regex ( mkRegex, matchRegex )
import Data.Char ( toLower, ord )
import Data.List ( isPrefixOf, partition )
@@ -54,19 +52,21 @@ hierarchicalize :: [Block] -> [Element]
hierarchicalize [] = []
hierarchicalize (block:rest) =
case block of
- (Header level title) -> let (thisSection, rest') = break (headerAtLeast level) rest in
- (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
+ (Header level title) -> let (thisSection, rest') = break (headerAtLeast
+ level) rest in
+ (Sec title (hierarchicalize thisSection)):
+ (hierarchicalize rest')
x -> (Blk x):(hierarchicalize rest)
-- | Convert list of authors to a docbook <author> section
authorToDocbook :: WriterOptions -> [Char] -> Doc
-authorToDocbook options name = indentedInTags "author" $
+authorToDocbook opts name = inTagsIndented opts "author" $
if ',' `elem` name
then -- last name first
let (lastname, rest) = break (==',') name
firstname = removeLeadingSpace rest in
- inTags "firstname" (text $ stringToXML options firstname) <>
- inTags "surname" (text $ stringToXML options lastname)
+ inTagsSimple opts "firstname" (text $ stringToSGML opts firstname) <>
+ inTagsSimple opts "surname" (text $ stringToSGML opts lastname)
else -- last name last
let namewords = words name
lengthname = length namewords
@@ -74,101 +74,82 @@ authorToDocbook options name = indentedInTags "author" $
0 -> ("","")
1 -> ("", name)
n -> (joinWithSep " " (take (n-1) namewords), last namewords) in
- inTags "firstname" (text $ stringToXML options firstname) $$
- inTags "surname" (text $ stringToXML options lastname)
+ inTagsSimple opts "firstname" (text $ stringToSGML opts firstname) $$
+ inTagsSimple opts "surname" (text $ stringToSGML opts lastname)
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
-writeDocbook options (Pandoc (Meta title authors date) blocks) =
- let head = if (writerStandalone options)
- then text (writerHeader options)
+writeDocbook opts (Pandoc (Meta title authors date) blocks) =
+ let head = if (writerStandalone opts)
+ then text (writerHeader opts)
else empty
- meta = if (writerStandalone options)
- then indentedInTags "articleinfo" $
- (inTags "title" (inlinesToDocbook options title)) $$
- (vcat (map (authorToDocbook options) authors)) $$
- (inTags "date" (text date))
+ meta = if (writerStandalone opts)
+ then inTagsIndented opts "articleinfo" $
+ (inTagsSimple opts "title" (inlinesToDocbook opts title)) $$
+ (vcat (map (authorToDocbook opts) authors)) $$
+ (inTagsSimple opts "date" (text $ stringToSGML opts date))
else empty
blocks' = replaceReferenceLinks blocks
(noteBlocks, blocks'') = partition isNoteBlock blocks'
- options' = options {writerNotes = noteBlocks}
+ opts' = opts {writerNotes = noteBlocks}
elements = hierarchicalize blocks''
- body = text (writerIncludeBefore options') <>
- vcat (map (elementToDocbook options') elements) $$
- text (writerIncludeAfter options')
- body' = if writerStandalone options'
- then indentedInTags "article" (meta $$ body)
+ body = text (writerIncludeBefore opts') <>
+ vcat (map (elementToDocbook opts') elements) $$
+ text (writerIncludeAfter opts')
+ body' = if writerStandalone opts'
+ then inTagsIndented opts "article" (meta $$ body)
else body in
render $ head $$ body' <> text "\n"
--- | Put the supplied contents between start and end tags of tagType,
--- with specified attributes.
-inTagsWithAttrib :: String -> [(String, String)] -> Doc -> Doc
-inTagsWithAttrib tagType attribs contents = text ("<" ++ tagType ++
- (concatMap (\(a, b) -> " " ++ attributeStringToXML a ++
- "=\"" ++ attributeStringToXML b ++ "\"") attribs)) <>
- if isEmpty contents
- then text " />" -- self-closing tag
- else text ">" <> contents <> text ("</" ++ tagType ++ ">")
-
--- | Put the supplied contents between start and end tags of tagType.
-inTags :: String -> Doc -> Doc
-inTags tagType contents = inTagsWithAttrib tagType [] contents
-
--- | Put the supplied contents in indented block btw start and end tags.
-indentedInTags :: [Char] -> Doc -> Doc
-indentedInTags tagType contents = text ("<" ++ tagType ++ ">") $$
- nest 2 contents $$ text ("</" ++ tagType ++ ">")
-
-- | Convert an Element to Docbook.
elementToDocbook :: WriterOptions -> Element -> Doc
-elementToDocbook options (Blk block) = blockToDocbook options block
-elementToDocbook options (Sec title elements) =
+elementToDocbook opts (Blk block) = blockToDocbook opts block
+elementToDocbook opts (Sec title elements) =
-- Docbook doesn't allow sections with no content, so insert some if needed
let elements' = if null elements
then [Blk (Para [])]
else elements in
- indentedInTags "section" $
- inTags "title" (wrap options title) $$
- vcat (map (elementToDocbook options) elements')
+ inTagsIndented opts "section" $
+ inTagsSimple opts "title" (wrap opts title) $$
+ vcat (map (elementToDocbook opts) elements')
-- | Convert a list of Pandoc blocks to Docbook.
blocksToDocbook :: WriterOptions -> [Block] -> Doc
-blocksToDocbook options = vcat . map (blockToDocbook options)
+blocksToDocbook opts = vcat . map (blockToDocbook opts)
-- | Convert a list of lists of blocks to a list of Docbook list items.
listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc
-listItemsToDocbook options items =
- vcat $ map (listItemToDocbook options) items
+listItemsToDocbook opts items =
+ vcat $ map (listItemToDocbook opts) items
-- | Convert a list of blocks into a Docbook list item.
listItemToDocbook :: WriterOptions -> [Block] -> Doc
-listItemToDocbook options item =
+listItemToDocbook opts item =
let plainToPara (Plain x) = Para x
plainToPara y = y in
let item' = map plainToPara item in
- indentedInTags "listitem" (blocksToDocbook options item')
+ inTagsIndented opts "listitem" (blocksToDocbook opts item')
-- | Convert a Pandoc block element to Docbook.
blockToDocbook :: WriterOptions -> Block -> Doc
-blockToDocbook options Blank = text ""
-blockToDocbook options Null = empty
-blockToDocbook options (Plain lst) = wrap options lst
-blockToDocbook options (Para lst) =
- indentedInTags "para" (wrap options lst)
-blockToDocbook options (BlockQuote blocks) =
- indentedInTags "blockquote" (blocksToDocbook options blocks)
-blockToDocbook options (CodeBlock str) =
- text "<screen>\n" <> text (codeStringToXML str) <> text "\n</screen>"
-blockToDocbook options (BulletList lst) =
- indentedInTags "itemizedlist" $ listItemsToDocbook options lst
-blockToDocbook options (OrderedList lst) =
- indentedInTags "orderedlist" $ listItemsToDocbook options lst
-blockToDocbook options (RawHtml str) = text str -- raw XML block
-blockToDocbook options HorizontalRule = empty -- not semantic
-blockToDocbook options (Note _ _) = empty -- shouldn't occur
-blockToDocbook options (Key _ _) = empty -- shouldn't occur
-blockToDocbook options _ = indentedInTags "para" (text "Unknown block type")
+blockToDocbook opts Blank = text ""
+blockToDocbook opts Null = empty
+blockToDocbook opts (Plain lst) = wrap opts lst
+blockToDocbook opts (Para lst) =
+ inTagsIndented opts "para" (wrap opts lst)
+blockToDocbook opts (BlockQuote blocks) =
+ inTagsIndented opts "blockquote" (blocksToDocbook opts blocks)
+blockToDocbook opts (CodeBlock str) =
+ text "<screen>\n" <> text (escapeSGML str) <> text "\n</screen>"
+blockToDocbook opts (BulletList lst) =
+ inTagsIndented opts "itemizedlist" $ listItemsToDocbook opts lst
+blockToDocbook opts (OrderedList lst) =
+ inTagsIndented opts "orderedlist" $ listItemsToDocbook opts lst
+blockToDocbook opts (RawHtml str) = text str -- raw XML block
+blockToDocbook opts HorizontalRule = empty -- not semantic
+blockToDocbook opts (Note _ _) = empty -- shouldn't occur
+blockToDocbook opts (Key _ _) = empty -- shouldn't occur
+blockToDocbook opts _ = inTagsIndented opts "para" (text "Unknown block type")
-- | Put string in CDATA section
cdata :: String -> Doc
@@ -176,62 +157,47 @@ cdata str = text $ "<![CDATA[" ++ str ++ "]]>"
-- | Take list of inline elements and return wrapped doc.
wrap :: WriterOptions -> [Inline] -> Doc
-wrap options lst = fsep $ map (inlinesToDocbook options) (splitBy Space lst)
-
--- | Escape a string for XML (with "smart" option if specified).
-stringToXML :: WriterOptions -> String -> String
-stringToXML options = encodeEntities .
- (if writerSmart options
- then stringToSmartHtml
- else stringToHtml)
-
--- | Escape string to XML appropriate for attributes
-attributeStringToXML :: String -> String
-attributeStringToXML = gsub "\"" "&quot;" . codeStringToXML
-
--- | Escape a literal string for XML.
-codeStringToXML :: String -> String
-codeStringToXML = encodeEntities . gsub "<" "&lt;" . gsub "&" "&amp;"
+wrap opts lst = fsep $ map (inlinesToDocbook opts) (splitBy Space lst)
-- | Convert a list of inline elements to Docbook.
inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
-inlinesToDocbook options lst = hcat (map (inlineToDocbook options) lst)
+inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst)
-- | Convert an inline element to Docbook.
inlineToDocbook :: WriterOptions -> Inline -> Doc
-inlineToDocbook options (Str str) = text $ stringToXML options str
-inlineToDocbook options (Emph lst) =
- inTags "emphasis" (inlinesToDocbook options lst)
-inlineToDocbook options (Strong lst) =
- inTagsWithAttrib "emphasis" [("role", "strong")]
- (inlinesToDocbook options lst)
-inlineToDocbook options (Code str) =
- inTags "literal" $ text (codeStringToXML str)
-inlineToDocbook options (TeX str) = inlineToDocbook options (Code str)
-inlineToDocbook options (HtmlInline str) = empty
-inlineToDocbook options LineBreak =
+inlineToDocbook opts (Str str) = text $ stringToSGML opts str
+inlineToDocbook opts (Emph lst) =
+ inTagsSimple opts "emphasis" (inlinesToDocbook opts lst)
+inlineToDocbook opts (Strong lst) =
+ inTags False opts "emphasis" [("role", "strong")]
+ (inlinesToDocbook opts lst)
+inlineToDocbook opts (Code str) =
+ inTagsSimple opts "literal" $ text (escapeSGML str)
+inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
+inlineToDocbook opts (HtmlInline str) = empty
+inlineToDocbook opts LineBreak =
text $ "<literallayout></literallayout>"
-inlineToDocbook options Space = char ' '
-inlineToDocbook options (Link txt (Src src tit)) =
+inlineToDocbook opts Space = char ' '
+inlineToDocbook opts (Link txt (Src src tit)) =
case (matchRegex (mkRegex "mailto:(.*)") src) of
- Just [addr] -> inTags "email" $ text (codeStringToXML addr)
- Nothing -> inTagsWithAttrib "ulink" [("url", src)] $
- inlinesToDocbook options txt
-inlineToDocbook options (Link text (Ref ref)) = empty -- shouldn't occur
-inlineToDocbook options (Image alt (Src src tit)) =
+ Just [addr] -> inTagsSimple opts "email" $ text (escapeSGML addr)
+ Nothing -> inTags False opts "ulink" [("url", src)] $
+ inlinesToDocbook opts txt
+inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur
+inlineToDocbook opts (Image alt (Src src tit)) =
let titleDoc = if null tit
then empty
- else indentedInTags "objectinfo" $
- indentedInTags "title"
- (text $ stringToXML options tit) in
- indentedInTags "inlinemediaobject" $
- indentedInTags "imageobject" $
- titleDoc $$ inTagsWithAttrib "imagedata" [("fileref", src)] empty
-inlineToDocbook options (Image alternate (Ref ref)) = empty --shouldn't occur
-inlineToDocbook options (NoteRef ref) =
- let notes = writerNotes options
+ else inTagsIndented opts "objectinfo" $
+ inTagsIndented opts "title"
+ (text $ stringToSGML opts tit) in
+ inTagsIndented opts "inlinemediaobject" $
+ inTagsIndented opts "imageobject" $
+ titleDoc $$ selfClosingTag opts "imagedata" [("fileref", src)]
+inlineToDocbook opts (Image alternate (Ref ref)) = empty --shouldn't occur
+inlineToDocbook opts (NoteRef ref) =
+ let notes = writerNotes opts
hits = filter (\(Note r _) -> r == ref) notes in
if null hits
then empty
else let (Note _ contents) = head hits in
- indentedInTags "footnote" $ blocksToDocbook options contents
+ inTagsIndented opts "footnote" $ blocksToDocbook opts contents
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 8de1de43f..b42d78eb0 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -29,12 +29,10 @@ Conversion of 'Pandoc' documents to HTML.
-}
module Text.Pandoc.Writers.HTML (
writeHtml,
- stringToSmartHtml,
- stringToHtml
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Html ( stringToHtmlString )
+import Text.Pandoc.Entities ( encodeEntities )
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
@@ -115,61 +113,6 @@ obfuscateChar char =
obfuscateString :: String -> String
obfuscateString = concatMap obfuscateChar
--- | Escape string, preserving character entities and quote.
-stringToHtml :: String -> String
-stringToHtml str = escapePreservingRegex stringToHtmlString
- (mkRegex "\"|(&[[:alnum:]]*;)") str
-
--- | Escape string as in 'stringToHtml' but add smart typography filter.
-stringToSmartHtml :: String -> String
-stringToSmartHtml =
- let escapeDoubleQuotes =
- gsub "(\"|&quot;)" "&rdquo;" . -- rest are right quotes
- gsub "(\"|&quot;)(&r[sd]quo;)" "&rdquo;\\2" .
- -- never left quo before right quo
- gsub "(&l[sd]quo;)(\"|&quot;)" "\\2&ldquo;" .
- -- never right quo after left quo
- gsub "([ \t])(\"|&quot;)" "\\1&ldquo;" .
- -- never right quo after space
- gsub "(\"|&quot;)([^,.;:!?^) \t-])" "&ldquo;\\2" . -- "word left
- gsub "(\"|&quot;)('|`|&lsquo;)" "&rdquo;&rsquo;" .
- -- right if it got through last filter
- gsub "(\"|&quot;)('|`|&lsquo;)([^,.;:!?^) \t-])" "&ldquo;&lsquo;\\3" .
- -- "'word left
- gsub "``" "&ldquo;" .
- gsub "''" "&rdquo;"
- escapeSingleQuotes =
- gsub "'" "&rsquo;" . -- otherwise right
- gsub "'(&r[sd]quo;)" "&rsquo;\\1" . -- never left quo before right quo
- gsub "(&l[sd]quo;)'" "\\1&lsquo;" . -- never right quo after left quo
- gsub "([ \t])'" "\\1&lsquo;" . -- never right quo after space
- gsub "`" "&lsquo;" . -- ` is left
- gsub "([^,.;:!?^) \t-])'" "\\1&rsquo;" . -- word' right
- gsub "^('|`)([^,.;:!?^) \t-])" "&lsquo;\\2" . -- 'word left
- gsub "('|`)(\"|&quot;|&ldquo;|``)" "&lsquo;&ldquo;" . -- '"word left
- gsub "([^,.;:!?^) \t-])'(s|S)" "\\1&rsquo;\\2" . -- possessive
- gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1&lsquo;\\2" . -- 'word left
- gsub "'([0-9][0-9](s|S))" "&rsquo;\\1" -- '80s - decade abbrevs.
- escapeDashes =
- gsub " ?-- ?" "&mdash;" .
- gsub " ?--- ?" "&mdash;" .
- gsub "([0-9])--?([0-9])" "\\1&ndash;\\2"
- escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "&hellip;" in
- escapeSingleQuotes . escapeDoubleQuotes . escapeDashes .
- escapeEllipses . stringToHtml
-
--- | Escape code string as needed for HTML.
-codeStringToHtml :: String -> String
-codeStringToHtml [] = []
-codeStringToHtml (x:xs) = case x of
- '&' -> "&amp;" ++ codeStringToHtml xs
- '<' -> "&lt;" ++ codeStringToHtml xs
- _ -> x:(codeStringToHtml xs)
-
--- | Escape string to HTML appropriate for attributes
-attributeStringToHtml :: String -> String
-attributeStringToHtml = gsub "\"" "&quot;"
-
-- | Returns an HTML header with appropriate bibliographic information.
htmlHeader :: WriterOptions -> Meta -> String
htmlHeader options (Meta title authors date) =
@@ -178,12 +121,12 @@ htmlHeader options (Meta title authors date) =
authortext = if (null authors)
then ""
else "<meta name=\"author\" content=\"" ++
- (joinWithSep ", " (map stringToHtml authors)) ++
+ (joinWithSep ", " (map (stringToSGML options) authors)) ++
"\" />\n"
datetext = if (date == "")
then ""
else "<meta name=\"date\" content=\"" ++
- (stringToHtml date) ++ "\" />\n" in
+ (stringToSGML options date) ++ "\" />\n" in
(writerHeader options) ++ authortext ++ datetext ++ titletext ++
"</head>\n<body>\n"
@@ -216,7 +159,7 @@ blockToHtml options (Note ref lst) =
"\">&#8617;</a></li>\n"
blockToHtml options (Key _ _) = ""
blockToHtml options (CodeBlock str) =
- "<pre><code>" ++ (codeStringToHtml str) ++ "\n</code></pre>\n"
+ "<pre><code>" ++ (escapeSGML str) ++ "\n</code></pre>\n"
blockToHtml options (RawHtml str) = str
blockToHtml options (BulletList lst) =
let attribs = if (writerIncremental options)
@@ -255,18 +198,17 @@ inlineToHtml options (Emph lst) =
inlineToHtml options (Strong lst) =
"<strong>" ++ (inlineListToHtml options lst) ++ "</strong>"
inlineToHtml options (Code str) =
- "<code>" ++ (codeStringToHtml str) ++ "</code>"
-inlineToHtml options (Str str) =
- if (writerSmart options) then stringToSmartHtml str else stringToHtml str
-inlineToHtml options (TeX str) = (codeStringToHtml str)
+ "<code>" ++ (escapeSGML str) ++ "</code>"
+inlineToHtml options (Str str) = stringToSGML options str
+inlineToHtml options (TeX str) = (escapeSGML str)
inlineToHtml options (HtmlInline str) = str
inlineToHtml options (LineBreak) = "<br />\n"
inlineToHtml options Space = " "
inlineToHtml options (Link text (Src src tit)) =
- let title = attributeStringToHtml tit in
+ let title = stringToSGML options tit in
if (isPrefixOf "mailto:" src)
then obfuscateLink options text src
- else "<a href=\"" ++ (codeStringToHtml src) ++ "\"" ++
+ else "<a href=\"" ++ (escapeSGML src) ++ "\"" ++
(if tit /= "" then " title=\"" ++ title ++ "\">" else ">") ++
(inlineListToHtml options text) ++ "</a>"
inlineToHtml options (Link text (Ref ref)) =
@@ -274,7 +216,7 @@ inlineToHtml options (Link text (Ref ref)) =
(inlineListToHtml options ref) ++ "]"
-- this is what markdown does, for better or worse
inlineToHtml options (Image alt (Src source tit)) =
- let title = attributeStringToHtml tit
+ let title = stringToSGML options tit
alternate = inlineListToHtml options alt in
"<img src=\"" ++ source ++ "\"" ++
(if tit /= "" then " title=\"" ++ title ++ "\"" else "") ++