aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--debian/changelog2
-rw-r--r--src/Text/Pandoc/Shared.hs98
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs196
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs78
-rw-r--r--tests/s5.basic.html2
-rw-r--r--tests/s5.fragment.html2
-rw-r--r--tests/s5.inserts.html2
-rw-r--r--tests/writer.docbook29
-rw-r--r--tests/writer.html48
-rw-r--r--tests/writer.smart.html44
10 files changed, 252 insertions, 249 deletions
diff --git a/debian/changelog b/debian/changelog
index 3fa92832f..0b8db6e8c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -186,6 +186,8 @@ pandoc (0.3) unstable; urgency=low
+ Renamed 'Text/Pandoc/HtmlEntities' module to
'Text/Pandoc/Entities'. Also changed function names so as
not to be HTML-specific.
+ + Refactored SGML string escaping functions from HTML and Docbook
+ writers into Text/Pandoc/Shared. (escapeSGML, stringToSGML)
+ Removed 'BlockQuoteContext' from ParserContext, as it isn't
used anywhere.
+ Removed splitBySpace and replaced it with a general, polymorphic
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d4687b10e..d1d40ac23 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -28,6 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Utility functions and definitions used by the various Pandoc modules.
-}
module Text.Pandoc.Shared (
+ -- * List processing
+ splitBy,
-- * Text processing
gsub,
joinWithSep,
@@ -52,7 +54,6 @@ module Text.Pandoc.Shared (
-- * Pandoc block list processing
consolidateList,
isNoteBlock,
- splitBy,
normalizeSpaces,
compactify,
generateReference,
@@ -62,12 +63,21 @@ module Text.Pandoc.Shared (
lookupKeySrc,
refsMatch,
replaceReferenceLinks,
- replaceRefLinksBlockList
+ replaceRefLinksBlockList,
+ -- * SGML
+ escapeSGML,
+ stringToSGML,
+ inTags,
+ selfClosingTag,
+ inTagsSimple,
+ inTagsIndented
) where
import Text.Pandoc.Definition
import Text.ParserCombinators.Parsec
-import Text.Pandoc.Entities ( decodeEntities )
+import Text.Pandoc.Entities ( decodeEntities, encodeEntities )
import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex )
+import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>), ($$), nest, Doc,
+ isEmpty )
import Char ( toLower )
import List ( find, groupBy )
@@ -507,3 +517,85 @@ replaceRefLinksInline keytable (Emph lst) =
replaceRefLinksInline keytable (Strong lst) =
Strong (map (replaceRefLinksInline keytable) lst)
replaceRefLinksInline keytable other = other
+
+-- | Escape string, preserving character entities and quote, and adding
+-- smart typography if specified.
+stringToSGML :: WriterOptions -> String -> String
+stringToSGML options =
+ let escapeDoubleQuotes =
+ gsub "(\"|&quot;)" "&rdquo;" . -- rest are right quotes
+ gsub "(\"|&quot;)(&r[sd]quo;)" "&rdquo;\\2" .
+ -- never left quo before right quo
+ gsub "(&l[sd]quo;)(\"|&quot;)" "\\2&ldquo;" .
+ -- never right quo after left quo
+ gsub "([ \t])(\"|&quot;)" "\\1&ldquo;" .
+ -- never right quo after space
+ gsub "(\"|&quot;)([^,.;:!?^) \t-])" "&ldquo;\\2" . -- "word left
+ gsub "(\"|&quot;)('|`|&lsquo;)" "&rdquo;&rsquo;" .
+ -- right if it got through last filter
+ gsub "(\"|&quot;)('|`|&lsquo;)([^,.;:!?^) \t-])" "&ldquo;&lsquo;\\3" .
+ -- "'word left
+ gsub "``" "&ldquo;" .
+ gsub "''" "&rdquo;"
+ escapeSingleQuotes =
+ gsub "'" "&rsquo;" . -- otherwise right
+ gsub "'(&r[sd]quo;)" "&rsquo;\\1" . -- never left quo before right quo
+ gsub "(&l[sd]quo;)'" "\\1&lsquo;" . -- never right quo after left quo
+ gsub "([ \t])'" "\\1&lsquo;" . -- never right quo after space
+ gsub "`" "&lsquo;" . -- ` is left
+ gsub "([^,.;:!?^) \t-])'" "\\1&rsquo;" . -- word' right
+ gsub "^('|`)([^,.;:!?^) \t-])" "&lsquo;\\2" . -- 'word left
+ gsub "('|`)(\"|&quot;|&ldquo;|``)" "&lsquo;&ldquo;" . -- '"word left
+ gsub "([^,.;:!?^) \t-])'(s|S)" "\\1&rsquo;\\2" . -- possessive
+ gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1&lsquo;\\2" . -- 'word left
+ gsub "'([0-9][0-9](s|S))" "&rsquo;\\1" -- '80s - decade abbrevs.
+ escapeDashes =
+ gsub " ?-- ?" "&mdash;" .
+ gsub " ?--- ?" "&mdash;" .
+ gsub "([0-9])--?([0-9])" "\\1&ndash;\\2"
+ escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "&hellip;"
+ smartFilter = escapeSingleQuotes . escapeDoubleQuotes . escapeDashes .
+ escapeEllipses in
+ encodeEntities . (if (writerSmart options) then smartFilter else id) .
+ (escapePreservingRegex escapeSGML (mkRegex "&[[:alnum:]]*;"))
+
+-- | Escape string as needed for HTML. Entity references are not preserved.
+escapeSGML :: String -> String
+escapeSGML [] = []
+escapeSGML (x:xs) = case x of
+ '&' -> "&amp;" ++ escapeSGML xs
+ '<' -> "&lt;" ++ escapeSGML xs
+ '>' -> "&gt;" ++ escapeSGML xs
+ '"' -> "&quot;" ++ escapeSGML xs
+ _ -> x:(escapeSGML xs)
+
+-- | Return a text object with a string of formatted SGML attributes.
+attributeList :: WriterOptions -> [(String, String)] -> Doc
+attributeList options =
+ text . concatMap (\(a, b) -> " " ++ stringToSGML options a ++ "=\"" ++
+ stringToSGML options b ++ "\"")
+
+-- | Put the supplied contents between start and end tags of tagType,
+-- with specified attributes and (if specified) indentation.
+inTags:: Bool -> WriterOptions -> String -> [(String, String)] -> Doc -> Doc
+inTags isIndented options tagType attribs contents =
+ let openTag = PP.char '<' <> text tagType <> attributeList options attribs <>
+ PP.char '>'
+ closeTag = text "</" <> text tagType <> PP.char '>' in
+ if isIndented
+ then openTag $$ nest 2 contents $$ closeTag
+ else openTag <> contents <> closeTag
+
+-- | Return a self-closing tag of tagType with specified attributes
+selfClosingTag :: WriterOptions -> String -> [(String, String)] -> Doc
+selfClosingTag options tagType attribs =
+ PP.char '<' <> text tagType <> attributeList options attribs <> text " />"
+
+-- | Put the supplied contents between start and end tags of tagType.
+inTagsSimple :: WriterOptions -> String -> Doc -> Doc
+inTagsSimple options tagType = inTags False options tagType []
+
+-- | Put the supplied contents in indented block btw start and end tags.
+inTagsIndented :: WriterOptions -> String -> Doc -> Doc
+inTagsIndented options tagType = inTags True options tagType []
+
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 86dbbf6db..0fa4a1d98 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -32,9 +32,7 @@ module Text.Pandoc.Writers.Docbook (
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Writers.HTML ( stringToSmartHtml, stringToHtml )
import Text.Pandoc.Entities ( encodeEntities )
-import Text.Html ( stringToHtmlString )
import Text.Regex ( mkRegex, matchRegex )
import Data.Char ( toLower, ord )
import Data.List ( isPrefixOf, partition )
@@ -54,19 +52,21 @@ hierarchicalize :: [Block] -> [Element]
hierarchicalize [] = []
hierarchicalize (block:rest) =
case block of
- (Header level title) -> let (thisSection, rest') = break (headerAtLeast level) rest in
- (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
+ (Header level title) -> let (thisSection, rest') = break (headerAtLeast
+ level) rest in
+ (Sec title (hierarchicalize thisSection)):
+ (hierarchicalize rest')
x -> (Blk x):(hierarchicalize rest)
-- | Convert list of authors to a docbook <author> section
authorToDocbook :: WriterOptions -> [Char] -> Doc
-authorToDocbook options name = indentedInTags "author" $
+authorToDocbook opts name = inTagsIndented opts "author" $
if ',' `elem` name
then -- last name first
let (lastname, rest) = break (==',') name
firstname = removeLeadingSpace rest in
- inTags "firstname" (text $ stringToXML options firstname) <>
- inTags "surname" (text $ stringToXML options lastname)
+ inTagsSimple opts "firstname" (text $ stringToSGML opts firstname) <>
+ inTagsSimple opts "surname" (text $ stringToSGML opts lastname)
else -- last name last
let namewords = words name
lengthname = length namewords
@@ -74,101 +74,82 @@ authorToDocbook options name = indentedInTags "author" $
0 -> ("","")
1 -> ("", name)
n -> (joinWithSep " " (take (n-1) namewords), last namewords) in
- inTags "firstname" (text $ stringToXML options firstname) $$
- inTags "surname" (text $ stringToXML options lastname)
+ inTagsSimple opts "firstname" (text $ stringToSGML opts firstname) $$
+ inTagsSimple opts "surname" (text $ stringToSGML opts lastname)
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
-writeDocbook options (Pandoc (Meta title authors date) blocks) =
- let head = if (writerStandalone options)
- then text (writerHeader options)
+writeDocbook opts (Pandoc (Meta title authors date) blocks) =
+ let head = if (writerStandalone opts)
+ then text (writerHeader opts)
else empty
- meta = if (writerStandalone options)
- then indentedInTags "articleinfo" $
- (inTags "title" (inlinesToDocbook options title)) $$
- (vcat (map (authorToDocbook options) authors)) $$
- (inTags "date" (text date))
+ meta = if (writerStandalone opts)
+ then inTagsIndented opts "articleinfo" $
+ (inTagsSimple opts "title" (inlinesToDocbook opts title)) $$
+ (vcat (map (authorToDocbook opts) authors)) $$
+ (inTagsSimple opts "date" (text $ stringToSGML opts date))
else empty
blocks' = replaceReferenceLinks blocks
(noteBlocks, blocks'') = partition isNoteBlock blocks'
- options' = options {writerNotes = noteBlocks}
+ opts' = opts {writerNotes = noteBlocks}
elements = hierarchicalize blocks''
- body = text (writerIncludeBefore options') <>
- vcat (map (elementToDocbook options') elements) $$
- text (writerIncludeAfter options')
- body' = if writerStandalone options'
- then indentedInTags "article" (meta $$ body)
+ body = text (writerIncludeBefore opts') <>
+ vcat (map (elementToDocbook opts') elements) $$
+ text (writerIncludeAfter opts')
+ body' = if writerStandalone opts'
+ then inTagsIndented opts "article" (meta $$ body)
else body in
render $ head $$ body' <> text "\n"
--- | Put the supplied contents between start and end tags of tagType,
--- with specified attributes.
-inTagsWithAttrib :: String -> [(String, String)] -> Doc -> Doc
-inTagsWithAttrib tagType attribs contents = text ("<" ++ tagType ++
- (concatMap (\(a, b) -> " " ++ attributeStringToXML a ++
- "=\"" ++ attributeStringToXML b ++ "\"") attribs)) <>
- if isEmpty contents
- then text " />" -- self-closing tag
- else text ">" <> contents <> text ("</" ++ tagType ++ ">")
-
--- | Put the supplied contents between start and end tags of tagType.
-inTags :: String -> Doc -> Doc
-inTags tagType contents = inTagsWithAttrib tagType [] contents
-
--- | Put the supplied contents in indented block btw start and end tags.
-indentedInTags :: [Char] -> Doc -> Doc
-indentedInTags tagType contents = text ("<" ++ tagType ++ ">") $$
- nest 2 contents $$ text ("</" ++ tagType ++ ">")
-
-- | Convert an Element to Docbook.
elementToDocbook :: WriterOptions -> Element -> Doc
-elementToDocbook options (Blk block) = blockToDocbook options block
-elementToDocbook options (Sec title elements) =
+elementToDocbook opts (Blk block) = blockToDocbook opts block
+elementToDocbook opts (Sec title elements) =
-- Docbook doesn't allow sections with no content, so insert some if needed
let elements' = if null elements
then [Blk (Para [])]
else elements in
- indentedInTags "section" $
- inTags "title" (wrap options title) $$
- vcat (map (elementToDocbook options) elements')
+ inTagsIndented opts "section" $
+ inTagsSimple opts "title" (wrap opts title) $$
+ vcat (map (elementToDocbook opts) elements')
-- | Convert a list of Pandoc blocks to Docbook.
blocksToDocbook :: WriterOptions -> [Block] -> Doc
-blocksToDocbook options = vcat . map (blockToDocbook options)
+blocksToDocbook opts = vcat . map (blockToDocbook opts)
-- | Convert a list of lists of blocks to a list of Docbook list items.
listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc
-listItemsToDocbook options items =
- vcat $ map (listItemToDocbook options) items
+listItemsToDocbook opts items =
+ vcat $ map (listItemToDocbook opts) items
-- | Convert a list of blocks into a Docbook list item.
listItemToDocbook :: WriterOptions -> [Block] -> Doc
-listItemToDocbook options item =
+listItemToDocbook opts item =
let plainToPara (Plain x) = Para x
plainToPara y = y in
let item' = map plainToPara item in
- indentedInTags "listitem" (blocksToDocbook options item')
+ inTagsIndented opts "listitem" (blocksToDocbook opts item')
-- | Convert a Pandoc block element to Docbook.
blockToDocbook :: WriterOptions -> Block -> Doc
-blockToDocbook options Blank = text ""
-blockToDocbook options Null = empty
-blockToDocbook options (Plain lst) = wrap options lst
-blockToDocbook options (Para lst) =
- indentedInTags "para" (wrap options lst)
-blockToDocbook options (BlockQuote blocks) =
- indentedInTags "blockquote" (blocksToDocbook options blocks)
-blockToDocbook options (CodeBlock str) =
- text "<screen>\n" <> text (codeStringToXML str) <> text "\n</screen>"
-blockToDocbook options (BulletList lst) =
- indentedInTags "itemizedlist" $ listItemsToDocbook options lst
-blockToDocbook options (OrderedList lst) =
- indentedInTags "orderedlist" $ listItemsToDocbook options lst
-blockToDocbook options (RawHtml str) = text str -- raw XML block
-blockToDocbook options HorizontalRule = empty -- not semantic
-blockToDocbook options (Note _ _) = empty -- shouldn't occur
-blockToDocbook options (Key _ _) = empty -- shouldn't occur
-blockToDocbook options _ = indentedInTags "para" (text "Unknown block type")
+blockToDocbook opts Blank = text ""
+blockToDocbook opts Null = empty
+blockToDocbook opts (Plain lst) = wrap opts lst
+blockToDocbook opts (Para lst) =
+ inTagsIndented opts "para" (wrap opts lst)
+blockToDocbook opts (BlockQuote blocks) =
+ inTagsIndented opts "blockquote" (blocksToDocbook opts blocks)
+blockToDocbook opts (CodeBlock str) =
+ text "<screen>\n" <> text (escapeSGML str) <> text "\n</screen>"
+blockToDocbook opts (BulletList lst) =
+ inTagsIndented opts "itemizedlist" $ listItemsToDocbook opts lst
+blockToDocbook opts (OrderedList lst) =
+ inTagsIndented opts "orderedlist" $ listItemsToDocbook opts lst
+blockToDocbook opts (RawHtml str) = text str -- raw XML block
+blockToDocbook opts HorizontalRule = empty -- not semantic
+blockToDocbook opts (Note _ _) = empty -- shouldn't occur
+blockToDocbook opts (Key _ _) = empty -- shouldn't occur
+blockToDocbook opts _ = inTagsIndented opts "para" (text "Unknown block type")
-- | Put string in CDATA section
cdata :: String -> Doc
@@ -176,62 +157,47 @@ cdata str = text $ "<![CDATA[" ++ str ++ "]]>"
-- | Take list of inline elements and return wrapped doc.
wrap :: WriterOptions -> [Inline] -> Doc
-wrap options lst = fsep $ map (inlinesToDocbook options) (splitBy Space lst)
-
--- | Escape a string for XML (with "smart" option if specified).
-stringToXML :: WriterOptions -> String -> String
-stringToXML options = encodeEntities .
- (if writerSmart options
- then stringToSmartHtml
- else stringToHtml)
-
--- | Escape string to XML appropriate for attributes
-attributeStringToXML :: String -> String
-attributeStringToXML = gsub "\"" "&quot;" . codeStringToXML
-
--- | Escape a literal string for XML.
-codeStringToXML :: String -> String
-codeStringToXML = encodeEntities . gsub "<" "&lt;" . gsub "&" "&amp;"
+wrap opts lst = fsep $ map (inlinesToDocbook opts) (splitBy Space lst)
-- | Convert a list of inline elements to Docbook.
inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
-inlinesToDocbook options lst = hcat (map (inlineToDocbook options) lst)
+inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst)
-- | Convert an inline element to Docbook.
inlineToDocbook :: WriterOptions -> Inline -> Doc
-inlineToDocbook options (Str str) = text $ stringToXML options str
-inlineToDocbook options (Emph lst) =
- inTags "emphasis" (inlinesToDocbook options lst)
-inlineToDocbook options (Strong lst) =
- inTagsWithAttrib "emphasis" [("role", "strong")]
- (inlinesToDocbook options lst)
-inlineToDocbook options (Code str) =
- inTags "literal" $ text (codeStringToXML str)
-inlineToDocbook options (TeX str) = inlineToDocbook options (Code str)
-inlineToDocbook options (HtmlInline str) = empty
-inlineToDocbook options LineBreak =
+inlineToDocbook opts (Str str) = text $ stringToSGML opts str
+inlineToDocbook opts (Emph lst) =
+ inTagsSimple opts "emphasis" (inlinesToDocbook opts lst)
+inlineToDocbook opts (Strong lst) =
+ inTags False opts "emphasis" [("role", "strong")]
+ (inlinesToDocbook opts lst)
+inlineToDocbook opts (Code str) =
+ inTagsSimple opts "literal" $ text (escapeSGML str)
+inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
+inlineToDocbook opts (HtmlInline str) = empty
+inlineToDocbook opts LineBreak =
text $ "<literallayout></literallayout>"
-inlineToDocbook options Space = char ' '
-inlineToDocbook options (Link txt (Src src tit)) =
+inlineToDocbook opts Space = char ' '
+inlineToDocbook opts (Link txt (Src src tit)) =
case (matchRegex (mkRegex "mailto:(.*)") src) of
- Just [addr] -> inTags "email" $ text (codeStringToXML addr)
- Nothing -> inTagsWithAttrib "ulink" [("url", src)] $
- inlinesToDocbook options txt
-inlineToDocbook options (Link text (Ref ref)) = empty -- shouldn't occur
-inlineToDocbook options (Image alt (Src src tit)) =
+ Just [addr] -> inTagsSimple opts "email" $ text (escapeSGML addr)
+ Nothing -> inTags False opts "ulink" [("url", src)] $
+ inlinesToDocbook opts txt
+inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur
+inlineToDocbook opts (Image alt (Src src tit)) =
let titleDoc = if null tit
then empty
- else indentedInTags "objectinfo" $
- indentedInTags "title"
- (text $ stringToXML options tit) in
- indentedInTags "inlinemediaobject" $
- indentedInTags "imageobject" $
- titleDoc $$ inTagsWithAttrib "imagedata" [("fileref", src)] empty
-inlineToDocbook options (Image alternate (Ref ref)) = empty --shouldn't occur
-inlineToDocbook options (NoteRef ref) =
- let notes = writerNotes options
+ else inTagsIndented opts "objectinfo" $
+ inTagsIndented opts "title"
+ (text $ stringToSGML opts tit) in
+ inTagsIndented opts "inlinemediaobject" $
+ inTagsIndented opts "imageobject" $
+ titleDoc $$ selfClosingTag opts "imagedata" [("fileref", src)]
+inlineToDocbook opts (Image alternate (Ref ref)) = empty --shouldn't occur
+inlineToDocbook opts (NoteRef ref) =
+ let notes = writerNotes opts
hits = filter (\(Note r _) -> r == ref) notes in
if null hits
then empty
else let (Note _ contents) = head hits in
- indentedInTags "footnote" $ blocksToDocbook options contents
+ inTagsIndented opts "footnote" $ blocksToDocbook opts contents
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 8de1de43f..b42d78eb0 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -29,12 +29,10 @@ Conversion of 'Pandoc' documents to HTML.
-}
module Text.Pandoc.Writers.HTML (
writeHtml,
- stringToSmartHtml,
- stringToHtml
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Html ( stringToHtmlString )
+import Text.Pandoc.Entities ( encodeEntities )
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
@@ -115,61 +113,6 @@ obfuscateChar char =
obfuscateString :: String -> String
obfuscateString = concatMap obfuscateChar
--- | Escape string, preserving character entities and quote.
-stringToHtml :: String -> String
-stringToHtml str = escapePreservingRegex stringToHtmlString
- (mkRegex "\"|(&[[:alnum:]]*;)") str
-
--- | Escape string as in 'stringToHtml' but add smart typography filter.
-stringToSmartHtml :: String -> String
-stringToSmartHtml =
- let escapeDoubleQuotes =
- gsub "(\"|&quot;)" "&rdquo;" . -- rest are right quotes
- gsub "(\"|&quot;)(&r[sd]quo;)" "&rdquo;\\2" .
- -- never left quo before right quo
- gsub "(&l[sd]quo;)(\"|&quot;)" "\\2&ldquo;" .
- -- never right quo after left quo
- gsub "([ \t])(\"|&quot;)" "\\1&ldquo;" .
- -- never right quo after space
- gsub "(\"|&quot;)([^,.;:!?^) \t-])" "&ldquo;\\2" . -- "word left
- gsub "(\"|&quot;)('|`|&lsquo;)" "&rdquo;&rsquo;" .
- -- right if it got through last filter
- gsub "(\"|&quot;)('|`|&lsquo;)([^,.;:!?^) \t-])" "&ldquo;&lsquo;\\3" .
- -- "'word left
- gsub "``" "&ldquo;" .
- gsub "''" "&rdquo;"
- escapeSingleQuotes =
- gsub "'" "&rsquo;" . -- otherwise right
- gsub "'(&r[sd]quo;)" "&rsquo;\\1" . -- never left quo before right quo
- gsub "(&l[sd]quo;)'" "\\1&lsquo;" . -- never right quo after left quo
- gsub "([ \t])'" "\\1&lsquo;" . -- never right quo after space
- gsub "`" "&lsquo;" . -- ` is left
- gsub "([^,.;:!?^) \t-])'" "\\1&rsquo;" . -- word' right
- gsub "^('|`)([^,.;:!?^) \t-])" "&lsquo;\\2" . -- 'word left
- gsub "('|`)(\"|&quot;|&ldquo;|``)" "&lsquo;&ldquo;" . -- '"word left
- gsub "([^,.;:!?^) \t-])'(s|S)" "\\1&rsquo;\\2" . -- possessive
- gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1&lsquo;\\2" . -- 'word left
- gsub "'([0-9][0-9](s|S))" "&rsquo;\\1" -- '80s - decade abbrevs.
- escapeDashes =
- gsub " ?-- ?" "&mdash;" .
- gsub " ?--- ?" "&mdash;" .
- gsub "([0-9])--?([0-9])" "\\1&ndash;\\2"
- escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "&hellip;" in
- escapeSingleQuotes . escapeDoubleQuotes . escapeDashes .
- escapeEllipses . stringToHtml
-
--- | Escape code string as needed for HTML.
-codeStringToHtml :: String -> String
-codeStringToHtml [] = []
-codeStringToHtml (x:xs) = case x of
- '&' -> "&amp;" ++ codeStringToHtml xs
- '<' -> "&lt;" ++ codeStringToHtml xs
- _ -> x:(codeStringToHtml xs)
-
--- | Escape string to HTML appropriate for attributes
-attributeStringToHtml :: String -> String
-attributeStringToHtml = gsub "\"" "&quot;"
-
-- | Returns an HTML header with appropriate bibliographic information.
htmlHeader :: WriterOptions -> Meta -> String
htmlHeader options (Meta title authors date) =
@@ -178,12 +121,12 @@ htmlHeader options (Meta title authors date) =
authortext = if (null authors)
then ""
else "<meta name=\"author\" content=\"" ++
- (joinWithSep ", " (map stringToHtml authors)) ++
+ (joinWithSep ", " (map (stringToSGML options) authors)) ++
"\" />\n"
datetext = if (date == "")
then ""
else "<meta name=\"date\" content=\"" ++
- (stringToHtml date) ++ "\" />\n" in
+ (stringToSGML options date) ++ "\" />\n" in
(writerHeader options) ++ authortext ++ datetext ++ titletext ++
"</head>\n<body>\n"
@@ -216,7 +159,7 @@ blockToHtml options (Note ref lst) =
"\">&#8617;</a></li>\n"
blockToHtml options (Key _ _) = ""
blockToHtml options (CodeBlock str) =
- "<pre><code>" ++ (codeStringToHtml str) ++ "\n</code></pre>\n"
+ "<pre><code>" ++ (escapeSGML str) ++ "\n</code></pre>\n"
blockToHtml options (RawHtml str) = str
blockToHtml options (BulletList lst) =
let attribs = if (writerIncremental options)
@@ -255,18 +198,17 @@ inlineToHtml options (Emph lst) =
inlineToHtml options (Strong lst) =
"<strong>" ++ (inlineListToHtml options lst) ++ "</strong>"
inlineToHtml options (Code str) =
- "<code>" ++ (codeStringToHtml str) ++ "</code>"
-inlineToHtml options (Str str) =
- if (writerSmart options) then stringToSmartHtml str else stringToHtml str
-inlineToHtml options (TeX str) = (codeStringToHtml str)
+ "<code>" ++ (escapeSGML str) ++ "</code>"
+inlineToHtml options (Str str) = stringToSGML options str
+inlineToHtml options (TeX str) = (escapeSGML str)
inlineToHtml options (HtmlInline str) = str
inlineToHtml options (LineBreak) = "<br />\n"
inlineToHtml options Space = " "
inlineToHtml options (Link text (Src src tit)) =
- let title = attributeStringToHtml tit in
+ let title = stringToSGML options tit in
if (isPrefixOf "mailto:" src)
then obfuscateLink options text src
- else "<a href=\"" ++ (codeStringToHtml src) ++ "\"" ++
+ else "<a href=\"" ++ (escapeSGML src) ++ "\"" ++
(if tit /= "" then " title=\"" ++ title ++ "\">" else ">") ++
(inlineListToHtml options text) ++ "</a>"
inlineToHtml options (Link text (Ref ref)) =
@@ -274,7 +216,7 @@ inlineToHtml options (Link text (Ref ref)) =
(inlineListToHtml options ref) ++ "]"
-- this is what markdown does, for better or worse
inlineToHtml options (Image alt (Src source tit)) =
- let title = attributeStringToHtml tit
+ let title = stringToSGML options tit
alternate = inlineListToHtml options alt in
"<img src=\"" ++ source ++ "\"" ++
(if tit /= "" then " title=\"" ++ title ++ "\"" else "") ++
diff --git a/tests/s5.basic.html b/tests/s5.basic.html
index ac06eabd0..50ce30968 100644
--- a/tests/s5.basic.html
+++ b/tests/s5.basic.html
@@ -768,7 +768,7 @@ window.onresize = function(){setTimeout('fontScale()', 50);}</script>
<div class="slide">
<h1>Smarty</h1>
<ul class="incremental">
-<li>"Hello there"</li>
+<li>&quot;Hello there&quot;</li>
<li>Here's a -- dash</li>
<li>And 'ellipses'...</li>
</ul>
diff --git a/tests/s5.fragment.html b/tests/s5.fragment.html
index 370b9c111..c40f2514e 100644
--- a/tests/s5.fragment.html
+++ b/tests/s5.fragment.html
@@ -6,7 +6,7 @@
<h1>Smarty</h1>
<blockquote>
<ul>
-<li>"Hello there"</li>
+<li>&quot;Hello there&quot;</li>
<li>Here's a -- dash</li>
<li>And 'ellipses'...</li>
</ul>
diff --git a/tests/s5.inserts.html b/tests/s5.inserts.html
index 7be33a2c8..c7e544409 100644
--- a/tests/s5.inserts.html
+++ b/tests/s5.inserts.html
@@ -21,7 +21,7 @@ STUFF INSERTED
<h1>Smarty</h1>
<blockquote>
<ul>
-<li>"Hello there"</li>
+<li>&quot;Hello there&quot;</li>
<li>Here's a -- dash</li>
<li>And 'ellipses'...</li>
</ul>
diff --git a/tests/writer.docbook b/tests/writer.docbook
index 3cf7b6bcf..150b63bac 100644
--- a/tests/writer.docbook
+++ b/tests/writer.docbook
@@ -89,7 +89,7 @@
</para>
<screen>
sub status {
- print "working";
+ print &quot;working&quot;;
}
</screen>
<para>
@@ -133,7 +133,7 @@ sub status {
</para>
<screen>
sub status {
- print "working";
+ print &quot;working&quot;;
}
</screen>
</blockquote>
@@ -177,7 +177,7 @@ sub status {
---- (should be four hyphens)
sub status {
- print "working";
+ print &quot;working&quot;;
}
this code block is indented by one tab
@@ -188,7 +188,7 @@ this code block is indented by one tab
<screen>
this code block is indented by two tabs
-These should not be escaped: \$ \\ \> \[ \{
+These should not be escaped: \$ \\ \&gt; \[ \{
</screen>
</section>
<section>
@@ -577,15 +577,16 @@ These should not be escaped: \$ \\ \> \[ \{
word.
</para>
<para>
- This is code: <literal>></literal>, <literal>$</literal>,
+ This is code: <literal>&gt;</literal>, <literal>$</literal>,
<literal>\</literal>, <literal>\$</literal>,
- <literal>&lt;html></literal>.
+ <literal>&lt;html&gt;</literal>.
</para>
</section>
<section>
<title>Smart quotes, ellipses, dashes</title>
<para>
- "Hello," said the spider. "'Shelob' is my name."
+ &quot;Hello,&quot; said the spider. &quot;'Shelob' is my
+ name.&quot;
</para>
<para>
'A', 'B', and 'C' are letters.
@@ -594,11 +595,11 @@ These should not be escaped: \$ \\ \> \[ \{
'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.'
</para>
<para>
- 'He said, "I want to go."' Were you alive in the 70's?
+ 'He said, &quot;I want to go.&quot;' Were you alive in the 70's?
</para>
<para>
Here is some quoted '<literal>code</literal>' and a
- "<ulink url="http://example.com/?foo=1&amp;bar=2">quoted link</ulink>".
+ &quot;<ulink url="http://example.com/?foo=1&amp;bar=2">quoted link</ulink>&quot;.
</para>
<para>
Some dashes: one---two --- three--four -- five.
@@ -672,7 +673,7 @@ These should not be escaped: \$ \\ \> \[ \{
<listitem>
<para>
$22,000 is a <emphasis>lot</emphasis> of money. So is $34,000. (It
- worked if "lot" is emphasized.)
+ worked if &quot;lot&quot; is emphasized.)
</para>
</listitem>
<listitem>
@@ -909,17 +910,17 @@ Cat &amp; 1 \\ \hline
</blockquote>
<para>
Auto-links should not occur here:
- <literal>&lt;http://example.com/></literal>
+ <literal>&lt;http://example.com/&gt;</literal>
</para>
<screen>
-or here: &lt;http://example.com/>
+or here: &lt;http://example.com/&gt;
</screen>
</section>
</section>
<section>
<title>Images</title>
<para>
- From "Voyage dans la Lune" by Georges Melies (1902):
+ From &quot;Voyage dans la Lune&quot; by Georges Melies (1902):
</para>
<para>
<inlinemediaobject>
@@ -963,7 +964,7 @@ or here: &lt;http://example.com/>
footnote (as with list items).
</para>
<screen>
- { &lt;code> }
+ { &lt;code&gt; }
</screen>
<para>
If you want, you can indent every line, but you can also be lazy
diff --git a/tests/writer.html b/tests/writer.html
index 191b1982e..8915a172c 100644
--- a/tests/writer.html
+++ b/tests/writer.html
@@ -39,7 +39,7 @@ here.</p>
<blockquote>
<p>Code in a block quote:</p>
<pre><code>sub status {
- print "working";
+ print &quot;working&quot;;
}
</code></pre>
<p>A list:</p>
@@ -60,7 +60,7 @@ here.</p>
<blockquote>
<p>Example:</p>
<pre><code>sub status {
- print "working";
+ print &quot;working&quot;;
}
</code></pre>
</blockquote>
@@ -84,7 +84,7 @@ here.</p>
<pre><code>---- (should be four hyphens)
sub status {
- print "working";
+ print &quot;working&quot;;
}
this code block is indented by one tab
@@ -92,7 +92,7 @@ this code block is indented by one tab
<p>And:</p>
<pre><code> this code block is indented by two tabs
-These should not be escaped: \$ \\ \> \[ \{
+These should not be escaped: \$ \\ \&gt; \[ \{
</code></pre>
<hr />
<h1>Lists</h1>
@@ -255,12 +255,12 @@ These should not be escaped: \$ \\ \> \[ \{
<div>
foo</div>
<p>This should be a code block, though:</p>
-<pre><code>&lt;div>
+<pre><code>&lt;div&gt;
foo
-&lt;/div>
+&lt;/div&gt;
</code></pre>
<p>As should this:</p>
-<pre><code>&lt;div>foo&lt;/div>
+<pre><code>&lt;div&gt;foo&lt;/div&gt;
</code></pre>
<p>Now, nested:</p>
<div>
@@ -281,12 +281,12 @@ Blah
This is another comment.
-->
<p>Code block:</p>
-<pre><code>&lt;!-- Comment -->
+<pre><code>&lt;!-- Comment --&gt;
</code></pre>
<p>Just plain comment, with trailing spaces on the line:</p>
<!-- foo -->
<p>Code:</p>
-<pre><code>&lt;hr />
+<pre><code>&lt;hr /&gt;
</code></pre>
<p>Hr's:</p>
<hr>
@@ -315,14 +315,14 @@ Blah
<p>So is <strong><em>this</em></strong> word.</p>
<p><strong><em>This is strong and em.</em></strong></p>
<p>So is <strong><em>this</em></strong> word.</p>
-<p>This is code: <code>></code>, <code>$</code>, <code>\</code>, <code>\$</code>, <code>&lt;html></code>.</p>
+<p>This is code: <code>&gt;</code>, <code>$</code>, <code>\</code>, <code>\$</code>, <code>&lt;html&gt;</code>.</p>
<hr />
<h1>Smart quotes, ellipses, dashes</h1>
-<p>"Hello," said the spider. "'Shelob' is my name."</p>
+<p>&quot;Hello,&quot; said the spider. &quot;'Shelob' is my name.&quot;</p>
<p>'A', 'B', and 'C' are letters.</p>
<p>'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.'</p>
-<p>'He said, "I want to go."' Were you alive in the 70's?</p>
-<p>Here is some quoted '<code>code</code>' and a "<a href="http://example.com/?foo=1&amp;bar=2">quoted link</a>".</p>
+<p>'He said, &quot;I want to go.&quot;' Were you alive in the 70's?</p>
+<p>Here is some quoted '<code>code</code>' and a &quot;<a href="http://example.com/?foo=1&amp;bar=2">quoted link</a>&quot;.</p>
<p>Some dashes: one---two --- three--four -- five.</p>
<p>Dashes between numbers: 5-7, 255-66, 1987-1999.</p>
<p>Ellipses...and. . .and . . . .</p>
@@ -342,7 +342,7 @@ Blah
<p>These shouldn't be math:</p>
<ul>
<li>To get the famous equation, write <code>$e = mc^2$</code>.</li>
-<li>$22,000 is a <em>lot</em> of money. So is $34,000. (It worked if "lot" is emphasized.)</li>
+<li>$22,000 is a <em>lot</em> of money. So is $34,000. (It worked if &quot;lot&quot; is emphasized.)</li>
<li>Escaped <code>$</code>: $73 <em>this should be emphasized</em> 23$.</li>
</ul>
<p>Here's a LaTeX table:</p>
@@ -355,11 +355,11 @@ Cat &amp; 1 \\ \hline
<h1>Special Characters</h1>
<p>Here is some unicode:</p>
<ul>
-<li>I hat: Î</li>
-<li>o umlaut: ö</li>
-<li>section: §</li>
-<li>set membership: ∈</li>
-<li>copyright: ©</li>
+<li>I hat: &Icirc;</li>
+<li>o umlaut: &ouml;</li>
+<li>section: &sect;</li>
+<li>set membership: &isin;</li>
+<li>copyright: &copy;</li>
</ul>
<p>AT&amp;T has an ampersand in their name.</p>
<p>AT&amp;T is another way to write it.</p>
@@ -414,7 +414,7 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'Email link'+'<\/'+'a'+'>')
<p>Foo <a href="/url/" title="Title with &quot;quote&quot; inside">biz</a>.</p>
<h2>With ampersands</h2>
<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link with an ampersand in the URL</a>.</p>
-<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&T">AT&amp;T</a>.</p>
+<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>
<p>Here's an <a href="/script?foo=1&amp;bar=2">inline link</a>.</p>
<p>Here's an <a href="/script?foo=1&amp;bar=2">inline link in pointy braces</a>.</p>
<h2>Autolinks</h2>
@@ -433,12 +433,12 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+e+'<\/'+'a'+'>');
<blockquote>
<p>Blockquoted: <a href="http://example.com/">http://example.com/</a></p>
</blockquote>
-<p>Auto-links should not occur here: <code>&lt;http://example.com/></code></p>
-<pre><code>or here: &lt;http://example.com/>
+<p>Auto-links should not occur here: <code>&lt;http://example.com/&gt;</code></p>
+<pre><code>or here: &lt;http://example.com/&gt;
</code></pre>
<hr />
<h1>Images</h1>
-<p>From "Voyage dans la Lune" by Georges Melies (1902):</p>
+<p>From &quot;Voyage dans la Lune&quot; by Georges Melies (1902):</p>
<p><img src="lalune.jpg" title="Voyage dans la Lune" alt="lalune"></p>
<p>Here is a movie <img src="movie.jpg" alt="movie"> icon.</p>
<hr />
@@ -458,7 +458,7 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+e+'<\/'+'a'+'>');
<a href="#fnref1" class="footnoteBacklink" title="Jump back to footnote 1">&#8617;</a></li>
<li id="fn2"><p>Here's the long note. This one contains multiple blocks.</p>
<p>Subsequent blocks are indented to show that they belong to the footnote (as with list items).</p>
-<pre><code> { &lt;code> }
+<pre><code> { &lt;code&gt; }
</code></pre>
<p>If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.</p>
<a href="#fnref2" class="footnoteBacklink" title="Jump back to footnote 2">&#8617;</a></li>
diff --git a/tests/writer.smart.html b/tests/writer.smart.html
index c14a6de54..14b70e2fe 100644
--- a/tests/writer.smart.html
+++ b/tests/writer.smart.html
@@ -39,7 +39,7 @@ here.</p>
<blockquote>
<p>Code in a block quote:</p>
<pre><code>sub status {
- print "working";
+ print &quot;working&quot;;
}
</code></pre>
<p>A list:</p>
@@ -60,7 +60,7 @@ here.</p>
<blockquote>
<p>Example:</p>
<pre><code>sub status {
- print "working";
+ print &quot;working&quot;;
}
</code></pre>
</blockquote>
@@ -84,7 +84,7 @@ here.</p>
<pre><code>---- (should be four hyphens)
sub status {
- print "working";
+ print &quot;working&quot;;
}
this code block is indented by one tab
@@ -92,7 +92,7 @@ this code block is indented by one tab
<p>And:</p>
<pre><code> this code block is indented by two tabs
-These should not be escaped: \$ \\ \> \[ \{
+These should not be escaped: \$ \\ \&gt; \[ \{
</code></pre>
<hr />
<h1>Lists</h1>
@@ -255,12 +255,12 @@ These should not be escaped: \$ \\ \> \[ \{
<div>
foo</div>
<p>This should be a code block, though:</p>
-<pre><code>&lt;div>
+<pre><code>&lt;div&gt;
foo
-&lt;/div>
+&lt;/div&gt;
</code></pre>
<p>As should this:</p>
-<pre><code>&lt;div>foo&lt;/div>
+<pre><code>&lt;div&gt;foo&lt;/div&gt;
</code></pre>
<p>Now, nested:</p>
<div>
@@ -281,12 +281,12 @@ Blah
This is another comment.
-->
<p>Code block:</p>
-<pre><code>&lt;!-- Comment -->
+<pre><code>&lt;!-- Comment --&gt;
</code></pre>
<p>Just plain comment, with trailing spaces on the line:</p>
<!-- foo -->
<p>Code:</p>
-<pre><code>&lt;hr />
+<pre><code>&lt;hr /&gt;
</code></pre>
<p>Hr&rsquo;s:</p>
<hr>
@@ -315,7 +315,7 @@ Blah
<p>So is <strong><em>this</em></strong> word.</p>
<p><strong><em>This is strong and em.</em></strong></p>
<p>So is <strong><em>this</em></strong> word.</p>
-<p>This is code: <code>></code>, <code>$</code>, <code>\</code>, <code>\$</code>, <code>&lt;html></code>.</p>
+<p>This is code: <code>&gt;</code>, <code>$</code>, <code>\</code>, <code>\$</code>, <code>&lt;html&gt;</code>.</p>
<hr />
<h1>Smart quotes, ellipses, dashes</h1>
<p>&ldquo;Hello,&rdquo; said the spider. &ldquo;&lsquo;Shelob&rsquo; is my name.&rdquo;</p>
@@ -355,11 +355,11 @@ Cat &amp; 1 \\ \hline
<h1>Special Characters</h1>
<p>Here is some unicode:</p>
<ul>
-<li>I hat: Î</li>
-<li>o umlaut: ö</li>
-<li>section: §</li>
-<li>set membership: ∈</li>
-<li>copyright: ©</li>
+<li>I hat: &Icirc;</li>
+<li>o umlaut: &ouml;</li>
+<li>section: &sect;</li>
+<li>set membership: &isin;</li>
+<li>copyright: &copy;</li>
</ul>
<p>AT&amp;T has an ampersand in their name.</p>
<p>AT&amp;T is another way to write it.</p>
@@ -389,7 +389,7 @@ Cat &amp; 1 \\ \hline
<p><a href="/url/" title="title">URL and title</a>.</p>
<p><a href="/url/" title="title preceded by two spaces">URL and title</a>.</p>
<p><a href="/url/" title="title preceded by a tab">URL and title</a>.</p>
-<p><a href="/url/" title="title with &quot;quotes&quot; in it">URL and title</a></p>
+<p><a href="/url/" title="title with &ldquo;quotes&rdquo; in it">URL and title</a></p>
<p><a href="/url/" title="title with single quotes">URL and title</a></p>
<p><script type="text/javascript">
<!--
@@ -410,11 +410,11 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'Email link'+'<\/'+'a'+'>')
<p>This should [not][] be a link.</p>
<pre><code>[not]: /url
</code></pre>
-<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>
-<p>Foo <a href="/url/" title="Title with &quot;quote&quot; inside">biz</a>.</p>
+<p>Foo <a href="/url/" title="Title with &ldquo;quotes&rdquo; inside">bar</a>.</p>
+<p>Foo <a href="/url/" title="Title with &ldquo;quote&rdquo; inside">biz</a>.</p>
<h2>With ampersands</h2>
<p>Here&rsquo;s a <a href="http://example.com/?foo=1&amp;bar=2">link with an ampersand in the URL</a>.</p>
-<p>Here&rsquo;s a link with an amersand in the link text: <a href="http://att.com/" title="AT&T">AT&amp;T</a>.</p>
+<p>Here&rsquo;s a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>
<p>Here&rsquo;s an <a href="/script?foo=1&amp;bar=2">inline link</a>.</p>
<p>Here&rsquo;s an <a href="/script?foo=1&amp;bar=2">inline link in pointy braces</a>.</p>
<h2>Autolinks</h2>
@@ -433,8 +433,8 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+e+'<\/'+'a'+'>');
<blockquote>
<p>Blockquoted: <a href="http://example.com/">http://example.com/</a></p>
</blockquote>
-<p>Auto-links should not occur here: <code>&lt;http://example.com/></code></p>
-<pre><code>or here: &lt;http://example.com/>
+<p>Auto-links should not occur here: <code>&lt;http://example.com/&gt;</code></p>
+<pre><code>or here: &lt;http://example.com/&gt;
</code></pre>
<hr />
<h1>Images</h1>
@@ -458,7 +458,7 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+e+'<\/'+'a'+'>');
<a href="#fnref1" class="footnoteBacklink" title="Jump back to footnote 1">&#8617;</a></li>
<li id="fn2"><p>Here&rsquo;s the long note. This one contains multiple blocks.</p>
<p>Subsequent blocks are indented to show that they belong to the footnote (as with list items).</p>
-<pre><code> { &lt;code> }
+<pre><code> { &lt;code&gt; }
</code></pre>
<p>If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.</p>
<a href="#fnref2" class="footnoteBacklink" title="Jump back to footnote 2">&#8617;</a></li>