diff options
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 98 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 196 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 78 | ||||
-rw-r--r-- | tests/s5.basic.html | 2 | ||||
-rw-r--r-- | tests/s5.fragment.html | 2 | ||||
-rw-r--r-- | tests/s5.inserts.html | 2 | ||||
-rw-r--r-- | tests/writer.docbook | 29 | ||||
-rw-r--r-- | tests/writer.html | 48 | ||||
-rw-r--r-- | tests/writer.smart.html | 44 |
10 files changed, 252 insertions, 249 deletions
diff --git a/debian/changelog b/debian/changelog index 3fa92832f..0b8db6e8c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -186,6 +186,8 @@ pandoc (0.3) unstable; urgency=low + Renamed 'Text/Pandoc/HtmlEntities' module to 'Text/Pandoc/Entities'. Also changed function names so as not to be HTML-specific. + + Refactored SGML string escaping functions from HTML and Docbook + writers into Text/Pandoc/Shared. (escapeSGML, stringToSGML) + Removed 'BlockQuoteContext' from ParserContext, as it isn't used anywhere. + Removed splitBySpace and replaced it with a general, polymorphic diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d4687b10e..d1d40ac23 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -28,6 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Utility functions and definitions used by the various Pandoc modules. -} module Text.Pandoc.Shared ( + -- * List processing + splitBy, -- * Text processing gsub, joinWithSep, @@ -52,7 +54,6 @@ module Text.Pandoc.Shared ( -- * Pandoc block list processing consolidateList, isNoteBlock, - splitBy, normalizeSpaces, compactify, generateReference, @@ -62,12 +63,21 @@ module Text.Pandoc.Shared ( lookupKeySrc, refsMatch, replaceReferenceLinks, - replaceRefLinksBlockList + replaceRefLinksBlockList, + -- * SGML + escapeSGML, + stringToSGML, + inTags, + selfClosingTag, + inTagsSimple, + inTagsIndented ) where import Text.Pandoc.Definition import Text.ParserCombinators.Parsec -import Text.Pandoc.Entities ( decodeEntities ) +import Text.Pandoc.Entities ( decodeEntities, encodeEntities ) import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex ) +import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>), ($$), nest, Doc, + isEmpty ) import Char ( toLower ) import List ( find, groupBy ) @@ -507,3 +517,85 @@ replaceRefLinksInline keytable (Emph lst) = replaceRefLinksInline keytable (Strong lst) = Strong (map (replaceRefLinksInline keytable) lst) replaceRefLinksInline keytable other = other + +-- | Escape string, preserving character entities and quote, and adding +-- smart typography if specified. +stringToSGML :: WriterOptions -> String -> String +stringToSGML options = + let escapeDoubleQuotes = + gsub "(\"|")" "”" . -- rest are right quotes + gsub "(\"|")(&r[sd]quo;)" "”\\2" . + -- never left quo before right quo + gsub "(&l[sd]quo;)(\"|")" "\\2“" . + -- never right quo after left quo + gsub "([ \t])(\"|")" "\\1“" . + -- never right quo after space + gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left + gsub "(\"|")('|`|‘)" "”’" . + -- right if it got through last filter + gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" . + -- "'word left + gsub "``" "“" . + gsub "''" "”" + escapeSingleQuotes = + gsub "'" "’" . -- otherwise right + gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo + gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo + gsub "([ \t])'" "\\1‘" . -- never right quo after space + gsub "`" "‘" . -- ` is left + gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right + gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left + gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left + gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive + gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left + gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. + escapeDashes = + gsub " ?-- ?" "—" . + gsub " ?--- ?" "—" . + gsub "([0-9])--?([0-9])" "\\1–\\2" + escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" + smartFilter = escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . + escapeEllipses in + encodeEntities . (if (writerSmart options) then smartFilter else id) . + (escapePreservingRegex escapeSGML (mkRegex "&[[:alnum:]]*;")) + +-- | Escape string as needed for HTML. Entity references are not preserved. +escapeSGML :: String -> String +escapeSGML [] = [] +escapeSGML (x:xs) = case x of + '&' -> "&" ++ escapeSGML xs + '<' -> "<" ++ escapeSGML xs + '>' -> ">" ++ escapeSGML xs + '"' -> """ ++ escapeSGML xs + _ -> x:(escapeSGML xs) + +-- | Return a text object with a string of formatted SGML attributes. +attributeList :: WriterOptions -> [(String, String)] -> Doc +attributeList options = + text . concatMap (\(a, b) -> " " ++ stringToSGML options a ++ "=\"" ++ + stringToSGML options b ++ "\"") + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes and (if specified) indentation. +inTags:: Bool -> WriterOptions -> String -> [(String, String)] -> Doc -> Doc +inTags isIndented options tagType attribs contents = + let openTag = PP.char '<' <> text tagType <> attributeList options attribs <> + PP.char '>' + closeTag = text "</" <> text tagType <> PP.char '>' in + if isIndented + then openTag $$ nest 2 contents $$ closeTag + else openTag <> contents <> closeTag + +-- | Return a self-closing tag of tagType with specified attributes +selfClosingTag :: WriterOptions -> String -> [(String, String)] -> Doc +selfClosingTag options tagType attribs = + PP.char '<' <> text tagType <> attributeList options attribs <> text " />" + +-- | Put the supplied contents between start and end tags of tagType. +inTagsSimple :: WriterOptions -> String -> Doc -> Doc +inTagsSimple options tagType = inTags False options tagType [] + +-- | Put the supplied contents in indented block btw start and end tags. +inTagsIndented :: WriterOptions -> String -> Doc -> Doc +inTagsIndented options tagType = inTags True options tagType [] + diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 86dbbf6db..0fa4a1d98 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -32,9 +32,7 @@ module Text.Pandoc.Writers.Docbook ( ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Writers.HTML ( stringToSmartHtml, stringToHtml ) import Text.Pandoc.Entities ( encodeEntities ) -import Text.Html ( stringToHtmlString ) import Text.Regex ( mkRegex, matchRegex ) import Data.Char ( toLower, ord ) import Data.List ( isPrefixOf, partition ) @@ -54,19 +52,21 @@ hierarchicalize :: [Block] -> [Element] hierarchicalize [] = [] hierarchicalize (block:rest) = case block of - (Header level title) -> let (thisSection, rest') = break (headerAtLeast level) rest in - (Sec title (hierarchicalize thisSection)):(hierarchicalize rest') + (Header level title) -> let (thisSection, rest') = break (headerAtLeast + level) rest in + (Sec title (hierarchicalize thisSection)): + (hierarchicalize rest') x -> (Blk x):(hierarchicalize rest) -- | Convert list of authors to a docbook <author> section authorToDocbook :: WriterOptions -> [Char] -> Doc -authorToDocbook options name = indentedInTags "author" $ +authorToDocbook opts name = inTagsIndented opts "author" $ if ',' `elem` name then -- last name first let (lastname, rest) = break (==',') name firstname = removeLeadingSpace rest in - inTags "firstname" (text $ stringToXML options firstname) <> - inTags "surname" (text $ stringToXML options lastname) + inTagsSimple opts "firstname" (text $ stringToSGML opts firstname) <> + inTagsSimple opts "surname" (text $ stringToSGML opts lastname) else -- last name last let namewords = words name lengthname = length namewords @@ -74,101 +74,82 @@ authorToDocbook options name = indentedInTags "author" $ 0 -> ("","") 1 -> ("", name) n -> (joinWithSep " " (take (n-1) namewords), last namewords) in - inTags "firstname" (text $ stringToXML options firstname) $$ - inTags "surname" (text $ stringToXML options lastname) + inTagsSimple opts "firstname" (text $ stringToSGML opts firstname) $$ + inTagsSimple opts "surname" (text $ stringToSGML opts lastname) -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook options (Pandoc (Meta title authors date) blocks) = - let head = if (writerStandalone options) - then text (writerHeader options) +writeDocbook opts (Pandoc (Meta title authors date) blocks) = + let head = if (writerStandalone opts) + then text (writerHeader opts) else empty - meta = if (writerStandalone options) - then indentedInTags "articleinfo" $ - (inTags "title" (inlinesToDocbook options title)) $$ - (vcat (map (authorToDocbook options) authors)) $$ - (inTags "date" (text date)) + meta = if (writerStandalone opts) + then inTagsIndented opts "articleinfo" $ + (inTagsSimple opts "title" (inlinesToDocbook opts title)) $$ + (vcat (map (authorToDocbook opts) authors)) $$ + (inTagsSimple opts "date" (text $ stringToSGML opts date)) else empty blocks' = replaceReferenceLinks blocks (noteBlocks, blocks'') = partition isNoteBlock blocks' - options' = options {writerNotes = noteBlocks} + opts' = opts {writerNotes = noteBlocks} elements = hierarchicalize blocks'' - body = text (writerIncludeBefore options') <> - vcat (map (elementToDocbook options') elements) $$ - text (writerIncludeAfter options') - body' = if writerStandalone options' - then indentedInTags "article" (meta $$ body) + body = text (writerIncludeBefore opts') <> + vcat (map (elementToDocbook opts') elements) $$ + text (writerIncludeAfter opts') + body' = if writerStandalone opts' + then inTagsIndented opts "article" (meta $$ body) else body in render $ head $$ body' <> text "\n" --- | Put the supplied contents between start and end tags of tagType, --- with specified attributes. -inTagsWithAttrib :: String -> [(String, String)] -> Doc -> Doc -inTagsWithAttrib tagType attribs contents = text ("<" ++ tagType ++ - (concatMap (\(a, b) -> " " ++ attributeStringToXML a ++ - "=\"" ++ attributeStringToXML b ++ "\"") attribs)) <> - if isEmpty contents - then text " />" -- self-closing tag - else text ">" <> contents <> text ("</" ++ tagType ++ ">") - --- | Put the supplied contents between start and end tags of tagType. -inTags :: String -> Doc -> Doc -inTags tagType contents = inTagsWithAttrib tagType [] contents - --- | Put the supplied contents in indented block btw start and end tags. -indentedInTags :: [Char] -> Doc -> Doc -indentedInTags tagType contents = text ("<" ++ tagType ++ ">") $$ - nest 2 contents $$ text ("</" ++ tagType ++ ">") - -- | Convert an Element to Docbook. elementToDocbook :: WriterOptions -> Element -> Doc -elementToDocbook options (Blk block) = blockToDocbook options block -elementToDocbook options (Sec title elements) = +elementToDocbook opts (Blk block) = blockToDocbook opts block +elementToDocbook opts (Sec title elements) = -- Docbook doesn't allow sections with no content, so insert some if needed let elements' = if null elements then [Blk (Para [])] else elements in - indentedInTags "section" $ - inTags "title" (wrap options title) $$ - vcat (map (elementToDocbook options) elements') + inTagsIndented opts "section" $ + inTagsSimple opts "title" (wrap opts title) $$ + vcat (map (elementToDocbook opts) elements') -- | Convert a list of Pandoc blocks to Docbook. blocksToDocbook :: WriterOptions -> [Block] -> Doc -blocksToDocbook options = vcat . map (blockToDocbook options) +blocksToDocbook opts = vcat . map (blockToDocbook opts) -- | Convert a list of lists of blocks to a list of Docbook list items. listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc -listItemsToDocbook options items = - vcat $ map (listItemToDocbook options) items +listItemsToDocbook opts items = + vcat $ map (listItemToDocbook opts) items -- | Convert a list of blocks into a Docbook list item. listItemToDocbook :: WriterOptions -> [Block] -> Doc -listItemToDocbook options item = +listItemToDocbook opts item = let plainToPara (Plain x) = Para x plainToPara y = y in let item' = map plainToPara item in - indentedInTags "listitem" (blocksToDocbook options item') + inTagsIndented opts "listitem" (blocksToDocbook opts item') -- | Convert a Pandoc block element to Docbook. blockToDocbook :: WriterOptions -> Block -> Doc -blockToDocbook options Blank = text "" -blockToDocbook options Null = empty -blockToDocbook options (Plain lst) = wrap options lst -blockToDocbook options (Para lst) = - indentedInTags "para" (wrap options lst) -blockToDocbook options (BlockQuote blocks) = - indentedInTags "blockquote" (blocksToDocbook options blocks) -blockToDocbook options (CodeBlock str) = - text "<screen>\n" <> text (codeStringToXML str) <> text "\n</screen>" -blockToDocbook options (BulletList lst) = - indentedInTags "itemizedlist" $ listItemsToDocbook options lst -blockToDocbook options (OrderedList lst) = - indentedInTags "orderedlist" $ listItemsToDocbook options lst -blockToDocbook options (RawHtml str) = text str -- raw XML block -blockToDocbook options HorizontalRule = empty -- not semantic -blockToDocbook options (Note _ _) = empty -- shouldn't occur -blockToDocbook options (Key _ _) = empty -- shouldn't occur -blockToDocbook options _ = indentedInTags "para" (text "Unknown block type") +blockToDocbook opts Blank = text "" +blockToDocbook opts Null = empty +blockToDocbook opts (Plain lst) = wrap opts lst +blockToDocbook opts (Para lst) = + inTagsIndented opts "para" (wrap opts lst) +blockToDocbook opts (BlockQuote blocks) = + inTagsIndented opts "blockquote" (blocksToDocbook opts blocks) +blockToDocbook opts (CodeBlock str) = + text "<screen>\n" <> text (escapeSGML str) <> text "\n</screen>" +blockToDocbook opts (BulletList lst) = + inTagsIndented opts "itemizedlist" $ listItemsToDocbook opts lst +blockToDocbook opts (OrderedList lst) = + inTagsIndented opts "orderedlist" $ listItemsToDocbook opts lst +blockToDocbook opts (RawHtml str) = text str -- raw XML block +blockToDocbook opts HorizontalRule = empty -- not semantic +blockToDocbook opts (Note _ _) = empty -- shouldn't occur +blockToDocbook opts (Key _ _) = empty -- shouldn't occur +blockToDocbook opts _ = inTagsIndented opts "para" (text "Unknown block type") -- | Put string in CDATA section cdata :: String -> Doc @@ -176,62 +157,47 @@ cdata str = text $ "<![CDATA[" ++ str ++ "]]>" -- | Take list of inline elements and return wrapped doc. wrap :: WriterOptions -> [Inline] -> Doc -wrap options lst = fsep $ map (inlinesToDocbook options) (splitBy Space lst) - --- | Escape a string for XML (with "smart" option if specified). -stringToXML :: WriterOptions -> String -> String -stringToXML options = encodeEntities . - (if writerSmart options - then stringToSmartHtml - else stringToHtml) - --- | Escape string to XML appropriate for attributes -attributeStringToXML :: String -> String -attributeStringToXML = gsub "\"" """ . codeStringToXML - --- | Escape a literal string for XML. -codeStringToXML :: String -> String -codeStringToXML = encodeEntities . gsub "<" "<" . gsub "&" "&" +wrap opts lst = fsep $ map (inlinesToDocbook opts) (splitBy Space lst) -- | Convert a list of inline elements to Docbook. inlinesToDocbook :: WriterOptions -> [Inline] -> Doc -inlinesToDocbook options lst = hcat (map (inlineToDocbook options) lst) +inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst) -- | Convert an inline element to Docbook. inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook options (Str str) = text $ stringToXML options str -inlineToDocbook options (Emph lst) = - inTags "emphasis" (inlinesToDocbook options lst) -inlineToDocbook options (Strong lst) = - inTagsWithAttrib "emphasis" [("role", "strong")] - (inlinesToDocbook options lst) -inlineToDocbook options (Code str) = - inTags "literal" $ text (codeStringToXML str) -inlineToDocbook options (TeX str) = inlineToDocbook options (Code str) -inlineToDocbook options (HtmlInline str) = empty -inlineToDocbook options LineBreak = +inlineToDocbook opts (Str str) = text $ stringToSGML opts str +inlineToDocbook opts (Emph lst) = + inTagsSimple opts "emphasis" (inlinesToDocbook opts lst) +inlineToDocbook opts (Strong lst) = + inTags False opts "emphasis" [("role", "strong")] + (inlinesToDocbook opts lst) +inlineToDocbook opts (Code str) = + inTagsSimple opts "literal" $ text (escapeSGML str) +inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str) +inlineToDocbook opts (HtmlInline str) = empty +inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>" -inlineToDocbook options Space = char ' ' -inlineToDocbook options (Link txt (Src src tit)) = +inlineToDocbook opts Space = char ' ' +inlineToDocbook opts (Link txt (Src src tit)) = case (matchRegex (mkRegex "mailto:(.*)") src) of - Just [addr] -> inTags "email" $ text (codeStringToXML addr) - Nothing -> inTagsWithAttrib "ulink" [("url", src)] $ - inlinesToDocbook options txt -inlineToDocbook options (Link text (Ref ref)) = empty -- shouldn't occur -inlineToDocbook options (Image alt (Src src tit)) = + Just [addr] -> inTagsSimple opts "email" $ text (escapeSGML addr) + Nothing -> inTags False opts "ulink" [("url", src)] $ + inlinesToDocbook opts txt +inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur +inlineToDocbook opts (Image alt (Src src tit)) = let titleDoc = if null tit then empty - else indentedInTags "objectinfo" $ - indentedInTags "title" - (text $ stringToXML options tit) in - indentedInTags "inlinemediaobject" $ - indentedInTags "imageobject" $ - titleDoc $$ inTagsWithAttrib "imagedata" [("fileref", src)] empty -inlineToDocbook options (Image alternate (Ref ref)) = empty --shouldn't occur -inlineToDocbook options (NoteRef ref) = - let notes = writerNotes options + else inTagsIndented opts "objectinfo" $ + inTagsIndented opts "title" + (text $ stringToSGML opts tit) in + inTagsIndented opts "inlinemediaobject" $ + inTagsIndented opts "imageobject" $ + titleDoc $$ selfClosingTag opts "imagedata" [("fileref", src)] +inlineToDocbook opts (Image alternate (Ref ref)) = empty --shouldn't occur +inlineToDocbook opts (NoteRef ref) = + let notes = writerNotes opts hits = filter (\(Note r _) -> r == ref) notes in if null hits then empty else let (Note _ contents) = head hits in - indentedInTags "footnote" $ blocksToDocbook options contents + inTagsIndented opts "footnote" $ blocksToDocbook opts contents diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 8de1de43f..b42d78eb0 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -29,12 +29,10 @@ Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml, - stringToSmartHtml, - stringToHtml ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Html ( stringToHtmlString ) +import Text.Pandoc.Entities ( encodeEntities ) import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) @@ -115,61 +113,6 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar --- | Escape string, preserving character entities and quote. -stringToHtml :: String -> String -stringToHtml str = escapePreservingRegex stringToHtmlString - (mkRegex "\"|(&[[:alnum:]]*;)") str - --- | Escape string as in 'stringToHtml' but add smart typography filter. -stringToSmartHtml :: String -> String -stringToSmartHtml = - let escapeDoubleQuotes = - gsub "(\"|")" "”" . -- rest are right quotes - gsub "(\"|")(&r[sd]quo;)" "”\\2" . - -- never left quo before right quo - gsub "(&l[sd]quo;)(\"|")" "\\2“" . - -- never right quo after left quo - gsub "([ \t])(\"|")" "\\1“" . - -- never right quo after space - gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left - gsub "(\"|")('|`|‘)" "”’" . - -- right if it got through last filter - gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" . - -- "'word left - gsub "``" "“" . - gsub "''" "”" - escapeSingleQuotes = - gsub "'" "’" . -- otherwise right - gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo - gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo - gsub "([ \t])'" "\\1‘" . -- never right quo after space - gsub "`" "‘" . -- ` is left - gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right - gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left - gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left - gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive - gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left - gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. - escapeDashes = - gsub " ?-- ?" "—" . - gsub " ?--- ?" "—" . - gsub "([0-9])--?([0-9])" "\\1–\\2" - escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" in - escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . - escapeEllipses . stringToHtml - --- | Escape code string as needed for HTML. -codeStringToHtml :: String -> String -codeStringToHtml [] = [] -codeStringToHtml (x:xs) = case x of - '&' -> "&" ++ codeStringToHtml xs - '<' -> "<" ++ codeStringToHtml xs - _ -> x:(codeStringToHtml xs) - --- | Escape string to HTML appropriate for attributes -attributeStringToHtml :: String -> String -attributeStringToHtml = gsub "\"" """ - -- | Returns an HTML header with appropriate bibliographic information. htmlHeader :: WriterOptions -> Meta -> String htmlHeader options (Meta title authors date) = @@ -178,12 +121,12 @@ htmlHeader options (Meta title authors date) = authortext = if (null authors) then "" else "<meta name=\"author\" content=\"" ++ - (joinWithSep ", " (map stringToHtml authors)) ++ + (joinWithSep ", " (map (stringToSGML options) authors)) ++ "\" />\n" datetext = if (date == "") then "" else "<meta name=\"date\" content=\"" ++ - (stringToHtml date) ++ "\" />\n" in + (stringToSGML options date) ++ "\" />\n" in (writerHeader options) ++ authortext ++ datetext ++ titletext ++ "</head>\n<body>\n" @@ -216,7 +159,7 @@ blockToHtml options (Note ref lst) = "\">↩</a></li>\n" blockToHtml options (Key _ _) = "" blockToHtml options (CodeBlock str) = - "<pre><code>" ++ (codeStringToHtml str) ++ "\n</code></pre>\n" + "<pre><code>" ++ (escapeSGML str) ++ "\n</code></pre>\n" blockToHtml options (RawHtml str) = str blockToHtml options (BulletList lst) = let attribs = if (writerIncremental options) @@ -255,18 +198,17 @@ inlineToHtml options (Emph lst) = inlineToHtml options (Strong lst) = "<strong>" ++ (inlineListToHtml options lst) ++ "</strong>" inlineToHtml options (Code str) = - "<code>" ++ (codeStringToHtml str) ++ "</code>" -inlineToHtml options (Str str) = - if (writerSmart options) then stringToSmartHtml str else stringToHtml str -inlineToHtml options (TeX str) = (codeStringToHtml str) + "<code>" ++ (escapeSGML str) ++ "</code>" +inlineToHtml options (Str str) = stringToSGML options str +inlineToHtml options (TeX str) = (escapeSGML str) inlineToHtml options (HtmlInline str) = str inlineToHtml options (LineBreak) = "<br />\n" inlineToHtml options Space = " " inlineToHtml options (Link text (Src src tit)) = - let title = attributeStringToHtml tit in + let title = stringToSGML options tit in if (isPrefixOf "mailto:" src) then obfuscateLink options text src - else "<a href=\"" ++ (codeStringToHtml src) ++ "\"" ++ + else "<a href=\"" ++ (escapeSGML src) ++ "\"" ++ (if tit /= "" then " title=\"" ++ title ++ "\">" else ">") ++ (inlineListToHtml options text) ++ "</a>" inlineToHtml options (Link text (Ref ref)) = @@ -274,7 +216,7 @@ inlineToHtml options (Link text (Ref ref)) = (inlineListToHtml options ref) ++ "]" -- this is what markdown does, for better or worse inlineToHtml options (Image alt (Src source tit)) = - let title = attributeStringToHtml tit + let title = stringToSGML options tit alternate = inlineListToHtml options alt in "<img src=\"" ++ source ++ "\"" ++ (if tit /= "" then " title=\"" ++ title ++ "\"" else "") ++ diff --git a/tests/s5.basic.html b/tests/s5.basic.html index ac06eabd0..50ce30968 100644 --- a/tests/s5.basic.html +++ b/tests/s5.basic.html @@ -768,7 +768,7 @@ window.onresize = function(){setTimeout('fontScale()', 50);}</script> <div class="slide"> <h1>Smarty</h1> <ul class="incremental"> -<li>"Hello there"</li> +<li>"Hello there"</li> <li>Here's a -- dash</li> <li>And 'ellipses'...</li> </ul> diff --git a/tests/s5.fragment.html b/tests/s5.fragment.html index 370b9c111..c40f2514e 100644 --- a/tests/s5.fragment.html +++ b/tests/s5.fragment.html @@ -6,7 +6,7 @@ <h1>Smarty</h1> <blockquote> <ul> -<li>"Hello there"</li> +<li>"Hello there"</li> <li>Here's a -- dash</li> <li>And 'ellipses'...</li> </ul> diff --git a/tests/s5.inserts.html b/tests/s5.inserts.html index 7be33a2c8..c7e544409 100644 --- a/tests/s5.inserts.html +++ b/tests/s5.inserts.html @@ -21,7 +21,7 @@ STUFF INSERTED <h1>Smarty</h1> <blockquote> <ul> -<li>"Hello there"</li> +<li>"Hello there"</li> <li>Here's a -- dash</li> <li>And 'ellipses'...</li> </ul> diff --git a/tests/writer.docbook b/tests/writer.docbook index 3cf7b6bcf..150b63bac 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -89,7 +89,7 @@ </para> <screen> sub status { - print "working"; + print "working"; } </screen> <para> @@ -133,7 +133,7 @@ sub status { </para> <screen> sub status { - print "working"; + print "working"; } </screen> </blockquote> @@ -177,7 +177,7 @@ sub status { ---- (should be four hyphens) sub status { - print "working"; + print "working"; } this code block is indented by one tab @@ -188,7 +188,7 @@ this code block is indented by one tab <screen> this code block is indented by two tabs -These should not be escaped: \$ \\ \> \[ \{ +These should not be escaped: \$ \\ \> \[ \{ </screen> </section> <section> @@ -577,15 +577,16 @@ These should not be escaped: \$ \\ \> \[ \{ word. </para> <para> - This is code: <literal>></literal>, <literal>$</literal>, + This is code: <literal>></literal>, <literal>$</literal>, <literal>\</literal>, <literal>\$</literal>, - <literal><html></literal>. + <literal><html></literal>. </para> </section> <section> <title>Smart quotes, ellipses, dashes</title> <para> - "Hello," said the spider. "'Shelob' is my name." + "Hello," said the spider. "'Shelob' is my + name." </para> <para> 'A', 'B', and 'C' are letters. @@ -594,11 +595,11 @@ These should not be escaped: \$ \\ \> \[ \{ 'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.' </para> <para> - 'He said, "I want to go."' Were you alive in the 70's? + 'He said, "I want to go."' Were you alive in the 70's? </para> <para> Here is some quoted '<literal>code</literal>' and a - "<ulink url="http://example.com/?foo=1&bar=2">quoted link</ulink>". + "<ulink url="http://example.com/?foo=1&bar=2">quoted link</ulink>". </para> <para> Some dashes: one---two --- three--four -- five. @@ -672,7 +673,7 @@ These should not be escaped: \$ \\ \> \[ \{ <listitem> <para> $22,000 is a <emphasis>lot</emphasis> of money. So is $34,000. (It - worked if "lot" is emphasized.) + worked if "lot" is emphasized.) </para> </listitem> <listitem> @@ -909,17 +910,17 @@ Cat & 1 \\ \hline </blockquote> <para> Auto-links should not occur here: - <literal><http://example.com/></literal> + <literal><http://example.com/></literal> </para> <screen> -or here: <http://example.com/> +or here: <http://example.com/> </screen> </section> </section> <section> <title>Images</title> <para> - From "Voyage dans la Lune" by Georges Melies (1902): + From "Voyage dans la Lune" by Georges Melies (1902): </para> <para> <inlinemediaobject> @@ -963,7 +964,7 @@ or here: <http://example.com/> footnote (as with list items). </para> <screen> - { <code> } + { <code> } </screen> <para> If you want, you can indent every line, but you can also be lazy diff --git a/tests/writer.html b/tests/writer.html index 191b1982e..8915a172c 100644 --- a/tests/writer.html +++ b/tests/writer.html @@ -39,7 +39,7 @@ here.</p> <blockquote> <p>Code in a block quote:</p> <pre><code>sub status { - print "working"; + print "working"; } </code></pre> <p>A list:</p> @@ -60,7 +60,7 @@ here.</p> <blockquote> <p>Example:</p> <pre><code>sub status { - print "working"; + print "working"; } </code></pre> </blockquote> @@ -84,7 +84,7 @@ here.</p> <pre><code>---- (should be four hyphens) sub status { - print "working"; + print "working"; } this code block is indented by one tab @@ -92,7 +92,7 @@ this code block is indented by one tab <p>And:</p> <pre><code> this code block is indented by two tabs -These should not be escaped: \$ \\ \> \[ \{ +These should not be escaped: \$ \\ \> \[ \{ </code></pre> <hr /> <h1>Lists</h1> @@ -255,12 +255,12 @@ These should not be escaped: \$ \\ \> \[ \{ <div> foo</div> <p>This should be a code block, though:</p> -<pre><code><div> +<pre><code><div> foo -</div> +</div> </code></pre> <p>As should this:</p> -<pre><code><div>foo</div> +<pre><code><div>foo</div> </code></pre> <p>Now, nested:</p> <div> @@ -281,12 +281,12 @@ Blah This is another comment. --> <p>Code block:</p> -<pre><code><!-- Comment --> +<pre><code><!-- Comment --> </code></pre> <p>Just plain comment, with trailing spaces on the line:</p> <!-- foo --> <p>Code:</p> -<pre><code><hr /> +<pre><code><hr /> </code></pre> <p>Hr's:</p> <hr> @@ -315,14 +315,14 @@ Blah <p>So is <strong><em>this</em></strong> word.</p> <p><strong><em>This is strong and em.</em></strong></p> <p>So is <strong><em>this</em></strong> word.</p> -<p>This is code: <code>></code>, <code>$</code>, <code>\</code>, <code>\$</code>, <code><html></code>.</p> +<p>This is code: <code>></code>, <code>$</code>, <code>\</code>, <code>\$</code>, <code><html></code>.</p> <hr /> <h1>Smart quotes, ellipses, dashes</h1> -<p>"Hello," said the spider. "'Shelob' is my name."</p> +<p>"Hello," said the spider. "'Shelob' is my name."</p> <p>'A', 'B', and 'C' are letters.</p> <p>'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.'</p> -<p>'He said, "I want to go."' Were you alive in the 70's?</p> -<p>Here is some quoted '<code>code</code>' and a "<a href="http://example.com/?foo=1&bar=2">quoted link</a>".</p> +<p>'He said, "I want to go."' Were you alive in the 70's?</p> +<p>Here is some quoted '<code>code</code>' and a "<a href="http://example.com/?foo=1&bar=2">quoted link</a>".</p> <p>Some dashes: one---two --- three--four -- five.</p> <p>Dashes between numbers: 5-7, 255-66, 1987-1999.</p> <p>Ellipses...and. . .and . . . .</p> @@ -342,7 +342,7 @@ Blah <p>These shouldn't be math:</p> <ul> <li>To get the famous equation, write <code>$e = mc^2$</code>.</li> -<li>$22,000 is a <em>lot</em> of money. So is $34,000. (It worked if "lot" is emphasized.)</li> +<li>$22,000 is a <em>lot</em> of money. So is $34,000. (It worked if "lot" is emphasized.)</li> <li>Escaped <code>$</code>: $73 <em>this should be emphasized</em> 23$.</li> </ul> <p>Here's a LaTeX table:</p> @@ -355,11 +355,11 @@ Cat & 1 \\ \hline <h1>Special Characters</h1> <p>Here is some unicode:</p> <ul> -<li>I hat: Î</li> -<li>o umlaut: ö</li> -<li>section: §</li> -<li>set membership: ∈</li> -<li>copyright: ©</li> +<li>I hat: Î</li> +<li>o umlaut: ö</li> +<li>section: §</li> +<li>set membership: ∈</li> +<li>copyright: ©</li> </ul> <p>AT&T has an ampersand in their name.</p> <p>AT&T is another way to write it.</p> @@ -414,7 +414,7 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'Email link'+'<\/'+'a'+'>') <p>Foo <a href="/url/" title="Title with "quote" inside">biz</a>.</p> <h2>With ampersands</h2> <p>Here's a <a href="http://example.com/?foo=1&bar=2">link with an ampersand in the URL</a>.</p> -<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&T">AT&T</a>.</p> +<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&T">AT&T</a>.</p> <p>Here's an <a href="/script?foo=1&bar=2">inline link</a>.</p> <p>Here's an <a href="/script?foo=1&bar=2">inline link in pointy braces</a>.</p> <h2>Autolinks</h2> @@ -433,12 +433,12 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+e+'<\/'+'a'+'>'); <blockquote> <p>Blockquoted: <a href="http://example.com/">http://example.com/</a></p> </blockquote> -<p>Auto-links should not occur here: <code><http://example.com/></code></p> -<pre><code>or here: <http://example.com/> +<p>Auto-links should not occur here: <code><http://example.com/></code></p> +<pre><code>or here: <http://example.com/> </code></pre> <hr /> <h1>Images</h1> -<p>From "Voyage dans la Lune" by Georges Melies (1902):</p> +<p>From "Voyage dans la Lune" by Georges Melies (1902):</p> <p><img src="lalune.jpg" title="Voyage dans la Lune" alt="lalune"></p> <p>Here is a movie <img src="movie.jpg" alt="movie"> icon.</p> <hr /> @@ -458,7 +458,7 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+e+'<\/'+'a'+'>'); <a href="#fnref1" class="footnoteBacklink" title="Jump back to footnote 1">↩</a></li> <li id="fn2"><p>Here's the long note. This one contains multiple blocks.</p> <p>Subsequent blocks are indented to show that they belong to the footnote (as with list items).</p> -<pre><code> { <code> } +<pre><code> { <code> } </code></pre> <p>If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.</p> <a href="#fnref2" class="footnoteBacklink" title="Jump back to footnote 2">↩</a></li> diff --git a/tests/writer.smart.html b/tests/writer.smart.html index c14a6de54..14b70e2fe 100644 --- a/tests/writer.smart.html +++ b/tests/writer.smart.html @@ -39,7 +39,7 @@ here.</p> <blockquote> <p>Code in a block quote:</p> <pre><code>sub status { - print "working"; + print "working"; } </code></pre> <p>A list:</p> @@ -60,7 +60,7 @@ here.</p> <blockquote> <p>Example:</p> <pre><code>sub status { - print "working"; + print "working"; } </code></pre> </blockquote> @@ -84,7 +84,7 @@ here.</p> <pre><code>---- (should be four hyphens) sub status { - print "working"; + print "working"; } this code block is indented by one tab @@ -92,7 +92,7 @@ this code block is indented by one tab <p>And:</p> <pre><code> this code block is indented by two tabs -These should not be escaped: \$ \\ \> \[ \{ +These should not be escaped: \$ \\ \> \[ \{ </code></pre> <hr /> <h1>Lists</h1> @@ -255,12 +255,12 @@ These should not be escaped: \$ \\ \> \[ \{ <div> foo</div> <p>This should be a code block, though:</p> -<pre><code><div> +<pre><code><div> foo -</div> +</div> </code></pre> <p>As should this:</p> -<pre><code><div>foo</div> +<pre><code><div>foo</div> </code></pre> <p>Now, nested:</p> <div> @@ -281,12 +281,12 @@ Blah This is another comment. --> <p>Code block:</p> -<pre><code><!-- Comment --> +<pre><code><!-- Comment --> </code></pre> <p>Just plain comment, with trailing spaces on the line:</p> <!-- foo --> <p>Code:</p> -<pre><code><hr /> +<pre><code><hr /> </code></pre> <p>Hr’s:</p> <hr> @@ -315,7 +315,7 @@ Blah <p>So is <strong><em>this</em></strong> word.</p> <p><strong><em>This is strong and em.</em></strong></p> <p>So is <strong><em>this</em></strong> word.</p> -<p>This is code: <code>></code>, <code>$</code>, <code>\</code>, <code>\$</code>, <code><html></code>.</p> +<p>This is code: <code>></code>, <code>$</code>, <code>\</code>, <code>\$</code>, <code><html></code>.</p> <hr /> <h1>Smart quotes, ellipses, dashes</h1> <p>“Hello,” said the spider. “‘Shelob’ is my name.”</p> @@ -355,11 +355,11 @@ Cat & 1 \\ \hline <h1>Special Characters</h1> <p>Here is some unicode:</p> <ul> -<li>I hat: Î</li> -<li>o umlaut: ö</li> -<li>section: §</li> -<li>set membership: ∈</li> -<li>copyright: ©</li> +<li>I hat: Î</li> +<li>o umlaut: ö</li> +<li>section: §</li> +<li>set membership: ∈</li> +<li>copyright: ©</li> </ul> <p>AT&T has an ampersand in their name.</p> <p>AT&T is another way to write it.</p> @@ -389,7 +389,7 @@ Cat & 1 \\ \hline <p><a href="/url/" title="title">URL and title</a>.</p> <p><a href="/url/" title="title preceded by two spaces">URL and title</a>.</p> <p><a href="/url/" title="title preceded by a tab">URL and title</a>.</p> -<p><a href="/url/" title="title with "quotes" in it">URL and title</a></p> +<p><a href="/url/" title="title with “quotes” in it">URL and title</a></p> <p><a href="/url/" title="title with single quotes">URL and title</a></p> <p><script type="text/javascript"> <!-- @@ -410,11 +410,11 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'Email link'+'<\/'+'a'+'>') <p>This should [not][] be a link.</p> <pre><code>[not]: /url </code></pre> -<p>Foo <a href="/url/" title="Title with "quotes" inside">bar</a>.</p> -<p>Foo <a href="/url/" title="Title with "quote" inside">biz</a>.</p> +<p>Foo <a href="/url/" title="Title with “quotes” inside">bar</a>.</p> +<p>Foo <a href="/url/" title="Title with “quote” inside">biz</a>.</p> <h2>With ampersands</h2> <p>Here’s a <a href="http://example.com/?foo=1&bar=2">link with an ampersand in the URL</a>.</p> -<p>Here’s a link with an amersand in the link text: <a href="http://att.com/" title="AT&T">AT&T</a>.</p> +<p>Here’s a link with an amersand in the link text: <a href="http://att.com/" title="AT&T">AT&T</a>.</p> <p>Here’s an <a href="/script?foo=1&bar=2">inline link</a>.</p> <p>Here’s an <a href="/script?foo=1&bar=2">inline link in pointy braces</a>.</p> <h2>Autolinks</h2> @@ -433,8 +433,8 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+e+'<\/'+'a'+'>'); <blockquote> <p>Blockquoted: <a href="http://example.com/">http://example.com/</a></p> </blockquote> -<p>Auto-links should not occur here: <code><http://example.com/></code></p> -<pre><code>or here: <http://example.com/> +<p>Auto-links should not occur here: <code><http://example.com/></code></p> +<pre><code>or here: <http://example.com/> </code></pre> <hr /> <h1>Images</h1> @@ -458,7 +458,7 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+e+'<\/'+'a'+'>'); <a href="#fnref1" class="footnoteBacklink" title="Jump back to footnote 1">↩</a></li> <li id="fn2"><p>Here’s the long note. This one contains multiple blocks.</p> <p>Subsequent blocks are indented to show that they belong to the footnote (as with list items).</p> -<pre><code> { <code> } +<pre><code> { <code> } </code></pre> <p>If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.</p> <a href="#fnref2" class="footnoteBacklink" title="Jump back to footnote 2">↩</a></li> |