diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-04 22:52:16 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-04 22:52:16 +0000 |
commit | 030d94e1c3cd4be0ab9d7c16fccfa973cedb5d38 (patch) | |
tree | 325452ca83818bc0776e257f44342776f9bff78d /src/Text/Pandoc/Shared.hs | |
parent | 24f3710e0911fa8ebe6070ef83c6206e54a46f1a (diff) | |
download | pandoc-030d94e1c3cd4be0ab9d7c16fccfa973cedb5d38.tar.gz |
Refactored SGML escaping functions and "in tag" functions to
Text/Shared/Pandoc. (escapeSGML, stringToSGML, inTag,
inTagSimple, inTagIndented, selfClosingTag) These can be
used by both the HTML and Docbook writers.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@417 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 98 |
1 files changed, 95 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d4687b10e..d1d40ac23 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -28,6 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Utility functions and definitions used by the various Pandoc modules. -} module Text.Pandoc.Shared ( + -- * List processing + splitBy, -- * Text processing gsub, joinWithSep, @@ -52,7 +54,6 @@ module Text.Pandoc.Shared ( -- * Pandoc block list processing consolidateList, isNoteBlock, - splitBy, normalizeSpaces, compactify, generateReference, @@ -62,12 +63,21 @@ module Text.Pandoc.Shared ( lookupKeySrc, refsMatch, replaceReferenceLinks, - replaceRefLinksBlockList + replaceRefLinksBlockList, + -- * SGML + escapeSGML, + stringToSGML, + inTags, + selfClosingTag, + inTagsSimple, + inTagsIndented ) where import Text.Pandoc.Definition import Text.ParserCombinators.Parsec -import Text.Pandoc.Entities ( decodeEntities ) +import Text.Pandoc.Entities ( decodeEntities, encodeEntities ) import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex ) +import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>), ($$), nest, Doc, + isEmpty ) import Char ( toLower ) import List ( find, groupBy ) @@ -507,3 +517,85 @@ replaceRefLinksInline keytable (Emph lst) = replaceRefLinksInline keytable (Strong lst) = Strong (map (replaceRefLinksInline keytable) lst) replaceRefLinksInline keytable other = other + +-- | Escape string, preserving character entities and quote, and adding +-- smart typography if specified. +stringToSGML :: WriterOptions -> String -> String +stringToSGML options = + let escapeDoubleQuotes = + gsub "(\"|")" "”" . -- rest are right quotes + gsub "(\"|")(&r[sd]quo;)" "”\\2" . + -- never left quo before right quo + gsub "(&l[sd]quo;)(\"|")" "\\2“" . + -- never right quo after left quo + gsub "([ \t])(\"|")" "\\1“" . + -- never right quo after space + gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left + gsub "(\"|")('|`|‘)" "”’" . + -- right if it got through last filter + gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" . + -- "'word left + gsub "``" "“" . + gsub "''" "”" + escapeSingleQuotes = + gsub "'" "’" . -- otherwise right + gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo + gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo + gsub "([ \t])'" "\\1‘" . -- never right quo after space + gsub "`" "‘" . -- ` is left + gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right + gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left + gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left + gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive + gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left + gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. + escapeDashes = + gsub " ?-- ?" "—" . + gsub " ?--- ?" "—" . + gsub "([0-9])--?([0-9])" "\\1–\\2" + escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" + smartFilter = escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . + escapeEllipses in + encodeEntities . (if (writerSmart options) then smartFilter else id) . + (escapePreservingRegex escapeSGML (mkRegex "&[[:alnum:]]*;")) + +-- | Escape string as needed for HTML. Entity references are not preserved. +escapeSGML :: String -> String +escapeSGML [] = [] +escapeSGML (x:xs) = case x of + '&' -> "&" ++ escapeSGML xs + '<' -> "<" ++ escapeSGML xs + '>' -> ">" ++ escapeSGML xs + '"' -> """ ++ escapeSGML xs + _ -> x:(escapeSGML xs) + +-- | Return a text object with a string of formatted SGML attributes. +attributeList :: WriterOptions -> [(String, String)] -> Doc +attributeList options = + text . concatMap (\(a, b) -> " " ++ stringToSGML options a ++ "=\"" ++ + stringToSGML options b ++ "\"") + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes and (if specified) indentation. +inTags:: Bool -> WriterOptions -> String -> [(String, String)] -> Doc -> Doc +inTags isIndented options tagType attribs contents = + let openTag = PP.char '<' <> text tagType <> attributeList options attribs <> + PP.char '>' + closeTag = text "</" <> text tagType <> PP.char '>' in + if isIndented + then openTag $$ nest 2 contents $$ closeTag + else openTag <> contents <> closeTag + +-- | Return a self-closing tag of tagType with specified attributes +selfClosingTag :: WriterOptions -> String -> [(String, String)] -> Doc +selfClosingTag options tagType attribs = + PP.char '<' <> text tagType <> attributeList options attribs <> text " />" + +-- | Put the supplied contents between start and end tags of tagType. +inTagsSimple :: WriterOptions -> String -> Doc -> Doc +inTagsSimple options tagType = inTags False options tagType [] + +-- | Put the supplied contents in indented block btw start and end tags. +inTagsIndented :: WriterOptions -> String -> Doc -> Doc +inTagsIndented options tagType = inTags True options tagType [] + |