diff options
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 98 |
1 files changed, 95 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d4687b10e..d1d40ac23 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -28,6 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Utility functions and definitions used by the various Pandoc modules. -} module Text.Pandoc.Shared ( + -- * List processing + splitBy, -- * Text processing gsub, joinWithSep, @@ -52,7 +54,6 @@ module Text.Pandoc.Shared ( -- * Pandoc block list processing consolidateList, isNoteBlock, - splitBy, normalizeSpaces, compactify, generateReference, @@ -62,12 +63,21 @@ module Text.Pandoc.Shared ( lookupKeySrc, refsMatch, replaceReferenceLinks, - replaceRefLinksBlockList + replaceRefLinksBlockList, + -- * SGML + escapeSGML, + stringToSGML, + inTags, + selfClosingTag, + inTagsSimple, + inTagsIndented ) where import Text.Pandoc.Definition import Text.ParserCombinators.Parsec -import Text.Pandoc.Entities ( decodeEntities ) +import Text.Pandoc.Entities ( decodeEntities, encodeEntities ) import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex ) +import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>), ($$), nest, Doc, + isEmpty ) import Char ( toLower ) import List ( find, groupBy ) @@ -507,3 +517,85 @@ replaceRefLinksInline keytable (Emph lst) = replaceRefLinksInline keytable (Strong lst) = Strong (map (replaceRefLinksInline keytable) lst) replaceRefLinksInline keytable other = other + +-- | Escape string, preserving character entities and quote, and adding +-- smart typography if specified. +stringToSGML :: WriterOptions -> String -> String +stringToSGML options = + let escapeDoubleQuotes = + gsub "(\"|")" "”" . -- rest are right quotes + gsub "(\"|")(&r[sd]quo;)" "”\\2" . + -- never left quo before right quo + gsub "(&l[sd]quo;)(\"|")" "\\2“" . + -- never right quo after left quo + gsub "([ \t])(\"|")" "\\1“" . + -- never right quo after space + gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left + gsub "(\"|")('|`|‘)" "”’" . + -- right if it got through last filter + gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" . + -- "'word left + gsub "``" "“" . + gsub "''" "”" + escapeSingleQuotes = + gsub "'" "’" . -- otherwise right + gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo + gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo + gsub "([ \t])'" "\\1‘" . -- never right quo after space + gsub "`" "‘" . -- ` is left + gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right + gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left + gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left + gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive + gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left + gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. + escapeDashes = + gsub " ?-- ?" "—" . + gsub " ?--- ?" "—" . + gsub "([0-9])--?([0-9])" "\\1–\\2" + escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" + smartFilter = escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . + escapeEllipses in + encodeEntities . (if (writerSmart options) then smartFilter else id) . + (escapePreservingRegex escapeSGML (mkRegex "&[[:alnum:]]*;")) + +-- | Escape string as needed for HTML. Entity references are not preserved. +escapeSGML :: String -> String +escapeSGML [] = [] +escapeSGML (x:xs) = case x of + '&' -> "&" ++ escapeSGML xs + '<' -> "<" ++ escapeSGML xs + '>' -> ">" ++ escapeSGML xs + '"' -> """ ++ escapeSGML xs + _ -> x:(escapeSGML xs) + +-- | Return a text object with a string of formatted SGML attributes. +attributeList :: WriterOptions -> [(String, String)] -> Doc +attributeList options = + text . concatMap (\(a, b) -> " " ++ stringToSGML options a ++ "=\"" ++ + stringToSGML options b ++ "\"") + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes and (if specified) indentation. +inTags:: Bool -> WriterOptions -> String -> [(String, String)] -> Doc -> Doc +inTags isIndented options tagType attribs contents = + let openTag = PP.char '<' <> text tagType <> attributeList options attribs <> + PP.char '>' + closeTag = text "</" <> text tagType <> PP.char '>' in + if isIndented + then openTag $$ nest 2 contents $$ closeTag + else openTag <> contents <> closeTag + +-- | Return a self-closing tag of tagType with specified attributes +selfClosingTag :: WriterOptions -> String -> [(String, String)] -> Doc +selfClosingTag options tagType attribs = + PP.char '<' <> text tagType <> attributeList options attribs <> text " />" + +-- | Put the supplied contents between start and end tags of tagType. +inTagsSimple :: WriterOptions -> String -> Doc -> Doc +inTagsSimple options tagType = inTags False options tagType [] + +-- | Put the supplied contents in indented block btw start and end tags. +inTagsIndented :: WriterOptions -> String -> Doc -> Doc +inTagsIndented options tagType = inTags True options tagType [] + |