aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/XML/Light/Output.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-02-16 18:40:06 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-02-16 18:40:06 -0800
commitd7a4996b1e7e201095ce792375a0776984fa8fcd (patch)
tree2d9fe829471bd4217a3e92b72d1c713abe77f274 /src/Text/Pandoc/XML/Light/Output.hs
parent967e7f5fb990b29de48b37be1db40fb149a8cf55 (diff)
downloadpandoc-d7a4996b1e7e201095ce792375a0776984fa8fcd.tar.gz
Split up T.P.XML.Light into submodules.
Diffstat (limited to 'src/Text/Pandoc/XML/Light/Output.hs')
-rw-r--r--src/Text/Pandoc/XML/Light/Output.hs230
1 files changed, 230 insertions, 0 deletions
diff --git a/src/Text/Pandoc/XML/Light/Output.hs b/src/Text/Pandoc/XML/Light/Output.hs
new file mode 100644
index 000000000..dc94ce914
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light/Output.hs
@@ -0,0 +1,230 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.XML.Light.Output
+ Copyright : Copyright (C) 2021 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+-}
+module Text.Pandoc.XML.Light.Output
+ ( -- * Replacement for xml-light's Text.XML.Output
+ ppTopElement
+ , ppElement
+ , ppContent
+ , ppcElement
+ , ppcContent
+ , showTopElement
+ , showElement
+ , showContent
+ , useShortEmptyTags
+ , defaultConfigPP
+ , ConfigPP(..)
+ ) where
+
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText)
+import Text.Pandoc.XML.Light.Types
+
+--
+-- duplicates functinos from Text.XML.Output
+--
+
+-- | The XML 1.0 header
+xmlHeader :: Text
+xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+
+
+--------------------------------------------------------------------------------
+data ConfigPP = ConfigPP
+ { shortEmptyTag :: QName -> Bool
+ , prettify :: Bool
+ }
+
+-- | Default pretty orinting configuration.
+-- * Always use abbreviate empty tags.
+defaultConfigPP :: ConfigPP
+defaultConfigPP = ConfigPP { shortEmptyTag = const True
+ , prettify = False
+ }
+
+-- | The predicate specifies for which empty tags we should use XML's
+-- abbreviated notation <TAG />. This is useful if we are working with
+-- some XML-ish standards (such as certain versions of HTML) where some
+-- empty tags should always be displayed in the <TAG></TAG> form.
+useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP
+useShortEmptyTags p c = c { shortEmptyTag = p }
+
+
+-- | Specify if we should use extra white-space to make document more readable.
+-- WARNING: This adds additional white-space to text elements,
+-- and so it may change the meaning of the document.
+useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP
+useExtraWhiteSpace p c = c { prettify = p }
+
+-- | A configuration that tries to make things pretty
+-- (possibly at the cost of changing the semantics a bit
+-- through adding white space.)
+prettyConfigPP :: ConfigPP
+prettyConfigPP = useExtraWhiteSpace True defaultConfigPP
+
+
+--------------------------------------------------------------------------------
+
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppTopElement :: Element -> Text
+ppTopElement = ppcTopElement prettyConfigPP
+
+-- | Pretty printing elements
+ppElement :: Element -> Text
+ppElement = ppcElement prettyConfigPP
+
+-- | Pretty printing content
+ppContent :: Content -> Text
+ppContent = ppcContent prettyConfigPP
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppcTopElement :: ConfigPP -> Element -> Text
+ppcTopElement c e = T.unlines [xmlHeader,ppcElement c e]
+
+-- | Pretty printing elements
+ppcElement :: ConfigPP -> Element -> Text
+ppcElement c = TL.toStrict . toLazyText . ppElementS c mempty
+
+-- | Pretty printing content
+ppcContent :: ConfigPP -> Content -> Text
+ppcContent c = TL.toStrict . toLazyText . ppContentS c mempty
+
+ppcCData :: ConfigPP -> CData -> Text
+ppcCData c = TL.toStrict . toLazyText . ppCDataS c mempty
+
+type Indent = Builder
+
+-- | Pretty printing content using ShowT
+ppContentS :: ConfigPP -> Indent -> Content -> Builder
+ppContentS c i x = case x of
+ Elem e -> ppElementS c i e
+ Text t -> ppCDataS c i t
+ CRef r -> showCRefS r
+
+ppElementS :: ConfigPP -> Indent -> Element -> Builder
+ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <>
+ (case elContent e of
+ [] | "?" `T.isPrefixOf` qName name -> fromText " ?>"
+ | shortEmptyTag c name -> fromText " />"
+ [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name
+ cs -> singleton '>' <> nl <>
+ mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <>
+ i <> tagEnd name
+ where (nl,sp) = if prettify c then ("\n"," ") else ("","")
+ )
+ where name = elName e
+
+ppCDataS :: ConfigPP -> Indent -> CData -> Builder
+ppCDataS c i t = i <> if cdVerbatim t /= CDataText || not (prettify c)
+ then showCDataS t
+ else foldr cons mempty (T.unpack (showCData t))
+ where cons :: Char -> Builder -> Builder
+ cons '\n' ys = singleton '\n' <> i <> ys
+ cons y ys = singleton y <> ys
+
+
+
+--------------------------------------------------------------------------------
+
+-- | Adds the <?xml?> header.
+showTopElement :: Element -> Text
+showTopElement c = xmlHeader <> showElement c
+
+showContent :: Content -> Text
+showContent = ppcContent defaultConfigPP
+
+showElement :: Element -> Text
+showElement = ppcElement defaultConfigPP
+
+showCData :: CData -> Text
+showCData = ppcCData defaultConfigPP
+
+-- Note: crefs should not contain '&', ';', etc.
+showCRefS :: Text -> Builder
+showCRefS r = singleton '&' <> fromText r <> singleton ';'
+
+-- | Convert a text element to characters.
+showCDataS :: CData -> Builder
+showCDataS cd =
+ case cdVerbatim cd of
+ CDataText -> escStr (cdData cd)
+ CDataVerbatim -> fromText "<![CDATA[" <> escCData (cdData cd) <>
+ fromText "]]>"
+ CDataRaw -> fromText (cdData cd)
+
+--------------------------------------------------------------------------------
+escCData :: Text -> Builder
+escCData t
+ | "]]>" `T.isPrefixOf` t =
+ fromText "]]]]><![CDATA[>" <> fromText (T.drop 3 t)
+escCData t
+ = case T.uncons t of
+ Nothing -> mempty
+ Just (c,t') -> singleton c <> escCData t'
+
+escChar :: Char -> Builder
+escChar c = case c of
+ '<' -> fromText "&lt;"
+ '>' -> fromText "&gt;"
+ '&' -> fromText "&amp;"
+ '"' -> fromText "&quot;"
+ -- we use &#39 instead of &apos; because IE apparently has difficulties
+ -- rendering &apos; in xhtml.
+ -- Reported by Rohan Drape <rohan.drape@gmail.com>.
+ '\'' -> fromText "&#39;"
+ _ -> singleton c
+
+ {- original xml-light version:
+ -- NOTE: We escape '\r' explicitly because otherwise they get lost
+ -- when parsed back in because of then end-of-line normalization rules.
+ _ | isPrint c || c == '\n' -> singleton c
+ | otherwise -> showText "&#" . showsT oc . singleton ';'
+ where oc = ord c
+ -}
+
+escStr :: Text -> Builder
+escStr cs = if T.any needsEscape cs
+ then mconcat (map escChar (T.unpack cs))
+ else fromText cs
+ where
+ needsEscape '<' = True
+ needsEscape '>' = True
+ needsEscape '&' = True
+ needsEscape '"' = True
+ needsEscape '\'' = True
+ needsEscape _ = False
+
+tagEnd :: QName -> Builder
+tagEnd qn = fromText "</" <> showQName qn <> singleton '>'
+
+tagStart :: QName -> [Attr] -> Builder
+tagStart qn as = singleton '<' <> showQName qn <> as_str
+ where as_str = if null as
+ then mempty
+ else mconcat (map showAttr as)
+
+showAttr :: Attr -> Builder
+showAttr (Attr qn v) = singleton ' ' <> showQName qn <>
+ singleton '=' <>
+ singleton '"' <> escStr v <> singleton '"'
+
+showQName :: QName -> Builder
+showQName q =
+ case qPrefix q of
+ Nothing -> fromText (qName q)
+ Just p -> fromText p <> singleton ':' <> fromText (qName q)