From 7a9832166e36f77402d5e0259647e9f5c7ba4e58 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 20 Dec 2021 13:44:03 -0800 Subject: Add text wrapping to HTML output. Previously the HTML writer was exceptional in not being sensitive to the `--wrap` option. With this change `--wrap` now works for HTML. The default (as with other formats) is automatic wrapping to 72 columns. A new internal module, T.P.Writers.Blaze, exports `layoutMarkup`. This converts a blaze Html structure into a doclayout Doc Text. In addition, we now add a line break between an `img` tag and the associated `figcaption`. Note: Output is never wrapped in `writeHtmlStringForEPUB`. This accords with previous behavior since previously the HTML writer was insensitive to `--wrap` settings. There's no real need to wrap HTML inside a zipped container. Note that the contents of script, textarea, and pre tags are always laid out with the `flush` combinator, so that unwanted spaces won't be introduced if these occur in an indented context in a template. Closes #7764. --- src/Text/Pandoc/Writers/Blaze.hs | 139 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 src/Text/Pandoc/Writers/Blaze.hs (limited to 'src/Text/Pandoc/Writers/Blaze.hs') diff --git a/src/Text/Pandoc/Writers/Blaze.hs b/src/Text/Pandoc/Writers/Blaze.hs new file mode 100644 index 000000000..0e3bd0f98 --- /dev/null +++ b/src/Text/Pandoc/Writers/Blaze.hs @@ -0,0 +1,139 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.Shared + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Render blaze-html Html to DocLayout document (so it can be wrapped). +-} +module Text.Pandoc.Writers.Blaze ( layoutMarkup ) +where +import Text.Blaze +import qualified Data.ByteString as S +import Data.List (isInfixOf) +import Data.Text.Encoding (decodeUtf8) +import qualified Data.Text as T +import Data.Text (Text) +import Text.DocLayout hiding (Text, Empty) +import Text.Blaze.Internal (ChoiceString(..), getText, MarkupM(..)) + +layoutMarkup :: Markup -> Doc T.Text +layoutMarkup = go True mempty + where + go :: Bool -> Doc T.Text -> MarkupM b -> Doc T.Text + go wrap attrs (Parent _ open close content) = + let open' = getText open + in literal open' + <> attrs + <> char '>' + <> (if allowsWrap open' + then go wrap mempty content + else flush $ go False mempty content) + <> literal (getText close) + go wrap attrs (CustomParent tag content) = + char '<' + <> fromChoiceString wrap tag + <> attrs + <> char '>' + <> go wrap mempty content + <> literal " fromChoiceString wrap tag + <> char '>' + go _wrap attrs (Leaf _ begin end _) = + literal (getText begin) + <> attrs + <> literal (getText end) + go wrap attrs (CustomLeaf tag close _) = + char '<' + <> fromChoiceString wrap tag + <> attrs + <> (if close then literal " />" else char '>') + go wrap attrs (AddAttribute rawkey _ value h) = + go wrap + (space' wrap + <> literal (getText rawkey) + <> char '=' + <> doubleQuotes (fromChoiceString wrap value) + <> attrs) h + go wrap attrs (AddCustomAttribute key value h) = + go wrap + (space' wrap + <> fromChoiceString wrap key + <> char '=' + <> doubleQuotes (fromChoiceString wrap value) + <> attrs) h + go wrap _ (Content content _) = fromChoiceString wrap content + go wrap _ (Comment comment _) = + literal "" + go wrap attrs (Append h1 h2) = go wrap attrs h1 <> go wrap attrs h2 + go _ _ (Empty _) = mempty + space' wrap = if wrap then space else char ' ' + +allowsWrap :: T.Text -> Bool +allowsWrap t = + not (t == " ChoiceString -- ^ String to render + -> Doc Text -- ^ Resulting builder +fromChoiceString wrap (Static s) = withWrap wrap $ getText s +fromChoiceString wrap (String s) = withWrap wrap $ + escapeMarkupEntities $ T.pack s +fromChoiceString wrap (Text s) = withWrap wrap $ escapeMarkupEntities s +fromChoiceString wrap (ByteString s) = withWrap wrap $ decodeUtf8 s +fromChoiceString _wrap (PreEscaped x) = -- don't wrap! + case x of + String s -> literal $ T.pack s + Text s -> literal s + s -> fromChoiceString False s +fromChoiceString wrap (External x) = case x of + -- Check that the sequence " if " if " if " fromChoiceString wrap s +fromChoiceString wrap (AppendChoiceString x y) = + fromChoiceString wrap x <> fromChoiceString wrap y +fromChoiceString _ EmptyChoiceString = mempty + +withWrap :: Bool -> Text -> Doc Text +withWrap wrap + | wrap = mconcat . toChunks + | otherwise = literal + +toChunks :: Text -> [Doc Text] +toChunks = map toDoc . T.groupBy sameStatus + where + toDoc t = + if T.any (== ' ') t + then space + else if T.any (== '\n') t + then cr + else literal t + sameStatus c d = + (c == ' ' && d == ' ') || + (c == '\n' && d == '\n') || + (c /= ' ' && d /= ' ' && c /= '\n' && d /= '\n') + + +-- | Escape predefined XML entities in a text value +-- +escapeMarkupEntities :: Text -- ^ Text to escape + -> Text -- ^ Resulting Doc +escapeMarkupEntities = T.concatMap escape + where + escape :: Char -> Text + escape '<' = "<" + escape '>' = ">" + escape '&' = "&" + escape '"' = """ + escape '\'' = "'" + escape x = T.singleton x -- cgit v1.2.3