diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-12-20 13:44:03 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-12-22 09:45:02 -0800 |
commit | 7a9832166e36f77402d5e0259647e9f5c7ba4e58 (patch) | |
tree | 38b417997812c3e04704be97c05368ea795b15fe /src | |
parent | 0bdf37315766eb4b785002ffaf38cdb724628e7a (diff) | |
download | pandoc-7a9832166e36f77402d5e0259647e9f5c7ba4e58.tar.gz |
Add text wrapping to HTML output.
Previously the HTML writer was exceptional in not being
sensitive to the `--wrap` option. With this change `--wrap`
now works for HTML. The default (as with other formats) is
automatic wrapping to 72 columns.
A new internal module, T.P.Writers.Blaze, exports `layoutMarkup`.
This converts a blaze Html structure into a doclayout Doc Text.
In addition, we now add a line break between an `img` tag
and the associated `figcaption`.
Note: Output is never wrapped in `writeHtmlStringForEPUB`.
This accords with previous behavior since previously the HTML
writer was insensitive to `--wrap` settings. There's no real
need to wrap HTML inside a zipped container.
Note that the contents of script, textarea, and pre tags are
always laid out with the `flush` combinator, so that unwanted
spaces won't be introduced if these occur in an indented context
in a template.
Closes #7764.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Writers/Blaze.hs | 139 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 107 |
2 files changed, 197 insertions, 49 deletions
diff --git a/src/Text/Pandoc/Writers/Blaze.hs b/src/Text/Pandoc/Writers/Blaze.hs new file mode 100644 index 000000000..0e3bd0f98 --- /dev/null +++ b/src/Text/Pandoc/Writers/Blaze.hs @@ -0,0 +1,139 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.Shared + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Render blaze-html Html to DocLayout document (so it can be wrapped). +-} +module Text.Pandoc.Writers.Blaze ( layoutMarkup ) +where +import Text.Blaze +import qualified Data.ByteString as S +import Data.List (isInfixOf) +import Data.Text.Encoding (decodeUtf8) +import qualified Data.Text as T +import Data.Text (Text) +import Text.DocLayout hiding (Text, Empty) +import Text.Blaze.Internal (ChoiceString(..), getText, MarkupM(..)) + +layoutMarkup :: Markup -> Doc T.Text +layoutMarkup = go True mempty + where + go :: Bool -> Doc T.Text -> MarkupM b -> Doc T.Text + go wrap attrs (Parent _ open close content) = + let open' = getText open + in literal open' + <> attrs + <> char '>' + <> (if allowsWrap open' + then go wrap mempty content + else flush $ go False mempty content) + <> literal (getText close) + go wrap attrs (CustomParent tag content) = + char '<' + <> fromChoiceString wrap tag + <> attrs + <> char '>' + <> go wrap mempty content + <> literal "</" + <> fromChoiceString wrap tag + <> char '>' + go _wrap attrs (Leaf _ begin end _) = + literal (getText begin) + <> attrs + <> literal (getText end) + go wrap attrs (CustomLeaf tag close _) = + char '<' + <> fromChoiceString wrap tag + <> attrs + <> (if close then literal " />" else char '>') + go wrap attrs (AddAttribute rawkey _ value h) = + go wrap + (space' wrap + <> literal (getText rawkey) + <> char '=' + <> doubleQuotes (fromChoiceString wrap value) + <> attrs) h + go wrap attrs (AddCustomAttribute key value h) = + go wrap + (space' wrap + <> fromChoiceString wrap key + <> char '=' + <> doubleQuotes (fromChoiceString wrap value) + <> attrs) h + go wrap _ (Content content _) = fromChoiceString wrap content + go wrap _ (Comment comment _) = + literal "<!--" + <> space' wrap + <> fromChoiceString wrap comment + <> space' wrap + <> "-->" + go wrap attrs (Append h1 h2) = go wrap attrs h1 <> go wrap attrs h2 + go _ _ (Empty _) = mempty + space' wrap = if wrap then space else char ' ' + +allowsWrap :: T.Text -> Bool +allowsWrap t = + not (t == "<pre" || t == "<style" || t == "<script" || t == "<textarea") + +fromChoiceString :: Bool -- ^ Allow wrapping + -> ChoiceString -- ^ String to render + -> Doc Text -- ^ Resulting builder +fromChoiceString wrap (Static s) = withWrap wrap $ getText s +fromChoiceString wrap (String s) = withWrap wrap $ + escapeMarkupEntities $ T.pack s +fromChoiceString wrap (Text s) = withWrap wrap $ escapeMarkupEntities s +fromChoiceString wrap (ByteString s) = withWrap wrap $ decodeUtf8 s +fromChoiceString _wrap (PreEscaped x) = -- don't wrap! + case x of + String s -> literal $ T.pack s + Text s -> literal s + s -> fromChoiceString False s +fromChoiceString wrap (External x) = case x of + -- Check that the sequence "</" is *not* in the external data. + String s -> if "</" `isInfixOf` s then mempty else withWrap wrap (T.pack s) + Text s -> if "</" `T.isInfixOf` s then mempty else withWrap wrap s + ByteString s -> if "</" `S.isInfixOf` s then mempty else withWrap wrap (decodeUtf8 s) + s -> fromChoiceString wrap s +fromChoiceString wrap (AppendChoiceString x y) = + fromChoiceString wrap x <> fromChoiceString wrap y +fromChoiceString _ EmptyChoiceString = mempty + +withWrap :: Bool -> Text -> Doc Text +withWrap wrap + | wrap = mconcat . toChunks + | otherwise = literal + +toChunks :: Text -> [Doc Text] +toChunks = map toDoc . T.groupBy sameStatus + where + toDoc t = + if T.any (== ' ') t + then space + else if T.any (== '\n') t + then cr + else literal t + sameStatus c d = + (c == ' ' && d == ' ') || + (c == '\n' && d == '\n') || + (c /= ' ' && d /= ' ' && c /= '\n' && d /= '\n') + + +-- | Escape predefined XML entities in a text value +-- +escapeMarkupEntities :: Text -- ^ Text to escape + -> Text -- ^ Resulting Doc +escapeMarkupEntities = T.concatMap escape + where + escape :: Char -> Text + escape '<' = "<" + escape '>' = ">" + escape '&' = "&" + escape '"' = """ + escape '\'' = "'" + escape x = T.singleton x diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 247cddfc9..664aeffb6 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -39,7 +39,7 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Network.URI (URI (..), parseURIReference) import Numeric (showHex) -import Text.DocLayout (render, literal) +import Text.DocLayout (render, literal, Doc) import Text.Blaze.Internal (MarkupM (Empty), customLeaf, customParent) import Text.DocTemplates (FromContext (lookupContext), Context (..)) import Text.Blaze.Html hiding (contents) @@ -70,6 +70,7 @@ import Text.Pandoc.Class.PandocPure (runPure) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (mediaCategory) +import Text.Pandoc.Writers.Blaze (layoutMarkup) import Text.TeXMath import Text.XML.Light (elChildren, unode, unqual) import qualified Text.XML.Light as XML @@ -160,7 +161,8 @@ writeHtmlStringForEPUB :: PandocMonad m -> m Text writeHtmlStringForEPUB version o = writeHtmlString' defaultWriterState{ stHtml5 = version == EPUB3, - stEPUBVersion = Just version } o + stEPUBVersion = Just version } + o{ writerWrapText = WrapNone } -- | Convert Pandoc document to Reveal JS HTML slide show. writeRevealJs :: PandocMonad m @@ -207,17 +209,23 @@ writeHtmlString' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Text writeHtmlString' st opts d = do (body, context) <- evalStateT (pandocToHtml opts d) st + let colwidth = case writerWrapText opts of + WrapAuto -> Just (writerColumns opts) + _ -> Nothing (if writerPreferAscii opts then toEntities else id) <$> case writerTemplate opts of - Nothing -> return $ renderHtml' body + Nothing -> return $ + case colwidth of + Nothing -> renderHtml' body -- optimization, skip layout + Just cols -> render (Just cols) $ layoutMarkup body Just tpl -> do -- warn if empty lang when (isNothing (getField "lang" context :: Maybe Text)) $ report NoLangSpecified -- check for empty pagetitle - context' <- + (context' :: Context Text) <- case getField "pagetitle" context of Just (s :: Text) | not (T.null s) -> return context _ -> do @@ -228,9 +236,9 @@ writeHtmlString' st opts d = do Just [] -> "Untitled" Just (x:_) -> takeBaseName $ T.unpack x report $ NoTitleElement fallback - return $ resetField "pagetitle" fallback context - return $ render Nothing $ renderTemplate tpl - (defField "body" (renderHtml' body) context') + return $ resetField "pagetitle" (literal fallback) context + return $ render colwidth $ renderTemplate tpl + (defField "body" (layoutMarkup body) context') writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html writeHtml' st opts d = @@ -252,13 +260,13 @@ pandocToHtml opts (Pandoc meta blocks) = do let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts modify $ \st -> st{ stSlideLevel = slideLevel } metadata <- metaToContext opts - (fmap (literal . renderHtml') . blockListToHtml opts) - (fmap (literal . renderHtml') . inlineListToHtml opts) + (fmap layoutMarkup . blockListToHtml opts) + (fmap layoutMarkup . inlineListToHtml opts) meta let stringifyHTML = escapeStringForXML . stringify - let authsMeta = map stringifyHTML $ docAuthors meta + let authsMeta = map (literal . stringifyHTML) $ docAuthors meta let dateMeta = stringifyHTML $ docDate meta - let descriptionMeta = escapeStringForXML $ + let descriptionMeta = literal $ escapeStringForXML $ lookupMetaString "description" meta slideVariant <- gets stSlideVariant let sects = adjustNumbers opts $ @@ -267,7 +275,7 @@ pandocToHtml opts (Pandoc meta blocks) = do then blocks else prepSlides slideLevel blocks toc <- if writerTableOfContents opts && slideVariant /= S5Slides - then fmap renderHtml' <$> tableOfContents opts sects + then fmap layoutMarkup <$> tableOfContents opts sects else return Nothing blocks' <- blockListToHtml opts sects notes <- do @@ -281,7 +289,7 @@ pandocToHtml opts (Pandoc meta blocks) = do return notes st <- get let thebody = blocks' >> notes - let math = case writerHTMLMathMethod opts of + let math = layoutMarkup $ case writerHTMLMathMethod opts of MathJax url | slideVariant /= RevealJsSlides -> -- mathjax is handled via a special plugin in revealjs @@ -298,7 +306,7 @@ pandocToHtml opts (Pandoc meta blocks) = do nl opts let katexFlushLeft = case lookupContext "classoption" metadata of - Just clsops | "fleqn" `elem` (clsops :: [Text]) -> "true" + Just clsops | "fleqn" `elem` (clsops :: [Doc Text]) -> "true" _ -> "false" H.script $ text $ T.unlines [ "document.addEventListener(\"DOMContentLoaded\", function () {" @@ -324,15 +332,16 @@ pandocToHtml opts (Pandoc meta blocks) = do Just s | not (stHtml5 st) -> H.script ! A.type_ "text/javascript" $ preEscapedString - ("/*<![CDATA[*/\n" ++ T.unpack s ++ + ("/*<![CDATA[*/\n" <> T.unpack s <> "/*]]>*/\n") | otherwise -> mempty Nothing -> mempty let mCss :: Maybe [Text] = lookupContext "css" metadata - let context = (if stHighlighting st + let context :: Context Text + context = (if stHighlighting st then case writerHighlightStyle opts of Just sty -> defField "highlighting-css" - (T.pack $ styleToCss sty) + (literal $ T.pack $ styleToCss sty) Nothing -> id else id) . (if stCsl st @@ -342,15 +351,15 @@ pandocToHtml opts (Pandoc meta blocks) = do Just 0 -> id Just n -> defField "csl-entry-spacing" - (tshow n <> "em")) + (literal $ tshow n <> "em")) else id) . (if stMath st - then defField "math" (renderHtml' math) + then defField "math" math else id) . (case writerHTMLMathMethod opts of MathJax u -> defField "mathjax" True . defField "mathjaxurl" - (T.takeWhile (/='?') u) + (literal $ T.takeWhile (/='?') u) _ -> defField "mathjax" False) . (case writerHTMLMathMethod opts of PlainMath -> defField "displaymath-css" True @@ -361,11 +370,12 @@ pandocToHtml opts (Pandoc meta blocks) = do -- template can't distinguish False/undefined defField "controls" True . defField "controlsTutorial" True . - defField "controlsLayout" ("bottom-right" :: Text) . - defField "controlsBackArrows" ("faded" :: Text) . + defField "controlsLayout" + ("bottom-right" :: Doc Text) . + defField "controlsBackArrows" ("faded" :: Doc Text) . defField "progress" True . defField "slideNumber" False . - defField "showSlideNumber" ("all" :: Text) . + defField "showSlideNumber" ("all" :: Doc Text) . defField "hashOneBasedIndex" False . defField "hash" True . defField "respondToHashChanges" True . @@ -377,7 +387,7 @@ pandocToHtml opts (Pandoc meta blocks) = do defField "touch" True . defField "loop" False . defField "rtl" False . - defField "navigationMode" ("default" :: Text) . + defField "navigationMode" ("default" :: Doc Text) . defField "shuffle" False . defField "fragments" True . defField "fragmentInURL" True . @@ -385,22 +395,22 @@ pandocToHtml opts (Pandoc meta blocks) = do defField "help" True . defField "pause" True . defField "showNotes" False . - defField "autoPlayMedia" ("null" :: Text) . - defField "preloadIframes" ("null" :: Text) . - defField "autoSlide" ("0" :: Text) . + defField "autoPlayMedia" ("null" :: Doc Text) . + defField "preloadIframes" ("null" :: Doc Text) . + defField "autoSlide" ("0" :: Doc Text) . defField "autoSlideStoppable" True . - defField "autoSlideMethod" ("null" :: Text) . - defField "defaultTiming" ("null" :: Text) . + defField "autoSlideMethod" ("null" :: Doc Text) . + defField "defaultTiming" ("null" :: Doc Text) . defField "mouseWheel" False . - defField "display" ("block" :: Text) . + defField "display" ("block" :: Doc Text) . defField "hideInactiveCursor" True . - defField "hideCursorTime" ("5000" :: Text) . + defField "hideCursorTime" ("5000" :: Doc Text) . defField "previewLinks" False . - defField "transition" ("slide" :: Text) . - defField "transitionSpeed" ("default" :: Text) . - defField "backgroundTransition" ("fade" :: Text) . - defField "viewDistance" ("3" :: Text) . - defField "mobileViewDistance" ("2" :: Text) + defField "transition" ("slide" :: Doc Text) . + defField "transitionSpeed" ("default" :: Doc Text) . + defField "backgroundTransition" ("fade" :: Doc Text) . + defField "viewDistance" ("3" :: Doc Text) . + defField "mobileViewDistance" ("2" :: Doc Text) else id) . defField "document-css" (isNothing mCss && slideVariant == NoSlides) . defField "quotes" (stQuotes st) . @@ -410,18 +420,18 @@ pandocToHtml opts (Pandoc meta blocks) = do maybe id (defField "toc") toc . maybe id (defField "table-of-contents") toc . defField "author-meta" authsMeta . - maybe id (defField "date-meta") + maybe id (defField "date-meta" . literal) (normalizeDate dateMeta) . defField "description-meta" descriptionMeta . defField "pagetitle" - (stringifyHTML . docTitle $ meta) . - defField "idprefix" (writerIdentifierPrefix opts) . + (literal . stringifyHTML . docTitle $ meta) . + defField "idprefix" (literal $ writerIdentifierPrefix opts) . -- these should maybe be set in pandoc.hs defField "slidy-url" - ("https://www.w3.org/Talks/Tools/Slidy2" :: Text) . - defField "slideous-url" ("slideous" :: Text) . - defField "revealjs-url" ("https://unpkg.com/reveal.js@^4/" :: Text) $ - defField "s5-url" ("s5/default" :: Text) . + ("https://www.w3.org/Talks/Tools/Slidy2" :: Doc Text) . + defField "slideous-url" ("slideous" :: Doc Text) . + defField "revealjs-url" ("https://unpkg.com/reveal.js@^4/" :: Doc Text) $ + defField "s5-url" ("s5/default" :: Doc Text) . defField "html5" (stHtml5 st) $ metadata return (thebody, context) @@ -705,12 +715,11 @@ figure opts attr@(_, _, attrList) txt (s,tit) = do img <- inlineToHtml opts (Image attr alt (s,tit)) capt <- if null txt then return mempty - else tocapt `fmap` inlineListToHtml opts txt + else (nl opts <>) . tocapt <$> inlineListToHtml opts txt + let inner = mconcat [nl opts, img, capt, nl opts] return $ if html5 - then H5.figure $ mconcat - [nl opts, img, capt, nl opts] - else H.div ! A.class_ "figure" $ mconcat - [nl opts, img, nl opts, capt, nl opts] + then H5.figure inner + else H.div ! A.class_ "figure" $ inner adjustNumbers :: WriterOptions -> [Block] -> [Block] @@ -1332,7 +1341,7 @@ inlineToHtml opts inline = do Space -> return $ strToHtml " " SoftBreak -> return $ case writerWrapText opts of WrapNone -> preEscapedText " " - WrapAuto -> preEscapedText " " + WrapAuto -> " " WrapPreserve -> preEscapedText "\n" LineBreak -> return $ do if html5 then H5.br else H.br |