aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-12-20 13:44:03 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-12-22 09:45:02 -0800
commit7a9832166e36f77402d5e0259647e9f5c7ba4e58 (patch)
tree38b417997812c3e04704be97c05368ea795b15fe /src
parent0bdf37315766eb4b785002ffaf38cdb724628e7a (diff)
downloadpandoc-7a9832166e36f77402d5e0259647e9f5c7ba4e58.tar.gz
Add text wrapping to HTML output.
Previously the HTML writer was exceptional in not being sensitive to the `--wrap` option. With this change `--wrap` now works for HTML. The default (as with other formats) is automatic wrapping to 72 columns. A new internal module, T.P.Writers.Blaze, exports `layoutMarkup`. This converts a blaze Html structure into a doclayout Doc Text. In addition, we now add a line break between an `img` tag and the associated `figcaption`. Note: Output is never wrapped in `writeHtmlStringForEPUB`. This accords with previous behavior since previously the HTML writer was insensitive to `--wrap` settings. There's no real need to wrap HTML inside a zipped container. Note that the contents of script, textarea, and pre tags are always laid out with the `flush` combinator, so that unwanted spaces won't be introduced if these occur in an indented context in a template. Closes #7764.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Writers/Blaze.hs139
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs107
2 files changed, 197 insertions, 49 deletions
diff --git a/src/Text/Pandoc/Writers/Blaze.hs b/src/Text/Pandoc/Writers/Blaze.hs
new file mode 100644
index 000000000..0e3bd0f98
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Blaze.hs
@@ -0,0 +1,139 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.Writers.Shared
+ Copyright : Copyright (C) 2021 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Render blaze-html Html to DocLayout document (so it can be wrapped).
+-}
+module Text.Pandoc.Writers.Blaze ( layoutMarkup )
+where
+import Text.Blaze
+import qualified Data.ByteString as S
+import Data.List (isInfixOf)
+import Data.Text.Encoding (decodeUtf8)
+import qualified Data.Text as T
+import Data.Text (Text)
+import Text.DocLayout hiding (Text, Empty)
+import Text.Blaze.Internal (ChoiceString(..), getText, MarkupM(..))
+
+layoutMarkup :: Markup -> Doc T.Text
+layoutMarkup = go True mempty
+ where
+ go :: Bool -> Doc T.Text -> MarkupM b -> Doc T.Text
+ go wrap attrs (Parent _ open close content) =
+ let open' = getText open
+ in literal open'
+ <> attrs
+ <> char '>'
+ <> (if allowsWrap open'
+ then go wrap mempty content
+ else flush $ go False mempty content)
+ <> literal (getText close)
+ go wrap attrs (CustomParent tag content) =
+ char '<'
+ <> fromChoiceString wrap tag
+ <> attrs
+ <> char '>'
+ <> go wrap mempty content
+ <> literal "</"
+ <> fromChoiceString wrap tag
+ <> char '>'
+ go _wrap attrs (Leaf _ begin end _) =
+ literal (getText begin)
+ <> attrs
+ <> literal (getText end)
+ go wrap attrs (CustomLeaf tag close _) =
+ char '<'
+ <> fromChoiceString wrap tag
+ <> attrs
+ <> (if close then literal " />" else char '>')
+ go wrap attrs (AddAttribute rawkey _ value h) =
+ go wrap
+ (space' wrap
+ <> literal (getText rawkey)
+ <> char '='
+ <> doubleQuotes (fromChoiceString wrap value)
+ <> attrs) h
+ go wrap attrs (AddCustomAttribute key value h) =
+ go wrap
+ (space' wrap
+ <> fromChoiceString wrap key
+ <> char '='
+ <> doubleQuotes (fromChoiceString wrap value)
+ <> attrs) h
+ go wrap _ (Content content _) = fromChoiceString wrap content
+ go wrap _ (Comment comment _) =
+ literal "<!--"
+ <> space' wrap
+ <> fromChoiceString wrap comment
+ <> space' wrap
+ <> "-->"
+ go wrap attrs (Append h1 h2) = go wrap attrs h1 <> go wrap attrs h2
+ go _ _ (Empty _) = mempty
+ space' wrap = if wrap then space else char ' '
+
+allowsWrap :: T.Text -> Bool
+allowsWrap t =
+ not (t == "<pre" || t == "<style" || t == "<script" || t == "<textarea")
+
+fromChoiceString :: Bool -- ^ Allow wrapping
+ -> ChoiceString -- ^ String to render
+ -> Doc Text -- ^ Resulting builder
+fromChoiceString wrap (Static s) = withWrap wrap $ getText s
+fromChoiceString wrap (String s) = withWrap wrap $
+ escapeMarkupEntities $ T.pack s
+fromChoiceString wrap (Text s) = withWrap wrap $ escapeMarkupEntities s
+fromChoiceString wrap (ByteString s) = withWrap wrap $ decodeUtf8 s
+fromChoiceString _wrap (PreEscaped x) = -- don't wrap!
+ case x of
+ String s -> literal $ T.pack s
+ Text s -> literal s
+ s -> fromChoiceString False s
+fromChoiceString wrap (External x) = case x of
+ -- Check that the sequence "</" is *not* in the external data.
+ String s -> if "</" `isInfixOf` s then mempty else withWrap wrap (T.pack s)
+ Text s -> if "</" `T.isInfixOf` s then mempty else withWrap wrap s
+ ByteString s -> if "</" `S.isInfixOf` s then mempty else withWrap wrap (decodeUtf8 s)
+ s -> fromChoiceString wrap s
+fromChoiceString wrap (AppendChoiceString x y) =
+ fromChoiceString wrap x <> fromChoiceString wrap y
+fromChoiceString _ EmptyChoiceString = mempty
+
+withWrap :: Bool -> Text -> Doc Text
+withWrap wrap
+ | wrap = mconcat . toChunks
+ | otherwise = literal
+
+toChunks :: Text -> [Doc Text]
+toChunks = map toDoc . T.groupBy sameStatus
+ where
+ toDoc t =
+ if T.any (== ' ') t
+ then space
+ else if T.any (== '\n') t
+ then cr
+ else literal t
+ sameStatus c d =
+ (c == ' ' && d == ' ') ||
+ (c == '\n' && d == '\n') ||
+ (c /= ' ' && d /= ' ' && c /= '\n' && d /= '\n')
+
+
+-- | Escape predefined XML entities in a text value
+--
+escapeMarkupEntities :: Text -- ^ Text to escape
+ -> Text -- ^ Resulting Doc
+escapeMarkupEntities = T.concatMap escape
+ where
+ escape :: Char -> Text
+ escape '<' = "&lt;"
+ escape '>' = "&gt;"
+ escape '&' = "&amp;"
+ escape '"' = "&quot;"
+ escape '\'' = "&#39;"
+ escape x = T.singleton x
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 247cddfc9..664aeffb6 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -39,7 +39,7 @@ import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Network.URI (URI (..), parseURIReference)
import Numeric (showHex)
-import Text.DocLayout (render, literal)
+import Text.DocLayout (render, literal, Doc)
import Text.Blaze.Internal (MarkupM (Empty), customLeaf, customParent)
import Text.DocTemplates (FromContext (lookupContext), Context (..))
import Text.Blaze.Html hiding (contents)
@@ -70,6 +70,7 @@ import Text.Pandoc.Class.PandocPure (runPure)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.MIME (mediaCategory)
+import Text.Pandoc.Writers.Blaze (layoutMarkup)
import Text.TeXMath
import Text.XML.Light (elChildren, unode, unqual)
import qualified Text.XML.Light as XML
@@ -160,7 +161,8 @@ writeHtmlStringForEPUB :: PandocMonad m
-> m Text
writeHtmlStringForEPUB version o = writeHtmlString'
defaultWriterState{ stHtml5 = version == EPUB3,
- stEPUBVersion = Just version } o
+ stEPUBVersion = Just version }
+ o{ writerWrapText = WrapNone }
-- | Convert Pandoc document to Reveal JS HTML slide show.
writeRevealJs :: PandocMonad m
@@ -207,17 +209,23 @@ writeHtmlString' :: PandocMonad m
=> WriterState -> WriterOptions -> Pandoc -> m Text
writeHtmlString' st opts d = do
(body, context) <- evalStateT (pandocToHtml opts d) st
+ let colwidth = case writerWrapText opts of
+ WrapAuto -> Just (writerColumns opts)
+ _ -> Nothing
(if writerPreferAscii opts
then toEntities
else id) <$>
case writerTemplate opts of
- Nothing -> return $ renderHtml' body
+ Nothing -> return $
+ case colwidth of
+ Nothing -> renderHtml' body -- optimization, skip layout
+ Just cols -> render (Just cols) $ layoutMarkup body
Just tpl -> do
-- warn if empty lang
when (isNothing (getField "lang" context :: Maybe Text)) $
report NoLangSpecified
-- check for empty pagetitle
- context' <-
+ (context' :: Context Text) <-
case getField "pagetitle" context of
Just (s :: Text) | not (T.null s) -> return context
_ -> do
@@ -228,9 +236,9 @@ writeHtmlString' st opts d = do
Just [] -> "Untitled"
Just (x:_) -> takeBaseName $ T.unpack x
report $ NoTitleElement fallback
- return $ resetField "pagetitle" fallback context
- return $ render Nothing $ renderTemplate tpl
- (defField "body" (renderHtml' body) context')
+ return $ resetField "pagetitle" (literal fallback) context
+ return $ render colwidth $ renderTemplate tpl
+ (defField "body" (layoutMarkup body) context')
writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html
writeHtml' st opts d =
@@ -252,13 +260,13 @@ pandocToHtml opts (Pandoc meta blocks) = do
let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts
modify $ \st -> st{ stSlideLevel = slideLevel }
metadata <- metaToContext opts
- (fmap (literal . renderHtml') . blockListToHtml opts)
- (fmap (literal . renderHtml') . inlineListToHtml opts)
+ (fmap layoutMarkup . blockListToHtml opts)
+ (fmap layoutMarkup . inlineListToHtml opts)
meta
let stringifyHTML = escapeStringForXML . stringify
- let authsMeta = map stringifyHTML $ docAuthors meta
+ let authsMeta = map (literal . stringifyHTML) $ docAuthors meta
let dateMeta = stringifyHTML $ docDate meta
- let descriptionMeta = escapeStringForXML $
+ let descriptionMeta = literal $ escapeStringForXML $
lookupMetaString "description" meta
slideVariant <- gets stSlideVariant
let sects = adjustNumbers opts $
@@ -267,7 +275,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
then blocks
else prepSlides slideLevel blocks
toc <- if writerTableOfContents opts && slideVariant /= S5Slides
- then fmap renderHtml' <$> tableOfContents opts sects
+ then fmap layoutMarkup <$> tableOfContents opts sects
else return Nothing
blocks' <- blockListToHtml opts sects
notes <- do
@@ -281,7 +289,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
return notes
st <- get
let thebody = blocks' >> notes
- let math = case writerHTMLMathMethod opts of
+ let math = layoutMarkup $ case writerHTMLMathMethod opts of
MathJax url
| slideVariant /= RevealJsSlides ->
-- mathjax is handled via a special plugin in revealjs
@@ -298,7 +306,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
nl opts
let katexFlushLeft =
case lookupContext "classoption" metadata of
- Just clsops | "fleqn" `elem` (clsops :: [Text]) -> "true"
+ Just clsops | "fleqn" `elem` (clsops :: [Doc Text]) -> "true"
_ -> "false"
H.script $ text $ T.unlines [
"document.addEventListener(\"DOMContentLoaded\", function () {"
@@ -324,15 +332,16 @@ pandocToHtml opts (Pandoc meta blocks) = do
Just s | not (stHtml5 st) ->
H.script ! A.type_ "text/javascript"
$ preEscapedString
- ("/*<![CDATA[*/\n" ++ T.unpack s ++
+ ("/*<![CDATA[*/\n" <> T.unpack s <>
"/*]]>*/\n")
| otherwise -> mempty
Nothing -> mempty
let mCss :: Maybe [Text] = lookupContext "css" metadata
- let context = (if stHighlighting st
+ let context :: Context Text
+ context = (if stHighlighting st
then case writerHighlightStyle opts of
Just sty -> defField "highlighting-css"
- (T.pack $ styleToCss sty)
+ (literal $ T.pack $ styleToCss sty)
Nothing -> id
else id) .
(if stCsl st
@@ -342,15 +351,15 @@ pandocToHtml opts (Pandoc meta blocks) = do
Just 0 -> id
Just n ->
defField "csl-entry-spacing"
- (tshow n <> "em"))
+ (literal $ tshow n <> "em"))
else id) .
(if stMath st
- then defField "math" (renderHtml' math)
+ then defField "math" math
else id) .
(case writerHTMLMathMethod opts of
MathJax u -> defField "mathjax" True .
defField "mathjaxurl"
- (T.takeWhile (/='?') u)
+ (literal $ T.takeWhile (/='?') u)
_ -> defField "mathjax" False) .
(case writerHTMLMathMethod opts of
PlainMath -> defField "displaymath-css" True
@@ -361,11 +370,12 @@ pandocToHtml opts (Pandoc meta blocks) = do
-- template can't distinguish False/undefined
defField "controls" True .
defField "controlsTutorial" True .
- defField "controlsLayout" ("bottom-right" :: Text) .
- defField "controlsBackArrows" ("faded" :: Text) .
+ defField "controlsLayout"
+ ("bottom-right" :: Doc Text) .
+ defField "controlsBackArrows" ("faded" :: Doc Text) .
defField "progress" True .
defField "slideNumber" False .
- defField "showSlideNumber" ("all" :: Text) .
+ defField "showSlideNumber" ("all" :: Doc Text) .
defField "hashOneBasedIndex" False .
defField "hash" True .
defField "respondToHashChanges" True .
@@ -377,7 +387,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
defField "touch" True .
defField "loop" False .
defField "rtl" False .
- defField "navigationMode" ("default" :: Text) .
+ defField "navigationMode" ("default" :: Doc Text) .
defField "shuffle" False .
defField "fragments" True .
defField "fragmentInURL" True .
@@ -385,22 +395,22 @@ pandocToHtml opts (Pandoc meta blocks) = do
defField "help" True .
defField "pause" True .
defField "showNotes" False .
- defField "autoPlayMedia" ("null" :: Text) .
- defField "preloadIframes" ("null" :: Text) .
- defField "autoSlide" ("0" :: Text) .
+ defField "autoPlayMedia" ("null" :: Doc Text) .
+ defField "preloadIframes" ("null" :: Doc Text) .
+ defField "autoSlide" ("0" :: Doc Text) .
defField "autoSlideStoppable" True .
- defField "autoSlideMethod" ("null" :: Text) .
- defField "defaultTiming" ("null" :: Text) .
+ defField "autoSlideMethod" ("null" :: Doc Text) .
+ defField "defaultTiming" ("null" :: Doc Text) .
defField "mouseWheel" False .
- defField "display" ("block" :: Text) .
+ defField "display" ("block" :: Doc Text) .
defField "hideInactiveCursor" True .
- defField "hideCursorTime" ("5000" :: Text) .
+ defField "hideCursorTime" ("5000" :: Doc Text) .
defField "previewLinks" False .
- defField "transition" ("slide" :: Text) .
- defField "transitionSpeed" ("default" :: Text) .
- defField "backgroundTransition" ("fade" :: Text) .
- defField "viewDistance" ("3" :: Text) .
- defField "mobileViewDistance" ("2" :: Text)
+ defField "transition" ("slide" :: Doc Text) .
+ defField "transitionSpeed" ("default" :: Doc Text) .
+ defField "backgroundTransition" ("fade" :: Doc Text) .
+ defField "viewDistance" ("3" :: Doc Text) .
+ defField "mobileViewDistance" ("2" :: Doc Text)
else id) .
defField "document-css" (isNothing mCss && slideVariant == NoSlides) .
defField "quotes" (stQuotes st) .
@@ -410,18 +420,18 @@ pandocToHtml opts (Pandoc meta blocks) = do
maybe id (defField "toc") toc .
maybe id (defField "table-of-contents") toc .
defField "author-meta" authsMeta .
- maybe id (defField "date-meta")
+ maybe id (defField "date-meta" . literal)
(normalizeDate dateMeta) .
defField "description-meta" descriptionMeta .
defField "pagetitle"
- (stringifyHTML . docTitle $ meta) .
- defField "idprefix" (writerIdentifierPrefix opts) .
+ (literal . stringifyHTML . docTitle $ meta) .
+ defField "idprefix" (literal $ writerIdentifierPrefix opts) .
-- these should maybe be set in pandoc.hs
defField "slidy-url"
- ("https://www.w3.org/Talks/Tools/Slidy2" :: Text) .
- defField "slideous-url" ("slideous" :: Text) .
- defField "revealjs-url" ("https://unpkg.com/reveal.js@^4/" :: Text) $
- defField "s5-url" ("s5/default" :: Text) .
+ ("https://www.w3.org/Talks/Tools/Slidy2" :: Doc Text) .
+ defField "slideous-url" ("slideous" :: Doc Text) .
+ defField "revealjs-url" ("https://unpkg.com/reveal.js@^4/" :: Doc Text) $
+ defField "s5-url" ("s5/default" :: Doc Text) .
defField "html5" (stHtml5 st) $
metadata
return (thebody, context)
@@ -705,12 +715,11 @@ figure opts attr@(_, _, attrList) txt (s,tit) = do
img <- inlineToHtml opts (Image attr alt (s,tit))
capt <- if null txt
then return mempty
- else tocapt `fmap` inlineListToHtml opts txt
+ else (nl opts <>) . tocapt <$> inlineListToHtml opts txt
+ let inner = mconcat [nl opts, img, capt, nl opts]
return $ if html5
- then H5.figure $ mconcat
- [nl opts, img, capt, nl opts]
- else H.div ! A.class_ "figure" $ mconcat
- [nl opts, img, nl opts, capt, nl opts]
+ then H5.figure inner
+ else H.div ! A.class_ "figure" $ inner
adjustNumbers :: WriterOptions -> [Block] -> [Block]
@@ -1332,7 +1341,7 @@ inlineToHtml opts inline = do
Space -> return $ strToHtml " "
SoftBreak -> return $ case writerWrapText opts of
WrapNone -> preEscapedText " "
- WrapAuto -> preEscapedText " "
+ WrapAuto -> " "
WrapPreserve -> preEscapedText "\n"
LineBreak -> return $ do
if html5 then H5.br else H.br