From 99a4d1d0b06bb68e4d7a10acd642d439842004d1 Mon Sep 17 00:00:00 2001 From: Francesco Mazzoli Date: Fri, 10 Sep 2021 18:30:05 +0200 Subject: Support `--reference-location` for HTML output (#7461) The HTML writer now supports `EndOfBlock`, `EndOfSection`, and `EndOfDocument` for reference locations. EPUB and HTML slide show formats are also affected by this change. This works similarly to the markdown writer, but with special care taken to skipping section divs with what regards to the block level. The change also takes care to not modify the output if `EndOfDocument` is used. --- MANUAL.txt | 2 +- man/pandoc.1 | 2 +- src/Text/Pandoc/Writers/HTML.hs | 121 +++++++++++++++++++++++++++++----------- test/Tests/Writers/HTML.hs | 75 ++++++++++++++++++++++++- test/command/4235.md | 2 +- test/command/7006.md | 2 +- test/writer.html4 | 2 +- test/writer.html5 | 2 +- 8 files changed, 169 insertions(+), 39 deletions(-) diff --git a/MANUAL.txt b/MANUAL.txt index afdd66ddd..57b9f3b2c 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -972,7 +972,7 @@ header when requesting a document from a URL: : Specify whether footnotes (and references, if `reference-links` is set) are placed at the end of the current (top-level) block, the current section, or the document. The default is - `document`. Currently only affects the markdown writer. + `document`. Currently only affects the markdown and HTML writers. `--markdown-headings=setext`|`atx` diff --git a/man/pandoc.1 b/man/pandoc.1 index a0092b385..e901f60b3 100644 --- a/man/pandoc.1 +++ b/man/pandoc.1 @@ -995,7 +995,7 @@ Specify whether footnotes (and references, if \f[C]reference-links\f[R] is set) are placed at the end of the current (top-level) block, the current section, or the document. The default is \f[C]document\f[R]. -Currently only affects the markdown writer. +Currently only affects the markdown and HTML writers. .TP \f[B]\f[CB]--markdown-headings=setext\f[B]\f[R]|\f[B]\f[CB]atx\f[B]\f[R] Specify whether to use ATX-style (\f[C]#\f[R]-prefixed) or Setext-style diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 89fc110ef..c96d4622a 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -74,9 +74,11 @@ import Text.TeXMath import Text.XML.Light (elChildren, unode, unqual) import qualified Text.XML.Light as XML import Text.XML.Light.Output +import Data.String (fromString) data WriterState = WriterState { stNotes :: [Html] -- ^ List of notes + , stEmittedNotes :: Int -- ^ How many notes we've already pushed out to the HTML , stMath :: Bool -- ^ Math is used in document , stQuotes :: Bool -- ^ tag is used , stHighlighting :: Bool -- ^ Syntax highlighting is used @@ -88,10 +90,11 @@ data WriterState = WriterState , stCodeBlockNum :: Int -- ^ Number of code block , stCsl :: Bool -- ^ Has CSL references , stCslEntrySpacing :: Maybe Int -- ^ CSL entry spacing + , stBlockLevel :: Int -- ^ Current block depth, excluding section divs } defaultWriterState :: WriterState -defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, +defaultWriterState = WriterState {stNotes= [], stEmittedNotes = 0, stMath = False, stQuotes = False, stHighlighting = False, stHtml5 = False, stEPUBVersion = Nothing, @@ -100,7 +103,8 @@ defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, stInSection = False, stCodeBlockNum = 0, stCsl = False, - stCslEntrySpacing = Nothing} + stCslEntrySpacing = Nothing, + stBlockLevel = 0} -- Helpers to render HTML with the appropriate function. @@ -266,8 +270,16 @@ pandocToHtml opts (Pandoc meta blocks) = do then fmap renderHtml' <$> tableOfContents opts sects else return Nothing blocks' <- blockListToHtml opts sects + notes <- do + -- make the st private just to be safe, since we modify it right afterwards + st <- get + if null (stNotes st) + then return mempty + else do + notes <- footnoteSection opts EndOfDocument (stEmittedNotes st + 1) (reverse (stNotes st)) + modify (\st' -> st'{ stNotes = mempty, stEmittedNotes = stEmittedNotes st' + length (stNotes st') }) + return notes st <- get - notes <- footnoteSection opts (reverse (stNotes st)) let thebody = blocks' >> notes let math = case writerHTMLMathMethod opts of MathJax url @@ -490,28 +502,43 @@ tableOfContents opts sects = do -- | Convert list of Note blocks to a footnote
. -- Assumes notes are sorted. -footnoteSection :: PandocMonad m - => WriterOptions -> [Html] -> StateT WriterState m Html -footnoteSection opts notes = do +footnoteSection :: + PandocMonad m => WriterOptions -> ReferenceLocation -> Int -> [Html] -> StateT WriterState m Html +footnoteSection opts refLocation startCounter notes = do html5 <- gets stHtml5 slideVariant <- gets stSlideVariant - let hrtag = if html5 then H5.hr else H.hr + let hrtag = if refLocation /= EndOfBlock then (if html5 then H5.hr else H.hr) else mempty + let additionalClassName = case refLocation of + EndOfBlock -> "footnotes-end-of-block" + EndOfDocument -> "footnotes-end-of-document" + EndOfSection -> "footnotes-end-of-section" + let className = "footnotes " <> additionalClassName epubVersion <- gets stEPUBVersion let container x | html5 , epubVersion == Just EPUB3 - = H5.section ! A.class_ "footnotes" + = H5.section ! A.class_ className ! customAttribute "epub:type" "footnotes" $ x - | html5 = H5.section ! A.class_ "footnotes" + | html5 = H5.section ! A.class_ className ! customAttribute "role" "doc-endnotes" $ x | slideVariant /= NoSlides = H.div ! A.class_ "footnotes slide" $ x - | otherwise = H.div ! A.class_ "footnotes" $ x + | otherwise = H.div ! A.class_ className $ x return $ if null notes then mempty - else nl opts >> container (nl opts >> hrtag >> nl opts >> - H.ol (mconcat notes >> nl opts) >> nl opts) + else do + nl opts + container $ do + nl opts + hrtag + nl opts + -- Keep the previous output exactly the same if we don't + -- have multiple notes sections + if startCounter == 1 + then H.ol $ mconcat notes >> nl opts + else H.ol ! A.start (fromString (show startCounter)) $ mconcat notes >> nl opts + nl opts -- | Parse a mailto link; return Just (name, domain) or Nothing. parseMailto :: Text -> Maybe (Text, Text) @@ -702,11 +729,10 @@ adjustNumbers opts doc = fixnum x = x showSecNum = T.intercalate "." . map tshow --- | Convert Pandoc block element to HTML. -blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html -blockToHtml _ Null = return mempty -blockToHtml opts (Plain lst) = inlineListToHtml opts lst -blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)]) +blockToHtmlInner :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html +blockToHtmlInner _ Null = return mempty +blockToHtmlInner opts (Plain lst) = inlineListToHtml opts lst +blockToHtmlInner opts (Para [Image attr@(_,classes,_) txt (src,tit)]) | "stretch" `elem` classes = do slideVariant <- gets stSlideVariant case slideVariant of @@ -716,20 +742,20 @@ blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)]) inlineToHtml opts (Image attr txt (src, tit)) _ -> figure opts attr txt (src, tit) -- title beginning with fig: indicates that the image is a figure -blockToHtml opts (Para [Image attr txt (s,T.stripPrefix "fig:" -> Just tit)]) = +blockToHtmlInner opts (Para [Image attr txt (s,T.stripPrefix "fig:" -> Just tit)]) = figure opts attr txt (s,tit) -blockToHtml opts (Para lst) = do +blockToHtmlInner opts (Para lst) = do contents <- inlineListToHtml opts lst case contents of Empty _ | not (isEnabled Ext_empty_paragraphs opts) -> return mempty _ -> return $ H.p contents -blockToHtml opts (LineBlock lns) = +blockToHtmlInner opts (LineBlock lns) = if writerWrapText opts == WrapNone then blockToHtml opts $ linesToPara lns else do htmlLines <- inlineListToHtml opts $ intercalate [LineBreak] lns return $ H.div ! A.class_ "line-block" $ htmlLines -blockToHtml opts (Div (ident, "section":dclasses, dkvs) +blockToHtmlInner opts (Div (ident, "section":dclasses, dkvs) (Header level hattr@(hident,hclasses,hkvs) ils : xs)) = do slideVariant <- gets stSlideVariant @@ -810,7 +836,7 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs) if null innerSecs then mempty else nl opts <> innerContents -blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do +blockToHtmlInner opts (Div attr@(ident, classes, kvs') bs) = do html5 <- gets stHtml5 slideVariant <- gets stSlideVariant let isCslBibBody = ident == "refs" || "csl-bib-body" `elem` classes @@ -864,7 +890,7 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do _ -> return mempty else addAttrs opts (ident, classes'', kvs) $ divtag contents' -blockToHtml opts (RawBlock f str) = do +blockToHtmlInner opts (RawBlock f str) = do ishtml <- isRawHtml f if ishtml then return $ preEscapedText str @@ -875,10 +901,10 @@ blockToHtml opts (RawBlock f str) = do else do report $ BlockNotRendered (RawBlock f str) return mempty -blockToHtml _ HorizontalRule = do +blockToHtmlInner _ HorizontalRule = do html5 <- gets stHtml5 return $ if html5 then H5.hr else H.hr -blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do +blockToHtmlInner opts (CodeBlock (id',classes,keyvals) rawCode) = do id'' <- if T.null id' then do modify $ \st -> st{ stCodeBlockNum = stCodeBlockNum st + 1 } @@ -910,7 +936,7 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do -- we set writerIdentifierPrefix to "" since id'' already -- includes it: addAttrs opts{writerIdentifierPrefix = ""} (id'',[],keyvals) h -blockToHtml opts (BlockQuote blocks) = do +blockToHtmlInner opts (BlockQuote blocks) = do -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; -- otherwise incremental @@ -932,7 +958,7 @@ blockToHtml opts (BlockQuote blocks) = do else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level (ident,classes,kvs) lst) = do +blockToHtmlInner opts (Header level (ident,classes,kvs) lst) = do contents <- inlineListToHtml opts lst let secnum = fromMaybe mempty $ lookup "number" kvs let contents' = if writerNumberSections opts && not (T.null secnum) @@ -955,12 +981,12 @@ blockToHtml opts (Header level (ident,classes,kvs) lst) = do 5 -> H.h5 contents' 6 -> H.h6 contents' _ -> H.p ! A.class_ "heading" $ contents' -blockToHtml opts (BulletList lst) = do +blockToHtmlInner opts (BulletList lst) = do contents <- mapM (listItemToHtml opts) lst let isTaskList = not (null lst) && all isTaskListItem lst (if isTaskList then (! A.class_ "task-list") else id) <$> unordList opts contents -blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do +blockToHtmlInner opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (listItemToHtml opts) lst html5 <- gets stHtml5 let numstyle' = case numstyle of @@ -983,7 +1009,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do else []) l <- ordList opts contents return $ foldl' (!) l attribs -blockToHtml opts (DefinitionList lst) = do +blockToHtmlInner opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- liftM H.dt $ inlineListToHtml opts term defs' <- mapM (liftM (\x -> H.dd (x >> nl opts)) . @@ -991,9 +1017,39 @@ blockToHtml opts (DefinitionList lst) = do return $ mconcat $ nl opts : term' : nl opts : intersperse (nl opts) defs') lst defList opts contents -blockToHtml opts (Table attr caption colspecs thead tbody tfoot) = +blockToHtmlInner opts (Table attr caption colspecs thead tbody tfoot) = tableToHtml opts (Ann.toTable attr caption colspecs thead tbody tfoot) +-- | Convert Pandoc block element to HTML. All the legwork is done by +-- 'blockToHtmlInner', this just takes care of emitting the notes after +-- the block if necessary. +blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html +blockToHtml opts block = do + -- Ignore inserted section divs -- they are not blocks as they came from + -- the document itself (at least not when coming from markdown) + let isSection = case block of + Div (_, classes, _) _ | "section" `elem` classes -> True + _ -> False + let increaseLevel = not isSection + when increaseLevel $ + modify (\st -> st{ stBlockLevel = stBlockLevel st + 1 }) + doc <- blockToHtmlInner opts block + st <- get + let emitNotes = + (writerReferenceLocation opts == EndOfBlock && stBlockLevel st == 1) || + (writerReferenceLocation opts == EndOfSection && isSection) + res <- if emitNotes + then do + notes <- if null (stNotes st) + then return mempty + else footnoteSection opts (writerReferenceLocation opts) (stEmittedNotes st + 1) (reverse (stNotes st)) + modify (\st' -> st'{ stNotes = mempty, stEmittedNotes = stEmittedNotes st' + length (stNotes st') }) + return (doc <> notes) + else return doc + when increaseLevel $ + modify (\st' -> st'{ stBlockLevel = stBlockLevel st' - 1 }) + return res + tableToHtml :: PandocMonad m => WriterOptions -> Ann.Table @@ -1468,7 +1524,8 @@ inlineToHtml opts inline = do -- note: null title included, as in Markdown.pl (Note contents) -> do notes <- gets stNotes - let number = length notes + 1 + emittedNotes <- gets stEmittedNotes + let number = emittedNotes + length notes + 1 let ref = tshow number htmlContents <- blockListToNote opts ref contents epubVersion <- gets stEPUBVersion diff --git a/test/Tests/Writers/HTML.hs b/test/Tests/Writers/HTML.hs index 404f6da98..50775b171 100644 --- a/test/Tests/Writers/HTML.hs +++ b/test/Tests/Writers/HTML.hs @@ -8,8 +8,11 @@ import Text.Pandoc import Text.Pandoc.Arbitrary () import Text.Pandoc.Builder +htmlWithOpts :: (ToPandoc a) => WriterOptions -> a -> String +htmlWithOpts opts = unpack . purely (writeHtml4String opts{ writerWrapText = WrapNone }) . toPandoc + html :: (ToPandoc a) => a -> String -html = unpack . purely (writeHtml4String def{ writerWrapText = WrapNone }) . toPandoc +html = htmlWithOpts def htmlQTags :: (ToPandoc a) => a -> String htmlQTags = unpack @@ -33,6 +36,21 @@ infix 4 =: => String -> (a, String) -> TestTree (=:) = test html +noteTestDoc :: Blocks +noteTestDoc = + header 1 "Page title" <> + header 2 "First section" <> + para ("This is a footnote." <> + note (para "Down here.") <> + " And this is a " <> + link "https://www.google.com" "" "link" <> + ".") <> + blockQuote (para ("A note inside a block quote." <> + note (para "The second note.")) <> + para "A second paragraph.") <> + header 2 "Second section" <> + para "Some more text." + tests :: [TestTree] tests = [ testGroup "inline code" @@ -86,6 +104,61 @@ tests = =?> ("" ++ ">>=") ] + , testGroup "footnotes" + [ test (htmlWithOpts def{writerReferenceLocation=EndOfDocument}) + "at the end of a document" $ + noteTestDoc =?> + concat + [ "

Page title

" + , "

First section

" + , "

This is a footnote.1 And this is a link.

" + , "

A note inside a block quote.2

A second paragraph.

" + , "

Second section

" + , "

Some more text.

" + , "

  1. Down here.↩︎

  2. The second note.↩︎

" + ] + , test (htmlWithOpts def{writerReferenceLocation=EndOfBlock}) + "at the end of a block" $ + noteTestDoc =?> + concat + [ "

Page title

" + , "

First section

" + , "

This is a footnote.1 And this is a link.

" + , "
  1. Down here.↩︎

" + , "

A note inside a block quote.2

A second paragraph.

" + , "
  1. The second note.↩︎

" + , "

Second section

" + , "

Some more text.

" + ] + , test (htmlWithOpts def{writerReferenceLocation=EndOfSection}) + "at the end of a section" $ + noteTestDoc =?> + concat + [ "

Page title

" + , "

First section

" + , "

This is a footnote.1 And this is a link.

" + , "

A note inside a block quote.2

A second paragraph.

" + , "

  1. Down here.↩︎

  2. The second note.↩︎

" + , "

Second section

" + , "

Some more text.

" + ] + , test (htmlWithOpts def{writerReferenceLocation=EndOfSection, writerSectionDivs=True}) + "at the end of a section, with section divs" $ + noteTestDoc =?> + -- Footnotes are rendered _after_ their section (in this case after the level2 section + -- that contains it). + concat + [ "
" + , "

Page title

" + , "
" + , "

First section

" + , "

This is a footnote.1 And this is a link.

A note inside a block quote.2

A second paragraph.

" + , "
" + , "

  1. Down here.↩︎

  2. The second note.↩︎

" + , "

Second section

Some more text.

" + , "
" + ] + ] ] where tQ :: (ToString a, ToPandoc a) diff --git a/test/command/4235.md b/test/command/4235.md index 8bbf43ff9..4f2644dd6 100644 --- a/test/command/4235.md +++ b/test/command/4235.md @@ -3,7 +3,7 @@ This.^[Has a footnote.] ^D

This.1

-
+

  1. Has a footnote.↩︎

  2. diff --git a/test/command/7006.md b/test/command/7006.md index e7951fb1a..7e2215cdf 100644 --- a/test/command/7006.md +++ b/test/command/7006.md @@ -7,7 +7,7 @@ Test.[^fn] ![Caption.](/image.jpg) ^D

    Test.1

    -
    +

    1. Foo:

      diff --git a/test/writer.html4 b/test/writer.html4 index 215a1efb9..257d86ddb 100644 --- a/test/writer.html4 +++ b/test/writer.html4 @@ -665,7 +665,7 @@ Blah
    2. And in list items.5

    This paragraph should not be part of the note, as it is not indented.

    -
    +

    1. Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.↩︎

    2. diff --git a/test/writer.html5 b/test/writer.html5 index 387df4058..0141bf9fe 100644 --- a/test/writer.html5 +++ b/test/writer.html5 @@ -667,7 +667,7 @@ Blah
    3. And in list items.5

    This paragraph should not be part of the note, as it is not indented.

    -
    +

    1. Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.↩︎

    2. -- cgit v1.2.3