From f379edc4ad372f96d4a5cc7cd38292f095dfdf35 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 1 Nov 2018 16:08:27 -0700 Subject: HTML writer: use character entities references when possible for HTML5. --- MANUAL.txt | 2 +- src/Text/Pandoc/Writers/HTML.hs | 10 ++++++++-- test/command/ascii.md | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/MANUAL.txt b/MANUAL.txt index 5a4710474..cda70a34c 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -859,7 +859,7 @@ Options affecting specific writers {.options} `--ascii` : Use only ASCII characters in output. Currently supported for - XML and HTML formats (which use numerical entities instead of + XML and HTML formats (which use entities instead of UTF-8 when this option is selected), groff ms (which use hexadecimal escapes), and to a limited degree LaTeX (which uses standard commands for accented diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 46f754226..4d15c827c 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -75,7 +75,8 @@ import Text.Pandoc.Templates import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Text.Pandoc.XML (escapeStringForXML, fromEntities, toEntities) +import Text.Pandoc.XML (escapeStringForXML, fromEntities, + toEntities, toHtml5Entities) #if MIN_VERSION_blaze_markup(0,6,3) #else import Text.Blaze.Internal (preEscapedString, preEscapedText) @@ -206,7 +207,12 @@ writeHtmlString' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Text writeHtmlString' st opts d = do (body, context) <- evalStateT (pandocToHtml opts d) st - (if writerPreferAscii opts then toEntities else id) <$> + (if writerPreferAscii opts + then + if stHtml5 st + then toHtml5Entities + else toEntities + else id) <$> case writerTemplate opts of Nothing -> return $ renderHtml' body Just tpl -> do diff --git a/test/command/ascii.md b/test/command/ascii.md index 4956ae14e..1cbb2bec4 100644 
--- a/test/command/ascii.md
+++ b/test/command/ascii.md
@@ -2,7 +2,7 @@
 pandoc -t html --ascii
 äéıå
 ^D
-<p>&#228;&#233;&#305;&#229;</p>
+<p>&auml;&eacute;&imath;&aring;</p>
 ```
 
 ```
-- cgit v1.2.3