diff options
author | quasicomputational <quasicomputational@gmail.com> | 2018-10-06 05:33:14 +0100 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-10-05 21:33:14 -0700 |
commit | 6207bdeb681142e9fa3731e6e0ee7fa8e6c120f5 (patch) | |
tree | 1ef9b0c5f6b22ff6e97c26c8a83680862bd5bc42 | |
parent | a26b3a2d6af8614e13299bbf477e28c5932ef680 (diff) | |
download | pandoc-6207bdeb681142e9fa3731e6e0ee7fa8e6c120f5.tar.gz |
CommonMark writer: add plain text fallbacks. (#4531)
Previously, the writer would unconditionally emit HTMLish output for
subscripts, superscripts, strikeouts (if the strikeout extension is
disabled) and small caps, even with raw_html disabled.
Now there are plain-text (and, where possible, fancy Unicode)
fallbacks for all of these corresponding (mostly) to the Markdown
fallbacks, and the HTMLish output is only used when raw_html is
enabled.
This commit adds exported functions `toSuperscript` and
`toSubscript` to `Text.Pandoc.Writers.Shared`. [API change]
Closes #4528.
-rw-r--r-- | MANUAL.txt | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/CommonMark.hs | 52 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 29 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Shared.hs | 31 | ||||
-rw-r--r-- | test/command/4528.md | 156 |
5 files changed, 237 insertions, 41 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index 802ce556e..bf47184ce 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3381,8 +3381,14 @@ Markdown allows it, but it has been made an extension so that it can be disabled if desired.) The raw HTML is passed through unchanged in HTML, S5, Slidy, Slideous, -DZSlides, EPUB, Markdown, Emacs Org mode, and Textile output, and suppressed -in other formats. +DZSlides, EPUB, Markdown, CommonMark, Emacs Org mode, and Textile +output, and suppressed in other formats. + +In the CommonMark format, if `raw_html` is enabled, superscripts, +subscripts, strikeouts and small capitals will be represented as HTML. +Otherwise, plain-text fallbacks will be used. Note that even if +`raw_html` is disabled, tables will be rendered with HTML syntax if +they cannot use pipe syntax. #### Extension: `markdown_in_html_blocks` #### diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 27179496c..84ea37f38 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -45,7 +45,7 @@ import Network.HTTP (urlEncode) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Shared (isTightList, linesToPara, substitute) +import Text.Pandoc.Shared (isTightList, linesToPara, substitute, capitalize) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes) @@ -253,18 +253,34 @@ inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :) inlineToNodes opts (Strikeout xs) = if isEnabled Ext_strikeout opts then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :) - else ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) + else if isEnabled Ext_raw_html opts + then ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++ + [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) + else (inlinesToNodes opts xs ++) inlineToNodes opts (Superscript xs) = - ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) + if isEnabled Ext_raw_html opts + then ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++ + [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) + else case traverse toSuperscriptInline xs of + Nothing -> + ((node (TEXT (T.pack "^(")) [] : inlinesToNodes opts xs ++ + [node (TEXT (T.pack ")")) []]) ++ ) + Just xs' -> (inlinesToNodes opts xs' ++) inlineToNodes opts (Subscript xs) = - ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) + if isEnabled Ext_raw_html opts + then ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++ + [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) + else case traverse toSubscriptInline xs of + Nothing -> + ((node (TEXT (T.pack "_(")) [] : inlinesToNodes opts xs ++ + [node (TEXT (T.pack ")")) []]) ++ ) + Just xs' -> (inlinesToNodes opts xs' ++) inlineToNodes opts (SmallCaps xs) = - ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) [] - : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) + if isEnabled Ext_raw_html opts + then ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) [] + : inlinesToNodes opts xs ++ + [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) + else (inlinesToNodes opts (capitalize xs) ++) inlineToNodes opts (Link _ ils (url,tit)) = (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :) -- title beginning with fig: indicates implicit figure @@ -319,3 +335,19 @@ inlineToNodes opts (Span attr ils) = inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++) inlineToNodes _ (Note _) = id -- should not occur -- we remove Note elements in preprocessing + +toSubscriptInline :: Inline -> Maybe Inline +toSubscriptInline Space = Just Space +toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils +toSubscriptInline (Str s) = Str <$> traverse toSubscript s +toSubscriptInline LineBreak = Just LineBreak +toSubscriptInline SoftBreak = Just SoftBreak +toSubscriptInline _ = Nothing + +toSuperscriptInline :: Inline -> Maybe Inline +toSuperscriptInline Space = Just Space +toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils +toSuperscriptInline (Str s) = Str <$> traverse toSuperscript s +toSuperscriptInline LineBreak = Just LineBreak +toSuperscriptInline SoftBreak = Just SoftBreak +toSuperscriptInline _ = Nothing diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 741d11580..9a4acb59d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -38,7 +38,7 @@ module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Prelude import Control.Monad.Reader import Control.Monad.State.Strict -import Data.Char (chr, isPunctuation, isSpace, ord, isAlphaNum) +import Data.Char (isPunctuation, isSpace, isAlphaNum) import Data.Default import qualified Data.HashMap.Strict as H import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose) @@ -1249,33 +1249,6 @@ makeMathPlainer = walk go go (Emph xs) = Span nullAttr xs go x = x -toSuperscript :: Char -> Maybe Char -toSuperscript '1' = Just '\x00B9' -toSuperscript '2' = Just '\x00B2' -toSuperscript '3' = Just '\x00B3' -toSuperscript '+' = Just '\x207A' -toSuperscript '-' = Just '\x207B' -toSuperscript '=' = Just '\x207C' -toSuperscript '(' = Just '\x207D' -toSuperscript ')' = Just '\x207E' -toSuperscript c - | c >= '0' && c <= '9' = - Just $ chr (0x2070 + (ord c - 48)) - | isSpace c = Just c - | otherwise = Nothing - -toSubscript :: Char -> Maybe Char -toSubscript '+' = Just '\x208A' -toSubscript '-' = Just '\x208B' -toSubscript '=' = Just '\x208C' -toSubscript '(' = Just '\x208D' -toSubscript ')' = Just '\x208E' -toSubscript c - | c >= '0' && c <= '9' = - Just $ chr (0x2080 + (ord c - 48)) - | isSpace c = Just c - | otherwise = Nothing - lineBreakToSpace :: Inline -> Inline lineBreakToSpace LineBreak = Space lineBreakToSpace SoftBreak = Space diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 323748aad..a7bf30aaa 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -48,12 +48,15 @@ module Text.Pandoc.Writers.Shared ( , lookupMetaString , stripLeadingTrailingSpace , groffEscape + , toSubscript + , toSuperscript ) where import Prelude import Control.Monad (zipWithM) import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object), encode, fromJSON) +import Data.Char (chr, ord, isAscii, isSpace) import qualified Data.HashMap.Strict as H import Data.List (groupBy, intersperse, transpose) import qualified Data.Map as M @@ -68,7 +71,6 @@ import Text.Pandoc.Shared (stringify) import Text.Pandoc.UTF8 (toStringLazy) import Text.Pandoc.XML (escapeStringForXML) import Text.Printf (printf) -import Data.Char (isAscii, ord) -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. @@ -392,3 +394,30 @@ groffEscape = T.concatMap toUchar | isAscii c = T.singleton c | otherwise = T.pack $ printf "\\[u%04X]" (ord c) + +toSuperscript :: Char -> Maybe Char +toSuperscript '1' = Just '\x00B9' +toSuperscript '2' = Just '\x00B2' +toSuperscript '3' = Just '\x00B3' +toSuperscript '+' = Just '\x207A' +toSuperscript '-' = Just '\x207B' +toSuperscript '=' = Just '\x207C' +toSuperscript '(' = Just '\x207D' +toSuperscript ')' = Just '\x207E' +toSuperscript c + | c >= '0' && c <= '9' = + Just $ chr (0x2070 + (ord c - 48)) + | isSpace c = Just c + | otherwise = Nothing + +toSubscript :: Char -> Maybe Char +toSubscript '+' = Just '\x208A' +toSubscript '-' = Just '\x208B' +toSubscript '=' = Just '\x208C' +toSubscript '(' = Just '\x208D' +toSubscript ')' = Just '\x208E' +toSubscript c + | c >= '0' && c <= '9' = + Just $ chr (0x2080 + (ord c - 48)) + | isSpace c = Just c + | otherwise = Nothing diff --git a/test/command/4528.md b/test/command/4528.md new file mode 100644 index 000000000..a60f6decf --- /dev/null +++ b/test/command/4528.md @@ -0,0 +1,156 @@ +# Rendering small caps, superscripts and subscripts with and without `raw_html` + +## Small caps + +``` +% pandoc --wrap=none -f latex -t commonmark-raw_html +This has \textsc{small caps} in it. +^D +This has SMALL CAPS in it. +``` + +``` +% pandoc --wrap=none -f latex -t commonmark+raw_html +This has \textsc{small caps} in it. +^D +This has <span class="smallcaps">small caps</span> in it. +``` +``` + +``` +% pandoc --wrap=none -f latex -t markdown_strict+raw_html +This has \textsc{small caps} in it. +^D +This has <span class="smallcaps">small caps</span> in it. +``` + +## Strikeout + +``` +% pandoc --wrap=none -f html -t commonmark-raw_html-strikeout +This has <s>strikeout</s> in it. +^D +This has strikeout in it. + +``` +% pandoc --wrap=none -f html -t commonmark+raw_html-strikeout +This has <s>strikeout</s> in it. +^D +This has <s>strikeout</s> in it. +``` + +``` +% pandoc --wrap=none -f html -t commonmark-raw_html+strikeout +This has <s>strikeout</s> in it. +^D +This has ~~strikeout~~ in it. +``` + +``` +% pandoc --wrap=none -f html -t commonmark+raw_html+strikeout +This has <s>strikeout</s> in it. +^D +This has ~~strikeout~~ in it. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict-raw_html-strikeout +This has <s>strikeout</s> in it. +^D +This has strikeout in it. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html-strikeout +This has <s>strikeout</s> in it. +^D +This has <s>strikeout</s> in it. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict-raw_html+strikeout +This has <s>strikeout</s> in it. +^D +This has ~~strikeout~~ in it. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html+strikeout +This has <s>strikeout</s> in it. +^D +This has ~~strikeout~~ in it. +``` + +## Superscript + +``` +% pandoc --wrap=none -f html -t commonmark-raw_html +This has <sup>superscript</sup> in it and <sup>2 3</sup> again. With emphasis: <sup><em>2</em> 3</sup>. With letters: <sup>foo</sup>. With a span: <sup><span class=foo>2</span></sup>. +^D +This has ^(superscript) in it and ² ³ again. With emphasis: ^(*2* 3). With letters: ^(foo). With a span: ². +``` + +``` +% pandoc --wrap=none -f html -t commonmark+raw_html +This has <sup>superscript</sup> in it and <sup>2</sup> again. +^D +This has <sup>superscript</sup> in it and <sup>2</sup> again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict-raw_html-superscript +This has <sup>superscript</sup> in it and <sup>2</sup> again. +^D +This has ^(superscript) in it and ² again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html-superscript +This has <sup>superscript</sup> in it and <sup>2</sup> again. +^D +This has <sup>superscript</sup> in it and <sup>2</sup> again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html+superscript +This has <sup>superscript</sup> in it and <sup>2</sup> again. +^D +This has ^superscript^ in it and ^2^ again. +``` + +## Subscript + +``` +% pandoc --wrap=none -f html -t commonmark-raw_html +This has <sub>subscript</sub> in it and <sub>2 3</sub> again. With emphasis: <sub><em>2</em> 3</sub>. With letters: <sub>foo</sub>. With a span: <sub><span class=foo>2</span></sub>. +^D +This has \_(subscript) in it and ₂ ₃ again. With emphasis: \_(*2* 3). With letters: \_(foo). With a span: ₂. +``` + +``` +% pandoc --wrap=none -f html -t commonmark+raw_html +This has <sub>subscript</sub> in it and <sub>2</sub> again. +^D +This has <sub>subscript</sub> in it and <sub>2</sub> again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict-raw_html-subscript +This has <sub>subscript</sub> in it and <sub>2</sub> again. +^D +This has _(subscript) in it and ₂ again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html-subscript +This has <sub>subscript</sub> in it and <sub>2</sub> again. +^D +This has <sub>subscript</sub> in it and <sub>2</sub> again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html+subscript +This has <sub>subscript</sub> in it and <sub>2</sub> again. +^D +This has ~subscript~ in it and ~2~ again. +``` |