aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorquasicomputational <quasicomputational@gmail.com>2018-10-06 05:33:14 +0100
committerJohn MacFarlane <jgm@berkeley.edu>2018-10-05 21:33:14 -0700
commit6207bdeb681142e9fa3731e6e0ee7fa8e6c120f5 (patch)
tree1ef9b0c5f6b22ff6e97c26c8a83680862bd5bc42
parenta26b3a2d6af8614e13299bbf477e28c5932ef680 (diff)
downloadpandoc-6207bdeb681142e9fa3731e6e0ee7fa8e6c120f5.tar.gz
CommonMark writer: add plain text fallbacks. (#4531)
Previously, the writer would unconditionally emit HTMLish output for subscripts, superscripts, strikeouts (if the strikeout extension is disabled) and small caps, even with raw_html disabled. Now there are plain-text (and, where possible, fancy Unicode) fallbacks for all of these corresponding (mostly) to the Markdown fallbacks, and the HTMLish output is only used when raw_html is enabled. This commit adds exported functions `toSuperscript` and `toSubscript` to `Text.Pandoc.Writers.Shared`. [API change] Closes #4528.
-rw-r--r--MANUAL.txt10
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs52
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs29
-rw-r--r--src/Text/Pandoc/Writers/Shared.hs31
-rw-r--r--test/command/4528.md156
5 files changed, 237 insertions, 41 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 802ce556e..bf47184ce 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -3381,8 +3381,14 @@ Markdown allows it, but it has been made an extension so that it can
be disabled if desired.)
The raw HTML is passed through unchanged in HTML, S5, Slidy, Slideous,
-DZSlides, EPUB, Markdown, Emacs Org mode, and Textile output, and suppressed
-in other formats.
+DZSlides, EPUB, Markdown, CommonMark, Emacs Org mode, and Textile
+output, and suppressed in other formats.
+
+In the CommonMark format, if `raw_html` is enabled, superscripts,
+subscripts, strikeouts and small capitals will be represented as HTML.
+Otherwise, plain-text fallbacks will be used. Note that even if
+`raw_html` is disabled, tables will be rendered with HTML syntax if
+they cannot use pipe syntax.
#### Extension: `markdown_in_html_blocks` ####
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index 27179496c..84ea37f38 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -45,7 +45,7 @@ import Network.HTTP (urlEncode)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
-import Text.Pandoc.Shared (isTightList, linesToPara, substitute)
+import Text.Pandoc.Shared (isTightList, linesToPara, substitute, capitalize)
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk (query, walk, walkM)
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
@@ -253,18 +253,34 @@ inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :)
inlineToNodes opts (Strikeout xs) =
if isEnabled Ext_strikeout opts
then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :)
- else ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++
- [node (HTML_INLINE (T.pack "</s>")) []]) ++ )
+ else if isEnabled Ext_raw_html opts
+ then ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++
+ [node (HTML_INLINE (T.pack "</s>")) []]) ++ )
+ else (inlinesToNodes opts xs ++)
inlineToNodes opts (Superscript xs) =
- ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++
- [node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
+ if isEnabled Ext_raw_html opts
+ then ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++
+ [node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
+ else case traverse toSuperscriptInline xs of
+ Nothing ->
+ ((node (TEXT (T.pack "^(")) [] : inlinesToNodes opts xs ++
+ [node (TEXT (T.pack ")")) []]) ++ )
+ Just xs' -> (inlinesToNodes opts xs' ++)
inlineToNodes opts (Subscript xs) =
- ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++
- [node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
+ if isEnabled Ext_raw_html opts
+ then ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++
+ [node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
+ else case traverse toSubscriptInline xs of
+ Nothing ->
+ ((node (TEXT (T.pack "_(")) [] : inlinesToNodes opts xs ++
+ [node (TEXT (T.pack ")")) []]) ++ )
+ Just xs' -> (inlinesToNodes opts xs' ++)
inlineToNodes opts (SmallCaps xs) =
- ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) []
- : inlinesToNodes opts xs ++
- [node (HTML_INLINE (T.pack "</span>")) []]) ++ )
+ if isEnabled Ext_raw_html opts
+ then ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) []
+ : inlinesToNodes opts xs ++
+ [node (HTML_INLINE (T.pack "</span>")) []]) ++ )
+ else (inlinesToNodes opts (capitalize xs) ++)
inlineToNodes opts (Link _ ils (url,tit)) =
(node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :)
-- title beginning with fig: indicates implicit figure
@@ -319,3 +335,19 @@ inlineToNodes opts (Span attr ils) =
inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
inlineToNodes _ (Note _) = id -- should not occur
-- we remove Note elements in preprocessing
+
+toSubscriptInline :: Inline -> Maybe Inline
+toSubscriptInline Space = Just Space
+toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
+toSubscriptInline (Str s) = Str <$> traverse toSubscript s
+toSubscriptInline LineBreak = Just LineBreak
+toSubscriptInline SoftBreak = Just SoftBreak
+toSubscriptInline _ = Nothing
+
+toSuperscriptInline :: Inline -> Maybe Inline
+toSuperscriptInline Space = Just Space
+toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils
+toSuperscriptInline (Str s) = Str <$> traverse toSuperscript s
+toSuperscriptInline LineBreak = Just LineBreak
+toSuperscriptInline SoftBreak = Just SoftBreak
+toSuperscriptInline _ = Nothing
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 741d11580..9a4acb59d 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -38,7 +38,7 @@ module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
import Prelude
import Control.Monad.Reader
import Control.Monad.State.Strict
-import Data.Char (chr, isPunctuation, isSpace, ord, isAlphaNum)
+import Data.Char (isPunctuation, isSpace, isAlphaNum)
import Data.Default
import qualified Data.HashMap.Strict as H
import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose)
@@ -1249,33 +1249,6 @@ makeMathPlainer = walk go
go (Emph xs) = Span nullAttr xs
go x = x
-toSuperscript :: Char -> Maybe Char
-toSuperscript '1' = Just '\x00B9'
-toSuperscript '2' = Just '\x00B2'
-toSuperscript '3' = Just '\x00B3'
-toSuperscript '+' = Just '\x207A'
-toSuperscript '-' = Just '\x207B'
-toSuperscript '=' = Just '\x207C'
-toSuperscript '(' = Just '\x207D'
-toSuperscript ')' = Just '\x207E'
-toSuperscript c
- | c >= '0' && c <= '9' =
- Just $ chr (0x2070 + (ord c - 48))
- | isSpace c = Just c
- | otherwise = Nothing
-
-toSubscript :: Char -> Maybe Char
-toSubscript '+' = Just '\x208A'
-toSubscript '-' = Just '\x208B'
-toSubscript '=' = Just '\x208C'
-toSubscript '(' = Just '\x208D'
-toSubscript ')' = Just '\x208E'
-toSubscript c
- | c >= '0' && c <= '9' =
- Just $ chr (0x2080 + (ord c - 48))
- | isSpace c = Just c
- | otherwise = Nothing
-
lineBreakToSpace :: Inline -> Inline
lineBreakToSpace LineBreak = Space
lineBreakToSpace SoftBreak = Space
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 323748aad..a7bf30aaa 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -48,12 +48,15 @@ module Text.Pandoc.Writers.Shared (
, lookupMetaString
, stripLeadingTrailingSpace
, groffEscape
+ , toSubscript
+ , toSuperscript
)
where
import Prelude
import Control.Monad (zipWithM)
import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object),
encode, fromJSON)
+import Data.Char (chr, ord, isAscii, isSpace)
import qualified Data.HashMap.Strict as H
import Data.List (groupBy, intersperse, transpose)
import qualified Data.Map as M
@@ -68,7 +71,6 @@ import Text.Pandoc.Shared (stringify)
import Text.Pandoc.UTF8 (toStringLazy)
import Text.Pandoc.XML (escapeStringForXML)
import Text.Printf (printf)
-import Data.Char (isAscii, ord)
-- | Create JSON value for template from a 'Meta' and an association list
-- of variables, specified at the command line or in the writer.
@@ -392,3 +394,30 @@ groffEscape = T.concatMap toUchar
| isAscii c = T.singleton c
| otherwise = T.pack $ printf "\\[u%04X]" (ord c)
+
+toSuperscript :: Char -> Maybe Char
+toSuperscript '1' = Just '\x00B9'
+toSuperscript '2' = Just '\x00B2'
+toSuperscript '3' = Just '\x00B3'
+toSuperscript '+' = Just '\x207A'
+toSuperscript '-' = Just '\x207B'
+toSuperscript '=' = Just '\x207C'
+toSuperscript '(' = Just '\x207D'
+toSuperscript ')' = Just '\x207E'
+toSuperscript c
+ | c >= '0' && c <= '9' =
+ Just $ chr (0x2070 + (ord c - 48))
+ | isSpace c = Just c
+ | otherwise = Nothing
+
+toSubscript :: Char -> Maybe Char
+toSubscript '+' = Just '\x208A'
+toSubscript '-' = Just '\x208B'
+toSubscript '=' = Just '\x208C'
+toSubscript '(' = Just '\x208D'
+toSubscript ')' = Just '\x208E'
+toSubscript c
+ | c >= '0' && c <= '9' =
+ Just $ chr (0x2080 + (ord c - 48))
+ | isSpace c = Just c
+ | otherwise = Nothing
diff --git a/test/command/4528.md b/test/command/4528.md
new file mode 100644
index 000000000..a60f6decf
--- /dev/null
+++ b/test/command/4528.md
@@ -0,0 +1,156 @@
+# Rendering small caps, superscripts and subscripts with and without `raw_html`
+
+## Small caps
+
+```
+% pandoc --wrap=none -f latex -t commonmark-raw_html
+This has \textsc{small caps} in it.
+^D
+This has SMALL CAPS in it.
+```
+
+```
+% pandoc --wrap=none -f latex -t commonmark+raw_html
+This has \textsc{small caps} in it.
+^D
+This has <span class="smallcaps">small caps</span> in it.
+```
+```
+
+```
+% pandoc --wrap=none -f latex -t markdown_strict+raw_html
+This has \textsc{small caps} in it.
+^D
+This has <span class="smallcaps">small caps</span> in it.
+```
+
+## Strikeout
+
+```
+% pandoc --wrap=none -f html -t commonmark-raw_html-strikeout
+This has <s>strikeout</s> in it.
+^D
+This has strikeout in it.
+
+```
+% pandoc --wrap=none -f html -t commonmark+raw_html-strikeout
+This has <s>strikeout</s> in it.
+^D
+This has <s>strikeout</s> in it.
+```
+
+```
+% pandoc --wrap=none -f html -t commonmark-raw_html+strikeout
+This has <s>strikeout</s> in it.
+^D
+This has ~~strikeout~~ in it.
+```
+
+```
+% pandoc --wrap=none -f html -t commonmark+raw_html+strikeout
+This has <s>strikeout</s> in it.
+^D
+This has ~~strikeout~~ in it.
+```
+
+```
+% pandoc --wrap=none -f html -t markdown_strict-raw_html-strikeout
+This has <s>strikeout</s> in it.
+^D
+This has strikeout in it.
+```
+
+```
+% pandoc --wrap=none -f html -t markdown_strict+raw_html-strikeout
+This has <s>strikeout</s> in it.
+^D
+This has <s>strikeout</s> in it.
+```
+
+```
+% pandoc --wrap=none -f html -t markdown_strict-raw_html+strikeout
+This has <s>strikeout</s> in it.
+^D
+This has ~~strikeout~~ in it.
+```
+
+```
+% pandoc --wrap=none -f html -t markdown_strict+raw_html+strikeout
+This has <s>strikeout</s> in it.
+^D
+This has ~~strikeout~~ in it.
+```
+
+## Superscript
+
+```
+% pandoc --wrap=none -f html -t commonmark-raw_html
+This has <sup>superscript</sup> in it and <sup>2 3</sup> again. With emphasis: <sup><em>2</em> 3</sup>. With letters: <sup>foo</sup>. With a span: <sup><span class=foo>2</span></sup>.
+^D
+This has ^(superscript) in it and ² ³ again. With emphasis: ^(*2* 3). With letters: ^(foo). With a span: ².
+```
+
+```
+% pandoc --wrap=none -f html -t commonmark+raw_html
+This has <sup>superscript</sup> in it and <sup>2</sup> again.
+^D
+This has <sup>superscript</sup> in it and <sup>2</sup> again.
+```
+
+```
+% pandoc --wrap=none -f html -t markdown_strict-raw_html-superscript
+This has <sup>superscript</sup> in it and <sup>2</sup> again.
+^D
+This has ^(superscript) in it and ² again.
+```
+
+```
+% pandoc --wrap=none -f html -t markdown_strict+raw_html-superscript
+This has <sup>superscript</sup> in it and <sup>2</sup> again.
+^D
+This has <sup>superscript</sup> in it and <sup>2</sup> again.
+```
+
+```
+% pandoc --wrap=none -f html -t markdown_strict+raw_html+superscript
+This has <sup>superscript</sup> in it and <sup>2</sup> again.
+^D
+This has ^superscript^ in it and ^2^ again.
+```
+
+## Subscript
+
+```
+% pandoc --wrap=none -f html -t commonmark-raw_html
+This has <sub>subscript</sub> in it and <sub>2 3</sub> again. With emphasis: <sub><em>2</em> 3</sub>. With letters: <sub>foo</sub>. With a span: <sub><span class=foo>2</span></sub>.
+^D
+This has \_(subscript) in it and ₂ ₃ again. With emphasis: \_(*2* 3). With letters: \_(foo). With a span: ₂.
+```
+
+```
+% pandoc --wrap=none -f html -t commonmark+raw_html
+This has <sub>subscript</sub> in it and <sub>2</sub> again.
+^D
+This has <sub>subscript</sub> in it and <sub>2</sub> again.
+```
+
+```
+% pandoc --wrap=none -f html -t markdown_strict-raw_html-subscript
+This has <sub>subscript</sub> in it and <sub>2</sub> again.
+^D
+This has _(subscript) in it and ₂ again.
+```
+
+```
+% pandoc --wrap=none -f html -t markdown_strict+raw_html-subscript
+This has <sub>subscript</sub> in it and <sub>2</sub> again.
+^D
+This has <sub>subscript</sub> in it and <sub>2</sub> again.
+```
+
+```
+% pandoc --wrap=none -f html -t markdown_strict+raw_html+subscript
+This has <sub>subscript</sub> in it and <sub>2</sub> again.
+^D
+This has ~subscript~ in it and ~2~ again.
+```