diff options
| -rw-r--r-- | MANUAL.txt | 10 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/CommonMark.hs | 52 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 29 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Shared.hs | 31 | ||||
| -rw-r--r-- | test/command/4528.md | 156 | 
5 files changed, 237 insertions, 41 deletions
| diff --git a/MANUAL.txt b/MANUAL.txt index 802ce556e..bf47184ce 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3381,8 +3381,14 @@ Markdown allows it, but it has been made an extension so that it can  be disabled if desired.)  The raw HTML is passed through unchanged in HTML, S5, Slidy, Slideous, -DZSlides, EPUB, Markdown, Emacs Org mode, and Textile output, and suppressed -in other formats. +DZSlides, EPUB, Markdown, CommonMark, Emacs Org mode, and Textile +output, and suppressed in other formats. + +In the CommonMark format, if `raw_html` is enabled, superscripts, +subscripts, strikeouts and small capitals will be represented as HTML. +Otherwise, plain-text fallbacks will be used. Note that even if +`raw_html` is disabled, tables will be rendered with HTML syntax if +they cannot use pipe syntax.  #### Extension: `markdown_in_html_blocks` #### diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 27179496c..84ea37f38 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -45,7 +45,7 @@ import Network.HTTP (urlEncode)  import Text.Pandoc.Class (PandocMonad)  import Text.Pandoc.Definition  import Text.Pandoc.Options -import Text.Pandoc.Shared (isTightList, linesToPara, substitute) +import Text.Pandoc.Shared (isTightList, linesToPara, substitute, capitalize)  import Text.Pandoc.Templates (renderTemplate')  import Text.Pandoc.Walk (query, walk, walkM)  import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes) @@ -253,18 +253,34 @@ inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :)  inlineToNodes opts (Strikeout xs) =    if isEnabled Ext_strikeout opts       then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :) -     else ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++ -           [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) +     else if isEnabled Ext_raw_html opts +            then ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++ +                  [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) +            else (inlinesToNodes opts xs ++)  inlineToNodes opts (Superscript xs) = -  ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++ -   [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) +  if isEnabled Ext_raw_html opts +    then ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++ +          [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) +    else case traverse toSuperscriptInline xs of +      Nothing -> +        ((node (TEXT (T.pack "^(")) [] : inlinesToNodes opts xs ++ +          [node (TEXT (T.pack ")")) []]) ++ ) +      Just xs' -> (inlinesToNodes opts xs' ++)  inlineToNodes opts (Subscript xs) = -  ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++ -   [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) +  if isEnabled Ext_raw_html opts +    then ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++ +          [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) +    else case traverse toSubscriptInline xs of +      Nothing -> +        ((node (TEXT (T.pack "_(")) [] : inlinesToNodes opts xs ++ +          [node (TEXT (T.pack ")")) []]) ++ ) +      Just xs' -> (inlinesToNodes opts xs' ++)  inlineToNodes opts (SmallCaps xs) = -  ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) [] -    : inlinesToNodes opts xs ++ -    [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) +  if isEnabled Ext_raw_html opts +    then ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) [] +           : inlinesToNodes opts xs ++ +           [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) +    else (inlinesToNodes opts (capitalize xs) ++)  inlineToNodes opts (Link _ ils (url,tit)) =    (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :)  -- title beginning with fig: indicates implicit figure @@ -319,3 +335,19 @@ inlineToNodes opts (Span attr ils) =  inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)  inlineToNodes _ (Note _) = id -- should not occur  -- we remove Note elements in preprocessing + +toSubscriptInline :: Inline -> Maybe Inline +toSubscriptInline Space = Just Space +toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils +toSubscriptInline (Str s) = Str <$> traverse toSubscript s +toSubscriptInline LineBreak = Just LineBreak +toSubscriptInline SoftBreak = Just SoftBreak +toSubscriptInline _ = Nothing + +toSuperscriptInline :: Inline -> Maybe Inline +toSuperscriptInline Space = Just Space +toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils +toSuperscriptInline (Str s) = Str <$> traverse toSuperscript s +toSuperscriptInline LineBreak = Just LineBreak +toSuperscriptInline SoftBreak = Just SoftBreak +toSuperscriptInline _ = Nothing diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 741d11580..9a4acb59d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -38,7 +38,7 @@ module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where  import Prelude  import Control.Monad.Reader  import Control.Monad.State.Strict -import Data.Char (chr, isPunctuation, isSpace, ord, isAlphaNum) +import Data.Char (isPunctuation, isSpace, isAlphaNum)  import Data.Default  import qualified Data.HashMap.Strict as H  import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose) @@ -1249,33 +1249,6 @@ makeMathPlainer = walk go    go (Emph xs) = Span nullAttr xs    go x         = x -toSuperscript :: Char -> Maybe Char -toSuperscript '1' = Just '\x00B9' -toSuperscript '2' = Just '\x00B2' -toSuperscript '3' = Just '\x00B3' -toSuperscript '+' = Just '\x207A' -toSuperscript '-' = Just '\x207B' -toSuperscript '=' = Just '\x207C' -toSuperscript '(' = Just '\x207D' -toSuperscript ')' = Just '\x207E' -toSuperscript c -  | c >= '0' && c <= '9' = -                 Just $ chr (0x2070 + (ord c - 48)) -  | isSpace c = Just c -  | otherwise = Nothing - -toSubscript :: Char -> Maybe Char -toSubscript '+' = Just '\x208A' -toSubscript '-' = Just '\x208B' -toSubscript '=' = Just '\x208C' -toSubscript '(' = Just '\x208D' -toSubscript ')' = Just '\x208E' -toSubscript c -  | c >= '0' && c <= '9' = -                 Just $ chr (0x2080 + (ord c - 48)) -  | isSpace c = Just c -  | otherwise = Nothing -  lineBreakToSpace :: Inline -> Inline  lineBreakToSpace LineBreak = Space  lineBreakToSpace SoftBreak = Space diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 323748aad..a7bf30aaa 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -48,12 +48,15 @@ module Text.Pandoc.Writers.Shared (                       , lookupMetaString                       , stripLeadingTrailingSpace                       , groffEscape +                     , toSubscript +                     , toSuperscript                       )  where  import Prelude  import Control.Monad (zipWithM)  import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object),                     encode, fromJSON) +import Data.Char (chr, ord, isAscii, isSpace)  import qualified Data.HashMap.Strict as H  import Data.List (groupBy, intersperse, transpose)  import qualified Data.Map as M @@ -68,7 +71,6 @@ import Text.Pandoc.Shared (stringify)  import Text.Pandoc.UTF8 (toStringLazy)  import Text.Pandoc.XML (escapeStringForXML)  import Text.Printf (printf) -import Data.Char (isAscii, ord)  -- | Create JSON value for template from a 'Meta' and an association list  -- of variables, specified at the command line or in the writer. @@ -392,3 +394,30 @@ groffEscape = T.concatMap toUchar           | isAscii c = T.singleton c           | otherwise = T.pack $ printf "\\[u%04X]" (ord c) + +toSuperscript :: Char -> Maybe Char +toSuperscript '1' = Just '\x00B9' +toSuperscript '2' = Just '\x00B2' +toSuperscript '3' = Just '\x00B3' +toSuperscript '+' = Just '\x207A' +toSuperscript '-' = Just '\x207B' +toSuperscript '=' = Just '\x207C' +toSuperscript '(' = Just '\x207D' +toSuperscript ')' = Just '\x207E' +toSuperscript c +  | c >= '0' && c <= '9' = +                 Just $ chr (0x2070 + (ord c - 48)) +  | isSpace c = Just c +  | otherwise = Nothing + +toSubscript :: Char -> Maybe Char +toSubscript '+' = Just '\x208A' +toSubscript '-' = Just '\x208B' +toSubscript '=' = Just '\x208C' +toSubscript '(' = Just '\x208D' +toSubscript ')' = Just '\x208E' +toSubscript c +  | c >= '0' && c <= '9' = +                 Just $ chr (0x2080 + (ord c - 48)) +  | isSpace c = Just c +  | otherwise = Nothing diff --git a/test/command/4528.md b/test/command/4528.md new file mode 100644 index 000000000..a60f6decf --- /dev/null +++ b/test/command/4528.md @@ -0,0 +1,156 @@ +# Rendering small caps, superscripts and subscripts with and without `raw_html` + +## Small caps + +``` +% pandoc --wrap=none -f latex -t commonmark-raw_html +This has \textsc{small caps} in it. +^D +This has SMALL CAPS in it. +``` + +``` +% pandoc --wrap=none -f latex -t commonmark+raw_html +This has \textsc{small caps} in it. +^D +This has <span class="smallcaps">small caps</span> in it. +``` +``` + +``` +% pandoc --wrap=none -f latex -t markdown_strict+raw_html +This has \textsc{small caps} in it. +^D +This has <span class="smallcaps">small caps</span> in it. +``` + +## Strikeout + +``` +% pandoc --wrap=none -f html -t commonmark-raw_html-strikeout +This has <s>strikeout</s> in it. +^D +This has strikeout in it. + +``` +% pandoc --wrap=none -f html -t commonmark+raw_html-strikeout +This has <s>strikeout</s> in it. +^D +This has <s>strikeout</s> in it. +``` + +``` +% pandoc --wrap=none -f html -t commonmark-raw_html+strikeout +This has <s>strikeout</s> in it. +^D +This has ~~strikeout~~ in it. +``` + +``` +% pandoc --wrap=none -f html -t commonmark+raw_html+strikeout +This has <s>strikeout</s> in it. +^D +This has ~~strikeout~~ in it. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict-raw_html-strikeout +This has <s>strikeout</s> in it. +^D +This has strikeout in it. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html-strikeout +This has <s>strikeout</s> in it. +^D +This has <s>strikeout</s> in it. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict-raw_html+strikeout +This has <s>strikeout</s> in it. +^D +This has ~~strikeout~~ in it. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html+strikeout +This has <s>strikeout</s> in it. +^D +This has ~~strikeout~~ in it. +``` + +## Superscript + +``` +% pandoc --wrap=none -f html -t commonmark-raw_html +This has <sup>superscript</sup> in it and <sup>2 3</sup> again. With emphasis: <sup><em>2</em> 3</sup>. With letters: <sup>foo</sup>. With a span: <sup><span class=foo>2</span></sup>. +^D +This has ^(superscript) in it and ² ³ again. With emphasis: ^(*2* 3). With letters: ^(foo). With a span: ². +``` + +``` +% pandoc --wrap=none -f html -t commonmark+raw_html +This has <sup>superscript</sup> in it and <sup>2</sup> again. +^D +This has <sup>superscript</sup> in it and <sup>2</sup> again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict-raw_html-superscript +This has <sup>superscript</sup> in it and <sup>2</sup> again. +^D +This has ^(superscript) in it and ² again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html-superscript +This has <sup>superscript</sup> in it and <sup>2</sup> again. +^D +This has <sup>superscript</sup> in it and <sup>2</sup> again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html+superscript +This has <sup>superscript</sup> in it and <sup>2</sup> again. +^D +This has ^superscript^ in it and ^2^ again. +``` + +## Subscript + +``` +% pandoc --wrap=none -f html -t commonmark-raw_html +This has <sub>subscript</sub> in it and <sub>2 3</sub> again. With emphasis: <sub><em>2</em> 3</sub>. With letters: <sub>foo</sub>. With a span: <sub><span class=foo>2</span></sub>. +^D +This has \_(subscript) in it and ₂ ₃ again. With emphasis: \_(*2* 3). With letters: \_(foo). With a span: ₂. +``` + +``` +% pandoc --wrap=none -f html -t commonmark+raw_html +This has <sub>subscript</sub> in it and <sub>2</sub> again. +^D +This has <sub>subscript</sub> in it and <sub>2</sub> again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict-raw_html-subscript +This has <sub>subscript</sub> in it and <sub>2</sub> again. +^D +This has _(subscript) in it and ₂ again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html-subscript +This has <sub>subscript</sub> in it and <sub>2</sub> again. +^D +This has <sub>subscript</sub> in it and <sub>2</sub> again. +``` + +``` +% pandoc --wrap=none -f html -t markdown_strict+raw_html+subscript +This has <sub>subscript</sub> in it and <sub>2</sub> again. +^D +This has ~subscript~ in it and ~2~ again. +``` | 
