aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANUAL.txt13
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs19
-rw-r--r--test/command/ascii.md36
3 files changed, 60 insertions, 8 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index fb3fda805..0ad2e9220 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -859,13 +859,12 @@ Options affecting specific writers {.options}
`--ascii`
-: Use only ASCII characters in output. Currently supported for
- XML and HTML formats (which use entities instead of
- UTF-8 when this option is selected), Markdown (which uses
- entities), roff ms (which use hexadecimal escapes), and to
- a limited degree LaTeX (which uses standard commands for
- accented characters when possible). roff man output uses
- ASCII by default.
+: Use only ASCII characters in output. Currently supported for XML
+ and HTML formats (which use entities instead of UTF-8 when this
+ option is selected), CommonMark and Markdown (which uses
+ entities), roff ms (which use hexadecimal escapes), and to a
+ limited degree LaTeX (which uses standard commands for accented
+ characters when possible). roff man output uses ASCII by default.
`--reference-links`
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index 84ea37f38..e590ceac8 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -36,6 +36,7 @@ module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
import Prelude
import CMarkGFM
import Control.Monad.State.Strict (State, get, modify, runState)
+import Data.Char (isAscii)
import Data.Foldable (foldrM)
import Data.List (transpose)
import Data.Monoid (Any (..))
@@ -50,6 +51,7 @@ import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk (query, walk, walkM)
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
import Text.Pandoc.Writers.Shared
+import Text.Pandoc.XML (toHtml5Entities)
-- | Convert Pandoc to CommonMark.
writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text
@@ -238,7 +240,7 @@ inlinesToNodes :: WriterOptions -> [Inline] -> [Node]
inlinesToNodes opts = foldr (inlineToNodes opts) []
inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node]
-inlineToNodes opts (Str s) = (node (TEXT (T.pack s')) [] :)
+inlineToNodes opts (Str s) = stringToNodes opts s'
where s' = if isEnabled Ext_smart opts
then unsmartify opts s
else s
@@ -336,6 +338,21 @@ inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
inlineToNodes _ (Note _) = id -- should not occur
-- we remove Note elements in preprocessing
+stringToNodes :: WriterOptions -> String -> [Node] -> [Node]
+stringToNodes opts s
+ | not (writerPreferAscii opts) = (node (TEXT (T.pack s)) [] :)
+ | otherwise = step s
+ where
+ step input =
+ let (ascii, rest) = span isAscii input
+ this = node (TEXT (T.pack ascii)) []
+ nodes = case rest of
+ [] -> id
+ (nonAscii : rest') ->
+ let escaped = toHtml5Entities (T.singleton nonAscii)
+ in (node (HTML_INLINE escaped) [] :) . step rest'
+ in (this :) . nodes
+
toSubscriptInline :: Inline -> Maybe Inline
toSubscriptInline Space = Just Space
toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
diff --git a/test/command/ascii.md b/test/command/ascii.md
index 96fc50291..d01389a60 100644
--- a/test/command/ascii.md
+++ b/test/command/ascii.md
@@ -51,3 +51,39 @@ pandoc -t markdown-smart --ascii
&ldquo;&auml;&eacute;&imath;&aring;&rdquo;
```
+# CommonMark tests
+
+```
+% pandoc -f commonmark -t commonmark --ascii
+hello … ok? … bye
+^D
+hello &mldr; ok? &mldr; bye
+```
+
+```
+% pandoc -f commonmark+smart -t commonmark-smart --ascii --wrap=none
+"hi"...dog's breath---cat 5--6
+^D
+&ldquo;hi&rdquo;&mldr;dog&rsquo;s breath&mdash;cat 5&ndash;6
+```
+
+```
+% pandoc -f commonmark+smart -t commonmark+smart --ascii
+"hi"...dog's breath---cat 5--6
+^D
+"hi"...dog's breath---cat 5--6
+```
+
+```
+% pandoc -f commonmark -t commonmark --ascii
+foo &#1234; bar
+^D
+foo &#1234; bar
+```
+
+```
+% pandoc -f commonmark -t commonmark --ascii
+\[foo\](bar)
+^D
+\[foo\](bar)
+```