diff options
-rw-r--r-- | MANUAL.txt | 13 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/CommonMark.hs | 19 | ||||
-rw-r--r-- | test/command/ascii.md | 36 |
3 files changed, 60 insertions, 8 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index fb3fda805..0ad2e9220 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -859,13 +859,12 @@ Options affecting specific writers {.options} `--ascii` -: Use only ASCII characters in output. Currently supported for - XML and HTML formats (which use entities instead of - UTF-8 when this option is selected), Markdown (which uses - entities), roff ms (which use hexadecimal escapes), and to - a limited degree LaTeX (which uses standard commands for - accented characters when possible). roff man output uses - ASCII by default. +: Use only ASCII characters in output. Currently supported for XML + and HTML formats (which use entities instead of UTF-8 when this + option is selected), CommonMark and Markdown (which uses + entities), roff ms (which use hexadecimal escapes), and to a + limited degree LaTeX (which uses standard commands for accented + characters when possible). roff man output uses ASCII by default. `--reference-links` diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 84ea37f38..e590ceac8 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -36,6 +36,7 @@ module Text.Pandoc.Writers.CommonMark (writeCommonMark) where import Prelude import CMarkGFM import Control.Monad.State.Strict (State, get, modify, runState) +import Data.Char (isAscii) import Data.Foldable (foldrM) import Data.List (transpose) import Data.Monoid (Any (..)) @@ -50,6 +51,7 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes) import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML (toHtml5Entities) -- | Convert Pandoc to CommonMark. writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -238,7 +240,7 @@ inlinesToNodes :: WriterOptions -> [Inline] -> [Node] inlinesToNodes opts = foldr (inlineToNodes opts) [] inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node] -inlineToNodes opts (Str s) = (node (TEXT (T.pack s')) [] :) +inlineToNodes opts (Str s) = stringToNodes opts s' where s' = if isEnabled Ext_smart opts then unsmartify opts s else s @@ -336,6 +338,21 @@ inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++) inlineToNodes _ (Note _) = id -- should not occur -- we remove Note elements in preprocessing +stringToNodes :: WriterOptions -> String -> [Node] -> [Node] +stringToNodes opts s + | not (writerPreferAscii opts) = (node (TEXT (T.pack s)) [] :) + | otherwise = step s + where + step input = + let (ascii, rest) = span isAscii input + this = node (TEXT (T.pack ascii)) [] + nodes = case rest of + [] -> id + (nonAscii : rest') -> + let escaped = toHtml5Entities (T.singleton nonAscii) + in (node (HTML_INLINE escaped) [] :) . step rest' + in (this :) . nodes + toSubscriptInline :: Inline -> Maybe Inline toSubscriptInline Space = Just Space toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils diff --git a/test/command/ascii.md b/test/command/ascii.md index 96fc50291..d01389a60 100644 --- a/test/command/ascii.md +++ b/test/command/ascii.md @@ -51,3 +51,39 @@ pandoc -t markdown-smart --ascii “äéıå” ``` +# CommonMark tests + +``` +% pandoc -f commonmark -t commonmark --ascii +hello … ok? … bye +^D +hello … ok? … bye +``` + +``` +% pandoc -f commonmark+smart -t commonmark-smart --ascii --wrap=none +"hi"...dog's breath---cat 5--6 +^D +“hi”…dog’s breath—cat 5–6 +``` + +``` +% pandoc -f commonmark+smart -t commonmark+smart --ascii +"hi"...dog's breath---cat 5--6 +^D +"hi"...dog's breath---cat 5--6 +``` + +``` +% pandoc -f commonmark -t commonmark --ascii +foo Ӓ bar +^D +foo Ӓ bar +``` + +``` +% pandoc -f commonmark -t commonmark --ascii +\[foo\](bar) +^D +\[foo\](bar) +``` |