about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2021-12-03 17:52:47 -0800
committer John MacFarlane <jgm@berkeley.edu> 2021-12-03 17:52:47 -0800
commit 51f6f0e3a1533a0e1bb94538f55bdc9db380a028 (patch)
tree f1b39e5e281ea327f4e18b4612fe4d94ea01b18c
parent 928c120806279579100c7327e92fae21b51a94b4 (diff)
download pandoc-51f6f0e3a1533a0e1bb94538f55bdc9db380a028.tar.gz
Improve Markdown writer escaping.
This fixes escaping for '#' in particular. Closes #7726.
-rw-r--r-- src/Text/Pandoc/Writers/Markdown/Inline.hs 37
-rw-r--r-- test/command/3792.md 2
-rw-r--r-- test/command/4164.md 2
-rw-r--r-- test/command/7208.md 2
-rw-r--r-- test/command/7726.md 22
-rw-r--r-- test/command/biblatex-sigfridsson.md 2
-rw-r--r-- test/writer.opml 2
7 files changed, 46 insertions, 23 deletions
diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
index be22f8d9f..e774b5cc4 100644
--- a/src/Text/Pandoc/Writers/Markdown/Inline.hs
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -44,32 +44,35 @@ import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..),
-- | Escape special characters for Markdown.
escapeText :: WriterOptions -> Text -> Text
-escapeText opts = T.pack . go . T.unpack
+escapeText opts = T.pack . go' . T.unpack
where
startsWithSpace (' ':_) = True
startsWithSpace ('\t':_) = True
startsWithSpace [] = True
startsWithSpace _ = False
+ go' ('#':cs)
+ | isEnabled Ext_space_in_atx_header opts
+ = if startsWithSpace (dropWhile (=='#') cs)
+ then '\\':'#':go cs
+ else '#':go cs
+ | otherwise = '\\':'#':go cs
+ go' ('@':cs)
+ | isEnabled Ext_citations opts =
+ case cs of
+ (d:_)
+ | isAlphaNum d || d == '_' || d == '{'
+ -> '\\':'@':go cs
+ _ -> '@':go cs
+ go' cs = go cs
go [] = []
go (c:cs) =
case c of
- '<' | isEnabled Ext_all_symbols_escapable opts ->
- '\\' : '<' : go cs
- | otherwise -> "&lt;" ++ go cs
- '>' | isEnabled Ext_all_symbols_escapable opts ->
- '\\' : '>' : go cs
- | otherwise -> "&gt;" ++ go cs
- '@' | isEnabled Ext_citations opts ->
- case cs of
- (d:_)
- | isAlphaNum d || d == '_' || d == '{'
- -> '\\':'@':go cs
- _ -> '@':go cs
- '#' | isEnabled Ext_space_in_atx_header opts
- , startsWithSpace cs
- -> '\\':'#':go cs
_ | c `elem` ['\\','`','*','_','[',']'] ->
'\\':c:go cs
+ '>' | isEnabled Ext_all_symbols_escapable opts -> '\\':'>':go cs
+ | otherwise -> "&gt;" ++ go cs
+ '<' | isEnabled Ext_all_symbols_escapable opts -> '\\':'<':go cs
+ | otherwise -> "&lt;" ++ go cs
'|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs
'^' | isEnabled Ext_superscript opts -> '\\':'^':go cs
'~' | isEnabled Ext_subscript opts ||
@@ -90,8 +93,6 @@ escapeText opts = T.pack . go . T.unpack
| isEnabled Ext_intraword_underscores opts
, isAlphaNum c
, isAlphaNum x -> c : '_' : x : go xs
- '#':xs -> c : '#' : go xs
- '>':xs -> c : '>' : go xs
_ -> c : go cs
attrsToMarkdown :: Attr -> Doc Text
diff --git a/test/command/3792.md b/test/command/3792.md
index eff26d517..eb109b9cc 100644
--- a/test/command/3792.md
+++ b/test/command/3792.md
@@ -6,7 +6,7 @@ and properly escaped.
ok
^D
---
-title: \<this> \*that\*
+title: \<this\> \*that\*
---
ok
diff --git a/test/command/4164.md b/test/command/4164.md
index 68cbd0584..4e7b7e285 100644
--- a/test/command/4164.md
+++ b/test/command/4164.md
@@ -26,6 +26,6 @@ Here is inline html:
Here is inline html:
-\<div> \<balise> bla bla \</div>
+\<div\> \<balise\> bla bla \</div\>
```
diff --git a/test/command/7208.md b/test/command/7208.md
index fe02ec32e..e65943ade 100644
--- a/test/command/7208.md
+++ b/test/command/7208.md
@@ -2,5 +2,5 @@
% pandoc -t gfm
\<hi\>
^D
-\<hi>
+\<hi\>
```
diff --git a/test/command/7726.md b/test/command/7726.md
new file mode 100644
index 000000000..bab11ca04
--- /dev/null
+++ b/test/command/7726.md
@@ -0,0 +1,22 @@
+```
+% pandoc -t markdown
+\# Hi
+
+\## Hi
+
+\### Hi
+
+\#### hi
+
+and #hi
+^D
+\# Hi
+
+\## Hi
+
+\### Hi
+
+\#### hi
+
+and #hi
+```
diff --git a/test/command/biblatex-sigfridsson.md b/test/command/biblatex-sigfridsson.md
index e042a8762..f83c35622 100644
--- a/test/command/biblatex-sigfridsson.md
+++ b/test/command/biblatex-sigfridsson.md
@@ -90,7 +90,7 @@ references:
- family: Ryde
given: Ulf
container-title: Journal of Computational Chemistry
- doi: "10.1002/(SICI)1096-987X(199803)19:4\\<377::AID-JCC1>3.0.CO;2-P"
+ doi: "10.1002/(SICI)1096-987X(199803)19:4\\<377::AID-JCC1\\>3.0.CO;2-P"
id: sigfridsson
issue: 4
issued: 1998
diff --git a/test/writer.opml b/test/writer.opml
index bfe1e5de6..6bdcb882e 100644
--- a/test/writer.opml
+++ b/test/writer.opml
@@ -52,7 +52,7 @@
</outline>
<outline text="LaTeX" _note="- &#10;- 2 + 2 = 4&#10;- *x* ∈ *y*&#10;- *α* ∧ *ω*&#10;- 223&#10;- *p*-Tree&#10;- Here’s some display math:&#10; $$\\frac{d}{dx}f(x)=\\lim\_{h\\to 0}\\frac{f(x+h)-f(x)}{h}$$&#10;- Here’s one that has a line break in it: *α* + *ω* × *x*².&#10;&#10;These shouldn’t be math:&#10;&#10;- To get the famous equation, write `$e = mc^2$`.&#10;- $22,000 is a *lot* of money. So is $34,000. (It worked if “lot” is&#10; emphasized.)&#10;- Shoes ($20) and socks ($5).&#10;- Escaped `$`: $73 *this should be emphasized* 23$.&#10;&#10;Here’s a LaTeX table:&#10;&#10;------------------------------------------------------------------------">
</outline>
-<outline text="Special Characters" _note="Here is some unicode:&#10;&#10;- I hat: Î&#10;- o umlaut: ö&#10;- section: §&#10;- set membership: ∈&#10;- copyright: ©&#10;&#10;AT&amp;T has an ampersand in their name.&#10;&#10;AT&amp;T is another way to write it.&#10;&#10;This &amp; that.&#10;&#10;4 &amp;lt; 5.&#10;&#10;6 &amp;gt; 5.&#10;&#10;Backslash: \\&#10;&#10;Backtick: \`&#10;&#10;Asterisk: \*&#10;&#10;Underscore: \_&#10;&#10;Left brace: {&#10;&#10;Right brace: }&#10;&#10;Left bracket: \[&#10;&#10;Right bracket: \]&#10;&#10;Left paren: (&#10;&#10;Right paren: )&#10;&#10;Greater-than: &amp;gt;&#10;&#10;Hash: #&#10;&#10;Period: .&#10;&#10;Bang: !&#10;&#10;Plus: +&#10;&#10;Minus: -&#10;&#10;------------------------------------------------------------------------">
+<outline text="Special Characters" _note="Here is some unicode:&#10;&#10;- I hat: Î&#10;- o umlaut: ö&#10;- section: §&#10;- set membership: ∈&#10;- copyright: ©&#10;&#10;AT&amp;T has an ampersand in their name.&#10;&#10;AT&amp;T is another way to write it.&#10;&#10;This &amp; that.&#10;&#10;4 &amp;lt; 5.&#10;&#10;6 &amp;gt; 5.&#10;&#10;Backslash: \\&#10;&#10;Backtick: \`&#10;&#10;Asterisk: \*&#10;&#10;Underscore: \_&#10;&#10;Left brace: {&#10;&#10;Right brace: }&#10;&#10;Left bracket: \[&#10;&#10;Right bracket: \]&#10;&#10;Left paren: (&#10;&#10;Right paren: )&#10;&#10;Greater-than: &amp;gt;&#10;&#10;Hash: \#&#10;&#10;Period: .&#10;&#10;Bang: !&#10;&#10;Plus: +&#10;&#10;Minus: -&#10;&#10;------------------------------------------------------------------------">
</outline>
<outline text="Links">
<outline text="Explicit" _note="Just a [URL](/url/).&#10;&#10;[URL and title](/url/ &quot;title&quot;).&#10;&#10;[URL and title](/url/ &quot;title preceded by two spaces&quot;).&#10;&#10;[URL and title](/url/ &quot;title preceded by a tab&quot;).&#10;&#10;[URL and title](/url/ &quot;title with &quot;quotes&quot; in it&quot;)&#10;&#10;[URL and title](/url/ &quot;title with single quotes&quot;)&#10;&#10;[with\_underscore](/url/with_underscore)&#10;&#10;[Email link](mailto:nobody@nowhere.net)&#10;&#10;[Empty]().">