diff options
| | | |
---|---|---|
author | John MacFarlane <jgm@berkeley.edu> | 2021-12-03 17:52:47 -0800 |
committer | John MacFarlane <jgm@berkeley.edu> | 2021-12-03 17:52:47 -0800 |
commit | 51f6f0e3a1533a0e1bb94538f55bdc9db380a028 (patch) | |
tree | f1b39e5e281ea327f4e18b4612fe4d94ea01b18c | |
parent | 928c120806279579100c7327e92fae21b51a94b4 (diff) | |
download | pandoc-51f6f0e3a1533a0e1bb94538f55bdc9db380a028.tar.gz |
Improve Markdown writer escaping.
This fixes escaping for '#' in particular.
Closes #7726.
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown/Inline.hs | 37 | ||||
-rw-r--r-- | test/command/3792.md | 2 | ||||
-rw-r--r-- | test/command/4164.md | 2 | ||||
-rw-r--r-- | test/command/7208.md | 2 | ||||
-rw-r--r-- | test/command/7726.md | 22 | ||||
-rw-r--r-- | test/command/biblatex-sigfridsson.md | 2 | ||||
-rw-r--r-- | test/writer.opml | 2 |
7 files changed, 46 insertions, 23 deletions
diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs index be22f8d9f..e774b5cc4 100644 --- a/src/Text/Pandoc/Writers/Markdown/Inline.hs +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -44,32 +44,35 @@ import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..), -- | Escape special characters for Markdown. escapeText :: WriterOptions -> Text -> Text -escapeText opts = T.pack . go . T.unpack +escapeText opts = T.pack . go' . T.unpack where startsWithSpace (' ':_) = True startsWithSpace ('\t':_) = True startsWithSpace [] = True startsWithSpace _ = False + go' ('#':cs) + | isEnabled Ext_space_in_atx_header opts + = if startsWithSpace (dropWhile (=='#') cs) + then '\\':'#':go cs + else '#':go cs + | otherwise = '\\':'#':go cs + go' ('@':cs) + | isEnabled Ext_citations opts = + case cs of + (d:_) + | isAlphaNum d || d == '_' || d == '{' + -> '\\':'@':go cs + _ -> '@':go cs + go' cs = go cs go [] = [] go (c:cs) = case c of - '<' | isEnabled Ext_all_symbols_escapable opts -> - '\\' : '<' : go cs - | otherwise -> "<" ++ go cs - '>' | isEnabled Ext_all_symbols_escapable opts -> - '\\' : '>' : go cs - | otherwise -> ">" ++ go cs - '@' | isEnabled Ext_citations opts -> - case cs of - (d:_) - | isAlphaNum d || d == '_' || d == '{' - -> '\\':'@':go cs - _ -> '@':go cs - '#' | isEnabled Ext_space_in_atx_header opts - , startsWithSpace cs - -> '\\':'#':go cs _ | c `elem` ['\\','`','*','_','[',']'] -> '\\':c:go cs + '>' | isEnabled Ext_all_symbols_escapable opts -> '\\':'>':go cs + | otherwise -> ">" ++ go cs + '<' | isEnabled Ext_all_symbols_escapable opts -> '\\':'<':go cs + | otherwise -> "<" ++ go cs '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs '~' | isEnabled Ext_subscript opts || @@ -90,8 +93,6 @@ escapeText opts = T.pack . go . 
T.unpack | isEnabled Ext_intraword_underscores opts , isAlphaNum c , isAlphaNum x -> c : '_' : x : go xs - '#':xs -> c : '#' : go xs - '>':xs -> c : '>' : go xs _ -> c : go cs attrsToMarkdown :: Attr -> Doc Text diff --git a/test/command/3792.md b/test/command/3792.md index eff26d517..eb109b9cc 100644 --- a/test/command/3792.md +++ b/test/command/3792.md @@ -6,7 +6,7 @@ and properly escaped. ok ^D --- -title: \<this> \*that\* +title: \<this\> \*that\* --- ok diff --git a/test/command/4164.md b/test/command/4164.md index 68cbd0584..4e7b7e285 100644 --- a/test/command/4164.md +++ b/test/command/4164.md @@ -26,6 +26,6 @@ Here is inline html: Here is inline html: -\<div> \<balise> bla bla \</div> +\<div\> \<balise\> bla bla \</div\> ``` diff --git a/test/command/7208.md b/test/command/7208.md index fe02ec32e..e65943ade 100644 --- a/test/command/7208.md +++ b/test/command/7208.md @@ -2,5 +2,5 @@ % pandoc -t gfm \<hi\> ^D -\<hi> +\<hi\> ``` diff --git a/test/command/7726.md b/test/command/7726.md new file mode 100644 index 000000000..bab11ca04 --- /dev/null +++ b/test/command/7726.md @@ -0,0 +1,22 @@ +``` +% pandoc -t markdown +\# Hi + +\## Hi + +\### Hi + +\#### hi + +and #hi +^D +\# Hi + +\## Hi + +\### Hi + +\#### hi + +and #hi +``` diff --git a/test/command/biblatex-sigfridsson.md b/test/command/biblatex-sigfridsson.md index e042a8762..f83c35622 100644 --- a/test/command/biblatex-sigfridsson.md +++ b/test/command/biblatex-sigfridsson.md @@ -90,7 +90,7 @@ references: - family: Ryde given: Ulf container-title: Journal of Computational Chemistry - doi: "10.1002/(SICI)1096-987X(199803)19:4\\<377::AID-JCC1>3.0.CO;2-P" + doi: "10.1002/(SICI)1096-987X(199803)19:4\\<377::AID-JCC1\\>3.0.CO;2-P" id: sigfridsson issue: 4 issued: 1998 diff --git a/test/writer.opml b/test/writer.opml index bfe1e5de6..6bdcb882e 100644 --- a/test/writer.opml +++ b/test/writer.opml @@ -52,7 +52,7 @@ </outline> <outline text="LaTeX" _note="- - 2 + 2 = 4 - *x* ∈ *y* - *α* ∧ *ω* - 223 - *p*-Tree - 
Here’s some display math: $$\\frac{d}{dx}f(x)=\\lim\_{h\\to 0}\\frac{f(x+h)-f(x)}{h}$$ - Here’s one that has a line break in it: *α* + *ω* × *x*². These shouldn’t be math: - To get the famous equation, write `$e = mc^2$`. - $22,000 is a *lot* of money. So is $34,000. (It worked if “lot” is emphasized.) - Shoes ($20) and socks ($5). - Escaped `$`: $73 *this should be emphasized* 23$. Here’s a LaTeX table: ------------------------------------------------------------------------"> </outline> -<outline text="Special Characters" _note="Here is some unicode: - I hat: Î - o umlaut: ö - section: § - set membership: ∈ - copyright: © AT&T has an ampersand in their name. AT&T is another way to write it. This & that. 4 &lt; 5. 6 &gt; 5. Backslash: \\ Backtick: \` Asterisk: \* Underscore: \_ Left brace: { Right brace: } Left bracket: \[ Right bracket: \] Left paren: ( Right paren: ) Greater-than: &gt; Hash: # Period: . Bang: ! Plus: + Minus: - ------------------------------------------------------------------------"> +<outline text="Special Characters" _note="Here is some unicode: - I hat: Î - o umlaut: ö - section: § - set membership: ∈ - copyright: © AT&T has an ampersand in their name. AT&T is another way to write it. This & that. 4 &lt; 5. 6 &gt; 5. Backslash: \\ Backtick: \` Asterisk: \* Underscore: \_ Left brace: { Right brace: } Left bracket: \[ Right bracket: \] Left paren: ( Right paren: ) Greater-than: &gt; Hash: \# Period: . Bang: ! Plus: + Minus: - ------------------------------------------------------------------------"> </outline> <outline text="Links"> <outline text="Explicit" _note="Just a [URL](/url/). [URL and title](/url/ "title"). [URL and title](/url/ "title preceded by two spaces"). [URL and title](/url/ "title preceded by a tab"). [URL and title](/url/ "title with "quotes" in it") [URL and title](/url/ "title with single quotes") [with\_underscore](/url/with_underscore) [Email link](mailto:nobody@nowhere.net) [Empty]()."> |