aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOCzarnecki <44535552+OCzarnecki@users.noreply.github.com>2021-08-16 06:57:57 +0200
committerGitHub <noreply@github.com>2021-08-15 21:57:57 -0700
commite37cf4484d38c171d9f7477a8ae9eca9643cc426 (patch)
tree62984d5cf0e54867f80ef8fcbc646ff598e9a094
parent72447a563c415a3d8addc7a2b2edd93f9fe56ebe (diff)
downloadpandoc-e37cf4484d38c171d9f7477a8ae9eca9643cc426.tar.gz
Multimarkdown sub- and superscripts (#5512) (#7188)
Added an extension `short_subsuperscripts` which modifies the behavior of `subscript` and `superscript`, allowing subscripts or superscripts containing only alphanumerics to end with a space character (e.g. `x^2 = 4` or `H~2 is combustible`). This improves support for multimarkdown. Closes #5512. Add `Ext_short_subsuperscripts` constructor to `Extension` [API change]. This is enabled by default for `markdown_mmd`.
-rw-r--r--MANUAL.txt12
-rw-r--r--src/Text/Pandoc/Extensions.hs11
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs24
-rw-r--r--test/Tests/Readers/Markdown.hs48
4 files changed, 80 insertions, 15 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index a6edd8ccd..bed3b2009 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5304,6 +5304,18 @@ For elements that accept attributes, a `data-pos` attribute
is added; other elements are placed in a surrounding
Div or Span element with a `data-pos` attribute.
+#### Extension: `short_subsuperscripts` ####
+
+Parse multimarkdown style subscripts and superscripts, which start with
+a '~' or '^' character, respectively, and include the alphanumeric sequence
+that follows. For example:
+
+ x^2 = 4
+
+or
+
+ Oxygen is O~2.
+
## Markdown variants
In addition to pandoc's extended Markdown, the following Markdown
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index ce04ce641..2ef8f64e9 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -124,6 +124,7 @@ data Extension =
| Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
| Ext_mmd_link_attributes -- ^ MMD style reference link attributes
| Ext_mmd_title_block -- ^ Multimarkdown metadata block
+ | Ext_short_subsuperscripts -- ^ sub-&superscripts w/o closing char (v~i)
| Ext_multiline_tables -- ^ Pandoc-style multiline tables
| Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
| Ext_native_spans -- ^ Use Span inlines for contents of <span>
@@ -286,14 +287,9 @@ multimarkdownExtensions = extensionsFromList
, Ext_auto_identifiers
, Ext_mmd_header_identifiers
, Ext_implicit_figures
- -- Note: MMD's syntax for superscripts and subscripts
- -- is a bit more permissive than pandoc's, allowing
- -- e^2 and a~1 instead of e^2^ and a~1~, so even with
- -- these options we don't have full support for MMD
- -- superscripts and subscripts, but there's no reason
- -- not to include these:
- , Ext_superscript
+ , Ext_short_subsuperscripts
, Ext_subscript
+ , Ext_superscript
, Ext_backtick_code_blocks
, Ext_spaced_reference_links
-- So far only in dev version of mmd:
@@ -464,6 +460,7 @@ getAllExtensions f = universalExtensions <> getAll f
, Ext_gutenberg
, Ext_smart
, Ext_literate_haskell
+ , Ext_short_subsuperscripts
, Ext_rebase_relative_paths
]
getAll "markdown_strict" = allMarkdownExtensions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2dc7ddf52..536e502cf 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1692,21 +1692,29 @@ strikeout = fmap B.strikeout <$>
superscript :: PandocMonad m => MarkdownParser m (F Inlines)
superscript = do
- guardEnabled Ext_superscript
fmap B.superscript <$> try (do
char '^'
- mconcat <$> many1Till (do notFollowedBy spaceChar
- notFollowedBy newline
- inline) (char '^'))
+ mconcat <$> (try regularSuperscript <|> try mmdShortSuperscript))
+ where regularSuperscript = many1Till (do guardEnabled Ext_superscript
+ notFollowedBy spaceChar
+ notFollowedBy newline
+ inline) (char '^')
+ mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts
+ result <- take1WhileP isAlphaNum
+ return $ return $ return $ B.str result
subscript :: PandocMonad m => MarkdownParser m (F Inlines)
subscript = do
- guardEnabled Ext_subscript
fmap B.subscript <$> try (do
char '~'
- mconcat <$> many1Till (do notFollowedBy spaceChar
- notFollowedBy newline
- inline) (char '~'))
+ mconcat <$> (try regularSubscript <|> mmdShortSubscript))
+ where regularSubscript = many1Till (do guardEnabled Ext_subscript
+ notFollowedBy spaceChar
+ notFollowedBy newline
+ inline) (char '~')
+ mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts
+ result <- take1WhileP isAlphaNum
+ return $ return $ return $ B.str result
whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index f055ab197..02fc0d8ce 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -36,6 +36,9 @@ markdownGH :: Text -> Pandoc
markdownGH = purely $ readMarkdown def {
readerExtensions = githubMarkdownExtensions }
+markdownMMD :: Text -> Pandoc
+markdownMMD = purely $ readMarkdown def {
+ readerExtensions = multimarkdownExtensions }
infix 4 =:
(=:) :: ToString c
=> String -> (Text, c) -> TestTree
@@ -360,6 +363,51 @@ tests = [ testGroup "inline code"
("**this should \"be bold**"
=?> para (strong "this should \8220be bold"))
]
+ , testGroup "sub- and superscripts"
+ [
+ test markdownMMD "normal subscript"
+ ("H~2~"
+ =?> para ("H" <> subscript "2"))
+ , test markdownMMD "normal superscript"
+ ("x^3^"
+ =?> para ("x" <> superscript "3"))
+ , test markdownMMD "short subscript delimeted by space"
+ ("O~2 is dangerous"
+ =?> para ("O" <> subscript "2" <> space <> "is dangerous"))
+ , test markdownMMD "short subscript delimeted by newline"
+ ("O~2\n"
+ =?> para ("O" <> subscript "2"))
+ , test markdownMMD "short subscript delimeted by EOF"
+ ("O~2"
+ =?> para ("O" <> subscript "2"))
+ , test markdownMMD "short subscript delimited by punctuation"
+ ("O~2."
+ =?> para ("O" <> subscript "2" <> "."))
+ , test markdownMMD "short subscript delimited by emph"
+ ("O~2*combustible!*"
+ =?> para ("O" <> subscript "2" <> emph "combustible!"))
+ , test markdownMMD "no nesting in short subscripts"
+ ("y~*2*"
+ =?> para ("y~" <> emph "2"))
+ , test markdownMMD "short superscript delimeted by space"
+ ("x^2 = y"
+ =?> para ("x" <> superscript "2" <> space <> "= y"))
+ , test markdownMMD "short superscript delimeted by newline"
+ ("x^2\n"
+ =?> para ("x" <> superscript "2"))
+ , test markdownMMD "short superscript delimeted by ExF"
+ ("x^2"
+ =?> para ("x" <> superscript "2"))
+ , test markdownMMD "short superscript delimited by punctuation"
+ ("x^2."
+ =?> para ("x" <> superscript "2" <> "."))
+ , test markdownMMD "short superscript delimited by emph"
+ ("x^2*combustible!*"
+ =?> para ("x" <> superscript "2" <> emph "combustible!"))
+ , test markdownMMD "no nesting in short superscripts"
+ ("y^*2*"
+ =?> para ("y^" <> emph "2"))
+ ]
, testGroup "footnotes"
[ "indent followed by newline and flush-left text" =:
"[^1]\n\n[^1]: my note\n\n \nnot in note\n"