From e37cf4484d38c171d9f7477a8ae9eca9643cc426 Mon Sep 17 00:00:00 2001 From: OCzarnecki <44535552+OCzarnecki@users.noreply.github.com> Date: Mon, 16 Aug 2021 06:57:57 +0200 Subject: Multimarkdown sub- and superscripts (#5512) (#7188) Added an extension `short_subsuperscripts` which modifies the behavior of `subscript` and `superscript`, allowing subscripts or superscripts containing only alphanumerics to end with a space character (eg. `x^2 = 4` or `H~2 is combustible`). This improves support for multimarkdown. Closes #5512. Add `Ext_short_subsuperscripts` constructor to `Extension` [API change]. This is enabled by default for `markdown_mmd`. --- MANUAL.txt | 12 ++++++++++ src/Text/Pandoc/Extensions.hs | 11 ++++----- src/Text/Pandoc/Readers/Markdown.hs | 24 ++++++++++++------- test/Tests/Readers/Markdown.hs | 48 +++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 15 deletions(-) diff --git a/MANUAL.txt b/MANUAL.txt index a6edd8ccd..bed3b2009 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5304,6 +5304,18 @@ For elements that accept attributes, a `data-pos` attribute is added; other elements are placed in a surrounding Div or Span elemnet with a `data-pos` attribute. +#### Extension: `short_subsuperscripts` #### + +Parse multimarkdown style subscripts and superscripts, which start with +a '~' or '^' character, respectively, and include the alphanumeric sequence +that follows. For example: + + x^2 = 4 + +or + + Oxygen is O~2. 
+ ## Markdown variants In addition to pandoc's extended Markdown, the following Markdown diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index ce04ce641..2ef8f64e9 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -124,6 +124,7 @@ data Extension = | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] | Ext_mmd_link_attributes -- ^ MMD style reference link attributes | Ext_mmd_title_block -- ^ Multimarkdown metadata block + | Ext_short_subsuperscripts -- ^ sub-&superscripts w/o closing char (v~i) | Ext_multiline_tables -- ^ Pandoc-style multiline tables | Ext_native_divs -- ^ Use Div blocks for contents of
tags | Ext_native_spans -- ^ Use Span inlines for contents of @@ -286,14 +287,9 @@ multimarkdownExtensions = extensionsFromList , Ext_auto_identifiers , Ext_mmd_header_identifiers , Ext_implicit_figures - -- Note: MMD's syntax for superscripts and subscripts - -- is a bit more permissive than pandoc's, allowing - -- e^2 and a~1 instead of e^2^ and a~1~, so even with - -- these options we don't have full support for MMD - -- superscripts and subscripts, but there's no reason - -- not to include these: - , Ext_superscript + , Ext_short_subsuperscripts , Ext_subscript + , Ext_superscript , Ext_backtick_code_blocks , Ext_spaced_reference_links -- So far only in dev version of mmd: @@ -464,6 +460,7 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_gutenberg , Ext_smart , Ext_literate_haskell + , Ext_short_subsuperscripts , Ext_rebase_relative_paths ] getAll "markdown_strict" = allMarkdownExtensions diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2dc7ddf52..536e502cf 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1692,21 +1692,29 @@ strikeout = fmap B.strikeout <$> superscript :: PandocMonad m => MarkdownParser m (F Inlines) superscript = do - guardEnabled Ext_superscript fmap B.superscript <$> try (do char '^' - mconcat <$> many1Till (do notFollowedBy spaceChar - notFollowedBy newline - inline) (char '^')) + mconcat <$> (try regularSuperscript <|> try mmdShortSuperscript)) + where regularSuperscript = many1Till (do guardEnabled Ext_superscript + notFollowedBy spaceChar + notFollowedBy newline + inline) (char '^') + mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts + result <- take1WhileP isAlphaNum + return $ return $ return $ B.str result subscript :: PandocMonad m => MarkdownParser m (F Inlines) subscript = do - guardEnabled Ext_subscript fmap B.subscript <$> try (do char '~' - mconcat <$> many1Till (do notFollowedBy spaceChar - notFollowedBy newline 
- inline) (char '~')) + mconcat <$> (try regularSubscript <|> mmdShortSubscript)) + where regularSubscript = many1Till (do guardEnabled Ext_subscript + notFollowedBy spaceChar + notFollowedBy newline + inline) (char '~') + mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts + result <- take1WhileP isAlphaNum + return $ return $ return $ B.str result whitespace :: PandocMonad m => MarkdownParser m (F Inlines) whitespace = spaceChar >> return <$> (lb <|> regsp) "whitespace" diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index f055ab197..02fc0d8ce 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -36,6 +36,9 @@ markdownGH :: Text -> Pandoc markdownGH = purely $ readMarkdown def { readerExtensions = githubMarkdownExtensions } +markdownMMD :: Text -> Pandoc +markdownMMD = purely $ readMarkdown def { + readerExtensions = multimarkdownExtensions } infix 4 =: (=:) :: ToString c => String -> (Text, c) -> TestTree @@ -360,6 +363,51 @@ tests = [ testGroup "inline code" ("**this should \"be bold**" =?> para (strong "this should \8220be bold")) ] + , testGroup "sub- and superscripts" + [ + test markdownMMD "normal subscript" + ("H~2~" + =?> para ("H" <> subscript "2")) + , test markdownMMD "normal superscript" + ("x^3^" + =?> para ("x" <> superscript "3")) + , test markdownMMD "short subscript delimeted by space" + ("O~2 is dangerous" + =?> para ("O" <> subscript "2" <> space <> "is dangerous")) + , test markdownMMD "short subscript delimeted by newline" + ("O~2\n" + =?> para ("O" <> subscript "2")) + , test markdownMMD "short subscript delimeted by EOF" + ("O~2" + =?> para ("O" <> subscript "2")) + , test markdownMMD "short subscript delimited by punctuation" + ("O~2." 
+ =?> para ("O" <> subscript "2" <> ".")) + , test markdownMMD "short subscript delimited by emph" + ("O~2*combustible!*" + =?> para ("O" <> subscript "2" <> emph "combustible!")) + , test markdownMMD "no nesting in short subscripts" + ("y~*2*" + =?> para ("y~" <> emph "2")) + , test markdownMMD "short superscript delimeted by space" + ("x^2 = y" + =?> para ("x" <> superscript "2" <> space <> "= y")) + , test markdownMMD "short superscript delimeted by newline" + ("x^2\n" + =?> para ("x" <> superscript "2")) + , test markdownMMD "short superscript delimeted by ExF" + ("x^2" + =?> para ("x" <> superscript "2")) + , test markdownMMD "short superscript delimited by punctuation" + ("x^2." + =?> para ("x" <> superscript "2" <> ".")) + , test markdownMMD "short superscript delimited by emph" + ("x^2*combustible!*" + =?> para ("x" <> superscript "2" <> emph "combustible!")) + , test markdownMMD "no nesting in short superscripts" + ("y^*2*" + =?> para ("y^" <> emph "2")) + ] , testGroup "footnotes" [ "indent followed by newline and flush-left text" =: "[^1]\n\n[^1]: my note\n\n \nnot in note\n" -- cgit v1.2.3