From e37cf4484d38c171d9f7477a8ae9eca9643cc426 Mon Sep 17 00:00:00 2001 From: OCzarnecki <44535552+OCzarnecki@users.noreply.github.com> Date: Mon, 16 Aug 2021 06:57:57 +0200 Subject: Multimarkdown sub- and superscripts (#5512) (#7188) Added an extension `short_subsuperscripts` which modifies the behavior of `subscript` and `superscript`, allowing subscripts or superscripts containing only alphanumerics to end with a space character (eg. `x^2 = 4` or `H~2 is combustible`). This improves support for multimarkdown. Closes #5512. Add `Ext_short_subsuperscripts` constructor to `Extension` [API change]. This is enabled by default for `markdown_mmd`. --- src/Text/Pandoc/Extensions.hs | 11 ++++------- src/Text/Pandoc/Readers/Markdown.hs | 24 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 15 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index ce04ce641..2ef8f64e9 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -124,6 +124,7 @@ data Extension = | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] | Ext_mmd_link_attributes -- ^ MMD style reference link attributes | Ext_mmd_title_block -- ^ Multimarkdown metadata block + | Ext_short_subsuperscripts -- ^ sub-&superscripts w/o closing char (v~i) | Ext_multiline_tables -- ^ Pandoc-style multiline tables | Ext_native_divs -- ^ Use Div blocks for contents of
tags | Ext_native_spans -- ^ Use Span inlines for contents of @@ -286,14 +287,9 @@ multimarkdownExtensions = extensionsFromList , Ext_auto_identifiers , Ext_mmd_header_identifiers , Ext_implicit_figures - -- Note: MMD's syntax for superscripts and subscripts - -- is a bit more permissive than pandoc's, allowing - -- e^2 and a~1 instead of e^2^ and a~1~, so even with - -- these options we don't have full support for MMD - -- superscripts and subscripts, but there's no reason - -- not to include these: - , Ext_superscript + , Ext_short_subsuperscripts , Ext_subscript + , Ext_superscript , Ext_backtick_code_blocks , Ext_spaced_reference_links -- So far only in dev version of mmd: @@ -464,6 +460,7 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_gutenberg , Ext_smart , Ext_literate_haskell + , Ext_short_subsuperscripts , Ext_rebase_relative_paths ] getAll "markdown_strict" = allMarkdownExtensions diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2dc7ddf52..536e502cf 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1692,21 +1692,29 @@ strikeout = fmap B.strikeout <$> superscript :: PandocMonad m => MarkdownParser m (F Inlines) superscript = do - guardEnabled Ext_superscript fmap B.superscript <$> try (do char '^' - mconcat <$> many1Till (do notFollowedBy spaceChar - notFollowedBy newline - inline) (char '^')) + mconcat <$> (try regularSuperscript <|> try mmdShortSuperscript)) + where regularSuperscript = many1Till (do guardEnabled Ext_superscript + notFollowedBy spaceChar + notFollowedBy newline + inline) (char '^') + mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts + result <- take1WhileP isAlphaNum + return $ return $ return $ B.str result subscript :: PandocMonad m => MarkdownParser m (F Inlines) subscript = do - guardEnabled Ext_subscript fmap B.subscript <$> try (do char '~' - mconcat <$> many1Till (do notFollowedBy spaceChar - notFollowedBy newline - inline) (char '~')) + mconcat <$> (try regularSubscript <|> mmdShortSubscript)) + where regularSubscript = many1Till (do guardEnabled Ext_subscript + notFollowedBy spaceChar + notFollowedBy newline + inline) (char '~') + mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts + result <- take1WhileP isAlphaNum + return $ return $ return $ B.str result whitespace :: PandocMonad m => MarkdownParser m (F Inlines) whitespace = spaceChar >> return <$> (lb <|> regsp) "whitespace" -- cgit v1.2.3