From 6ea6011ca66c3127ff42cd5d0d39b3bd40e56e76 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Thu, 30 Aug 2018 17:10:46 +0300 Subject: Muse writer: use lightweight markup when possible --- src/Text/Pandoc/Writers/Muse.hs | 142 ++++++++++++++++++++++++++++++++++------ test/Tests/Writers/Muse.hs | 50 ++++++++++---- test/writer.muse | 75 ++++++++++----------- 3 files changed, 198 insertions(+), 69 deletions(-) diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index a21bf5fc0..b9f9381c3 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -46,7 +46,7 @@ module Text.Pandoc.Writers.Muse (writeMuse) where import Prelude import Control.Monad.Reader import Control.Monad.State.Strict -import Data.Char (isSpace, isDigit, isAsciiUpper, isAsciiLower) +import Data.Char (isSpace, isAlphaNum, isDigit, isAsciiUpper, isAsciiLower) import Data.Default import Data.Text (Text) import Data.List (intersperse, transpose, isInfixOf) @@ -74,16 +74,20 @@ data WriterEnv = , envInsideLinkDescription :: Bool -- ^ Escape ] if True , envAfterSpace :: Bool -- ^ There is whitespace (not just newline) before , envOneLine :: Bool -- ^ True if newlines are not allowed + , envInsideAsterisks :: Bool -- ^ True if outer element is emphasis with asterisks + , envNearAsterisks :: Bool -- ^ Rendering inline near asterisks } data WriterState = WriterState { stNotes :: Notes , stIds :: Set.Set String + , stUseTags :: Bool -- ^ Use tags for emphasis, for example because previous character is a letter } instance Default WriterState where def = WriterState { stNotes = [] , stIds = Set.empty + , stUseTags = False } evalMuse :: PandocMonad m => Muse m a -> WriterEnv -> WriterState -> m a @@ -103,6 +107,8 @@ writeMuse opts document = , envInsideLinkDescription = False , envAfterSpace = False , envOneLine = False + , envInsideAsterisks = False + , envNearAsterisks = False } -- | Return Muse representation of document. @@ -212,6 +218,7 @@ blockToMuse (BulletList items) = do => [Block] -> Muse m Doc bulletListItemToMuse item = do + modify $ \st -> st { stUseTags = False } contents <- blockListToMuse item return $ hang 2 "- " contents blockToMuse (DefinitionList items) = do @@ -223,6 +230,7 @@ blockToMuse (DefinitionList items) = do => ([Inline], [[Block]]) -> Muse m Doc definitionListItemToMuse (label, defs) = do + modify $ \st -> st { stUseTags = False } label' <- local (\env -> env { envOneLine = True, envAfterSpace = True }) $ inlineListToMuse' label contents <- vcat <$> mapM descriptionToMuse defs let ind = offset label' @@ -401,6 +409,17 @@ fixNotes (Space : n@Note{} : rest) = Str " " : n : fixNotes rest fixNotes (SoftBreak : n@Note{} : rest) = Str " " : n : fixNotes rest fixNotes (x:xs) = x : fixNotes xs +startsWithSpace :: [Inline] -> Bool +startsWithSpace (Space:_) = True +startsWithSpace (SoftBreak:_) = True +startsWithSpace _ = False + +endsWithSpace :: [Inline] -> Bool +endsWithSpace [Space] = True +endsWithSpace [SoftBreak] = True +endsWithSpace (_:xs) = endsWithSpace xs +endsWithSpace [] = False + urlEscapeBrackets :: String -> String urlEscapeBrackets (']':xs) = '%':'5':'D':urlEscapeBrackets xs urlEscapeBrackets (x:xs) = x:urlEscapeBrackets xs @@ -409,9 +428,9 @@ urlEscapeBrackets [] = [] isHorizontalRule :: String -> Bool isHorizontalRule s = length s >= 4 && all (== '-') s -startsWithSpace :: String -> Bool -startsWithSpace (x:_) = isSpace x -startsWithSpace [] = False +stringStartsWithSpace :: String -> Bool +stringStartsWithSpace (x:_) = isSpace x +stringStartsWithSpace [] = False fixOrEscape :: Bool -> Inline -> Bool fixOrEscape sp (Str "-") = sp @@ -420,11 +439,19 @@ fixOrEscape _ (Str ">") = True fixOrEscape sp (Str s) = (sp && (startsWithMarker isDigit s || startsWithMarker isAsciiLower s || startsWithMarker isAsciiUpper s)) - || isHorizontalRule s || startsWithSpace s + || isHorizontalRule s || stringStartsWithSpace s fixOrEscape _ Space = True fixOrEscape _ SoftBreak = True fixOrEscape _ _ = False +inlineListStartsWithAlnum :: PandocMonad m + => [Inline] + -> Muse m Bool +inlineListStartsWithAlnum (Str s:_) = do + esc <- shouldEscapeString s + return $ esc || isAlphaNum (head s) +inlineListStartsWithAlnum _ = return False + -- | Convert list of Pandoc inline elements to Muse renderInlineList :: PandocMonad m => [Inline] @@ -436,11 +463,22 @@ renderInlineList (x:xs) = do start <- asks envInlineStart afterSpace <- asks envAfterSpace topLevel <- asks envTopLevel - r <- local (\env -> env { envInlineStart = False }) $ inlineToMuse x + insideAsterisks <- asks envInsideAsterisks + nearAsterisks <- asks envNearAsterisks + useTags <- gets stUseTags + alnumNext <- inlineListStartsWithAlnum xs + let newUseTags = useTags || alnumNext + modify $ \st -> st { stUseTags = newUseTags } + + r <- local (\env -> env { envInlineStart = False + , envInsideAsterisks = False + , envNearAsterisks = nearAsterisks || (null xs && insideAsterisks) + }) $ inlineToMuse x opts <- asks envOptions let isNewline = (x == SoftBreak && writerWrapText opts == WrapPreserve) || x == LineBreak lst' <- local (\env -> env { envInlineStart = isNewline , envAfterSpace = x == Space || (not topLevel && isNewline) + , envNearAsterisks = False }) $ renderInlineList xs if start && fixOrEscape afterSpace x then pure (text "" <> r <> lst') @@ -452,7 +490,9 @@ inlineListToMuse :: PandocMonad m -> Muse m Doc inlineListToMuse lst = do lst' <- (normalizeInlineList . fixNotes) <$> preprocessInlineList (map (removeKeyValues . replaceSmallCaps) lst) - renderInlineList lst' + insideAsterisks <- asks envInsideAsterisks + modify $ \st -> st { stUseTags = False } -- Previous character is likely a '>' or some other markup + local (\env -> env { envNearAsterisks = insideAsterisks }) $ renderInlineList lst' inlineListToMuse' :: PandocMonad m => [Inline] -> Muse m Doc inlineListToMuse' lst = do @@ -466,52 +506,112 @@ inlineListToMuse' lst = do inlineToMuse :: PandocMonad m => Inline -> Muse m Doc -inlineToMuse (Str str) = - text <$> conditionalEscapeString str +inlineToMuse (Str str) = do + escapedStr <- conditionalEscapeString str + let useTags = isAlphaNum $ last escapedStr -- escapedStr is never empty because empty strings are escaped + modify $ \st -> st { stUseTags = useTags } + return $ text escapedStr +inlineToMuse (Emph [Strong lst]) = do + useTags <- gets stUseTags + if useTags + then do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst + modify $ \st -> st { stUseTags = False } + return $ "**" <> contents <> "**" + else if null lst || startsWithSpace lst || endsWithSpace lst + then do + contents <- local (\env -> env { envInsideAsterisks = False }) $ inlineListToMuse lst + modify $ \st -> st { stUseTags = True } + return $ "*" <> contents <> "*" + else do + contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst + modify $ \st -> st { stUseTags = True } + return $ "***" <> contents <> "***" inlineToMuse (Emph lst) = do - contents <- inlineListToMuse lst - return $ "" <> contents <> "" + useTags <- gets stUseTags + if useTags || null lst || startsWithSpace lst || endsWithSpace lst + then do contents <- inlineListToMuse lst + return $ "" <> contents <> "" + else do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst + modify $ \st -> st { stUseTags = True } + return $ "*" <> contents <> "*" +inlineToMuse (Strong [Emph lst]) = do + useTags <- gets stUseTags + if useTags + then do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst + modify $ \st -> st { stUseTags = False } + return $ "*" <> contents <> "*" + else if null lst || startsWithSpace lst || endsWithSpace lst + then do + contents <- local (\env -> env { envInsideAsterisks = False }) $ inlineListToMuse lst + modify $ \st -> st { stUseTags = True } + return $ "**" <> contents <> "**" + else do + contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst + modify $ \st -> st { stUseTags = True } + return $ "***" <> contents <> "***" inlineToMuse (Strong lst) = do - contents <- inlineListToMuse lst - return $ "" <> contents <> "" + useTags <- gets stUseTags + if useTags || null lst || startsWithSpace lst || endsWithSpace lst + then do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } + return $ "" <> contents <> "" + else do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst + modify $ \st -> st { stUseTags = True } + return $ "**" <> contents <> "**" inlineToMuse (Strikeout lst) = do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } return $ "" <> contents <> "" inlineToMuse (Superscript lst) = do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } return $ "" <> contents <> "" inlineToMuse (Subscript lst) = do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } return $ "" <> contents <> "" inlineToMuse SmallCaps {} = fail "SmallCaps should be expanded before normalization" inlineToMuse (Quoted SingleQuote lst) = do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } return $ "‘" <> contents <> "’" inlineToMuse (Quoted DoubleQuote lst) = do contents <- inlineListToMuse lst + modify $ \st -> st { stUseTags = False } return $ "“" <> contents <> "”" inlineToMuse Cite {} = fail "Citations should be expanded before normalization" -inlineToMuse (Code _ str) = return $ - "" <> text (substitute "" "</code>" str) <> "" +inlineToMuse (Code _ str) = do + useTags <- gets stUseTags + modify $ \st -> st { stUseTags = False } + return $ if useTags || null str || '=' `elem` str || isSpace (head str) || isSpace (last str) + then "" <> text (substitute "" "</code>" str) <> "" + else "=" <> text str <> "=" inlineToMuse Math{} = fail "Math should be expanded before normalization" -inlineToMuse (RawInline (Format f) str) = +inlineToMuse (RawInline (Format f) str) = do + modify $ \st -> st { stUseTags = False } return $ " text f <> "\">" <> text str <> "" inlineToMuse LineBreak = do oneline <- asks envOneLine + modify $ \st -> st { stUseTags = False } return $ if oneline then "
" else "
" <> cr -inlineToMuse Space = return space +inlineToMuse Space = do + modify $ \st -> st { stUseTags = False } + return space inlineToMuse SoftBreak = do oneline <- asks envOneLine wrapText <- asks $ writerWrapText . envOptions + modify $ \st -> st { stUseTags = False } return $ if not oneline && wrapText == WrapPreserve then cr else space inlineToMuse (Link _ txt (src, _)) = case txt of - [Str x] | escapeURI x == src -> + [Str x] | escapeURI x == src -> do + modify $ \st -> st { stUseTags = False } return $ "[[" <> text (escapeLink x) <> "]]" _ -> do contents <- local (\env -> env { envInsideLinkDescription = True }) $ inlineListToMuse txt + modify $ \st -> st { stUseTags = False } return $ "[[" <> text (escapeLink src) <> "][" <> contents <> "]]" where escapeLink lnk = if isImageUrl lnk then "URL:" ++ urlEscapeBrackets lnk else urlEscapeBrackets lnk -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el @@ -537,11 +637,14 @@ inlineToMuse (Image attr@(_, classes, _) inlines (source, title)) = do let rightalign = if "align-right" `elem` classes then " r" else "" + modify $ \st -> st { stUseTags = False } return $ "[[" <> text (urlEscapeBrackets source ++ width ++ leftalign ++ rightalign) <> "]" <> title' <> "]" inlineToMuse (Note contents) = do -- add to notes in state notes <- gets stNotes - modify $ \st -> st { stNotes = contents:notes } + modify $ \st -> st { stNotes = contents:notes + , stUseTags = False + } let ref = show $ length notes + 1 return $ "[" <> text ref <> "]" inlineToMuse (Span (anchor,names,_) inlines) = do @@ -549,6 +652,7 @@ inlineToMuse (Span (anchor,names,_) inlines) = do let anchorDoc = if null anchor then mempty else text ('#':anchor) <> space + modify $ \st -> st { stUseTags = False } return $ anchorDoc <> (if null inlines && not (null anchor) then mempty else (if null names diff --git a/test/Tests/Writers/Muse.hs b/test/Tests/Writers/Muse.hs index 50c0e78eb..f44097f9e 100644 --- a/test/Tests/Writers/Muse.hs +++ b/test/Tests/Writers/Muse.hs @@ -354,23 +354,51 @@ tests = [ testGroup "block elements" , "do not escape ; inside paragraph" =: text "foo ; bar" =?> "foo ; bar" ] , testGroup "emphasis" - [ "emph" =: emph (text "foo") =?> "foo" - , "strong" =: strong (text "foo") =?> "foo" + [ "emphasis" =: emph (text "foo") =?> "*foo*" + , "emphasis inside word" =: text "foo" <> emph (text "bar") <> text "baz" =?> "foobarbaz" + , "emphasis before comma" =: emph (text "foo") <> text ", bar" =?> "*foo*, bar" + , "emphasis before period" =: emph (text "foobar") <> text "." =?> "*foobar*." + , "empty emphasis" =: emph mempty =?> "" + , "empty strong" =: strong mempty =?> "" + , "empty strong emphasis" =: strong (emph mempty) =?> "****" + , "empty emphasized strong" =: emph (strong mempty) =?> "**" + , "strong" =: strong (text "foo") =?> "**foo**" + , "strong inside word" =: text "foo" <> strong (text "bar") <> text "baz" =?> "foobarbaz" + , "strong emphasis" =: strong (emph (text "foo")) =?> "***foo***" + , "strong after emphasis" =: emph (text "foo") <> strong (text "bar") =?> "*foo*bar" + , "strong emphasis after emphasis" =: emph (text "foo") <> strong (emph (text "bar")) =?> "*foo**bar*" + , "strong in the end of emphasis" =: emph (text "foo" <> strong (text "bar")) =?> "*foobar*" , "strikeout" =: strikeout (text "foo") =?> "foo" + , "space at the beginning of emphasis" =: emph (text " foo") =?> " foo" + , "space at the end of emphasis" =: emph (text "foo ") =?> "foo " + , "space at the beginning of strong" =: strong (text " foo") =?> " foo" + , "space at the end of strong" =: strong (text "foo ") =?> "foo " + , "space at the beginning of strong emphasis" =: strong (emph (text " foo")) =?> "** foo**" + , "space at the end of strong emphasis" =: strong (emph (text "foo ")) =?> "**foo **" + , "space at the beginning of emphasiszed strong" =: emph (strong (text " foo")) =?> "* foo*" + , "space at the end of emphasized strong" =: emph (strong (text "foo ")) =?> "*foo *" ] , "superscript" =: superscript (text "foo") =?> "foo" , "subscript" =: subscript (text "foo") =?> "foo" - , "smallcaps" =: smallcaps (text "foo") =?> "foo" - , "smallcaps near emphasis" =: emph (str "foo") <> smallcaps (str "bar") =?> "foobar" + , "smallcaps" =: smallcaps (text "foo") =?> "*foo*" + , "smallcaps near emphasis" =: emph (str "foo") <> smallcaps (str "bar") =?> "*foobar*" , "single quoted" =: singleQuoted (text "foo") =?> "‘foo’" , "double quoted" =: doubleQuoted (text "foo") =?> "“foo”" -- Cite is trivial , testGroup "code" - [ "simple" =: code "foo" =?> "foo" + [ "simple" =: code "foo" =?> "=foo=" + , "empty" =: code "" =?> "" + , "space" =: code " " =?> " " + , "space at the beginning" =: code " foo" =?> " foo" + , "space at the end" =: code "foo " =?> "foo " + , "use tags for =" =: code "foo = bar" =?> "foo = bar" , "escape tag" =: code "foo = bar baz" =?> "foo = bar</code> baz" - , "normalization with attributes" =: codeWith ("",["haskell"],[]) "foo" <> code "bar" =?> "foobar" - , "normalization" =: code " code "de>" =?> "</code>" - , "normalization with empty string" =: code " str "" <> code "de>" =?> "</code>" + , "normalization with attributes" =: codeWith ("",["haskell"],[]) "foo" <> code "bar" =?> "=foobar=" + , "code tag" =: code "foo" =?> "=foo=" + , "normalization" =: code " code "de>" <> code "=" =?> "</code>=" + , "normalization with empty string" =: code " str "" <> code "de>" <> code "=" =?> "</code>=" + , "emphasized code" =: emph (code "foo") =?> "*=foo=*" + , "strong code" =: strong (code "foo") =?> "**=foo=**" ] , testGroup "spaces" [ "space" =: text "a" <> space <> text "b" =?> "a b" @@ -385,7 +413,7 @@ tests = [ testGroup "block elements" , testGroup "math" [ "inline math" =: math "2^3" =?> "23" , "display math" =: displayMath "2^3" =?> "23" - , "multiple letters in inline math" =: math "abc" =?> "abc" + , "multiple letters in inline math" =: math "abc" =?> "*abc*" , "expand math before normalization" =: math "[" <> str "2]" =?> "[2]" , "multiple math expressions inside one inline list" =: math "5_4" <> text ", " <> displayMath "3^2" =?> "54, 32" ] @@ -461,7 +489,7 @@ tests = [ testGroup "block elements" "foobar" , "emph quoted" =: para (doubleQuoted (emph (text "foo"))) =?> - "“foo”" + "“*foo*”" , "strong word before" =: para (text "foo" <> strong (text "bar")) =?> "foobar" @@ -470,7 +498,7 @@ tests = [ testGroup "block elements" "foobar" , "strong quoted" =: para (singleQuoted (strong (text "foo"))) =?> - "‘foo’" + "‘**foo**’" ] ] ] diff --git a/test/writer.muse b/test/writer.muse index 5993ec357..35d43a751 100644 --- a/test/writer.muse +++ b/test/writer.muse @@ -11,7 +11,7 @@ markdown test suite. ** Level 2 with an [[/url][embedded link]] -*** Level 3 with emphasis +*** Level 3 with *emphasis* **** Level 4 @@ -19,7 +19,7 @@ markdown test suite. * Level 1 -** Level 2 with emphasis +** Level 2 with *emphasis* *** Level 3 @@ -271,18 +271,18 @@ Loose: Multiple blocks with italics: - apple :: red fruit + *apple* :: red fruit - contains seeds, crisp, pleasant to taste - orange :: orange fruit + contains seeds, crisp, pleasant to taste + *orange* :: orange fruit - - { orange code block } - + + { orange code block } + - - orange block quote - + + orange block quote + Multiple definitions, tight: @@ -331,7 +331,7 @@ Interpreted markdown in a table: -This is emphasized +This is *emphasized* @@ -341,7 +341,7 @@ This is emphasized -And this is strong +And this is **strong** @@ -461,27 +461,25 @@ Hr’s: * Inline Markup -This is emphasized, and so is this. +This is *emphasized*, and so *is this*. -This is strong, and so is this. +This is **strong**, and so **is this**. -An [[/url][emphasized link]]. +An *[[/url][emphasized link]]*. -This is strong and em. +***This is strong and em.*** -So is this word. +So is ***this*** word. -This is strong and em. +***This is strong and em.*** -So is this word. +So is ***this*** word. -This is code: >, $, \, \$, -. +This is code: =>=, =$=, =\=, =\$=, ==. -This is strikeout. +This is *strikeout*. -Superscripts: abcd ahello -ahello there. +Superscripts: abcd a*hello* ahello there. Subscripts: H2O, H23O, Hmany of themO. @@ -500,8 +498,8 @@ spaces: a^b c^d, a~b c~d. ‘He said, “I want to go.”’ Were you alive in the 70’s? -Here is some quoted ‘code’ and a -“[[http://example.com/?foo=1&bar=2][quoted link]]”. +Here is some quoted ‘=code=’ and a “[[http://example.com/?foo=1&bar=2][quoted +link]]”. Some dashes: one—two — three—four — five. @@ -515,22 +513,21 @@ Ellipses…and…and…. - \cite[22-23]{smith.1899} - 2 + 2 = 4 - - x ∈ y - - α ∧ ω + - *x* ∈ *y* + - *α* ∧ *ω* - 223 - - p-Tree + - *p*-Tree - Here’s some display math: $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$ - - Here’s one that has a line break in it: - α + ω × x2. + - Here’s one that has a line break in it: *α* + *ω* × *x*2. These shouldn’t be math: - To get the famous equation, write $e = mc^2$. - - $22,000 is a lot of money. So is $34,000. (It worked if “lot” is + - $22,000 is a *lot* of money. So is $34,000. (It worked if “lot” is emphasized.) - Shoes ($20) and socks ($5). - - Escaped $: $73 this should be emphasized 23$. + - Escaped =$=: $73 *this should be emphasized* 23$. Here’s a LaTeX table: @@ -669,7 +666,7 @@ An e-mail address: [[mailto:nobody@nowhere.net][nobody@nowhere.net]] Blockquoted: [[http://example.com/]] -Auto-links should not occur here: +Auto-links should not occur here: == or here: @@ -689,7 +686,7 @@ Here is a movie [[movie.jpg][movie]] icon. * Footnotes -Here is a footnote reference,[1] and another.[2] This should not be a +Here is a footnote reference,[1] and another.[2] This should *not* be a footnote reference, because it contains a space.[^my note] Here is an inline note.[3] @@ -716,9 +713,9 @@ This paragraph should not be part of the note, as it is not indented. If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. -[3] This is easier to type. Inline notes may contain - [[http://google.com][links]] and ] verbatim characters, as - well as [bracketed text]. +[3] This is *easier* to type. Inline notes may contain + [[http://google.com][links]] and =]= verbatim characters, as well as + [bracketed text]. [4] In quote. -- cgit v1.2.3