diff options
| author | Francesco Occhipinti <focchi.pinti@gmail.com> | 2018-04-26 21:17:51 +0200 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2018-04-26 12:17:51 -0700 | 
| commit | eef1c211f58f0a2ffc6c500bd2158569b83fca1f (patch) | |
| tree | 63612cad375a97c2ce84d4ed4174d85f74757639 | |
| parent | cfa4eee28bc3d6521f806bc37c937e9615d15588 (diff) | |
| download | pandoc-eef1c211f58f0a2ffc6c500bd2158569b83fca1f.tar.gz | |
RST reader: flatten nested inlines, closes #4368 (#4554)
nested inlines are not valid RST syntax, so we flatten them following
some readability criteria discussed in #4368.
| -rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 78 | ||||
| -rw-r--r-- | test/Tests/Writers/RST.hs | 24 | ||||
| -rw-r--r-- | test/writer.rst | 14 | 
3 files changed, 106 insertions, 10 deletions
| diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index cc7131d0a..084615357 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to reStructuredText.  reStructuredText:  <http://docutils.sourceforge.net/rst.html>  -} -module Text.Pandoc.Writers.RST ( writeRST ) where +module Text.Pandoc.Writers.RST ( writeRST, flatten ) where  import Prelude  import Control.Monad.State.Strict  import Data.Char (isSpace, toLower) @@ -377,8 +377,10 @@ blockListToRST :: PandocMonad m  blockListToRST = blockListToRST' False  transformInlines :: [Inline] -> [Inline] -transformInlines =  stripLeadingTrailingSpace . insertBS -                    . filter hasContents . removeSpaceAfterDisplayMath +transformInlines =  insertBS . +                    filter hasContents . +                    removeSpaceAfterDisplayMath . +                    concatMap (transformNested . flatten)    where -- empty inlines are not valid RST syntax          hasContents :: Inline -> Bool          hasContents (Str "")              = False @@ -412,6 +414,8 @@ transformInlines =  stripLeadingTrailingSpace . insertBS                x : insertBS (y : zs)          insertBS (x:ys) = x : insertBS ys          insertBS [] = [] +        transformNested :: [Inline] -> [Inline] +        transformNested = map (mapNested stripLeadingTrailingSpace)          surroundComplex :: Inline -> Inline -> Bool          surroundComplex (Str s@(_:_)) (Str s'@(_:_)) =            case (last s, head s') of @@ -449,6 +453,74 @@ transformInlines =  stripLeadingTrailingSpace . insertBS          isComplex (Span _ (x:_))  = isComplex x          isComplex _               = False +-- | Flattens nested inlines. Extracts nested inlines and goes through +-- them either collapsing them in the outer inline container or +-- pulling them out of it +flatten :: Inline -> [Inline] +flatten outer = combineAll $ dropInlineParent outer +  where combineAll = foldl combine [] + +        combine :: [Inline] -> Inline -> [Inline] +        combine f i =  +          case (outer, i) of +          -- quotes are not rendered using RST inlines, so we can keep +          -- them and they will be readable and parsable +          (Quoted _ _, _)          -> keep f i +          (_, Quoted _ _)          -> keep f i +          -- parent inlines would prevent links from being correctly +          -- parsed, in this case we prioritise the content over the +          -- style +          (_, Link _ _ _)          -> emerge f i +          -- always give priority to strong text over emphasis +          (Emph _, Strong _)       -> emerge f i +          -- drop all other nested styles +          (_, _)                   -> collapse f i + +        emerge f i = f <> [i] +        keep f i = appendToLast f [i] +        collapse f i = appendToLast f $ dropInlineParent i + +        appendToLast :: [Inline] -> [Inline] -> [Inline] +        appendToLast [] toAppend = [setInlineChildren outer toAppend] +        appendToLast flattened toAppend +          | isOuter lastFlat = init flattened <> [appendTo lastFlat toAppend] +          | otherwise =  flattened <> [setInlineChildren outer toAppend] +          where lastFlat = last flattened +                appendTo o i = mapNested (<> i) o +                isOuter i = emptyParent i == emptyParent outer +                emptyParent i = setInlineChildren i [] + +mapNested :: ([Inline] -> [Inline]) -> Inline -> Inline +mapNested f i = setInlineChildren i (f (dropInlineParent i)) + +dropInlineParent :: Inline -> [Inline] +dropInlineParent (Link _ i _)    = i +dropInlineParent (Emph i)        = i +dropInlineParent (Strong i)      = i +dropInlineParent (Strikeout i)   = i +dropInlineParent (Superscript i) = i +dropInlineParent (Subscript i)   = i +dropInlineParent (SmallCaps i)   = i +dropInlineParent (Cite _ i)      = i +dropInlineParent (Image _ i _)   = i +dropInlineParent (Span _ i)      = i +dropInlineParent (Quoted _ i)    = i +dropInlineParent i               = [i] -- not a parent, like Str or Space + +setInlineChildren :: Inline -> [Inline] -> Inline +setInlineChildren (Link a _ t) i    = Link a i t +setInlineChildren (Emph _) i        = Emph i +setInlineChildren (Strong _) i      = Strong i +setInlineChildren (Strikeout _) i   = Strikeout i +setInlineChildren (Superscript _) i = Superscript i +setInlineChildren (Subscript _) i   = Subscript i +setInlineChildren (SmallCaps _) i   = SmallCaps i +setInlineChildren (Quoted q _) i    = Quoted q i +setInlineChildren (Cite c _) i      = Cite c i +setInlineChildren (Image a _ t) i   = Image a i t +setInlineChildren (Span a _) i      = Span a i +setInlineChildren leaf _            = leaf +  inlineListToRST :: PandocMonad m => [Inline] -> RST m Doc  inlineListToRST = writeInlines . walk transformInlines diff --git a/test/Tests/Writers/RST.hs b/test/Tests/Writers/RST.hs index 29c9328f6..89ad1de48 100644 --- a/test/Tests/Writers/RST.hs +++ b/test/Tests/Writers/RST.hs @@ -4,10 +4,12 @@ module Tests.Writers.RST (tests) where  import Prelude  import Test.Tasty +import Test.Tasty.HUnit  import Tests.Helpers  import Text.Pandoc  import Text.Pandoc.Arbitrary ()  import Text.Pandoc.Builder +import Text.Pandoc.Writers.RST  infix 4 =:  (=:) :: (ToString a, ToPandoc a) @@ -52,6 +54,17 @@ tests = [ testGroup "rubrics"                , ""                , "   quoted"]            ] +        , testGroup "flatten" +          [ testCase "emerges nested styles as expected" $ +            flatten (Emph [Str "1", Strong [Str "2"], Str "3"]) @?= +            [Emph [Str "1"], Strong [Str "2"], Emph [Str "3"]] +          , testCase "could introduce trailing spaces" $ +            flatten (Emph [Str "f", Space, Strong [Str "2"]]) @?= +            [Emph [Str "f", Space], Strong [Str "2"]] +            -- the test above is the reason why we call +            -- stripLeadingTrailingSpace through transformNested after +            -- flatten +          ]          , testGroup "inlines"            [ "are removed when empty" =: -- #4434              plain (strong (str "")) =?> "" @@ -64,6 +77,17 @@ tests = [ testGroup "rubrics"              strong (space <> str "text" <> space <> space) =?> "**text**"            , "single space stripped" =:              strong space =?> "" +          , "give priority to strong style over emphasis" =: +            strong (emph (strong (str "s"))) =?> "**s**" +          , "links are not elided by outer style" =: +            strong (emph (link "loc" "" (str "text"))) =?> +            "`text <loc>`__" +          , "RST inlines cannot start nor end with spaces" =: +            emph (str "f" <> space <> strong (str "d") <> space <> str "l") =?> +            "*f*\\ **d**\\ *l*" +          , "keeps quotes" =: +            strong (str "f" <> doubleQuoted (str "d") <> str "l") =?> +            "**f“d”l**"            ]          , testGroup "headings"            [ "normal heading" =: diff --git a/test/writer.rst b/test/writer.rst index 3353d11d3..0c986b887 100644 --- a/test/writer.rst +++ b/test/writer.rst @@ -615,21 +615,21 @@ This is *emphasized*, and so *is this*.  This is **strong**, and so **is this**. -An *`emphasized link </url>`__*. +An `emphasized link </url>`__. -***This is strong and em.*** +**This is strong and em.** -So is ***this*** word. +So is **this** word. -***This is strong and em.*** +**This is strong and em.** -So is ***this*** word. +So is **this** word.  This is code: ``>``, ``$``, ``\``, ``\$``, ``<html>``. -[STRIKEOUT:This is *strikeout*.] +[STRIKEOUT:This is strikeout.] -Superscripts: a\ :sup:`bc`\ d a\ :sup:`*hello*` a\ :sup:`hello there`. +Superscripts: a\ :sup:`bc`\ d a\ :sup:`hello` a\ :sup:`hello there`.  Subscripts: H\ :sub:`2`\ O, H\ :sub:`23`\ O, H\ :sub:`many of them`\ O. | 
