aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrancesco Occhipinti <focchi.pinti@gmail.com>2018-04-26 21:17:51 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2018-04-26 12:17:51 -0700
commiteef1c211f58f0a2ffc6c500bd2158569b83fca1f (patch)
tree63612cad375a97c2ce84d4ed4174d85f74757639
parentcfa4eee28bc3d6521f806bc37c937e9615d15588 (diff)
downloadpandoc-eef1c211f58f0a2ffc6c500bd2158569b83fca1f.tar.gz
RST reader: flatten nested inlines, closes #4368 (#4554)
nested inlines are not valid RST syntax, so we flatten them following some readability criteria discussed in #4368.
-rw-r--r--src/Text/Pandoc/Writers/RST.hs78
-rw-r--r--test/Tests/Writers/RST.hs24
-rw-r--r--test/writer.rst14
3 files changed, 106 insertions, 10 deletions
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index cc7131d0a..084615357 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to reStructuredText.
reStructuredText: <http://docutils.sourceforge.net/rst.html>
-}
-module Text.Pandoc.Writers.RST ( writeRST ) where
+module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
import Prelude
import Control.Monad.State.Strict
import Data.Char (isSpace, toLower)
@@ -377,8 +377,10 @@ blockListToRST :: PandocMonad m
blockListToRST = blockListToRST' False
transformInlines :: [Inline] -> [Inline]
-transformInlines = stripLeadingTrailingSpace . insertBS
- . filter hasContents . removeSpaceAfterDisplayMath
+transformInlines = insertBS .
+ filter hasContents .
+ removeSpaceAfterDisplayMath .
+ concatMap (transformNested . flatten)
where -- empty inlines are not valid RST syntax
hasContents :: Inline -> Bool
hasContents (Str "") = False
@@ -412,6 +414,8 @@ transformInlines = stripLeadingTrailingSpace . insertBS
x : insertBS (y : zs)
insertBS (x:ys) = x : insertBS ys
insertBS [] = []
+ transformNested :: [Inline] -> [Inline]
+ transformNested = map (mapNested stripLeadingTrailingSpace)
surroundComplex :: Inline -> Inline -> Bool
surroundComplex (Str s@(_:_)) (Str s'@(_:_)) =
case (last s, head s') of
@@ -449,6 +453,74 @@ transformInlines = stripLeadingTrailingSpace . insertBS
isComplex (Span _ (x:_)) = isComplex x
isComplex _ = False
+-- | Flattens nested inlines. Extracts nested inlines and goes through
+-- them either collapsing them in the outer inline container or
+-- pulling them out of it
+flatten :: Inline -> [Inline]
+flatten outer = combineAll $ dropInlineParent outer
+ where combineAll = foldl combine []
+
+ combine :: [Inline] -> Inline -> [Inline]
+ combine f i =
+ case (outer, i) of
+ -- quotes are not rendered using RST inlines, so we can keep
+ -- them and they will be readable and parsable
+ (Quoted _ _, _) -> keep f i
+ (_, Quoted _ _) -> keep f i
+ -- parent inlines would prevent links from being correctly
+ -- parsed, in this case we prioritise the content over the
+ -- style
+ (_, Link _ _ _) -> emerge f i
+ -- always give priority to strong text over emphasis
+ (Emph _, Strong _) -> emerge f i
+ -- drop all other nested styles
+ (_, _) -> collapse f i
+
+ emerge f i = f <> [i]
+ keep f i = appendToLast f [i]
+ collapse f i = appendToLast f $ dropInlineParent i
+
+ appendToLast :: [Inline] -> [Inline] -> [Inline]
+ appendToLast [] toAppend = [setInlineChildren outer toAppend]
+ appendToLast flattened toAppend
+ | isOuter lastFlat = init flattened <> [appendTo lastFlat toAppend]
+ | otherwise = flattened <> [setInlineChildren outer toAppend]
+ where lastFlat = last flattened
+ appendTo o i = mapNested (<> i) o
+ isOuter i = emptyParent i == emptyParent outer
+ emptyParent i = setInlineChildren i []
+
+mapNested :: ([Inline] -> [Inline]) -> Inline -> Inline
+mapNested f i = setInlineChildren i (f (dropInlineParent i))
+
+dropInlineParent :: Inline -> [Inline]
+dropInlineParent (Link _ i _) = i
+dropInlineParent (Emph i) = i
+dropInlineParent (Strong i) = i
+dropInlineParent (Strikeout i) = i
+dropInlineParent (Superscript i) = i
+dropInlineParent (Subscript i) = i
+dropInlineParent (SmallCaps i) = i
+dropInlineParent (Cite _ i) = i
+dropInlineParent (Image _ i _) = i
+dropInlineParent (Span _ i) = i
+dropInlineParent (Quoted _ i) = i
+dropInlineParent i = [i] -- not a parent, like Str or Space
+
+setInlineChildren :: Inline -> [Inline] -> Inline
+setInlineChildren (Link a _ t) i = Link a i t
+setInlineChildren (Emph _) i = Emph i
+setInlineChildren (Strong _) i = Strong i
+setInlineChildren (Strikeout _) i = Strikeout i
+setInlineChildren (Superscript _) i = Superscript i
+setInlineChildren (Subscript _) i = Subscript i
+setInlineChildren (SmallCaps _) i = SmallCaps i
+setInlineChildren (Quoted q _) i = Quoted q i
+setInlineChildren (Cite c _) i = Cite c i
+setInlineChildren (Image a _ t) i = Image a i t
+setInlineChildren (Span a _) i = Span a i
+setInlineChildren leaf _ = leaf
+
inlineListToRST :: PandocMonad m => [Inline] -> RST m Doc
inlineListToRST = writeInlines . walk transformInlines
diff --git a/test/Tests/Writers/RST.hs b/test/Tests/Writers/RST.hs
index 29c9328f6..89ad1de48 100644
--- a/test/Tests/Writers/RST.hs
+++ b/test/Tests/Writers/RST.hs
@@ -4,10 +4,12 @@ module Tests.Writers.RST (tests) where
import Prelude
import Test.Tasty
+import Test.Tasty.HUnit
import Tests.Helpers
import Text.Pandoc
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder
+import Text.Pandoc.Writers.RST
infix 4 =:
(=:) :: (ToString a, ToPandoc a)
@@ -52,6 +54,17 @@ tests = [ testGroup "rubrics"
, ""
, " quoted"]
]
+ , testGroup "flatten"
+ [ testCase "emerges nested styles as expected" $
+ flatten (Emph [Str "1", Strong [Str "2"], Str "3"]) @?=
+ [Emph [Str "1"], Strong [Str "2"], Emph [Str "3"]]
+ , testCase "could introduce trailing spaces" $
+ flatten (Emph [Str "f", Space, Strong [Str "2"]]) @?=
+ [Emph [Str "f", Space], Strong [Str "2"]]
+ -- the test above is the reason why we call
+ -- stripLeadingTrailingSpace through transformNested after
+ -- flatten
+ ]
, testGroup "inlines"
[ "are removed when empty" =: -- #4434
plain (strong (str "")) =?> ""
@@ -64,6 +77,17 @@ tests = [ testGroup "rubrics"
strong (space <> str "text" <> space <> space) =?> "**text**"
, "single space stripped" =:
strong space =?> ""
+ , "give priority to strong style over emphasis" =:
+ strong (emph (strong (str "s"))) =?> "**s**"
+ , "links are not elided by outer style" =:
+ strong (emph (link "loc" "" (str "text"))) =?>
+ "`text <loc>`__"
+ , "RST inlines cannot start nor end with spaces" =:
+ emph (str "f" <> space <> strong (str "d") <> space <> str "l") =?>
+ "*f*\\ **d**\\ *l*"
+ , "keeps quotes" =:
+ strong (str "f" <> doubleQuoted (str "d") <> str "l") =?>
+ "**f“d”l**"
]
, testGroup "headings"
[ "normal heading" =:
diff --git a/test/writer.rst b/test/writer.rst
index 3353d11d3..0c986b887 100644
--- a/test/writer.rst
+++ b/test/writer.rst
@@ -615,21 +615,21 @@ This is *emphasized*, and so *is this*.
This is **strong**, and so **is this**.
-An *`emphasized link </url>`__*.
+An `emphasized link </url>`__.
-***This is strong and em.***
+**This is strong and em.**
-So is ***this*** word.
+So is **this** word.
-***This is strong and em.***
+**This is strong and em.**
-So is ***this*** word.
+So is **this** word.
This is code: ``>``, ``$``, ``\``, ``\$``, ``<html>``.
-[STRIKEOUT:This is *strikeout*.]
+[STRIKEOUT:This is strikeout.]
-Superscripts: a\ :sup:`bc`\ d a\ :sup:`*hello*` a\ :sup:`hello there`.
+Superscripts: a\ :sup:`bc`\ d a\ :sup:`hello` a\ :sup:`hello there`.
Subscripts: H\ :sub:`2`\ O, H\ :sub:`23`\ O, H\ :sub:`many of them`\ O.