aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJesse Rosenthal <jrosenthal@jhu.edu>2016-02-26 08:47:26 -0500
committerJesse Rosenthal <jrosenthal@jhu.edu>2016-02-26 08:57:53 -0500
commita7a0b452a5f3ac3e1897c1f56758731f82a12f2f (patch)
tree8571bd4016a02177c4a68d1b2a9d743a96b18522 /src/Text
parent38bd4162fefb35a33c22dd939a594262fbdc8a46 (diff)
downloadpandoc-a7a0b452a5f3ac3e1897c1f56758731f82a12f2f.tar.gz
Docx Reader: Get rid of Modifiable typeclass.
The docx reader used to use a Modifiable typeclass to combine both Blocks and Inlines. But all the work was in the inlines. So most of the generality was wasted, at the expense of making the code harder to understand. This gets rid of the generality, and adds functions for Blocks and Inlines. It should be a bit easier to work with going forward.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs30
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs154
-rw-r--r--src/Text/Pandoc/Readers/Docx/Reducible.hs183
3 files changed, 169 insertions, 198 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 1b3269136..eb71d8dd8 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -81,7 +81,7 @@ import Text.Pandoc.Builder
import Text.Pandoc.Walk
import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Readers.Docx.Lists
-import Text.Pandoc.Readers.Docx.Reducible
+import Text.Pandoc.Readers.Docx.Combine
import Text.Pandoc.Shared
import Text.Pandoc.MediaBag (insertMedia, MediaBag)
import Data.List (delete, (\\), intersect)
@@ -167,7 +167,7 @@ bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
, (c : _)<- intersect (pStyle pPr) (M.keys metaStyles)
, (Just metaField) <- M.lookup c metaStyles = do
- inlines <- concatReduce <$> mapM parPartToInlines parParts
+ inlines <- smushInlines <$> mapM parPartToInlines parParts
remaining <- bodyPartsToMeta' bps
let
f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
@@ -291,13 +291,13 @@ runToInlines (Run rs runElems)
Just SubScrpt -> subscript codeString
_ -> codeString
| otherwise = do
- let ils = concatReduce (map runElemToInlines runElems)
+ let ils = smushInlines (map runElemToInlines runElems)
return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
runToInlines (Footnote bps) = do
- blksList <- concatReduce <$> (mapM bodyPartToBlocks bps)
+ blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
return $ note blksList
runToInlines (Endnote bps) = do
- blksList <- concatReduce <$> (mapM bodyPartToBlocks bps)
+ blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
return $ note blksList
runToInlines (InlineDrawing fp bs ext) = do
mediaBag <- gets docxMediaBag
@@ -316,19 +316,19 @@ parPartToInlines (PlainRun r) = runToInlines r
parPartToInlines (Insertion _ author date runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
- AcceptChanges -> concatReduce <$> mapM runToInlines runs
+ AcceptChanges -> smushInlines <$> mapM runToInlines runs
RejectChanges -> return mempty
AllChanges -> do
- ils <- concatReduce <$> mapM runToInlines runs
+ ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["insertion"], [("author", author), ("date", date)])
return $ spanWith attr ils
parPartToInlines (Deletion _ author date runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> return mempty
- RejectChanges -> concatReduce <$> mapM runToInlines runs
+ RejectChanges -> smushInlines <$> mapM runToInlines runs
AllChanges -> do
- ils <- concatReduce <$> mapM runToInlines runs
+ ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["deletion"], [("author", author), ("date", date)])
return $ spanWith attr ils
parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors =
@@ -361,10 +361,10 @@ parPartToInlines (Drawing fp bs ext) = do
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
return $ imageWith (extentToAttr ext) fp "" ""
parPartToInlines (InternalHyperLink anchor runs) = do
- ils <- concatReduce <$> mapM runToInlines runs
+ ils <- smushInlines <$> mapM runToInlines runs
return $ link ('#' : anchor) "" ils
parPartToInlines (ExternalHyperLink target runs) = do
- ils <- concatReduce <$> mapM runToInlines runs
+ ils <- smushInlines <$> mapM runToInlines runs
return $ link target "" ils
parPartToInlines (PlainOMath exps) = do
return $ math $ writeTeX exps
@@ -417,7 +417,7 @@ singleParaToPlain blks = blks
cellToBlocks :: Cell -> DocxContext Blocks
cellToBlocks (Cell bps) = do
- blks <- concatReduce <$> mapM bodyPartToBlocks bps
+ blks <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
rowToBlocksList :: Row -> DocxContext [Blocks]
@@ -479,11 +479,11 @@ bodyPartToBlocks (Paragraph pPr parparts)
$ concatMap parPartToString parparts
| Just (style, n) <- pHeading pPr = do
ils <- local (\s-> s{docxInHeaderBlock=True}) $
- (concatReduce <$> mapM parPartToInlines parparts)
+ (smushInlines <$> mapM parPartToInlines parparts)
makeHeaderAnchor $
headerWith ("", delete style (pStyle pPr), []) n ils
| otherwise = do
- ils <- concatReduce <$> mapM parPartToInlines parparts >>=
+ ils <- smushInlines <$> mapM parPartToInlines parparts >>=
(return . fromList . trimLineBreaks . normalizeSpaces . toList)
dropIls <- gets docxDropCap
let ils' = dropIls <> ils
@@ -561,7 +561,7 @@ bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
- blks <- concatReduce <$> mapM bodyPartToBlocks blkbps
+ blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
mediaBag <- gets docxMediaBag
return $ (meta,
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
new file mode 100644
index 000000000..39e0df825
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -0,0 +1,154 @@
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
+ PatternGuards #-}
+
+module Text.Pandoc.Readers.Docx.Combine ( smushInlines
+ , smushBlocks
+ )
+ where
+
+import Text.Pandoc.Builder
+import Data.List
+import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>))
+import qualified Data.Sequence as Seq (null)
+
+data Modifier a = Modifier (a -> a)
+ | AttrModifier (Attr -> a -> a) Attr
+ | NullModifier
+
+spaceOutInlinesL :: Inlines -> (Inlines, Inlines)
+spaceOutInlinesL ms = (l, stackInlines fs (m' <> r))
+ where (l, m, r) = spaceOutInlines ms
+ (fs, m') = unstackInlines m
+
+spaceOutInlinesR :: Inlines -> (Inlines, Inlines)
+spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
+ where (l, m, r) = spaceOutInlines ms
+ (fs, m') = unstackInlines m
+
+spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines)
+spaceOutInlines ils =
+ let (fs, ils') = unstackInlines ils
+ contents = unMany ils'
+ left = case viewl contents of
+ (Space :< _) -> space
+ _ -> mempty
+ right = case viewr contents of
+ (_ :> Space) -> space
+ _ -> mempty in
+ (left, (stackInlines fs $ trimInlines . Many $ contents), right)
+
+stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
+stackInlines [] ms = ms
+stackInlines (NullModifier : fs) ms = stackInlines fs ms
+stackInlines ((Modifier f) : fs) ms =
+ if isEmpty ms
+ then stackInlines fs ms
+ else f $ stackInlines fs ms
+stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms
+
+unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
+unstackInlines ms = case ilModifier ms of
+ NullModifier -> ([], ms)
+ _ -> (f : fs, ms') where
+ f = ilModifier ms
+ (fs, ms') = unstackInlines $ ilInnards ms
+
+ilModifier :: Inlines -> Modifier Inlines
+ilModifier ils = case viewl (unMany ils) of
+ (x :< xs) | Seq.null xs -> case x of
+ (Emph _) -> Modifier emph
+ (Strong _) -> Modifier strong
+ (SmallCaps _) -> Modifier smallcaps
+ (Strikeout _) -> Modifier strikeout
+ (Superscript _) -> Modifier superscript
+ (Subscript _) -> Modifier subscript
+ (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
+ (Span attr _) -> AttrModifier spanWith attr
+ _ -> NullModifier
+ _ -> NullModifier
+
+ilInnards :: Inlines -> Inlines
+ilInnards ils = case viewl (unMany ils) of
+ (x :< xs) | Seq.null xs -> case x of
+ (Emph lst) -> fromList lst
+ (Strong lst) -> fromList lst
+ (SmallCaps lst) -> fromList lst
+ (Strikeout lst) -> fromList lst
+ (Superscript lst) -> fromList lst
+ (Subscript lst) -> fromList lst
+ (Link _ lst _) -> fromList lst
+ (Span _ lst) -> fromList lst
+ _ -> ils
+ _ -> ils
+
+inlinesL :: Inlines -> (Inlines, Inlines)
+inlinesL ils = case viewl $ unMany ils of
+ (s :< sq) -> (singleton s, Many sq)
+ _ -> (mempty, ils)
+
+inlinesR :: Inlines -> (Inlines, Inlines)
+inlinesR ils = case viewr $ unMany ils of
+ (sq :> s) -> (Many sq, singleton s)
+ _ -> (ils, mempty)
+
+combineInlines :: Inlines -> Inlines -> Inlines
+combineInlines x y =
+ let (xs', x') = inlinesR x
+ (y', ys') = inlinesL y
+ in
+ xs' <> (combineSingletonInlines x' y') <> ys'
+
+combineSingletonInlines :: Inlines -> Inlines -> Inlines
+combineSingletonInlines x y =
+ let (xfs, xs) = unstackInlines x
+ (yfs, ys) = unstackInlines y
+ shared = xfs `intersect` yfs
+ x_remaining = xfs \\ shared
+ y_remaining = yfs \\ shared
+ x_rem_attr = filter isAttrModifier x_remaining
+ y_rem_attr = filter isAttrModifier y_remaining
+ in
+ case null shared of
+ True | isEmpty xs && isEmpty ys ->
+ stackInlines (x_rem_attr ++ y_rem_attr) mempty
+ | isEmpty xs ->
+ let (sp, y') = spaceOutInlinesL y in
+ (stackInlines x_rem_attr mempty) <> sp <> y'
+ | isEmpty ys ->
+ let (x', sp) = spaceOutInlinesR x in
+ x' <> sp <> (stackInlines y_rem_attr mempty)
+ | otherwise ->
+ let (x', xsp) = spaceOutInlinesR x
+ (ysp, y') = spaceOutInlinesL y
+ in
+ x' <> xsp <> ysp <> y'
+ False -> stackInlines shared $
+ combineInlines
+ (stackInlines x_remaining xs)
+ (stackInlines y_remaining ys)
+
+combineBlocks :: Blocks -> Blocks -> Blocks
+combineBlocks bs cs
+ | bs' :> (BlockQuote bs'') <- viewr (unMany bs)
+ , (BlockQuote cs'') :< cs' <- viewl (unMany cs) =
+ Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs'
+combineBlocks bs cs = bs <> cs
+
+instance (Monoid a, Eq a) => Eq (Modifier a) where
+ (Modifier f) == (Modifier g) = (f mempty == g mempty)
+ (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
+ (NullModifier) == (NullModifier) = True
+ _ == _ = False
+
+isEmpty :: (Monoid a, Eq a) => a -> Bool
+isEmpty x = x == mempty
+
+isAttrModifier :: Modifier a -> Bool
+isAttrModifier (AttrModifier _ _) = True
+isAttrModifier _ = False
+
+smushInlines :: [Inlines] -> Inlines
+smushInlines xs = foldl combineInlines mempty xs
+
+smushBlocks :: [Blocks] -> Blocks
+smushBlocks xs = foldl combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
deleted file mode 100644
index e6de2d474..000000000
--- a/src/Text/Pandoc/Readers/Docx/Reducible.hs
+++ /dev/null
@@ -1,183 +0,0 @@
-{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
- PatternGuards #-}
-
-module Text.Pandoc.Readers.Docx.Reducible ( concatReduce
- , (<+>)
- )
- where
-
-
-import Text.Pandoc.Builder
-import Data.List
-import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
-import qualified Data.Sequence as Seq (null)
-
-data Modifier a = Modifier (a -> a)
- | AttrModifier (Attr -> a -> a) Attr
- | NullModifier
-
-class (Eq a) => Modifiable a where
- modifier :: a -> Modifier a
- innards :: a -> a
- getL :: a -> (a, a)
- getR :: a -> (a, a)
- spaceOut :: a -> (a, a, a)
-
-spaceOutL :: (Monoid a, Modifiable a) => a -> (a, a)
-spaceOutL ms = (l, stack fs (m' <> r))
- where (l, m, r) = spaceOut ms
- (fs, m') = unstack m
-
-spaceOutR :: (Monoid a, Modifiable a) => a -> (a, a)
-spaceOutR ms = (stack fs (l <> m'), r)
- where (l, m, r) = spaceOut ms
- (fs, m') = unstack m
-
-instance (Monoid a, Show a) => Show (Modifier a) where
- show (Modifier f) = show $ f mempty
- show (AttrModifier f attr) = show $ f attr mempty
- show (NullModifier) = "NullModifier"
-
-instance (Monoid a, Eq a) => Eq (Modifier a) where
- (Modifier f) == (Modifier g) = (f mempty == g mempty)
- (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
- (NullModifier) == (NullModifier) = True
- _ == _ = False
-
-instance Modifiable Inlines where
- modifier ils = case viewl (unMany ils) of
- (x :< xs) | Seq.null xs -> case x of
- (Emph _) -> Modifier emph
- (Strong _) -> Modifier strong
- (SmallCaps _) -> Modifier smallcaps
- (Strikeout _) -> Modifier strikeout
- (Superscript _) -> Modifier superscript
- (Subscript _) -> Modifier subscript
- (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
- (Span attr _) -> AttrModifier spanWith attr
- _ -> NullModifier
- _ -> NullModifier
-
- innards ils = case viewl (unMany ils) of
- (x :< xs) | Seq.null xs -> case x of
- (Emph lst) -> fromList lst
- (Strong lst) -> fromList lst
- (SmallCaps lst) -> fromList lst
- (Strikeout lst) -> fromList lst
- (Superscript lst) -> fromList lst
- (Subscript lst) -> fromList lst
- (Link _ lst _) -> fromList lst
- (Span _ lst) -> fromList lst
- _ -> ils
- _ -> ils
-
- getL ils = case viewl $ unMany ils of
- (s :< sq) -> (singleton s, Many sq)
- _ -> (mempty, ils)
-
- getR ils = case viewr $ unMany ils of
- (sq :> s) -> (Many sq, singleton s)
- _ -> (ils, mempty)
-
- spaceOut ils =
- let (fs, ils') = unstack ils
- contents = unMany ils'
- left = case viewl contents of
- (Space :< _) -> space
- _ -> mempty
- right = case viewr contents of
- (_ :> Space) -> space
- _ -> mempty in
- (left, (stack fs $ trimInlines .Many $ contents), right)
-
-instance Modifiable Blocks where
- modifier blks = case viewl (unMany blks) of
- (x :< xs) | Seq.null xs -> case x of
- (BlockQuote _) -> Modifier blockQuote
- -- (Div attr _) -> AttrModifier divWith attr
- _ -> NullModifier
- _ -> NullModifier
-
- innards blks = case viewl (unMany blks) of
- (x :< xs) | Seq.null xs -> case x of
- (BlockQuote lst) -> fromList lst
- -- (Div attr lst) -> fromList lst
- _ -> blks
- _ -> blks
-
- spaceOut blks = (mempty, blks, mempty)
-
- getL ils = case viewl $ unMany ils of
- (s :< sq) -> (singleton s, Many sq)
- _ -> (mempty, ils)
-
- getR ils = case viewr $ unMany ils of
- (sq :> s) -> (Many sq, singleton s)
- _ -> (ils, mempty)
-
-
-unstack :: (Modifiable a) => a -> ([Modifier a], a)
-unstack ms = case modifier ms of
- NullModifier -> ([], ms)
- _ -> (f : fs, ms') where
- f = modifier ms
- (fs, ms') = unstack $ innards ms
-
-stack :: (Monoid a, Modifiable a) => [Modifier a] -> a -> a
-stack [] ms = ms
-stack (NullModifier : fs) ms = stack fs ms
-stack ((Modifier f) : fs) ms =
- if isEmpty ms
- then stack fs ms
- else f $ stack fs ms
-stack ((AttrModifier f attr) : fs) ms = f attr $ stack fs ms
-
-isEmpty :: (Monoid a, Eq a) => a -> Bool
-isEmpty x = x == mempty
-
-
-combine :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
-combine x y =
- let (xs', x') = getR x
- (y', ys') = getL y
- in
- xs' <> (combineSingleton x' y') <> ys'
-
-isAttrModifier :: Modifier a -> Bool
-isAttrModifier (AttrModifier _ _) = True
-isAttrModifier _ = False
-
-combineSingleton :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
-combineSingleton x y =
- let (xfs, xs) = unstack x
- (yfs, ys) = unstack y
- shared = xfs `intersect` yfs
- x_remaining = xfs \\ shared
- y_remaining = yfs \\ shared
- x_rem_attr = filter isAttrModifier x_remaining
- y_rem_attr = filter isAttrModifier y_remaining
- in
- case null shared of
- True | isEmpty xs && isEmpty ys ->
- stack (x_rem_attr ++ y_rem_attr) mempty
- | isEmpty xs ->
- let (sp, y') = spaceOutL y in
- (stack x_rem_attr mempty) <> sp <> y'
- | isEmpty ys ->
- let (x', sp) = spaceOutR x in
- x' <> sp <> (stack y_rem_attr mempty)
- | otherwise ->
- let (x', xsp) = spaceOutR x
- (ysp, y') = spaceOutL y
- in
- x' <> xsp <> ysp <> y'
- False -> stack shared $
- combine
- (stack x_remaining xs)
- (stack y_remaining ys)
-
-(<+>) :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
-x <+> y = combine x y
-
-concatReduce :: (Monoid a, Modifiable a) => [a] -> a
-concatReduce xs = foldl combine mempty xs