diff options
-rw-r--r-- | pandoc.cabal | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 30 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Combine.hs | 154 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Reducible.hs | 183 |
4 files changed, 170 insertions, 199 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index 68de0afa8..b76d5d6f4 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -372,7 +372,7 @@ Library Text.Pandoc.Process, Text.Pandoc.CSS Other-Modules: Text.Pandoc.Readers.Docx.Lists, - Text.Pandoc.Readers.Docx.Reducible, + Text.Pandoc.Readers.Docx.Combine, Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Readers.Docx.Fonts, Text.Pandoc.Readers.Docx.Util, diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 1b3269136..eb71d8dd8 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -81,7 +81,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists -import Text.Pandoc.Readers.Docx.Reducible +import Text.Pandoc.Readers.Docx.Combine import Text.Pandoc.Shared import Text.Pandoc.MediaBag (insertMedia, MediaBag) import Data.List (delete, (\\), intersect) @@ -167,7 +167,7 @@ bodyPartsToMeta' (bp : bps) | (Paragraph pPr parParts) <- bp , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles) , (Just metaField) <- M.lookup c metaStyles = do - inlines <- concatReduce <$> mapM parPartToInlines parParts + inlines <- smushInlines <$> mapM parPartToInlines parParts remaining <- bodyPartsToMeta' bps let f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils'] @@ -291,13 +291,13 @@ runToInlines (Run rs runElems) Just SubScrpt -> subscript codeString _ -> codeString | otherwise = do - let ils = concatReduce (map runElemToInlines runElems) + let ils = smushInlines (map runElemToInlines runElems) return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils runToInlines (Footnote bps) = do - blksList <- concatReduce <$> (mapM bodyPartToBlocks bps) + blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps) return $ note blksList runToInlines (Endnote bps) = do - blksList <- concatReduce <$> (mapM bodyPartToBlocks bps) + blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps) return $ note blksList runToInlines (InlineDrawing fp bs ext) = do mediaBag <- gets docxMediaBag @@ -316,19 +316,19 @@ parPartToInlines (PlainRun r) = runToInlines r parPartToInlines (Insertion _ author date runs) = do opts <- asks docxOptions case readerTrackChanges opts of - AcceptChanges -> concatReduce <$> mapM runToInlines runs + AcceptChanges -> smushInlines <$> mapM runToInlines runs RejectChanges -> return mempty AllChanges -> do - ils <- concatReduce <$> mapM runToInlines runs + ils <- smushInlines <$> mapM runToInlines runs let attr = ("", ["insertion"], [("author", author), ("date", date)]) return $ spanWith attr ils parPartToInlines (Deletion _ author date runs) = do opts <- asks docxOptions case readerTrackChanges opts of AcceptChanges -> return mempty - RejectChanges -> concatReduce <$> mapM runToInlines runs + RejectChanges -> smushInlines <$> mapM runToInlines runs AllChanges -> do - ils <- concatReduce <$> mapM runToInlines runs + ils <- smushInlines <$> mapM runToInlines runs let attr = ("", ["deletion"], [("author", author), ("date", date)]) return $ spanWith attr ils parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = @@ -361,10 +361,10 @@ parPartToInlines (Drawing fp bs ext) = do modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } return $ imageWith (extentToAttr ext) fp "" "" parPartToInlines (InternalHyperLink anchor runs) = do - ils <- concatReduce <$> mapM runToInlines runs + ils <- smushInlines <$> mapM runToInlines runs return $ link ('#' : anchor) "" ils parPartToInlines (ExternalHyperLink target runs) = do - ils <- concatReduce <$> mapM runToInlines runs + ils <- smushInlines <$> mapM runToInlines runs return $ link target "" ils parPartToInlines (PlainOMath exps) = do return $ math $ writeTeX exps @@ -417,7 +417,7 @@ singleParaToPlain blks = blks cellToBlocks :: Cell -> DocxContext Blocks cellToBlocks (Cell bps) = do - blks <- concatReduce <$> mapM bodyPartToBlocks bps + blks <- smushBlocks <$> mapM bodyPartToBlocks bps return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks rowToBlocksList :: Row -> DocxContext [Blocks] @@ -479,11 +479,11 @@ bodyPartToBlocks (Paragraph pPr parparts) $ concatMap parPartToString parparts | Just (style, n) <- pHeading pPr = do ils <- local (\s-> s{docxInHeaderBlock=True}) $ - (concatReduce <$> mapM parPartToInlines parparts) + (smushInlines <$> mapM parPartToInlines parparts) makeHeaderAnchor $ headerWith ("", delete style (pStyle pPr), []) n ils | otherwise = do - ils <- concatReduce <$> mapM parPartToInlines parparts >>= + ils <- smushInlines <$> mapM parPartToInlines parparts >>= (return . fromList . trimLineBreaks . normalizeSpaces . toList) dropIls <- gets docxDropCap let ils' = dropIls <> ils @@ -561,7 +561,7 @@ bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag) bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps - blks <- concatReduce <$> mapM bodyPartToBlocks blkbps + blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks mediaBag <- gets docxMediaBag return $ (meta, diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs new file mode 100644 index 000000000..39e0df825 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -0,0 +1,154 @@ +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, + PatternGuards #-} + +module Text.Pandoc.Readers.Docx.Combine ( smushInlines + , smushBlocks + ) + where + +import Text.Pandoc.Builder +import Data.List +import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>)) +import qualified Data.Sequence as Seq (null) + +data Modifier a = Modifier (a -> a) + | AttrModifier (Attr -> a -> a) Attr + | NullModifier + +spaceOutInlinesL :: Inlines -> (Inlines, Inlines) +spaceOutInlinesL ms = (l, stackInlines fs (m' <> r)) + where (l, m, r) = spaceOutInlines ms + (fs, m') = unstackInlines m + +spaceOutInlinesR :: Inlines -> (Inlines, Inlines) +spaceOutInlinesR ms = (stackInlines fs (l <> m'), r) + where (l, m, r) = spaceOutInlines ms + (fs, m') = unstackInlines m + +spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines) +spaceOutInlines ils = + let (fs, ils') = unstackInlines ils + contents = unMany ils' + left = case viewl contents of + (Space :< _) -> space + _ -> mempty + right = case viewr contents of + (_ :> Space) -> space + _ -> mempty in + (left, (stackInlines fs $ trimInlines . Many $ contents), right) + +stackInlines :: [Modifier Inlines] -> Inlines -> Inlines +stackInlines [] ms = ms +stackInlines (NullModifier : fs) ms = stackInlines fs ms +stackInlines ((Modifier f) : fs) ms = + if isEmpty ms + then stackInlines fs ms + else f $ stackInlines fs ms +stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms + +unstackInlines :: Inlines -> ([Modifier Inlines], Inlines) +unstackInlines ms = case ilModifier ms of + NullModifier -> ([], ms) + _ -> (f : fs, ms') where + f = ilModifier ms + (fs, ms') = unstackInlines $ ilInnards ms + +ilModifier :: Inlines -> Modifier Inlines +ilModifier ils = case viewl (unMany ils) of + (x :< xs) | Seq.null xs -> case x of + (Emph _) -> Modifier emph + (Strong _) -> Modifier strong + (SmallCaps _) -> Modifier smallcaps + (Strikeout _) -> Modifier strikeout + (Superscript _) -> Modifier superscript + (Subscript _) -> Modifier subscript + (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt) + (Span attr _) -> AttrModifier spanWith attr + _ -> NullModifier + _ -> NullModifier + +ilInnards :: Inlines -> Inlines +ilInnards ils = case viewl (unMany ils) of + (x :< xs) | Seq.null xs -> case x of + (Emph lst) -> fromList lst + (Strong lst) -> fromList lst + (SmallCaps lst) -> fromList lst + (Strikeout lst) -> fromList lst + (Superscript lst) -> fromList lst + (Subscript lst) -> fromList lst + (Link _ lst _) -> fromList lst + (Span _ lst) -> fromList lst + _ -> ils + _ -> ils + +inlinesL :: Inlines -> (Inlines, Inlines) +inlinesL ils = case viewl $ unMany ils of + (s :< sq) -> (singleton s, Many sq) + _ -> (mempty, ils) + +inlinesR :: Inlines -> (Inlines, Inlines) +inlinesR ils = case viewr $ unMany ils of + (sq :> s) -> (Many sq, singleton s) + _ -> (ils, mempty) + +combineInlines :: Inlines -> Inlines -> Inlines +combineInlines x y = + let (xs', x') = inlinesR x + (y', ys') = inlinesL y + in + xs' <> (combineSingletonInlines x' y') <> ys' + +combineSingletonInlines :: Inlines -> Inlines -> Inlines +combineSingletonInlines x y = + let (xfs, xs) = unstackInlines x + (yfs, ys) = unstackInlines y + shared = xfs `intersect` yfs + x_remaining = xfs \\ shared + y_remaining = yfs \\ shared + x_rem_attr = filter isAttrModifier x_remaining + y_rem_attr = filter isAttrModifier y_remaining + in + case null shared of + True | isEmpty xs && isEmpty ys -> + stackInlines (x_rem_attr ++ y_rem_attr) mempty + | isEmpty xs -> + let (sp, y') = spaceOutInlinesL y in + (stackInlines x_rem_attr mempty) <> sp <> y' + | isEmpty ys -> + let (x', sp) = spaceOutInlinesR x in + x' <> sp <> (stackInlines y_rem_attr mempty) + | otherwise -> + let (x', xsp) = spaceOutInlinesR x + (ysp, y') = spaceOutInlinesL y + in + x' <> xsp <> ysp <> y' + False -> stackInlines shared $ + combineInlines + (stackInlines x_remaining xs) + (stackInlines y_remaining ys) + +combineBlocks :: Blocks -> Blocks -> Blocks +combineBlocks bs cs + | bs' :> (BlockQuote bs'') <- viewr (unMany bs) + , (BlockQuote cs'') :< cs' <- viewl (unMany cs) = + Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs' +combineBlocks bs cs = bs <> cs + +instance (Monoid a, Eq a) => Eq (Modifier a) where + (Modifier f) == (Modifier g) = (f mempty == g mempty) + (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty) + (NullModifier) == (NullModifier) = True + _ == _ = False + +isEmpty :: (Monoid a, Eq a) => a -> Bool +isEmpty x = x == mempty + +isAttrModifier :: Modifier a -> Bool +isAttrModifier (AttrModifier _ _) = True +isAttrModifier _ = False + +smushInlines :: [Inlines] -> Inlines +smushInlines xs = foldl combineInlines mempty xs + +smushBlocks :: [Blocks] -> Blocks +smushBlocks xs = foldl combineBlocks mempty xs diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs deleted file mode 100644 index e6de2d474..000000000 --- a/src/Text/Pandoc/Readers/Docx/Reducible.hs +++ /dev/null @@ -1,183 +0,0 @@ -{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, - PatternGuards #-} - -module Text.Pandoc.Readers.Docx.Reducible ( concatReduce - , (<+>) - ) - where - - -import Text.Pandoc.Builder -import Data.List -import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) -import qualified Data.Sequence as Seq (null) - -data Modifier a = Modifier (a -> a) - | AttrModifier (Attr -> a -> a) Attr - | NullModifier - -class (Eq a) => Modifiable a where - modifier :: a -> Modifier a - innards :: a -> a - getL :: a -> (a, a) - getR :: a -> (a, a) - spaceOut :: a -> (a, a, a) - -spaceOutL :: (Monoid a, Modifiable a) => a -> (a, a) -spaceOutL ms = (l, stack fs (m' <> r)) - where (l, m, r) = spaceOut ms - (fs, m') = unstack m - -spaceOutR :: (Monoid a, Modifiable a) => a -> (a, a) -spaceOutR ms = (stack fs (l <> m'), r) - where (l, m, r) = spaceOut ms - (fs, m') = unstack m - -instance (Monoid a, Show a) => Show (Modifier a) where - show (Modifier f) = show $ f mempty - show (AttrModifier f attr) = show $ f attr mempty - show (NullModifier) = "NullModifier" - -instance (Monoid a, Eq a) => Eq (Modifier a) where - (Modifier f) == (Modifier g) = (f mempty == g mempty) - (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty) - (NullModifier) == (NullModifier) = True - _ == _ = False - -instance Modifiable Inlines where - modifier ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph _) -> Modifier emph - (Strong _) -> Modifier strong - (SmallCaps _) -> Modifier smallcaps - (Strikeout _) -> Modifier strikeout - (Superscript _) -> Modifier superscript - (Subscript _) -> Modifier subscript - (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt) - (Span attr _) -> AttrModifier spanWith attr - _ -> NullModifier - _ -> NullModifier - - innards ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph lst) -> fromList lst - (Strong lst) -> fromList lst - (SmallCaps lst) -> fromList lst - (Strikeout lst) -> fromList lst - (Superscript lst) -> fromList lst - (Subscript lst) -> fromList lst - (Link _ lst _) -> fromList lst - (Span _ lst) -> fromList lst - _ -> ils - _ -> ils - - getL ils = case viewl $ unMany ils of - (s :< sq) -> (singleton s, Many sq) - _ -> (mempty, ils) - - getR ils = case viewr $ unMany ils of - (sq :> s) -> (Many sq, singleton s) - _ -> (ils, mempty) - - spaceOut ils = - let (fs, ils') = unstack ils - contents = unMany ils' - left = case viewl contents of - (Space :< _) -> space - _ -> mempty - right = case viewr contents of - (_ :> Space) -> space - _ -> mempty in - (left, (stack fs $ trimInlines .Many $ contents), right) - -instance Modifiable Blocks where - modifier blks = case viewl (unMany blks) of - (x :< xs) | Seq.null xs -> case x of - (BlockQuote _) -> Modifier blockQuote - -- (Div attr _) -> AttrModifier divWith attr - _ -> NullModifier - _ -> NullModifier - - innards blks = case viewl (unMany blks) of - (x :< xs) | Seq.null xs -> case x of - (BlockQuote lst) -> fromList lst - -- (Div attr lst) -> fromList lst - _ -> blks - _ -> blks - - spaceOut blks = (mempty, blks, mempty) - - getL ils = case viewl $ unMany ils of - (s :< sq) -> (singleton s, Many sq) - _ -> (mempty, ils) - - getR ils = case viewr $ unMany ils of - (sq :> s) -> (Many sq, singleton s) - _ -> (ils, mempty) - - -unstack :: (Modifiable a) => a -> ([Modifier a], a) -unstack ms = case modifier ms of - NullModifier -> ([], ms) - _ -> (f : fs, ms') where - f = modifier ms - (fs, ms') = unstack $ innards ms - -stack :: (Monoid a, Modifiable a) => [Modifier a] -> a -> a -stack [] ms = ms -stack (NullModifier : fs) ms = stack fs ms -stack ((Modifier f) : fs) ms = - if isEmpty ms - then stack fs ms - else f $ stack fs ms -stack ((AttrModifier f attr) : fs) ms = f attr $ stack fs ms - -isEmpty :: (Monoid a, Eq a) => a -> Bool -isEmpty x = x == mempty - - -combine :: (Monoid a, Modifiable a, Eq a) => a -> a -> a -combine x y = - let (xs', x') = getR x - (y', ys') = getL y - in - xs' <> (combineSingleton x' y') <> ys' - -isAttrModifier :: Modifier a -> Bool -isAttrModifier (AttrModifier _ _) = True -isAttrModifier _ = False - -combineSingleton :: (Monoid a, Modifiable a, Eq a) => a -> a -> a -combineSingleton x y = - let (xfs, xs) = unstack x - (yfs, ys) = unstack y - shared = xfs `intersect` yfs - x_remaining = xfs \\ shared - y_remaining = yfs \\ shared - x_rem_attr = filter isAttrModifier x_remaining - y_rem_attr = filter isAttrModifier y_remaining - in - case null shared of - True | isEmpty xs && isEmpty ys -> - stack (x_rem_attr ++ y_rem_attr) mempty - | isEmpty xs -> - let (sp, y') = spaceOutL y in - (stack x_rem_attr mempty) <> sp <> y' - | isEmpty ys -> - let (x', sp) = spaceOutR x in - x' <> sp <> (stack y_rem_attr mempty) - | otherwise -> - let (x', xsp) = spaceOutR x - (ysp, y') = spaceOutL y - in - x' <> xsp <> ysp <> y' - False -> stack shared $ - combine - (stack x_remaining xs) - (stack y_remaining ys) - -(<+>) :: (Monoid a, Modifiable a, Eq a) => a -> a -> a -x <+> y = combine x y - -concatReduce :: (Monoid a, Modifiable a) => [a] -> a -concatReduce xs = foldl combine mempty xs |