aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx
diff options
context:
space:
mode:
authorJesse Rosenthal <jrosenthal@jhu.edu>2014-08-16 10:22:55 -0400
committerJesse Rosenthal <jrosenthal@jhu.edu>2014-08-16 10:22:55 -0400
commit0ff9ec2f4e35bf3e99e2e71837d01e9f2e107798 (patch)
treea5bdbaa5929226337b5be6dde4f8030b2370ab8c /src/Text/Pandoc/Readers/Docx
parent2b6e8f4c83e12d5534d7c21a0882a9f4c1e3cb1d (diff)
downloadpandoc-0ff9ec2f4e35bf3e99e2e71837d01e9f2e107798.tar.gz
Rewrite Docx.hs and Reducible to use Builder.
The big news here is a rewrite of Docx to use the builder functions. As opposed to previous attempts, we now see a significant speedup -- times are cut in half (or more) in a few informal tests. Reducible has also been rewritten. It can doubtless be simplified and clarified further. We can consider this, at the moment, a reference for correct behavior.
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Reducible.hs364
1 files changed, 175 insertions, 189 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
index 80a0cee17..8269ca88d 100644
--- a/src/Text/Pandoc/Readers/Docx/Reducible.hs
+++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs
@@ -1,196 +1,182 @@
-{-# LANGUAGE OverloadedStrings, PatternGuards #-}
-
-{-
-Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Docx.Reducible
- Copyright : Copyright (C) 2014 Jesse Rosenthal
- License : GNU GPL, version 2 or above
-
- Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
- Stability : alpha
- Portability : portable
-
-Typeclass for combining adjacent blocks and inlines correctly.
--}
-
-
-module Text.Pandoc.Readers.Docx.Reducible ((<++>),
- (<+++>),
- Reducible,
- Container(..),
- container,
- innards,
- reduceList,
- reduceListB,
- concatR,
- rebuild)
- where
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
+ PatternGuards #-}
-import Text.Pandoc.Builder
-import Data.List ((\\), intersect)
+module Text.Pandoc.Readers.Docx.Reducible ( concatReduce
+ , (<+>)
+ )
+ where
-data Container a = Container ([a] -> a) | NullContainer
-instance (Eq a) => Eq (Container a) where
- (Container x) == (Container y) = ((x []) == (y []))
- NullContainer == NullContainer = True
+import Text.Pandoc.Builder
+import Data.Monoid
+import Data.List
+import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
+import qualified Data.Sequence as Seq (null)
+
+data Modifier a = Modifier (a -> a)
+ | AttrModifier (Attr -> a -> a) Attr
+ | NullModifier
+
+class (Eq a) => Modifiable a where
+ modifier :: a -> Modifier a
+ innards :: a -> a
+ getL :: a -> (a, a)
+ getR :: a -> (a, a)
+ spaceOut :: a -> (a, a, a)
+
+spaceOutL :: (Monoid a, Modifiable a) => a -> (a, a)
+spaceOutL ms = (l, stack fs (m' <> r))
+ where (l, m, r) = spaceOut ms
+ (fs, m') = unstack m
+
+spaceOutR :: (Monoid a, Modifiable a) => a -> (a, a)
+spaceOutR ms = (stack fs (l <> m'), r)
+ where (l, m, r) = spaceOut ms
+ (fs, m') = unstack m
+
+instance (Monoid a, Show a) => Show (Modifier a) where
+ show (Modifier f) = show $ f mempty
+ show (AttrModifier f attr) = show $ f attr mempty
+ show (NullModifier) = "NullModifier"
+
+instance (Monoid a, Eq a) => Eq (Modifier a) where
+ (Modifier f) == (Modifier g) = (f mempty == g mempty)
+ (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
+ (NullModifier) == (NullModifier) = True
_ == _ = False
-instance (Show a) => Show (Container a) where
- show (Container x) = "Container {" ++
- (reverse $ drop 3 $ reverse $ show $ x []) ++
- "}"
- show (NullContainer) = "NullContainer"
-
-class Reducible a where
- (<++>) :: a -> a -> [a]
- container :: a -> Container a
- innards :: a -> [a]
- isSpace :: a -> Bool
-
-(<+++>) :: (Reducible a) => Many a -> Many a -> Many a
-mr <+++> ms = fromList $ reduceList $ toList mr ++ toList ms
-
-reduceListB :: (Reducible a) => Many a -> Many a
-reduceListB = fromList . reduceList . toList
-
-reduceList' :: (Reducible a) => [a] -> [a] -> [a]
-reduceList' acc [] = acc
-reduceList' [] (x:xs) = reduceList' [x] xs
-reduceList' as (x:xs) = reduceList' (init as ++ (last as <++> x) ) xs
-
-reduceList :: (Reducible a) => [a] -> [a]
-reduceList = reduceList' []
-
-concatR :: (Reducible a) => [a] -> [a] -> [a]
-concatR [] [] = []
-concatR [] ss = ss
-concatR rs [] = rs
-concatR rs ss = let (x:xs) = reverse rs
- (y:ys) = ss
- in
- reverse xs ++ ( x <++> y ) ++ ys
-
-combineReducibles :: (Reducible a, Eq a) => a -> a -> [a]
-combineReducibles r s =
- let (conts, rs) = topLevelContainers r
- (conts', ss) = topLevelContainers s
- shared = conts `intersect` conts'
- remaining = conts \\ shared
- remaining' = conts' \\ shared
+instance Modifiable Inlines where
+ modifier ils = case viewl (unMany ils) of
+ (x :< xs) | Seq.null xs -> case x of
+ (Emph _) -> Modifier emph
+ (Strong _) -> Modifier strong
+ (SmallCaps _) -> Modifier smallcaps
+ (Strikeout _) -> Modifier strikeout
+ (Superscript _) -> Modifier superscript
+ (Subscript _) -> Modifier subscript
+ (Span attr _) -> AttrModifier spanWith attr
+ _ -> NullModifier
+ _ -> NullModifier
+
+ innards ils = case viewl (unMany ils) of
+ (x :< xs) | Seq.null xs -> case x of
+ (Emph lst) -> fromList lst
+ (Strong lst) -> fromList lst
+ (SmallCaps lst) -> fromList lst
+ (Strikeout lst) -> fromList lst
+ (Superscript lst) -> fromList lst
+ (Subscript lst) -> fromList lst
+ (Span _ lst) -> fromList lst
+ _ -> ils
+ _ -> ils
+
+ getL ils = case viewl $ unMany ils of
+ (s :< sq) -> (singleton s, Many sq)
+ _ -> (mempty, ils)
+
+ getR ils = case viewr $ unMany ils of
+ (sq :> s) -> (Many sq, singleton s)
+ _ -> (ils, mempty)
+
+ spaceOut ils =
+ let (fs, ils') = unstack ils
+ contents = unMany ils'
+ left = case viewl contents of
+ (Space :< _) -> space
+ _ -> mempty
+ right = case viewr contents of
+ (_ :> Space) -> space
+ _ -> mempty in
+ (left, (stack fs $ trimInlines .Many $ contents), right)
+
+instance Modifiable Blocks where
+ modifier blks = case viewl (unMany blks) of
+ (x :< xs) | Seq.null xs -> case x of
+ (BlockQuote _) -> Modifier blockQuote
+ -- (Div attr _) -> AttrModifier divWith attr
+ _ -> NullModifier
+ _ -> NullModifier
+
+ innards blks = case viewl (unMany blks) of
+ (x :< xs) | Seq.null xs -> case x of
+ (BlockQuote lst) -> fromList lst
+ -- (Div attr lst) -> fromList lst
+ _ -> blks
+ _ -> blks
+
+ spaceOut blks = (mempty, blks, mempty)
+
+ getL ils = case viewl $ unMany ils of
+ (s :< sq) -> (singleton s, Many sq)
+ _ -> (mempty, ils)
+
+ getR ils = case viewr $ unMany ils of
+ (sq :> s) -> (Many sq, singleton s)
+ _ -> (ils, mempty)
+
+
+unstack :: (Modifiable a) => a -> ([Modifier a], a)
+unstack ms = case modifier ms of
+ NullModifier -> ([], ms)
+ _ -> (f : fs, ms') where
+ f = modifier ms
+ (fs, ms') = unstack $ innards ms
+
+stack :: (Monoid a, Modifiable a) => [Modifier a] -> a -> a
+stack [] ms = ms
+stack (NullModifier : fs) ms = stack fs ms
+stack ((Modifier f) : fs) ms =
+ if isEmpty ms
+ then stack fs ms
+ else f $ stack fs ms
+stack ((AttrModifier f attr) : fs) ms = f attr $ stack fs ms
+
+isEmpty :: (Monoid a, Eq a) => a -> Bool
+isEmpty x = x == mempty
+
+
+combine :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
+combine x y =
+ let (xs', x') = getR x
+ (y', ys') = getL y
+ in
+ xs' <> (combineSingleton x' y') <> ys'
+
+isAttrModifier :: Modifier a -> Bool
+isAttrModifier (AttrModifier _ _) = True
+isAttrModifier _ = False
+
+combineSingleton :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
+combineSingleton x y =
+ let (xfs, xs) = unstack x
+ (yfs, ys) = unstack y
+ shared = xfs `intersect` yfs
+ x_remaining = xfs \\ shared
+ y_remaining = yfs \\ shared
+ x_rem_attr = filter isAttrModifier x_remaining
+ y_rem_attr = filter isAttrModifier y_remaining
in
case null shared of
- True | (x : xs) <- reverse rs
- , isSpace x -> case xs of
- [] -> [x, s]
- _ -> rebuild conts (reverse xs) ++ [x, s]
- | (x : xs) <- ss
- , isSpace x -> case xs of
- [] -> [r, x]
- _ -> [r, x] ++ rebuild conts' (xs)
- True -> [r,s]
- False -> rebuild
- shared $
- reduceList $
- (rebuild remaining rs) ++ (rebuild remaining' ss)
-
-instance Reducible Inline where
- s1@(Span (id1, classes1, kvs1) ils1) <++> s2@(Span (id2, classes2, kvs2) ils2) =
- let classes' = classes1 `intersect` classes2
- kvs' = kvs1 `intersect` kvs2
- classes1' = classes1 \\ classes'
- kvs1' = kvs1 \\ kvs'
- classes2' = classes2 \\ classes'
- kvs2' = kvs2 \\ kvs'
- in
- case null classes' && null kvs' of
- True -> [s1,s2]
- False -> let attr' = ("", classes', kvs')
- attr1' = (id1, classes1', kvs1')
- attr2' = (id2, classes2', kvs2')
- s1' = case null classes1' && null kvs1' of
- True -> ils1
- False -> [Span attr1' ils1]
- s2' = case null classes2' && null kvs2' of
- True -> ils2
- False -> [Span attr2' ils2]
- in
- [Span attr' $ reduceList $ s1' ++ s2']
- (Str x) <++> (Str y) = [Str (x++y)]
- il <++> il' = combineReducibles il il'
-
- container (Emph _) = Container Emph
- container (Strong _) = Container Strong
- container (SmallCaps _) = Container SmallCaps
- container (Strikeout _) = Container Strikeout
- container (Subscript _) = Container Subscript
- container (Superscript _) = Container Superscript
- container (Quoted qt _) = Container $ Quoted qt
- container (Cite cs _) = Container $ Cite cs
- container (Span attr _) = Container $ Span attr
- container _ = NullContainer
-
- innards (Emph ils) = ils
- innards (SmallCaps ils) = ils
- innards (Strong ils) = ils
- innards (Strikeout ils) = ils
- innards (Subscript ils) = ils
- innards (Superscript ils) = ils
- innards (Quoted _ ils) = ils
- innards (Cite _ ils) = ils
- innards (Span _ ils) = ils
- innards _ = []
-
- isSpace Space = True
- isSpace _ = False
-
-instance Reducible Block where
- (Div (ident, classes, kvs) blks) <++> blk | "list-item" `elem` classes =
- [Div (ident, classes, kvs) (reduceList blks), blk]
-
- blk <++> blk' = combineReducibles blk blk'
-
- container (BlockQuote _) = Container BlockQuote
- container (Div attr _) = Container $ Div attr
- container _ = NullContainer
-
- innards (BlockQuote bs) = bs
- innards (Div _ bs) = bs
- innards _ = []
-
- isSpace _ = False
-
-
-topLevelContainers' :: (Reducible a) => [a] -> ([Container a], [a])
-topLevelContainers' (r : []) = case container r of
- NullContainer -> ([], [r])
- _ ->
- let (conts, inns) = topLevelContainers' (innards r)
- in
- ((container r) : conts, inns)
-topLevelContainers' rs = ([], rs)
-
-topLevelContainers :: (Reducible a) => a -> ([Container a], [a])
-topLevelContainers il = topLevelContainers' [il]
-
-rebuild :: [Container a] -> [a] -> [a]
-rebuild [] xs = xs
-rebuild ((Container f) : cs) xs = rebuild cs $ [f xs]
-rebuild (NullContainer : cs) xs = rebuild cs $ xs
+ True | isEmpty xs && isEmpty ys ->
+ stack (x_rem_attr ++ y_rem_attr) mempty
+ | isEmpty xs ->
+ let (sp, y') = spaceOutL y in
+ (stack x_rem_attr mempty) <> sp <> y'
+ | isEmpty ys ->
+ let (x', sp) = spaceOutR x in
+ x' <> sp <> (stack y_rem_attr mempty)
+ | otherwise ->
+ let (x', xsp) = spaceOutR x
+ (ysp, y') = spaceOutL y
+ in
+ x' <> xsp <> ysp <> y'
+ False -> stack shared $
+ combine
+ (stack x_remaining xs)
+ (stack y_remaining ys)
+
+(<+>) :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
+x <+> y = combine x y
+
+concatReduce :: (Monoid a, Modifiable a) => [a] -> a
+concatReduce xs = foldl combine mempty xs