diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Combine.hs | 154 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 90 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Reducible.hs | 181 |
3 files changed, 218 insertions, 207 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs new file mode 100644 index 000000000..39e0df825 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -0,0 +1,154 @@ +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, + PatternGuards #-} + +module Text.Pandoc.Readers.Docx.Combine ( smushInlines + , smushBlocks + ) + where + +import Text.Pandoc.Builder +import Data.List +import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>)) +import qualified Data.Sequence as Seq (null) + +data Modifier a = Modifier (a -> a) + | AttrModifier (Attr -> a -> a) Attr + | NullModifier + +spaceOutInlinesL :: Inlines -> (Inlines, Inlines) +spaceOutInlinesL ms = (l, stackInlines fs (m' <> r)) + where (l, m, r) = spaceOutInlines ms + (fs, m') = unstackInlines m + +spaceOutInlinesR :: Inlines -> (Inlines, Inlines) +spaceOutInlinesR ms = (stackInlines fs (l <> m'), r) + where (l, m, r) = spaceOutInlines ms + (fs, m') = unstackInlines m + +spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines) +spaceOutInlines ils = + let (fs, ils') = unstackInlines ils + contents = unMany ils' + left = case viewl contents of + (Space :< _) -> space + _ -> mempty + right = case viewr contents of + (_ :> Space) -> space + _ -> mempty in + (left, (stackInlines fs $ trimInlines . Many $ contents), right) + +stackInlines :: [Modifier Inlines] -> Inlines -> Inlines +stackInlines [] ms = ms +stackInlines (NullModifier : fs) ms = stackInlines fs ms +stackInlines ((Modifier f) : fs) ms = + if isEmpty ms + then stackInlines fs ms + else f $ stackInlines fs ms +stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms + +unstackInlines :: Inlines -> ([Modifier Inlines], Inlines) +unstackInlines ms = case ilModifier ms of + NullModifier -> ([], ms) + _ -> (f : fs, ms') where + f = ilModifier ms + (fs, ms') = unstackInlines $ ilInnards ms + +ilModifier :: Inlines -> Modifier Inlines +ilModifier ils = case viewl (unMany ils) of + (x :< xs) | Seq.null xs -> case x of + (Emph _) -> Modifier emph + (Strong _) -> Modifier strong + (SmallCaps _) -> Modifier smallcaps + (Strikeout _) -> Modifier strikeout + (Superscript _) -> Modifier superscript + (Subscript _) -> Modifier subscript + (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt) + (Span attr _) -> AttrModifier spanWith attr + _ -> NullModifier + _ -> NullModifier + +ilInnards :: Inlines -> Inlines +ilInnards ils = case viewl (unMany ils) of + (x :< xs) | Seq.null xs -> case x of + (Emph lst) -> fromList lst + (Strong lst) -> fromList lst + (SmallCaps lst) -> fromList lst + (Strikeout lst) -> fromList lst + (Superscript lst) -> fromList lst + (Subscript lst) -> fromList lst + (Link _ lst _) -> fromList lst + (Span _ lst) -> fromList lst + _ -> ils + _ -> ils + +inlinesL :: Inlines -> (Inlines, Inlines) +inlinesL ils = case viewl $ unMany ils of + (s :< sq) -> (singleton s, Many sq) + _ -> (mempty, ils) + +inlinesR :: Inlines -> (Inlines, Inlines) +inlinesR ils = case viewr $ unMany ils of + (sq :> s) -> (Many sq, singleton s) + _ -> (ils, mempty) + +combineInlines :: Inlines -> Inlines -> Inlines +combineInlines x y = + let (xs', x') = inlinesR x + (y', ys') = inlinesL y + in + xs' <> (combineSingletonInlines x' y') <> ys' + +combineSingletonInlines :: Inlines -> Inlines -> Inlines +combineSingletonInlines x y = + let (xfs, xs) = unstackInlines x + (yfs, ys) = unstackInlines y + shared = xfs `intersect` yfs + x_remaining = xfs \\ shared + y_remaining = yfs \\ shared + x_rem_attr = filter isAttrModifier x_remaining + y_rem_attr = filter isAttrModifier y_remaining + in + case null shared of + True | isEmpty xs && isEmpty ys -> + stackInlines (x_rem_attr ++ y_rem_attr) mempty + | isEmpty xs -> + let (sp, y') = spaceOutInlinesL y in + (stackInlines x_rem_attr mempty) <> sp <> y' + | isEmpty ys -> + let (x', sp) = spaceOutInlinesR x in + x' <> sp <> (stackInlines y_rem_attr mempty) + | otherwise -> + let (x', xsp) = spaceOutInlinesR x + (ysp, y') = spaceOutInlinesL y + in + x' <> xsp <> ysp <> y' + False -> stackInlines shared $ + combineInlines + (stackInlines x_remaining xs) + (stackInlines y_remaining ys) + +combineBlocks :: Blocks -> Blocks -> Blocks +combineBlocks bs cs + | bs' :> (BlockQuote bs'') <- viewr (unMany bs) + , (BlockQuote cs'') :< cs' <- viewl (unMany cs) = + Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs' +combineBlocks bs cs = bs <> cs + +instance (Monoid a, Eq a) => Eq (Modifier a) where + (Modifier f) == (Modifier g) = (f mempty == g mempty) + (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty) + (NullModifier) == (NullModifier) = True + _ == _ = False + +isEmpty :: (Monoid a, Eq a) => a -> Bool +isEmpty x = x == mempty + +isAttrModifier :: Modifier a -> Bool +isAttrModifier (AttrModifier _ _) = True +isAttrModifier _ = False + +smushInlines :: [Inlines] -> Inlines +smushInlines xs = foldl combineInlines mempty xs + +smushBlocks :: [Blocks] -> Blocks +smushBlocks xs = foldl combineBlocks mempty xs diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index eec8b12c9..364483929 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -50,6 +50,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , Row(..) , Cell(..) , archiveToDocx + , archiveToDocxWithWarnings ) where import Codec.Archive.Zip import Text.XML.Light @@ -60,6 +61,7 @@ import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Reader +import Control.Monad.State import Control.Applicative ((<|>)) import qualified Data.Map as M import Text.Pandoc.Compat.Except @@ -81,16 +83,20 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show +data ReaderState = ReaderState { stateWarnings :: [String] } + deriving Show + + data DocxError = DocxError | WrongElem deriving Show instance Error DocxError where noMsg = WrongElem -type D = ExceptT DocxError (Reader ReaderEnv) +type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState)) -runD :: D a -> ReaderEnv -> Either DocxError a -runD dx re = runReader (runExceptT dx) re +runD :: D a -> ReaderEnv -> ReaderState -> (Either DocxError a, ReaderState) +runD dx re rs = runState (runReaderT (runExceptT dx) re) rs maybeToD :: Maybe a -> D a maybeToD (Just a) = return a @@ -257,7 +263,10 @@ type Author = String type ChangeDate = String archiveToDocx :: Archive -> Either DocxError Docx -archiveToDocx archive = do +archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive + +archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) +archiveToDocxWithWarnings archive = do let notes = archiveToNotes archive numbering = archiveToNumbering archive rels = archiveToRelationships archive @@ -265,8 +274,12 @@ archiveToDocx archive = do (styles, parstyles) = archiveToStyles archive rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles InDocument - doc <- runD (archiveToDocument archive) rEnv - return $ Docx doc + rState = ReaderState { stateWarnings = [] } + (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState + case eitherDoc of + Right doc -> Right (Docx doc, stateWarnings st) + Left e -> Left e + archiveToDocument :: Archive -> D Document @@ -576,12 +589,14 @@ elemToBodyPart ns element sty <- asks envParStyles let parstyle = elemToParagraphStyle ns element sty parparts <- mapD (elemToParPart ns) (elChildren element) - case pNumInfo parstyle of - Just (numId, lvl) -> do - num <- asks envNumbering - let levelInfo = lookupLevel numId lvl num - return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> return $ Paragraph parstyle parparts + -- Word uses list enumeration for numbered headings, so we only + -- want to infer a list from the styles if it is NOT a heading. + case pHeading parstyle of + Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do + num <- asks envNumbering + let levelInfo = lookupLevel numId lvl num + return $ ListItem parstyle numId lvl levelInfo parparts + _ -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChild (elemName ns "w" "tblPr") element @@ -702,36 +717,58 @@ elemToExtent drawingElem = getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem >>= findAttr (QName at Nothing Nothing) >>= safeRead -elemToRun :: NameSpaces -> Element -> D Run -elemToRun ns element - | isElem ns "w" "r" element - , Just drawingElem <- findChild (elemName ns "w" "drawing") element = + +childElemToRun :: NameSpaces -> Element -> D Run +childElemToRun ns element + | isElem ns "w" "drawing" element = let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" - drawing = findElement (QName "blip" (Just a_ns) (Just "a")) drawingElem + drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= - (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent drawingElem) + (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent element) Nothing -> throwError WrongElem -elemToRun ns element - | isElem ns "w" "r" element - , Just ref <- findChild (elemName ns "w" "footnoteReference") element - , Just fnId <- findAttr (elemName ns "w" "id") ref = do +childElemToRun ns element + | isElem ns "w" "footnoteReference" element + , Just fnId <- findAttr (elemName ns "w" "id") element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Footnote bps Nothing -> return $ Footnote [] -elemToRun ns element - | isElem ns "w" "r" element - , Just ref <- findChild (elemName ns "w" "endnoteReference") element - , Just enId <- findAttr (elemName ns "w" "id") ref = do +childElemToRun ns element + | isElem ns "w" "endnoteReference" element + , Just enId <- findAttr (elemName ns "w" "id") element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Endnote bps Nothing -> return $ Endnote [] +childElemToRun _ _ = throwError WrongElem + +elemToRun :: NameSpaces -> Element -> D Run +elemToRun ns element + | isElem ns "w" "r" element + , Just altCont <- findChild (elemName ns "mc" "AlternateContent") element = + do let choices = findChildren (elemName ns "mc" "Choice") altCont + choiceChildren = map head $ filter (not . null) $ map elChildren choices + outputs <- mapD (childElemToRun ns) choiceChildren + case outputs of + r : _ -> return r + [] -> throwError WrongElem +elemToRun ns element + | isElem ns "w" "r" element + , Just drawingElem <- findChild (elemName ns "w" "drawing") element = + childElemToRun ns drawingElem +elemToRun ns element + | isElem ns "w" "r" element + , Just ref <- findChild (elemName ns "w" "footnoteReference") element = + childElemToRun ns ref +elemToRun ns element + | isElem ns "w" "r" element + , Just ref <- findChild (elemName ns "w" "endnoteReference") element = + childElemToRun ns ref elemToRun ns element | isElem ns "w" "r" element = do runElems <- elemToRunElems ns element @@ -940,3 +977,4 @@ elemToRunElems _ _ = throwError WrongElem setFont :: Maybe Font -> ReaderEnv -> ReaderEnv setFont f s = s{envFont = f} + diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs deleted file mode 100644 index c93b40119..000000000 --- a/src/Text/Pandoc/Readers/Docx/Reducible.hs +++ /dev/null @@ -1,181 +0,0 @@ -{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, - PatternGuards #-} - -module Text.Pandoc.Readers.Docx.Reducible ( concatReduce - , (<+>) - ) - where - - -import Text.Pandoc.Builder -import Data.List -import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) -import qualified Data.Sequence as Seq (null) - -data Modifier a = Modifier (a -> a) - | AttrModifier (Attr -> a -> a) Attr - | NullModifier - -class (Eq a) => Modifiable a where - modifier :: a -> Modifier a - innards :: a -> a - getL :: a -> (a, a) - getR :: a -> (a, a) - spaceOut :: a -> (a, a, a) - -spaceOutL :: (Monoid a, Modifiable a) => a -> (a, a) -spaceOutL ms = (l, stack fs (m' <> r)) - where (l, m, r) = spaceOut ms - (fs, m') = unstack m - -spaceOutR :: (Monoid a, Modifiable a) => a -> (a, a) -spaceOutR ms = (stack fs (l <> m'), r) - where (l, m, r) = spaceOut ms - (fs, m') = unstack m - -instance (Monoid a, Show a) => Show (Modifier a) where - show (Modifier f) = show $ f mempty - show (AttrModifier f attr) = show $ f attr mempty - show (NullModifier) = "NullModifier" - -instance (Monoid a, Eq a) => Eq (Modifier a) where - (Modifier f) == (Modifier g) = (f mempty == g mempty) - (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty) - (NullModifier) == (NullModifier) = True - _ == _ = False - -instance Modifiable Inlines where - modifier ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph _) -> Modifier emph - (Strong _) -> Modifier strong - (SmallCaps _) -> Modifier smallcaps - (Strikeout _) -> Modifier strikeout - (Superscript _) -> Modifier superscript - (Subscript _) -> Modifier subscript - (Span attr _) -> AttrModifier spanWith attr - _ -> NullModifier - _ -> NullModifier - - innards ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph lst) -> fromList lst - (Strong lst) -> fromList lst - (SmallCaps lst) -> fromList lst - (Strikeout lst) -> fromList lst - (Superscript lst) -> fromList lst - (Subscript lst) -> fromList lst - (Span _ lst) -> fromList lst - _ -> ils - _ -> ils - - getL ils = case viewl $ unMany ils of - (s :< sq) -> (singleton s, Many sq) - _ -> (mempty, ils) - - getR ils = case viewr $ unMany ils of - (sq :> s) -> (Many sq, singleton s) - _ -> (ils, mempty) - - spaceOut ils = - let (fs, ils') = unstack ils - contents = unMany ils' - left = case viewl contents of - (Space :< _) -> space - _ -> mempty - right = case viewr contents of - (_ :> Space) -> space - _ -> mempty in - (left, (stack fs $ trimInlines .Many $ contents), right) - -instance Modifiable Blocks where - modifier blks = case viewl (unMany blks) of - (x :< xs) | Seq.null xs -> case x of - (BlockQuote _) -> Modifier blockQuote - -- (Div attr _) -> AttrModifier divWith attr - _ -> NullModifier - _ -> NullModifier - - innards blks = case viewl (unMany blks) of - (x :< xs) | Seq.null xs -> case x of - (BlockQuote lst) -> fromList lst - -- (Div attr lst) -> fromList lst - _ -> blks - _ -> blks - - spaceOut blks = (mempty, blks, mempty) - - getL ils = case viewl $ unMany ils of - (s :< sq) -> (singleton s, Many sq) - _ -> (mempty, ils) - - getR ils = case viewr $ unMany ils of - (sq :> s) -> (Many sq, singleton s) - _ -> (ils, mempty) - - -unstack :: (Modifiable a) => a -> ([Modifier a], a) -unstack ms = case modifier ms of - NullModifier -> ([], ms) - _ -> (f : fs, ms') where - f = modifier ms - (fs, ms') = unstack $ innards ms - -stack :: (Monoid a, Modifiable a) => [Modifier a] -> a -> a -stack [] ms = ms -stack (NullModifier : fs) ms = stack fs ms -stack ((Modifier f) : fs) ms = - if isEmpty ms - then stack fs ms - else f $ stack fs ms -stack ((AttrModifier f attr) : fs) ms = f attr $ stack fs ms - -isEmpty :: (Monoid a, Eq a) => a -> Bool -isEmpty x = x == mempty - - -combine :: (Monoid a, Modifiable a, Eq a) => a -> a -> a -combine x y = - let (xs', x') = getR x - (y', ys') = getL y - in - xs' <> (combineSingleton x' y') <> ys' - -isAttrModifier :: Modifier a -> Bool -isAttrModifier (AttrModifier _ _) = True -isAttrModifier _ = False - -combineSingleton :: (Monoid a, Modifiable a, Eq a) => a -> a -> a -combineSingleton x y = - let (xfs, xs) = unstack x - (yfs, ys) = unstack y - shared = xfs `intersect` yfs - x_remaining = xfs \\ shared - y_remaining = yfs \\ shared - x_rem_attr = filter isAttrModifier x_remaining - y_rem_attr = filter isAttrModifier y_remaining - in - case null shared of - True | isEmpty xs && isEmpty ys -> - stack (x_rem_attr ++ y_rem_attr) mempty - | isEmpty xs -> - let (sp, y') = spaceOutL y in - (stack x_rem_attr mempty) <> sp <> y' - | isEmpty ys -> - let (x', sp) = spaceOutR x in - x' <> sp <> (stack y_rem_attr mempty) - | otherwise -> - let (x', xsp) = spaceOutR x - (ysp, y') = spaceOutL y - in - x' <> xsp <> ysp <> y' - False -> stack shared $ - combine - (stack x_remaining xs) - (stack y_remaining ys) - -(<+>) :: (Monoid a, Modifiable a, Eq a) => a -> a -> a -x <+> y = combine x y - -concatReduce :: (Monoid a, Modifiable a) => [a] -> a -concatReduce xs = foldl combine mempty xs |