aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs154
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs90
-rw-r--r--src/Text/Pandoc/Readers/Docx/Reducible.hs181
3 files changed, 218 insertions, 207 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
new file mode 100644
index 000000000..39e0df825
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -0,0 +1,154 @@
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
+ PatternGuards #-}
+
+module Text.Pandoc.Readers.Docx.Combine ( smushInlines
+ , smushBlocks
+ )
+ where
+
+import Text.Pandoc.Builder
+import Data.List
+import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>))
+import qualified Data.Sequence as Seq (null)
+
+data Modifier a = Modifier (a -> a)
+ | AttrModifier (Attr -> a -> a) Attr
+ | NullModifier
+
+spaceOutInlinesL :: Inlines -> (Inlines, Inlines)
+spaceOutInlinesL ms = (l, stackInlines fs (m' <> r))
+ where (l, m, r) = spaceOutInlines ms
+ (fs, m') = unstackInlines m
+
+spaceOutInlinesR :: Inlines -> (Inlines, Inlines)
+spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
+ where (l, m, r) = spaceOutInlines ms
+ (fs, m') = unstackInlines m
+
+spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines)
+spaceOutInlines ils =
+ let (fs, ils') = unstackInlines ils
+ contents = unMany ils'
+ left = case viewl contents of
+ (Space :< _) -> space
+ _ -> mempty
+ right = case viewr contents of
+ (_ :> Space) -> space
+ _ -> mempty in
+ (left, (stackInlines fs $ trimInlines . Many $ contents), right)
+
+stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
+stackInlines [] ms = ms
+stackInlines (NullModifier : fs) ms = stackInlines fs ms
+stackInlines ((Modifier f) : fs) ms =
+ if isEmpty ms
+ then stackInlines fs ms
+ else f $ stackInlines fs ms
+stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms
+
+unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
+unstackInlines ms = case ilModifier ms of
+ NullModifier -> ([], ms)
+ _ -> (f : fs, ms') where
+ f = ilModifier ms
+ (fs, ms') = unstackInlines $ ilInnards ms
+
+ilModifier :: Inlines -> Modifier Inlines
+ilModifier ils = case viewl (unMany ils) of
+ (x :< xs) | Seq.null xs -> case x of
+ (Emph _) -> Modifier emph
+ (Strong _) -> Modifier strong
+ (SmallCaps _) -> Modifier smallcaps
+ (Strikeout _) -> Modifier strikeout
+ (Superscript _) -> Modifier superscript
+ (Subscript _) -> Modifier subscript
+ (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
+ (Span attr _) -> AttrModifier spanWith attr
+ _ -> NullModifier
+ _ -> NullModifier
+
+ilInnards :: Inlines -> Inlines
+ilInnards ils = case viewl (unMany ils) of
+ (x :< xs) | Seq.null xs -> case x of
+ (Emph lst) -> fromList lst
+ (Strong lst) -> fromList lst
+ (SmallCaps lst) -> fromList lst
+ (Strikeout lst) -> fromList lst
+ (Superscript lst) -> fromList lst
+ (Subscript lst) -> fromList lst
+ (Link _ lst _) -> fromList lst
+ (Span _ lst) -> fromList lst
+ _ -> ils
+ _ -> ils
+
+inlinesL :: Inlines -> (Inlines, Inlines)
+inlinesL ils = case viewl $ unMany ils of
+ (s :< sq) -> (singleton s, Many sq)
+ _ -> (mempty, ils)
+
+inlinesR :: Inlines -> (Inlines, Inlines)
+inlinesR ils = case viewr $ unMany ils of
+ (sq :> s) -> (Many sq, singleton s)
+ _ -> (ils, mempty)
+
+combineInlines :: Inlines -> Inlines -> Inlines
+combineInlines x y =
+ let (xs', x') = inlinesR x
+ (y', ys') = inlinesL y
+ in
+ xs' <> (combineSingletonInlines x' y') <> ys'
+
+combineSingletonInlines :: Inlines -> Inlines -> Inlines
+combineSingletonInlines x y =
+ let (xfs, xs) = unstackInlines x
+ (yfs, ys) = unstackInlines y
+ shared = xfs `intersect` yfs
+ x_remaining = xfs \\ shared
+ y_remaining = yfs \\ shared
+ x_rem_attr = filter isAttrModifier x_remaining
+ y_rem_attr = filter isAttrModifier y_remaining
+ in
+ case null shared of
+ True | isEmpty xs && isEmpty ys ->
+ stackInlines (x_rem_attr ++ y_rem_attr) mempty
+ | isEmpty xs ->
+ let (sp, y') = spaceOutInlinesL y in
+ (stackInlines x_rem_attr mempty) <> sp <> y'
+ | isEmpty ys ->
+ let (x', sp) = spaceOutInlinesR x in
+ x' <> sp <> (stackInlines y_rem_attr mempty)
+ | otherwise ->
+ let (x', xsp) = spaceOutInlinesR x
+ (ysp, y') = spaceOutInlinesL y
+ in
+ x' <> xsp <> ysp <> y'
+ False -> stackInlines shared $
+ combineInlines
+ (stackInlines x_remaining xs)
+ (stackInlines y_remaining ys)
+
+combineBlocks :: Blocks -> Blocks -> Blocks
+combineBlocks bs cs
+ | bs' :> (BlockQuote bs'') <- viewr (unMany bs)
+ , (BlockQuote cs'') :< cs' <- viewl (unMany cs) =
+ Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs'
+combineBlocks bs cs = bs <> cs
+
+instance (Monoid a, Eq a) => Eq (Modifier a) where
+ (Modifier f) == (Modifier g) = (f mempty == g mempty)
+ (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
+ (NullModifier) == (NullModifier) = True
+ _ == _ = False
+
+isEmpty :: (Monoid a, Eq a) => a -> Bool
+isEmpty x = x == mempty
+
+isAttrModifier :: Modifier a -> Bool
+isAttrModifier (AttrModifier _ _) = True
+isAttrModifier _ = False
+
+smushInlines :: [Inlines] -> Inlines
+smushInlines xs = foldl combineInlines mempty xs
+
+smushBlocks :: [Blocks] -> Blocks
+smushBlocks xs = foldl combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index eec8b12c9..364483929 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -50,6 +50,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, Row(..)
, Cell(..)
, archiveToDocx
+ , archiveToDocxWithWarnings
) where
import Codec.Archive.Zip
import Text.XML.Light
@@ -60,6 +61,7 @@ import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
import qualified Text.Pandoc.UTF8 as UTF8
import Control.Monad.Reader
+import Control.Monad.State
import Control.Applicative ((<|>))
import qualified Data.Map as M
import Text.Pandoc.Compat.Except
@@ -81,16 +83,20 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
}
deriving Show
+data ReaderState = ReaderState { stateWarnings :: [String] }
+ deriving Show
+
+
data DocxError = DocxError | WrongElem
deriving Show
instance Error DocxError where
noMsg = WrongElem
-type D = ExceptT DocxError (Reader ReaderEnv)
+type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState))
-runD :: D a -> ReaderEnv -> Either DocxError a
-runD dx re = runReader (runExceptT dx) re
+runD :: D a -> ReaderEnv -> ReaderState -> (Either DocxError a, ReaderState)
+runD dx re rs = runState (runReaderT (runExceptT dx) re) rs
maybeToD :: Maybe a -> D a
maybeToD (Just a) = return a
@@ -257,7 +263,10 @@ type Author = String
type ChangeDate = String
archiveToDocx :: Archive -> Either DocxError Docx
-archiveToDocx archive = do
+archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
+
+archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
+archiveToDocxWithWarnings archive = do
let notes = archiveToNotes archive
numbering = archiveToNumbering archive
rels = archiveToRelationships archive
@@ -265,8 +274,12 @@ archiveToDocx archive = do
(styles, parstyles) = archiveToStyles archive
rEnv =
ReaderEnv notes numbering rels media Nothing styles parstyles InDocument
- doc <- runD (archiveToDocument archive) rEnv
- return $ Docx doc
+ rState = ReaderState { stateWarnings = [] }
+ (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
+ case eitherDoc of
+ Right doc -> Right (Docx doc, stateWarnings st)
+ Left e -> Left e
+
archiveToDocument :: Archive -> D Document
@@ -576,12 +589,14 @@ elemToBodyPart ns element
sty <- asks envParStyles
let parstyle = elemToParagraphStyle ns element sty
parparts <- mapD (elemToParPart ns) (elChildren element)
- case pNumInfo parstyle of
- Just (numId, lvl) -> do
- num <- asks envNumbering
- let levelInfo = lookupLevel numId lvl num
- return $ ListItem parstyle numId lvl levelInfo parparts
- Nothing -> return $ Paragraph parstyle parparts
+ -- Word uses list enumeration for numbered headings, so we only
+ -- want to infer a list from the styles if it is NOT a heading.
+ case pHeading parstyle of
+ Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
+ num <- asks envNumbering
+ let levelInfo = lookupLevel numId lvl num
+ return $ ListItem parstyle numId lvl levelInfo parparts
+ _ -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let caption' = findChild (elemName ns "w" "tblPr") element
@@ -702,36 +717,58 @@ elemToExtent drawingElem =
getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
>>= findAttr (QName at Nothing Nothing) >>= safeRead
-elemToRun :: NameSpaces -> Element -> D Run
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just drawingElem <- findChild (elemName ns "w" "drawing") element =
+
+childElemToRun :: NameSpaces -> Element -> D Run
+childElemToRun ns element
+ | isElem ns "w" "drawing" element =
let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
- drawing = findElement (QName "blip" (Just a_ns) (Just "a")) drawingElem
+ drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
>>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
in
case drawing of
Just s -> expandDrawingId s >>=
- (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent drawingElem)
+ (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent element)
Nothing -> throwError WrongElem
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just ref <- findChild (elemName ns "w" "footnoteReference") element
- , Just fnId <- findAttr (elemName ns "w" "id") ref = do
+childElemToRun ns element
+ | isElem ns "w" "footnoteReference" element
+ , Just fnId <- findAttr (elemName ns "w" "id") element = do
notes <- asks envNotes
case lookupFootnote fnId notes of
Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
return $ Footnote bps
Nothing -> return $ Footnote []
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just ref <- findChild (elemName ns "w" "endnoteReference") element
- , Just enId <- findAttr (elemName ns "w" "id") ref = do
+childElemToRun ns element
+ | isElem ns "w" "endnoteReference" element
+ , Just enId <- findAttr (elemName ns "w" "id") element = do
notes <- asks envNotes
case lookupEndnote enId notes of
Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
return $ Endnote bps
Nothing -> return $ Endnote []
+childElemToRun _ _ = throwError WrongElem
+
+elemToRun :: NameSpaces -> Element -> D Run
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just altCont <- findChild (elemName ns "mc" "AlternateContent") element =
+ do let choices = findChildren (elemName ns "mc" "Choice") altCont
+ choiceChildren = map head $ filter (not . null) $ map elChildren choices
+ outputs <- mapD (childElemToRun ns) choiceChildren
+ case outputs of
+ r : _ -> return r
+ [] -> throwError WrongElem
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just drawingElem <- findChild (elemName ns "w" "drawing") element =
+ childElemToRun ns drawingElem
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just ref <- findChild (elemName ns "w" "footnoteReference") element =
+ childElemToRun ns ref
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just ref <- findChild (elemName ns "w" "endnoteReference") element =
+ childElemToRun ns ref
elemToRun ns element
| isElem ns "w" "r" element = do
runElems <- elemToRunElems ns element
@@ -940,3 +977,4 @@ elemToRunElems _ _ = throwError WrongElem
setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
setFont f s = s{envFont = f}
+
diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
deleted file mode 100644
index c93b40119..000000000
--- a/src/Text/Pandoc/Readers/Docx/Reducible.hs
+++ /dev/null
@@ -1,181 +0,0 @@
-{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
- PatternGuards #-}
-
-module Text.Pandoc.Readers.Docx.Reducible ( concatReduce
- , (<+>)
- )
- where
-
-
-import Text.Pandoc.Builder
-import Data.List
-import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
-import qualified Data.Sequence as Seq (null)
-
-data Modifier a = Modifier (a -> a)
- | AttrModifier (Attr -> a -> a) Attr
- | NullModifier
-
-class (Eq a) => Modifiable a where
- modifier :: a -> Modifier a
- innards :: a -> a
- getL :: a -> (a, a)
- getR :: a -> (a, a)
- spaceOut :: a -> (a, a, a)
-
-spaceOutL :: (Monoid a, Modifiable a) => a -> (a, a)
-spaceOutL ms = (l, stack fs (m' <> r))
- where (l, m, r) = spaceOut ms
- (fs, m') = unstack m
-
-spaceOutR :: (Monoid a, Modifiable a) => a -> (a, a)
-spaceOutR ms = (stack fs (l <> m'), r)
- where (l, m, r) = spaceOut ms
- (fs, m') = unstack m
-
-instance (Monoid a, Show a) => Show (Modifier a) where
- show (Modifier f) = show $ f mempty
- show (AttrModifier f attr) = show $ f attr mempty
- show (NullModifier) = "NullModifier"
-
-instance (Monoid a, Eq a) => Eq (Modifier a) where
- (Modifier f) == (Modifier g) = (f mempty == g mempty)
- (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
- (NullModifier) == (NullModifier) = True
- _ == _ = False
-
-instance Modifiable Inlines where
- modifier ils = case viewl (unMany ils) of
- (x :< xs) | Seq.null xs -> case x of
- (Emph _) -> Modifier emph
- (Strong _) -> Modifier strong
- (SmallCaps _) -> Modifier smallcaps
- (Strikeout _) -> Modifier strikeout
- (Superscript _) -> Modifier superscript
- (Subscript _) -> Modifier subscript
- (Span attr _) -> AttrModifier spanWith attr
- _ -> NullModifier
- _ -> NullModifier
-
- innards ils = case viewl (unMany ils) of
- (x :< xs) | Seq.null xs -> case x of
- (Emph lst) -> fromList lst
- (Strong lst) -> fromList lst
- (SmallCaps lst) -> fromList lst
- (Strikeout lst) -> fromList lst
- (Superscript lst) -> fromList lst
- (Subscript lst) -> fromList lst
- (Span _ lst) -> fromList lst
- _ -> ils
- _ -> ils
-
- getL ils = case viewl $ unMany ils of
- (s :< sq) -> (singleton s, Many sq)
- _ -> (mempty, ils)
-
- getR ils = case viewr $ unMany ils of
- (sq :> s) -> (Many sq, singleton s)
- _ -> (ils, mempty)
-
- spaceOut ils =
- let (fs, ils') = unstack ils
- contents = unMany ils'
- left = case viewl contents of
- (Space :< _) -> space
- _ -> mempty
- right = case viewr contents of
- (_ :> Space) -> space
- _ -> mempty in
- (left, (stack fs $ trimInlines .Many $ contents), right)
-
-instance Modifiable Blocks where
- modifier blks = case viewl (unMany blks) of
- (x :< xs) | Seq.null xs -> case x of
- (BlockQuote _) -> Modifier blockQuote
- -- (Div attr _) -> AttrModifier divWith attr
- _ -> NullModifier
- _ -> NullModifier
-
- innards blks = case viewl (unMany blks) of
- (x :< xs) | Seq.null xs -> case x of
- (BlockQuote lst) -> fromList lst
- -- (Div attr lst) -> fromList lst
- _ -> blks
- _ -> blks
-
- spaceOut blks = (mempty, blks, mempty)
-
- getL ils = case viewl $ unMany ils of
- (s :< sq) -> (singleton s, Many sq)
- _ -> (mempty, ils)
-
- getR ils = case viewr $ unMany ils of
- (sq :> s) -> (Many sq, singleton s)
- _ -> (ils, mempty)
-
-
-unstack :: (Modifiable a) => a -> ([Modifier a], a)
-unstack ms = case modifier ms of
- NullModifier -> ([], ms)
- _ -> (f : fs, ms') where
- f = modifier ms
- (fs, ms') = unstack $ innards ms
-
-stack :: (Monoid a, Modifiable a) => [Modifier a] -> a -> a
-stack [] ms = ms
-stack (NullModifier : fs) ms = stack fs ms
-stack ((Modifier f) : fs) ms =
- if isEmpty ms
- then stack fs ms
- else f $ stack fs ms
-stack ((AttrModifier f attr) : fs) ms = f attr $ stack fs ms
-
-isEmpty :: (Monoid a, Eq a) => a -> Bool
-isEmpty x = x == mempty
-
-
-combine :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
-combine x y =
- let (xs', x') = getR x
- (y', ys') = getL y
- in
- xs' <> (combineSingleton x' y') <> ys'
-
-isAttrModifier :: Modifier a -> Bool
-isAttrModifier (AttrModifier _ _) = True
-isAttrModifier _ = False
-
-combineSingleton :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
-combineSingleton x y =
- let (xfs, xs) = unstack x
- (yfs, ys) = unstack y
- shared = xfs `intersect` yfs
- x_remaining = xfs \\ shared
- y_remaining = yfs \\ shared
- x_rem_attr = filter isAttrModifier x_remaining
- y_rem_attr = filter isAttrModifier y_remaining
- in
- case null shared of
- True | isEmpty xs && isEmpty ys ->
- stack (x_rem_attr ++ y_rem_attr) mempty
- | isEmpty xs ->
- let (sp, y') = spaceOutL y in
- (stack x_rem_attr mempty) <> sp <> y'
- | isEmpty ys ->
- let (x', sp) = spaceOutR x in
- x' <> sp <> (stack y_rem_attr mempty)
- | otherwise ->
- let (x', xsp) = spaceOutR x
- (ysp, y') = spaceOutL y
- in
- x' <> xsp <> ysp <> y'
- False -> stack shared $
- combine
- (stack x_remaining xs)
- (stack y_remaining ys)
-
-(<+>) :: (Monoid a, Modifiable a, Eq a) => a -> a -> a
-x <+> y = combine x y
-
-concatReduce :: (Monoid a, Modifiable a) => [a] -> a
-concatReduce xs = foldl combine mempty xs