aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs154
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs229
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs1044
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs108
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs47
5 files changed, 0 insertions, 1582 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
deleted file mode 100644
index 39e0df825..000000000
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ /dev/null
@@ -1,154 +0,0 @@
-{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
- PatternGuards #-}
-
-module Text.Pandoc.Readers.Docx.Combine ( smushInlines
- , smushBlocks
- )
- where
-
-import Text.Pandoc.Builder
-import Data.List
-import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>))
-import qualified Data.Sequence as Seq (null)
-
-data Modifier a = Modifier (a -> a)
- | AttrModifier (Attr -> a -> a) Attr
- | NullModifier
-
-spaceOutInlinesL :: Inlines -> (Inlines, Inlines)
-spaceOutInlinesL ms = (l, stackInlines fs (m' <> r))
- where (l, m, r) = spaceOutInlines ms
- (fs, m') = unstackInlines m
-
-spaceOutInlinesR :: Inlines -> (Inlines, Inlines)
-spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
- where (l, m, r) = spaceOutInlines ms
- (fs, m') = unstackInlines m
-
-spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines)
-spaceOutInlines ils =
- let (fs, ils') = unstackInlines ils
- contents = unMany ils'
- left = case viewl contents of
- (Space :< _) -> space
- _ -> mempty
- right = case viewr contents of
- (_ :> Space) -> space
- _ -> mempty in
- (left, (stackInlines fs $ trimInlines . Many $ contents), right)
-
-stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
-stackInlines [] ms = ms
-stackInlines (NullModifier : fs) ms = stackInlines fs ms
-stackInlines ((Modifier f) : fs) ms =
- if isEmpty ms
- then stackInlines fs ms
- else f $ stackInlines fs ms
-stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms
-
-unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
-unstackInlines ms = case ilModifier ms of
- NullModifier -> ([], ms)
- _ -> (f : fs, ms') where
- f = ilModifier ms
- (fs, ms') = unstackInlines $ ilInnards ms
-
-ilModifier :: Inlines -> Modifier Inlines
-ilModifier ils = case viewl (unMany ils) of
- (x :< xs) | Seq.null xs -> case x of
- (Emph _) -> Modifier emph
- (Strong _) -> Modifier strong
- (SmallCaps _) -> Modifier smallcaps
- (Strikeout _) -> Modifier strikeout
- (Superscript _) -> Modifier superscript
- (Subscript _) -> Modifier subscript
- (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
- (Span attr _) -> AttrModifier spanWith attr
- _ -> NullModifier
- _ -> NullModifier
-
-ilInnards :: Inlines -> Inlines
-ilInnards ils = case viewl (unMany ils) of
- (x :< xs) | Seq.null xs -> case x of
- (Emph lst) -> fromList lst
- (Strong lst) -> fromList lst
- (SmallCaps lst) -> fromList lst
- (Strikeout lst) -> fromList lst
- (Superscript lst) -> fromList lst
- (Subscript lst) -> fromList lst
- (Link _ lst _) -> fromList lst
- (Span _ lst) -> fromList lst
- _ -> ils
- _ -> ils
-
-inlinesL :: Inlines -> (Inlines, Inlines)
-inlinesL ils = case viewl $ unMany ils of
- (s :< sq) -> (singleton s, Many sq)
- _ -> (mempty, ils)
-
-inlinesR :: Inlines -> (Inlines, Inlines)
-inlinesR ils = case viewr $ unMany ils of
- (sq :> s) -> (Many sq, singleton s)
- _ -> (ils, mempty)
-
-combineInlines :: Inlines -> Inlines -> Inlines
-combineInlines x y =
- let (xs', x') = inlinesR x
- (y', ys') = inlinesL y
- in
- xs' <> (combineSingletonInlines x' y') <> ys'
-
-combineSingletonInlines :: Inlines -> Inlines -> Inlines
-combineSingletonInlines x y =
- let (xfs, xs) = unstackInlines x
- (yfs, ys) = unstackInlines y
- shared = xfs `intersect` yfs
- x_remaining = xfs \\ shared
- y_remaining = yfs \\ shared
- x_rem_attr = filter isAttrModifier x_remaining
- y_rem_attr = filter isAttrModifier y_remaining
- in
- case null shared of
- True | isEmpty xs && isEmpty ys ->
- stackInlines (x_rem_attr ++ y_rem_attr) mempty
- | isEmpty xs ->
- let (sp, y') = spaceOutInlinesL y in
- (stackInlines x_rem_attr mempty) <> sp <> y'
- | isEmpty ys ->
- let (x', sp) = spaceOutInlinesR x in
- x' <> sp <> (stackInlines y_rem_attr mempty)
- | otherwise ->
- let (x', xsp) = spaceOutInlinesR x
- (ysp, y') = spaceOutInlinesL y
- in
- x' <> xsp <> ysp <> y'
- False -> stackInlines shared $
- combineInlines
- (stackInlines x_remaining xs)
- (stackInlines y_remaining ys)
-
-combineBlocks :: Blocks -> Blocks -> Blocks
-combineBlocks bs cs
- | bs' :> (BlockQuote bs'') <- viewr (unMany bs)
- , (BlockQuote cs'') :< cs' <- viewl (unMany cs) =
- Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs'
-combineBlocks bs cs = bs <> cs
-
-instance (Monoid a, Eq a) => Eq (Modifier a) where
- (Modifier f) == (Modifier g) = (f mempty == g mempty)
- (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
- (NullModifier) == (NullModifier) = True
- _ == _ = False
-
-isEmpty :: (Monoid a, Eq a) => a -> Bool
-isEmpty x = x == mempty
-
-isAttrModifier :: Modifier a -> Bool
-isAttrModifier (AttrModifier _ _) = True
-isAttrModifier _ = False
-
-smushInlines :: [Inlines] -> Inlines
-smushInlines xs = foldl combineInlines mempty xs
-
-smushBlocks :: [Blocks] -> Blocks
-smushBlocks xs = foldl combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
deleted file mode 100644
index 395a53907..000000000
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ /dev/null
@@ -1,229 +0,0 @@
-{-
-Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Docx.Lists
- Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
- License : GNU GPL, version 2 or above
-
- Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
- Stability : alpha
- Portability : portable
-
-Functions for converting flat docx paragraphs into nested lists.
--}
-
-module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
- , blocksToDefinitions
- , listParagraphDivs
- ) where
-
-import Text.Pandoc.JSON
-import Text.Pandoc.Generic (bottomUp)
-import Text.Pandoc.Shared (trim)
-import Control.Monad
-import Data.List
-import Data.Maybe
-
-isListItem :: Block -> Bool
-isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True
-isListItem _ = False
-
-getLevel :: Block -> Maybe Integer
-getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs
-getLevel _ = Nothing
-
-getLevelN :: Block -> Integer
-getLevelN b = case getLevel b of
- Just n -> n
- Nothing -> -1
-
-getNumId :: Block -> Maybe Integer
-getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs
-getNumId _ = Nothing
-
-getNumIdN :: Block -> Integer
-getNumIdN b = case getNumId b of
- Just n -> n
- Nothing -> -1
-
-getText :: Block -> Maybe String
-getText (Div (_, _, kvs) _) = lookup "text" kvs
-getText _ = Nothing
-
-data ListType = Itemized | Enumerated ListAttributes
-
-listStyleMap :: [(String, ListNumberStyle)]
-listStyleMap = [("upperLetter", UpperAlpha),
- ("lowerLetter", LowerAlpha),
- ("upperRoman", UpperRoman),
- ("lowerRoman", LowerRoman),
- ("decimal", Decimal)]
-
-listDelimMap :: [(String, ListNumberDelim)]
-listDelimMap = [("%1)", OneParen),
- ("(%1)", TwoParens),
- ("%1.", Period)]
-
-getListType :: Block -> Maybe ListType
-getListType b@(Div (_, _, kvs) _) | isListItem b =
- let
- start = lookup "start" kvs
- frmt = lookup "format" kvs
- txt = lookup "text" kvs
- in
- case frmt of
- Just "bullet" -> Just Itemized
- Just f ->
- case txt of
- Just t -> Just $ Enumerated (
- read (fromMaybe "1" start) :: Int,
- fromMaybe DefaultStyle (lookup f listStyleMap),
- fromMaybe DefaultDelim (lookup t listDelimMap))
- Nothing -> Nothing
- _ -> Nothing
-getListType _ = Nothing
-
-listParagraphDivs :: [String]
-listParagraphDivs = ["ListParagraph"]
-
--- This is a first stab at going through and attaching meaning to list
--- paragraphs, without an item marker, following a list item. We
--- assume that these are paragraphs in the same item.
-
-handleListParagraphs :: [Block] -> [Block]
-handleListParagraphs [] = []
-handleListParagraphs (
- (Div attr1@(_, classes1, _) blks1) :
- (Div (ident2, classes2, kvs2) blks2) :
- blks
- ) | "list-item" `elem` classes1 &&
- not ("list-item" `elem` classes2) &&
- (not . null) (listParagraphDivs `intersect` classes2) =
- -- We don't want to keep this indent.
- let newDiv2 =
- (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2)
- in
- handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks)
-handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks)
-
-separateBlocks' :: Block -> [[Block]] -> [[Block]]
-separateBlocks' blk ([] : []) = [[blk]]
-separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]]
-separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]]
--- The following is for the invisible bullet lists. This is how
--- pandoc-generated ooxml does multiparagraph item lists.
-separateBlocks' b acc | liftM trim (getText b) == Just "" =
- (init acc) ++ [(last acc) ++ [b]]
-separateBlocks' b acc = acc ++ [[b]]
-
-separateBlocks :: [Block] -> [[Block]]
-separateBlocks blks = foldr separateBlocks' [[]] (reverse blks)
-
-flatToBullets' :: Integer -> [Block] -> [Block]
-flatToBullets' _ [] = []
-flatToBullets' num xs@(b : elems)
- | getLevelN b == num = b : (flatToBullets' num elems)
- | otherwise =
- let bNumId = getNumIdN b
- bLevel = getLevelN b
- (children, remaining) =
- span
- (\b' ->
- ((getLevelN b') > bLevel ||
- ((getLevelN b') == bLevel && (getNumIdN b') == bNumId)))
- xs
- in
- case getListType b of
- Just (Enumerated attr) ->
- (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) :
- (flatToBullets' num remaining)
- _ ->
- (BulletList (separateBlocks $ flatToBullets' bLevel children)) :
- (flatToBullets' num remaining)
-
-flatToBullets :: [Block] -> [Block]
-flatToBullets elems = flatToBullets' (-1) elems
-
-singleItemHeaderToHeader :: Block -> Block
-singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h
-singleItemHeaderToHeader blk = blk
-
-
-blocksToBullets :: [Block] -> [Block]
-blocksToBullets blks =
- map singleItemHeaderToHeader $
- bottomUp removeListDivs $
- flatToBullets $ (handleListParagraphs blks)
-
-plainParaInlines :: Block -> [Inline]
-plainParaInlines (Plain ils) = ils
-plainParaInlines (Para ils) = ils
-plainParaInlines _ = []
-
-blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block]
-blocksToDefinitions' [] acc [] = reverse acc
-blocksToDefinitions' defAcc acc [] =
- reverse $ (DefinitionList (reverse defAcc)) : acc
-blocksToDefinitions' defAcc acc
- ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks)
- | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 =
- let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
- pair = case remainingAttr2 == ("", [], []) of
- True -> (concatMap plainParaInlines blks1, [blks2])
- False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
- in
- blocksToDefinitions' (pair : defAcc) acc blks
-blocksToDefinitions' defAcc acc
- ((Div (ident2, classes2, kvs2) blks2) : blks)
- | (not . null) defAcc && "Definition" `elem` classes2 =
- let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
- defItems2 = case remainingAttr2 == ("", [], []) of
- True -> blks2
- False -> [Div remainingAttr2 blks2]
- ((defTerm, defItems):defs) = defAcc
- defAcc' = case null defItems of
- True -> (defTerm, [defItems2]) : defs
- False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
- in
- blocksToDefinitions' defAcc' acc blks
-blocksToDefinitions' [] acc (b:blks) =
- blocksToDefinitions' [] (b:acc) blks
-blocksToDefinitions' defAcc acc (b:blks) =
- blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
-
-removeListDivs' :: Block -> [Block]
-removeListDivs' (Div (ident, classes, kvs) blks)
- | "list-item" `elem` classes =
- case delete "list-item" classes of
- [] -> blks
- classes' -> [Div (ident, classes', kvs) $ blks]
-removeListDivs' (Div (ident, classes, kvs) blks)
- | not $ null $ listParagraphDivs `intersect` classes =
- case classes \\ listParagraphDivs of
- [] -> blks
- classes' -> [Div (ident, classes', kvs) blks]
-removeListDivs' blk = [blk]
-
-removeListDivs :: [Block] -> [Block]
-removeListDivs = concatMap removeListDivs'
-
-
-
-blocksToDefinitions :: [Block] -> [Block]
-blocksToDefinitions = blocksToDefinitions' [] []
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
deleted file mode 100644
index 221a1d10a..000000000
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ /dev/null
@@ -1,1044 +0,0 @@
-{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-}
-
-{-
-Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Docx.Parse
- Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
- License : GNU GPL, version 2 or above
-
- Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of docx archive into Docx haskell type
--}
-
-module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
- , Document(..)
- , Body(..)
- , BodyPart(..)
- , TblLook(..)
- , Extent
- , ParPart(..)
- , Run(..)
- , RunElem(..)
- , Notes
- , Numbering
- , Relationship
- , Media
- , RunStyle(..)
- , VertAlign(..)
- , ParIndentation(..)
- , ParagraphStyle(..)
- , Row(..)
- , Cell(..)
- , archiveToDocx
- , archiveToDocxWithWarnings
- ) where
-import Codec.Archive.Zip
-import Text.XML.Light
-import Data.Maybe
-import Data.List
-import System.FilePath
-import Data.Bits ((.|.))
-import qualified Data.ByteString.Lazy as B
-import qualified Text.Pandoc.UTF8 as UTF8
-import Control.Monad.Reader
-import Control.Monad.State
-import Control.Applicative ((<|>))
-import qualified Data.Map as M
-import Control.Monad.Except
-import Text.Pandoc.Shared (safeRead, filteredFilesFromArchive)
-import Text.TeXMath.Readers.OMML (readOMML)
-import Text.TeXMath.Unicode.Fonts (getUnicode, stringToFont, Font(..))
-import Text.TeXMath (Exp)
-import Text.Pandoc.Readers.Docx.Util
-import Data.Char (readLitChar, ord, chr, isDigit)
-
-data ReaderEnv = ReaderEnv { envNotes :: Notes
- , envComments :: Comments
- , envNumbering :: Numbering
- , envRelationships :: [Relationship]
- , envMedia :: Media
- , envFont :: Maybe Font
- , envCharStyles :: CharStyleMap
- , envParStyles :: ParStyleMap
- , envLocation :: DocumentLocation
- }
- deriving Show
-
-data ReaderState = ReaderState { stateWarnings :: [String] }
- deriving Show
-
-data DocxError = DocxError | WrongElem
- deriving Show
-
-type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState))
-
-runD :: D a -> ReaderEnv -> ReaderState -> (Either DocxError a, ReaderState)
-runD dx re rs = runState (runReaderT (runExceptT dx) re) rs
-
-maybeToD :: Maybe a -> D a
-maybeToD (Just a) = return a
-maybeToD Nothing = throwError DocxError
-
-eitherToD :: Either a b -> D b
-eitherToD (Right b) = return b
-eitherToD (Left _) = throwError DocxError
-
-concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
-concatMapM f xs = liftM concat (mapM f xs)
-
-
--- This is similar to `mapMaybe`: it maps a function returning the D
--- monad over a list, and only keeps the non-erroring return values.
-mapD :: (a -> D b) -> [a] -> D [b]
-mapD f xs =
- let handler x = (f x >>= (\y-> return [y])) `catchError` (\_ -> return [])
- in
- concatMapM handler xs
-
-data Docx = Docx Document
- deriving Show
-
-data Document = Document NameSpaces Body
- deriving Show
-
-data Body = Body [BodyPart]
- deriving Show
-
-type Media = [(FilePath, B.ByteString)]
-
-type CharStyle = (String, RunStyle)
-
-type ParStyle = (String, ParStyleData)
-
-type CharStyleMap = M.Map String RunStyle
-
-type ParStyleMap = M.Map String ParStyleData
-
-data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
- deriving Show
-
-data Numb = Numb String String -- right now, only a key to an abstract num
- deriving Show
-
-data AbstractNumb = AbstractNumb String [Level]
- deriving Show
-
--- (ilvl, format, string, start)
-type Level = (String, String, String, Maybe Integer)
-
-data DocumentLocation = InDocument | InFootnote | InEndnote
- deriving (Eq,Show)
-
-data Relationship = Relationship DocumentLocation RelId Target
- deriving Show
-
-data Notes = Notes NameSpaces
- (Maybe (M.Map String Element))
- (Maybe (M.Map String Element))
- deriving Show
-
-data Comments = Comments NameSpaces (M.Map String Element)
- deriving Show
-
-data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
- , rightParIndent :: Maybe Integer
- , hangingParIndent :: Maybe Integer}
- deriving Show
-
-data ParagraphStyle = ParagraphStyle { pStyle :: [String]
- , indentation :: Maybe ParIndentation
- , dropCap :: Bool
- , pHeading :: Maybe (String, Int)
- , pNumInfo :: Maybe (String, String)
- , pBlockQuote :: Maybe Bool
- }
- deriving Show
-
-defaultParagraphStyle :: ParagraphStyle
-defaultParagraphStyle = ParagraphStyle { pStyle = []
- , indentation = Nothing
- , dropCap = False
- , pHeading = Nothing
- , pNumInfo = Nothing
- , pBlockQuote = Nothing
- }
-
-
-data BodyPart = Paragraph ParagraphStyle [ParPart]
- | ListItem ParagraphStyle String String (Maybe Level) [ParPart]
- | Tbl String TblGrid TblLook [Row]
- | OMathPara [Exp]
- deriving Show
-
-type TblGrid = [Integer]
-
-data TblLook = TblLook {firstRowFormatting::Bool}
- deriving Show
-
-defaultTblLook :: TblLook
-defaultTblLook = TblLook{firstRowFormatting = False}
-
-data Row = Row [Cell]
- deriving Show
-
-data Cell = Cell [BodyPart]
- deriving Show
-
--- (width, height) in EMUs
-type Extent = Maybe (Double, Double)
-
-data ParPart = PlainRun Run
- | Insertion ChangeId Author ChangeDate [Run]
- | Deletion ChangeId Author ChangeDate [Run]
- | CommentStart CommentId Author CommentDate [BodyPart]
- | CommentEnd CommentId
- | BookMark BookMarkId Anchor
- | InternalHyperLink Anchor [Run]
- | ExternalHyperLink URL [Run]
- | Drawing FilePath String String B.ByteString Extent -- title, alt
- | Chart -- placeholder for now
- | PlainOMath [Exp]
- | SmartTag [Run]
- deriving Show
-
-data Run = Run RunStyle [RunElem]
- | Footnote [BodyPart]
- | Endnote [BodyPart]
- | InlineDrawing FilePath String String B.ByteString Extent -- title, alt
- | InlineChart -- placeholder
- deriving Show
-
-data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
- deriving Show
-
-data VertAlign = BaseLn | SupScrpt | SubScrpt
- deriving Show
-
-data RunStyle = RunStyle { isBold :: Maybe Bool
- , isItalic :: Maybe Bool
- , isSmallCaps :: Maybe Bool
- , isStrike :: Maybe Bool
- , rVertAlign :: Maybe VertAlign
- , rUnderline :: Maybe String
- , rStyle :: Maybe CharStyle}
- deriving Show
-
-data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
- , isBlockQuote :: Maybe Bool
- , numInfo :: Maybe (String, String)
- , psStyle :: Maybe ParStyle}
- deriving Show
-
-defaultRunStyle :: RunStyle
-defaultRunStyle = RunStyle { isBold = Nothing
- , isItalic = Nothing
- , isSmallCaps = Nothing
- , isStrike = Nothing
- , rVertAlign = Nothing
- , rUnderline = Nothing
- , rStyle = Nothing}
-
-type Target = String
-type Anchor = String
-type URL = String
-type BookMarkId = String
-type RelId = String
-type ChangeId = String
-type CommentId = String
-type Author = String
-type ChangeDate = String
-type CommentDate = String
-
-archiveToDocx :: Archive -> Either DocxError Docx
-archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
-
-archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
-archiveToDocxWithWarnings archive = do
- let notes = archiveToNotes archive
- comments = archiveToComments archive
- numbering = archiveToNumbering archive
- rels = archiveToRelationships archive
- media = filteredFilesFromArchive archive filePathIsMedia
- (styles, parstyles) = archiveToStyles archive
- rEnv =
- ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument
- rState = ReaderState { stateWarnings = [] }
- (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
- case eitherDoc of
- Right doc -> Right (Docx doc, stateWarnings st)
- Left e -> Left e
-
-
-
-archiveToDocument :: Archive -> D Document
-archiveToDocument zf = do
- entry <- maybeToD $ findEntryByPath "word/document.xml" zf
- docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = elemToNameSpaces docElem
- bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
- body <- elemToBody namespaces bodyElem
- return $ Document namespaces body
-
-elemToBody :: NameSpaces -> Element -> D Body
-elemToBody ns element | isElem ns "w" "body" element =
- mapD (elemToBodyPart ns) (elChildren element) >>=
- (\bps -> return $ Body bps)
-elemToBody _ _ = throwError WrongElem
-
-archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
-archiveToStyles zf =
- let stylesElem = findEntryByPath "word/styles.xml" zf >>=
- (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- in
- case stylesElem of
- Nothing -> (M.empty, M.empty)
- Just styElem ->
- let namespaces = elemToNameSpaces styElem
- in
- ( M.fromList $ buildBasedOnList namespaces styElem
- (Nothing :: Maybe CharStyle),
- M.fromList $ buildBasedOnList namespaces styElem
- (Nothing :: Maybe ParStyle) )
-
-isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool
-isBasedOnStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just styleType <- findAttrByName ns "w" "type" element
- , styleType == cStyleType parentStyle
- , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
- findAttrByName ns "w" "val"
- , Just ps <- parentStyle = (basedOnVal == getStyleId ps)
- | isElem ns "w" "style" element
- , Just styleType <- findAttrByName ns "w" "type" element
- , styleType == cStyleType parentStyle
- , Nothing <- findChildByName ns "w" "basedOn" element
- , Nothing <- parentStyle = True
- | otherwise = False
-
-class ElemToStyle a where
- cStyleType :: Maybe a -> String
- elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
- getStyleId :: a -> String
-
-instance ElemToStyle CharStyle where
- cStyleType _ = "character"
- elemToStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just "character" <- findAttrByName ns "w" "type" element
- , Just styleId <- findAttrByName ns "w" "styleId" element =
- Just (styleId, elemToRunStyle ns element parentStyle)
- | otherwise = Nothing
- getStyleId s = fst s
-
-instance ElemToStyle ParStyle where
- cStyleType _ = "paragraph"
- elemToStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just "paragraph" <- findAttrByName ns "w" "type" element
- , Just styleId <- findAttrByName ns "w" "styleId" element =
- Just (styleId, elemToParStyleData ns element parentStyle)
- | otherwise = Nothing
- getStyleId s = fst s
-
-getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
-getStyleChildren ns element parentStyle
- | isElem ns "w" "styles" element =
- mapMaybe (\e -> elemToStyle ns e parentStyle) $
- filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
- | otherwise = []
-
-buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
-buildBasedOnList ns element rootStyle =
- case (getStyleChildren ns element rootStyle) of
- [] -> []
- stys -> stys ++
- (concatMap (\s -> buildBasedOnList ns element (Just s)) stys)
-
-archiveToNotes :: Archive -> Notes
-archiveToNotes zf =
- let fnElem = findEntryByPath "word/footnotes.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- enElem = findEntryByPath "word/endnotes.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- fn_namespaces = case fnElem of
- Just e -> elemToNameSpaces e
- Nothing -> []
- en_namespaces = case enElem of
- Just e -> elemToNameSpaces e
- Nothing -> []
- ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
- fn = fnElem >>= (elemToNotes ns "footnote")
- en = enElem >>= (elemToNotes ns "endnote")
- in
- Notes ns fn en
-
-archiveToComments :: Archive -> Comments
-archiveToComments zf =
- let cmtsElem = findEntryByPath "word/comments.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- cmts_namespaces = case cmtsElem of
- Just e -> elemToNameSpaces e
- Nothing -> []
- cmts = (elemToComments cmts_namespaces) <$> cmtsElem
- in
- case cmts of
- Just c -> Comments cmts_namespaces c
- Nothing -> Comments cmts_namespaces M.empty
-
-filePathToRelType :: FilePath -> Maybe DocumentLocation
-filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
-filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
-filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
-filePathToRelType _ = Nothing
-
-relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
-relElemToRelationship relType element | qName (elName element) == "Relationship" =
- do
- relId <- findAttr (QName "Id" Nothing Nothing) element
- target <- findAttr (QName "Target" Nothing Nothing) element
- return $ Relationship relType relId target
-relElemToRelationship _ _ = Nothing
-
-filePathToRelationships :: Archive -> FilePath -> [Relationship]
-filePathToRelationships ar fp | Just relType <- filePathToRelType fp
- , Just entry <- findEntryByPath fp ar
- , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
- mapMaybe (relElemToRelationship relType) $ elChildren relElems
-filePathToRelationships _ _ = []
-
-archiveToRelationships :: Archive -> [Relationship]
-archiveToRelationships archive =
- concatMap (filePathToRelationships archive) $ filesInArchive archive
-
-filePathIsMedia :: FilePath -> Bool
-filePathIsMedia fp =
- let (dir, _) = splitFileName fp
- in
- (dir == "word/media/")
-
-lookupLevel :: String -> String -> Numbering -> Maybe Level
-lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
- absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
- lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs
- lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
- return lvl
-
-
-numElemToNum :: NameSpaces -> Element -> Maybe Numb
-numElemToNum ns element
- | isElem ns "w" "num" element = do
- numId <- findAttrByName ns "w" "numId" element
- absNumId <- findChildByName ns "w" "abstractNumId" element
- >>= findAttrByName ns "w" "val"
- return $ Numb numId absNumId
-numElemToNum _ _ = Nothing
-
-absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
-absNumElemToAbsNum ns element
- | isElem ns "w" "abstractNum" element = do
- absNumId <- findAttrByName ns "w" "abstractNumId" element
- let levelElems = findChildrenByName ns "w" "lvl" element
- levels = mapMaybe (levelElemToLevel ns) levelElems
- return $ AbstractNumb absNumId levels
-absNumElemToAbsNum _ _ = Nothing
-
-levelElemToLevel :: NameSpaces -> Element -> Maybe Level
-levelElemToLevel ns element
- | isElem ns "w" "lvl" element = do
- ilvl <- findAttrByName ns "w" "ilvl" element
- fmt <- findChildByName ns "w" "numFmt" element
- >>= findAttrByName ns "w" "val"
- txt <- findChildByName ns "w" "lvlText" element
- >>= findAttrByName ns "w" "val"
- let start = findChildByName ns "w" "start" element
- >>= findAttrByName ns "w" "val"
- >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
- return (ilvl, fmt, txt, start)
-levelElemToLevel _ _ = Nothing
-
-archiveToNumbering' :: Archive -> Maybe Numbering
-archiveToNumbering' zf = do
- case findEntryByPath "word/numbering.xml" zf of
- Nothing -> Just $ Numbering [] [] []
- Just entry -> do
- numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = elemToNameSpaces numberingElem
- numElems = findChildrenByName namespaces "w" "num" numberingElem
- absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
- nums = mapMaybe (numElemToNum namespaces) numElems
- absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems
- return $ Numbering namespaces nums absNums
-
-archiveToNumbering :: Archive -> Numbering
-archiveToNumbering archive =
- fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
-
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
-elemToNotes ns notetype element
- | isElem ns "w" (notetype ++ "s") element =
- let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
- (\a -> Just (a, e)))
- (findChildrenByName ns "w" notetype element)
- in
- Just $ M.fromList $ pairs
-elemToNotes _ _ _ = Nothing
-
-elemToComments :: NameSpaces -> Element -> M.Map String Element
-elemToComments ns element
- | isElem ns "w" "comments" element =
- let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
- (\a -> Just (a, e)))
- (findChildrenByName ns "w" "comment" element)
- in
- M.fromList $ pairs
-elemToComments _ _ = M.empty
-
-
----------------------------------------------
----------------------------------------------
-
-elemToTblGrid :: NameSpaces -> Element -> D TblGrid
-elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
- let cols = findChildrenByName ns "w" "gridCol" element
- in
- mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
- cols
-elemToTblGrid _ _ = throwError WrongElem
-
-elemToTblLook :: NameSpaces -> Element -> D TblLook
-elemToTblLook ns element | isElem ns "w" "tblLook" element =
- let firstRow = findAttrByName ns "w" "firstRow" element
- val = findAttrByName ns "w" "val" element
- firstRowFmt =
- case firstRow of
- Just "1" -> True
- Just _ -> False
- Nothing -> case val of
- Just bitMask -> testBitMask bitMask 0x020
- Nothing -> False
- in
- return $ TblLook{firstRowFormatting = firstRowFmt}
-elemToTblLook _ _ = throwError WrongElem
-
-elemToRow :: NameSpaces -> Element -> D Row
-elemToRow ns element | isElem ns "w" "tr" element =
- do
- let cellElems = findChildrenByName ns "w" "tc" element
- cells <- mapD (elemToCell ns) cellElems
- return $ Row cells
-elemToRow _ _ = throwError WrongElem
-
-elemToCell :: NameSpaces -> Element -> D Cell
-elemToCell ns element | isElem ns "w" "tc" element =
- do
- cellContents <- mapD (elemToBodyPart ns) (elChildren element)
- return $ Cell cellContents
-elemToCell _ _ = throwError WrongElem
-
-elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
-elemToParIndentation ns element | isElem ns "w" "ind" element =
- Just $ ParIndentation {
- leftParIndent =
- findAttrByName ns "w" "left" element >>=
- stringToInteger
- , rightParIndent =
- findAttrByName ns "w" "right" element >>=
- stringToInteger
- , hangingParIndent =
- findAttrByName ns "w" "hanging" element >>=
- stringToInteger}
-elemToParIndentation _ _ = Nothing
-
-testBitMask :: String -> Int -> Bool
-testBitMask bitMaskS n =
- case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
- [] -> False
- ((n', _) : _) -> ((n' .|. n) /= 0)
-
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
-
-elemToBodyPart :: NameSpaces -> Element -> D BodyPart
-elemToBodyPart ns element
- | isElem ns "w" "p" element
- , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
- do
- expsLst <- eitherToD $ readOMML $ showElement c
- return $ OMathPara expsLst
-elemToBodyPart ns element
- | isElem ns "w" "p" element
- , Just (numId, lvl) <- getNumInfo ns element = do
- sty <- asks envParStyles
- let parstyle = elemToParagraphStyle ns element sty
- parparts <- mapD (elemToParPart ns) (elChildren element)
- num <- asks envNumbering
- let levelInfo = lookupLevel numId lvl num
- return $ ListItem parstyle numId lvl levelInfo parparts
-elemToBodyPart ns element
- | isElem ns "w" "p" element = do
- sty <- asks envParStyles
- let parstyle = elemToParagraphStyle ns element sty
- parparts <- mapD (elemToParPart ns) (elChildren element)
- -- Word uses list enumeration for numbered headings, so we only
- -- want to infer a list from the styles if it is NOT a heading.
- case pHeading parstyle of
- Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
- num <- asks envNumbering
- let levelInfo = lookupLevel numId lvl num
- return $ ListItem parstyle numId lvl levelInfo parparts
- _ -> return $ Paragraph parstyle parparts
-elemToBodyPart ns element
- | isElem ns "w" "tbl" element = do
- let caption' = findChildByName ns "w" "tblPr" element
- >>= findChildByName ns "w" "tblCaption"
- >>= findAttrByName ns "w" "val"
- caption = (fromMaybe "" caption')
- grid' = case findChildByName ns "w" "tblGrid" element of
- Just g -> elemToTblGrid ns g
- Nothing -> return []
- tblLook' = case findChildByName ns "w" "tblPr" element >>=
- findChildByName ns "w" "tblLook"
- of
- Just l -> elemToTblLook ns l
- Nothing -> return defaultTblLook
-
- grid <- grid'
- tblLook <- tblLook'
- rows <- mapD (elemToRow ns) (elChildren element)
- return $ Tbl caption grid tblLook rows
-elemToBodyPart _ _ = throwError WrongElem
-
-lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
-lookupRelationship docLocation relid rels =
- lookup (docLocation, relid) pairs
- where
- pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
-
-expandDrawingId :: String -> D (FilePath, B.ByteString)
-expandDrawingId s = do
- location <- asks envLocation
- target <- asks (lookupRelationship location s . envRelationships)
- case target of
- Just filepath -> do
- bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
- case bytes of
- Just bs -> return (filepath, bs)
- Nothing -> throwError DocxError
- Nothing -> throwError DocxError
-
-getTitleAndAlt :: NameSpaces -> Element -> (String, String)
-getTitleAndAlt ns element =
- let mbDocPr = findChildByName ns "wp" "inline" element >>=
- findChildByName ns "wp" "docPr"
- title = case mbDocPr >>= findAttrByName ns "" "title" of
- Just title' -> title'
- Nothing -> ""
- alt = case mbDocPr >>= findAttrByName ns "" "descr" of
- Just alt' -> alt'
- Nothing -> ""
- in (title, alt)
-
-elemToParPart :: NameSpaces -> Element -> D ParPart
-elemToParPart ns element
- | isElem ns "w" "r" element
- , Just drawingElem <- findChildByName ns "w" "drawing" element
- , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
- , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem
- = let (title, alt) = getTitleAndAlt ns drawingElem
- a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
- drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrByName ns "r" "embed"
- in
- case drawing of
- Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
- Nothing -> throwError WrongElem
--- The below is an attempt to deal with images in deprecated vml format.
-elemToParPart ns element
- | isElem ns "w" "r" element
- , Just _ <- findChildByName ns "w" "pict" element =
- let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttrByName ns "r" "id"
- in
- case drawing of
- -- Todo: check out title and attr for deprecated format.
- Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
- Nothing -> throwError WrongElem
--- Chart
-elemToParPart ns element
- | isElem ns "w" "r" element
- , Just drawingElem <- findChildByName ns "w" "drawing" element
- , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
- , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
- = return Chart
-elemToParPart ns element
- | isElem ns "w" "r" element =
- elemToRun ns element >>= (\r -> return $ PlainRun r)
-elemToParPart ns element
- | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ Insertion cId cAuthor cDate runs
-elemToParPart ns element
- | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ Deletion cId cAuthor cDate runs
-elemToParPart ns element
- | isElem ns "w" "smartTag" element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ SmartTag runs
-elemToParPart ns element
- | isElem ns "w" "bookmarkStart" element
- , Just bmId <- findAttrByName ns "w" "id" element
- , Just bmName <- findAttrByName ns "w" "name" element =
- return $ BookMark bmId bmName
-elemToParPart ns element
- | isElem ns "w" "hyperlink" element
- , Just relId <- findAttrByName ns "r" "id" element = do
- location <- asks envLocation
- runs <- mapD (elemToRun ns) (elChildren element)
- rels <- asks envRelationships
- case lookupRelationship location relId rels of
- Just target -> do
- case findAttrByName ns "w" "anchor" element of
- Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
- Nothing -> return $ ExternalHyperLink target runs
- Nothing -> return $ ExternalHyperLink "" runs
-elemToParPart ns element
- | isElem ns "w" "hyperlink" element
- , Just anchor <- findAttrByName ns "w" "anchor" element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ InternalHyperLink anchor runs
-elemToParPart ns element
- | isElem ns "w" "commentRangeStart" element
- , Just cmtId <- findAttrByName ns "w" "id" element = do
- (Comments _ commentMap) <- asks envComments
- case M.lookup cmtId commentMap of
- Just cmtElem -> elemToCommentStart ns cmtElem
- Nothing -> throwError WrongElem
-elemToParPart ns element
- | isElem ns "w" "commentRangeEnd" element
- , Just cmtId <- findAttrByName ns "w" "id" element =
- return $ CommentEnd cmtId
-elemToParPart ns element
- | isElem ns "m" "oMath" element =
- (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
-elemToParPart _ _ = throwError WrongElem
-
-elemToCommentStart :: NameSpaces -> Element -> D ParPart
-elemToCommentStart ns element
- | isElem ns "w" "comment" element
- , Just cmtId <- findAttrByName ns "w" "id" element
- , Just cmtAuthor <- findAttrByName ns "w" "author" element
- , Just cmtDate <- findAttrByName ns "w" "date" element = do
- bps <- mapD (elemToBodyPart ns) (elChildren element)
- return $ CommentStart cmtId cmtAuthor cmtDate bps
-elemToCommentStart _ _ = throwError WrongElem
-
-lookupFootnote :: String -> Notes -> Maybe Element
-lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)
-
-lookupEndnote :: String -> Notes -> Maybe Element
-lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s)
-
-elemToExtent :: Element -> Extent
-elemToExtent drawingElem =
- case (getDim "cx", getDim "cy") of
- (Just w, Just h) -> Just (w, h)
- _ -> Nothing
- where
- wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
- getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
- >>= findAttr (QName at Nothing Nothing) >>= safeRead
-
-
-childElemToRun :: NameSpaces -> Element -> D Run
-childElemToRun ns element
- | isElem ns "w" "drawing" element
- , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
- , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) element
- = let (title, alt) = getTitleAndAlt ns element
- a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
- drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
- in
- case drawing of
- Just s -> expandDrawingId s >>=
- (\(fp, bs) -> return $ InlineDrawing fp title alt bs $ elemToExtent element)
- Nothing -> throwError WrongElem
-childElemToRun ns element
- | isElem ns "w" "drawing" element
- , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
- , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) element
- = return InlineChart
-childElemToRun ns element
- | isElem ns "w" "footnoteReference" element
- , Just fnId <- findAttrByName ns "w" "id" element = do
- notes <- asks envNotes
- case lookupFootnote fnId notes of
- Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
- return $ Footnote bps
- Nothing -> return $ Footnote []
-childElemToRun ns element
- | isElem ns "w" "endnoteReference" element
- , Just enId <- findAttrByName ns "w" "id" element = do
- notes <- asks envNotes
- case lookupEndnote enId notes of
- Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
- return $ Endnote bps
- Nothing -> return $ Endnote []
-childElemToRun _ _ = throwError WrongElem
-
-elemToRun :: NameSpaces -> Element -> D Run
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just altCont <- findChildByName ns "mc" "AlternateContent" element =
- do let choices = findChildrenByName ns "mc" "Choice" altCont
- choiceChildren = map head $ filter (not . null) $ map elChildren choices
- outputs <- mapD (childElemToRun ns) choiceChildren
- case outputs of
- r : _ -> return r
- [] -> throwError WrongElem
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just drawingElem <- findChildByName ns "w" "drawing" element =
- childElemToRun ns drawingElem
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just ref <- findChildByName ns "w" "footnoteReference" element =
- childElemToRun ns ref
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just ref <- findChildByName ns "w" "endnoteReference" element =
- childElemToRun ns ref
-elemToRun ns element
- | isElem ns "w" "r" element = do
- runElems <- elemToRunElems ns element
- runStyle <- elemToRunStyleD ns element
- return $ Run runStyle runElems
-elemToRun _ _ = throwError WrongElem
-
-getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a
-getParentStyleValue field style
- | Just value <- field style = Just value
- | Just parentStyle <- psStyle style
- = getParentStyleValue field (snd parentStyle)
-getParentStyleValue _ _ = Nothing
-
-getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] ->
- Maybe a
-getParStyleField field stylemap styles
- | x <- mapMaybe (\x -> M.lookup x stylemap) styles
- , (y:_) <- mapMaybe (getParentStyleValue field) x
- = Just y
-getParStyleField _ _ _ = Nothing
-
-elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle
-elemToParagraphStyle ns element sty
- | Just pPr <- findChildByName ns "w" "pPr" element =
- let style =
- mapMaybe
- (findAttrByName ns "w" "val")
- (findChildrenByName ns "w" "pStyle" pPr)
- in ParagraphStyle
- {pStyle = style
- , indentation =
- findChildByName ns "w" "ind" pPr >>=
- elemToParIndentation ns
- , dropCap =
- case
- findChildByName ns "w" "framePr" pPr >>=
- findAttrByName ns "w" "dropCap"
- of
- Just "none" -> False
- Just _ -> True
- Nothing -> False
- , pHeading = getParStyleField headingLev sty style
- , pNumInfo = getParStyleField numInfo sty style
- , pBlockQuote = getParStyleField isBlockQuote sty style
- }
-elemToParagraphStyle _ _ _ = defaultParagraphStyle
-
-checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
-checkOnOff ns rPr tag
- | Just t <- findChild tag rPr
- , Just val <- findAttrByName ns "w" "val" t =
- Just $ case val of
- "true" -> True
- "false" -> False
- "on" -> True
- "off" -> False
- "1" -> True
- "0" -> False
- _ -> False
- | Just _ <- findChild tag rPr = Just True
-checkOnOff _ _ _ = Nothing
-
-elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
-elemToRunStyleD ns element
- | Just rPr <- findChildByName ns "w" "rPr" element = do
- charStyles <- asks envCharStyles
- let parentSty = case
- findChildByName ns "w" "rStyle" rPr >>=
- findAttrByName ns "w" "val"
- of
- Just styName | Just style <- M.lookup styName charStyles ->
- Just (styName, style)
- _ -> Nothing
- return $ elemToRunStyle ns element parentSty
-elemToRunStyleD _ _ = return defaultRunStyle
-
-elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
-elemToRunStyle ns element parentStyle
- | Just rPr <- findChildByName ns "w" "rPr" element =
- RunStyle
- {
- isBold = checkOnOff ns rPr (elemName ns "w" "b")
- , isItalic = checkOnOff ns rPr (elemName ns "w" "i")
- , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
- , isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
- , rVertAlign =
- findChildByName ns "w" "vertAlign" rPr >>=
- findAttrByName ns "w" "val" >>=
- \v -> Just $ case v of
- "superscript" -> SupScrpt
- "subscript" -> SubScrpt
- _ -> BaseLn
- , rUnderline =
- findChildByName ns "w" "u" rPr >>=
- findAttrByName ns "w" "val"
- , rStyle = parentStyle
- }
-elemToRunStyle _ _ _ = defaultRunStyle
-
-isNumericNotNull :: String -> Bool
-isNumericNotNull str = (str /= []) && (all isDigit str)
-
-getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int)
-getHeaderLevel ns element
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , Just index <- stripPrefix "Heading" styleId
- , isNumericNotNull index = Just (styleId, read index)
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , Just index <- findChildByName ns "w" "name" element >>=
- findAttrByName ns "w" "val" >>=
- stripPrefix "heading "
- , isNumericNotNull index = Just (styleId, read index)
-getHeaderLevel _ _ = Nothing
-
-blockQuoteStyleIds :: [String]
-blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"]
-
-blockQuoteStyleNames :: [String]
-blockQuoteStyleNames = ["Quote", "Block Text"]
-
-getBlockQuote :: NameSpaces -> Element -> Maybe Bool
-getBlockQuote ns element
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , styleId `elem` blockQuoteStyleIds = Just True
- | Just styleName <- findChildByName ns "w" "name" element >>=
- findAttrByName ns "w" "val"
- , styleName `elem` blockQuoteStyleNames = Just True
-getBlockQuote _ _ = Nothing
-
-getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
-getNumInfo ns element = do
- let numPr = findChildByName ns "w" "pPr" element >>=
- findChildByName ns "w" "numPr"
- lvl = fromMaybe "0" (numPr >>=
- findChildByName ns "w" "ilvl" >>=
- findAttrByName ns "w" "val")
- numId <- numPr >>=
- findChildByName ns "w" "numId" >>=
- findAttrByName ns "w" "val"
- return (numId, lvl)
-
-
-elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData
-elemToParStyleData ns element parentStyle =
- ParStyleData
- {
- headingLev = getHeaderLevel ns element
- , isBlockQuote = getBlockQuote ns element
- , numInfo = getNumInfo ns element
- , psStyle = parentStyle
- }
-
-elemToRunElem :: NameSpaces -> Element -> D RunElem
-elemToRunElem ns element
- | isElem ns "w" "t" element
- || isElem ns "w" "delText" element
- || isElem ns "m" "t" element = do
- let str = strContent element
- font <- asks envFont
- case font of
- Nothing -> return $ TextRun str
- Just f -> return . TextRun $
- map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
- | isElem ns "w" "br" element = return LnBrk
- | isElem ns "w" "tab" element = return Tab
- | isElem ns "w" "softHyphen" element = return SoftHyphen
- | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen
- | isElem ns "w" "sym" element = return (getSymChar ns element)
- | otherwise = throwError WrongElem
- where
- lowerFromPrivate (ord -> c)
- | c >= ord '\xF000' = chr $ c - ord '\xF000'
- | otherwise = chr c
-
--- The char attribute is a hex string
-getSymChar :: NameSpaces -> Element -> RunElem
-getSymChar ns element
- | Just s <- lowerFromPrivate <$> getCodepoint
- , Just font <- getFont =
- let [(char, _)] = readLitChar ("\\x" ++ s) in
- TextRun . maybe "" (:[]) $ getUnicode font char
- where
- getCodepoint = findAttrByName ns "w" "char" element
- getFont = stringToFont =<< findAttrByName ns "w" "font" element
- lowerFromPrivate ('F':xs) = '0':xs
- lowerFromPrivate xs = xs
-getSymChar _ _ = TextRun ""
-
-elemToRunElems :: NameSpaces -> Element -> D [RunElem]
-elemToRunElems ns element
- | isElem ns "w" "r" element
- || isElem ns "m" "r" element = do
- let qualName = elemName ns "w"
- let font = do
- fontElem <- findElement (qualName "rFonts") element
- stringToFont =<<
- (foldr (<|>) Nothing $
- map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"])
- local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
-elemToRunElems _ _ = throwError WrongElem
-
-setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
-setFont f s = s{envFont = f}
-
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
deleted file mode 100644
index 00906cf07..000000000
--- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs
+++ /dev/null
@@ -1,108 +0,0 @@
-module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
- , alterMap
- , getMap
- , defaultStyleMaps
- , getStyleMaps
- , getStyleId
- , hasStyleName
- ) where
-
-import Text.XML.Light
-import Text.Pandoc.Readers.Docx.Util
-import Control.Monad.State
-import Data.Char (toLower)
-import qualified Data.Map as M
-
-newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
-newtype CharStyleMap = CharStyleMap ( M.Map String String )
-
-class StyleMap a where
- alterMap :: (M.Map String String -> M.Map String String) -> a -> a
- getMap :: a -> M.Map String String
-
-instance StyleMap ParaStyleMap where
- alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
- getMap (ParaStyleMap m) = m
-
-instance StyleMap CharStyleMap where
- alterMap f (CharStyleMap m) = CharStyleMap $ f m
- getMap (CharStyleMap m) = m
-
-insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a
-insert (Just k) (Just v) m = alterMap (M.insert k v) m
-insert _ _ m = m
-
-getStyleId :: (StyleMap a) => String -> a -> String
-getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
-
-hasStyleName :: (StyleMap a) => String -> a -> Bool
-hasStyleName styleName = M.member (map toLower styleName) . getMap
-
-data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
- , sParaStyleMap :: ParaStyleMap
- , sCharStyleMap :: CharStyleMap
- }
-
-data StyleType = ParaStyle | CharStyle
-
-defaultStyleMaps :: StyleMaps
-defaultStyleMaps = StyleMaps { sNameSpaces = []
- , sParaStyleMap = ParaStyleMap M.empty
- , sCharStyleMap = CharStyleMap M.empty
- }
-
-type StateM a = State StyleMaps a
-
-getStyleMaps :: Element -> StyleMaps
-getStyleMaps docElem = execState genStyleMap state'
- where
- state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
- genStyleItem e = do
- styleType <- getStyleType e
- styleId <- getAttrStyleId e
- nameValLowercase <- fmap (map toLower) `fmap` getNameVal e
- case styleType of
- Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId
- Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId
- _ -> return ()
- genStyleMap = do
- style <- elemName' "style"
- let styles = findChildren style docElem
- forM_ styles genStyleItem
-
-modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM ()
-modParaStyleMap f = modify $ \s ->
- s {sParaStyleMap = f $ sParaStyleMap s}
-
-modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM ()
-modCharStyleMap f = modify $ \s ->
- s {sCharStyleMap = f $ sCharStyleMap s}
-
-getStyleType :: Element -> StateM (Maybe StyleType)
-getStyleType e = do
- styleTypeStr <- getAttrType e
- case styleTypeStr of
- Just "paragraph" -> return $ Just ParaStyle
- Just "character" -> return $ Just CharStyle
- _ -> return Nothing
-
-getAttrType :: Element -> StateM (Maybe String)
-getAttrType el = do
- name <- elemName' "type"
- return $ findAttr name el
-
-getAttrStyleId :: Element -> StateM (Maybe String)
-getAttrStyleId el = do
- name <- elemName' "styleId"
- return $ findAttr name el
-
-getNameVal :: Element -> StateM (Maybe String)
-getNameVal el = do
- name <- elemName' "name"
- val <- elemName' "val"
- return $ findChild name el >>= findAttr val
-
-elemName' :: String -> StateM QName
-elemName' name = do
- namespaces <- gets sNameSpaces
- return $ elemName namespaces "w" name
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
deleted file mode 100644
index 6646e5b7f..000000000
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ /dev/null
@@ -1,47 +0,0 @@
-module Text.Pandoc.Readers.Docx.Util (
- NameSpaces
- , elemName
- , isElem
- , elemToNameSpaces
- , findChildByName
- , findChildrenByName
- , findAttrByName
- ) where
-
-import Text.XML.Light
-import Data.Maybe (mapMaybe)
-
-type NameSpaces = [(String, String)]
-
-elemToNameSpaces :: Element -> NameSpaces
-elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
-
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
-
-elemName :: NameSpaces -> String -> String -> QName
-elemName ns prefix name =
- QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
-
-isElem :: NameSpaces -> String -> String -> Element -> Bool
-isElem ns prefix name element =
- let ns' = ns ++ elemToNameSpaces element
- in qName (elName element) == name &&
- qURI (elName element) == lookup prefix ns'
-
-findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
-findChildByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
- in findChild (elemName ns' pref name) el
-
-findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
-findChildrenByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
- in findChildren (elemName ns' pref name) el
-
-findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
-findAttrByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
- in findAttr (elemName ns' pref name) el
-