aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx/Parse.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx/Parse.hs')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs107
1 files changed, 64 insertions, 43 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 432965d49..eec8b12c9 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -35,6 +35,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, Body(..)
, BodyPart(..)
, TblLook(..)
+ , Extent
, ParPart(..)
, Run(..)
, RunElem(..)
@@ -62,6 +63,7 @@ import Control.Monad.Reader
import Control.Applicative ((<|>))
import qualified Data.Map as M
import Text.Pandoc.Compat.Except
+import Text.Pandoc.Shared (safeRead)
import Text.TeXMath.Readers.OMML (readOMML)
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
import Text.TeXMath (Exp)
@@ -75,6 +77,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
, envFont :: Maybe Font
, envCharStyles :: CharStyleMap
, envParStyles :: ParStyleMap
+ , envLocation :: DocumentLocation
}
deriving Show
@@ -87,7 +90,7 @@ instance Error DocxError where
type D = ExceptT DocxError (Reader ReaderEnv)
runD :: D a -> ReaderEnv -> Either DocxError a
-runD dx re = runReader (runExceptT dx ) re
+runD dx re = runReader (runExceptT dx) re
maybeToD :: Maybe a -> D a
maybeToD (Just a) = return a
@@ -140,7 +143,10 @@ data AbstractNumb = AbstractNumb String [Level]
-- (ilvl, format, string, start)
type Level = (String, String, String, Maybe Integer)
-data Relationship = Relationship (RelId, Target)
+data DocumentLocation = InDocument | InFootnote | InEndnote
+ deriving (Eq,Show)
+
+data Relationship = Relationship DocumentLocation RelId Target
deriving Show
data Notes = Notes NameSpaces
@@ -173,7 +179,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
data BodyPart = Paragraph ParagraphStyle [ParPart]
- | ListItem ParagraphStyle String String Level [ParPart]
+ | ListItem ParagraphStyle String String (Maybe Level) [ParPart]
| Tbl String TblGrid TblLook [Row]
| OMathPara [Exp]
deriving Show
@@ -192,20 +198,23 @@ data Row = Row [Cell]
data Cell = Cell [BodyPart]
deriving Show
+-- (width, height) in EMUs
+type Extent = Maybe (Double, Double)
+
data ParPart = PlainRun Run
| Insertion ChangeId Author ChangeDate [Run]
| Deletion ChangeId Author ChangeDate [Run]
| BookMark BookMarkId Anchor
| InternalHyperLink Anchor [Run]
| ExternalHyperLink URL [Run]
- | Drawing FilePath B.ByteString
+ | Drawing FilePath B.ByteString Extent
| PlainOMath [Exp]
deriving Show
data Run = Run RunStyle [RunElem]
| Footnote [BodyPart]
| Endnote [BodyPart]
- | InlineDrawing FilePath B.ByteString
+ | InlineDrawing FilePath B.ByteString Extent
deriving Show
data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
@@ -238,7 +247,6 @@ defaultRunStyle = RunStyle { isBold = Nothing
, rUnderline = Nothing
, rStyle = Nothing}
-
type Target = String
type Anchor = String
type URL = String
@@ -255,7 +263,8 @@ archiveToDocx archive = do
rels = archiveToRelationships archive
media = archiveToMedia archive
(styles, parstyles) = archiveToStyles archive
- rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles
+ rEnv =
+ ReaderEnv notes numbering rels media Nothing styles parstyles InDocument
doc <- runD (archiveToDocument archive) rEnv
return $ Docx doc
@@ -362,29 +371,30 @@ archiveToNotes zf =
in
Notes ns fn en
-filePathIsRel :: FilePath -> Bool
-filePathIsRel fp =
- let (dir, name) = splitFileName fp
- in
- (dir == "word/_rels/") && ((takeExtension name) == ".rels")
+filePathToRelType :: FilePath -> Maybe DocumentLocation
+filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
+filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
+filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
+filePathToRelType _ = Nothing
-relElemToRelationship :: Element -> Maybe Relationship
-relElemToRelationship element | qName (elName element) == "Relationship" =
+relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
+relElemToRelationship relType element | qName (elName element) == "Relationship" =
do
relId <- findAttr (QName "Id" Nothing Nothing) element
target <- findAttr (QName "Target" Nothing Nothing) element
- return $ Relationship (relId, target)
-relElemToRelationship _ = Nothing
-
-
+ return $ Relationship relType relId target
+relElemToRelationship _ _ = Nothing
+
+filePathToRelationships :: Archive -> FilePath -> [Relationship]
+filePathToRelationships ar fp | Just relType <- filePathToRelType fp
+ , Just entry <- findEntryByPath fp ar
+ , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
+ mapMaybe (relElemToRelationship relType) $ elChildren relElems
+filePathToRelationships _ _ = []
+
archiveToRelationships :: Archive -> [Relationship]
archiveToRelationships archive =
- let relPaths = filter filePathIsRel (filesInArchive archive)
- entries = mapMaybe (\f -> findEntryByPath f archive) relPaths
- relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
- rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems
- in
- rels
+ concatMap (filePathToRelationships archive) $ filesInArchive archive
filePathIsMedia :: FilePath -> Bool
filePathIsMedia fp =
@@ -409,6 +419,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
return lvl
+
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element |
qName (elName element) == "num" &&
@@ -558,9 +569,8 @@ elemToBodyPart ns element
let parstyle = elemToParagraphStyle ns element sty
parparts <- mapD (elemToParPart ns) (elChildren element)
num <- asks envNumbering
- case lookupLevel numId lvl num of
- Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
- Nothing -> throwError WrongElem
+ let levelInfo = lookupLevel numId lvl num
+ return $ ListItem parstyle numId lvl levelInfo parparts
elemToBodyPart ns element
| isElem ns "w" "p" element = do
sty <- asks envParStyles
@@ -569,11 +579,8 @@ elemToBodyPart ns element
case pNumInfo parstyle of
Just (numId, lvl) -> do
num <- asks envNumbering
- case lookupLevel numId lvl num of
- Just levelInfo ->
- return $ ListItem parstyle numId lvl levelInfo parparts
- Nothing ->
- throwError WrongElem
+ let levelInfo = lookupLevel numId lvl num
+ return $ ListItem parstyle numId lvl levelInfo parparts
Nothing -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
@@ -596,13 +603,16 @@ elemToBodyPart ns element
return $ Tbl caption grid tblLook rows
elemToBodyPart _ _ = throwError WrongElem
-lookupRelationship :: RelId -> [Relationship] -> Maybe Target
-lookupRelationship relid rels =
- lookup relid (map (\(Relationship pair) -> pair) rels)
+lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
+lookupRelationship docLocation relid rels =
+ lookup (docLocation, relid) pairs
+ where
+ pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
expandDrawingId :: String -> D (FilePath, B.ByteString)
expandDrawingId s = do
- target <- asks (lookupRelationship s . envRelationships)
+ location <- asks envLocation
+ target <- asks (lookupRelationship location s . envRelationships)
case target of
Just filepath -> do
bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
@@ -614,13 +624,13 @@ expandDrawingId s = do
elemToParPart :: NameSpaces -> Element -> D ParPart
elemToParPart ns element
| isElem ns "w" "r" element
- , Just _ <- findChild (elemName ns "w" "drawing") element =
+ , Just drawingElem <- findChild (elemName ns "w" "drawing") element =
let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
>>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
in
case drawing of
- Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs)
+ Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs $ elemToExtent drawingElem)
Nothing -> throwError WrongElem
-- The below is an attempt to deal with images in deprecated vml format.
elemToParPart ns element
@@ -630,7 +640,7 @@ elemToParPart ns element
>>= findAttr (elemName ns "r" "id")
in
case drawing of
- Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs)
+ Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs Nothing)
Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "r" element =
@@ -657,9 +667,10 @@ elemToParPart ns element
elemToParPart ns element
| isElem ns "w" "hyperlink" element
, Just relId <- findAttr (elemName ns "r" "id") element = do
+ location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
- case lookupRelationship relId rels of
+ case lookupRelationship location relId rels of
Just target -> do
case findAttr (elemName ns "w" "anchor") element of
Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
@@ -681,6 +692,16 @@ lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)
lookupEndnote :: String -> Notes -> Maybe Element
lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s)
+elemToExtent :: Element -> Extent
+elemToExtent drawingElem =
+ case (getDim "cx", getDim "cy") of
+ (Just w, Just h) -> Just (w, h)
+ _ -> Nothing
+ where
+ wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
+ getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
+ >>= findAttr (QName at Nothing Nothing) >>= safeRead
+
elemToRun :: NameSpaces -> Element -> D Run
elemToRun ns element
| isElem ns "w" "r" element
@@ -691,7 +712,7 @@ elemToRun ns element
in
case drawing of
Just s -> expandDrawingId s >>=
- (\(fp, bs) -> return $ InlineDrawing fp bs)
+ (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent drawingElem)
Nothing -> throwError WrongElem
elemToRun ns element
| isElem ns "w" "r" element
@@ -699,7 +720,7 @@ elemToRun ns element
, Just fnId <- findAttr (elemName ns "w" "id") ref = do
notes <- asks envNotes
case lookupFootnote fnId notes of
- Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e)
+ Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
return $ Footnote bps
Nothing -> return $ Footnote []
elemToRun ns element
@@ -708,7 +729,7 @@ elemToRun ns element
, Just enId <- findAttr (elemName ns "w" "id") ref = do
notes <- asks envNotes
case lookupEndnote enId notes of
- Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e)
+ Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
return $ Endnote bps
Nothing -> return $ Endnote []
elemToRun ns element