aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jesse Rosenthal <jrosenthal@jhu.edu> 2019-02-06 21:06:14 -0500
committer: Jesse Rosenthal <jrosenthal@jhu.edu> 2019-02-06 21:14:46 -0500
commit: 4cce0efa4824b4081a2d971c488c5026c314bbb5 (patch)
tree: 3f383e4103e718d9c7bfb32a6988b6934fab7d52
parent: 5d3b8ede15b3de2d8b59e7c8a1ae60b5f5f879b4 (diff)
download: pandoc-4cce0efa4824b4081a2d971c488c5026c314bbb5.tar.gz
Docx reader: Dynamically determine document.xml path.
The desktop Word program places the main document file in "word/document.xml", but Word Online places it in "word/document2.xml". The path is actually declared in the root "_rels/.rels" file, as the "Target" attribute of the "Relationship" element whose "Type" attribute is "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument". Closes #5277.
-rw-r--r-- src/Text/Pandoc/Readers/Docx/Parse.hs 15
1 files changed, 12 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index e551ca7aa..fa4dc90a6 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -359,12 +359,21 @@ archiveToDocxWithWarnings archive = do
Right doc -> Right (Docx doc, stateWarnings st)
Left e -> Left e
-
+getDocumentPath :: Archive -> Maybe String
+getDocumentPath zf = do
+ entry <- findEntryByPath "_rels/.rels" zf
+ relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+ let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem
+ rel <- listToMaybe $
+ filter (\e -> findAttr (QName "Type" Nothing Nothing) e ==
+ Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
+ rels
+ findAttr (QName "Target" Nothing Nothing) rel
archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
- entry <- maybeToD $ findEntryByPath "word/document.xml" zf
- `mplus` findEntryByPath "word/document2.xml" zf -- see #5277
+ docPath <- maybeToD $ getDocumentPath zf
+ entry <- maybeToD $ findEntryByPath docPath zf
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem