aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2019-02-06 09:01:26 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2019-02-06 09:01:26 -0800
commit: 2b003d4a6bc2b86f94859c43c9a16ccb6da9275c (patch)
tree: 2469c256c9588b3c5107b8d870878fdfe81f020e
parent: 59fa4eb17e1a518609a988cbecb8d6d9d74b6810 (diff)
download: pandoc-2b003d4a6bc2b86f94859c43c9a16ccb6da9275c.tar.gz
Handle Word files generated by Microsoft Word Online.
For some reason, Word in Office 365 Online stores the main content in `document2.xml` instead of `document.xml`, so pandoc was unable to parse the docx files it produces. As a quick fix, the parser now checks for both `document.xml` and `document2.xml`. This addresses #5277, but a more robust solution would be to determine the name of the main document dynamically (who knows whether it might change again?).
-rw-r--r-- src/Text/Pandoc/Readers/Docx/Parse.hs | 2
1 files changed, 2 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 843f2cdcd..e551ca7aa 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -364,6 +364,7 @@ archiveToDocxWithWarnings archive = do
archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
entry <- maybeToD $ findEntryByPath "word/document.xml" zf
+ `mplus` findEntryByPath "word/document2.xml" zf -- see #5277
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
@@ -478,6 +479,7 @@ archiveToComments zf =
filePathToRelType :: FilePath -> Maybe DocumentLocation
filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
+filePathToRelType "word/_rels/document2.xml.rels" = Just InDocument
filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
filePathToRelType _ = Nothing