27 files changed, 1511 insertions, 33 deletions
diff --git a/README b/README
index 6030fa2bb..1883ecd57 100644
--- a/README
+++ b/README
@@ -13,15 +13,15 @@ Description
 Pandoc is a [Haskell] library for converting from one markup format to
 another, and a command-line tool that uses this library. It can read
 [markdown] and (subsets of) [Textile], [reStructuredText], [HTML],
-[LaTeX], [MediaWiki markup], [Haddock markup], [OPML], [Emacs Org-mode]
-and [DocBook]; and it can write plain text, [markdown],
-[reStructuredText], [XHTML], [HTML 5], [LaTeX] (including [beamer] slide
-shows), [ConTeXt], [RTF], [OPML], [DocBook], [OpenDocument], [ODT],
-[Word docx], [GNU Texinfo], [MediaWiki markup], [EPUB] (v2 or v3),
-[FictionBook2], [Textile], [groff man] pages, [Emacs Org-Mode],
-[AsciiDoc], [InDesign ICML], and [Slidy], [Slideous], [DZSlides],
-[reveal.js] or [S5] HTML slide shows. It can also produce [PDF] output
-on systems where LaTeX is installed.
+[LaTeX], [MediaWiki markup], [Haddock markup], [OPML], [Emacs
+Org-mode], [DocBook], and [Word docx]; and it can write plain text,
+[markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX] (including
+[beamer] slide shows), [ConTeXt], [RTF], [OPML], [DocBook],
+[OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki markup],
+[EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages,
+[Emacs Org-Mode], [AsciiDoc], [InDesign ICML], and [Slidy],
+[Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. It can
+also produce [PDF] output on systems where LaTeX is installed.
 
 Pandoc's enhanced version of markdown includes syntax for footnotes,
 tables, flexible ordered lists, definition lists, fenced code blocks,
diff --git a/pandoc.cabal b/pandoc.cabal
index be7d4977f..00fa4e06a 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -184,7 +184,25 @@ Extra-Source-Files:
                  tests/fb2.math.markdown,
                  tests/fb2.math.fb2,
                  tests/fb2.test-small.png,
-                 tests/fb2.test.jpg
+                 tests/fb2.test.jpg,
+                 tests/docx.block_quotes.docx,
+                 tests/docx.block_quotes_parse_indent.native,
+                 tests/docx.headers.docx,
+                 tests/docx.headers.native,
+                 tests/docx.image.docx,
+                 tests/docx.image_no_embed.native,
+                 tests/docx.inline_formatting.docx,
+                 tests/docx.inline_formatting.native,
+                 tests/docx.links.docx,
+                 tests/docx.links.native,
+                 tests/docx.lists.docx,
+                 tests/docx.lists.native,
+                 tests/docx.notes.docx,
+                 tests/docx.notes.native,
+                 tests/docx.tables.docx,
+                 tests/docx.tables.native,
+                 tests/docx.unicode.docx,
+                 tests/docx.unicode.native
 Extra-Tmp-Files: man/man1/pandoc.1,
                  man/man5/pandoc_markdown.5
 
@@ -275,6 +293,7 @@ Library
                    Text.Pandoc.Readers.Textile,
                    Text.Pandoc.Readers.Native,
                    Text.Pandoc.Readers.Haddock,
+                   Text.Pandoc.Readers.DocX,
                    Text.Pandoc.Writers.Native,
                    Text.Pandoc.Writers.Docbook,
                    Text.Pandoc.Writers.OPML,
@@ -305,6 +324,8 @@ Library
                    Text.Pandoc.Process
   Other-Modules:   Text.Pandoc.Readers.Haddock.Lex,
                    Text.Pandoc.Readers.Haddock.Parse,
+                   Text.Pandoc.Readers.DocX.Lists,
+                   Text.Pandoc.Readers.DocX.Parse,
                    Text.Pandoc.Writers.Shared,
                    Text.Pandoc.Asciify,
                    Text.Pandoc.MIME,
@@ -390,6 +411,7 @@ Test-Suite test-pandoc
                   Tests.Readers.Markdown
                   Tests.Readers.Org
                   Tests.Readers.RST
+                  Tests.Readers.DocX
                   Tests.Writers.Native
                   Tests.Writers.ConTeXt
                   Tests.Writers.HTML
diff --git a/pandoc.hs b/pandoc.hs
index 5dd0e6899..0a8070d7c 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -858,6 +858,7 @@ defaultReaderName fallback (x:xs) =
     ".textile"  -> "textile"
     ".native"   -> "native"
     ".json"     -> "json"
+    ".docx"     -> "docx"
     _           -> defaultReaderName fallback xs
 
 -- Returns True if extension of first source is .lhs
@@ -1158,15 +1159,21 @@ main = do
              Left e        -> throwIO e
              Right (bs,_)  -> return $ UTF8.toString bs
 
+  let readFiles [] = error "Cannot read archive from stdin"
+      readFiles (x:_) = B.readFile x
+
   let convertTabs = tabFilter (if preserveTabs then 0 else tabStop)
 
   let handleIncludes' = if readerName' == "latex" || readerName' == "latex+lhs"
                            then handleIncludes
                            else return
 
-  doc <- readSources sources >>=
-           handleIncludes' . convertTabs . intercalate "\n" >>=
-           reader readerOpts
+  doc <- case reader of
+          StringReader r-> 
+            readSources sources >>=
+              handleIncludes' . convertTabs . intercalate "\n" >>=
+              r readerOpts
+          ByteStringReader r -> readFiles sources >>= r readerOpts 
 
 
   let doc0 = M.foldWithKey setMeta doc metadata
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 130338f0e..aff471a3c 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -62,6 +62,8 @@ module Text.Pandoc
                , readers
                , writers
                -- * Readers: converting /to/ Pandoc format
+               , Reader (..)
+               , readDocX
                , readMarkdown
                , readMediaWiki
                , readRST
@@ -125,6 +127,7 @@ import Text.Pandoc.Readers.HTML
 import Text.Pandoc.Readers.Textile
 import Text.Pandoc.Readers.Native
 import Text.Pandoc.Readers.Haddock
+import Text.Pandoc.Readers.DocX
 import Text.Pandoc.Writers.Native
 import Text.Pandoc.Writers.Markdown
 import Text.Pandoc.Writers.RST
@@ -192,24 +195,34 @@ markdown o s = do
   mapM_ warn warnings
   return doc
 
+data Reader = StringReader (ReaderOptions -> String -> IO Pandoc)
+              | ByteStringReader (ReaderOptions -> BL.ByteString -> IO Pandoc)
+
+mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader
+mkStringReader r = StringReader (\o s -> return $ r o s)
+
+mkBSReader :: (ReaderOptions -> BL.ByteString -> Pandoc) -> Reader
+mkBSReader r = ByteStringReader (\o s -> return $ r o s)
+
 -- | Association list of formats and readers.
-readers :: [(String, ReaderOptions -> String -> IO Pandoc)]
-readers = [ ("native"       , \_ s -> return $ readNative s)
-           ,("json"         , \o s -> return $ readJSON o s)
-           ,("markdown"     , markdown)
-           ,("markdown_strict" , markdown)
-           ,("markdown_phpextra" , markdown)
-           ,("markdown_github" , markdown)
-           ,("markdown_mmd",  markdown)
-           ,("rst"          , \o s -> return $ readRST o s)
-           ,("mediawiki"    , \o s -> return $ readMediaWiki o s)
-           ,("docbook"      , \o s -> return $ readDocBook o s)
-           ,("opml"         , \o s -> return $ readOPML o s)
-           ,("org"          , \o s -> return $ readOrg o s)
-           ,("textile"      , \o s -> return $ readTextile o s) -- TODO : textile+lhs
-           ,("html"         , \o s -> return $ readHtml o s)
-           ,("latex"        , \o s -> return $ readLaTeX o s)
-           ,("haddock"      , \o s -> return $ readHaddock o s)
+readers :: [(String, Reader)]
+readers = [ ("native"       , StringReader $ \_ s -> return $ readNative s)
+           ,("json"         , mkStringReader readJSON )
+           ,("markdown"     , StringReader  markdown)
+           ,("markdown_strict" , StringReader markdown)
+           ,("markdown_phpextra" , StringReader markdown)
+           ,("markdown_github" , StringReader markdown)
+           ,("markdown_mmd",  StringReader markdown)
+           ,("rst"          , mkStringReader readRST )
+           ,("mediawiki"    , mkStringReader readMediaWiki)
+           ,("docbook"      , mkStringReader readDocBook)
+           ,("opml"         , mkStringReader readOPML)
+           ,("org"          , mkStringReader readOrg)
+           ,("textile"      , mkStringReader readTextile) -- TODO : textile+lhs
+           ,("html"         , mkStringReader readHtml)
+           ,("latex"        , mkStringReader readLaTeX)
+           ,("haddock"      , mkStringReader readHaddock)
+           ,("docx"         , mkBSReader readDocX)
            ]
 
 data Writer = PureStringWriter   (WriterOptions -> Pandoc -> String)
@@ -280,14 +293,17 @@ getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers, Ext
 getDefaultExtensions _                 = Set.fromList [Ext_auto_identifiers]
 
 -- | Retrieve reader based on formatSpec (format+extensions).
-getReader :: String -> Either String (ReaderOptions -> String -> IO Pandoc)
+getReader :: String -> Either String Reader
 getReader s =
   case parseFormatSpec s of
        Left e  -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e]
-       Right (readerName, setExts) ->
+       Right (readerName, setExts) -> 
            case lookup readerName readers of
                    Nothing  -> Left $ "Unknown reader: " ++ readerName
-                   Just  r  -> Right $ \o ->
+                   Just  (StringReader r)  -> Right $ StringReader $ \o ->
+                                  r o{ readerExtensions = setExts $
+                                            getDefaultExtensions readerName }
+                   Just (ByteStringReader r) -> Right $ ByteStringReader $ \o ->
                                   r o{ readerExtensions = setExts $
                                             getDefaultExtensions readerName }
 
diff --git a/src/Text/Pandoc/Readers/DocX.hs b/src/Text/Pandoc/Readers/DocX.hs
new file mode 100644
index 000000000..976e2e271
--- /dev/null
+++ b/src/Text/Pandoc/Readers/DocX.hs
@@ -0,0 +1,479 @@
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.DocX
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of DocX type (defined in Text.Pandoc.Readers.DocX.Parse)
+to 'Pandoc' document.  -}
+
+{-
+Current state of implementation of DocX entities ([x] means
+implemented, [-] means partially implemented):
+
+* Blocks
+
+  - [X] Para
+  - [X] CodeBlock (styled with `SourceCode`)
+  - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally,
+        indented)
+  - [X] OrderedList
+  - [X] BulletList
+  - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`)
+  - [X] Header (styled with `Heading#`)
+  - [ ] HorizontalRule
+  - [-] Table (column widths and alignments not yet implemented)
+
+* Inlines
+
+  - [X] Str
+  - [X] Emph (From italics. `underline` currently read as span. In
+        future, it might optionally be emph as well)
+  - [X] Strong
+  - [X] Strikeout
+  - [X] Superscript
+  - [X] Subscript
+  - [X] SmallCaps
+  - [ ] Quoted
+  - [ ] Cite
+  - [X] Code (styled with `VerbatimChar`)
+  - [X] Space
+  - [X] LineBreak (these are invisible in Word: entered with Shift-Return)
+  - [ ] Math
+  - [X] Link (links to an arbitrary bookmark create a span with the target as
+        id and "anchor" class)
+  - [-] Image (Links to path in archive. Future option for
+        data-encoded URI likely.)
+  - [X] Note (Footnotes and Endnotes are silently combined.)
+-}
+
+module Text.Pandoc.Readers.DocX
+       ( readDocX
+       ) where
+
+import Codec.Archive.Zip
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Builder (text, toList)
+import Text.Pandoc.Generic (bottomUp)
+import Text.Pandoc.MIME (getMimeType)
+import Text.Pandoc.UTF8 (toString)
+import Text.Pandoc.Readers.DocX.Parse
+import Text.Pandoc.Readers.DocX.Lists
+import Data.Maybe (mapMaybe, isJust, fromJust)
+import Data.List (delete, isPrefixOf, (\\), intersect)
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Base64 (encode)
+import System.FilePath (combine)
+
+readDocX :: ReaderOptions
+         -> B.ByteString
+         -> Pandoc
+readDocX opts bytes =
+  case archiveToDocX (toArchive bytes) of
+    Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
+    Nothing   -> error $ "couldn't parse docx file"
+
+runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)])
+runStyleToSpanAttr rPr = ("",
+                          mapMaybe id [
+                            if isBold rPr then (Just "strong") else Nothing,
+                            if isItalic rPr then (Just "emph") else Nothing,
+                            if isSmallCaps rPr then (Just "smallcaps") else Nothing,
+                            if isStrike rPr then (Just "strike") else Nothing,
+                            if isSuperScript rPr then (Just "superscript") else Nothing,
+                            if isSubScript rPr then (Just "subscript") else Nothing,
+                            rStyle rPr],
+                          case underline rPr of
+                            Just fmt -> [("underline", fmt)]
+                            _        -> []
+                         )
+
+parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)])
+parStyleToDivAttr pPr = ("",
+                          pStyle pPr,
+                          case indent pPr of
+                            Just n  -> [("indent", (show n))]
+                            Nothing -> []
+                         )
+
+strToInlines :: String -> [Inline]
+strToInlines = toList . text
+
+codeSpans :: [String]
+codeSpans = ["VerbatimChar"]
+
+blockQuoteDivs :: [String]
+blockQuoteDivs = ["Quote", "BlockQuote"]
+
+codeDivs :: [String]
+codeDivs = ["SourceCode"]
+
+runElemToInlines :: RunElem -> [Inline]
+runElemToInlines (TextRun s) = strToInlines s
+runElemToInlines (LnBrk) = [LineBreak]
+
+runElemToString :: RunElem -> String
+runElemToString (TextRun s) = s
+runElemToString (LnBrk) = ['\n']
+
+runElemsToString :: [RunElem] -> String
+runElemsToString = concatMap runElemToString
+
+strNormalize :: [Inline] -> [Inline]
+strNormalize [] = []
+strNormalize (Str "" : ils) = strNormalize ils
+strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l)
+strNormalize (il:ils) = il : (strNormalize ils)
+
+runToInlines :: ReaderOptions -> DocX -> Run -> [Inline]
+runToInlines _ _ (Run rs runElems) 
+  | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
+    case runStyleToSpanAttr rs == ("", [], []) of
+      True -> [Str (runElemsToString runElems)]
+      False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]]
+  | otherwise = case runStyleToSpanAttr rs == ("", [], []) of
+      True -> concatMap runElemToInlines runElems
+      False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)]
+runToInlines opts docx@(DocX _ notes _ _ _ ) (Footnote fnId) =
+  case (getFootNote fnId notes) of
+    Just bodyParts ->
+      [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
+    Nothing        ->
+      [Note [Div ("", ["footnote"], []) []]]
+runToInlines opts docx@(DocX _ notes _ _ _) (Endnote fnId) =
+  case (getEndNote fnId notes) of
+    Just bodyParts ->
+      [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
+    Nothing        ->
+      [Note [Div ("", ["endnote"], []) []]]
+
+parPartToInlines :: ReaderOptions -> DocX -> ParPart -> [Inline]
+parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
+parPartToInlines _ _ (BookMark _ anchor) =
+  [Span (anchor, ["anchor"], []) []]
+parPartToInlines _ (DocX _ _ _ rels _) (Drawing relid) =
+  case lookupRelationship relid rels of
+    Just target -> [Image [] (combine "word" target, "")]
+    Nothing     -> [Image [] ("", "")]
+parPartToInlines opts docx (InternalHyperLink anchor runs) =
+  [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")]
+parPartToInlines opts docx@(DocX _ _ _ rels _) (ExternalHyperLink relid runs) =
+  case lookupRelationship relid rels of
+    Just target ->
+      [Link (concatMap (runToInlines opts docx) runs) (target, "")]
+    Nothing ->
+      [Link (concatMap (runToInlines opts docx) runs) ("", "")]
+
+isAnchorSpan :: Inline -> Bool
+isAnchorSpan (Span (ident, classes, kvs) ils) =
+  (not . null) ident &&
+  classes == ["anchor"] &&
+  null kvs &&
+  null ils
+isAnchorSpan _ = False
+
+dummyAnchors :: [String]
+dummyAnchors = ["_GoBack"]
+
+makeHeaderAnchors :: Block -> Block
+makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
+  case filter isAnchorSpan ils of
+    []   -> h
+    (x@(Span (ident, _, _) _) : xs) ->
+      case ident `elem` dummyAnchors of
+        True -> h
+        False -> Header n (ident, classes, kvs) (ils \\ (x:xs))
+    _ -> h
+makeHeaderAnchors blk = blk
+  
+
+parPartsToInlines :: ReaderOptions -> DocX -> [ParPart] -> [Inline]
+parPartsToInlines opts docx parparts =
+  --
+  -- We're going to skip data-uri's for now. It should be an option,
+  -- not mandatory.
+  --
+  --bottomUp (makeImagesSelfContained docx) $
+  bottomUp spanCorrect $
+  bottomUp spanTrim $
+  bottomUp spanReduce $
+  concatMap (parPartToInlines opts docx) parparts
+
+cellToBlocks :: ReaderOptions -> DocX -> Cell -> [Block]
+cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps
+
+rowToBlocksList :: ReaderOptions -> DocX -> Row -> [[Block]]
+rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
+
+bodyPartToBlock :: ReaderOptions -> DocX -> BodyPart -> Block
+bodyPartToBlock opts docx (Paragraph pPr parparts) =
+  Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)]
+bodyPartToBlock opts docx@(DocX _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
+  let
+    kvs = case lookupLevel numId lvl numbering of
+      Just (_, fmt, txt, Just start) -> [ ("level", lvl)
+                                        , ("num-id", numId)
+                                        , ("format", fmt)
+                                        , ("text", txt)
+                                        , ("start", (show start))
+                                        ]
+      
+      Just (_, fmt, txt, Nothing)    -> [ ("level", lvl)
+                                        , ("num-id", numId)
+                                        , ("format", fmt)
+                                        , ("text", txt)
+                                        ]
+      Nothing                        -> []
+  in
+   Div
+   ("", ["list-item"], kvs)
+   [bodyPartToBlock opts docx (Paragraph pPr parparts)]
+bodyPartToBlock _ _ (Tbl _ _ _ []) =
+  Para []
+bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
+  let caption = strToInlines cap
+      (hdr, rows) = case firstRowFormatting look of
+        True -> (Just r, rs)
+        False -> (Nothing, r:rs)
+      hdrCells = case hdr of
+        Just r' -> rowToBlocksList opts docx r'
+        Nothing -> []
+      cells = map (rowToBlocksList opts docx) rows
+      
+      size = case null hdrCells of
+        True -> length $ head cells
+        False -> length $ hdrCells
+      --
+      -- The two following variables (horizontal column alignment and
+      -- relative column widths) go to the default at the
+      -- moment. Width information is in the TblGrid field of the Tbl,
+      -- so should be possible. Alignment might be more difficult,
+      -- since there doesn't seem to be a column entity in docx.
+      alignments = take size (repeat AlignDefault)
+      widths = take size (repeat 0) :: [Double]
+  in
+   Table caption alignments widths hdrCells cells
+
+makeImagesSelfContained :: DocX -> Inline -> Inline
+makeImagesSelfContained (DocX _ _ _ _ media) i@(Image alt (uri, title)) =
+  case lookup uri media of
+    Just bs -> case getMimeType uri of
+      Just mime ->  let data_uri =
+                          "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs)
+                    in
+                     Image alt (data_uri, title)
+      Nothing  -> i
+    Nothing -> i
+makeImagesSelfContained _ inline = inline
+
+bodyToBlocks :: ReaderOptions -> DocX -> Body -> [Block]
+bodyToBlocks opts docx (Body bps) =
+  bottomUp removeEmptyPars $
+  bottomUp strNormalize $ 
+  bottomUp spanRemove $ 
+  bottomUp divRemove $
+  map (makeHeaderAnchors) $
+  bottomUp divCorrect $
+  bottomUp divReduce $
+  bottomUp divCorrectPreReduce $
+  bottomUp blocksToDefinitions $
+  blocksToBullets $
+  map (bodyPartToBlock opts docx) bps
+
+docxToBlocks :: ReaderOptions -> DocX -> [Block]
+docxToBlocks opts d@(DocX (Document _ body) _ _ _ _) = bodyToBlocks opts d body
+
+spanReduce :: [Inline] -> [Inline]
+spanReduce [] = []
+spanReduce ((Span (id1, classes1, kvs1) ils1) : ils)
+  | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils)
+spanReduce (s1@(Span (id1, classes1, kvs1) ils1) :
+            s2@(Span (id2, classes2, kvs2) ils2) :
+            ils) =
+  let classes'  = classes1 `intersect` classes2
+      kvs'      = kvs1 `intersect` kvs2
+      classes1' = classes1 \\ classes'
+      kvs1'     = kvs1 \\ kvs'
+      classes2' = classes2 \\ classes'
+      kvs2'     = kvs2 \\ kvs'
+  in
+   case null classes' && null kvs' of
+     True -> s1 : (spanReduce (s2 : ils))
+     False -> let attr'  = ("", classes', kvs')
+                  attr1' = (id1, classes1', kvs1')
+                  attr2' = (id2, classes2', kvs2')
+              in
+               spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] :
+                           ils)
+spanReduce (il:ils) = il : (spanReduce ils)
+
+ilToCode :: Inline -> String
+ilToCode (Str s) = s
+ilToCode _ = ""
+
+spanRemove' :: Inline -> [Inline]
+spanRemove' s@(Span (ident, classes, _) [])
+  -- "_GoBack" is automatically inserted. We don't want to keep it.
+  | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s]
+spanRemove' (Span (_, _, kvs) ils) =
+  case lookup "underline" kvs of
+    Just val -> [Span ("", [], [("underline", val)]) ils]
+    Nothing  -> ils
+spanRemove' il = [il]
+
+spanRemove :: [Inline] -> [Inline]
+spanRemove = concatMap spanRemove'
+
+spanTrim' :: Inline -> [Inline]
+spanTrim' il@(Span _ []) = [il]
+spanTrim' il@(Span attr (il':[]))
+  | il' == Space = [Span attr [], Space]
+  | otherwise = [il]
+spanTrim' (Span attr ils)
+  | head ils == Space && last ils == Space =
+    [Space, Span attr (init $ tail ils), Space]
+  | head ils == Space = [Space, Span attr (tail ils)]
+  | last ils == Space = [Span attr (init ils), Space]
+spanTrim' il = [il]
+
+spanTrim :: [Inline] -> [Inline]
+spanTrim = concatMap spanTrim'
+
+spanCorrect' :: Inline -> [Inline]
+spanCorrect' (Span ("", [], []) ils) = ils
+spanCorrect' (Span (ident, classes, kvs) ils)
+  | "emph" `elem` classes =
+    [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils]
+  | "strong" `elem` classes =
+      [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils]
+  | "smallcaps" `elem` classes =
+      [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils]
+  | "strike" `elem` classes =
+      [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils]
+  | "superscript" `elem` classes =
+      [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils]
+  | "subscript" `elem` classes =
+      [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils]
+  | (not . null) (codeSpans `intersect` classes) =
+         [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)]
+  | otherwise =
+      [Span (ident, classes, kvs) ils]
+spanCorrect' il = [il]
+
+spanCorrect :: [Inline] -> [Inline]
+spanCorrect = concatMap spanCorrect'
+
+removeEmptyPars :: [Block] -> [Block]
+removeEmptyPars blks = filter (\b -> b /= (Para [])) blks
+
+divReduce :: [Block] -> [Block]
+divReduce [] = []
+divReduce ((Div (id1, classes1, kvs1) blks1) : blks)
+  | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks)
+divReduce (d1@(Div (id1, classes1, kvs1) blks1) :
+           d2@(Div (id2, classes2, kvs2) blks2) :
+            blks) =
+  let classes'  = classes1 `intersect` classes2
+      kvs'      = kvs1 `intersect` kvs2
+      classes1' = classes1 \\ classes'
+      kvs1'     = kvs1 \\ kvs'
+      classes2' = classes2 \\ classes'
+      kvs2'     = kvs2 \\ kvs'
+  in
+   case null classes' && null kvs' of
+     True -> d1 : (divReduce (d2 : blks))
+     False -> let attr'  = ("", classes', kvs')
+                  attr1' = (id1, classes1', kvs1')
+                  attr2' = (id2, classes2', kvs2')
+              in
+               divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] :
+                           blks)
+divReduce (blk:blks) = blk : (divReduce blks)
+
+isHeaderClass :: String -> Maybe Int
+isHeaderClass s | "Heading" `isPrefixOf` s =
+  case reads (drop (length "Heading") s) :: [(Int, String)] of
+    [] -> Nothing
+    ((n, "") : []) -> Just n
+    _       -> Nothing
+isHeaderClass _ = Nothing
+
+findHeaderClass :: [String] -> Maybe Int
+findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of
+  [] -> Nothing
+  n : _ -> Just n
+
+blksToInlines :: [Block] -> [Inline]
+blksToInlines (Para ils : _) = ils
+blksToInlines (Plain ils : _) = ils
+blksToInlines _ = []
+
+divCorrectPreReduce' :: Block -> [Block]
+divCorrectPreReduce' (Div (ident, classes, kvs) blks)
+  | isJust $ findHeaderClass classes =
+    let n = fromJust $ findHeaderClass classes
+    in
+    [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)]
+  | otherwise = [Div (ident, classes, kvs) blks]
+divCorrectPreReduce' blk = [blk]
+
+divCorrectPreReduce :: [Block] -> [Block]
+divCorrectPreReduce = concatMap divCorrectPreReduce'
+
+blkToCode :: Block -> String
+blkToCode (Para []) = ""
+blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils))
+blkToCode (Para ((Span (_, classes, _) ils'): ils))
+  | (not . null) (codeSpans `intersect` classes) =
+    (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils))
+blkToCode _ = ""
+
+divRemove' :: Block -> [Block]
+divRemove' (Div (_, _, kvs) blks) =
+  case lookup "indent" kvs of
+    Just val -> [Div ("", [], [("indent", val)]) blks]
+    Nothing  -> blks
+divRemove' blk = [blk]
+
+divRemove :: [Block] -> [Block]
+divRemove = concatMap divRemove'
+                                                 
+divCorrect' :: Block -> [Block]
+divCorrect' b@(Div (ident, classes, kvs) blks)
+  | (not . null) (blockQuoteDivs `intersect` classes) =
+    [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]]
+  | (not . null) (codeDivs `intersect` classes) =
+    [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)]
+  | otherwise =
+      case lookup "indent" kvs of
+        Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]
+        Just _   ->
+          [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]]
+        Nothing  -> [b]
+divCorrect' blk = [blk]
+
+divCorrect :: [Block] -> [Block]
+divCorrect = concatMap divCorrect'
diff --git a/src/Text/Pandoc/Readers/DocX/Lists.hs b/src/Text/Pandoc/Readers/DocX/Lists.hs
new file mode 100644
index 000000000..b20679261
--- /dev/null
+++ b/src/Text/Pandoc/Readers/DocX/Lists.hs
@@ -0,0 +1,208 @@
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.DocX.Lists
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Functions for converting flat DocX paragraphs into nested lists.
+-}
+
+module Text.Pandoc.Readers.DocX.Lists ( blocksToBullets
+                                      , blocksToDefinitions) where
+
+import Text.Pandoc.JSON
+import Text.Pandoc.Shared (trim)
+import Control.Monad
+import Data.List
+import Data.Maybe
+
+isListItem :: Block -> Bool
+isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True
+isListItem _ = False
+
+getLevel :: Block -> Maybe Integer
+getLevel (Div (_, _, kvs) _) =  liftM read $ lookup "level" kvs
+getLevel _ = Nothing
+
+getLevelN :: Block -> Integer
+getLevelN b = case getLevel b of
+  Just n -> n
+  Nothing -> -1
+
+getNumId :: Block -> Maybe Integer
+getNumId (Div (_, _, kvs) _) =  liftM read $ lookup "num-id" kvs
+getNumId _ = Nothing
+
+getNumIdN :: Block -> Integer
+getNumIdN b = case getNumId b of
+  Just n -> n
+  Nothing -> -1
+
+getText :: Block -> Maybe String
+getText (Div (_, _, kvs) _) = lookup "text" kvs
+getText _ = Nothing
+
+data ListType = Itemized | Enumerated ListAttributes
+
+listStyleMap :: [(String, ListNumberStyle)]
+listStyleMap = [("upperLetter", UpperAlpha),
+                ("lowerLetter", LowerAlpha),
+                ("upperRoman", UpperRoman),
+                ("lowerRoman", LowerRoman),
+                ("decimal", Decimal)]
+
+listDelimMap :: [(String, ListNumberDelim)]
+listDelimMap = [("%1)", OneParen),
+                ("(%1)", TwoParens),
+                ("%1.", Period)]
+
+getListType :: Block -> Maybe ListType
+getListType b@(Div (_, _, kvs) _) | isListItem b =
+  let
+    start = lookup "start" kvs
+    frmt = lookup "format" kvs
+    txt  = lookup "text" kvs
+  in
+   case frmt of
+     Just "bullet" -> Just Itemized
+     Just f        ->
+       case txt of
+         Just t -> Just $ Enumerated (
+                  read (fromMaybe "1" start) :: Int,
+                  fromMaybe DefaultStyle (lookup f listStyleMap),
+                  fromMaybe DefaultDelim (lookup t listDelimMap))
+         Nothing -> Nothing
+     _ -> Nothing
+getListType _ = Nothing
+
+listParagraphDivs :: [String]
+listParagraphDivs = ["ListParagraph"]
+
+-- This is a first stab at going through and attaching meaning to list
+-- paragraphs, without an item marker, following a list item. We
+-- assume that these are paragraphs in the same item.
+
+handleListParagraphs :: [Block] -> [Block]
+handleListParagraphs [] = []
+handleListParagraphs (
+  (Div attr1@(_, classes1, _) blks1) :
+  (Div (ident2, classes2, kvs2) blks2) :
+  blks
+  ) | "list-item" `elem` classes1 &&
+    not ("list-item" `elem` classes2) &&
+    (not . null) (listParagraphDivs `intersect` classes2) =
+      -- We don't want to keep this indent.
+      let newDiv2 =
+            (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2)
+      in
+       handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks)
+handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks)
+                  
+separateBlocks' :: Block -> [[Block]] -> [[Block]]
+separateBlocks' blk ([] : []) = [[blk]]
+separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]]
+separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]]
+-- The following is for the invisible bullet lists. This is how
+-- pandoc-generated ooxml does multiparagraph item lists.
+separateBlocks' b acc | liftM trim (getText b) == Just "" =
+  (init acc) ++ [(last acc) ++ [b]]
+separateBlocks' b acc = acc ++ [[b]]
+
+separateBlocks :: [Block] -> [[Block]]
+separateBlocks blks = foldr separateBlocks' [[]] (reverse blks)
+
+flatToBullets' :: Integer -> [Block] -> [Block]
+flatToBullets' _ [] = []
+flatToBullets' num xs@(b : elems)
+  | getLevelN b == num = b : (flatToBullets' num elems)
+  | otherwise = 
+    let bNumId = getNumIdN b
+        bLevel = getLevelN b
+        (children, remaining) =
+          span
+          (\b' ->
+            ((getLevelN b') > bLevel ||
+             ((getLevelN b') == bLevel && (getNumIdN b') == bNumId)))
+          xs
+    in
+     case getListType b of
+       Just (Enumerated attr) ->
+         (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) :
+         (flatToBullets' num remaining)
+       _ ->
+         (BulletList (separateBlocks $ flatToBullets' bLevel children)) :
+         (flatToBullets' num remaining)
+
+flatToBullets :: [Block] -> [Block]
+flatToBullets elems = flatToBullets' (-1) elems
+
+blocksToBullets :: [Block] -> [Block]
+blocksToBullets blks =
+  -- bottomUp removeListItemDivs $ 
+  flatToBullets $ (handleListParagraphs blks)
+
+
+plainParaInlines :: Block -> [Inline]
+plainParaInlines (Plain ils) = ils
+plainParaInlines (Para ils) = ils
+plainParaInlines _ = []
+
+blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block]
+blocksToDefinitions' []     acc [] = reverse acc
+blocksToDefinitions' defAcc acc [] =
+  reverse $ (DefinitionList (reverse defAcc)) : acc
+blocksToDefinitions' defAcc acc
+  ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks)
+  | "DefinitionTerm" `elem` classes1 && "Definition"  `elem` classes2 =
+    let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
+        pair = case remainingAttr2 == ("", [], []) of
+          True -> (concatMap plainParaInlines blks1, [blks2])
+          False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
+    in
+     blocksToDefinitions' (pair : defAcc) acc blks
+blocksToDefinitions' defAcc acc
+  ((Div (ident2, classes2, kvs2) blks2) : blks)
+  | (not . null) defAcc && "Definition"  `elem` classes2 =
+    let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
+        defItems2 = case remainingAttr2 == ("", [], []) of
+          True -> blks2
+          False -> [Div remainingAttr2 blks2]
+        ((defTerm, defItems):defs) = defAcc
+        defAcc' = case null defItems of
+          True -> (defTerm, [defItems2]) : defs
+          False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
+    in
+     blocksToDefinitions' defAcc' acc blks
+blocksToDefinitions' [] acc (b:blks) =
+  blocksToDefinitions' [] (b:acc) blks
+blocksToDefinitions' defAcc acc (b:blks) =
+  blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
+
+
+blocksToDefinitions :: [Block] -> [Block]
+blocksToDefinitions = blocksToDefinitions' [] []
+
+    
+    
+    
diff --git a/src/Text/Pandoc/Readers/DocX/Parse.hs b/src/Text/Pandoc/Readers/DocX/Parse.hs
new file mode 100644
index 000000000..d7033d9e8
--- /dev/null
+++ b/src/Text/Pandoc/Readers/DocX/Parse.hs
@@ -0,0 +1,604 @@
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.DocX.Parse
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of DocX archive into DocX haskell type
+-}
+
+
+module Text.Pandoc.Readers.DocX.Parse (  DocX(..)
+                                       , Document(..)
+                                       , Body(..)
+                                       , BodyPart(..)
+                                       , TblLook(..)
+                                       , ParPart(..)
+                                       , Run(..)
+                                       , RunElem(..)
+                                       , Notes
+                                       , Numbering
+                                       , Relationship
+                                       , Media
+                                       , RunStyle(..)
+                                       , ParagraphStyle(..)
+                                       , Row(..)
+                                       , Cell(..)
+                                       , getFootNote
+                                       , getEndNote
+                                       , lookupLevel
+                                       , lookupRelationship
+                                       , archiveToDocX
+                                       ) where
+import Codec.Archive.Zip
+import Text.XML.Light
+import Data.Maybe
+import Data.List
+import System.FilePath
+import Data.Bits ((.|.))
+import qualified Data.ByteString.Lazy as B
+import qualified Text.Pandoc.UTF8 as UTF8
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+
+type NameSpaces = [(String, String)]
+
+data DocX = DocX Document Notes Numbering [Relationship] Media
+          deriving Show
+
+archiveToDocX :: Archive -> Maybe DocX
+archiveToDocX archive = do
+  let notes = archiveToNotes archive
+      rels = archiveToRelationships archive
+      media = archiveToMedia archive
+  doc <- archiveToDocument archive
+  numbering <- archiveToNumbering archive
+  return $ DocX doc notes numbering rels media
+
+data Document = Document NameSpaces Body 
+          deriving Show
+
+archiveToDocument :: Archive -> Maybe Document
+archiveToDocument zf = do
+  entry <- findEntryByPath "word/document.xml" zf
+  docElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  let namespaces = mapMaybe attrToNSPair (elAttribs docElem) 
+  bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem
+  body <- elemToBody namespaces bodyElem
+  return $ Document namespaces body
+
+type Media = [(FilePath, B.ByteString)]
+
+filePathIsMedia :: FilePath -> Bool
+filePathIsMedia fp =
+  let (dir, _) = splitFileName fp
+  in
+   (dir == "word/media/")
+
+getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString)
+getMediaPair zf fp =
+  case findEntryByPath fp zf of
+    Just e -> Just (fp, fromEntry e)
+    Nothing -> Nothing
+
+archiveToMedia :: Archive -> Media
+archiveToMedia zf =
+  mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf))
+
+data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
+                 deriving Show
+
+data Numb = Numb String String           -- right now, only a key to an abstract num
+            deriving Show
+
+data AbstractNumb = AbstractNumb String [Level]
+                    deriving Show
+
+-- (ilvl, format, string, start)
+type Level = (String, String, String, Maybe Integer)
+
+lookupLevel :: String -> String -> Numbering -> Maybe Level
+lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
+  absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
+  lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs
+  lvl  <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
+  return lvl
+
+numElemToNum :: NameSpaces -> Element -> Maybe Numb
+numElemToNum ns element |
+  qName (elName element) == "num" &&
+  qURI (elName element) == (lookup "w" ns) = do
+    numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element
+    absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element
+                >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+    return $ Numb numId absNumId
+numElemToNum _ _ = Nothing
+
+absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
+absNumElemToAbsNum ns element |
+  qName (elName element) == "abstractNum" &&
+  qURI (elName element) == (lookup "w" ns) = do
+    absNumId <- findAttr
+                (QName "abstractNumId" (lookup "w" ns) (Just "w"))
+                element
+    let levelElems = findChildren
+                 (QName "lvl" (lookup "w" ns) (Just "w"))
+                 element
+        levels = mapMaybe id $ map (levelElemToLevel ns) levelElems
+    return $ AbstractNumb absNumId levels
+absNumElemToAbsNum _ _ = Nothing
+
+levelElemToLevel :: NameSpaces -> Element -> Maybe Level
+levelElemToLevel ns element |
+    qName (elName element) == "lvl" &&
+    qURI (elName element) == (lookup "w" ns) = do
+      ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element
+      fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element
+             >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+      txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element
+             >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+      let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element
+                  >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+                  >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+      return (ilvl, fmt, txt, start)
+levelElemToLevel _ _ = Nothing
+
+archiveToNumbering :: Archive -> Maybe Numbering
+archiveToNumbering zf =
+  case findEntryByPath "word/numbering.xml" zf of
+    Nothing -> Just $ Numbering [] [] []
+    Just entry -> do
+      numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+      let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem)
+          numElems = findChildren
+                     (QName "num" (lookup "w" namespaces) (Just "w"))
+                     numberingElem
+          absNumElems = findChildren
+                        (QName "abstractNum" (lookup "w" namespaces) (Just "w"))
+                        numberingElem
+          nums = mapMaybe id $ map (numElemToNum namespaces) numElems
+          absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems
+      return $ Numbering namespaces nums absNums
+
+data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])])
+           deriving Show
+
+noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart])
+noteElemToNote ns element
+  | qName (elName element) `elem` ["endnote", "footnote"] &&
+    qURI (elName element) == (lookup "w" ns) =
+      do
+        noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
+        let bps = map fromJust
+                  $ filter isJust
+                  $ map (elemToBodyPart ns)
+                  $ filterChildrenName (isParOrTbl ns) element
+        return $ (noteId, bps)
+noteElemToNote _ _ = Nothing
+
+getFootNote :: String -> Notes -> Maybe [BodyPart]
+getFootNote s (Notes _ fns _) = fns >>= (lookup s)
+
+getEndNote :: String -> Notes -> Maybe [BodyPart]
+getEndNote s (Notes _ _ ens) = ens >>= (lookup s)
+
+elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])]
+elemToNotes ns notetype element
+  | qName (elName element) == (notetype ++ "s") &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ map fromJust
+      $ filter isJust
+      $ map (noteElemToNote ns)
+      $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element
+elemToNotes _ _ _ = Nothing
+
+archiveToNotes :: Archive -> Notes
+archiveToNotes zf =
+  let fnElem = findEntryByPath "word/footnotes.xml" zf
+               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+      enElem = findEntryByPath "word/endnotes.xml" zf
+               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+      fn_namespaces = case fnElem of
+        Just e -> mapMaybe attrToNSPair (elAttribs e)
+        Nothing -> []
+      en_namespaces = case enElem of
+        Just e -> mapMaybe attrToNSPair (elAttribs e)
+        Nothing -> []
+      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
+      fn = fnElem >>= (elemToNotes ns "footnote")
+      en = enElem >>= (elemToNotes ns "endnote")
+  in
+   Notes ns fn en
+
+
+data Relationship = Relationship (RelId, Target)
+                  deriving Show
+
+lookupRelationship :: RelId -> [Relationship] -> Maybe Target
+lookupRelationship relid rels =
+  lookup relid (map (\(Relationship pair) -> pair) rels)
+
+filePathIsRel :: FilePath -> Bool
+filePathIsRel fp =
+  let (dir, name) = splitFileName fp
+  in
+   (dir == "word/_rels/") && ((takeExtension name) == ".rels")
+
+relElemToRelationship :: Element -> Maybe Relationship
+relElemToRelationship element | qName (elName element) == "Relationship" =
+  do
+    relId <- findAttr (QName "Id" Nothing Nothing) element
+    target <- findAttr (QName "Target" Nothing Nothing) element
+    return $ Relationship (relId, target)
+relElemToRelationship _ = Nothing
+  
+
+archiveToRelationships :: Archive -> [Relationship]
+archiveToRelationships archive = 
+  let relPaths = filter filePathIsRel (filesInArchive archive)
+      entries  = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths
+      relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
+      rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems
+  in
+   rels
+   
+data Body = Body [BodyPart]
+          deriving Show
+
+isParOrTbl :: NameSpaces -> QName -> Bool
+isParOrTbl ns q = qName q `elem` ["p", "tbl"] &&
+                  qURI q == (lookup "w" ns)
+
+elemToBody :: NameSpaces -> Element ->  Maybe Body
+elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) =
+  Just $ Body
+  $ map fromJust
+  $ filter isJust
+  $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element
+elemToBody _ _ = Nothing
+
+isRunOrLinkOrBookmark :: NameSpaces -> QName ->  Bool
+isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] &&
+                   qURI q == (lookup "w" ns)
+
+elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+elemToNumInfo ns element
+  | qName (elName element) == "p" &&
+    qURI (elName element) == (lookup "w" ns) =
+      do
+        pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element
+        numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr
+        lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>=
+                findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>=
+                 findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        return (numId, lvl)
+elemToNumInfo _ _ = Nothing
+
+-- isBookMarkTag :: NameSpaces -> QName -> Bool
+-- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] &&
+--                      qURI q == (lookup "w" ns)
+
+-- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark
+-- parChildrenToBookmark ns (bms : bme : _)
+--   | qName (elName bms) == "bookmarkStart" &&
+--     qURI (elName bms) == (lookup "w" ns) &&
+--     qName (elName bme) == "bookmarkEnd" &&
+--     qURI (elName bme) == (lookup "w" ns) = do
+--       bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms
+--       bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms
+--       return $ (bmId, bmName)
+-- parChildrenToBookmark _ _ = Nothing
+
+elemToBodyPart :: NameSpaces -> Element ->  Maybe BodyPart
+elemToBodyPart ns element
+  | qName (elName element) == "p" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let parstyle = elemToParagraphStyle ns element
+          parparts = mapMaybe id
+                     $ map (elemToParPart ns)
+                     $ filterChildrenName (isRunOrLinkOrBookmark ns) element
+      in
+       case elemToNumInfo ns element of
+         Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts
+         Nothing -> Just $ Paragraph parstyle parparts
+  | qName (elName element) == "tbl" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let
+        caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
+                  >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w"))
+                  >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        grid = case
+          findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element
+          of
+            Just g -> elemToTblGrid ns g
+            Nothing -> []
+        tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
+                  >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w"))
+                  >>= elemToTblLook ns
+      in
+       Just $ Tbl
+       (fromMaybe "" caption)
+       grid
+       (fromMaybe defaultTblLook tblLook)
+       (mapMaybe (elemToRow ns) (elChildren element))
+  | otherwise = Nothing
+
+elemToTblLook :: NameSpaces -> Element -> Maybe TblLook
+elemToTblLook ns element
+  | qName (elName element) == "tblLook" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element
+          val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element
+          firstRowFmt = 
+            case firstRow of
+              Just "1" -> True
+              Just  _  -> False
+              Nothing -> case val of
+                Just bitMask -> testBitMask bitMask 0x020
+                Nothing      -> False
+      in
+       Just $ TblLook{firstRowFormatting = firstRowFmt}
+elemToTblLook _ _ = Nothing
+
+testBitMask :: String -> Int -> Bool
+testBitMask bitMaskS n =
+  case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+    []            -> False
+    ((n', _) : _) -> ((n' .|. n) /= 0)
+
+data ParagraphStyle = ParagraphStyle { pStyle :: [String]
+                                     , indent :: Maybe Integer
+                                     }
+                      deriving Show
+
+defaultParagraphStyle :: ParagraphStyle
+defaultParagraphStyle = ParagraphStyle { pStyle = []
+                                       , indent = Nothing
+                                       }
+
+elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
+elemToParagraphStyle ns element =
+  case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of
+    Just pPr ->
+      ParagraphStyle
+      {pStyle =
+          mapMaybe id $
+          map
+          (findAttr (QName "val" (lookup "w" ns) (Just "w")))
+          (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr)
+      , indent =
+        findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>=
+        findAttr (QName "left" (lookup "w" ns) (Just "w")) >>=
+        stringToInteger
+        }
+    Nothing -> defaultParagraphStyle
+
+
+data BodyPart = Paragraph ParagraphStyle [ParPart]
+              | ListItem ParagraphStyle String String [ParPart]
+              | Tbl String TblGrid TblLook [Row]
+
+              deriving Show
+
+type TblGrid = [Integer]
+
+data TblLook = TblLook {firstRowFormatting::Bool}
+              deriving Show
+
+defaultTblLook :: TblLook
+defaultTblLook = TblLook{firstRowFormatting = False}
+
+stringToInteger :: String -> Maybe Integer
+stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+
+elemToTblGrid :: NameSpaces -> Element -> TblGrid
+elemToTblGrid ns element
+  | qName (elName element) == "tblGrid" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let
+        cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element
+      in
+       mapMaybe (\e ->
+                  findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e
+                  >>= stringToInteger
+                )
+       cols
+elemToTblGrid _ _ = []
+
+data Row = Row [Cell]
+           deriving Show
+
+
+elemToRow :: NameSpaces -> Element -> Maybe Row
+elemToRow ns element
+  | qName (elName element) == "tr" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let 
+        cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element
+      in
+       Just $ Row (mapMaybe (elemToCell ns) cells)
+elemToRow _ _ = Nothing
+
+data Cell = Cell [BodyPart]
+            deriving Show
+
+elemToCell :: NameSpaces -> Element -> Maybe Cell
+elemToCell ns element
+  | qName (elName element) == "tc" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element))
+elemToCell _ _ = Nothing
+
+data ParPart = PlainRun Run
+             | BookMark BookMarkId Anchor
+             | InternalHyperLink Anchor [Run]
+             | ExternalHyperLink RelId [Run]
+             | Drawing String
+             deriving Show
+
+data Run = Run RunStyle [RunElem]
+         | Footnote String 
+         | Endnote String
+           deriving Show
+
+data RunElem = TextRun String | LnBrk
+             deriving Show
+
+data RunStyle = RunStyle { isBold :: Bool
+                         , isItalic :: Bool
+                         , isSmallCaps :: Bool
+                         , isStrike :: Bool
+                         , isSuperScript :: Bool
+                         , isSubScript :: Bool
+                         , underline :: Maybe String
+                         , rStyle :: Maybe String }
+                deriving Show
+
+defaultRunStyle :: RunStyle
+defaultRunStyle = RunStyle { isBold = False
+                           , isItalic = False
+                           , isSmallCaps = False
+                           , isStrike = False
+                           , isSuperScript = False
+                           , isSubScript = False
+                           , underline = Nothing
+                           , rStyle = Nothing
+                           } 
+
+elemToRunStyle :: NameSpaces -> Element -> RunStyle
+elemToRunStyle ns element =
+  case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of
+    Just rPr ->
+      RunStyle
+      {
+        isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr
+      , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr
+      , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr
+      , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr
+      , isSuperScript =
+        (Just "superscript" ==
+        (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>=
+         findAttr (QName "val" (lookup "w" ns) (Just "w"))))
+      , isSubScript =
+        (Just "subscript" ==
+        (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>=
+         findAttr (QName "val" (lookup "w" ns) (Just "w"))))
+      , underline =
+        findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>=
+        findAttr (QName "val" (lookup "w" ns) (Just "w"))
+      , rStyle =
+        findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>=
+        findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        }
+    Nothing -> defaultRunStyle
+
+elemToRun :: NameSpaces -> Element -> Maybe Run
+elemToRun ns element
+  | qName (elName element) == "r" &&
+    qURI (elName element) == (lookup "w" ns) =
+      case
+        findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>=
+        findAttr (QName "id" (lookup "w" ns) (Just "w"))
+      of
+        Just s -> Just $ Footnote s
+        Nothing ->
+          case
+            findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>=
+            findAttr (QName "id" (lookup "w" ns) (Just "w"))
+          of
+            Just s -> Just $ Endnote s 
+            Nothing ->  Just $
+                        Run (elemToRunStyle ns element)
+                        (elemToRunElems ns element)
+elemToRun _ _ = Nothing
+
+elemToRunElem :: NameSpaces -> Element -> Maybe RunElem
+elemToRunElem ns element
+  | qName (elName element) == "t" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ TextRun (strContent element)
+  | qName (elName element) == "br" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ LnBrk
+  | otherwise = Nothing
+
+
+elemToRunElems :: NameSpaces -> Element -> [RunElem]
+elemToRunElems ns element
+  | qName (elName element) == "r" &&
+    qURI (elName element) == (lookup "w" ns) =
+      mapMaybe (elemToRunElem ns) (elChildren element)
+  | otherwise = []
+
+elemToDrawing :: NameSpaces -> Element -> Maybe ParPart
+elemToDrawing ns element
+  | qName (elName element) == "drawing" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+      in
+       findElement (QName "blip" (Just a_ns) (Just "a")) element
+       >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+       >>= (\s -> Just $ Drawing s)
+elemToDrawing _ _ = Nothing
+
+
+elemToParPart :: NameSpaces -> Element -> Maybe ParPart
+elemToParPart ns element
+  | qName (elName element) == "r" &&
+    qURI (elName element) == (lookup "w" ns) =
+      case findChild (QName "drawing" (lookup "w" ns) (Just "w")) element of
+        Just drawingElem -> elemToDrawing ns drawingElem
+        Nothing -> do
+          r <- elemToRun ns element
+          return $ PlainRun r
+elemToParPart ns element
+  | qName (elName element) == "bookmarkStart" &&
+    qURI (elName element) == (lookup "w" ns) = do
+      bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
+      bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element
+      return $ BookMark bmId bmName
+elemToParPart ns element
+  | qName (elName element) == "hyperlink" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let runs = map fromJust $ filter isJust $ map (elemToRun ns)
+                 $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element
+      in
+       case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of
+         Just anchor ->
+          Just $ InternalHyperLink anchor runs
+         Nothing ->
+           case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of
+             Just relId -> Just $ ExternalHyperLink relId runs
+             Nothing    -> Nothing
+elemToParPart _ _ = Nothing
+
+type Target = String
+type Anchor = String
+type BookMarkId = String
+type RelId = String
+               
diff --git a/tests/Tests/Readers/DocX.hs b/tests/Tests/Readers/DocX.hs
new file mode 100644
index 000000000..f4564ea1d
--- /dev/null
+++ b/tests/Tests/Readers/DocX.hs
@@ -0,0 +1,68 @@
+module Tests.Readers.DocX (tests) where
+
+import Text.Pandoc.Options
+import Text.Pandoc.Readers.Native
+import Text.Pandoc.Definition
+import Tests.Helpers
+import Test.Framework
+import qualified Data.ByteString.Lazy as B
+import Text.Pandoc.Readers.DocX
+
+compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc)
+compareOutput docxFile nativeFile = do
+  df <- B.readFile docxFile
+  nf <- Prelude.readFile nativeFile
+  return $ (readDocX def df, readNative nf)
+
+testCompare' :: String -> FilePath -> FilePath -> IO Test
+testCompare' name docxFile nativeFile = do
+  (dp, np) <- compareOutput docxFile nativeFile
+  return $ test id name (dp, np)
+
+testCompare :: String -> FilePath -> FilePath -> Test
+testCompare name docxFile nativeFile =
+  buildTest $ testCompare' name docxFile nativeFile
+
+
+tests :: [Test]
+tests = [ testGroup "inlines"
+          [ testCompare
+            "font formatting"
+            "docx.inline_formatting.docx"
+            "docx.inline_formatting.native"
+          , testCompare
+            "hyperlinks"
+            "docx.links.docx"
+            "docx.links.native"
+          , testCompare
+            "inline image with reference output"
+            "docx.image.docx"
+            "docx.image_no_embed.native"
+          , testCompare
+            "handling unicode input"
+            "docx.unicode.docx"
+            "docx.unicode.native"]
+        , testGroup "blocks"
+          [ testCompare
+            "headers"
+            "docx.headers.docx"
+            "docx.headers.native"
+          , testCompare
+            "lists"
+            "docx.lists.docx"
+            "docx.lists.native"
+          , testCompare
+            "footnotes and endnotes"
+            "docx.notes.docx"
+            "docx.notes.native"
+          , testCompare
+            "blockquotes (parsing indent as blockquote)"
+            "docx.block_quotes.docx"
+            "docx.block_quotes_parse_indent.native"
+          , testCompare
+            "tables"
+            "docx.tables.docx"
+            "docx.tables.native"
+          ]
+        ]
+
diff --git a/tests/docx.block_quotes.docx b/tests/docx.block_quotes.docx
new file mode 100644
index 000000000..729ae1f43
--- /dev/null
+++ b/tests/docx.block_quotes.docx
diff --git a/tests/docx.block_quotes_parse_indent.native b/tests/docx.block_quotes_parse_indent.native
new file mode 100644
index 000000000..da1cef110
--- /dev/null
+++ b/tests/docx.block_quotes_parse_indent.native
@@ -0,0 +1,8 @@
+[Header 2 ("",[],[]) [Str "Some",Space,Str "block",Space,Str "quotes,",Space,Str "in",Space,Str "different",Space,Str "ways"]
+,Para [Str "This",Space,Str "is",Space,Str "the",Space,Str "proper",Space,Str "way,",Space,Str "with",Space,Str "a",Space,Str "style"]
+,BlockQuote
+ [Para [Str "I",Space,Str "don\8217t",Space,Str "know",Space,Str "why",Space,Str "this",Space,Str "would",Space,Str "be",Space,Str "in",Space,Str "italics,",Space,Str "but",Space,Str "so",Space,Str "it",Space,Str "appears",Space,Str "to",Space,Str "be",Space,Str "on",Space,Str "my",Space,Str "screen."]]
+,Para [Str "And",Space,Str "this",Space,Str "is",Space,Str "the",Space,Str "way",Space,Str "that",Space,Str "most",Space,Str "people",Space,Str "do",Space,Str "it:"]
+,BlockQuote
+ [Para [Str "I",Space,Str "just",Space,Str "indented",Space,Str "this,",Space,Str "so",Space,Str "it",Space,Str "looks",Space,Str "like",Space,Str "a",Space,Str "block",Space,Str "quote.",Space,Str "I",Space,Str "think",Space,Str "this",Space,Str "is",Space,Str "how",Space,Str "most",Space,Str "people",Space,Str "do",Space,Str "block",Space,Str "quotes",Space,Str "in",Space,Str "their",Space,Str "documents."]]
+,Para [Str "And",Space,Str "back",Space,Str "to",Space,Str "the",Space,Str "normal",Space,Str "style."]]
diff --git a/tests/docx.headers.docx b/tests/docx.headers.docx
new file mode 100644
index 000000000..630b6bfc5
--- /dev/null
+++ b/tests/docx.headers.docx
diff --git a/tests/docx.headers.native b/tests/docx.headers.native
new file mode 100644
index 000000000..e4d4a4781
--- /dev/null
+++ b/tests/docx.headers.native
@@ -0,0 +1,5 @@
+[Header 1 ("",[],[]) [Str "A",Space,Str "Test",Space,Str "of",Space,Str "Headers"]
+,Header 2 ("",[],[]) [Str "Second",Space,Str "Level"]
+,Para [Str "Some",Space,Str "plain",Space,Str "text."]
+,Header 3 ("",[],[]) [Str "Third",Space,Str "level"]
+,Para [Str "Some",Space,Str "more",Space,Str "plain",Space,Str "text."]]
diff --git a/tests/docx.image.docx b/tests/docx.image.docx
new file mode 100644
index 000000000..060f2b204
--- /dev/null
+++ b/tests/docx.image.docx
diff --git a/tests/docx.image_no_embed.native b/tests/docx.image_no_embed.native
new file mode 100644
index 000000000..18debf135
--- /dev/null
+++ b/tests/docx.image_no_embed.native
@@ -0,0 +1,2 @@
+[Header 2 ("",[],[]) [Str "An",Space,Str "image"]
+,Para [Image [] ("word/media/image1.jpeg","")]]
diff --git a/tests/docx.inline_formatting.docx b/tests/docx.inline_formatting.docx
new file mode 100644
index 000000000..eccf26425
--- /dev/null
+++ b/tests/docx.inline_formatting.docx
diff --git a/tests/docx.inline_formatting.native b/tests/docx.inline_formatting.native
new file mode 100644
index 000000000..dc8a3d19a
--- /dev/null
+++ b/tests/docx.inline_formatting.native
@@ -0,0 +1,5 @@
+[Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."]
+,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."]
+,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Span ("",[],[("underline","single")]) [Str "single",Space,Str "underlines",Space,Str "for",Space,Emph [Str "emphasis"]],Str "."]
+,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."]
+,Para [Str "A",Space,Str "line",LineBreak,Str "break."]]
diff --git a/tests/docx.links.docx b/tests/docx.links.docx
new file mode 100644
index 000000000..10ec62fd7
--- /dev/null
+++ b/tests/docx.links.docx
diff --git a/tests/docx.links.native b/tests/docx.links.native
new file mode 100644
index 000000000..98768de5a
--- /dev/null
+++ b/tests/docx.links.native
@@ -0,0 +1,6 @@
+[Header 2 ("",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"]
+,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://google.com",""),Space,Str "to",Space,Str "a",Space,Str "popular",Space,Str "website."]
+,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#_A_section_for",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."]
+,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#my_bookmark",""),Space,Str "to",Space,Str "a",Space,Str "bookmark."]
+,Header 2 ("_A_section_for",[],[]) [Str "A",Space,Str "section",Space,Str "for",Space,Str "testing",Space,Str "link",Space,Str "targets"]
+,Para [Str "A",Space,Str "bookmark",Space,Str "right",Space,Span ("my_bookmark",["anchor"],[]) [],Str "here"]]
diff --git a/tests/docx.lists.docx b/tests/docx.lists.docx
new file mode 100644
index 000000000..bf7fd8ae4
--- /dev/null
+++ b/tests/docx.lists.docx
diff --git a/tests/docx.lists.native b/tests/docx.lists.native
new file mode 100644
index 000000000..e46bc140b
--- /dev/null
+++ b/tests/docx.lists.native
@@ -0,0 +1,18 @@
+[Header 2 ("",[],[]) [Str "Some",Space,Str "nested",Space,Str "lists"]
+,OrderedList (1,Decimal,Period)
+ [[Para [Str "one"]]
+ ,[Para [Str "two"]
+  ,OrderedList (1,LowerAlpha,DefaultDelim)
+   [[Para [Str "a"]]
+   ,[Para [Str "b"]]]]]
+,BulletList
+ [[Para [Str "one"]]
+ ,[Para [Str "two"]
+  ,BulletList
+   [[Para [Str "three"]
+    ,BulletList
+     [[Para [Str "four"]
+      ,Para [Str "Sub",Space,Str "paragraph"]]]]]]
+ ,[Para [Str "Same",Space,Str "list"]]]
+,BulletList
+ [[Para [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]]
diff --git a/tests/docx.notes.docx b/tests/docx.notes.docx
new file mode 100644
index 000000000..eb6fa12d4
--- /dev/null
+++ b/tests/docx.notes.docx
diff --git a/tests/docx.notes.native b/tests/docx.notes.native
new file mode 100644
index 000000000..1e9b6bba4
--- /dev/null
+++ b/tests/docx.notes.native
@@ -0,0 +1,2 @@
+[Header 2 ("",[],[]) [Str "A",Space,Str "footnote"]
+,Para [Str "Test",Space,Str "footnote.",Note [Para [Space,Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Space,Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]]
diff --git a/tests/docx.tables.docx b/tests/docx.tables.docx
new file mode 100644
index 000000000..7dcff8d35
--- /dev/null
+++ b/tests/docx.tables.docx
diff --git a/tests/docx.tables.native b/tests/docx.tables.native
new file mode 100644
index 000000000..8dbaabda7
--- /dev/null
+++ b/tests/docx.tables.native
@@ -0,0 +1,24 @@
+[Header 2 ("",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"]
+,Table [] [AlignDefault,AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0,0.0]
+ [[Para [Str "Name"]]
+ ,[Para [Str "Game"]]
+ ,[Para [Str "Fame"]]
+ ,[Para [Str "Blame"]]]
+ [[[Para [Str "Lebron",Space,Str "James"]]
+  ,[Para [Str "Basketball"]]
+  ,[Para [Str "Very",Space,Str "High"]]
+  ,[Para [Str "Leaving",Space,Str "Cleveland"]]]
+ ,[[Para [Str "Ryan",Space,Str "Braun"]]
+  ,[Para [Str "Baseball"]]
+  ,[Para [Str "Moderate"]]
+  ,[Para [Str "Steroids"]]]
+ ,[[Para [Str "Russell",Space,Str "Wilson"]]
+  ,[Para [Str "Football"]]
+  ,[Para [Str "High"]]
+  ,[Para [Str "Tacky",Space,Str "uniform"]]]]
+,Table [] [AlignDefault,AlignDefault] [0.0,0.0]
+ []
+ [[[Para [Str "Sinple"]]
+  ,[Para [Str "Table"]]]
+ ,[[Para [Str "Without"]]
+  ,[Para [Str "Header"]]]]]
diff --git a/tests/docx.unicode.docx b/tests/docx.unicode.docx
new file mode 100644
index 000000000..78d0107a1
--- /dev/null
+++ b/tests/docx.unicode.docx
diff --git a/tests/docx.unicode.native b/tests/docx.unicode.native
new file mode 100644
index 000000000..e636355c7
--- /dev/null
+++ b/tests/docx.unicode.native
@@ -0,0 +1 @@
+[Para [Str "Hello,",Space,Str "\19990\30028.",Space,Str "This",Space,Str "costs",Space,Str "\8364\&10."]]
diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs
index 80d672589..9f9d85147 100644
--- a/tests/test-pandoc.hs
+++ b/tests/test-pandoc.hs
@@ -9,6 +9,7 @@ import qualified Tests.Readers.LaTeX
 import qualified Tests.Readers.Markdown
 import qualified Tests.Readers.Org
 import qualified Tests.Readers.RST
+import qualified Tests.Readers.DocX
 import qualified Tests.Writers.ConTeXt
 import qualified Tests.Writers.LaTeX
 import qualified Tests.Writers.HTML
@@ -38,6 +39,8 @@ tests = [ testGroup "Old" Tests.Old.tests
           , testGroup "Markdown" Tests.Readers.Markdown.tests
           , testGroup "Org" Tests.Readers.Org.tests
           , testGroup "RST" Tests.Readers.RST.tests
+          , testGroup "DocX" Tests.Readers.DocX.tests
+
           ]
         ]