aboutsummaryrefslogtreecommitdiff
path: root/tests/Tests/Readers/Docx.hs
blob: 565d117e91492e932dd3cedd9080ed2b79d1ba8e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
module Tests.Readers.Docx (tests) where

import Text.Pandoc.Options
import Text.Pandoc.Readers.Native
import Text.Pandoc.Definition
import Tests.Helpers
import Test.Framework
import Test.HUnit (assertBool)
import Test.Framework.Providers.HUnit
import qualified Data.ByteString.Lazy as B
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Writers.Native (writeNative)
import qualified Data.Map as M
import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
import Codec.Archive.Zip
import System.FilePath (combine)

-- We define a wrapper around pandoc that doesn't normalize in the
-- tests. Since we do our own normalization, we want to make sure
-- we're doing it right.

data NoNormPandoc = NoNormPandoc {unNoNorm :: Pandoc}
                 deriving Show

noNorm :: Pandoc -> NoNormPandoc
noNorm = NoNormPandoc

instance ToString NoNormPandoc where
  toString d = writeNative def{ writerStandalone = s } $ toPandoc d
   where s = case d of
                  NoNormPandoc (Pandoc (Meta m) _)
                    | M.null m  -> False
                    | otherwise -> True

instance ToPandoc NoNormPandoc where
  toPandoc = unNoNorm

compareOutput :: ReaderOptions
                 -> FilePath
                 -> FilePath
                 -> IO (NoNormPandoc, NoNormPandoc)
compareOutput opts docxFile nativeFile = do
  df <- B.readFile docxFile
  nf <- Prelude.readFile nativeFile
  let (p, _) = readDocx opts df
  return $ (noNorm p, noNorm (readNative nf))

testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO Test
testCompareWithOptsIO opts name docxFile nativeFile = do
  (dp, np) <- compareOutput opts docxFile nativeFile
  return $ test id name (dp, np)

testCompareWithOpts :: ReaderOptions -> String -> FilePath -> FilePath -> Test
testCompareWithOpts opts name docxFile nativeFile =
  buildTest $ testCompareWithOptsIO opts name docxFile nativeFile

testCompare :: String -> FilePath -> FilePath -> Test
testCompare = testCompareWithOpts def

getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString)
getMedia archivePath mediaPath = do
  zf <- B.readFile archivePath >>= return . toArchive
  return $ findEntryByPath (combine "word" mediaPath) zf >>= (Just . fromEntry)

compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool
compareMediaPathIO mediaPath mediaBag docxPath = do
  docxMedia <- getMedia docxPath mediaPath
  let mbBS   = case lookupMedia mediaPath mediaBag of
                 Just (_, bs) -> bs
                 Nothing      -> error ("couldn't find " ++
                                        mediaPath ++
                                        " in media bag")
      docxBS = case docxMedia of
                 Just bs -> bs
                 Nothing -> error ("couldn't find " ++
                                   mediaPath ++
                                   " in media bag")
  return $ mbBS == docxBS

compareMediaBagIO :: FilePath -> IO Bool
compareMediaBagIO docxFile = do
    df <- B.readFile docxFile
    let (_, mb) = readDocx def df
    bools <- mapM
             (\(fp, _, _) -> compareMediaPathIO fp mb docxFile)
             (mediaDirectory mb)
    return $ and bools

testMediaBagIO :: String -> FilePath -> IO Test
testMediaBagIO name docxFile = do
  outcome <- compareMediaBagIO docxFile
  return $ testCase name (assertBool
                          ("Media didn't match media bag in file " ++ docxFile)
                          outcome)

testMediaBag :: String -> FilePath -> Test
testMediaBag name docxFile = buildTest $ testMediaBagIO name docxFile

tests :: [Test]
tests = [ testGroup "inlines"
          [ testCompare
            "font formatting"
            "docx/inline_formatting.docx"
            "docx/inline_formatting.native"
          , testCompare
            "font formatting with character styles"
            "docx/char_styles.docx"
            "docx/char_styles.native"
          , testCompare
            "hyperlinks"
            "docx/links.docx"
            "docx/links.native"
          , testCompare
            "inline image"
            "docx/image.docx"
            "docx/image_no_embed.native"
          , testCompare
            "inline image in links"
            "docx/inline_images.docx"
            "docx/inline_images.native"
          , testCompare
            "handling unicode input"
            "docx/unicode.docx"
            "docx/unicode.native"
          , testCompare
            "literal tabs"
            "docx/tabs.docx"
            "docx/tabs.native"
          , testCompare
            "normalizing inlines"
            "docx/normalize.docx"
            "docx/normalize.native"
          , testCompare
            "normalizing inlines deep inside blocks"
            "docx/deep_normalize.docx"
            "docx/deep_normalize.native"
          , testCompare
            "move trailing spaces outside of formatting"
            "docx/trailing_spaces_in_formatting.docx"
            "docx/trailing_spaces_in_formatting.native"
          , testCompare
            "inline code (with VerbatimChar style)"
            "docx/inline_code.docx"
            "docx/inline_code.native"
          ]
        , testGroup "blocks"
          [ testCompare
            "headers"
            "docx/headers.docx"
            "docx/headers.native"
          , testCompare
            "headers already having auto identifiers"
            "docx/already_auto_ident.docx"
            "docx/already_auto_ident.native"
          , testCompare
            "numbered headers automatically made into list"
            "docx/numbered_header.docx"
            "docx/numbered_header.native"
          , testCompare
            "headers in other languages"
            "docx/danish_headers.docx"
            "docx/danish_headers.native"
          , testCompare
            "lists"
            "docx/lists.docx"
            "docx/lists.native"
          , testCompare
            "definition lists"
            "docx/definition_list.docx"
            "docx/definition_list.native"
          , testCompare
            "footnotes and endnotes"
            "docx/notes.docx"
            "docx/notes.native"
          , testCompare
            "blockquotes (parsing indent as blockquote)"
            "docx/block_quotes.docx"
            "docx/block_quotes_parse_indent.native"
          , testCompare
            "hanging indents"
            "docx/hanging_indent.docx"
            "docx/hanging_indent.native"
          , testCompare
            "tables"
            "docx/tables.docx"
            "docx/tables.native"
          , testCompare
            "code block"
            "docx/codeblock.docx"
            "docx/codeblock.native"
          , testCompare
            "dropcap paragraphs"
            "docx/drop_cap.docx"
            "docx/drop_cap.native"
          ]
        , testGroup "track changes"
          [ testCompare
            "insertion (default)"
            "docx/track_changes_insertion.docx"
            "docx/track_changes_insertion_accept.native"
          , testCompareWithOpts def{readerTrackChanges=AcceptChanges}
            "insert insertion (accept)"
            "docx/track_changes_insertion.docx"
            "docx/track_changes_insertion_accept.native"
          , testCompareWithOpts def{readerTrackChanges=RejectChanges}
            "remove insertion (reject)"
            "docx/track_changes_insertion.docx"
            "docx/track_changes_insertion_reject.native"
          , testCompare
            "deletion (default)"
            "docx/track_changes_deletion.docx"
            "docx/track_changes_deletion_accept.native"
          , testCompareWithOpts def{readerTrackChanges=AcceptChanges}
            "remove deletion (accept)"
            "docx/track_changes_deletion.docx"
            "docx/track_changes_deletion_accept.native"
          , testCompareWithOpts def{readerTrackChanges=RejectChanges}
            "insert deletion (reject)"
            "docx/track_changes_deletion.docx"
            "docx/track_changes_deletion_reject.native"
          , testCompareWithOpts def{readerTrackChanges=AllChanges}
            "keep insertion (all)"
            "docx/track_changes_deletion.docx"
            "docx/track_changes_deletion_all.native"
          , testCompareWithOpts def{readerTrackChanges=AllChanges}
            "keep deletion (all)"
            "docx/track_changes_deletion.docx"
            "docx/track_changes_deletion_all.native"
          ]
        , testGroup "media"
          [ testMediaBag
            "image extraction"
            "docx/image.docx"
          ]
        , testGroup "metadata"
          [ testCompareWithOpts def{readerStandalone=True}
            "metadata fields"
            "docx/metadata.docx"
            "docx/metadata.native"
          , testCompareWithOpts def{readerStandalone=True}
            "stop recording metadata with normal text"
            "docx/metadata_after_normal.docx"
            "docx/metadata_after_normal.native"
          ]

        ]