Skip to content

Commit ab89bc8

Browse files
committed
gh-152190: Further optimize tests and tidy code
Rename `TestRepack` to `TestRepacker` to better reflect its coverage of `zipfile._Repacker`. Move the heavy text chunk generation and the common constants from `setUp` to `setUpClass` so that they are initialized only once for the whole test class rather than once per test. Clean up the repetitive local definitions of filenames and lorem text by promoting them to class attributes. Reduce the test file size from 8 GiB to 4.1 GiB, which is still large enough to trigger the ZIP64 extensions and is sufficient for testing purposes.
1 parent 0e54ea3 commit ab89bc8

1 file changed

Lines changed: 31 additions & 41 deletions

File tree

Lib/test/test_zipfile64.py

Lines changed: 31 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -92,21 +92,26 @@ def tearDown(self):
9292
os_helper.unlink(TESTFN2)
9393

9494

95-
class TestRepack(unittest.TestCase):
96-
def setUp(self):
97-
# Create test data.
95+
class TestRepacker(unittest.TestCase):
96+
@classmethod
97+
def setUpClass(cls):
98+
cls.largefilename = 'largefile.txt'
99+
98100
line_gen = ("Test of zipfile line %d." % i for i in range(1000000))
99-
self.data = '\n'.join(line_gen).encode('ascii')
101+
cls.chunk = '\n'.join(line_gen).encode('ascii')
102+
103+
# It will contain enough copies of cls.chunk to reach about 4.1 GiB.
104+
cls.chunkcount = int(4.1*1024**3 / len(cls.chunk))
100105

101-
# It will contain enough copies of self.data to reach about 8 GiB.
102-
self.datacount = 8*1024**3 // len(self.data)
106+
cls.filename = 'file.txt'
107+
cls.lorem = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
103108

104109
# Memory usage should not exceed 10 MiB during repacking.
105110
# This empirical threshold ensures that the internal processing
106111
# like signature scanning, compressed block end tracing, and
107112
# data copying are properly buffered without loading the entire
108113
# large file into memory.
109-
self.allowed_memory = 10*1024**2
114+
cls.allowed_memory = 10*1024**2
110115

111116
@contextmanager
112117
def assert_memory_usage(self, threshold):
@@ -120,89 +125,77 @@ def assert_memory_usage(self, threshold):
120125

121126
def _write_large_file(self, fh):
122127
next_time = time.monotonic() + _PRINT_WORKING_MSG_INTERVAL
123-
for num in range(self.datacount):
124-
fh.write(self.data)
128+
for num in range(self.chunkcount):
129+
fh.write(self.chunk)
125130
# Print still working message since this test can be really slow
126131
if next_time <= time.monotonic():
127132
next_time = time.monotonic() + _PRINT_WORKING_MSG_INTERVAL
128133
print((
129134
' writing %d of %d, be patient...' %
130-
(num, self.datacount)), file=sys.__stdout__)
135+
(num, self.chunkcount)), file=sys.__stdout__)
131136
sys.__stdout__.flush()
132137

133138
def test_strip_removed_large_file(self):
134139
"""Should move the physical data of a file positioned after a large
135140
removed file without causing a memory issue."""
136141
with TemporaryFile() as f:
137-
file = 'file.txt'
138-
file1 = 'largefile.txt'
139-
data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
140142
with zipfile.ZipFile(f, 'w') as zh:
141-
with zh.open(file1, 'w', force_zip64=True) as fh:
143+
with zh.open(self.largefilename, 'w', force_zip64=True) as fh:
142144
self._write_large_file(fh)
143-
zh.writestr(file, data)
145+
zh.writestr(self.filename, self.lorem)
144146

145147
with self.assert_memory_usage(self.allowed_memory), \
146148
zipfile.ZipFile(f, 'a') as zh:
147-
zh.remove(file1)
149+
zh.remove(self.largefilename)
148150
zh.repack()
149151
self.assertIsNone(zh.testzip())
150152

151153
def test_strip_removed_file_before_large_file(self):
152154
"""Should move the physical data of a large file positioned after a
153155
removed file without causing a memory issue."""
154156
with TemporaryFile() as f:
155-
file = 'file.txt'
156-
file1 = 'largefile.txt'
157-
data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
158157
with zipfile.ZipFile(f, 'w') as zh:
159-
zh.writestr(file, data)
160-
with zh.open(file1, 'w', force_zip64=True) as fh:
158+
zh.writestr(self.filename, self.lorem)
159+
with zh.open(self.largefilename, 'w', force_zip64=True) as fh:
161160
self._write_large_file(fh)
162161

163162
with self.assert_memory_usage(self.allowed_memory), \
164163
zipfile.ZipFile(f, 'a') as zh:
165-
zh.remove(file)
164+
zh.remove(self.filename)
166165
zh.repack()
167166
self.assertIsNone(zh.testzip())
168167

169168
def test_strip_removed_large_file_with_dd(self):
170169
"""Should scan for the data descriptor of a removed large file without
171170
causing a memory issue."""
172171
with TemporaryFile() as f:
173-
file = 'file.txt'
174-
file1 = 'largefile.txt'
175-
data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
176172
with zipfile.ZipFile(Unseekable(f), 'w') as zh:
177-
with zh.open(file1, 'w', force_zip64=True) as fh:
173+
with zh.open(self.largefilename, 'w', force_zip64=True) as fh:
178174
self._write_large_file(fh)
179-
zh.writestr(file, data)
175+
zh.writestr(self.filename, self.lorem)
180176

181177
with self.assert_memory_usage(self.allowed_memory), \
182178
zipfile.ZipFile(f, 'a') as zh:
183-
zh.remove(file1)
179+
zh.remove(self.largefilename)
184180
zh.repack()
185181
self.assertIsNone(zh.testzip())
186182

187183
def test_strip_removed_large_file_with_dd_no_sig(self):
188184
"""Should scan for the unsigned data descriptor of a removed large file
189185
without causing a memory issue."""
190186
# Reduce data scale for this test, as it's especially slow...
191-
self.datacount = 30*1024**2 // len(self.data)
187+
self.chunkcount = int(30*1024**2 / len(self.chunk))
192188

193189
with TemporaryFile() as f:
194-
file = 'file.txt'
195-
file1 = 'largefile.txt'
196-
data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
197190
with mock.patch('zipfile.struct.pack', side_effect=struct_pack_no_dd_sig), \
198191
zipfile.ZipFile(Unseekable(f), 'w') as zh:
199-
with zh.open(file1, 'w', force_zip64=True) as fh:
192+
with zh.open(self.largefilename, 'w', force_zip64=True) as fh:
200193
self._write_large_file(fh)
201-
zh.writestr(file, data)
194+
zh.writestr(self.filename, self.lorem)
202195

203196
with self.assert_memory_usage(self.allowed_memory), \
204197
zipfile.ZipFile(f, 'a') as zh:
205-
zh.remove(file1)
198+
zh.remove(self.largefilename)
206199
# strict_descriptor=False to scan the unsigned data descriptor
207200
# (scanning is disabled under the strict_descriptor=True default)
208201
zh.repack(strict_descriptor=False)
@@ -213,18 +206,15 @@ def test_strip_removed_large_file_with_dd_no_sig_by_decompression(self):
213206
"""Should scan for the unsigned data descriptor (via tracing compressed
214207
block end) of a removed large file without causing a memory issue."""
215208
with TemporaryFile() as f:
216-
file = 'file.txt'
217-
file1 = 'largefile.txt'
218-
data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
219209
with mock.patch('zipfile.struct.pack', side_effect=struct_pack_no_dd_sig), \
220210
zipfile.ZipFile(Unseekable(f), 'w', compression=zipfile.ZIP_DEFLATED) as zh:
221-
with zh.open(file1, 'w', force_zip64=True) as fh:
211+
with zh.open(self.largefilename, 'w', force_zip64=True) as fh:
222212
self._write_large_file(fh)
223-
zh.writestr(file, data)
213+
zh.writestr(self.filename, self.lorem)
224214

225215
with self.assert_memory_usage(self.allowed_memory), \
226216
zipfile.ZipFile(f, 'a') as zh:
227-
zh.remove(file1)
217+
zh.remove(self.largefilename)
228218
# strict_descriptor=False to detect the unsigned data descriptor
229219
# (scanning is disabled under the strict_descriptor=True default)
230220
zh.repack(strict_descriptor=False)

0 commit comments

Comments
 (0)