Skip to content

Commit 22c3d19

Browse files
committed
gh-75707: tarfile: Add optional open() argument "reproducible"
This makes it possible to create reproducible .tar.gz files without overriding time.time(), by setting the gzip header field mtime to 0.
1 parent 96b7a2e commit 22c3d19

File tree

2 files changed

+16
-6
lines changed

2 files changed

+16
-6
lines changed

Lib/tarfile.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ class _Stream:
339339
"""
340340

341341
def __init__(self, name, mode, comptype, fileobj, bufsize,
342-
compresslevel, preset):
342+
compresslevel, preset, reproducible):
343343
"""Construct a _Stream object.
344344
"""
345345
self._extfileobj = True
@@ -374,7 +374,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize,
374374
self.exception = zlib.error
375375
self._init_read_gz()
376376
else:
377-
self._init_write_gz(compresslevel)
377+
self._init_write_gz(compresslevel, reproducible)
378378

379379
elif comptype == "bz2":
380380
try:
@@ -423,15 +423,19 @@ def __del__(self):
423423
if hasattr(self, "closed") and not self.closed:
424424
self.close()
425425

426-
def _init_write_gz(self, compresslevel):
426+
def _init_write_gz(self, compresslevel, reproducible):
427427
"""Initialize for writing with gzip compression.
428428
"""
429429
self.cmp = self.zlib.compressobj(compresslevel,
430430
self.zlib.DEFLATED,
431431
-self.zlib.MAX_WBITS,
432432
self.zlib.DEF_MEM_LEVEL,
433433
0)
434-
timestamp = struct.pack("<L", int(time.time()))
434+
if reproducible:
435+
timestamp = 0
436+
else:
437+
timestamp = int(time.time())
438+
timestamp = struct.pack("<L", timestamp)
435439
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
436440
if self.name.endswith(".gz"):
437441
self.name = self.name[:-3]
@@ -1726,7 +1730,7 @@ class TarFile(object):
17261730
def __init__(self, name=None, mode="r", fileobj=None, format=None,
17271731
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
17281732
errors="surrogateescape", pax_headers=None, debug=None,
1729-
errorlevel=None, copybufsize=None, stream=False):
1733+
errorlevel=None, copybufsize=None, stream=False, reproducible=False):
17301734
"""Open an (uncompressed) tar archive 'name'. 'mode' is either 'r' to
17311735
read from an existing archive, 'a' to append data to an existing
17321736
file or 'w' to create a new file overwriting an existing one. 'mode'
@@ -1932,8 +1936,9 @@ def not_compressed(comptype):
19321936

19331937
compresslevel = kwargs.pop("compresslevel", 6)
19341938
preset = kwargs.pop("preset", None)
1939+
reproducible = kwargs.pop("reproducible", False)
19351940
stream = _Stream(name, filemode, comptype, fileobj, bufsize,
1936-
compresslevel, preset)
1941+
compresslevel, preset, reproducible)
19371942
try:
19381943
t = cls(name, filemode, stream, **kwargs)
19391944
except:

Lib/test/test_tarfile.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2115,6 +2115,11 @@ def test_create_with_compresslevel(self):
21152115
with tarfile.open(tmpname, 'r:gz', compresslevel=1) as tobj:
21162116
pass
21172117

2118+
def test_create_reproducible(self):
2119+
with tarfile.open(tmpname, self.mode, compresslevel=1, reproducible=True) as tobj:
2120+
tobj.add(self.file_path)
2121+
with self.open(tmpname, 'r') as fobj:
2122+
self.assertEqual(fobj.mtime, None)
21182123

21192124
class Bz2CreateTest(Bz2Test, CreateTest):
21202125

0 commit comments

Comments
 (0)