Skip to content

Commit

Permalink
force gzip mtime to zero for patch
Browse files: browse the repository at this point in the history
  • Loading branch information
dennisvang committed Nov 21, 2023
1 parent dfa0b3d commit a5261d1
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 43 deletions.
4 changes: 2 additions & 2 deletions src/tufup/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,6 @@ def apply_patch(cls, src_path: pathlib.Path, patch_path: pathlib.Path):
dst_path=decompressed_dst_path,
patch_path=patch_path,
)
# compress result
cls.gzip(src_path=decompressed_dst_path, dst_path=dst_path)
# compress result (mtime=0 for reproducibility)
cls.gzip(src_path=decompressed_dst_path, dst_path=dst_path, mtime=0)
return dst_path
65 changes: 24 additions & 41 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import pathlib
import struct
import tarfile
import time

import bsdiff4
from packaging.version import Version
Expand Down Expand Up @@ -142,29 +141,19 @@ def setUp(self) -> None:
self.file_paths = dict()
self.tar_paths = dict()
self.gz_paths = dict()
# The gzip header contains an mtime field [1], and we need to make sure we
# can set this field properly. However, the resolution of os.stat.mtime
# depends on the operating system and file system, e.g. Windows/FAT32 has a 2
# sec resolution [2], so to check for inequality of the *default* mtime in
# the gzip header, we would need to force a delay on the order of seconds in
# our tests. To work around this, we override the mtime field for test files.
# [1]: https://datatracker.ietf.org/doc/html/rfc1952#page-5
# [2]: https://docs.python.org/3.12/library/os.html#os.stat_result
mtimes = dict(
old=time.time() - 100, # some arbitrary time in the past [seconds]
new=None, # i.e. just use the default mtime (current time)
)
for key, mtime in mtimes.items():
for key in ['old', 'new']:
# create dummy file
file_path = self.temp_dir_path / key
file_path.write_text(key)
# create .tar archive from dummy file
tar_path = file_path.with_suffix('.tar')
with tarfile.open(tar_path, 'w') as tar:
tar.add(file_path)
# compress .tar file using gzip (without filename in header)
# compress .tar file using gzip (without filename and timestamp in header)
# "MTIME = 0 means no time stamp is available."
# https://datatracker.ietf.org/doc/html/rfc1952#page-7
gz_path = tar_path.with_suffix('.tar.gz')
gz_path.write_bytes(gzip.compress(data=tar_path.read_bytes(), mtime=mtime))
gz_path.write_bytes(gzip.compress(data=tar_path.read_bytes(), mtime=0))
# keep reference
self.file_paths[key] = file_path
self.tar_paths[key] = tar_path
Expand Down Expand Up @@ -193,43 +182,36 @@ def test_gzip_header(self):
self.assertEqual(expected_mtime, MTIME)
self.assertFalse(FNAME)

def test_gzip_compress_reproducibility(self):
# verify that different mtimes lead to differences in the gz file
not_repr_gz_path = Patcher.gzip(
src_path=self.tar_paths['old'],
dst_path=self.temp_dir_path / 'not-reproducible.tar.gz',
)
self.assertNotEqual(
self.gz_paths['old'].read_bytes(), not_repr_gz_path.read_bytes()
def test_gzip_compress_default(self):
self.assertEqual(
self.gz_paths['old'], Patcher.gzip(src_path=self.tar_paths['old'])
)
# verify that we can override the mtime to remove these differences
repr_gz_path = Patcher.gzip(
src_path=self.tar_paths['old'],
dst_path=self.temp_dir_path / 'reproducible.tar.gz',
mtime=0,

def test_gzip_decompress_default(self):
self.assertEqual(
self.tar_paths['old'], Patcher.gzip(src_path=self.gz_paths['old'])
)
self.assertEqual(self.gz_paths['old'].read_bytes(), repr_gz_path.read_bytes())

def test_gzip_compress(self):
# prepare
src_path = self.tar_paths['old']
dst_path = self.temp_dir_path / 'compressed.tar.gz'
# test gzip compression
with self.assertLogs(level='DEBUG') as logs:
for dst_path in [None, self.temp_dir_path / 'compressed.tar.gz']:
with self.subTest(msg=dst_path):
gz_path = Patcher.gzip(src_path=src_path, dst_path=dst_path)
self.assertTrue(gz_path.exists())
self.assertEqual(2, sum(1 for msg in logs.output if 'compress' in msg))
# note these are not byte-for-byte equal, because of the default mtime
gz_path = Patcher.gzip(src_path=src_path, dst_path=dst_path, mtime=0)
self.assertTrue(gz_path.exists())
self.assertIn(' compress', logs.output[0]) # keep whitespace
self.assertEqual(self.gz_paths['old'].read_bytes(), gz_path.read_bytes())

def test_gzip_decompress(self):
# prepare
src_path = self.gz_paths['old']
dst_path = self.temp_dir_path / 'decompressed.tar'
# test gzip decompression
with self.assertLogs(level='DEBUG') as logs:
for dst_path in [None, self.temp_dir_path / 'decompressed.tar']:
with self.subTest(msg=str(dst_path)):
tar_path = Patcher.gzip(src_path=src_path, dst_path=dst_path)
self.assertTrue(tar_path.exists())
self.assertEqual(2, sum(1 for msg in logs.output if 'decompress' in msg))
tar_path = Patcher.gzip(src_path=src_path, dst_path=dst_path)
self.assertTrue(tar_path.exists())
self.assertIn('decompress', logs.output[0])
self.assertEqual(self.tar_paths['old'].read_bytes(), tar_path.read_bytes())

def test_create_patch(self):
Expand All @@ -251,3 +233,4 @@ def test_apply_patch(self):
)
self.assertEqual(name + '.tar.gz', new_gz_path.name)
self.assertTrue(new_gz_path.exists())
self.assertEqual(self.gz_paths['new'].read_bytes(), new_gz_path.read_bytes())

0 comments on commit a5261d1

Please sign in to comment.