From 65d53459ee1837d1c8c321a3270e276ef568c846 Mon Sep 17 00:00:00 2001 From: dennisvang <29799340+dennisvang@users.noreply.github.com> Date: Tue, 21 Nov 2023 18:03:20 +0100 Subject: [PATCH] replace shutil.make_archive by tarfile and gzip this provides greater control and allows us to create reproducible gzip files i.e. without filename in the header, and with mtime set to zero --- src/tufup/repo/__init__.py | 28 +++++++++++++--------------- tests/test_repo.py | 12 ++++++++++-- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/tufup/repo/__init__.py b/src/tufup/repo/__init__.py index 30b8890..f929b4b 100644 --- a/src/tufup/repo/__init__.py +++ b/src/tufup/repo/__init__.py @@ -1,9 +1,11 @@ +import tempfile from copy import deepcopy from datetime import datetime, timedelta import inspect import json import logging import pathlib +import tarfile try: # workaround for PyInstaller issue 6911 (setuptools issue 3089) @@ -37,7 +39,7 @@ ) from tuf.api.serialization.json import JSONSerializer -from tufup.common import Patcher, SUFFIX_ARCHIVE, SUFFIX_PATCH, TargetMeta +from tufup.common import Patcher, SUFFIX_PATCH, TargetMeta from tufup.utils.platform_specific import _patched_resolve logger = logging.getLogger(__name__) @@ -79,12 +81,7 @@ def make_gztar_archive( dst_dir: Union[pathlib.Path, str], app_name: str, version: str, - **kwargs, # allowed kwargs are passed on to shutil.make_archive ) -> Optional[TargetMeta]: - # remove disallowed kwargs - for key in ['base_name', 'root_dir', 'format']: - if kwargs.pop(key, None): - logger.warning(f'{key} ignored: using default') # ensure paths src_dir = pathlib.Path(src_dir) dst_dir = pathlib.Path(dst_dir) @@ -97,15 +94,16 @@ def make_gztar_archive( if input(f'Found existing archive: {archive_path}.\nOverwrite? [n]/y') != 'y': print('Using existing archive.') return TargetMeta(archive_path) - # make archive - base_name = str(dst_dir / archive_filename.replace(SUFFIX_ARCHIVE, '')) - archive_path_str = shutil.make_archive( - base_name=base_name, # archive file path, no suffix - root_dir=str(src_dir), # paths in archive will be relative to root_dir - format='gztar', - **kwargs, - ) - return TargetMeta(target_path=archive_path_str) + # create archive + with tempfile.NamedTemporaryFile(mode='wb') as temp_file: + # make temporary tar archive + with tarfile.open(fileobj=temp_file, mode='w') as tar: + for path in src_dir.iterdir(): + tar.add(name=path, arcname=path.relative_to(src_dir), recursive=True) + temp_file_path = pathlib.Path(temp_file.name) + # compress tar archive using gzip (force mtime to zero for reproducibility) + Patcher.gzip(src_path=temp_file_path, dst_path=archive_path, mtime=0) + return TargetMeta(target_path=archive_path) class RolesDict(TypedDict): diff --git a/tests/test_repo.py b/tests/test_repo.py index 4ed6170..48978c3 100644 --- a/tests/test_repo.py +++ b/tests/test_repo.py @@ -3,6 +3,8 @@ import json import logging import pathlib +import struct +import tarfile from tempfile import TemporaryDirectory from time import sleep from unittest.mock import Mock, patch @@ -96,14 +98,20 @@ def test_make_gztar_archive(self): dst_dir=self.temp_dir_path, app_name=app_name, version=version, - base_dir='.', # this kwarg is allowed - root_dir='some path', # this kwarg is removed ) self.assertIsInstance(archive, TargetMeta) self.assertEqual(exists, mock_input_no.called) self.assertTrue(archive.path.exists()) self.assertTrue(app_name in str(archive.path)) self.assertTrue(version in str(archive.path)) + # check mtime in archive gzip header (see RFC 1952 and test_common.py) + mtime = struct.unpack('