diff --git a/lib/galaxy/dependencies/pinned-requirements.txt b/lib/galaxy/dependencies/pinned-requirements.txt index cc8d86eec7dc..396004201838 100644 --- a/lib/galaxy/dependencies/pinned-requirements.txt +++ b/lib/galaxy/dependencies/pinned-requirements.txt @@ -92,6 +92,7 @@ idna==3.5 ; python_version >= "3.7" and python_version < "3.12" importlib-metadata==4.13.0 ; python_version >= "3.7" and python_version < "3.12" importlib-resources==5.12.0 ; python_version >= "3.7" and python_version < "3.12" isa-rwval==0.10.10 ; python_version >= "3.7" and python_version < "3.12" +isal==1.3.0 ; python_version >= "3.7" and python_version < "3.12" isodate==0.6.1 ; python_version >= "3.7" and python_version < "3.12" jinja2==3.1.2 ; python_version >= "3.7" and python_version < "3.12" jmespath==1.0.1 ; python_version >= "3.7" and python_version < "3.12" diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index e8a6c95fb377..973cec825999 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -2753,7 +2753,7 @@ def _finalize(self) -> None: out_file = out_file_name[: -len(".zip")] else: out_file = out_file_name - rval = shutil.make_archive(out_file, "zip", self.export_directory) + rval = shutil.make_archive(out_file, "fastzip", self.export_directory) if not self.file_source_uri: shutil.move(rval, self.out_file) else: diff --git a/lib/galaxy/util/compression_utils.py b/lib/galaxy/util/compression_utils.py index c10a6b4ed822..2282a7578880 100644 --- a/lib/galaxy/util/compression_utils.py +++ b/lib/galaxy/util/compression_utils.py @@ -3,6 +3,7 @@ import io import logging import os +import shutil import tarfile import tempfile import zipfile @@ -30,6 +31,12 @@ is_gzip, ) +try: + from isal import isal_zlib +except ImportError: + isal_zlib = None # type: ignore[assignment] + + log = logging.getLogger(__name__) FileObjTypeStr = Union[IO[str], io.TextIOWrapper] @@ -345,3 +352,84 @@ def zipfile_ok(self, path_to_archive: 
class FastZipFile(zipfile.ZipFile):
    """
    Simple wrapper around ZipFile that uses the default compression strategy of ISA-L
    to write deflated zip entries. The ``compresslevel`` argument is ignored for those
    entries; the original author reports a 3 to 4 times speed-up over the zlib
    implementation at the default compression level.

    Entries that are not written with ``zipfile.ZIP_DEFLATED`` (for example
    ``ZIP_STORED``) keep whatever compressor ``zipfile`` selected for them, so the
    compression method recorded in the entry header always matches the payload.
    If the optional ``isal`` package could not be imported (``isal_zlib`` is falsy),
    this class behaves exactly like ``zipfile.ZipFile``.
    """

    def _open_to_write(self, *args, **kwargs):  # type: ignore[no-untyped-def]
        """Open an entry for writing, swapping in an ISA-L compressor when applicable.

        All arguments are forwarded unchanged to ``ZipFile._open_to_write``; the
        returned writer object is the one created by the parent class.
        """
        zwf = super()._open_to_write(*args, **kwargs)  # type: ignore[misc]
        zinfo = getattr(zwf, "_zinfo", None)
        is_deflated = getattr(zinfo, "compress_type", None) == zipfile.ZIP_DEFLATED
        # Only replace an existing deflate compressor. Installing a deflate stream on a
        # stored (or bzip2/lzma) entry would write data that does not match the
        # compression method advertised in the entry header.
        if isal_zlib and is_deflated and getattr(zwf, "_compressor", None) is not None:
            # Positional arguments follow zlib.compressobj: level, method, wbits, memLevel.
            # wbits=-15 requests a raw deflate stream, which is what zip entries contain.
            zwf._compressor = isal_zlib.compressobj(isal_zlib.ISAL_DEFAULT_COMPRESSION, isal_zlib.DEFLATED, -15, 9)
        return zwf


# modified from shutil._make_zipfile
def make_fast_zipfile(
    base_name: str,
    base_dir: str,
    verbose: int = 0,
    dry_run: int = 0,
    logger: Optional[logging.Logger] = None,
    owner: Optional[str] = None,
    group: Optional[str] = None,
    root_dir: Optional[str] = None,
) -> str:
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip". Returns the
    name of the output zip file.

    :param base_name: path of the archive to create, without the ".zip" suffix.
        Its parent directory is created when it does not exist yet.
    :param base_dir: directory whose content is archived. When ``root_dir`` is
        given, ``base_dir`` is resolved relative to it and archive member names
        are computed relative to ``root_dir``.
    :param verbose: accepted for signature compatibility; not used here.
    :param dry_run: when truthy, nothing is created on disk; only the log
        messages are emitted and the would-be archive name is returned.
    :param logger: optional logger receiving one ``info`` record per step.
    :param owner: accepted for signature compatibility; not used here.
    :param group: accepted for signature compatibility; not used here.
    :param root_dir: optional directory that ``base_dir`` is relative to. When
        given, the returned archive name is made absolute.
    :returns: the name of the zip file.
    """

    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it", zip_filename, base_dir)

    if not dry_run:
        with FastZipFile(zip_filename, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
            arcname = os.path.normpath(base_dir)
            if root_dir is not None:
                base_dir = os.path.join(root_dir, base_dir)
            base_dir = os.path.normpath(base_dir)
            # The top-level directory gets its own entry unless it is the current directory.
            if arcname != os.curdir:
                zf.write(base_dir, arcname)
                if logger is not None:
                    logger.info("adding '%s'", base_dir)
            for dirpath, dirnames, filenames in os.walk(base_dir):
                arcdirpath = dirpath
                if root_dir is not None:
                    arcdirpath = os.path.relpath(arcdirpath, root_dir)
                arcdirpath = os.path.normpath(arcdirpath)
                # Directories are written explicitly so that empty ones are kept.
                for name in sorted(dirnames):
                    path = os.path.join(dirpath, name)
                    arcname = os.path.join(arcdirpath, name)
                    zf.write(path, arcname)
                    if logger is not None:
                        logger.info("adding '%s'", path)
                for name in filenames:
                    path = os.path.join(dirpath, name)
                    path = os.path.normpath(path)
                    # Skip anything that is not a regular file (or a link to one).
                    if os.path.isfile(path):
                        arcname = os.path.join(arcdirpath, name)
                        zf.write(path, arcname)
                        if logger is not None:
                            logger.info("adding '%s'", path)

    if root_dir is not None:
        zip_filename = os.path.abspath(zip_filename)
    return zip_filename


# Make the ISA-L backed writer available as shutil.make_archive(..., "fastzip", ...).
shutil.register_archive_format("fastzip", make_fast_zipfile)
include_package_data = True install_requires = galaxy-util fs + isal typing-extensions packages = find: python_requires = >=3.7 diff --git a/pyproject.toml b/pyproject.toml index ec98c6f49ef2..f6d9c0ca03a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ h5py = "*" importlib-metadata = "<5" # Work around https://github.com/celery/kombu/issues/1600 importlib-resources = "*" isa-rwval = ">=0.10.10" +isal = "*" kombu = "*" lagom = "*" Mako = "*"