From ed69e0f821ca5cab31baf908ea56329a7605bb29 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Tue, 12 Mar 2024 14:07:47 +0100 Subject: [PATCH 1/2] Npz sniffing: do not read the whole file takes too much memory --- lib/galaxy/datatypes/binary.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index cc6702cd589f..fd58af3dcc97 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -4137,9 +4137,7 @@ def sniff(self, filename: str) -> bool: try: npz = np.load(filename) if isinstance(npz, np.lib.npyio.NpzFile): - for f in npz.files: - if isinstance(npz[f], np.ndarray): - return True + return True except Exception: return False return False From 28c19ebadf42b13edf8e44ec55efcab6d61683f4 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 12 Mar 2024 18:24:24 +0100 Subject: [PATCH 2/2] Assert that at least one file in npz zipfile ends with .npy from https://pydoc.dev/numpy/latest/numpy.lib.npyio.NpzFile.html: > NpzFile is used to load files in the NumPy .npz data archive format. It assumes that files in the archive have a .npy extension, other files are ignored. --- lib/galaxy/datatypes/binary.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index fd58af3dcc97..456dacc5ba05 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -4135,9 +4135,9 @@ def __init__(self, **kwd): def sniff(self, filename: str) -> bool: try: - npz = np.load(filename) - if isinstance(npz, np.lib.npyio.NpzFile): - return True + with np.load(filename) as npz: + if isinstance(npz, np.lib.npyio.NpzFile) and any(f.filename.endswith(".npy") for f in npz.zip.filelist): + return True except Exception: return False return False