From 25d254de671ef8ca169d51780e94808c543d4697 Mon Sep 17 00:00:00 2001 From: Alexander OSTROVSKY Date: Tue, 20 Feb 2024 13:07:48 -0800 Subject: [PATCH 1/7] add npy datatype --- lib/galaxy/config/sample/datatypes_conf.xml.sample | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample index 0e1cc7ae3afe..ab08527634e5 100644 --- a/lib/galaxy/config/sample/datatypes_conf.xml.sample +++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample @@ -220,6 +220,7 @@ + From 94da21eaa3fb30a5751802dec2adff4150808376 Mon Sep 17 00:00:00 2001 From: Alexander OSTROVSKY Date: Thu, 22 Feb 2024 15:05:02 -0800 Subject: [PATCH 2/7] add sniffer, metadata, and test file for npy --- .../config/sample/datatypes_conf.xml.sample | 1 + lib/galaxy/datatypes/binary.py | 50 ++++++++++++++++++ lib/galaxy/datatypes/test/test.npy | Bin 0 -> 325 bytes 3 files changed, 51 insertions(+) create mode 100644 lib/galaxy/datatypes/test/test.npy diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample index ab08527634e5..b92d56423dd0 100644 --- a/lib/galaxy/config/sample/datatypes_conf.xml.sample +++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample @@ -1076,6 +1076,7 @@ + diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index a2343074f542..120297b652f8 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -4538,3 +4538,53 @@ def display_peek(self, dataset: DatasetProtocol) -> str: return dataset.peek except Exception: return f"Binary FITS file size ({nice_size(dataset.get_size())})" + +@build_sniff_from_prefix +class Numpy(Binary): + """ + Class defining a numpy data file + + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('test.npy') + >>> Numpy().sniff(fname) + True + """ + + file_ext = "npy" + + MetadataElement( + name="version", + default="", + param=DictParameter, + desc="Version string for the numpy file format", + readonly=True, + visible=True, + no_value={}, + optional=True + ) + def _numpy_version_string(self, filename): + magic_string = open(filename, "rb").read(8) + version_str = str(magic_string[6])+"."+str(magic_string[7]) + return version_str + + def set_meta(self, dataset: DatasetProtocol, *, overwrite: TYPE_CHECKING = True, **kwd) -> None: + dataset.metadata.version_dict = self._numpy_version_string(dataset.get_file_name()) + + def sniff_prefix(self, file_prefix: FilePrefix) -> bool: + # The first 6 bytes of any numpy file is '\x93NUMPY', with following bytes for version + # number of file formats, and info about header data. The rest of the file contains binary data. + return file_prefix.startswith_bytes(b"\x93NUMPY") + + def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: + if not dataset.dataset.purged: + dataset.peek = "Binary numpy file version %s" % dataset.metadata.version_str + dataset.blurb = nice_size(dataset.get_size()) + else: + dataset.peek = "file does not exist" + dataset.blurb = "file purged from disk" + + def display_peek(self, dataset: DatasetProtocol) -> str: + try: + return dataset.peek + except Exception: + return "Binary numpy file (%s)" % (nice_size(dataset.get_size())) \ No newline at end of file diff --git a/lib/galaxy/datatypes/test/test.npy b/lib/galaxy/datatypes/test/test.npy new file mode 100644 index 0000000000000000000000000000000000000000..c9f33b010db65a716475b178aeb1bf9937a46729 GIT binary patch literal 325 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1J}V^Se=dm&3uA*+Trqc=-yAsbX-N=aowDpw(U1QP=T zgTF-~M`uTSA!kq_mxecU1S3$arjXmu&(G^W5P%79`;tPQq|VqvUJbwGq|!{FL-LBb lfB}#WG=?vN4XEX5%c>Wh>kIjv*`Dm^1+fHDO9};(^Z?FzVaEUf literal 0 HcmV?d00001 From 18e45e4d099bac6abd27d21b697d8f3555bd6196 Mon Sep 17 00:00:00 2001 From: Alexander OSTROVSKY Date: Fri, 23 Feb 2024 09:54:24 -0800 Subject: [PATCH 3/7] minor fix and suggestions added --- lib/galaxy/config/sample/datatypes_conf.xml.sample | 2 +- lib/galaxy/datatypes/binary.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample index b92d56423dd0..1ecd25ee5761 100644 --- a/lib/galaxy/config/sample/datatypes_conf.xml.sample +++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample @@ -220,7 +220,7 @@ - + diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index 120297b652f8..e694d35d94d7 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -4553,13 +4553,13 @@ class Numpy(Binary): file_ext = "npy" MetadataElement( - name="version", + name="version_str", default="", - param=DictParameter, + param=MetadataParameter, desc="Version string for the numpy file format", readonly=True, visible=True, - no_value={}, + no_value=0, optional=True ) def _numpy_version_string(self, filename): @@ -4568,7 +4568,11 @@ def _numpy_version_string(self, filename): return version_str def set_meta(self, dataset: DatasetProtocol, *, overwrite: TYPE_CHECKING = True, **kwd) -> None: - dataset.metadata.version_dict = self._numpy_version_string(dataset.get_file_name()) + try: + dataset.metadata.version_str = self._numpy_version_string(dataset.get_file_name()) + except Exception as e: + log.warning("%s, set_meta Exception: %s", self, e) + def sniff_prefix(self, file_prefix: FilePrefix) -> bool: # The first 6 bytes of any numpy file is '\x93NUMPY', with following bytes for version From b029638a8aaa792e44248911c48f8afa8b56659e Mon Sep 17 00:00:00 2001 From: Alexander OSTROVSKY Date: Fri, 23 Feb 2024 10:02:47 -0800 Subject: [PATCH 4/7] lint --- lib/galaxy/datatypes/binary.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index e694d35d94d7..f5963c4d9b5a 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -4539,6 +4539,7 @@ def display_peek(self, dataset: DatasetProtocol) -> str: except Exception: return f"Binary FITS file size ({nice_size(dataset.get_size())})" + @build_sniff_from_prefix class Numpy(Binary): """ @@ -4562,20 +4563,20 @@ class Numpy(Binary): no_value=0, optional=True ) + def _numpy_version_string(self, filename): magic_string = open(filename, "rb").read(8) - version_str = str(magic_string[6])+"."+str(magic_string[7]) + version_str = str(magic_string[6]) + "." + str(magic_string[7]) return version_str - + def set_meta(self, dataset: DatasetProtocol, *, overwrite: TYPE_CHECKING = True, **kwd) -> None: try: dataset.metadata.version_str = self._numpy_version_string(dataset.get_file_name()) except Exception as e: log.warning("%s, set_meta Exception: %s", self, e) - def sniff_prefix(self, file_prefix: FilePrefix) -> bool: - # The first 6 bytes of any numpy file is '\x93NUMPY', with following bytes for version + # The first 6 bytes of any numpy file is '\x93NUMPY', with following bytes for version # number of file formats, and info about header data. The rest of the file contains binary data. return file_prefix.startswith_bytes(b"\x93NUMPY") @@ -4591,4 +4592,4 @@ def display_peek(self, dataset: DatasetProtocol) -> str: try: return dataset.peek except Exception: - return "Binary numpy file (%s)" % (nice_size(dataset.get_size())) \ No newline at end of file + return "Binary numpy file (%s)" % (nice_size(dataset.get_size())) From 37dd32c605f8d76bad0481f136828ab68cef5c63 Mon Sep 17 00:00:00 2001 From: Alexander OSTROVSKY Date: Fri, 23 Feb 2024 10:14:25 -0800 Subject: [PATCH 5/7] metadata setting fix --- lib/galaxy/datatypes/binary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index f5963c4d9b5a..a3f0a6a8cd42 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -4569,7 +4569,7 @@ def _numpy_version_string(self, filename): version_str = str(magic_string[6]) + "." + str(magic_string[7]) return version_str - def set_meta(self, dataset: DatasetProtocol, *, overwrite: TYPE_CHECKING = True, **kwd) -> None: + def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: try: dataset.metadata.version_str = self._numpy_version_string(dataset.get_file_name()) except Exception as e: From eb6fff5eef50dcd4a9aafb6df5c281cf333753ce Mon Sep 17 00:00:00 2001 From: Alexander OSTROVSKY Date: Fri, 23 Feb 2024 10:21:46 -0800 Subject: [PATCH 6/7] comma --- lib/galaxy/datatypes/binary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index a3f0a6a8cd42..2c59580c0a2d 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -4561,7 +4561,7 @@ class Numpy(Binary): readonly=True, visible=True, no_value=0, - optional=True + optional=True, ) def _numpy_version_string(self, filename): From 97f8782f92cc934c3c3fbcc2836e75bf398156de Mon Sep 17 00:00:00 2001 From: Alex Ostrovsky <40246333+astrovsky01@users.noreply.github.com> Date: Sat, 24 Feb 2024 17:32:19 -0800 Subject: [PATCH 7/7] Apply f-string Co-authored-by: Martin Cech --- lib/galaxy/datatypes/binary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index 2c59580c0a2d..cf1803dbcafe 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -4566,7 +4566,7 @@ class Numpy(Binary): def _numpy_version_string(self, filename): magic_string = open(filename, "rb").read(8) - version_str = str(magic_string[6]) + "." + str(magic_string[7]) + version_str = f"{magic_string[6]}.{magic_string[7]}" return version_str def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: @@ -4582,7 +4582,7 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: if not dataset.dataset.purged: - dataset.peek = "Binary numpy file version %s" % dataset.metadata.version_str + dataset.peek = f"Binary numpy file version {dataset.metadata.version_str}" dataset.blurb = nice_size(dataset.get_size()) else: dataset.peek = "file does not exist"