diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py
index 7f3159b131e7..eaf78862fd44 100644
--- a/lib/galaxy/model/__init__.py
+++ b/lib/galaxy/model/__init__.py
@@ -29,6 +29,7 @@
 from typing import (
     Any,
     cast,
+    ClassVar,
     Dict,
     Generic,
     Iterable,
@@ -159,7 +160,6 @@
 )
 from galaxy.model.orm.now import now
 from galaxy.model.orm.util import add_object_to_object_session
-from galaxy.objectstore import ObjectStorePopulator
 from galaxy.objectstore.templates import (
     ObjectStoreConfiguration,
     ObjectStoreTemplate,
@@ -219,6 +219,10 @@
 from galaxy.util.sanitize_html import sanitize_html
 
 if TYPE_CHECKING:
+    from galaxy.objectstore import (
+        BaseObjectStore,
+        ObjectStorePopulator,
+    )
     from galaxy.schema.invocation import InvocationMessageUnion
 
 log = logging.getLogger(__name__)
@@ -2708,11 +2712,12 @@ def dataset(self):
 class FakeDatasetAssociation:
     fake_dataset_association = True
 
-    def __init__(self, dataset=None):
+    def __init__(self, dataset: Optional["Dataset"] = None) -> None:
         self.dataset = dataset
-        self.metadata = {}
+        self.metadata: Dict = {}
 
-    def get_file_name(self, sync_cache=True):
+    def get_file_name(self, sync_cache: bool = True) -> str:
+        assert self.dataset
         return self.dataset.get_file_name(sync_cache)
 
     def __eq__(self, other):
@@ -4041,7 +4046,7 @@ def flush(self):
         sa_session.commit()
 
 
-def setup_global_object_store_for_models(object_store):
+def setup_global_object_store_for_models(object_store: "BaseObjectStore") -> None:
     Dataset.object_store = object_store
 
 
@@ -4133,7 +4138,9 @@ class conversion_messages(str, Enum):
     permitted_actions = get_permitted_actions(filter="DATASET")
     file_path = "/tmp/"
-    object_store = None  # This get initialized in mapping.py (method init) by app.py
+    object_store: ClassVar[Optional["BaseObjectStore"]] = (
+        None  # This get initialized in mapping.py (method init) by app.py
+    )
     engine = None
 
     def __init__(
@@ -4167,7 +4174,7 @@ def in_ready_state(self):
         return self.state in self.ready_states
 
     @property
-    def shareable(self):
+    def shareable(self) -> bool:
         """Return True if placed into an objectstore not labeled as ``private``."""
         if self.external_filename:
             return True
@@ -4179,7 +4186,7 @@ def ensure_shareable(self):
         if not self.shareable:
             raise Exception(CANNOT_SHARE_PRIVATE_DATASET_MESSAGE)
 
-    def get_file_name(self, sync_cache=True):
+    def get_file_name(self, sync_cache: bool = True) -> str:
         if self.purged:
             log.warning(f"Attempt to get file name of purged dataset {self.id}")
             return ""
@@ -4225,20 +4232,19 @@ def set_file_name(self, filename):
         else:
             self.external_filename = filename
 
-    def _assert_object_store_set(self):
-        assert self.object_store is not None, f"Object Store has not been initialized for dataset {self.id}"
+    def _assert_object_store_set(self) -> "BaseObjectStore":
+        assert self.object_store is not None, "Object Store has not been initialized"
         return self.object_store
 
-    def get_extra_files_path(self):
+    def get_extra_files_path(self) -> str:
         # Unlike get_file_name - external_extra_files_path is not backed by an
         # actual database column so if SA instantiates this object - the
         # attribute won't exist yet.
         if not getattr(self, "external_extra_files_path", None):
-            if self.object_store.exists(self, dir_only=True, extra_dir=self._extra_files_rel_path):
-                return self.object_store.get_filename(self, dir_only=True, extra_dir=self._extra_files_rel_path)
-            return self.object_store.construct_path(
-                self, dir_only=True, extra_dir=self._extra_files_rel_path, in_cache=True
-            )
+            object_store = self._assert_object_store_set()
+            if object_store.exists(self, dir_only=True, extra_dir=self._extra_files_rel_path):
+                return object_store.get_filename(self, dir_only=True, extra_dir=self._extra_files_rel_path)
+            return object_store.construct_path(self, dir_only=True, extra_dir=self._extra_files_rel_path, in_cache=True)
         else:
             return os.path.abspath(self.external_extra_files_path)
 
@@ -4283,7 +4289,7 @@ def _calculate_size(self) -> int:
             except OSError:
                 return 0
         assert self.object_store
-        return self.object_store.size(self)  # type:ignore[unreachable]
+        return self.object_store.size(self)
 
     @overload
     def get_size(self, nice_size: Literal[False], calculate_size: bool = True) -> int: ...
@@ -4663,7 +4669,7 @@ def get_quota_source_label(self):
 
     quota_source_label = property(get_quota_source_label)
 
-    def set_skipped(self, object_store_populator: ObjectStorePopulator):
+    def set_skipped(self, object_store_populator: "ObjectStorePopulator") -> None:
         assert self.dataset
         object_store_populator.set_object_store_id(self)
         self.extension = "expression.json"
@@ -4674,7 +4680,7 @@ def set_skipped(self, object_store_populator: ObjectStorePopulator):
             out.write(json.dumps(None))
         self.set_total_size()
 
-    def get_file_name(self, sync_cache=True) -> str:
+    def get_file_name(self, sync_cache: bool = True) -> str:
         if self.dataset.purged:
             return ""
         return self.dataset.get_file_name(sync_cache=sync_cache)
@@ -9692,16 +9698,17 @@ def update_from_file(self, file_name):
             alt_name=os.path.basename(self.get_file_name()),
         )
 
-    def get_file_name(self, sync_cache=True):
+    def get_file_name(self, sync_cache: bool = True) -> str:
         # Ensure the directory structure and the metadata file object exist
         try:
             da = self.history_dataset or self.library_dataset
-            if self.object_store_id is None and da is not None:
+            assert da is not None
+            if self.object_store_id is None:
                 self.object_store_id = da.dataset.object_store_id
             object_store = da.dataset.object_store
             store_by = object_store.get_store_by(da.dataset)
             if store_by == "id" and self.id is None:
-                self.flush()
+                self.flush()  # type:ignore[unreachable]
             identifier = getattr(self, store_by)
             alt_name = f"metadata_{identifier}.dat"
             if not object_store.exists(self, extra_dir="_metadata_files", extra_dir_at_root=True, alt_name=alt_name):
@@ -9710,7 +9717,7 @@ def get_file_name(self, sync_cache=True):
                 self, extra_dir="_metadata_files", extra_dir_at_root=True, alt_name=alt_name, sync_cache=sync_cache
             )
             return path
-        except AttributeError:
+        except (AssertionError, AttributeError):
             assert (
                 self.id is not None
             ), "ID must be set before MetadataFile used without an HDA/LDDA (commit the object)"
diff --git a/lib/galaxy/model/mapping.py b/lib/galaxy/model/mapping.py
index 053e177edd9f..e1d975e5be5a 100644
--- a/lib/galaxy/model/mapping.py
+++ b/lib/galaxy/model/mapping.py
@@ -3,6 +3,7 @@
 from typing import (
     Optional,
     Type,
+    TYPE_CHECKING,
 )
 
 from galaxy import model
@@ -16,6 +17,9 @@
 from galaxy.model.security import GalaxyRBACAgent
 from galaxy.model.triggers.update_audit_table import install as install_timestamp_triggers
 
+if TYPE_CHECKING:
+    from galaxy.objectstore import BaseObjectStore
+
 log = logging.getLogger(__name__)
 
 metadata = mapper_registry.metadata
@@ -99,8 +103,11 @@ def _build_model_mapping(engine, map_install_models, thread_local_log) -> Galaxy
 
 
 def init_models_from_config(
-    config: GalaxyAppConfiguration, map_install_models=False, object_store=None, trace_logger=None
-):
+    config: GalaxyAppConfiguration,
+    map_install_models: bool = False,
+    object_store: Optional["BaseObjectStore"] = None,
+    trace_logger=None,
+) -> GalaxyModelMapping:
     model = init(
         config.file_path,
         config.database_connection,
diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py
index 4a4d639bf927..70e20fc44987 100644
--- a/lib/galaxy/objectstore/__init__.py
+++ b/lib/galaxy/objectstore/__init__.py
@@ -143,14 +143,23 @@ class ObjectStore(metaclass=abc.ABCMeta):
     """
 
     @abc.abstractmethod
-    def exists(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None):
+    def exists(
+        self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None
+    ) -> bool:
         """Return True if the object identified by `obj` exists, False otherwise."""
         raise NotImplementedError()
 
     @abc.abstractmethod
     def construct_path(
-        self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None
-    ):
+        self,
+        obj,
+        base_dir=None,
+        dir_only=False,
+        extra_dir=None,
+        extra_dir_at_root=False,
+        alt_name=None,
+        in_cache: bool = False,
+    ) -> str:
         raise NotImplementedError()
 
     @abc.abstractmethod
@@ -235,8 +244,16 @@ def get_data(
 
     @abc.abstractmethod
     def get_filename(
-        self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False
-    ):
+        self,
+        obj,
+        base_dir=None,
+        dir_only=False,
+        extra_dir=None,
+        extra_dir_at_root=False,
+        alt_name=None,
+        obj_dir=False,
+        sync_cache: bool = True,
+    ) -> str:
         """
         Get the expected filename with absolute path for object with id `obj.id`.
 
@@ -254,9 +271,9 @@ def update_from_file(
         alt_name=None,
         obj_dir=False,
         file_name=None,
-        create=False,
-        preserve_symlinks=False,
-    ):
+        create: bool = False,
+        preserve_symlinks: bool = False,
+    ) -> None:
         """
         Inform the store that the file associated with `obj.id` has been updated.
@@ -310,7 +327,7 @@ def get_concrete_store_badges(self, obj) -> List[BadgeDict]:
         """Return a list of dictified badges summarizing the object store configuration."""
 
     @abc.abstractmethod
-    def is_private(self, obj):
+    def is_private(self, obj) -> bool:
         """Return True iff supplied object is stored in private ConcreteObjectStore."""
 
     def object_store_ids(self, private=None):
@@ -450,35 +467,164 @@ def _get_object_id(self, obj):
     def _invoke(self, delegate, obj=None, **kwargs):
         return self.__getattribute__(f"_{delegate}")(obj=obj, **kwargs)
 
-    def exists(self, obj, **kwargs):
-        return self._invoke("exists", obj, **kwargs)
+    def exists(
+        self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None
+    ) -> bool:
+        return self._invoke(
+            "exists",
+            obj,
+            base_dir=base_dir,
+            dir_only=dir_only,
+            extra_dir=extra_dir,
+            extra_dir_at_root=extra_dir_at_root,
+            alt_name=alt_name,
+        )
 
-    def construct_path(self, obj, **kwargs):
-        return self._invoke("construct_path", obj, **kwargs)
+    def construct_path(
+        self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, in_cache: bool = False
+    ) -> str:
+        return self._invoke(
+            "construct_path",
+            obj,
+            base_dir=base_dir,
+            dir_only=dir_only,
+            extra_dir=extra_dir,
+            extra_dir_at_root=extra_dir_at_root,
+            alt_name=alt_name,
+            in_cache=in_cache
+        )
 
-    def create(self, obj, **kwargs):
-        return self._invoke("create", obj, **kwargs)
+    def create(
+        self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False
+    ):
+        return self._invoke(
+            "create",
+            obj,
+            base_dir=base_dir,
+            dir_only=dir_only,
+            extra_dir=extra_dir,
+            extra_dir_at_root=extra_dir_at_root,
+            alt_name=alt_name,
+            obj_dir=obj_dir,
+        )
 
-    def empty(self, obj, **kwargs):
-        return self._invoke("empty", obj, **kwargs)
+    def empty(self, obj, base_dir=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False):
+        return self._invoke(
+            "empty",
+            obj,
+            base_dir=base_dir,
+            extra_dir=extra_dir,
+            extra_dir_at_root=extra_dir_at_root,
+            alt_name=alt_name,
+            obj_dir=obj_dir,
+        )
 
-    def size(self, obj, **kwargs):
-        return self._invoke("size", obj, **kwargs)
+    def size(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False) -> int:
+        return self._invoke(
+            "size", obj, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name, obj_dir=obj_dir
+        )
 
-    def delete(self, obj, **kwargs):
-        return self._invoke("delete", obj, **kwargs)
+    def delete(
+        self,
+        obj,
+        entire_dir=False,
+        base_dir=None,
+        extra_dir=None,
+        extra_dir_at_root=False,
+        alt_name=None,
+        obj_dir=False,
+    ):
+        return self._invoke(
+            "delete",
+            obj,
+            entire_dir=entire_dir,
+            base_dir=base_dir,
+            extra_dir=extra_dir,
+            extra_dir_at_root=extra_dir_at_root,
+            alt_name=alt_name,
+            obj_dir=obj_dir,
+        )
 
-    def get_data(self, obj, **kwargs):
-        return self._invoke("get_data", obj, **kwargs)
+    def get_data(
+        self,
+        obj,
+        start=0,
+        count=-1,
+        base_dir=None,
+        extra_dir=None,
+        extra_dir_at_root=False,
+        alt_name=None,
+        obj_dir=False,
+    ):
+        return self._invoke(
+            "get_data",
+            obj,
+            start=start,
+            count=count,
+            base_dir=base_dir,
+            extra_dir=extra_dir,
+            extra_dir_at_root=extra_dir_at_root,
+            alt_name=alt_name,
+            obj_dir=obj_dir,
+        )
 
-    def get_filename(self, obj, **kwargs):
-        return self._invoke("get_filename", obj, **kwargs)
+    def get_filename(
+        self,
+        obj,
+        base_dir=None,
+        dir_only=False,
+        extra_dir=None,
+        extra_dir_at_root=False,
+        alt_name=None,
+        obj_dir=False,
+        sync_cache: bool = True,
+    ) -> str:
+        return self._invoke(
+            "get_filename",
+            obj,
+            base_dir=base_dir,
+            dir_only=dir_only,
+            extra_dir=extra_dir,
+            extra_dir_at_root=extra_dir_at_root,
+            alt_name=alt_name,
+            obj_dir=obj_dir,
+            sync_cache=sync_cache,
+        )
 
-    def update_from_file(self, obj, **kwargs):
-        return self._invoke("update_from_file", obj, **kwargs)
+    def update_from_file(
+        self,
+        obj,
+        base_dir=None,
+        extra_dir=None,
+        extra_dir_at_root=False,
+        alt_name=None,
+        obj_dir=False,
+        file_name=None,
+        create: bool = False,
+        preserve_symlinks: bool = False,
+    ) -> None:
+        return self._invoke(
+            "update_from_file",
+            obj,
+            base_dir=base_dir,
+            extra_dir=extra_dir,
+            extra_dir_at_root=extra_dir_at_root,
+            alt_name=alt_name,
+            obj_dir=obj_dir,
+            file_name=file_name,
+            create=create,
+            preserve_symlinks=preserve_symlinks,
+        )
 
-    def get_object_url(self, obj, **kwargs):
-        return self._invoke("get_object_url", obj, **kwargs)
+    def get_object_url(self, obj, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False):
+        return self._invoke(
+            "get_object_url",
+            obj,
+            extra_dir=extra_dir,
+            extra_dir_at_root=extra_dir_at_root,
+            alt_name=alt_name,
+            obj_dir=obj_dir,
+        )
 
     def get_concrete_store_name(self, obj):
         return self._invoke("get_concrete_store_name", obj)
@@ -495,7 +641,7 @@ def get_store_usage_percent(self):
     def get_store_by(self, obj, **kwargs):
         return self._invoke("get_store_by", obj, **kwargs)
 
-    def is_private(self, obj):
+    def is_private(self, obj) -> bool:
         return self._invoke("is_private", obj)
 
     def cache_targets(self) -> List[CacheTarget]:
@@ -609,7 +755,7 @@ def _get_concrete_store_description_markdown(self, obj):
     def _get_store_by(self, obj):
         return self.store_by
 
-    def _is_private(self, obj):
+    def _is_private(self, obj) -> bool:
         return self.private
 
     @property
@@ -708,7 +854,7 @@ def to_dict(self):
 
     def __get_filename(
         self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False
-    ):
+    ) -> str:
         """
         Return the absolute path for the file corresponding to the `obj.id`.
 
@@ -726,15 +872,16 @@ def __get_filename(
         )
         # For backward compatibility: check the old style root path first;
         # otherwise construct hashed path.
-        if not os.path.exists(path):
-            return self._construct_path(
-                obj,
-                base_dir=base_dir,
-                dir_only=dir_only,
-                extra_dir=extra_dir,
-                extra_dir_at_root=extra_dir_at_root,
-                alt_name=alt_name,
-            )
+        if os.path.exists(path):
+            return path
+        return self._construct_path(
+            obj,
+            base_dir=base_dir,
+            dir_only=dir_only,
+            extra_dir=extra_dir,
+            extra_dir_at_root=extra_dir_at_root,
+            alt_name=alt_name,
+        )
 
     # TODO: rename to _disk_path or something like that to avoid conflicts with
     # children that'll use the local_extra_dirs decorator, e.g. S3
@@ -749,7 +896,7 @@ def _construct_path(
         self,
         obj,
         base_dir=None,
         dir_only=False,
         extra_dir=None,
         extra_dir_at_root=False,
         alt_name=None,
         obj_dir=False,
         **kwargs,
-    ):
+    ) -> str:
         """
         Construct the absolute path for accessing the object identified by `obj.id`.
@@ -816,7 +963,7 @@ def _construct_path(
             path = os.path.join(path, alt_name if alt_name else f"dataset_{obj_id}.dat")
         return os.path.abspath(path)
 
-    def _exists(self, obj, **kwargs):
+    def _exists(self, obj, **kwargs) -> bool:
         """Override `ObjectStore`'s stub and check on disk."""
         if self.check_old_style:
             path = self._construct_path(obj, old_style=True, **kwargs)
@@ -843,7 +990,7 @@ def _create(self, obj, **kwargs):
 
     def _empty(self, obj, **kwargs):
         """Override `ObjectStore`'s stub by checking file size on disk."""
-        return self.size(obj, **kwargs) == 0
+        return self._size(obj, **kwargs) == 0
 
     def _size(self, obj, **kwargs) -> int:
         """Override `ObjectStore`'s stub by return file size on disk.
@@ -865,7 +1012,7 @@ def _size(self, obj, **kwargs) -> int:
         else:
             return 0
 
-    def _delete(self, obj, entire_dir=False, **kwargs):
+    def _delete(self, obj, entire_dir: bool = False, **kwargs) -> bool:
         """Override `ObjectStore`'s stub; delete the file or folder on disk."""
         path = self._get_filename(obj, **kwargs)
         extra_dir = kwargs.get("extra_dir", None)
@@ -884,7 +1031,7 @@ def _delete(self, obj, entire_dir=False, **kwargs):
             # and another process writes files into that directory.
             # If the path doesn't exist anymore, another rmtree call was successful.
             path = self.__get_filename(obj, **kwargs)
-            if path is None:
+            if not os.path.exists(path):
                 return True
             else:
                 log.critical(f"{path} delete error {ex}", exc_info=True)
@@ -898,7 +1045,7 @@ def _get_data(self, obj, start=0, count=-1, **kwargs):
         data_file.close()
         return content
 
-    def _get_filename(self, obj, **kwargs):
+    def _get_filename(self, obj, **kwargs) -> str:
         """
         Override `ObjectStore`'s stub.
 
@@ -916,7 +1063,7 @@ def _get_filename(self, obj, **kwargs):
             raise ObjectNotFound
         return path
 
-    def _update_from_file(self, obj, file_name=None, create=False, **kwargs):
+    def _update_from_file(self, obj, file_name=None, create: bool = False, **kwargs) -> None:
         """`create` parameter is not used in this implementation."""
         preserve_symlinks = kwargs.pop("preserve_symlinks", False)
         # FIXME: symlinks and the object store model may not play well together
@@ -1004,11 +1151,11 @@ def _get_data(self, obj, **kwargs):
         """For the first backend that has this `obj`, get data from it."""
         return self._call_method("_get_data", obj, ObjectNotFound, True, **kwargs)
 
-    def _get_filename(self, obj, **kwargs):
+    def _get_filename(self, obj, **kwargs) -> str:
         """For the first backend that has this `obj`, get its filename."""
         return self._call_method("_get_filename", obj, ObjectNotFound, True, **kwargs)
 
-    def _update_from_file(self, obj, **kwargs):
+    def _update_from_file(self, obj, **kwargs) -> None:
         """For the first backend that has this `obj`, update it from the given file."""
         if kwargs.get("create", False):
             self._create(obj, **kwargs)
@@ -1028,7 +1175,7 @@ def _get_concrete_store_description_markdown(self, obj):
     def _get_concrete_store_badges(self, obj) -> List[BadgeDict]:
         return self._call_method("_get_concrete_store_badges", obj, [], False)
 
-    def _is_private(self, obj):
+    def _is_private(self, obj) -> bool:
         return self._call_method("_is_private", obj, False, False)
 
     def _get_store_by(self, obj):
@@ -1045,7 +1192,7 @@ def _repr_object_for_exception(self, obj):
     def _call_method(self, method, obj, default, default_is_exception, **kwargs):
         """Check all children object stores for the first one with the dataset."""
         for store in self.backends.values():
-            if store.exists(obj, **kwargs):
+            if store._exists(obj, **kwargs):
                 return store.__getattribute__(method)(obj, **kwargs)
         if default_is_exception:
             raise default(
@@ -1109,22 +1256,22 @@ def __init__(
         user_selection_allowed = []
         for backend_def in config_dict["backends"]:
-            backened_id = backend_def["id"]
+            backend_id = backend_def["id"]
             maxpctfull = backend_def.get("max_percent_full", 0)
             weight = backend_def["weight"]
             allow_selection = backend_def.get("allow_selection")
             if allow_selection:
-                user_selection_allowed.append(backened_id)
+                user_selection_allowed.append(backend_id)
             backend = build_object_store_from_config(config, config_dict=backend_def, fsmon=fsmon)
 
-            self.backends[backened_id] = backend
-            self.max_percent_full[backened_id] = maxpctfull
+            self.backends[backend_id] = backend
+            self.max_percent_full[backend_id] = maxpctfull
 
             for _ in range(0, weight):
                 # The simplest way to do weighting: add backend ids to a
                 # sequence the number of times equalling weight, then randomly
                 # choose a backend from that sequence at creation
-                self.weighted_backend_ids.append(backened_id)
+                self.weighted_backend_ids.append(backend_id)
 
         self.original_weighted_backend_ids = self.weighted_backend_ids
         self.user_object_store_resolver = user_object_store_resolver
@@ -1244,7 +1391,7 @@ def __filesystem_monitor(self, sleeper: Sleeper):
             self.weighted_backend_ids = new_weighted_backend_ids
             sleeper.sleep(120)  # Test free space every 2 minutes
 
-    def _construct_path(self, obj, **kwargs):
+    def _construct_path(self, obj, **kwargs) -> str:
         return self._resolve_backend(obj.object_store_id).construct_path(obj, **kwargs)
 
     def _create(self, obj, **kwargs):
@@ -1340,7 +1487,7 @@ def __get_store_id_for(self, obj, **kwargs):
         # distributed object store, or if the object's store id is invalid,
         # try to locate the object
         for id, store in self.backends.items():
-            if store.exists(obj, **kwargs):
+            if store._exists(obj, **kwargs):
                 log.warning(
                     f"{obj.__class__.__name__} object with ID {obj.id} found in backend object store with ID {id}"
                 )
@@ -1401,7 +1548,7 @@ def __init__(self, config, config_dict, fsmon=False):
         """The default constructor. Extends `NestedObjectStore`."""
         super().__init__(config, config_dict)
 
-        backends: Dict[int, ObjectStore] = {}
+        backends: Dict[int, BaseObjectStore] = {}
         is_private = config_dict.get("private", DEFAULT_PRIVATE)
         for order, backend_def in enumerate(config_dict["backends"]):
             backend_is_private = backend_def.get("private")
@@ -1457,14 +1604,14 @@ def _exists(self, obj, **kwargs):
                 return True
         return False
 
-    def _construct_path(self, obj, **kwargs):
+    def _construct_path(self, obj, **kwargs) -> str:
         return self.backends[0].construct_path(obj, **kwargs)
 
     def _create(self, obj, **kwargs):
         """Call the primary object store."""
         return self.backends[0].create(obj, **kwargs)
 
-    def _is_private(self, obj):
+    def _is_private(self, obj) -> bool:
         # Unlink the DistributedObjectStore - the HierarchicalObjectStore does not use
         # object_store_id - so all the contained object stores need to define is_private
         # the same way.
@@ -1840,17 +1987,16 @@ def __init__(self, has_object_store, user):
         self.object_store_id = None
         self.user = user
 
-    def set_object_store_id(self, data, require_shareable=False):
+    def set_object_store_id(self, data: "DatasetInstance", require_shareable: bool = False) -> None:
         self.set_dataset_object_store_id(data.dataset, require_shareable=require_shareable)
 
-    def set_dataset_object_store_id(self, dataset, require_shareable=True):
+    def set_dataset_object_store_id(self, dataset: "Dataset", require_shareable: bool = True) -> None:
         # Create an empty file immediately.  The first dataset will be
         # created in the "default" store, all others will be created in
         # the same store as the first.
         dataset.object_store_id = self.object_store_id
         try:
-            ensure_non_private = require_shareable
-            concrete_store = self.object_store.create(dataset, ensure_non_private=ensure_non_private)
+            concrete_store = self.object_store.create(dataset)
             if concrete_store.private and require_shareable:
                 raise ObjectCreationProblemSharingDisabled()
         except ObjectInvalid:
diff --git a/test/unit/app/tools/test_metadata.py b/test/unit/app/tools/test_metadata.py
index 7e1784a84caa..8f83c6b90ef6 100644
--- a/test/unit/app/tools/test_metadata.py
+++ b/test/unit/app/tools/test_metadata.py
@@ -18,7 +18,7 @@ class TestMetadata(TestCase, tools_support.UsesTools):
     def setUp(self):
         super().setUp()
         self.setup_app()
-        model.Dataset.object_store = self.app.object_store  # type: ignore[assignment]
+        model.Dataset.object_store = self.app.object_store
         job = model.Job()
         sa_session = self.app.model.session
         sa_session.add(job)
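
Aside on the pattern above (a minimal sketch, not part of the patch): the `BaseObjectStore` wrapper methods such as `exists` and `get_filename` now spell out explicit keyword arguments instead of accepting `**kwargs`, while still forwarding to the underscore-prefixed implementations via `_invoke`. The sketch below illustrates that delegation with hypothetical, simplified names (`Store`, `_exists`); the real Galaxy signatures carry more parameters.

    from typing import Any, Optional


    class Store:
        def _invoke(self, delegate: str, obj: Any = None, **kwargs: Any) -> Any:
            # Resolve the public name to its underscore-prefixed implementation,
            # e.g. "exists" -> self._exists, and forward all keywords unchanged.
            return getattr(self, f"_{delegate}")(obj=obj, **kwargs)

        def exists(self, obj: Any, base_dir: Optional[str] = None, dir_only: bool = False) -> bool:
            # Explicit keywords (instead of **kwargs) give type checkers a real
            # signature to verify at call sites; the delegation is unchanged.
            return self._invoke("exists", obj, base_dir=base_dir, dir_only=dir_only)

        def _exists(self, obj: Any, base_dir: Optional[str] = None, dir_only: bool = False) -> bool:
            return True  # a real backend would check a disk path or remote store


    print(Store().exists(object()))  # -> True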