From cdde21604f74ca18e694f0bffe58b6f21751cf56 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Thu, 7 Mar 2024 12:32:43 +0100
Subject: [PATCH] Add basic model import attribute validation

And coerce invalid `deleted` dataset state to `discarded`.
---
 lib/galaxy/model/store/__init__.py | 44 ++++++++++++++++++------------
 lib/galaxy/schema/schema.py        | 21 +++++++-------
 test/unit/schema/test_schema.py    | 12 ++++++++
 3 files changed, 50 insertions(+), 27 deletions(-)

diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py
index 38564174ab34..bdd773946384 100644
--- a/lib/galaxy/model/store/__init__.py
+++ b/lib/galaxy/model/store/__init__.py
@@ -34,6 +34,10 @@
 from bdbag import bdbag_api as bdb
 from boltons.iterutils import remap
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+)
 from rocrate.model.computationalworkflow import (
     ComputationalWorkflow,
     WorkflowDescription,
@@ -96,7 +100,10 @@
     get_contributors,
     write_to_file,
 )
-from galaxy.schema.schema import ModelStoreFormat
+from galaxy.schema.schema import (
+    DatasetStateField,
+    ModelStoreFormat,
+)
 from galaxy.security.idencoding import IdEncodingHelper
 from galaxy.util import (
     FILENAME_VALID_CHARS,
@@ -174,6 +181,20 @@ class ImportDiscardedDataType(Enum):
     FORCE = "force"
 
 
+class DatasetAttributeImportModel(BaseModel):
+    state: Optional[DatasetStateField] = None
+    deleted: Optional[bool] = None
+    purged: Optional[bool] = None
+    external_filename: Optional[str] = None
+    _extra_files_path: Optional[str] = None
+    file_size: Optional[int] = None
+    object_store_id: Optional[str] = None
+    total_size: Optional[int] = None
+    created_from_basename: Optional[str] = None
+    uuid: Optional[str] = None
+    model_config = ConfigDict(extra="ignore")
+
+
 DEFAULT_DISCARDED_DATA_TYPE = ImportDiscardedDataType.FORBID
@@ -444,22 +465,11 @@ def _import_datasets(
         def handle_dataset_object_edit(dataset_instance, dataset_attrs):
             if "dataset" in dataset_attrs:
                 assert self.import_options.allow_dataset_object_edit
-                dataset_attributes = [
-                    "state",
-                    "deleted",
-                    "purged",
-                    "external_filename",
-                    "_extra_files_path",
-                    "file_size",
-                    "object_store_id",
-                    "total_size",
-                    "created_from_basename",
-                    "uuid",
-                ]
-
-                for attribute in dataset_attributes:
-                    if attribute in dataset_attrs["dataset"]:
-                        setattr(dataset_instance.dataset, attribute, dataset_attrs["dataset"][attribute])
+                dataset_attributes = DatasetAttributeImportModel(**dataset_attrs["dataset"]).model_dump(
+                    exclude_unset=True,
+                )
+                for attribute, value in dataset_attributes.items():
+                    setattr(dataset_instance.dataset, attribute, value)
                 self._attach_dataset_hashes(dataset_attrs["dataset"], dataset_instance)
                 self._attach_dataset_sources(dataset_attrs["dataset"], dataset_instance)
                 if "id" in dataset_attrs["dataset"] and self.import_options.allow_edit:
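Note on the import side: instead of looping over a hard-coded attribute allow-list, the importer now round-trips the serialized `dataset` dict through a Pydantic model, so values are type-checked, unknown keys are dropped (`extra="ignore"`), and `exclude_unset=True` preserves the old only-set-what-was-provided behaviour. One caveat worth flagging: Pydantic treats underscore-prefixed annotations such as `_extra_files_path` as private attributes rather than fields, so that key is excluded from both validation and `model_dump()`. For readers outside Galaxy, a minimal standalone sketch of the same pattern; `DatasetAttrs`, `FakeDataset`, and the trimmed field list are illustrative stand-ins, not Galaxy code:

    from typing import Optional

    from pydantic import BaseModel, ConfigDict


    class DatasetAttrs(BaseModel):
        # Unknown keys in the serialized store are dropped instead of
        # being set blindly on the mapped object.
        model_config = ConfigDict(extra="ignore")

        state: Optional[str] = None
        deleted: Optional[bool] = None
        file_size: Optional[int] = None


    class FakeDataset:
        state = "ok"
        deleted = False
        file_size = 0


    raw = {"state": "ok", "file_size": 42, "bogus_column": "ignored"}
    # exclude_unset=True dumps only keys present in the input, so
    # attributes absent from the store keep their current values.
    attrs = DatasetAttrs(**raw).model_dump(exclude_unset=True)
    dataset = FakeDataset()
    for name, value in attrs.items():
        setattr(dataset, name, value)
    assert attrs == {"state": "ok", "file_size": 42}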
diff --git a/lib/galaxy/schema/schema.py b/lib/galaxy/schema/schema.py
index 1b8c0bd37a41..ec96b40fede0 100644
--- a/lib/galaxy/schema/schema.py
+++ b/lib/galaxy/schema/schema.py
@@ -20,6 +20,7 @@
     AnyHttpUrl,
     AnyUrl,
     BaseModel,
+    BeforeValidator,
     ConfigDict,
     Field,
     Json,
@@ -151,11 +152,11 @@ class DatasetCollectionPopulatedState(str, Enum):
 JobId = Annotated[EncodedDatabaseIdField, Field(..., title="Job ID")]
 
 
-DatasetStateField: DatasetState = Field(
-    ...,
-    title="State",
-    description="The current state of this dataset.",
-)
+DatasetStateField = Annotated[
+    DatasetState,
+    BeforeValidator(lambda v: "discarded" if v == "deleted" else v),
+    Field(..., title="State", description="The current state of this dataset."),
+]
 
 
 CreateTimeField = Field(
     title="Create Time",
@@ -661,7 +662,7 @@ class HDASummary(HDACommon):
         title="Dataset ID",
         description="The encoded ID of the dataset associated with this item.",
     )
-    state: DatasetState = DatasetStateField
+    state: DatasetStateField
     extension: Optional[str] = Field(
         ...,
         title="Extension",
@@ -679,7 +680,7 @@ class HDAInaccessible(HDACommon):
     """History Dataset Association information when the user can not access it."""
 
     accessible: bool = AccessibleField
-    state: DatasetState = DatasetStateField
+    state: DatasetStateField
 
 
 HdaLddaField = Field(
@@ -872,7 +873,7 @@ class HDAObject(Model, WithModelClass):
     # If so at least merge models
     id: HistoryDatasetAssociationId
     model_class: HDA_MODEL_CLASS = ModelClassField(HDA_MODEL_CLASS)
-    state: DatasetState = DatasetStateField
+    state: DatasetStateField
     hda_ldda: DatasetSourceType = HdaLddaField
     history_id: HistoryID
     tags: List[str]
@@ -3080,7 +3081,7 @@ class FileLibraryFolderItem(LibraryFolderItemBase):
     date_uploaded: datetime
     is_unrestricted: bool
     is_private: bool
-    state: DatasetState = DatasetStateField
+    state: DatasetStateField
     file_size: str
     raw_size: int
     ldda_id: EncodedDatabaseIdField
@@ -3650,7 +3651,7 @@ class DatasetSummary(Model):
     id: EncodedDatabaseIdField
     create_time: Optional[datetime] = CreateTimeField
     update_time: Optional[datetime] = UpdateTimeField
-    state: DatasetState = DatasetStateField
+    state: DatasetStateField
     deleted: bool
     purged: bool
     purgable: bool
diff --git a/test/unit/schema/test_schema.py b/test/unit/schema/test_schema.py
index 9d153dbb18a6..570469ed6fc2 100644
--- a/test/unit/schema/test_schema.py
+++ b/test/unit/schema/test_schema.py
@@ -1,5 +1,8 @@
 from uuid import uuid4
 
+from pydantic import BaseModel
+
+from galaxy.schema.schema import DatasetStateField
 from galaxy.schema.tasks import (
     GenerateInvocationDownload,
     RequestUser,
@@ -22,3 +25,12 @@ def test_task_schema():
     assert rehydrated_download.invocation_id == TEST_INVOCATION_ID
     assert rehydrated_download.user.user_id == TEST_USER_ID
     assert rehydrated_download.galaxy_url == TEST_GALAXY_URL
+
+
+class StateModel(BaseModel):
+    state: DatasetStateField
+
+
+def test_dataset_state_coercion():
+    assert StateModel(state="ok").state == "ok"
+    assert StateModel(state="deleted").state == "discarded"
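Note on the schema side: `DatasetStateField` changes from a bare `Field(...)` assignment (usable only as a default next to an explicit `state: DatasetState` annotation) to an `Annotated` type alias, which is why every call site collapses from `state: DatasetState = DatasetStateField` to `state: DatasetStateField`. The attached `BeforeValidator` runs before enum validation, rewriting the legacy `deleted` value to `discarded` instead of raising, exactly as the new unit test asserts. A minimal standalone sketch of the same mechanism, with a stand-in `State` enum instead of Galaxy's `DatasetState`:

    from enum import Enum
    from typing import Annotated

    from pydantic import BaseModel, BeforeValidator


    class State(str, Enum):
        OK = "ok"
        DISCARDED = "discarded"


    # The validator sees the raw input before enum coercion, so the
    # retired "deleted" value can be mapped onto "discarded".
    StateField = Annotated[State, BeforeValidator(lambda v: "discarded" if v == "deleted" else v)]


    class Example(BaseModel):
        state: StateField


    assert Example(state="ok").state is State.OK
    assert Example(state="deleted").state is State.DISCARDED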