diff --git a/hypha/VERSION b/hypha/VERSION index 5285d88d..d1a4747f 100644 --- a/hypha/VERSION +++ b/hypha/VERSION @@ -1,3 +1,3 @@ { - "version": "0.20.38.post7" + "version": "0.20.38.post8" } diff --git a/hypha/artifact.py b/hypha/artifact.py index f6c2d9bb..ca27b262 100644 --- a/hypha/artifact.py +++ b/hypha/artifact.py @@ -59,9 +59,9 @@ class ArtifactModel(Base): ) # Store the weights for counting downloads; a dictionary of file paths and their weights 0-1 download_count = Column(Float, nullable=False, default=0.0) # New counter field view_count = Column(Float, nullable=False, default=0.0) # New counter field - public = Column( - Boolean, nullable=True - ) # New field for public artifacts, None means it follows the parent artifact + permissions = Column( + JSON, nullable=True + ) # New field for storing permissions for the artifact __table_args__ = ( UniqueConstraint("workspace", "prefix", name="_workspace_prefix_uc"), ) @@ -96,19 +96,19 @@ async def get_artifact( ): """Get artifact from the database.""" try: - artifact, manifest = await self._read_manifest( - workspace, path, stage=stage + artifact = await self._get_artifact_with_permission( + workspace, user_info, path, "read" + ) + manifest = await self._read_manifest( + artifact, stage=stage, increment_view_count=True ) - if not artifact.public: - if not user_info.check_permission(workspace, UserPermission.read): - raise PermissionError( - "User does not have read permission to the non-public artifact." - ) return manifest except KeyError as e: raise HTTPException(status_code=404, detail=str(e)) except PermissionError as e: raise HTTPException(status_code=403, detail=str(e)) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) self.store.set_artifact_manager(self) self.store.register_public_service(self.get_artifact_service()) @@ -168,24 +168,15 @@ def end_transaction(session, transaction): return session - async def _read_manifest(self, workspace, prefix, stage=False): - session = await self._get_session() + async def _read_manifest(self, artifact, stage=False, increment_view_count=False): + manifest = artifact.stage_manifest if stage else artifact.manifest + prefix = artifact.prefix + workspace = artifact.workspace + if not manifest: + raise KeyError(f"No manifest found for artifact '{prefix}'.") try: + session = await self._get_session() async with session.begin(): - query = select(ArtifactModel).filter( - ArtifactModel.workspace == workspace, - ArtifactModel.prefix == prefix, - ) - result = await session.execute(query) - artifact = result.scalar_one_or_none() - - if not artifact: - raise KeyError(f"Artifact under prefix '{prefix}' does not exist.") - - manifest = artifact.stage_manifest if stage else artifact.manifest - if not manifest: - raise KeyError(f"No manifest found for artifact '{prefix}'.") - # If the artifact is a collection, dynamically populate the 'collection' field if manifest.get("type") == "collection": sub_prefix = f"{prefix}/" @@ -213,8 +204,8 @@ async def _read_manifest(self, workspace, prefix, stage=False): manifest["collection"] = collection if stage: - manifest["stage_files"] = artifact.stage_files - else: + manifest["_stage_files"] = artifact.stage_files + elif increment_view_count: # increase view count stmt = ( update(ArtifactModel) @@ -231,7 +222,7 @@ async def _read_manifest(self, workspace, prefix, stage=False): } if manifest.get("type") == "collection": manifest["_stats"]["child_count"] = len(collection) - return artifact, manifest + return manifest except Exception as e: raise e finally: @@ -245,25 +236,177 @@ async def _get_artifact(self, session, workspace, prefix): result = await session.execute(query) return result.scalar_one_or_none() + def _expand_permission(self, permission): + """Get the alias for the operation.""" + if isinstance(permission, str): + if permission == "r": + return ["read", "get_file", "list_files", "search"] + elif permission == "r+": + return [ + "read", + "get_file", + "put_file", + "list_files", + "search", + "create", + "commit", + ] + elif permission == "rw": + return [ + "read", + "get_file", + "list_files", + "search", + "edit", + "commit", + "put_file", + "remove_file", + ] + elif permission == "rw+": + return [ + "read", + "get_file", + "list_files", + "search", + "edit", + "commit", + "put_file", + "remove_file", + "create", + ] + elif permission == "*": + return [ + "read", + "get_file", + "list_files", + "search", + "edit", + "commit", + "put_file", + "remove_file", + "create", + "reset_stats", + ] + else: + raise ValueError(f"Permission '{permission}' is not supported.") + else: + return permission + + async def _get_artifact_with_permission( + self, workspace, user_info, prefix, operation + ): + """ + Check whether a user has the required permission to perform an operation on an artifact. + :param workspace: The workspace where the artifact is located. + :param user_info: The user info object, including user permissions. + :param prefix: The prefix of the artifact being accessed. + :param operation: The operation being checked ('read', 'write', etc.). + :return: True if the user has permission, False otherwise. + """ + + # Map the operation to the corresponding UserPermission enum + if operation in ["read", "get_file", "list_files", "search"]: # list + perm = UserPermission.read + elif operation in ["create", "edit", "commit", "put_file", "remove_file"]: + perm = UserPermission.read_write + elif operation in ["delete", "reset_stats"]: + perm = UserPermission.admin + else: + raise ValueError(f"Operation '{operation}' is not supported.") + + session = await self._get_session(read_only=True) + try: + async with session.begin(): + artifact = await self._get_artifact(session, workspace, prefix) + if not artifact: + if operation != "create": + raise KeyError( + f"Artifact with prefix '{prefix}' does not exist." + ) + else: + # Step 1: Check artifact-specific permissions + # Check if there are specific permissions for this artifact + if artifact.permissions: + # If all users are allowed for this operation, return True + if "*" in artifact.permissions and ( + operation + in self._expand_permission(artifact.permissions["*"]) + ): + return artifact + + # Check if this specific user is in the permissions dictionary + if user_info.id in artifact.permissions and ( + operation + in self._expand_permission( + artifact.permissions[user_info.id] + ) + ): + return artifact + + # Step 2: Check workspace-level permission + if user_info.check_permission(workspace, perm): + return artifact + + # Step 3: Check parent artifact's permissions if no explicit permission defined + parent_prefix = "/".join(prefix.split("/")[:-1]) + depth = 5 + while parent_prefix: + parent_artifact = await self._get_artifact( + session, workspace, parent_prefix + ) + if parent_artifact: + # Check parent artifact permissions if they exist + if parent_artifact.permissions: + # If all users are allowed for this operation, return True + if "*" in parent_artifact.permissions and ( + operation + in self._expand_permission( + parent_artifact.permissions["*"] + ) + ): + return artifact + + # Check if this specific user is in the parent artifact permissions dictionary + if user_info.id in parent_artifact.permissions and ( + operation + in self._expand_permission( + parent_artifact.permissions[user_info.id] + ) + ): + return artifact + else: + break + # Move up the hierarchy to check the next parent + parent_prefix = "/".join(parent_prefix.split("/")[:-1]) + depth -= 1 + if depth <= 0: + break + + except Exception as e: + raise e + finally: + await session.close() + + raise PermissionError( + f"User does not have permission to perform the operation '{operation}' on the artifact." + ) + async def create( self, prefix, manifest: dict, overwrite=False, stage=False, - public=None, + permissions: dict = None, context: dict = None, ): """Create a new artifact and store its manifest in the database.""" if context is None or "ws" not in context: raise ValueError("Context must include 'ws' (workspace).") ws = context["ws"] - user_info = UserInfo.model_validate(context["user"]) - if not user_info.check_permission(ws, UserPermission.read_write): - raise PermissionError( - "User does not have write permission to the workspace." - ) + + await self._get_artifact_with_permission(ws, user_info, prefix, "create") # Validate based on the type of manifest if manifest["type"] == "collection": @@ -280,15 +423,16 @@ async def create( session = await self._get_session() try: async with session.begin(): - if public is None: - # if public is not set, try to use the parent artifact's public setting - parent_prefix = "/".join(prefix.split("/")[:-1]) - if parent_prefix: - parent_artifact = await self._get_artifact( - session, ws, parent_prefix - ) - if parent_artifact: - public = parent_artifact.public + # if permissions is not set, try to use the parent artifact's permissions setting + parent_prefix = "/".join(prefix.split("/")[:-1]) + if parent_prefix: + parent_artifact = await self._get_artifact( + session, ws, parent_prefix + ) + if parent_artifact and permissions is None: + permissions = parent_artifact.permissions + else: + parent_artifact = None existing_artifact = await self._get_artifact(session, ws, prefix) @@ -303,6 +447,8 @@ async def create( await session.commit() async with session.begin(): + permissions = permissions or {} + permissions[user_info.id] = "*" new_artifact = ArtifactModel( workspace=ws, prefix=prefix, @@ -310,7 +456,7 @@ async def create( stage_manifest=manifest if stage else None, stage_files=[] if stage else None, download_weights=None, - public=public, + permissions=permissions, type=manifest["type"], ) session.add(new_artifact) @@ -330,17 +476,12 @@ async def reset_stats(self, prefix, context: dict): ws = context["ws"] user_info = UserInfo.model_validate(context["user"]) - if not user_info.check_permission(ws, UserPermission.read_write): - raise PermissionError( - "User does not have write permission to the workspace." - ) - + artifact = await self._get_artifact_with_permission( + ws, user_info, prefix, "reset_stats" + ) session = await self._get_session() try: async with session.begin(): - artifact = await self._get_artifact(session, ws, prefix) - if not artifact: - raise KeyError(f"Artifact under prefix '{prefix}' does not exist.") stmt = ( update(ArtifactModel) .where(ArtifactModel.id == artifact.id) @@ -362,28 +503,26 @@ async def read(self, prefix, stage=False, context: dict = None): ws = context["ws"] user_info = UserInfo.model_validate(context["user"]) - artifact, manifest = await self._read_manifest(ws, prefix, stage=stage) - - if not artifact.public and not user_info.check_permission( - ws, UserPermission.read - ): - raise PermissionError( - "User does not have read permission to the workspace." - ) - + artifact = await self._get_artifact_with_permission( + ws, user_info, prefix, "read" + ) + manifest = await self._read_manifest( + artifact, stage=stage, increment_view_count=True + ) return manifest - async def edit(self, prefix, manifest=None, context: dict = None): + async def edit( + self, prefix, manifest=None, permissions: dict = None, context: dict = None + ): """Edit the artifact's manifest and save it in the database.""" if context is None or "ws" not in context: raise ValueError("Context must include 'ws' (workspace).") ws = context["ws"] user_info = UserInfo.model_validate(context["user"]) - if not user_info.check_permission(ws, UserPermission.read_write): - raise PermissionError( - "User does not have write permission to the workspace." - ) + artifact = await self._get_artifact_with_permission( + ws, user_info, prefix, "edit" + ) if manifest: # Validate the manifest @@ -401,13 +540,13 @@ async def edit(self, prefix, manifest=None, context: dict = None): session = await self._get_session() try: async with session.begin(): - artifact = await self._get_artifact(session, ws, prefix) - if not artifact: - raise KeyError(f"Artifact under prefix '{prefix}' does not exist.") if manifest is None: manifest = copy.deepcopy(artifact.manifest) artifact.stage_manifest = manifest flag_modified(artifact, "stage_manifest") # Mark JSON field as modified + if permissions is not None: + artifact.permissions = permissions + flag_modified(artifact, "permissions") session.add(artifact) await session.commit() logger.info(f"Edited artifact under prefix: {prefix}") @@ -423,20 +562,13 @@ async def commit(self, prefix, context: dict): ws = context["ws"] user_info = UserInfo.model_validate(context["user"]) - if not user_info.check_permission(ws, UserPermission.read_write): - raise PermissionError( - "User does not have write permission to the workspace." - ) + artifact = await self._get_artifact_with_permission( + ws, user_info, prefix, "commit" + ) session = await self._get_session() try: async with session.begin(): - artifact = await self._get_artifact(session, ws, prefix) - if not artifact or not artifact.stage_manifest: - raise KeyError( - f"No staged manifest to commit for artifact '{prefix}'." - ) - manifest = artifact.stage_manifest download_weights = {} @@ -498,18 +630,14 @@ async def delete(self, prefix, context: dict): ws = context["ws"] user_info = UserInfo.model_validate(context["user"]) - if not user_info.check_permission(ws, UserPermission.read_write): - raise PermissionError( - "User does not have write permission to the workspace." - ) - + artifact = await self._get_artifact_with_permission( + ws, user_info, prefix, "delete" + ) session = await self._get_session() try: async with session.begin(): - artifact = await self._get_artifact(session, ws, prefix) - if artifact: - await session.delete(artifact) - await session.commit() + await session.delete(artifact) + await session.commit() logger.info(f"Deleted artifact under prefix: {prefix}") except Exception as e: raise e @@ -533,13 +661,7 @@ async def list_files( raise ValueError("Context must include 'ws' (workspace).") ws = context["ws"] user_info = UserInfo.model_validate(context["user"]) - artifact, _ = await self._read_manifest(ws, prefix, stage=stage) - if not artifact.public and not user_info.check_permission( - ws, UserPermission.read - ): - raise PermissionError( - "User does not have read permission to the workspace." - ) + await self._get_artifact_with_permission(ws, user_info, prefix, "list_files") async with self.s3_controller.create_client_async() as s3_client: full_path = safe_join(ws, prefix) + "/" items = await list_objects_async( @@ -553,23 +675,11 @@ async def list_artifacts(self, prefix="", context: dict = None): if context is None or "ws" not in context: raise ValueError("Context must include 'ws' (workspace).") ws = context["ws"] - user_info = UserInfo.model_validate(context["user"]) - try: - artifact, _ = await self._read_manifest(ws, prefix) - if not artifact.public and not user_info.check_permission( - ws, UserPermission.read - ): - raise PermissionError( - "User does not have read permission to the workspace." - ) - except KeyError: - # the prefix is not a valid artifact, check if the user has permission to list the artifacts - if not user_info.check_permission(ws, UserPermission.read): - raise PermissionError( - "User does not have read permission to the workspace." - ) - + if not user_info.check_permission(ws, UserPermission.read): + raise PermissionError( + "User does not have read permission to the workspace." + ) session = await self._get_session() try: async with session.begin(): @@ -608,14 +718,9 @@ async def search( ws = context["ws"] user_info = UserInfo.model_validate(context["user"]) - artifact, _ = await self._read_manifest(ws, prefix) - if not artifact.public and not user_info.check_permission( - ws, UserPermission.read - ): - raise PermissionError( - "User does not have read permission to the workspace." - ) - + artifact = await self._get_artifact_with_permission( + ws, user_info, prefix, "search" + ) session = await self._get_session(read_only=True) try: async with session.begin(): @@ -695,10 +800,9 @@ async def put_file( """Generate a pre-signed URL to upload a file to an artifact in S3 and update the manifest.""" ws = context["ws"] user_info = UserInfo.model_validate(context["user"]) - if not user_info.check_permission(ws, UserPermission.read_write): - raise PermissionError( - "User does not have write permission to the workspace." - ) + artifact = await self._get_artifact_with_permission( + ws, user_info, prefix, "put_file" + ) options = options or {} @@ -715,10 +819,6 @@ async def put_file( session = await self._get_session() try: async with session.begin(): - artifact = await self._get_artifact(session, ws, prefix) - if not artifact or not artifact.stage_manifest: - raise KeyError(f"No staged manifest found for artifact '{prefix}'.") - artifact.stage_files = artifact.stage_files or [] if not any(f["path"] == file_path for f in artifact.stage_files): @@ -745,14 +845,9 @@ async def get_file(self, prefix, path, options: dict = None, context: dict = Non ws = context["ws"] user_info = UserInfo.model_validate(context["user"]) - artifact, _ = await self._read_manifest(ws, prefix) - if not artifact.public and not user_info.check_permission( - ws, UserPermission.read - ): - raise PermissionError( - "User does not have read permission to the workspace." - ) - + artifact = await self._get_artifact_with_permission( + ws, user_info, prefix, "get_file" + ) async with self.s3_controller.create_client_async() as s3_client: file_key = safe_join(ws, f"{prefix}/{path}") presigned_url = await s3_client.generate_presigned_url( @@ -795,10 +890,9 @@ async def remove_file(self, prefix, file_path, context: dict): ws = context["ws"] user_info = UserInfo.model_validate(context["user"]) - if not user_info.check_permission(ws, UserPermission.read_write): - raise PermissionError( - "User does not have write permission to the workspace." - ) + artifact = await self._get_artifact_with_permission( + ws, user_info, prefix, "remove_file" + ) session = await self._get_session() try: @@ -845,13 +939,13 @@ def get_artifact_service(self): "create": self.create, "reset_stats": self.reset_stats, "edit": self.edit, - "read": self.read, # accessible to public if the artifact is public + "read": self.read, "commit": self.commit, "delete": self.delete, "put_file": self.put_file, "remove_file": self.remove_file, - "get_file": self.get_file, # accessible to public if the artifact is public - "list": self.list_artifacts, # accessible to public if the artifact is public - "search": self.search, # accessible to public if the artifact is public - "list_files": self.list_files, # accessible to public if the artifact is public + "get_file": self.get_file, + "list": self.list_artifacts, + "search": self.search, + "list_files": self.list_files, } diff --git a/hypha/core/store.py b/hypha/core/store.py index e3dc8a81..bc327451 100644 --- a/hypha/core/store.py +++ b/hypha/core/store.py @@ -254,30 +254,62 @@ async def upgrade(self): } ) - # SQL database upgrade to add 'public' column - + # SQL database upgrade to replace 'public' column with 'permissions' column def _upgrade_schema_if_needed(conn): - """Run schema upgrade by adding the 'public' column if it doesn't exist.""" + """Run schema upgrade by replacing the 'public' column with 'permissions' column.""" inspector = inspect(conn) columns = inspector.get_columns("artifacts") + # Check if 'permissions' column exists + if "permissions" not in [col["name"] for col in columns]: + logger.info("Adding 'permissions' column to 'artifacts' table.") + + # Step 1: Add 'permissions' column (JSON type) + conn.execute(text("ALTER TABLE artifacts ADD COLUMN permissions JSON")) + logger.info( + "Successfully added 'permissions' column to 'artifacts' table." + ) + else: + logger.info("'permissions' column already exists in 'artifacts' table.") + # Check if 'public' column exists - if "public" not in [col["name"] for col in columns]: - logger.info("Adding 'public' column to 'artifacts' table.") - # Alter the 'artifacts' table to add the 'public' column (no 'await' since this is synchronous) - conn.execute(text("ALTER TABLE artifacts ADD COLUMN public BOOLEAN")) - logger.info("Successfully added 'public' column to 'artifacts' table.") + if "public" in [col["name"] for col in columns]: + logger.info( + "Replacing 'public' column with 'permissions' column in 'artifacts' table." + ) + + # Step 2: Update existing records: if 'public' is True, set the permissions to {'*': "r+"} + conn.execute( + text( + """ + UPDATE artifacts + SET permissions = '{"*": "r+"}' + WHERE public IS TRUE + """ + ) + ) + logger.info( + "Updated 'permissions' column for records where 'public' is TRUE." + ) - # Log the change in the database_change_log + # Step 3: Drop the 'public' column + conn.execute(text("ALTER TABLE artifacts DROP COLUMN public")) + logger.info( + "Successfully removed 'public' column from 'artifacts' table." + ) + + # Log the schema change database_change_log.append( { "time": datetime.datetime.now().isoformat(), "version": __version__, - "change": "Added 'public' column to 'artifacts' table", + "change": "Replaced 'public' column with 'permissions' column in 'artifacts' table", } ) else: - logger.info("'public' column already exists in 'artifacts' table.") + logger.info( + "'public' column does not exist in 'artifacts' table, no need for schema upgrade." + ) async with self._sql_engine.begin() as conn: try: diff --git a/tests/test_artifact.py b/tests/test_artifact.py index 6cac93b7..4a9c7633 100644 --- a/tests/test_artifact.py +++ b/tests/test_artifact.py @@ -46,7 +46,7 @@ async def test_serve_artifact_endpoint(minio_server, fastapi_server): await artifact_manager.create( prefix="public/collections/dataset-gallery", manifest=collection_manifest, - public=True, + permissions={"*": "r+"}, ) # Create an artifact inside the public collection @@ -60,7 +60,7 @@ async def test_serve_artifact_endpoint(minio_server, fastapi_server): prefix="public/collections/dataset-gallery/public-example-dataset", manifest=dataset_manifest, stage=True, - # public=True, # This is not necessary since the collection is already public + # permissions={"*": "r+"}, # This is not necessary since the collection is already public ) # Commit the artifact @@ -86,7 +86,7 @@ async def test_serve_artifact_endpoint(minio_server, fastapi_server): await artifact_manager.create( prefix="collections/private-dataset-gallery", manifest=private_collection_manifest, - public=False, + permissions={}, ) # Create an artifact inside the private collection