Skip to content

Commit

Permalink
Improve file listing
Browse files Browse the repository at this point in the history
  • Loading branch information
oeway committed Oct 28, 2024
1 parent 0f259c3 commit 20eeba6
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 12 deletions.
22 changes: 22 additions & 0 deletions docs/artifact-manager.md
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,25 @@ get_url = await artifact_manager.get_file(prefix="collections/dataset-gallery/ex

---

### `list_files(prefix: str, dir_path: str=None) -> list`

Lists the files in the artifact, optionally restricted to a directory within it.

**Parameters:**

- `prefix`: The path of the artifact. It can be a prefix relative to the current workspace (e.g., `"collections/dataset-gallery/example-dataset"`) or an absolute prefix with the workspace id (e.g., `"/my_workspace_id/collections/dataset-gallery/example-dataset"`).
- `dir_path`: Optional. The directory path within the artifact whose files should be listed. Defaults to `None`, which lists files from the root of the artifact.

**Returns:** A list of files in the artifact.

**Example:**

```python
files = await artifact_manager.list_files(prefix="collections/dataset-gallery/example-dataset")
```

---

### `read(prefix: str, stage: bool = False, silent: bool = False) -> dict`

Reads and returns the manifest of an artifact or collection. If in staging mode, reads from `_manifest.yaml`.
Expand Down Expand Up @@ -547,6 +566,7 @@ The `Artifact Manager` provides an HTTP endpoint for retrieving artifact manifes

- `/{workspace}/artifacts/{prefix:path}` for fetching the artifact manifest.
- `/{workspace}/artifacts/{prefix:path}/__children__` for listing all artifacts in a collection.
- `/{workspace}/artifacts/{prefix:path}/__files__` for listing all files in the artifact.
- `/{workspace}/artifacts/{prefix:path}/__files__/{file_path:path}` for downloading a file from the artifact (will be redirected to a pre-signed URL).

### Path Parameters:
Expand All @@ -572,6 +592,8 @@ For `/{workspace}/artifacts/{prefix:path}`, the response will be a JSON object r

For `/{workspace}/artifacts/{prefix:path}/__children__`, the response will be a list of artifacts in the collection.

For `/{workspace}/artifacts/{prefix:path}/__files__`, the response will be a list of the files in the artifact; each file is a dictionary with `name` and `type` fields.

For `/{workspace}/artifacts/{prefix:path}/__files__/{file_path:path}`, the response will be a redirect to a pre-signed URL for downloading the file.

### Example: Fetching a public artifact with download statistics
Expand Down
2 changes: 1 addition & 1 deletion hypha/VERSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "0.20.38.post13"
"version": "0.20.38.post14"
}
41 changes: 31 additions & 10 deletions hypha/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,21 @@ async def get_artifact(
)

if "/__files__/" in prefix:
prefix, file_path = prefix.split("/__files__/")
url = await self.get_file(
prefix,
file_path,
context={"ws": workspace, "user": user_info.model_dump()},
)
# Redirect to the pre-signed URL
return RedirectResponse(url=url)
prefix, path = prefix.split("/__files__/")
try:
url = await self.get_file(
prefix,
path,
context={"ws": workspace, "user": user_info.model_dump()},
)
# Redirect to the pre-signed URL
return RedirectResponse(url=url)
except FileNotFoundError as e:
return await self.list_files(
prefix,
path,
context={"ws": workspace, "user": user_info.model_dump()},
)

if prefix.endswith("/__children__"):
assert not stage, "Cannot list children of a staged artifact."
Expand Down Expand Up @@ -746,7 +753,11 @@ async def _delete_s3_files(self, ws, prefix):
await remove_objects_async(s3_client, self.workspace_bucket, artifact_path)

async def list_files(
self, prefix, max_length=1000, stage=False, context: dict = None
self,
prefix: str,
dir_path: str = None,
max_length: int = 1000,
context: dict = None,
):
"""List files in the specified S3 prefix."""
if context is None or "ws" not in context:
Expand All @@ -759,7 +770,10 @@ async def list_files(
user_info = UserInfo.model_validate(context["user"])
await self._get_artifact_with_permission(ws, user_info, prefix, "list_files")
async with self.s3_controller.create_client_async() as s3_client:
full_path = safe_join(ws, prefix) + "/"
if dir_path:
full_path = safe_join(ws, prefix, dir_path) + "/"
else:
full_path = safe_join(ws, prefix) + "/"
items = await list_objects_async(
s3_client, self.workspace_bucket, full_path, max_length=max_length
)
Expand Down Expand Up @@ -960,6 +974,13 @@ async def get_file(self, prefix, path, options: dict = None, context: dict = Non
)
async with self.s3_controller.create_client_async() as s3_client:
file_key = safe_join(ws, f"{prefix}/{path}")
# check if the file exists
try:
await s3_client.head_object(Bucket=self.workspace_bucket, Key=file_key)
except ClientError:
raise FileNotFoundError(
f"File '{path}' does not exist in the artifact."
)
presigned_url = await s3_client.generate_presigned_url(
"get_object",
Params={"Bucket": self.workspace_bucket, "Key": file_key},
Expand Down
2 changes: 1 addition & 1 deletion tests/test_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ async def test_artifact_manager_with_collection(minio_server, fastapi_server):
assert manifest_data["id"] == "test-dataset"

files = await artifact_manager.list_files(
prefix="collections/test-collection/test-dataset", stage=True
prefix="collections/test-collection/test-dataset"
)
assert find_item(files, "name", "test.txt")

Expand Down

0 comments on commit 20eeba6

Please sign in to comment.