Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add stat function to datasets #164

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions faculty/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,52 @@ def ls(prefix="/", project_id=None, show_hidden=False, object_client=None):
return non_hidden_paths


def stat(prefix="/", project_id=None, show_hidden=False, object_client=None):
    """Get the status metadata for the contents of project datasets.

    Parameters
    ----------
    prefix : str, optional
        Return metadata only for files in the datasets matching this prefix.
        Default behaviour is to include all files.
    project_id : str, optional
        The project to list files from. You need to have access to this project
        for it to work. Defaults to the project set by FACULTY_PROJECT_ID in
        your environment.
    show_hidden : bool, optional
        Include hidden files in the output. Defaults to False. A file counts
        as hidden when any "/"-separated component of its path starts with
        a ".".
    object_client : faculty.clients.object.ObjectClient, optional
        Advanced - can be used to benefit from caching in chain interactions
        with datasets.

    Returns
    -------
    list
        The file metadata objects returned by ``object_client.list`` for the
        project datasets, gathered across all result pages. The list is newly
        created on every call; the client's responses are never modified.
    """

    project_id = project_id or get_context().project_id
    object_client = object_client or ObjectClient(get_session())
    list_response = object_client.list(project_id, prefix)

    # Copy the first page rather than aliasing it: extending the response's
    # own ``objects`` list in place would corrupt a response that the client
    # (or the caller) may still hold on to or serve again from a cache.
    metadata = list(list_response.objects)

    # Keep requesting pages until the service stops returning a page token.
    while list_response.next_page_token is not None:
        list_response = object_client.list(
            project_id, prefix, list_response.next_page_token
        )
        metadata.extend(list_response.objects)

    if show_hidden:
        return metadata

    return [
        data
        for data in metadata
        if not any(
            element.startswith(".") for element in data.path.split("/")
        )
    ]


def glob(
pattern, prefix="/", project_id=None, show_hidden=False, object_client=None
):
Expand Down