Skip to content

Commit

Permalink
drop owner_id and owner_name signals
Browse files Browse the repository at this point in the history
  • Loading branch information
skshetry authored and rlamy committed Sep 4, 2024
1 parent 4ac1ead commit d4e50ec
Show file tree
Hide file tree
Showing 16 changed files with 12 additions and 93 deletions.
6 changes: 0 additions & 6 deletions src/datachain/catalog/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,8 +544,6 @@ def find_column_to_str( # noqa: PLR0911
)
if column == "name":
return posixpath.basename(row[field_lookup["path"]]) or ""
if column == "owner":
return row[field_lookup["owner_name"]] or ""
if column == "path":
is_dir = row[field_lookup["dir_type"]] == DirType.DIR
path = row[field_lookup["path"]]
Expand Down Expand Up @@ -735,8 +733,6 @@ def enlist_source(
Column("is_latest", Boolean),
Column("last_modified", DateTime(timezone=True)),
Column("size", Int64),
Column("owner_name", String),
Column("owner_id", String),
Column("location", JSON),
Column("source", String),
]
Expand Down Expand Up @@ -2187,8 +2183,6 @@ def find(
field_set.add("path")
elif column == "name":
field_set.add("path")
elif column == "owner":
field_set.add("owner_name")
elif column == "path":
field_set.add("dir_type")
field_set.add("path")
Expand Down
12 changes: 5 additions & 7 deletions src/datachain/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

TTL_HUMAN = "4h"
TTL_INT = 4 * 60 * 60
FIND_COLUMNS = ["du", "name", "owner", "path", "size", "type"]
FIND_COLUMNS = ["du", "name", "path", "size", "type"]


def human_time_type(value_str: str, can_be_none: bool = False) -> Optional[int]:
Expand Down Expand Up @@ -578,9 +578,8 @@ def _node_data_to_ls_values(row, long_format=False):
value = name + ending
if long_format:
last_modified = row[2]
owner_name = row[3]
timestamp = last_modified if not is_dir else None
return long_line_str(value, timestamp, owner_name)
return long_line_str(value, timestamp)

Check warning on line 582 in src/datachain/cli.py

View check run for this annotation

Codecov / codecov/patch

src/datachain/cli.py#L582

Added line #L582 was not covered by tests
return value


Expand All @@ -598,15 +597,15 @@ def _ls_urls_flat(
if client_cls.is_root_url(source):
buckets = client_cls.ls_buckets(**catalog.client_config)
if long:
values = (long_line_str(b.name, b.created, "") for b in buckets)
values = (long_line_str(b.name, b.created) for b in buckets)
else:
values = (b.name for b in buckets)
yield source, values
else:
found = False
fields = ["name", "dir_type"]
if long:
fields.extend(["last_modified", "owner_name"])
fields.append("last_modified")

Check warning on line 608 in src/datachain/cli.py

View check run for this annotation

Codecov / codecov/patch

src/datachain/cli.py#L608

Added line #L608 was not covered by tests
for data_source, results in catalog.ls([source], fields=fields, **kwargs):
values = (_node_data_to_ls_values(r, long) for r in results)
found = True
Expand All @@ -622,7 +621,7 @@ def ls_indexed_storages(catalog: "Catalog", long: bool = False) -> Iterator[str]
if long:
for uri in storage_uris:
# TODO: add Storage.created so it can be used here
yield long_line_str(uri, None, "")
yield long_line_str(uri, None)

Check warning on line 624 in src/datachain/cli.py

View check run for this annotation

Codecov / codecov/patch

src/datachain/cli.py#L624

Added line #L624 was not covered by tests
else:
yield from storage_uris

Expand Down Expand Up @@ -693,7 +692,6 @@ def ls_remote(
entry = long_line_str(
row["name"] + ("/" if row["dir_type"] else ""),
row["last_modified"],
row["owner_name"],
)
print(format_ls_entry(entry))
else:
Expand Down
4 changes: 0 additions & 4 deletions src/datachain/client/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ def _entry_from_boto(self, v, bucket, versions=False):
is_latest=v.get("IsLatest", True),
last_modified=v.get("LastModified", ""),
size=v["Size"],
owner_name=v.get("Owner", {}).get("DisplayName", ""),
owner_id=v.get("Owner", {}).get("ID", ""),
)

async def _fetch_dir(
Expand Down Expand Up @@ -165,8 +163,6 @@ def convert_info(self, v: dict[str, Any], path: str) -> Entry:
is_latest=v.get("IsLatest", True),
last_modified=v.get("LastModified", ""),
size=v["size"],
owner_name=v.get("Owner", {}).get("DisplayName", ""),
owner_id=v.get("Owner", {}).get("ID", ""),
)

def info_to_file(self, v: dict[str, Any], path: str) -> File:
Expand Down
2 changes: 0 additions & 2 deletions src/datachain/data_storage/warehouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,8 +629,6 @@ def with_default(column):
with_default(dr.c.is_latest),
dr.c.last_modified,
with_default(dr.c.size),
with_default(dr.c.owner_name),
with_default(dr.c.owner_id),
with_default(dr.c.sys__rand),
dr.c.location,
de.c.source,
Expand Down
8 changes: 2 additions & 6 deletions src/datachain/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ class Node:
is_latest: bool = True
last_modified: Optional[datetime] = None
size: int = 0
owner_name: str = ""
owner_id: str = ""
location: Optional[str] = None
source: StorageURI = StorageURI("")
dir_type: int = DirType.FILE
Expand Down Expand Up @@ -149,8 +147,6 @@ class Entry:
is_latest: bool = True
last_modified: Optional[datetime] = None
size: int = 0
owner_name: str = ""
owner_id: str = ""
location: Optional[str] = None

@classmethod
Expand Down Expand Up @@ -211,9 +207,9 @@ def full_path(self) -> str:
TIME_FMT = "%Y-%m-%d %H:%M"


def long_line_str(name: str, timestamp: Optional[datetime]) -> str:
    """Format one long-listing (`ls -l`-style) line: timestamp column, then name.

    The timestamp is left-justified in a 19-character column so entries align.
    A ``None`` timestamp (e.g. directories, or storages without a known
    creation time) is rendered as a ``-`` placeholder.
    """
    if timestamp is None:
        time = "-"
    else:
        time = timestamp.strftime(TIME_FMT)
    # Width 19 fits the full "%Y-%m-%d %H:%M" rendering with padding to spare.
    return f"{time: <19} {name}"

Check warning on line 215 in src/datachain/node.py

View check run for this annotation

Codecov / codecov/patch

src/datachain/node.py#L215

Added line #L215 was not covered by tests
6 changes: 0 additions & 6 deletions src/datachain/query/builtins.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ def load_tar(raw):
C.source,
C.path,
C.size,
C.owner_name,
C.owner_id,
C.is_latest,
C.last_modified,
C.version,
Expand All @@ -36,8 +34,6 @@ def index_tar(
source,
parent_path,
size,
owner_name,
owner_id,
is_latest,
last_modified,
version,
Expand All @@ -49,8 +45,6 @@ def index_tar(
source=source,
path=parent_path,
size=size,
owner_name=owner_name,
owner_id=owner_id,
is_latest=bool(is_latest),
last_modified=last_modified,
version=version,
Expand Down
8 changes: 0 additions & 8 deletions src/datachain/query/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,6 @@ class DatasetRow:
"path": String,
"size": Int64,
"location": JSON,
"owner_name": String,
"owner_id": String,
"is_latest": Boolean,
"last_modified": DateTime,
"version": String,
Expand All @@ -236,8 +234,6 @@ def create(
source: str = "",
size: int = 0,
location: Optional[dict[str, Any]] = None,
owner_name: str = "",
owner_id: str = "",
is_latest: bool = True,
last_modified: Optional[datetime] = None,
version: str = "",
Expand All @@ -248,8 +244,6 @@ def create(
int,
Optional[str],
int,
str,
str,
bool,
datetime,
str,
Expand All @@ -266,8 +260,6 @@ def create(
path,
size,
location,
owner_name,
owner_id,
is_latest,
last_modified,
version,
Expand Down
2 changes: 0 additions & 2 deletions src/datachain/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,6 @@ def show_df(
"etag",
"is_latest",
"last_modified",
"owner_id",
"owner_name",
"size",
"version",
],
Expand Down
2 changes: 0 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,8 +583,6 @@ def dataset_rows():
"is_latest": True,
"name": f"dql_1m_meta_text_emd.parquet_3_{i}_0.snappy.parquet",
"etag": f"72b35c8e9b8eed1636c91eb94241c2f8-{i}",
"owner_id": "owner",
"owner_name": "aws-iterative-sandbox",
"last_modified": "2024-02-23T10:42:31.842944+00:00",
"size": 49807360,
"sys__rand": 12123123123,
Expand Down
20 changes: 0 additions & 20 deletions tests/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
is_latest=True,
last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc),
size=13,
owner_name="webfile",
owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a",
),
Entry.from_file(
path="cats/cat1",
Expand All @@ -23,8 +21,6 @@
is_latest=True,
last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc),
size=4,
owner_name="webfile",
owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a",
),
Entry.from_file(
path="cats/cat2",
Expand All @@ -33,8 +29,6 @@
is_latest=True,
last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc),
size=4,
owner_name="webfile",
owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a",
),
Entry.from_file(
path="dogs/dog1",
Expand All @@ -43,8 +37,6 @@
is_latest=True,
last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc),
size=4,
owner_name="webfile",
owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a",
),
Entry.from_file(
path="dogs/dog2",
Expand All @@ -53,8 +45,6 @@
is_latest=True,
last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc),
size=3,
owner_name="webfile",
owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a",
),
Entry.from_file(
path="dogs/dog3",
Expand All @@ -63,8 +53,6 @@
is_latest=True,
last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc),
size=4,
owner_name="webfile",
owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a",
),
Entry.from_file(
path="dogs/others/dog4",
Expand All @@ -73,8 +61,6 @@
is_latest=True,
last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc),
size=4,
owner_name="webfile",
owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a",
),
]

Expand All @@ -90,8 +76,6 @@
is_latest=True,
last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc),
size=4,
owner_name="webfile",
owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a",
),
Entry.from_file(
path="dogs/others",
Expand All @@ -100,8 +84,6 @@
is_latest=True,
last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc),
size=4,
owner_name="webfile",
owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a",
),
Entry.from_file(
path="dogs/",
Expand All @@ -110,7 +92,5 @@
is_latest=True,
last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc),
size=4,
owner_name="webfile",
owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a",
),
]
10 changes: 4 additions & 6 deletions tests/func/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,6 @@ def test_find_names_columns(cloud_test_catalog, cloud_type):
src_uri = cloud_test_catalog.src_uri
catalog = cloud_test_catalog.catalog

owner = "webfile" if cloud_type == "s3" else ""

src_uri_path = src_uri
if cloud_type == "file":
src_uri_path = LocalFileSystem._strip_protocol(src_uri)
Expand All @@ -156,14 +154,14 @@ def test_find_names_columns(cloud_test_catalog, cloud_type):
catalog.find(
[src_uri],
names=["*cat*"],
columns=["du", "name", "owner", "path", "size", "type"],
columns=["du", "name", "path", "size", "type"],
)
) == {
"\t".join(columns)
for columns in [
["8", "cats", "", f"{src_uri_path}/cats/", "0", "d"],
["4", "cat1", owner, f"{src_uri_path}/cats/cat1", "4", "f"],
["4", "cat2", owner, f"{src_uri_path}/cats/cat2", "4", "f"],
["8", "cats", f"{src_uri_path}/cats/", "0", "d"],
["4", "cat1", f"{src_uri_path}/cats/cat1", "4", "f"],
["4", "cat2", f"{src_uri_path}/cats/cat2", "4", "f"],
]
}

Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_dataset_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def test_mutate(cloud_test_catalog, save):
else:
result = q.db_results(row_factory=lambda c, v: dict(zip(c, v)))
assert len(result) == 4
assert len(result[0]) == 17
assert len(result[0]) == 15
cols = {"size10x", "size1000x", "s2", "s3", "s4"}
new_data = [[v for k, v in r.items() if k in cols] for r in result]
assert new_data == [
Expand Down
Loading

0 comments on commit d4e50ec

Please sign in to comment.