Skip to content

Commit

Permalink
added dataset name dependencies test and fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
ilongin committed Sep 2, 2024
1 parent ed4f4a1 commit 8537347
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 13 deletions.
6 changes: 4 additions & 2 deletions src/datachain/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,13 @@ class DatasetDependency:
@property
def dataset_name(self) -> str:
    """Return the clean (user-facing) name of the dependency dataset.

    For a plain dataset dependency the stored name is already the dataset
    name. For a storage (listing) dependency, the stored name is a URI, so
    it is converted to the corresponding listing dataset name via
    ``parse_listing_uri``.
    """
    # Imported lazily to avoid a circular import between dataset and listing
    # modules — TODO confirm this is why the import is function-scoped.
    from datachain.lib.listing import parse_listing_uri

    if self.type == DatasetDependencyType.DATASET:
        return self.name

    # Trailing/leading slashes are stripped so the URI parses consistently;
    # only the derived dataset name is needed here, the rest is discarded.
    list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/"), None, {})
    return list_dataset_name

@classmethod
def parse(
Expand Down
2 changes: 2 additions & 0 deletions src/datachain/lib/listing.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,14 @@ def is_listing_dataset(name: str) -> bool:
"""Returns True if it's special listing dataset"""
return name.startswith(LISTING_PREFIX)


def listing_uri_from_name(dataset_name: str) -> str:
    """Return the storage URI encoded in a listing dataset name.

    Raises:
        ValueError: if ``dataset_name`` is not a listing dataset name.
    """
    # Guard clause inverted: handle the valid case first, fail otherwise.
    if is_listing_dataset(dataset_name):
        return dataset_name.removeprefix(LISTING_PREFIX)
    raise ValueError(f"Dataset {dataset_name} is not a listing")


def is_listing_expired(created_at: datetime) -> bool:
    """Check whether a listing created at ``created_at`` has outlived LISTING_TTL."""
    # A listing expires LISTING_TTL seconds after its creation timestamp.
    expires_at = created_at + timedelta(seconds=LISTING_TTL)
    return datetime.now(timezone.utc) > expires_at
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_listing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_listing_generator(cloud_test_catalog, cloud_type):
uri = f"{ctc.src_uri}/cats"

dc = DataChain.from_records(DataChain.DEFAULT_FILE_RECORD).gen(
file=list_bucket(uri, **ctc.catalog.client_config)
file=list_bucket(uri, client_config=ctc.catalog.client_config)
)
assert dc.count() == 2

Expand Down
10 changes: 0 additions & 10 deletions tests/unit/lib/test_datachain.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,11 +258,6 @@ def test_datasets_in_memory():
assert datasets[0].num_objects == 6


@pytest.mark.parametrize(
"cloud_type,version_aware",
[("file", False)],
indirect=True,
)
def test_listings(test_session, tmp_dir):
df = pd.DataFrame(DF_DATA)
df.to_parquet(tmp_dir / "df.parquet")
Expand Down Expand Up @@ -294,11 +289,6 @@ def test_listings(test_session, tmp_dir):
assert listing.status == 4


@pytest.mark.parametrize(
"cloud_type,version_aware",
[("file", False)],
indirect=True,
)
def test_listings_reindex(test_session, tmp_dir):
df = pd.DataFrame(DF_DATA)
df.to_parquet(tmp_dir / "df.parquet")
Expand Down
28 changes: 28 additions & 0 deletions tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from datetime import datetime, timezone

import pytest
from sqlalchemy import Column, DateTime
from sqlalchemy.dialects.sqlite import dialect as sqlite_dialect
from sqlalchemy.schema import CreateTable

from datachain.data_storage.schema import DataTable
from datachain.dataset import DatasetDependency, DatasetDependencyType
from datachain.sql.types import (
JSON,
Array,
Expand Down Expand Up @@ -84,3 +88,27 @@ def test_schema_serialization(dataset_record):
"item_type": {"type": "Array", "item_type": {"type": "Float64"}},
}
}


@pytest.mark.parametrize(
    "dep_name,dep_type,expected",
    [
        # Plain dataset dependency: name is returned unchanged.
        ("dogs_dataset", DatasetDependencyType.DATASET, "dogs_dataset"),
        # Storage dependency: URI is converted to a listing dataset name
        # (prefixed form with a trailing slash) — expected value taken from
        # the parametrize case itself.
        (
            "s3://dogs_dataset/dogs",
            DatasetDependencyType.STORAGE,
            "lst__s3://dogs_dataset/dogs/",
        ),
    ],
)
def test_dataset_dependency_dataset_name(dep_name, dep_type, expected):
    """Verify DatasetDependency.dataset_name for both dependency types."""
    dep = DatasetDependency(
        id=1,
        name=dep_name,
        version="1",
        type=dep_type,
        created_at=datetime.now(timezone.utc),
        dependencies=[],
    )

    assert dep.dataset_name == expected

0 comments on commit 8537347

Please sign in to comment.