Skip to content

Commit

Permalink
FIX: Make Manifest.use_signed_urls_for_datasets idempotent
Browse files Browse the repository at this point in the history
  • Loading branch information
cortadocodes committed Dec 4, 2023
1 parent 6bb11ad commit 2c62aab
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
4 changes: 2 additions & 2 deletions octue/resources/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,13 @@ def update_dataset_paths(self, path_generator):

def use_signed_urls_for_datasets(self):
"""Generate signed URLs for any cloud datasets in the manifest and use these as their paths instead of regular
cloud paths. URLs will not be generated for any local datasets in the manifest.
cloud paths. URLs will not be generated for any local datasets in the manifest. This method is idempotent.
:return None:
"""

def signed_url_path_generator(dataset):
if dataset.exists_in_cloud:
if dataset.exists_in_cloud and storage.path.is_cloud_uri(dataset.path):
return dataset.generate_signed_url()
return dataset.path

Expand Down
20 changes: 20 additions & 0 deletions tests/resources/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,3 +283,23 @@ def test_ignore_stored_metadata(self):

mock_dataset_use_cloud_metadata.assert_not_called()
self.assertEqual(dataset_in_manifest.labels, set())

def test_use_signed_urls_for_datasets(self):
"""Test that cloud URI dataset paths in a manifest can be swapped for signed URLs."""
manifest = Manifest(datasets={"my_dataset": self.create_nested_cloud_dataset()})
self.assertTrue(manifest.datasets["my_dataset"].path.startswith("gs://"))

manifest.use_signed_urls_for_datasets()
self.assertTrue(manifest.datasets["my_dataset"].path.startswith("http"))
self.assertIn(".signed_metadata_files", manifest.datasets["my_dataset"].path)

def test_use_signed_urls_for_datasets_is_idempotent(self):
"""Test that calling `use_signed_urls_for_datasets` on a manifest that already has signed URLs for its datasets'
paths just leaves the paths as they are.
"""
manifest = Manifest(datasets={"my_dataset": self.create_nested_cloud_dataset()})
manifest.use_signed_urls_for_datasets()
manifest.use_signed_urls_for_datasets()

self.assertTrue(manifest.datasets["my_dataset"].path.startswith("http"))
self.assertIn(".signed_metadata_files", manifest.datasets["my_dataset"].path)

0 comments on commit 2c62aab

Please sign in to comment.