Skip to content

Commit

Permalink
fix(Datasets): increase slug size and only add suffix if needed (#802)
Browse files Browse the repository at this point in the history
* fix(Datasets): increase slug size and only add suffix if needed

* fix(Datasets): use slug for dataset checks

* fix(Workspaces): Try to create buckets/datasets/workspaces using their slug is taken

---------

Co-authored-by: Quentin Gérôme <[email protected]>
  • Loading branch information
cheikhgwane and qgerome authored Sep 10, 2024
1 parent f50711f commit 93d4954
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 26 deletions.
31 changes: 31 additions & 0 deletions hexa/datasets/migrations/0007_alter_dataset_slug_and_more.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Generated by Django 5.0.8 on 2024-09-03 09:20

from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("datasets", "0006_datasetfilemetadata"),
(
"workspaces",
"0044_remove_workspaceinvitation_workspace_invitation_unique_workspace_email",
),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]

operations = [
migrations.AlterField(
model_name="dataset",
name="slug",
field=models.TextField(max_length=255),
),
migrations.AddConstraint(
model_name="dataset",
constraint=models.UniqueConstraint(
models.F("workspace_id"),
models.F("slug"),
name="unique_dataset_slug_per_workspace",
),
),
]
24 changes: 18 additions & 6 deletions hexa/datasets/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@
from hexa.user_management.models import User


def create_dataset_slug(name: str):
suffix = secrets.token_hex(3)
prefix = slugify(name[: 64 - 3])
return prefix[:23].rstrip("-") + "-" + suffix
def create_dataset_slug(name: str, workspace):
suffix = ""
while True:
slug = slugify(name[: 255 - len(suffix)] + suffix)
if not Dataset.objects.filter(workspace=workspace, slug=slug).exists():
return slug
suffix = "-" + secrets.token_hex(3)


class DatasetQuerySet(BaseQuerySet):
Expand Down Expand Up @@ -55,7 +58,7 @@ def create_if_has_perm(
created_by = principal if not isinstance(principal, PipelineRunUser) else None
dataset = self.create(
workspace=workspace,
slug=create_dataset_slug(name),
slug=create_dataset_slug(name, workspace),
created_by=created_by,
name=name,
description=description,
Expand All @@ -67,6 +70,15 @@ def create_if_has_perm(


class Dataset(Base):
class Meta:
constraints = [
models.UniqueConstraint(
"workspace_id",
"slug",
name="unique_dataset_slug_per_workspace",
)
]

workspace = models.ForeignKey(
"workspaces.Workspace",
null=True,
Expand All @@ -76,7 +88,7 @@ class Dataset(Base):

created_by = models.ForeignKey(User, null=True, on_delete=models.SET_NULL)
name = models.TextField(max_length=64, null=False, blank=False)
slug = models.TextField(null=False, blank=False, unique=True)
slug = models.TextField(null=False, blank=False, max_length=255)
description = models.TextField(blank=True, null=True)

objects = DatasetManager.from_queryset(DatasetQuerySet)()
Expand Down
55 changes: 48 additions & 7 deletions hexa/datasets/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist, PermissionDenied
from django.test import override_settings
from django.utils.crypto import get_random_string

from hexa.core.test import TestCase
from hexa.datasets.models import Dataset, DatasetVersion, DatasetVersionFile
Expand Down Expand Up @@ -65,9 +66,14 @@ def setUpTestData(cls):

class DatasetTest(BaseTestMixin, TestCase):
def test_create_dataset(
self, workspace=None, name="My dataset", description="Description of dataset"
self,
workspace=None,
user=None,
name="My dataset",
description="Description of dataset",
):
workspace = workspace or self.WORKSPACE
user = user or self.USER_ADMIN
with self.assertRaises(PermissionDenied):
Dataset.objects.create_if_has_perm(
self.USER_SERENA,
Expand All @@ -76,22 +82,57 @@ def test_create_dataset(
description=description,
)
dataset = Dataset.objects.create_if_has_perm(
self.USER_EDITOR,
self.WORKSPACE,
user,
workspace,
name=name,
description=description,
)

self.assertEqual(dataset.name, name)
self.assertEqual(dataset.description, description)
self.assertEqual(dataset.created_by, self.USER_EDITOR)
self.assertEqual(dataset.created_by, user)

return dataset

def test_create_dataset_with_double_dash(self):
def test_create_dataset_long_slug(self):
name = get_random_string(300)

dataset_1 = self.test_create_dataset(
workspace=self.WORKSPACE, name=name, description="description_1"
)
with patch("secrets.token_hex", return_value="123"):
dataset_2 = self.test_create_dataset(
workspace=self.WORKSPACE, name=name, description="description_1"
)
self.assertTrue(len(dataset_1.slug) <= 255)
self.assertTrue(len(dataset_2.slug) <= 255)

self.assertNotEqual(dataset_1.slug, dataset_2.slug)
self.assertTrue(dataset_2.slug.endswith("-123"))

def test_create_dataset_duplicate_slug_same_workspace(self):
dataset_1 = self.test_create_dataset(
workspace=self.WORKSPACE, name="my-slug", description="description_1"
)
with patch("secrets.token_hex", return_value="123"):
dataset = self.test_create_dataset(name="My dataset -- test-")
self.assertEqual(dataset.slug, "my-dataset-test-123")
dataset_2 = self.test_create_dataset(
workspace=self.WORKSPACE, name="my-slug", description="description_1"
)
self.assertNotEqual(dataset_1.slug, dataset_2.slug)
self.assertEqual(dataset_2.slug, "my-slug-123")

def test_create_dataset_duplicate_slug(self):
dataset_1 = self.test_create_dataset(
workspace=self.WORKSPACE, name="dataset_1", description="description_1"
)
dataset_2 = self.test_create_dataset(
workspace=self.WORKSPACE_2, name="dataset_1", description="description_1"
)
self.assertEqual(dataset_1.slug, dataset_2.slug)

def test_create_dataset_with_double_dash(self):
dataset = self.test_create_dataset(name="My dataset -- test-")
self.assertEqual(dataset.slug, "my-dataset-test")

def test_workspace_datasets(self):
self.test_create_dataset(name="dataset_1", description="description_1")
Expand Down
35 changes: 27 additions & 8 deletions hexa/workspaces/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from django.core.validators import RegexValidator, validate_slug
from django.db import models
from django.db.models import EmailField, Q
from django.forms import ValidationError
from django.utils.crypto import get_random_string
from django.utils.regex_helper import _lazy_re_compile
from django.utils.translation import gettext_lazy as _
Expand All @@ -38,9 +39,12 @@ class AlreadyExists(Exception):


def create_workspace_slug(name):
suffix = secrets.token_hex(3)
prefix = slugify(name[:23])
return prefix[:23].rstrip("-") + "-" + suffix
suffix = ""
while True:
slug = slugify(name[: 63 - len(suffix)] + suffix)
if not Workspace.objects.filter(slug=slug).exists():
return slug
suffix = "-" + secrets.token_hex(3)


def generate_database_name():
Expand All @@ -64,13 +68,30 @@ def load_bucket_sample_data(bucket_name):
)


def create_workspace_bucket(workspace_slug: str):
storage = get_storage()
while True:
suffix = get_random_string(
4, allowed_chars=string.ascii_lowercase + string.digits
)
try:
# Bucket names must be unique across all of Google Cloud, so we add a suffix to the workspace slug
# When separated by a dot, each segment can be up to 63 characters long
return storage.create_bucket(
f"{(settings.WORKSPACE_BUCKET_PREFIX + workspace_slug)[:63]}.{suffix}",
labels={"hexa-workspace": workspace_slug},
)
except ValidationError:
continue


class WorkspaceManager(models.Manager):
def create_if_has_perm(
self,
principal: User,
name: str,
description: str = None,
countries: typing.Sequence[Country] = None,
description: str | None = None,
countries: typing.Sequence[Country] | None = None,
load_sample_data: bool = False,
):
if not principal.has_perm("workspaces.create_workspace"):
Expand All @@ -96,9 +117,7 @@ def create_if_has_perm(
create_kwargs["db_name"] = db_name
create_database(db_name, db_password)

bucket = get_storage().create_bucket(
settings.WORKSPACE_BUCKET_PREFIX + slug, labels={"hexa-workspace": slug}
)
bucket = create_workspace_bucket(slug)
create_kwargs["bucket_name"] = bucket.name

if load_sample_data:
Expand Down
30 changes: 25 additions & 5 deletions hexa/workspaces/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ def test_create_workspace_no_slug(self):
name="this is a very long workspace name",
description="Description",
)
self.assertEqual(workspace.slug, "this-is-a-very-long-wor-mock")
self.assertTrue(len(workspace.slug) <= 30)
self.assertEqual(workspace.slug, "this-is-a-very-long-workspace-name")
self.assertTrue(len(workspace.slug) <= 63)

@backend.mock_storage
def test_create_workspace_with_underscore(self):
Expand All @@ -76,8 +76,8 @@ def test_create_workspace_with_underscore(self):
name="Worksp?ace_wi😱th_und~ersc!/ore",
description="Description",
)
self.assertEqual(workspace.slug, "worksp-ace-with-und-er-mock")
self.assertTrue("hexa-test-worksp-ace-with-und-er-mock" in backend.buckets)
self.assertEqual(workspace.slug, "worksp-ace-with-und-ersc-ore")
self.assertTrue(workspace.bucket_name in backend.buckets)

@backend.mock_storage
def test_create_workspace_with_random_characters(self):
Expand All @@ -89,7 +89,7 @@ def test_create_workspace_with_random_characters(self):
name="1workspace_with#_random$_char*",
description="Description",
)
self.assertEqual(workspace.slug, "1workspace-with-random-mock")
self.assertEqual(workspace.slug, "1workspace-with-random-char")
self.assertEqual(16, len(workspace.db_name))

@backend.mock_storage
Expand Down Expand Up @@ -120,6 +120,26 @@ def test_get_workspace_by_id_failed(self):
with self.assertRaises(ObjectDoesNotExist):
Workspace.objects.get(pk="7bf4c750-f74b-4ed6-b7f7-b23e4cac4e2c")

@backend.mock_storage
def test_create_workspaces_same_name(self):
with patch("hexa.workspaces.models.create_database"), patch(
"hexa.workspaces.models.load_database_sample_data"
), patch("secrets.token_hex", lambda _: "mock"):
workspace = Workspace.objects.create_if_has_perm(
self.USER_JULIA,
name="My workspace",
description="This is my workspace",
)
self.assertEqual(workspace.slug, "my-workspace")

workspace_2 = Workspace.objects.create_if_has_perm(
self.USER_JULIA,
name="My workspace",
description="This is my workspace",
)

self.assertEqual(workspace_2.slug, "my-workspace-mock")

@backend.mock_storage
def test_add_member(self):
with patch("hexa.workspaces.models.create_database"), patch(
Expand Down

0 comments on commit 93d4954

Please sign in to comment.