Skip to content

Commit

Permalink
Merge pull request #769 from cortex-lab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
k1o0 authored Jan 18, 2023
2 parents 38fb5ba + d31f702 commit b118cc0
Show file tree
Hide file tree
Showing 14 changed files with 186 additions and 85 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ this setup will work on other systems. Assumptions made are that you have sudo p
* alyx-dev is sync with the **dev** branch
* Migrations files are provided by the repository
* Continuous integration is setup, to run tests locally:
- `./manage.py test -n` test without migrations (faster)
- `./manage.py test` test with migrations (recommended if model changes)
- `./manage.py test -n` test without migrations (faster)
- `./manage.py test` test with migrations (recommended if model changes)
- NB: When running tests ensure `DEBUG = True` in the settings.py file (specifically `SECURE_SSL_REDIRECT = True` causes REST tests to fail)

```
$ /manage.py test -n
Expand Down
2 changes: 1 addition & 1 deletion alyx/alyx/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION = __version__ = '1.7.0'
VERSION = __version__ = '1.8.0'
19 changes: 17 additions & 2 deletions alyx/data/admin.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django.db.models import Count
from django.contrib import admin
from django.db.models import Count, ProtectedError
from django.contrib import admin, messages
from django.utils.html import format_html
from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter
from rangefilter.filter import DateRangeFilter
Expand Down Expand Up @@ -135,6 +135,21 @@ def _public(self, obj):
_public.short_description = 'Public'
_public.boolean = True

def delete_queryset(self, request, queryset):
    """Bulk-delete the selected datasets from the admin changelist.

    A ProtectedError (raised when any dataset carries a protected tag)
    is reported to the user via the messages framework instead of
    surfacing as a server error.
    """
    try:
        queryset.delete()
    except ProtectedError as err:
        if err.args:
            err_msg = err.args[0]
        else:
            err_msg = 'One or more dataset(s) protected'
        self.message_user(request, err_msg, level=messages.ERROR)

def delete_model(self, request, obj):
    """Delete a single dataset from the admin delete view.

    A ProtectedError (raised by the model's delete for protected tags)
    is reported via the messages framework instead of crashing the view.
    """
    try:
        obj.delete()
    except ProtectedError as e:
        # FIXME This still shows the successful message which is confusing for users
        # NOTE(review): the admin presumably emits its own success message after
        # this hook returns; suppressing it likely requires overriding
        # response_delete/delete_view — confirm before relying on this message.
        err_msg = e.args[0] if e.args else f'Dataset {obj.name} is protected'
        self.message_user(request, err_msg, level=messages.ERROR)


class FileRecordAdmin(BaseAdmin):
fields = ('relative_path', 'data_repository', 'dataset', 'exists')
Expand Down
3 changes: 2 additions & 1 deletion alyx/data/management/commands/files.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging

from django.core.management import BaseCommand
from django.db.models import Count, Q

Expand Down Expand Up @@ -195,7 +196,7 @@ def handle(self, *args, **options):
dt = None
for d in FileRecord.objects.all().select_related('dataset'):
try:
dt = transfers.get_dataset_type(d.relative_path, qs=qs)
dt = transfers.get_dataset_type(d.relative_path, qs)
except ValueError:
dt = None
continue
Expand Down
37 changes: 35 additions & 2 deletions alyx/data/models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import structlog

from django.core.validators import RegexValidator
from django.db import models
from django.utils import timezone

from alyx.settings import TIME_ZONE, AUTH_USER_MODEL
from actions.models import Session
from alyx.base import BaseModel, modify_fields, BaseManager, CharNullField
from alyx.base import BaseModel, modify_fields, BaseManager, CharNullField, BaseQuerySet

logger = structlog.get_logger(__name__)


def _related_string(field):
Expand Down Expand Up @@ -252,9 +256,27 @@ def __str__(self):
return "<Revision %s>" % self.name


class DatasetQuerySet(BaseQuerySet):
    """A QuerySet that checks for protected datasets before deletion.

    Datasets tagged with a protected Tag may only be deleted by passing
    ``force=True``; otherwise a ``ProtectedError`` is raised and nothing
    is deleted.
    """

    def delete(self, force=False):
        """Delete the datasets in this queryset.

        Parameters
        ----------
        force : bool
            If True, protected datasets are deleted anyway (a warning is
            logged); if False (default), a ProtectedError is raised when
            any dataset in the queryset is protected.

        Returns
        -------
        tuple
            Django's deletion summary: (total deleted, per-model counts).

        Raises
        ------
        models.ProtectedError
            If the queryset contains protected datasets and force is False.
        """
        if (protected := self.filter(tags__protected=True)).exists():
            # Build the name/session listing once for either log branch
            listing = '\n'.join(map(str, protected.values_list('name', 'session_id')))
            if force:
                logger.warning(
                    'The following protected datasets will be deleted:\n%s', listing)
            else:
                logger.error(
                    'The following protected datasets cannot be deleted without force=True:\n%s',
                    listing)
                raise models.ProtectedError(
                    f'Failed to delete {protected.count()} dataset(s) due to protected tags',
                    protected)
        # BUGFIX: propagate Django's deletion summary; previously this
        # override returned None, hiding the (count, per-model) result.
        return super().delete()


class DatasetManager(BaseManager):
    def get_queryset(self):
        """Return a DatasetQuerySet with dataset_type/data_format pre-joined."""
        queryset = DatasetQuerySet(self.model, using=self._db)
        return queryset.select_related('dataset_type', 'data_format')

Expand Down Expand Up @@ -361,6 +383,17 @@ def save(self, *args, **kwargs):
if len(pis):
self.probe_insertion.set(pis.values_list('pk', flat=True))

def delete(self, *args, force=False, **kwargs):
    """Delete this dataset, refusing if it carries a protected tag.

    NB: this is not called on bulk (queryset) deletes or cascading
    deletes; DatasetQuerySet.delete handles the bulk case.

    Parameters
    ----------
    force : bool
        If True, delete even when the dataset is protected.

    Returns
    -------
    tuple
        Django's deletion summary from ``Model.delete``.

    Raises
    ------
    models.ProtectedError
        If the dataset is protected and force is False.
    """
    # If a dataset is protected and force=False, raise an exception
    if self.is_protected and not force:
        tags = self.tags.filter(protected=True).values_list('name', flat=True)
        tags_str = '"' + '", "'.join(tags) + '"'
        logger.error(f'Dataset {self.name} is protected by tag(s); use force=True.')
        raise models.ProtectedError(
            f'Failed to delete dataset {self.name} due to protected tag(s) {tags_str}', self)
    # BUGFIX: propagate Django's deletion summary; previously returned None.
    return super().delete(*args, **kwargs)


# Files
# ------------------------------------------------------------------------------------------------
Expand Down
27 changes: 25 additions & 2 deletions alyx/data/tests.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,37 @@
from django.test import TestCase
from django.db import transaction
from django.db.utils import IntegrityError
from data.models import Dataset, DatasetType
from django.db.models import ProtectedError

from data.models import Dataset, DatasetType, Tag
from data.transfers import get_dataset_type


class TestModel(TestCase):
    def test_model_methods(self):
        """A freshly created dataset is neither online, public nor protected."""
        dset, _ = Dataset.objects.get_or_create(name='toto.npy')
        self.assertIs(dset.is_online, False)
        self.assertIs(dset.is_public, False)
        self.assertIs(dset.is_protected, False)

    def test_delete(self):
        """Protected datasets may only be deleted with force=True."""
        dset, _ = Dataset.objects.get_or_create(name='foo.npy')
        tag, _ = Tag.objects.get_or_create(name='protected_tag', protected=True)
        dset.tags.set([tag])
        self.assertIs(dset.is_protected, True)

        # Single-object delete must raise
        with transaction.atomic():
            with self.assertRaises(ProtectedError):
                dset.delete()

        # Bulk (queryset) delete must raise too...
        qs = Dataset.objects.filter(tags__name='protected_tag')
        with transaction.atomic():
            with self.assertRaises(ProtectedError):
                qs.delete()
        # ...unless forced, in which case a warning is logged
        with self.assertLogs('data.models', 'WARNING'):
            qs.delete(force=True)


class TestDatasetTypeModel(TestCase):
def test_model_methods(self):
Expand All @@ -33,6 +54,8 @@ def test_model_methods(self):
('bar.baz.ext', 'bar.baz'),
('some_file.ext', 'some_file')
)

dtypes = DatasetType.objects.all()
for filename, dataname in filename_typename:
with self.subTest(filename=filename):
self.assertEqual(get_dataset_type(filename).name, dataname)
self.assertEqual(get_dataset_type(filename, dtypes).name, dataname)
13 changes: 13 additions & 0 deletions alyx/data/tests_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,19 @@ def test_dataset(self):
self.ar(r, 201)
self.assertEqual(r.data['default_dataset'], False)

# Create protected tag and dataset
r = self.ar(self.post(reverse('tag-list'), {'name': 'foo_tag', 'protected': True}), 201)
data = {'name': 'foo.bar', 'dataset_type': 'dst', 'created_by': 'test',
'data_format': 'df', 'date': '2018-01-01', 'number': 2, 'subject': self.subject,
'tags': [r['name']]}

r = self.ar(self.post(reverse('dataset-list'), data), 201)
did = r['url'].split('/')[-1]

# Now attempt to delete the protected dataset
r = self.client.delete(reverse('dataset-detail', args=[did]), data)
self.assertRegex(self.ar(r, 403), 'protected')

def test_dataset_date_filter(self):
# create 2 datasets with different dates
data = {
Expand Down
32 changes: 3 additions & 29 deletions alyx/data/transfers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@
import re
import time
from pathlib import Path
from fnmatch import fnmatch

from django.db.models import Case, When, Count, Q, F
import globus_sdk
import numpy as np
from one.alf.files import filename_parts, add_uuid_string
from one.alf.spec import is_valid
from one.alf.files import add_uuid_string
from one.registration import get_dataset_type

from alyx import settings
from data.models import FileRecord, Dataset, DatasetType, DataFormat, DataRepository
Expand Down Expand Up @@ -170,31 +169,6 @@ def globus_file_exists(file_record):
return False


def get_dataset_type(filename, qs=None):
    """Get the dataset type from a given filename.

    Returns the single DatasetType whose filename_pattern (or, when the
    pattern is blank, whose name) matches `filename`.

    :param filename: the file name (or relative path) to classify
    :param qs: an optional iterable/queryset of DatasetType records to
        search; defaults to all DatasetType objects.
        NOTE(review): `qs or ...` means an *empty* queryset is falsy and
        silently falls back to scanning all types — presumably intended
        as a convenience default, but confirm before passing filtered
        querysets that may be empty.
    :raises ValueError: if zero or more than one dataset type matches
    """
    dataset_types = []
    for dt in qs or DatasetType.objects.all():
        if not dt.filename_pattern.strip():
            # If the filename pattern is null, check whether the filename object.attribute matches
            # the dataset type name.
            if is_valid(filename):
                # Valid ALF name: compare against the object.attribute part
                obj_attr = '.'.join(filename_parts(filename)[1:3])
            else:  # will match name against filename sans extension
                obj_attr = op.splitext(filename)[0]
            if dt.name == obj_attr:
                dataset_types.append(dt)
        # Check whether pattern matches filename (case-insensitive glob)
        elif fnmatch(op.basename(filename).lower(), dt.filename_pattern.lower()):
            dataset_types.append(dt)
    n = len(dataset_types)
    if n == 0:
        raise ValueError("No dataset type found for filename `%s`" % filename)
    elif n >= 2:
        raise ValueError("Multiple matching dataset types found for filename `%s`: %s" % (
            filename, ', '.join(map(str, dataset_types))))
    return dataset_types[0]


def get_data_format(filename):
file_extension = op.splitext(filename)[-1]
# This raises an error if there is 0 or 2+ matching data formats.
Expand Down Expand Up @@ -275,7 +249,7 @@ def _create_dataset_file_records(
assert session is not None
revision_name = f'#{revision.name}#' if revision else ''
relative_path = op.join(rel_dir_path, collection or '', revision_name, filename)
dataset_type = get_dataset_type(filename)
dataset_type = get_dataset_type(filename, DatasetType.objects.all())
data_format = get_data_format(filename)
assert dataset_type
assert data_format
Expand Down
9 changes: 9 additions & 0 deletions alyx/data/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re

from django.contrib.auth import get_user_model
from django.db import models
from rest_framework import generics, viewsets, mixins, serializers
from rest_framework.response import Response
import django_filters
Expand Down Expand Up @@ -226,6 +227,14 @@ class DatasetDetail(generics.RetrieveUpdateDestroyAPIView):
serializer_class = DatasetSerializer
permission_classes = rest_permission_classes()

def delete(self, request, *args, **kwargs):
    """Delete a dataset, returning 403 (not 500) when it is protected.

    A ProtectedError raised by the model delete (protected tags) is
    converted into a Forbidden response carrying the error message,
    which includes the dataset name and protected tag list.
    """
    try:
        return super().delete(request, *args, **kwargs)
    except models.ProtectedError as e:
        # BUGFIX: the previous `err_msg, _ = e.args` unpack was unused and
        # would raise ValueError if args did not contain exactly two items.
        err_msg = e.args[0] if e.args else 'Dataset is protected'
        return Response(err_msg, status=403)


# FileRecord
# ------------------------------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions alyx/misc/management/commands/one_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,9 +381,9 @@ def generate_datasets_frame(int_id=True, tags=None) -> pd.DataFrame:
if tags:
kw = {'tags__name__in' if not isinstance(tags, str) else 'tags__name': tags}
ds = ds.prefetch_related('tag').filter(**kw)
# Filter out datasets that do not exist on either repository
# Filter out datasets that do not exist on either repository or have no associated session
ds = ds.annotate(exists_flatiron=Exists(on_flatiron), exists_aws=Exists(on_aws))
ds = ds.filter(Q(exists_flatiron=True) | Q(exists_aws=True))
ds = ds.filter(Q(exists_flatiron=True) | Q(exists_aws=True), session__isnull=False)

# fields to keep from Dataset table
fields = (
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ python-magic
pytz
structlog>=21.5.0
webdavclient3
ONE-api>=1.13.0
ONE-api>=1.18.0
36 changes: 18 additions & 18 deletions requirements_frozen.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
asgiref==3.5.2
asgiref==3.6.0
backports.zoneinfo==0.2.1
boto3==1.26.24
botocore==1.29.24
certifi==2022.9.24
boto3==1.26.51
botocore==1.29.51
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==2.1.1
charset-normalizer==3.0.1
click==8.1.3
colorlog==6.7.0
contourpy==1.0.6
contourpy==1.0.7
coreapi==2.3.3
coreschema==0.0.4
coverage==6.5.0
coveralls==3.3.1
cryptography==36.0.2
cycler==0.11.0
Django==4.1.4
Django==4.1.5
django-admin-list-filter-dropdown==1.0.3
django-admin-rangefilter==0.9.0
django-autocomplete-light==3.9.4
Expand All @@ -25,7 +25,7 @@ django-js-asset==2.0.0
django-mptt==0.14.0
django-polymorphic==3.1.0
django-reversion==5.0.4
django-storages==1.13.1
django-storages==1.13.2
django-structlog==4.0.1
django-test-without-migrations==0.6
djangorestframework==3.14.0
Expand All @@ -38,23 +38,23 @@ globus-cli==3.10.1
globus-sdk==3.15.0
iblutil==1.4.0
idna==3.4
importlib-metadata==5.1.0
importlib-metadata==6.0.0
itypes==1.2.0
Jinja2==3.1.2
jmespath==1.0.1
kiwisolver==1.4.4
llvmlite==0.39.1
lxml==4.9.1
lxml==4.9.2
Markdown==3.4.1
MarkupSafe==2.1.1
matplotlib==3.6.2
MarkupSafe==2.1.2
matplotlib==3.6.3
mccabe==0.7.0
numba==0.56.4
numpy==1.23.5
ONE-api==1.16.2
packaging==21.3
ONE-api==1.18.0
packaging==23.0
pandas==1.5.2
Pillow==9.3.0
Pillow==9.4.0
psycopg2-binary==2.9.5
pyarrow==10.0.1
pycodestyle==2.10.0
Expand All @@ -64,16 +64,16 @@ PyJWT==2.6.0
pyparsing==3.0.9
python-dateutil==2.8.2
python-magic==0.4.27
pytz==2022.6
pytz==2022.7.1
PyYAML==6.0
requests==2.28.1
requests==2.28.2
s3transfer==0.6.0
six==1.16.0
sqlparse==0.4.3
structlog==22.3.0
tqdm==4.64.1
typing_extensions==4.4.0
uritemplate==4.1.1
urllib3==1.26.13
urllib3==1.26.14
webdavclient3==3.14.6
zipp==3.11.0
4 changes: 2 additions & 2 deletions scripts/oneoff/2019-08-30-patch_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
fr_server = FileRecord.objects.filter(dataset__session__in=sessions,
data_repository__globus_is_personal=False)

assert(fr_local.count() == fr_server.count())
assert fr_local.count() == fr_server.count()

fr_server.update(exists=False)
files_repos_save = fr_local.values_list('id', 'data_repository')
fr_local.update()

repo = fr_local.values_list('data_repository', flat=True).distinct()
assert(repo.count() == 1)
assert repo.count() == 1
repo = DataRepository.objects.get(id=repo[0])

globus_id_save = repo.globus_endpoint_id
Expand Down
Loading

0 comments on commit b118cc0

Please sign in to comment.