From a211a4f270db6b751d101e5967de10cd63ef0c6b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 9 Dec 2022 08:20:53 +0000 Subject: [PATCH 1/8] Bump certifi from 2022.9.24 to 2022.12.7 Bumps [certifi](https://github.com/certifi/python-certifi) from 2022.9.24 to 2022.12.7. - [Release notes](https://github.com/certifi/python-certifi/releases) - [Commits](https://github.com/certifi/python-certifi/compare/2022.09.24...2022.12.07) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements_frozen.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_frozen.txt b/requirements_frozen.txt index aceb9e55a..981b70500 100644 --- a/requirements_frozen.txt +++ b/requirements_frozen.txt @@ -2,7 +2,7 @@ asgiref==3.5.2 backports.zoneinfo==0.2.1 boto3==1.26.24 botocore==1.29.24 -certifi==2022.9.24 +certifi==2022.12.7 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 From 5142bff0e37d3cddc120e70476714618cc15050a Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Tue, 3 Jan 2023 16:04:42 +0200 Subject: [PATCH 2/8] Exclude datasets with no associated session from cache tables --- alyx/misc/management/commands/one_cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alyx/misc/management/commands/one_cache.py b/alyx/misc/management/commands/one_cache.py index c01351bc4..65da21978 100644 --- a/alyx/misc/management/commands/one_cache.py +++ b/alyx/misc/management/commands/one_cache.py @@ -381,9 +381,9 @@ def generate_datasets_frame(int_id=True, tags=None) -> pd.DataFrame: if tags: kw = {'tags__name__in' if not isinstance(tags, str) else 'tags__name': tags} ds = ds.prefetch_related('tag').filter(**kw) - # Filter out datasets that do not exist on either repository + # Filter out datasets that do not exist on either repository or have no associated session ds = ds.annotate(exists_flatiron=Exists(on_flatiron), exists_aws=Exists(on_aws)) - ds = ds.filter(Q(exists_flatiron=True) | Q(exists_aws=True)) + ds = ds.filter(Q(exists_flatiron=True) | Q(exists_aws=True), session__isnull=False) # fields to keep from Dataset table fields = ( From 99f317e7d902600cdc815f3ecfe63046125a26dc Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 3 Jan 2023 14:10:54 +0000 Subject: [PATCH 3/8] GitHub Actions generated requirements_frozen.txt --- requirements_frozen.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/requirements_frozen.txt b/requirements_frozen.txt index 9c2cf2198..bf92f6697 100644 --- a/requirements_frozen.txt +++ b/requirements_frozen.txt @@ -1,8 +1,8 @@ -asgiref==3.5.2 +asgiref==3.6.0 backports.zoneinfo==0.2.1 -boto3==1.26.16 -botocore==1.29.16 -certifi==2022.9.24 +boto3==1.26.41 +botocore==1.29.41 +certifi==2022.12.7 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 @@ -14,7 +14,7 @@ coverage==6.5.0 coveralls==3.3.1 cryptography==36.0.2 cycler==0.11.0 -Django==4.1.3 +Django==4.1.5 django-admin-list-filter-dropdown==1.0.3 django-admin-rangefilter==0.9.0 django-autocomplete-light==3.9.4 @@ -25,7 +25,7 @@ django-js-asset==2.0.0 django-mptt==0.14.0 django-polymorphic==3.1.0 django-reversion==5.0.4 -django-storages==1.13.1 +django-storages==1.13.2 django-structlog==4.0.1 django-test-without-migrations==0.6 djangorestframework==3.14.0 @@ -34,27 +34,27 @@ docutils==0.19 drfdocs==0.0.11 flake8==6.0.0 fonttools==4.38.0 -globus-cli==3.9.0 -globus-sdk==3.13.0 +globus-cli==3.10.1 
+globus-sdk==3.15.0 iblutil==1.4.0 idna==3.4 -importlib-metadata==5.1.0 +importlib-metadata==6.0.0 itypes==1.2.0 Jinja2==3.1.2 -jmespath==0.10.0 +jmespath==1.0.1 kiwisolver==1.4.4 llvmlite==0.39.1 -lxml==4.9.1 +lxml==4.9.2 Markdown==3.4.1 MarkupSafe==2.1.1 matplotlib==3.6.2 mccabe==0.7.0 numba==0.56.4 numpy==1.23.5 -ONE-api==1.16.2 -packaging==21.3 +ONE-api==1.16.3 +packaging==22.0 pandas==1.5.2 -Pillow==9.3.0 +Pillow==9.4.0 psycopg2-binary==2.9.5 pyarrow==10.0.1 pycodestyle==2.10.0 @@ -64,7 +64,7 @@ PyJWT==2.6.0 pyparsing==3.0.9 python-dateutil==2.8.2 python-magic==0.4.27 -pytz==2022.6 +pytz==2022.7 PyYAML==6.0 requests==2.28.1 s3transfer==0.6.0 @@ -76,4 +76,4 @@ typing_extensions==4.4.0 uritemplate==4.1.1 urllib3==1.26.13 webdavclient3==3.14.6 -zipp==3.10.0 +zipp==3.11.0 From 110930b033a8ad3d0c2fde848dcb1fb531f9f8cc Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Fri, 6 Jan 2023 14:46:48 +0200 Subject: [PATCH 4/8] QuerySet manager checks for protected tags before delete --- README.md | 5 +++-- alyx/data/models.py | 28 ++++++++++++++++++++++++++-- alyx/data/tests.py | 24 +++++++++++++++++++++++- alyx/data/tests_rest.py | 13 +++++++++++++ alyx/data/views.py | 12 ++++++++++++ 5 files changed, 77 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b35906e0e..4a1a58a3f 100644 --- a/README.md +++ b/README.md @@ -25,8 +25,9 @@ this setup will work on other systems. Assumptions made are that you have sudo p * alyx-dev is sync with the **dev** branch * Migrations files are provided by the repository * Continuous integration is setup, to run tests locally: - - `./manage.py test -n` test without migrations (faster) - - `./manage.py test` test with migrations (recommended if model changes) + - `./manage.py test -n` test without migrations (faster) + - `./manage.py test` test with migrations (recommended if model changes) + - NB: When running tests ensure `DEBUG = True` in the settings.py file (specifically `SECURE_SSL_REDIRECT = True` causes REST tests to fail) ``` $ /manage.py test -n diff --git a/alyx/data/models.py b/alyx/data/models.py index 46192bbd7..d1c2220f5 100644 --- a/alyx/data/models.py +++ b/alyx/data/models.py @@ -1,10 +1,14 @@ +import structlog + from django.core.validators import RegexValidator from django.db import models from django.utils import timezone from alyx.settings import TIME_ZONE, AUTH_USER_MODEL from actions.models import Session -from alyx.base import BaseModel, modify_fields, BaseManager, CharNullField +from alyx.base import BaseModel, modify_fields, BaseManager, CharNullField, BaseQuerySet + +logger = structlog.get_logger(__name__) def _related_string(field): @@ -252,9 +256,22 @@ def __str__(self): return "" % self.name +class DatasetQuerySet(BaseQuerySet): + """A Queryset that checks for protected datasets before deletion""" + + def delete(self, force=False): + if (protected := self.filter(tags__protected=True)).exists(): + if force: + logger.warning('The following protected datasets will be deleted:\n%s', + '\n'.join(map(str, protected.values_list('name', 'session_id')))) + else: + raise models.ProtectedError(f'{protected.count()} dataset(s) protected.', protected) + super().delete() + + class DatasetManager(BaseManager): def get_queryset(self): - qs = super(DatasetManager, self).get_queryset() + qs = DatasetQuerySet(self.model, using=self._db) qs = qs.select_related('dataset_type', 'data_format') return qs @@ -361,6 +378,13 @@ def save(self, *args, **kwargs): if len(pis): self.probe_insertion.set(pis.values_list('pk', flat=True)) + def 
delete(self, *args, force=False, **kwargs): + # If a dataset is protected and force=False, raise an exception + # NB This is not called when bulk deleting or in cascading deletes + if self.is_protected and not force: + raise models.ProtectedError(f'Dataset {self.name} is protected.', self) + super().delete(*args, **kwargs) + # Files # ------------------------------------------------------------------------------------------------ diff --git a/alyx/data/tests.py b/alyx/data/tests.py index 5eff5d4e1..909311785 100644 --- a/alyx/data/tests.py +++ b/alyx/data/tests.py @@ -1,16 +1,37 @@ from django.test import TestCase +from django.db import transaction from django.db.utils import IntegrityError -from data.models import Dataset, DatasetType +from django.db.models import ProtectedError + +from data.models import Dataset, DatasetType, Tag from data.transfers import get_dataset_type class TestModel(TestCase): def test_model_methods(self): (dset, _) = Dataset.objects.get_or_create(name='toto.npy') + assert dset.is_online is False assert dset.is_public is False assert dset.is_protected is False + def test_delete(self): + (dset, _) = Dataset.objects.get_or_create(name='foo.npy') + (tag, _) = Tag.objects.get_or_create(name='protected_tag', protected=True) + dset.tags.set([tag]) + assert dset.is_protected is True + + # Individual object delete + with transaction.atomic(): + self.assertRaises(ProtectedError, dset.delete) + + # As queryset + qs = Dataset.objects.filter(tags__name='protected_tag') + with transaction.atomic(): + self.assertRaises(ProtectedError, qs.delete) + with self.assertLogs('data.models', 'WARNING'): + qs.delete(force=True) + class TestDatasetTypeModel(TestCase): def test_model_methods(self): @@ -36,3 +57,4 @@ def test_model_methods(self): for filename, dataname in filename_typename: with self.subTest(filename=filename): self.assertEqual(get_dataset_type(filename).name, dataname) + diff --git a/alyx/data/tests_rest.py b/alyx/data/tests_rest.py index 24a02d550..fcce55ac9 100644 --- a/alyx/data/tests_rest.py +++ b/alyx/data/tests_rest.py @@ -184,6 +184,19 @@ def test_dataset(self): self.ar(r, 201) self.assertEqual(r.data['default_dataset'], False) + # Create protected tag and dataset + r = self.ar(self.post(reverse('tag-list'), {'name': 'foo_tag', 'protected': True}), 201) + data = {'name': 'foo.bar', 'dataset_type': 'dst', 'created_by': 'test', + 'data_format': 'df', 'date': '2018-01-01', 'number': 2, 'subject': self.subject, + 'tags': [r['name']]} + + r = self.ar(self.post(reverse('dataset-list'), data), 201) + did = r['url'].split('/')[-1] + + # Now attempt to delete the protected dataset + r = self.client.delete(reverse('dataset-detail', args=[did]), data) + self.assertRegex(self.ar(r, 403), 'protected') + def test_dataset_date_filter(self): # create 2 datasets with different dates data = { diff --git a/alyx/data/views.py b/alyx/data/views.py index e1d98fe4b..841faf17d 100644 --- a/alyx/data/views.py +++ b/alyx/data/views.py @@ -2,6 +2,7 @@ import re from django.contrib.auth import get_user_model +from django.db import models from rest_framework import generics, viewsets, mixins, serializers from rest_framework.response import Response import django_filters @@ -226,6 +227,17 @@ class DatasetDetail(generics.RetrieveUpdateDestroyAPIView): serializer_class = DatasetSerializer permission_classes = rest_permission_classes() + def delete(self, request, *args, **kwargs): + try: + return super().delete(request, *args, **kwargs) + except models.ProtectedError as e: + # Return Forbidden 
response with dataset name and list of protected tags associated + tags = e.protected_objects.tags.filter(protected=True).values_list('name', flat=True) + tags_str = '"' + '", "'.join(tags) + '"' + return Response( + f'Dataset {e.protected_objects.name} is protected by the tag(s) {tags_str}', 403 + ) + # FileRecord # ------------------------------------------------------------------------------------------------ From 0c31a965078d39f88115c858dcd1d2eacf3434b7 Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Fri, 6 Jan 2023 16:43:13 +0200 Subject: [PATCH 5/8] Admin error messages --- alyx/data/admin.py | 19 +++++++++++++++++-- alyx/data/models.py | 13 +++++++++++-- alyx/data/tests.py | 1 - alyx/data/views.py | 7 ++----- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/alyx/data/admin.py b/alyx/data/admin.py index e214b1de2..e2e9df01a 100644 --- a/alyx/data/admin.py +++ b/alyx/data/admin.py @@ -1,5 +1,5 @@ -from django.db.models import Count -from django.contrib import admin +from django.db.models import Count, ProtectedError +from django.contrib import admin, messages from django.utils.html import format_html from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter from rangefilter.filter import DateRangeFilter @@ -135,6 +135,21 @@ def _public(self, obj): _public.short_description = 'Public' _public.boolean = True + def delete_queryset(self, request, queryset): + try: + queryset.delete() + except ProtectedError as e: + err_msg = e.args[0] if e.args else 'One or more dataset(s) protected' + self.message_user(request, err_msg, level=messages.ERROR) + + def delete_model(self, request, obj): + try: + obj.delete() + except ProtectedError as e: + # FIXME This still shows the successful message which is confusing for users + err_msg = e.args[0] if e.args else f'Dataset {obj.name} is protected' + self.message_user(request, err_msg, level=messages.ERROR) + class FileRecordAdmin(BaseAdmin): fields = ('relative_path', 'data_repository', 'dataset', 'exists') diff --git a/alyx/data/models.py b/alyx/data/models.py index d1c2220f5..31d31d656 100644 --- a/alyx/data/models.py +++ b/alyx/data/models.py @@ -265,7 +265,12 @@ def delete(self, force=False): logger.warning('The following protected datasets will be deleted:\n%s', '\n'.join(map(str, protected.values_list('name', 'session_id')))) else: - raise models.ProtectedError(f'{protected.count()} dataset(s) protected.', protected) + logger.error( + 'The following protected datasets cannot be deleted without force=True:\n%s', + '\n'.join(map(str, protected.values_list('name', 'session_id')))) + raise models.ProtectedError( + f'Failed to delete {protected.count()} dataset(s) due to protected tags', + protected) super().delete() @@ -382,7 +387,11 @@ def delete(self, *args, force=False, **kwargs): # If a dataset is protected and force=False, raise an exception # NB This is not called when bulk deleting or in cascading deletes if self.is_protected and not force: - raise models.ProtectedError(f'Dataset {self.name} is protected.', self) + tags = self.tags.filter(protected=True).values_list('name', flat=True) + tags_str = '"' + '", "'.join(tags) + '"' + logger.error(f'Dataset {self.name} is protected by tag(s); use force=True.') + raise models.ProtectedError( + f'Failed to delete dataset {self.name} due to protected tag(s) {tags_str}', self) super().delete(*args, **kwargs) diff --git a/alyx/data/tests.py b/alyx/data/tests.py index 909311785..2b5e4ed1a 100644 --- a/alyx/data/tests.py +++ b/alyx/data/tests.py @@ -57,4 +57,3 @@ def 
test_model_methods(self): for filename, dataname in filename_typename: with self.subTest(filename=filename): self.assertEqual(get_dataset_type(filename).name, dataname) - diff --git a/alyx/data/views.py b/alyx/data/views.py index 841faf17d..fd2f412b3 100644 --- a/alyx/data/views.py +++ b/alyx/data/views.py @@ -232,11 +232,8 @@ def delete(self, request, *args, **kwargs): return super().delete(request, *args, **kwargs) except models.ProtectedError as e: # Return Forbidden response with dataset name and list of protected tags associated - tags = e.protected_objects.tags.filter(protected=True).values_list('name', flat=True) - tags_str = '"' + '", "'.join(tags) + '"' - return Response( - f'Dataset {e.protected_objects.name} is protected by the tag(s) {tags_str}', 403 - ) + err_msg, _ = e.args + return Response(e.args[0], 403) # FileRecord From 23ffe1bc9e268fcb0659d5d48afd6ec376967546 Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Wed, 18 Jan 2023 15:57:14 +0200 Subject: [PATCH 6/8] get_dataset_type moved to ONE --- alyx/data/management/commands/files.py | 3 +- alyx/data/tests.py | 4 +- alyx/data/transfers.py | 32 +-------- requirements.txt | 2 +- scripts/oneoff/2019-08-30-patch_register.py | 4 +- scripts/sync_ucl/prune_cortexlab.py | 78 +++++++++++++++------ 6 files changed, 66 insertions(+), 57 deletions(-) diff --git a/alyx/data/management/commands/files.py b/alyx/data/management/commands/files.py index 45094ff71..fed7123cd 100644 --- a/alyx/data/management/commands/files.py +++ b/alyx/data/management/commands/files.py @@ -1,4 +1,5 @@ import logging + from django.core.management import BaseCommand from django.db.models import Count, Q @@ -195,7 +196,7 @@ def handle(self, *args, **options): dt = None for d in FileRecord.objects.all().select_related('dataset'): try: - dt = transfers.get_dataset_type(d.relative_path, qs=qs) + dt = transfers.get_dataset_type(d.relative_path, qs) except ValueError: dt = None continue diff --git a/alyx/data/tests.py b/alyx/data/tests.py index 2b5e4ed1a..cdc33bed3 100644 --- a/alyx/data/tests.py +++ b/alyx/data/tests.py @@ -54,6 +54,8 @@ def test_model_methods(self): ('bar.baz.ext', 'bar.baz'), ('some_file.ext', 'some_file') ) + + dtypes = DatasetType.objects.all() for filename, dataname in filename_typename: with self.subTest(filename=filename): - self.assertEqual(get_dataset_type(filename).name, dataname) + self.assertEqual(get_dataset_type(filename, dtypes).name, dataname) diff --git a/alyx/data/transfers.py b/alyx/data/transfers.py index 9abfcc8eb..654f5a2fa 100644 --- a/alyx/data/transfers.py +++ b/alyx/data/transfers.py @@ -5,13 +5,12 @@ import re import time from pathlib import Path -from fnmatch import fnmatch from django.db.models import Case, When, Count, Q, F import globus_sdk import numpy as np -from one.alf.files import filename_parts, add_uuid_string -from one.alf.spec import is_valid +from one.alf.files import add_uuid_string +from one.registration import get_dataset_type from alyx import settings from data.models import FileRecord, Dataset, DatasetType, DataFormat, DataRepository @@ -170,31 +169,6 @@ def globus_file_exists(file_record): return False -def get_dataset_type(filename, qs=None): - """Get the dataset type from a given filename""" - dataset_types = [] - for dt in qs or DatasetType.objects.all(): - if not dt.filename_pattern.strip(): - # If the filename pattern is null, check whether the filename object.attribute matches - # the dataset type name. 
- if is_valid(filename): - obj_attr = '.'.join(filename_parts(filename)[1:3]) - else: # will match name against filename sans extension - obj_attr = op.splitext(filename)[0] - if dt.name == obj_attr: - dataset_types.append(dt) - # Check whether pattern matches filename - elif fnmatch(op.basename(filename).lower(), dt.filename_pattern.lower()): - dataset_types.append(dt) - n = len(dataset_types) - if n == 0: - raise ValueError("No dataset type found for filename `%s`" % filename) - elif n >= 2: - raise ValueError("Multiple matching dataset types found for filename `%s`: %s" % ( - filename, ', '.join(map(str, dataset_types)))) - return dataset_types[0] - - def get_data_format(filename): file_extension = op.splitext(filename)[-1] # This raises an error if there is 0 or 2+ matching data formats. @@ -275,7 +249,7 @@ def _create_dataset_file_records( assert session is not None revision_name = f'#{revision.name}#' if revision else '' relative_path = op.join(rel_dir_path, collection or '', revision_name, filename) - dataset_type = get_dataset_type(filename) + dataset_type = get_dataset_type(filename, DatasetType.objects.all()) data_format = get_data_format(filename) assert dataset_type assert data_format diff --git a/requirements.txt b/requirements.txt index af32feaec..76243b17b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,4 +28,4 @@ python-magic pytz structlog>=21.5.0 webdavclient3 -ONE-api>=1.13.0 +ONE-api>=1.18.0 diff --git a/scripts/oneoff/2019-08-30-patch_register.py b/scripts/oneoff/2019-08-30-patch_register.py index 729972a7a..f39059a5e 100644 --- a/scripts/oneoff/2019-08-30-patch_register.py +++ b/scripts/oneoff/2019-08-30-patch_register.py @@ -13,14 +13,14 @@ fr_server = FileRecord.objects.filter(dataset__session__in=sessions, data_repository__globus_is_personal=False) -assert(fr_local.count() == fr_server.count()) +assert fr_local.count() == fr_server.count() fr_server.update(exists=False) files_repos_save = fr_local.values_list('id', 'data_repository') fr_local.update() repo = fr_local.values_list('data_repository', flat=True).distinct() -assert(repo.count() == 1) +assert repo.count() == 1 repo = DataRepository.objects.get(id=repo[0]) globus_id_save = repo.globus_endpoint_id diff --git a/scripts/sync_ucl/prune_cortexlab.py b/scripts/sync_ucl/prune_cortexlab.py index d23174837..8d4e346fd 100755 --- a/scripts/sync_ucl/prune_cortexlab.py +++ b/scripts/sync_ucl/prune_cortexlab.py @@ -2,6 +2,8 @@ import numpy as np from django.core.management import call_command +from django.db.models import CharField +from django.db.models.functions import Concat from subjects.models import Subject, Project, SubjectRequest from actions.models import Session, Surgery, NotificationRule, Notification @@ -131,7 +133,8 @@ In this case we remove the offending datasets from IBL: the UCL version always has priority (at some point using pandas might be much easier and legible) """ -# Here we are looking for duplicated that DO NOT have the same primary key, but the same session, collection, name and revision +# Here we are looking for duplicated that DO NOT have the same primary key, but the same session, +# collection, name and revision. 
# Find the datasets that only exist in the IBL database, load session, collection and name pk2check = ids_pk.difference(cds_pk) ibl_datasets = Dataset.objects.filter(pk__in=pk2check) @@ -159,14 +162,24 @@ set_cortex_lab_only = cds_pk.difference(ids_pk) set_ibl_only = ids_pk.difference(cds_pk) # get the interection querysets -cqs = Dataset.objects.using('cortexlab').exclude(pk__in=set_cortex_lab_only).order_by('pk').values_list(*dfields) -iqs = Dataset.objects.filter(session__lab__name='cortexlab').exclude(pk__in=set_ibl_only).order_by('pk').values_list(*dfields) +cqs = (Dataset + .objects + .using('cortexlab') + .exclude(pk__in=set_cortex_lab_only) + .order_by('pk') + .values_list(*dfields)) +iqs = (Dataset + .objects + .filter(session__lab__name='cortexlab') + .exclude(pk__in=set_ibl_only) + .order_by('pk') + .values_list(*dfields)) # manual check but this is expensive # assert len(set(iqs).difference(set(cqs))) == len(set(cqs).difference(set(iqs))) -# this is the set of pks for which there is a md5 mismatch - for all the others, do not import anything by deleting -# many datasets from the cortexlab database +# this is the set of pks for which there is a md5 mismatch - for all the others, do not import +# anything by deleting many datasets from the cortexlab database dpk = [s[0] for s in set(cds).difference(set(ids))] Dataset.objects.using('cortexlab').exclude(pk__in=set_cortex_lab_only.union(dpk)).delete() @@ -177,25 +190,25 @@ ti = np.array(iqs_md5.values_list('auto_datetime', flat=True)).astype(np.datetime64) tc = np.array(cqs_md5.values_list('auto_datetime', flat=True)).astype(np.datetime64) -# those are the indices where the autodatetiem from IBL is posterior to cortexlab - do not import by deleting the datasets -# from the cortexlab database +# those are the indices where the autodatetime from IBL is posterior to cortexlab - do not import +# by deleting the datasets from the cortexlab database ind_ibl = np.where(ti >= tc)[0] pk2remove = list(np.array(iqs_md5.values_list('pk', flat=True))[ind_ibl]) Dataset.objects.using('cortexlab').filter(pk__in=pk2remove).delete() -# for those that will imported from UCL, set the filerecord status to exist=False fr the local server fierecords +# for those that will imported from UCL, set the file record status to exist=False fr the local +# server file records ind_ucl = np.where(tc > ti)[0] pk2import = list(np.array(iqs_md5.values_list('pk', flat=True))[ind_ucl]) FileRecord.objects.filter(dataset__in=pk2import).update(exists=False, json=None) """ -Sync the tasks 1/2: For DLC tasks there might be duplicates, as we sometimes run them as batch on remote servers. +Sync the tasks 1/2: For DLC tasks there might be duplicates, as we sometimes run them as batch on +remote servers. 
For those import the cortexlab tasks unless there is a NEWER version in the ibl database """ task_names_to_check = ['TrainingDLC', 'EphysDLC'] dfields = ('session_id', 'name', 'arguments') -from django.db.models import CharField, Value -from django.db.models.functions import Concat # remove duplicates from cortexlab if any qs_cortex = Task.objects.using('cortexlab').filter(name__in=task_names_to_check) qs_cortex = qs_cortex.distinct(*dfields) @@ -206,15 +219,18 @@ qs_ibl = Task.objects.filter(session__lab__name='cortexlab').filter(name__in=task_names_to_check) qs_ibl = qs_ibl.annotate(eid_name_args=Concat(*dfields, output_field=CharField())) qs_cortex = qs_cortex.annotate(eid_name_args=Concat(*dfields, output_field=CharField())) -eid_name_args = set(qs_cortex.values_list('eid_name_args')).intersection(qs_cortex.values_list('eid_name_args')) +eid_name_args = (set(qs_cortex.values_list('eid_name_args')) + .intersection(qs_cortex.values_list('eid_name_args'))) dlc_cortex = qs_cortex.filter(eid_name_args__in=eid_name_args).order_by('eid_name_args') -dlc_ibl = qs_ibl.filter(name__in=task_names_to_check, eid_name_args__in=eid_name_args).order_by('eid_name_args') - +dlc_ibl = (qs_ibl + .filter(name__in=task_names_to_check, eid_name_args__in=eid_name_args) + .order_by('eid_name_args')) times_cortex = np.array(dlc_cortex.values_list('datetime', flat=True)).astype(np.datetime64) times_ibl = np.array(dlc_ibl.values_list('datetime', flat=True)).astype(np.datetime64) -# Indices where datetime from IBL is newer than cortexlab -- do not import by deleting the datasets from cortexlab db +# Indices where datetime from IBL is newer than cortexlab -- do not import by deleting the datasets +# from cortexlab db # Indices where datetime from IBL is older than cortexlab -- delete from ibl db keep_ibl = np.where(times_ibl >= times_cortex)[0] keep_cortex = np.where(times_ibl < times_cortex)[0] @@ -224,21 +240,34 @@ Task.objects.filter(pk__in=pk_del_ibl, name__in=task_names_to_check).delete() """ -Sync the tasks 2/2: For all other tasks, make sure there are no duplicate tasks with different ids that have been made -on IBL and cortex lab database. In the case of duplicates cortex lab database are kept and IBL deleted +Sync the tasks 2/2: For all other tasks, make sure there are no duplicate tasks with different ids +that have been made on IBL and cortex lab database. 
In the case of duplicates cortex lab database +are kept and IBL deleted """ task_names_to_exclude = ['TrainingDLC', 'EphysDLC'] -cortex_eids = Task.objects.using('cortexlab').exclude(name__in=task_names_to_exclude).values_list('session', flat=True) +cortex_eids = (Task + .objects + .using('cortexlab') + .exclude(name__in=task_names_to_exclude) + .values_list('session', flat=True)) ibl_eids = Task.objects.all().filter(session__lab__name='cortexlab').exclude( name__in=task_names_to_exclude).values_list('session', flat=True) # finds eids that have tasks on both ibl and cortex lab database overlap_eids = set(cortex_eids).intersection(ibl_eids) dfields = ('id', 'name', 'session') -task_cortex = Task.objects.using('cortexlab').filter(session__in=overlap_eids).exclude(name__in=task_names_to_exclude) +task_cortex = (Task + .objects + .using('cortexlab') + .filter(session__in=overlap_eids) + .exclude(name__in=task_names_to_exclude)) cids = task_cortex.values_list(*dfields) -task_ibl = Task.objects.all().filter(session__in=overlap_eids).exclude(name__in=task_names_to_exclude) +task_ibl = (Task + .objects + .all() + .filter(session__in=overlap_eids) + .exclude(name__in=task_names_to_exclude)) ids = task_ibl.values_list(*dfields) # find the tasks that are not common to both @@ -253,12 +282,15 @@ ts.delete() """ -Sync the notes. When a note is updated (in the behaviour criteria tracking) it is deleted and created anew. -The problem is this will create many duplicates on the IBL side after import. +Sync the notes. When a note is updated (in the behaviour criteria tracking) it is deleted and +created anew. The problem is this will create many duplicates on the IBL side after import. Here we look for all of the notes that are present on IBL and remove those that are not in UCL """ ibl_notes = Note.objects.filter(object_id__in=Subject.objects.filter(lab=CORTEX_LAB_PK)) -ucl_notes = Note.objects.using('cortexlab').filter(object_id__in=Subject.objects.filter(lab=CORTEX_LAB_PK)) +ucl_notes = (Note + .objects + .using('cortexlab') + .filter(object_id__in=Subject.objects.filter(lab=CORTEX_LAB_PK))) ibl_notes.exclude(pk__in=list(ucl_notes.values_list('pk', flat=True))).count() """ From 096d85a432f6cb552ccaa2c3803987c3d9b39d05 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 18 Jan 2023 15:24:42 +0000 Subject: [PATCH 7/8] GitHub Actions generated requirements_frozen.txt --- requirements_frozen.txt | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/requirements_frozen.txt b/requirements_frozen.txt index bf92f6697..5c30cc697 100644 --- a/requirements_frozen.txt +++ b/requirements_frozen.txt @@ -1,13 +1,13 @@ asgiref==3.6.0 backports.zoneinfo==0.2.1 -boto3==1.26.41 -botocore==1.29.41 +boto3==1.26.51 +botocore==1.29.51 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==2.1.1 +charset-normalizer==3.0.1 click==8.1.3 colorlog==6.7.0 -contourpy==1.0.6 +contourpy==1.0.7 coreapi==2.3.3 coreschema==0.0.4 coverage==6.5.0 @@ -46,13 +46,13 @@ kiwisolver==1.4.4 llvmlite==0.39.1 lxml==4.9.2 Markdown==3.4.1 -MarkupSafe==2.1.1 -matplotlib==3.6.2 +MarkupSafe==2.1.2 +matplotlib==3.6.3 mccabe==0.7.0 numba==0.56.4 numpy==1.23.5 -ONE-api==1.16.3 -packaging==22.0 +ONE-api==1.18.0 +packaging==23.0 pandas==1.5.2 Pillow==9.4.0 psycopg2-binary==2.9.5 @@ -64,9 +64,9 @@ PyJWT==2.6.0 pyparsing==3.0.9 python-dateutil==2.8.2 python-magic==0.4.27 -pytz==2022.7 +pytz==2022.7.1 PyYAML==6.0 -requests==2.28.1 +requests==2.28.2 s3transfer==0.6.0 six==1.16.0 sqlparse==0.4.3 @@ -74,6 +74,6 @@ 
structlog==22.3.0 tqdm==4.64.1 typing_extensions==4.4.0 uritemplate==4.1.1 -urllib3==1.26.13 +urllib3==1.26.14 webdavclient3==3.14.6 zipp==3.11.0 From d31f702297227af9d24e953b07453b082134529c Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Wed, 18 Jan 2023 17:31:32 +0200 Subject: [PATCH 8/8] Bump version --- alyx/alyx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alyx/alyx/__init__.py b/alyx/alyx/__init__.py index 56684fc62..8310a90dd 100644 --- a/alyx/alyx/__init__.py +++ b/alyx/alyx/__init__.py @@ -1 +1 @@ -VERSION = __version__ = '1.7.0' +VERSION = __version__ = '1.8.0'
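
---
Editor's note: for context, below is a minimal sketch (not part of the patch series) of how the protected-dataset deletion introduced in PATCH 4/8 and refined in PATCH 5/8 behaves. It mirrors the setup used in `alyx/data/tests.py::TestModel.test_delete` and assumes a configured Django environment with the `data` app models from these patches (the dataset and tag names are illustrative only):

```python
from django.db.models import ProtectedError
from data.models import Dataset, Tag

# Tag a dataset as protected (same setup as data/tests.py).
dset, _ = Dataset.objects.get_or_create(name='foo.npy')
tag, _ = Tag.objects.get_or_create(name='protected_tag', protected=True)
dset.tags.set([tag])
assert dset.is_protected

# Single-object delete raises ProtectedError unless force=True is passed.
try:
    dset.delete()
except ProtectedError as e:
    print(e.args[0])  # e.g. 'Failed to delete dataset foo.npy due to protected tag(s) "protected_tag"'

# Bulk deletes go through DatasetQuerySet.delete(); with force=True a warning
# is logged and the protected datasets are deleted anyway.
Dataset.objects.filter(tags__name='protected_tag').delete(force=True)
```

Via the REST API, the same ProtectedError is caught in `DatasetDetail.delete` (PATCH 4/8 and 5/8) and returned as a 403 response containing the dataset name and its protected tags.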