From a211a4f270db6b751d101e5967de10cd63ef0c6b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 9 Dec 2022 08:20:53 +0000 Subject: [PATCH 1/8] Bump certifi from 2022.9.24 to 2022.12.7 Bumps [certifi](https://github.com/certifi/python-certifi) from 2022.9.24 to 2022.12.7. - [Release notes](https://github.com/certifi/python-certifi/releases) - [Commits](https://github.com/certifi/python-certifi/compare/2022.09.24...2022.12.07) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements_frozen.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_frozen.txt b/requirements_frozen.txt index aceb9e55a..981b70500 100644 --- a/requirements_frozen.txt +++ b/requirements_frozen.txt @@ -2,7 +2,7 @@ asgiref==3.5.2 backports.zoneinfo==0.2.1 boto3==1.26.24 botocore==1.29.24 -certifi==2022.9.24 +certifi==2022.12.7 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 From 5142bff0e37d3cddc120e70476714618cc15050a Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Tue, 3 Jan 2023 16:04:42 +0200 Subject: [PATCH 2/8] Exclude datasets with no associated session from cache tables --- alyx/misc/management/commands/one_cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alyx/misc/management/commands/one_cache.py b/alyx/misc/management/commands/one_cache.py index c01351bc4..65da21978 100644 --- a/alyx/misc/management/commands/one_cache.py +++ b/alyx/misc/management/commands/one_cache.py @@ -381,9 +381,9 @@ def generate_datasets_frame(int_id=True, tags=None) -> pd.DataFrame: if tags: kw = {'tags__name__in' if not isinstance(tags, str) else 'tags__name': tags} ds = ds.prefetch_related('tag').filter(**kw) - # Filter out datasets that do not exist on either repository + # Filter out datasets that do not exist on either repository or have no associated session ds = ds.annotate(exists_flatiron=Exists(on_flatiron), exists_aws=Exists(on_aws)) - ds = ds.filter(Q(exists_flatiron=True) | Q(exists_aws=True)) + ds = ds.filter(Q(exists_flatiron=True) | Q(exists_aws=True), session__isnull=False) # fields to keep from Dataset table fields = ( From 99f317e7d902600cdc815f3ecfe63046125a26dc Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 3 Jan 2023 14:10:54 +0000 Subject: [PATCH 3/8] GitHub Actions generated requirements_frozen.txt --- requirements_frozen.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/requirements_frozen.txt b/requirements_frozen.txt index 9c2cf2198..bf92f6697 100644 --- a/requirements_frozen.txt +++ b/requirements_frozen.txt @@ -1,8 +1,8 @@ -asgiref==3.5.2 +asgiref==3.6.0 backports.zoneinfo==0.2.1 -boto3==1.26.16 -botocore==1.29.16 -certifi==2022.9.24 +boto3==1.26.41 +botocore==1.29.41 +certifi==2022.12.7 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 @@ -14,7 +14,7 @@ coverage==6.5.0 coveralls==3.3.1 cryptography==36.0.2 cycler==0.11.0 -Django==4.1.3 +Django==4.1.5 django-admin-list-filter-dropdown==1.0.3 django-admin-rangefilter==0.9.0 django-autocomplete-light==3.9.4 @@ -25,7 +25,7 @@ django-js-asset==2.0.0 django-mptt==0.14.0 django-polymorphic==3.1.0 django-reversion==5.0.4 -django-storages==1.13.1 +django-storages==1.13.2 django-structlog==4.0.1 django-test-without-migrations==0.6 djangorestframework==3.14.0 @@ -34,27 +34,27 @@ docutils==0.19 drfdocs==0.0.11 flake8==6.0.0 fonttools==4.38.0 -globus-cli==3.9.0 -globus-sdk==3.13.0 +globus-cli==3.10.1 
+globus-sdk==3.15.0 iblutil==1.4.0 idna==3.4 -importlib-metadata==5.1.0 +importlib-metadata==6.0.0 itypes==1.2.0 Jinja2==3.1.2 -jmespath==0.10.0 +jmespath==1.0.1 kiwisolver==1.4.4 llvmlite==0.39.1 -lxml==4.9.1 +lxml==4.9.2 Markdown==3.4.1 MarkupSafe==2.1.1 matplotlib==3.6.2 mccabe==0.7.0 numba==0.56.4 numpy==1.23.5 -ONE-api==1.16.2 -packaging==21.3 +ONE-api==1.16.3 +packaging==22.0 pandas==1.5.2 -Pillow==9.3.0 +Pillow==9.4.0 psycopg2-binary==2.9.5 pyarrow==10.0.1 pycodestyle==2.10.0 @@ -64,7 +64,7 @@ PyJWT==2.6.0 pyparsing==3.0.9 python-dateutil==2.8.2 python-magic==0.4.27 -pytz==2022.6 +pytz==2022.7 PyYAML==6.0 requests==2.28.1 s3transfer==0.6.0 @@ -76,4 +76,4 @@ typing_extensions==4.4.0 uritemplate==4.1.1 urllib3==1.26.13 webdavclient3==3.14.6 -zipp==3.10.0 +zipp==3.11.0 From 110930b033a8ad3d0c2fde848dcb1fb531f9f8cc Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Fri, 6 Jan 2023 14:46:48 +0200 Subject: [PATCH 4/8] QuerySet manager checks for protected tags before delete --- README.md | 5 +++-- alyx/data/models.py | 28 ++++++++++++++++++++++++++-- alyx/data/tests.py | 24 +++++++++++++++++++++++- alyx/data/tests_rest.py | 13 +++++++++++++ alyx/data/views.py | 12 ++++++++++++ 5 files changed, 77 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b35906e0e..4a1a58a3f 100644 --- a/README.md +++ b/README.md @@ -25,8 +25,9 @@ this setup will work on other systems. Assumptions made are that you have sudo p * alyx-dev is sync with the **dev** branch * Migrations files are provided by the repository * Continuous integration is setup, to run tests locally: - - `./manage.py test -n` test without migrations (faster) - - `./manage.py test` test with migrations (recommended if model changes) + - `./manage.py test -n` test without migrations (faster) + - `./manage.py test` test with migrations (recommended if model changes) + - NB: When running tests ensure `DEBUG = True` in the settings.py file (specifically `SECURE_SSL_REDIRECT = True` causes REST tests to fail) ``` $ /manage.py test -n diff --git a/alyx/data/models.py b/alyx/data/models.py index 46192bbd7..d1c2220f5 100644 --- a/alyx/data/models.py +++ b/alyx/data/models.py @@ -1,10 +1,14 @@ +import structlog + from django.core.validators import RegexValidator from django.db import models from django.utils import timezone from alyx.settings import TIME_ZONE, AUTH_USER_MODEL from actions.models import Session -from alyx.base import BaseModel, modify_fields, BaseManager, CharNullField +from alyx.base import BaseModel, modify_fields, BaseManager, CharNullField, BaseQuerySet + +logger = structlog.get_logger(__name__) def _related_string(field): @@ -252,9 +256,22 @@ def __str__(self): return "" % self.name +class DatasetQuerySet(BaseQuerySet): + """A Queryset that checks for protected datasets before deletion""" + + def delete(self, force=False): + if (protected := self.filter(tags__protected=True)).exists(): + if force: + logger.warning('The following protected datasets will be deleted:\n%s', + '\n'.join(map(str, protected.values_list('name', 'session_id')))) + else: + raise models.ProtectedError(f'{protected.count()} dataset(s) protected.', protected) + super().delete() + + class DatasetManager(BaseManager): def get_queryset(self): - qs = super(DatasetManager, self).get_queryset() + qs = DatasetQuerySet(self.model, using=self._db) qs = qs.select_related('dataset_type', 'data_format') return qs @@ -361,6 +378,13 @@ def save(self, *args, **kwargs): if len(pis): self.probe_insertion.set(pis.values_list('pk', flat=True)) + def 
delete(self, *args, force=False, **kwargs): + # If a dataset is protected and force=False, raise an exception + # NB This is not called when bulk deleting or in cascading deletes + if self.is_protected and not force: + raise models.ProtectedError(f'Dataset {self.name} is protected.', self) + super().delete(*args, **kwargs) + # Files # ------------------------------------------------------------------------------------------------ diff --git a/alyx/data/tests.py b/alyx/data/tests.py index 5eff5d4e1..909311785 100644 --- a/alyx/data/tests.py +++ b/alyx/data/tests.py @@ -1,16 +1,37 @@ from django.test import TestCase +from django.db import transaction from django.db.utils import IntegrityError -from data.models import Dataset, DatasetType +from django.db.models import ProtectedError + +from data.models import Dataset, DatasetType, Tag from data.transfers import get_dataset_type class TestModel(TestCase): def test_model_methods(self): (dset, _) = Dataset.objects.get_or_create(name='toto.npy') + assert dset.is_online is False assert dset.is_public is False assert dset.is_protected is False + def test_delete(self): + (dset, _) = Dataset.objects.get_or_create(name='foo.npy') + (tag, _) = Tag.objects.get_or_create(name='protected_tag', protected=True) + dset.tags.set([tag]) + assert dset.is_protected is True + + # Individual object delete + with transaction.atomic(): + self.assertRaises(ProtectedError, dset.delete) + + # As queryset + qs = Dataset.objects.filter(tags__name='protected_tag') + with transaction.atomic(): + self.assertRaises(ProtectedError, qs.delete) + with self.assertLogs('data.models', 'WARNING'): + qs.delete(force=True) + class TestDatasetTypeModel(TestCase): def test_model_methods(self): @@ -36,3 +57,4 @@ def test_model_methods(self): for filename, dataname in filename_typename: with self.subTest(filename=filename): self.assertEqual(get_dataset_type(filename).name, dataname) + diff --git a/alyx/data/tests_rest.py b/alyx/data/tests_rest.py index 24a02d550..fcce55ac9 100644 --- a/alyx/data/tests_rest.py +++ b/alyx/data/tests_rest.py @@ -184,6 +184,19 @@ def test_dataset(self): self.ar(r, 201) self.assertEqual(r.data['default_dataset'], False) + # Create protected tag and dataset + r = self.ar(self.post(reverse('tag-list'), {'name': 'foo_tag', 'protected': True}), 201) + data = {'name': 'foo.bar', 'dataset_type': 'dst', 'created_by': 'test', + 'data_format': 'df', 'date': '2018-01-01', 'number': 2, 'subject': self.subject, + 'tags': [r['name']]} + + r = self.ar(self.post(reverse('dataset-list'), data), 201) + did = r['url'].split('/')[-1] + + # Now attempt to delete the protected dataset + r = self.client.delete(reverse('dataset-detail', args=[did]), data) + self.assertRegex(self.ar(r, 403), 'protected') + def test_dataset_date_filter(self): # create 2 datasets with different dates data = { diff --git a/alyx/data/views.py b/alyx/data/views.py index e1d98fe4b..841faf17d 100644 --- a/alyx/data/views.py +++ b/alyx/data/views.py @@ -2,6 +2,7 @@ import re from django.contrib.auth import get_user_model +from django.db import models from rest_framework import generics, viewsets, mixins, serializers from rest_framework.response import Response import django_filters @@ -226,6 +227,17 @@ class DatasetDetail(generics.RetrieveUpdateDestroyAPIView): serializer_class = DatasetSerializer permission_classes = rest_permission_classes() + def delete(self, request, *args, **kwargs): + try: + return super().delete(request, *args, **kwargs) + except models.ProtectedError as e: + # Return Forbidden 
response with dataset name and list of protected tags associated + tags = e.protected_objects.tags.filter(protected=True).values_list('name', flat=True) + tags_str = '"' + '", "'.join(tags) + '"' + return Response( + f'Dataset {e.protected_objects.name} is protected by the tag(s) {tags_str}', 403 + ) + # FileRecord # ------------------------------------------------------------------------------------------------ From 0c31a965078d39f88115c858dcd1d2eacf3434b7 Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Fri, 6 Jan 2023 16:43:13 +0200 Subject: [PATCH 5/8] Admin error messages --- alyx/data/admin.py | 19 +++++++++++++++++-- alyx/data/models.py | 13 +++++++++++-- alyx/data/tests.py | 1 - alyx/data/views.py | 7 ++----- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/alyx/data/admin.py b/alyx/data/admin.py index e214b1de2..e2e9df01a 100644 --- a/alyx/data/admin.py +++ b/alyx/data/admin.py @@ -1,5 +1,5 @@ -from django.db.models import Count -from django.contrib import admin +from django.db.models import Count, ProtectedError +from django.contrib import admin, messages from django.utils.html import format_html from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter from rangefilter.filter import DateRangeFilter @@ -135,6 +135,21 @@ def _public(self, obj): _public.short_description = 'Public' _public.boolean = True + def delete_queryset(self, request, queryset): + try: + queryset.delete() + except ProtectedError as e: + err_msg = e.args[0] if e.args else 'One or more dataset(s) protected' + self.message_user(request, err_msg, level=messages.ERROR) + + def delete_model(self, request, obj): + try: + obj.delete() + except ProtectedError as e: + # FIXME This still shows the successful message which is confusing for users + err_msg = e.args[0] if e.args else f'Dataset {obj.name} is protected' + self.message_user(request, err_msg, level=messages.ERROR) + class FileRecordAdmin(BaseAdmin): fields = ('relative_path', 'data_repository', 'dataset', 'exists') diff --git a/alyx/data/models.py b/alyx/data/models.py index d1c2220f5..31d31d656 100644 --- a/alyx/data/models.py +++ b/alyx/data/models.py @@ -265,7 +265,12 @@ def delete(self, force=False): logger.warning('The following protected datasets will be deleted:\n%s', '\n'.join(map(str, protected.values_list('name', 'session_id')))) else: - raise models.ProtectedError(f'{protected.count()} dataset(s) protected.', protected) + logger.error( + 'The following protected datasets cannot be deleted without force=True:\n%s', + '\n'.join(map(str, protected.values_list('name', 'session_id')))) + raise models.ProtectedError( + f'Failed to delete {protected.count()} dataset(s) due to protected tags', + protected) super().delete() @@ -382,7 +387,11 @@ def delete(self, *args, force=False, **kwargs): # If a dataset is protected and force=False, raise an exception # NB This is not called when bulk deleting or in cascading deletes if self.is_protected and not force: - raise models.ProtectedError(f'Dataset {self.name} is protected.', self) + tags = self.tags.filter(protected=True).values_list('name', flat=True) + tags_str = '"' + '", "'.join(tags) + '"' + logger.error(f'Dataset {self.name} is protected by tag(s); use force=True.') + raise models.ProtectedError( + f'Failed to delete dataset {self.name} due to protected tag(s) {tags_str}', self) super().delete(*args, **kwargs) diff --git a/alyx/data/tests.py b/alyx/data/tests.py index 909311785..2b5e4ed1a 100644 --- a/alyx/data/tests.py +++ b/alyx/data/tests.py @@ -57,4 +57,3 @@ def 
test_model_methods(self): for filename, dataname in filename_typename: with self.subTest(filename=filename): self.assertEqual(get_dataset_type(filename).name, dataname) - diff --git a/alyx/data/views.py b/alyx/data/views.py index 841faf17d..fd2f412b3 100644 --- a/alyx/data/views.py +++ b/alyx/data/views.py @@ -232,11 +232,8 @@ def delete(self, request, *args, **kwargs): return super().delete(request, *args, **kwargs) except models.ProtectedError as e: # Return Forbidden response with dataset name and list of protected tags associated - tags = e.protected_objects.tags.filter(protected=True).values_list('name', flat=True) - tags_str = '"' + '", "'.join(tags) + '"' - return Response( - f'Dataset {e.protected_objects.name} is protected by the tag(s) {tags_str}', 403 - ) + err_msg, _ = e.args + return Response(e.args[0], 403) # FileRecord From 23ffe1bc9e268fcb0659d5d48afd6ec376967546 Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Wed, 18 Jan 2023 15:57:14 +0200 Subject: [PATCH 6/8] get_dataset_type moved to ONE --- alyx/data/management/commands/files.py | 3 +- alyx/data/tests.py | 4 +- alyx/data/transfers.py | 32 +-------- requirements.txt | 2 +- scripts/oneoff/2019-08-30-patch_register.py | 4 +- scripts/sync_ucl/prune_cortexlab.py | 78 +++++++++++++++------ 6 files changed, 66 insertions(+), 57 deletions(-) diff --git a/alyx/data/management/commands/files.py b/alyx/data/management/commands/files.py index 45094ff71..fed7123cd 100644 --- a/alyx/data/management/commands/files.py +++ b/alyx/data/management/commands/files.py @@ -1,4 +1,5 @@ import logging + from django.core.management import BaseCommand from django.db.models import Count, Q @@ -195,7 +196,7 @@ def handle(self, *args, **options): dt = None for d in FileRecord.objects.all().select_related('dataset'): try: - dt = transfers.get_dataset_type(d.relative_path, qs=qs) + dt = transfers.get_dataset_type(d.relative_path, qs) except ValueError: dt = None continue diff --git a/alyx/data/tests.py b/alyx/data/tests.py index 2b5e4ed1a..cdc33bed3 100644 --- a/alyx/data/tests.py +++ b/alyx/data/tests.py @@ -54,6 +54,8 @@ def test_model_methods(self): ('bar.baz.ext', 'bar.baz'), ('some_file.ext', 'some_file') ) + + dtypes = DatasetType.objects.all() for filename, dataname in filename_typename: with self.subTest(filename=filename): - self.assertEqual(get_dataset_type(filename).name, dataname) + self.assertEqual(get_dataset_type(filename, dtypes).name, dataname) diff --git a/alyx/data/transfers.py b/alyx/data/transfers.py index 9abfcc8eb..654f5a2fa 100644 --- a/alyx/data/transfers.py +++ b/alyx/data/transfers.py @@ -5,13 +5,12 @@ import re import time from pathlib import Path -from fnmatch import fnmatch from django.db.models import Case, When, Count, Q, F import globus_sdk import numpy as np -from one.alf.files import filename_parts, add_uuid_string -from one.alf.spec import is_valid +from one.alf.files import add_uuid_string +from one.registration import get_dataset_type from alyx import settings from data.models import FileRecord, Dataset, DatasetType, DataFormat, DataRepository @@ -170,31 +169,6 @@ def globus_file_exists(file_record): return False -def get_dataset_type(filename, qs=None): - """Get the dataset type from a given filename""" - dataset_types = [] - for dt in qs or DatasetType.objects.all(): - if not dt.filename_pattern.strip(): - # If the filename pattern is null, check whether the filename object.attribute matches - # the dataset type name. 
- if is_valid(filename): - obj_attr = '.'.join(filename_parts(filename)[1:3]) - else: # will match name against filename sans extension - obj_attr = op.splitext(filename)[0] - if dt.name == obj_attr: - dataset_types.append(dt) - # Check whether pattern matches filename - elif fnmatch(op.basename(filename).lower(), dt.filename_pattern.lower()): - dataset_types.append(dt) - n = len(dataset_types) - if n == 0: - raise ValueError("No dataset type found for filename `%s`" % filename) - elif n >= 2: - raise ValueError("Multiple matching dataset types found for filename `%s`: %s" % ( - filename, ', '.join(map(str, dataset_types)))) - return dataset_types[0] - - def get_data_format(filename): file_extension = op.splitext(filename)[-1] # This raises an error if there is 0 or 2+ matching data formats. @@ -275,7 +249,7 @@ def _create_dataset_file_records( assert session is not None revision_name = f'#{revision.name}#' if revision else '' relative_path = op.join(rel_dir_path, collection or '', revision_name, filename) - dataset_type = get_dataset_type(filename) + dataset_type = get_dataset_type(filename, DatasetType.objects.all()) data_format = get_data_format(filename) assert dataset_type assert data_format diff --git a/requirements.txt b/requirements.txt index af32feaec..76243b17b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,4 +28,4 @@ python-magic pytz structlog>=21.5.0 webdavclient3 -ONE-api>=1.13.0 +ONE-api>=1.18.0 diff --git a/scripts/oneoff/2019-08-30-patch_register.py b/scripts/oneoff/2019-08-30-patch_register.py index 729972a7a..f39059a5e 100644 --- a/scripts/oneoff/2019-08-30-patch_register.py +++ b/scripts/oneoff/2019-08-30-patch_register.py @@ -13,14 +13,14 @@ fr_server = FileRecord.objects.filter(dataset__session__in=sessions, data_repository__globus_is_personal=False) -assert(fr_local.count() == fr_server.count()) +assert fr_local.count() == fr_server.count() fr_server.update(exists=False) files_repos_save = fr_local.values_list('id', 'data_repository') fr_local.update() repo = fr_local.values_list('data_repository', flat=True).distinct() -assert(repo.count() == 1) +assert repo.count() == 1 repo = DataRepository.objects.get(id=repo[0]) globus_id_save = repo.globus_endpoint_id diff --git a/scripts/sync_ucl/prune_cortexlab.py b/scripts/sync_ucl/prune_cortexlab.py index d23174837..8d4e346fd 100755 --- a/scripts/sync_ucl/prune_cortexlab.py +++ b/scripts/sync_ucl/prune_cortexlab.py @@ -2,6 +2,8 @@ import numpy as np from django.core.management import call_command +from django.db.models import CharField +from django.db.models.functions import Concat from subjects.models import Subject, Project, SubjectRequest from actions.models import Session, Surgery, NotificationRule, Notification @@ -131,7 +133,8 @@ In this case we remove the offending datasets from IBL: the UCL version always has priority (at some point using pandas might be much easier and legible) """ -# Here we are looking for duplicated that DO NOT have the same primary key, but the same session, collection, name and revision +# Here we are looking for duplicated that DO NOT have the same primary key, but the same session, +# collection, name and revision. 
# Find the datasets that only exist in the IBL database, load session, collection and name pk2check = ids_pk.difference(cds_pk) ibl_datasets = Dataset.objects.filter(pk__in=pk2check) @@ -159,14 +162,24 @@ set_cortex_lab_only = cds_pk.difference(ids_pk) set_ibl_only = ids_pk.difference(cds_pk) # get the interection querysets -cqs = Dataset.objects.using('cortexlab').exclude(pk__in=set_cortex_lab_only).order_by('pk').values_list(*dfields) -iqs = Dataset.objects.filter(session__lab__name='cortexlab').exclude(pk__in=set_ibl_only).order_by('pk').values_list(*dfields) +cqs = (Dataset + .objects + .using('cortexlab') + .exclude(pk__in=set_cortex_lab_only) + .order_by('pk') + .values_list(*dfields)) +iqs = (Dataset + .objects + .filter(session__lab__name='cortexlab') + .exclude(pk__in=set_ibl_only) + .order_by('pk') + .values_list(*dfields)) # manual check but this is expensive # assert len(set(iqs).difference(set(cqs))) == len(set(cqs).difference(set(iqs))) -# this is the set of pks for which there is a md5 mismatch - for all the others, do not import anything by deleting -# many datasets from the cortexlab database +# this is the set of pks for which there is a md5 mismatch - for all the others, do not import +# anything by deleting many datasets from the cortexlab database dpk = [s[0] for s in set(cds).difference(set(ids))] Dataset.objects.using('cortexlab').exclude(pk__in=set_cortex_lab_only.union(dpk)).delete() @@ -177,25 +190,25 @@ ti = np.array(iqs_md5.values_list('auto_datetime', flat=True)).astype(np.datetime64) tc = np.array(cqs_md5.values_list('auto_datetime', flat=True)).astype(np.datetime64) -# those are the indices where the autodatetiem from IBL is posterior to cortexlab - do not import by deleting the datasets -# from the cortexlab database +# those are the indices where the autodatetime from IBL is posterior to cortexlab - do not import +# by deleting the datasets from the cortexlab database ind_ibl = np.where(ti >= tc)[0] pk2remove = list(np.array(iqs_md5.values_list('pk', flat=True))[ind_ibl]) Dataset.objects.using('cortexlab').filter(pk__in=pk2remove).delete() -# for those that will imported from UCL, set the filerecord status to exist=False fr the local server fierecords +# for those that will imported from UCL, set the file record status to exist=False fr the local +# server file records ind_ucl = np.where(tc > ti)[0] pk2import = list(np.array(iqs_md5.values_list('pk', flat=True))[ind_ucl]) FileRecord.objects.filter(dataset__in=pk2import).update(exists=False, json=None) """ -Sync the tasks 1/2: For DLC tasks there might be duplicates, as we sometimes run them as batch on remote servers. +Sync the tasks 1/2: For DLC tasks there might be duplicates, as we sometimes run them as batch on +remote servers. 
For those import the cortexlab tasks unless there is a NEWER version in the ibl database """ task_names_to_check = ['TrainingDLC', 'EphysDLC'] dfields = ('session_id', 'name', 'arguments') -from django.db.models import CharField, Value -from django.db.models.functions import Concat # remove duplicates from cortexlab if any qs_cortex = Task.objects.using('cortexlab').filter(name__in=task_names_to_check) qs_cortex = qs_cortex.distinct(*dfields) @@ -206,15 +219,18 @@ qs_ibl = Task.objects.filter(session__lab__name='cortexlab').filter(name__in=task_names_to_check) qs_ibl = qs_ibl.annotate(eid_name_args=Concat(*dfields, output_field=CharField())) qs_cortex = qs_cortex.annotate(eid_name_args=Concat(*dfields, output_field=CharField())) -eid_name_args = set(qs_cortex.values_list('eid_name_args')).intersection(qs_cortex.values_list('eid_name_args')) +eid_name_args = (set(qs_cortex.values_list('eid_name_args')) + .intersection(qs_cortex.values_list('eid_name_args'))) dlc_cortex = qs_cortex.filter(eid_name_args__in=eid_name_args).order_by('eid_name_args') -dlc_ibl = qs_ibl.filter(name__in=task_names_to_check, eid_name_args__in=eid_name_args).order_by('eid_name_args') - +dlc_ibl = (qs_ibl + .filter(name__in=task_names_to_check, eid_name_args__in=eid_name_args) + .order_by('eid_name_args')) times_cortex = np.array(dlc_cortex.values_list('datetime', flat=True)).astype(np.datetime64) times_ibl = np.array(dlc_ibl.values_list('datetime', flat=True)).astype(np.datetime64) -# Indices where datetime from IBL is newer than cortexlab -- do not import by deleting the datasets from cortexlab db +# Indices where datetime from IBL is newer than cortexlab -- do not import by deleting the datasets +# from cortexlab db # Indices where datetime from IBL is older than cortexlab -- delete from ibl db keep_ibl = np.where(times_ibl >= times_cortex)[0] keep_cortex = np.where(times_ibl < times_cortex)[0] @@ -224,21 +240,34 @@ Task.objects.filter(pk__in=pk_del_ibl, name__in=task_names_to_check).delete() """ -Sync the tasks 2/2: For all other tasks, make sure there are no duplicate tasks with different ids that have been made -on IBL and cortex lab database. In the case of duplicates cortex lab database are kept and IBL deleted +Sync the tasks 2/2: For all other tasks, make sure there are no duplicate tasks with different ids +that have been made on IBL and cortex lab database. 
In the case of duplicates cortex lab database +are kept and IBL deleted """ task_names_to_exclude = ['TrainingDLC', 'EphysDLC'] -cortex_eids = Task.objects.using('cortexlab').exclude(name__in=task_names_to_exclude).values_list('session', flat=True) +cortex_eids = (Task + .objects + .using('cortexlab') + .exclude(name__in=task_names_to_exclude) + .values_list('session', flat=True)) ibl_eids = Task.objects.all().filter(session__lab__name='cortexlab').exclude( name__in=task_names_to_exclude).values_list('session', flat=True) # finds eids that have tasks on both ibl and cortex lab database overlap_eids = set(cortex_eids).intersection(ibl_eids) dfields = ('id', 'name', 'session') -task_cortex = Task.objects.using('cortexlab').filter(session__in=overlap_eids).exclude(name__in=task_names_to_exclude) +task_cortex = (Task + .objects + .using('cortexlab') + .filter(session__in=overlap_eids) + .exclude(name__in=task_names_to_exclude)) cids = task_cortex.values_list(*dfields) -task_ibl = Task.objects.all().filter(session__in=overlap_eids).exclude(name__in=task_names_to_exclude) +task_ibl = (Task + .objects + .all() + .filter(session__in=overlap_eids) + .exclude(name__in=task_names_to_exclude)) ids = task_ibl.values_list(*dfields) # find the tasks that are not common to both @@ -253,12 +282,15 @@ ts.delete() """ -Sync the notes. When a note is updated (in the behaviour criteria tracking) it is deleted and created anew. -The problem is this will create many duplicates on the IBL side after import. +Sync the notes. When a note is updated (in the behaviour criteria tracking) it is deleted and +created anew. The problem is this will create many duplicates on the IBL side after import. Here we look for all of the notes that are present on IBL and remove those that are not in UCL """ ibl_notes = Note.objects.filter(object_id__in=Subject.objects.filter(lab=CORTEX_LAB_PK)) -ucl_notes = Note.objects.using('cortexlab').filter(object_id__in=Subject.objects.filter(lab=CORTEX_LAB_PK)) +ucl_notes = (Note + .objects + .using('cortexlab') + .filter(object_id__in=Subject.objects.filter(lab=CORTEX_LAB_PK))) ibl_notes.exclude(pk__in=list(ucl_notes.values_list('pk', flat=True))).count() """ From 096d85a432f6cb552ccaa2c3803987c3d9b39d05 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 18 Jan 2023 15:24:42 +0000 Subject: [PATCH 7/8] GitHub Actions generated requirements_frozen.txt --- requirements_frozen.txt | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/requirements_frozen.txt b/requirements_frozen.txt index bf92f6697..5c30cc697 100644 --- a/requirements_frozen.txt +++ b/requirements_frozen.txt @@ -1,13 +1,13 @@ asgiref==3.6.0 backports.zoneinfo==0.2.1 -boto3==1.26.41 -botocore==1.29.41 +boto3==1.26.51 +botocore==1.29.51 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==2.1.1 +charset-normalizer==3.0.1 click==8.1.3 colorlog==6.7.0 -contourpy==1.0.6 +contourpy==1.0.7 coreapi==2.3.3 coreschema==0.0.4 coverage==6.5.0 @@ -46,13 +46,13 @@ kiwisolver==1.4.4 llvmlite==0.39.1 lxml==4.9.2 Markdown==3.4.1 -MarkupSafe==2.1.1 -matplotlib==3.6.2 +MarkupSafe==2.1.2 +matplotlib==3.6.3 mccabe==0.7.0 numba==0.56.4 numpy==1.23.5 -ONE-api==1.16.3 -packaging==22.0 +ONE-api==1.18.0 +packaging==23.0 pandas==1.5.2 Pillow==9.4.0 psycopg2-binary==2.9.5 @@ -64,9 +64,9 @@ PyJWT==2.6.0 pyparsing==3.0.9 python-dateutil==2.8.2 python-magic==0.4.27 -pytz==2022.7 +pytz==2022.7.1 PyYAML==6.0 -requests==2.28.1 +requests==2.28.2 s3transfer==0.6.0 six==1.16.0 sqlparse==0.4.3 @@ -74,6 +74,6 @@ 
structlog==22.3.0 tqdm==4.64.1 typing_extensions==4.4.0 uritemplate==4.1.1 -urllib3==1.26.13 +urllib3==1.26.14 webdavclient3==3.14.6 zipp==3.11.0 From d31f702297227af9d24e953b07453b082134529c Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Wed, 18 Jan 2023 17:31:32 +0200 Subject: [PATCH 8/8] Bump version --- alyx/alyx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alyx/alyx/__init__.py b/alyx/alyx/__init__.py index 56684fc62..8310a90dd 100644 --- a/alyx/alyx/__init__.py +++ b/alyx/alyx/__init__.py @@ -1 +1 @@ -VERSION = __version__ = '1.7.0' +VERSION = __version__ = '1.8.0'
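
---
Editor's note: for context, below is a minimal sketch (not part of the patch series) of how the protected-dataset deletion introduced in PATCH 4/8 and refined in PATCH 5/8 behaves. It mirrors the setup used in `alyx/data/tests.py::TestModel.test_delete` and assumes a configured Django environment with the `data` app models from these patches (the dataset and tag names are illustrative only):

```python
from django.db.models import ProtectedError
from data.models import Dataset, Tag

# Tag a dataset as protected (same setup as data/tests.py).
dset, _ = Dataset.objects.get_or_create(name='foo.npy')
tag, _ = Tag.objects.get_or_create(name='protected_tag', protected=True)
dset.tags.set([tag])
assert dset.is_protected

# Single-object delete raises ProtectedError unless force=True is passed.
try:
    dset.delete()
except ProtectedError as e:
    print(e.args[0])  # e.g. 'Failed to delete dataset foo.npy due to protected tag(s) "protected_tag"'

# Bulk deletes go through DatasetQuerySet.delete(); with force=True a warning
# is logged and the protected datasets are deleted anyway.
Dataset.objects.filter(tags__name='protected_tag').delete(force=True)
```

Via the REST API, the same ProtectedError is caught in `DatasetDetail.delete` (PATCH 4/8 and 5/8) and returned as a 403 response containing the dataset name and its protected tags.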