Skip to content

Commit

Permalink
refactor: prefix doi workflows with doi_ for consistency
Browse files Browse the repository at this point in the history
- bump deps for datacite schema 4.5 and django cve
  • Loading branch information
alee committed Oct 29, 2024
1 parent 825aa3f commit c417e2f
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 35 deletions.
12 changes: 6 additions & 6 deletions django/library/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
DataCiteRegistrationLog,
)

from datacite import DataCiteRESTClient, schema43
from datacite import DataCiteRESTClient, schema45
from datacite.errors import (
DataCiteError,
DataCiteNoContentError,
Expand Down Expand Up @@ -183,7 +183,7 @@ def _datacite_heartbeat_url(self):

def _validate_metadata(self, datacite_metadata: DataCiteSchema):
    """Validate DataCite metadata against the DataCite 4.5 JSON schema.

    The metadata object is serialized once with ``to_dict()`` and that dict is
    what gets validated, logged, and returned.

    Returns:
        A ``(datacite_metadata, metadata_dict)`` tuple: the object passed in
        and its dict serialization.

    Raises:
        DataCiteError: if ``schema45.validate`` rejects the serialized metadata.
    """
    metadata_dict = datacite_metadata.to_dict()
    # Only the schema 4.5 validator is consulted; the schema 4.3 check it
    # replaces must not run alongside it.
    if not schema45.validate(metadata_dict):
        logger.error("Invalid DataCite metadata: %s", metadata_dict)
        raise DataCiteError(f"Invalid DataCite metadata: {metadata_dict}")
    return datacite_metadata, metadata_dict
Expand All @@ -202,16 +202,16 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease):
return "XX.DRYXX/XXXX-XRUN", True
if hasattr(codebase_or_release, "datacite"):
del codebase_or_release.datacite
datacite_metadata, metadata_dict = self._validate_metadata(
codebase_or_release.datacite
)

doi = "Unassigned"
http_status = 200
message = "Minted new DOI successfully."

datacite_metadata = codebase_or_release.datacite

try:
doi = self.datacite_client.public_doi(
metadata_dict, url=codebase_or_release.permanent_url
datacite_metadata.to_dict(), url=codebase_or_release.permanent_url
)
except DataCiteError as e:
logger.error(e)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import sys
from django.core.management.base import BaseCommand
from library.doi import VERIFICATION_MESSAGE, get_welcome_message
from library.models import CodebaseRelease
Expand All @@ -9,9 +10,7 @@
def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True):
print(get_welcome_message(dry_run))

unreviewed_releases_with_dois = CodebaseRelease.objects.filter(
peer_reviewed=False, doi__isnull=False
)
unreviewed_releases_with_dois = CodebaseRelease.objects.unreviewed().with_doi()
total_unreviewed_releases_with_dois = unreviewed_releases_with_dois.count()

logger.info(
Expand All @@ -24,23 +23,21 @@ def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True):
)
if confirm.lower() == "delete":
unreviewed_releases_with_dois.update(doi=None)
else:
logger.debug("Aborting...")
sys.exit()

"""
assert correctness
"""
if not dry_run:
print(VERIFICATION_MESSAGE)
logger.info(
"Checking that DOIs for all not peer reviewed releases have been deleted..."
)
assert (
CodebaseRelease.objects.filter(
peer_reviewed=False, doi__isnull=False
).count()
== 0
"Checking that DOIs for all unreviewed releases have been deleted..."
)
assert not CodebaseRelease.objects.unreviewed().with_doi().exists()
logger.info(
"All DOIs from not peer_reviewed CodebaseReleases %s with DOIs deleted successfully.",
"%s unreviewed CodebaseReleases with DOIs updated successfully.",
total_unreviewed_releases_with_dois,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@ def update_doi_metadata(interactive=True, dry_run=True):

datacite_api = DataCiteApi(dry_run=dry_run)
all_codebases_with_dois = Codebase.objects.with_doi()
total_number_of_codebases_with_dois = all_codebases_with_dois.count()

logger.info(
"Updating metadata for all codebases (%s) with DOIs and their releases with DOIs. ...",
all_codebases_with_dois.count(),
total_number_of_codebases_with_dois,
)

for i, codebase in enumerate(all_codebases_with_dois):
logger.debug(
"Processing codebase %s - %s/%s",
codebase.pk,
i + 1,
all_codebases_with_dois.count(),
total_number_of_codebases_with_dois,
)
if interactive:
input("Press Enter to continue or CTRL+C to quit...")
Expand Down Expand Up @@ -68,39 +69,38 @@ def update_doi_metadata(interactive=True, dry_run=True):
"""
if not dry_run:
print(VERIFICATION_MESSAGE)
logger.info("Checking that Comses metadata is in sync with DataCite...")
invalid_codebases = []
logger.info("Checking that local metadata is in sync with DataCite...")
invalid_releases = []

results = datacite_api.threaded_metadata_check(all_codebases_with_dois)
for pk, is_meta_valid in results:
if not is_meta_valid:
invalid_codebases.append(pk)

invalid_codebases = [
pk for pk, is_valid_metadata in results if not is_valid_metadata
]
# Report the outcome of the codebase metadata check.
if invalid_codebases:
    # invalid_codebases is a plain list of primary keys, so its size must come
    # from len(); list.count() requires a value argument and would raise
    # TypeError when called with none.
    logger.error(
        "FAILURE: Metadata not in sync with DataCite for %s codebases: %s",
        len(invalid_codebases),
        invalid_codebases,
    )
else:
    logger.info(
        "SUCCESS: Metadata in sync with DataCite for all codebases with DOI."
    )

all_releases_with_dois = CodebaseRelease.objects.with_doi()
results = datacite_api.threaded_metadata_check(all_releases_with_dois)
for pk, is_meta_valid in results:
if not is_meta_valid:
invalid_releases.append(pk)

invalid_releases = [
pk for pk, is_valid_metadata in results if not is_valid_metadata
]
# Report the outcome of the release metadata check.
if invalid_releases:
    # invalid_releases is a plain list of primary keys, so its size must come
    # from len(); list.count() requires a value argument and would raise
    # TypeError when called with none.
    logger.error(
        "FAILURE: Metadata not in sync with DataCite for %s releases: %s",
        len(invalid_releases),
        invalid_releases,
    )
else:
    logger.info(
        "SUCCESS: Metadata in sync with DataCite for all releases with DOI."
    )


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Generated by Django 4.2.15 on 2024-10-25 23:47

from django.db import migrations, models
import django.db.models.deletion


# Schema migration for the "library" app: creates the DataCiteRegistrationLog
# model, removes Contributor.affiliations and the ContributorAffiliation model,
# and alters several timestamp fields.
class Migration(migrations.Migration):

# This migration is ordered after library migration 0030_peerreviewinvitation.
dependencies = [
("library", "0030_peerreviewinvitation"),
]

operations = [
# Create the DataCiteRegistrationLog model. Its "codebase" and "release"
# foreign keys are attached by the two AddField operations at the end of
# this list.
migrations.CreateModel(
name="DataCiteRegistrationLog",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
# Which of the four DOI create / metadata update actions was logged.
"action",
models.CharField(
choices=[
("CREATE_RELEASE_DOI", "create release DOI"),
("CREATE_CODEBASE_DOI", "create codebase DOI"),
("UPDATE_RELEASE_METADATA", "update release metadata"),
("UPDATE_CODEBASE_METADATA", "update codebase metadata"),
],
max_length=50,
),
),
# auto_now_add: populated when the log row is first created.
("timestamp", models.DateTimeField(auto_now_add=True)),
# http_status and message are nullable and default to None.
("http_status", models.IntegerField(default=None, null=True)),
("message", models.TextField(default=None, null=True)),
("metadata_hash", models.CharField(max_length=255)),
# The DOI is optional on a log row (blank and null are both allowed).
("doi", models.CharField(blank=True, max_length=255, null=True)),
],
),
# Drop the affiliations field from Contributor; the ContributorAffiliation
# model itself is deleted further down.
migrations.RemoveField(
model_name="contributor",
name="affiliations",
),
# date_created on Codebase, CodebaseRelease and CodebaseReleaseDownload
# becomes a DateTimeField with auto_now_add=True.
migrations.AlterField(
model_name="codebase",
name="date_created",
field=models.DateTimeField(auto_now_add=True),
),
migrations.AlterField(
model_name="codebaserelease",
name="date_created",
field=models.DateTimeField(auto_now_add=True),
),
migrations.AlterField(
model_name="codebasereleasedownload",
name="date_created",
field=models.DateTimeField(auto_now_add=True),
),
# NOTE(review): unlike the fields above this one uses auto_now, which Django
# refreshes on every save() rather than only on creation -- confirm that is
# the intended behavior for a "date_sent" timestamp.
migrations.AlterField(
model_name="peerreviewinvitation",
name="date_sent",
field=models.DateTimeField(auto_now=True),
),
migrations.DeleteModel(
name="ContributorAffiliation",
),
# Nullable foreign keys from a log row to Codebase and CodebaseRelease.
# Both use CASCADE on delete and the reverse accessor name "datacite_logs".
migrations.AddField(
model_name="dataciteregistrationlog",
name="codebase",
field=models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="datacite_logs",
to="library.codebase",
),
),
migrations.AddField(
model_name="dataciteregistrationlog",
name="release",
field=models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="datacite_logs",
to="library.codebaserelease",
),
),
]
3 changes: 3 additions & 0 deletions django/library/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,9 @@ def accessible(self, user):
def reviewed(self, **kwargs):
    """Keep only rows with ``peer_reviewed=True``.

    Any extra keyword lookups in ``kwargs`` are applied in the same
    ``filter()`` call as the ``peer_reviewed`` condition.
    """
    reviewed_only = {"peer_reviewed": True}
    return self.filter(**reviewed_only, **kwargs)

def unreviewed(self, **kwargs):
    """Drop rows with ``peer_reviewed=True``, then apply the ``kwargs`` lookups.

    The exclusion and the keyword filter are two separate chained calls.
    """
    not_reviewed = self.exclude(peer_reviewed=True)
    return not_reviewed.filter(**kwargs)

def with_doi(self, **kwargs):
    """Drop rows whose ``doi`` is NULL or the empty string, then apply ``kwargs``.

    The keyword lookups are applied in a ``filter()`` call chained after the
    exclusion.
    """
    missing_doi = Q(doi__isnull=True) | Q(doi="")
    has_doi = self.exclude(missing_doi)
    return has_doi.filter(**kwargs)

Expand Down
4 changes: 2 additions & 2 deletions django/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
bagit==1.8.1
bleach==6.1.0
datacite==1.1.4
datacite==1.2.0
dedupe==3.0.2
django-allauth==0.63.6
django-anymail[mailgun]==10.3
Expand All @@ -24,7 +24,7 @@ django-vite==2.1.3 # latest is 3.0.4
django-waffle==4.1.0
djangorestframework==3.15.2
djangorestframework-camel-case==1.4.2
Django==4.2.15
Django==4.2.16
elasticsearch-dsl>=7.0.0,<8.0.0
elasticsearch>=7.0.0,<8.0.0
html2text>=2016.9.19
Expand Down

0 comments on commit c417e2f

Please sign in to comment.