Skip to content

Commit

Permalink
refactor: prefix doi management commands
Browse files Browse the repository at this point in the history
- prefix all one-off destructive DOI commands with `doi_`
- add reset_staging to mint new DOIs on staging using the datacite
  sandbox, doi_reset_staging -> step 3, doi_mint_parent_codebase_dois
- bump deps for datacite schema 4.5 and django cve
  • Loading branch information
alee committed Nov 14, 2024
1 parent 9c2678f commit 7217b2b
Show file tree
Hide file tree
Showing 14 changed files with 398 additions and 242 deletions.
21 changes: 15 additions & 6 deletions django/library/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
DataCiteRegistrationLog,
)

from datacite import DataCiteRESTClient, schema43
from datacite import DataCiteRESTClient, schema45
from datacite.errors import (
DataCiteError,
DataCiteNoContentError,
Expand Down Expand Up @@ -183,8 +183,12 @@ def _datacite_heartbeat_url(self):

def _validate_metadata(self, datacite_metadata: DataCiteSchema):
    """Validate DataCite metadata against the DataCite 4.5 schema.

    Args:
        datacite_metadata: metadata object exposing ``to_dict()``.

    Returns:
        A ``(datacite_metadata, metadata_dict)`` tuple, where
        ``metadata_dict`` is the ``to_dict()`` form that was validated.

    Raises:
        DataCiteError: if validation fails; the underlying validation
            exception is chained as ``__cause__``.
    """
    metadata_dict = datacite_metadata.to_dict()
    try:
        schema45.validator.validate(metadata_dict)
    except Exception as e:
        # One placeholder per argument: with a single %s and two arguments
        # the logging module fails to format the record and the validation
        # error itself is never reported.
        # NOTE(review): schema45.tostring() is called on metadata that just
        # failed validation; confirm it cannot raise here and mask the
        # DataCiteError below.
        logger.error(
            "Invalid DataCite metadata: %s (%s)",
            schema45.tostring(metadata_dict),
            e,
        )
        raise DataCiteError(f"Invalid DataCite metadata: {metadata_dict}") from e
    return datacite_metadata, metadata_dict

Expand All @@ -202,17 +206,22 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease):
return "XX.DRYXX/XXXX-XRUN", True
if hasattr(codebase_or_release, "datacite"):
del codebase_or_release.datacite
datacite_metadata, metadata_dict = self._validate_metadata(
codebase_or_release.datacite
)

doi = "Unassigned"
http_status = 200
message = "Minted new DOI successfully."

datacite_metadata = codebase_or_release.datacite

try:
datacite_metadata, metadata_dict = self._validate_metadata(
datacite_metadata
)
doi = self.datacite_client.public_doi(
metadata_dict, url=codebase_or_release.permanent_url
)
codebase_or_release.doi = doi
codebase_or_release.save()
except DataCiteError as e:
logger.error(e)
message = str(e)
Expand Down
64 changes: 0 additions & 64 deletions django/library/management/commands/clean_peer_reviewed_dois_02.py

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def update_existing_dois(interactive=True, dry_run=True):
total_peer_reviewed_releases_count = peer_reviewed_releases.count()

logger.info(
"Updating DOIs for %s peer reviewed CodebaseReleases with DOIs",
"Updating DOIs for parent Codebases of %s peer reviewed CodebaseReleases with DOIs",
total_peer_reviewed_releases_count,
)

Expand Down Expand Up @@ -214,7 +214,7 @@ def add_arguments(self, parser):
"--interactive",
action="store_true",
help="Wait for user to press enter to continue.",
default=True,
default=False,
)
parser.add_argument(
"--dry-run", action="store_true", help="Output what would have happened."
Expand Down
79 changes: 79 additions & 0 deletions django/library/management/commands/doi_reset_production.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import csv
import logging
import sys
from django.core.management.base import BaseCommand
from library.doi import VERIFICATION_MESSAGE, get_welcome_message
from library.models import Codebase, CodebaseRelease

logger = logging.getLogger(__name__)


def cleanup_existing_dois(interactive=True, dry_run=True):
    """Remove all Codebase DOIs and all unreviewed CodebaseRelease DOIs.

    Before each removal the affected primary keys and DOIs are written to a
    CSV file in the current working directory (``codebases_with_dois.csv``
    and ``unreviewed_releases_with_dois.csv``) so the deleted values are
    still on record.

    Args:
        interactive: when true, ask the operator to type ``DELETE`` before
            each destructive step; any other answer exits the process.
        dry_run: when true, nothing is written to disk or to the database.

    Raises:
        SystemExit: if the operator does not confirm a deletion.
        AssertionError: if Codebase DOIs remain after the update.
    """
    print(get_welcome_message(dry_run))
    codebases_with_dois = Codebase.objects.with_doi()

    logger.info("Removing all Codebase DOIs")
    if interactive and codebases_with_dois.exists():
        confirm = input(
            "WARNING: this will remove all existing codebase DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: "
        )
        if confirm.lower() != "delete":
            logger.info("Aborting.")
            sys.exit()

    if not dry_run:
        print(VERIFICATION_MESSAGE)
        # newline="" lets the csv module control line endings itself, as the
        # csv documentation requires for files handed to csv.writer
        with open("codebases_with_dois.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["Codebase ID", "Codebase DOI"])
            for codebase in codebases_with_dois:
                writer.writerow([codebase.pk, codebase.doi])
        # the backup is written first so the DOIs are recorded before removal
        codebases_with_dois.update(doi=None)
        # sanity check: no Codebase should still carry a DOI
        assert not Codebase.objects.with_doi().exists()
        logger.info("Success. All existing codebase DOIs deleted.")

    # clean up unreviewed release DOIs
    unreviewed_releases_with_dois = CodebaseRelease.objects.unreviewed().with_doi()
    total_unreviewed_releases_with_dois = unreviewed_releases_with_dois.count()
    logger.info(
        "Removing %s unreviewed CodebaseRelease DOIs",
        total_unreviewed_releases_with_dois,
    )
    # only prompt when there is something to delete, matching the Codebase
    # prompt above
    if interactive and total_unreviewed_releases_with_dois:
        confirm = input(
            f"Deleting all DOIs for {total_unreviewed_releases_with_dois} unreviewed CodebaseReleases. Enter 'DELETE' to continue or CTRL+C to quit: "
        )
        if confirm.lower() != "delete":
            # logged at info, like the first abort, so the operator sees it
            logger.info("Aborting...")
            sys.exit()

    if not dry_run:
        with open("unreviewed_releases_with_dois.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["CodebaseRelease ID", "CodebaseRelease DOI"])
            for release in unreviewed_releases_with_dois:
                writer.writerow([release.pk, release.doi])
        unreviewed_releases_with_dois.update(doi=None)


class Command(BaseCommand):
    """Management command that runs ``cleanup_existing_dois``."""

    def add_arguments(self, parser):
        """Register the ``--interactive``, ``--no-interactive`` and ``--dry-run`` flags."""
        parser.add_argument(
            "--interactive",
            action="store_true",
            help="Wait for user to press enter to continue.",
            default=True,
        )
        # --interactive is store_true with default=True, so on its own it can
        # never yield False; this flag makes unattended runs possible while
        # keeping the existing flag and its default intact.
        parser.add_argument(
            "--no-interactive",
            action="store_false",
            dest="interactive",
            help="Do not prompt for confirmation.",
        )
        parser.add_argument(
            "--dry-run", action="store_true", help="Output what would have happened."
        )

    def handle(self, *args, **options):
        """Pass the parsed ``interactive`` and ``dry_run`` options to the cleanup."""
        interactive = options["interactive"]
        dry_run = options["dry_run"]
        cleanup_existing_dois(interactive, dry_run)
83 changes: 83 additions & 0 deletions django/library/management/commands/doi_reset_staging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import csv
import logging
import sys
from django.conf import settings
from django.core.management.base import BaseCommand
from library.doi import VERIFICATION_MESSAGE, get_welcome_message, DataCiteApi
from library.models import Codebase, CodebaseRelease

logger = logging.getLogger(__name__)


def reset_all_dois(interactive=True, dry_run=True):
    """Delete every DOI on a non-production deployment and mint new ones.

    Steps:
      1. Refuse to run when ``settings.DEPLOY_ENVIRONMENT.is_production``.
      2. Optionally ask the operator to type ``DELETE``.
      3. Unless ``dry_run``: record existing DOIs in
         ``deleted_codebase_dois.csv`` / ``deleted_release_dois.csv``, clear
         every Codebase and CodebaseRelease DOI, and verify none remain.
      4. Mint DOIs for all public, peer reviewed releases through
         ``DataCiteApi(dry_run=dry_run)``; failures are collected and written
         to ``invalid_releases.csv``.

    Args:
        interactive: when true, prompt for confirmation before deleting.
        dry_run: when true, no DOI is removed and no backup CSV is written;
            the flag is also passed to ``DataCiteApi``.

    Raises:
        SystemExit: in production (status 1), or when the operator declines.
        AssertionError: if DOIs remain after the reset.
    """
    print(get_welcome_message(dry_run))
    if settings.DEPLOY_ENVIRONMENT.is_production:
        logger.error("This command is not allowed in production.")
        # exit non-zero so callers and scripts can tell this was a refusal
        sys.exit(1)
    logger.info("(ENV: %s) Removing all DOIs", settings.DEPLOY_ENVIRONMENT)
    releases_with_dois = CodebaseRelease.objects.with_doi()
    codebases_with_dois = Codebase.objects.with_doi()

    # honor the interactive flag instead of always prompting
    if interactive:
        confirm = input(
            "WARNING: this will remove ALL existing DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: "
        )
        if confirm.lower() != "delete":
            logger.info("Aborting.")
            sys.exit()

    # honor dry_run: previously the DOIs were wiped even during a dry run
    if not dry_run:
        # newline="" lets the csv module control line endings itself
        with open("deleted_codebase_dois.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["Codebase ID", "Codebase DOI"])
            for codebase in codebases_with_dois:
                writer.writerow([codebase.pk, codebase.doi])
        Codebase.objects.update(doi=None)
        with open("deleted_release_dois.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["CodebaseRelease ID", "CodebaseRelease DOI"])
            for release in releases_with_dois:
                writer.writerow([release.pk, release.doi])
        CodebaseRelease.objects.update(doi=None)

        # sanity check: nothing should carry a DOI any more
        print(VERIFICATION_MESSAGE)
        assert Codebase.objects.with_doi().count() == 0
        assert CodebaseRelease.objects.with_doi().count() == 0
        logger.info("Success. All existing codebase DOIs deleted.")

    # Mint DOIs for all new Peer Reviewed Releases
    peer_reviewed_releases = CodebaseRelease.objects.reviewed().public()
    datacite_api = DataCiteApi(dry_run=dry_run)
    invalid_releases = []
    for release in peer_reviewed_releases:
        try:
            datacite_api.mint_new_doi_for_release(release)
        except Exception as e:
            # best effort: keep going and report every failure at the end
            logger.exception("Error minting DOI for release %s", release)
            invalid_releases.append((release, e))

    # Open the report once; reopening it in "w" mode for each failure kept
    # only the last failed release.
    if invalid_releases:
        with open("invalid_releases.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["CodebaseRelease ID", "Reason", "Datacite Metadata"])
            for release, error in invalid_releases:
                # NOTE(review): release.datacite.to_dict() may itself raise
                # for a release whose metadata is broken; confirm
                writer.writerow([release.pk, error, release.datacite.to_dict()])


class Command(BaseCommand):
    """Management command that runs ``reset_all_dois``."""

    def add_arguments(self, parser):
        """Register the ``--interactive``, ``--no-interactive`` and ``--dry-run`` flags."""
        parser.add_argument(
            "--interactive",
            action="store_true",
            help="Wait for user to press enter to continue.",
            default=True,
        )
        # --interactive is store_true with default=True, so on its own it can
        # never yield False; this flag makes unattended runs possible while
        # keeping the existing flag and its default intact.
        parser.add_argument(
            "--no-interactive",
            action="store_false",
            dest="interactive",
            help="Do not prompt for confirmation.",
        )
        parser.add_argument(
            "--dry-run", action="store_true", help="Output what would have happened."
        )

    def handle(self, *args, **options):
        """Pass the parsed ``interactive`` and ``dry_run`` options to the reset."""
        interactive = options["interactive"]
        dry_run = options["dry_run"]
        reset_all_dois(interactive, dry_run)
Loading

0 comments on commit 7217b2b

Please sign in to comment.