-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #720 from asuworks/230-cml-refactor-contributor-edit-metadata-form-affiliations-handling
Refactor contributor edit metadata form: replace Tag-based affiliations with ROR JSON affiliations; clean up Contributor / ReleaseContributor creation logic and UI. Closes comses/planning#230
- Loading branch information
Showing
27 changed files
with
588 additions
and
142 deletions.
There are no files selected for viewing
2 changes: 1 addition & 1 deletion
2
django/core/management/commands/populate_memberprofile_affiliations.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
28 changes: 28 additions & 0 deletions
28
django/curator/management/commands/sync_user_contributors.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import logging | ||
|
||
from django.core.management.base import BaseCommand | ||
|
||
from library.models import Contributor | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Command(BaseCommand):
    """Management command that copies User metadata onto linked Contributors."""

    help = """Synchronize user metadata with contributor metadata for testing / development purposes."""

    def handle(self, *args, **options):
        """Overwrite name, email and affiliations of every user-linked Contributor.

        Each Contributor with a non-null ``user`` gets ``given_name``,
        ``family_name`` and ``email`` copied from that user, and
        ``json_affiliations`` copied from the user's member profile. When the
        user has no member profile the affiliations are left untouched (and a
        warning is logged) instead of aborting the whole run.
        """
        # cannot update local model attributes to a join field attribute; this doesn't work:
        # Contributor.objects.filter(user__isnull=False).update(given_name=F('user__first_name'), ...)
        # see
        # https://docs.djangoproject.com/en/dev/topics/db/queries/#updating-multiple-objects-at-once
        # for more details
        linked_contributors = Contributor.objects.select_related("user").filter(
            user__isnull=False
        )
        for contributor in linked_contributors:
            user = contributor.user
            contributor.given_name = user.first_name
            contributor.family_name = user.last_name
            contributor.email = user.email
            # NOTE(review): member_profile is presumably a reverse one-to-one
            # accessor; those raise an AttributeError subclass when the related
            # row is missing, which getattr() with a default absorbs.
            member_profile = getattr(user, "member_profile", None)
            if member_profile is None:
                logger.warning(
                    "user %s has no member profile; leaving affiliations of contributor %s unchanged",
                    user.pk,
                    contributor.pk,
                )
            else:
                contributor.json_affiliations = member_profile.affiliations
            contributor.save()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,7 +55,6 @@ | |
|
||
|
||
class Command(BaseCommand): | ||
|
||
""" | ||
Create CoMSES 2019 virtual conference landing page | ||
""" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
126 changes: 126 additions & 0 deletions
126
django/library/management/commands/populate_contributor_affiliations.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
from collections import defaultdict | ||
from django.core.management.base import BaseCommand | ||
from rapidfuzz import fuzz | ||
from requests.adapters import HTTPAdapter | ||
from urllib3 import Retry | ||
|
||
import logging | ||
import requests | ||
|
||
from library.models import ContributorAffiliation, Contributor | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Command(BaseCommand):
    """Migrate tag-based contributor affiliations into ``Contributor.json_affiliations``.

    Every ContributorAffiliation tag name is looked up against the ROR API and,
    when the best hit is good enough, replaced by the ROR record's name, id,
    first acronym and first link. Otherwise the original name is kept as-is.
    """

    help = """Migrate data from ContributorAffiliation Tags to Contributor.json_affiliations; attempts to augment data with a basic ROR API lookup"""

    def add_arguments(self, parser):
        """Register the ``-r/--ratio`` matching threshold option."""
        parser.add_argument(
            "-r",
            "--ratio",
            type=int,
            # upper bound is exclusive: 101 makes the advertised 100 selectable
            choices=range(1, 101),
            metavar="[1-100]",
            default=80,
            help="""threshold used in fuzzy matching and ROR API score (divided by 100 for a floating point number between 0.0 and 1.0). Defaults to 80""",
        )

    def handle(self, *args, **options):
        """Build the affiliation list for each contributor and persist it.

        ``options["ratio"]`` is used directly as the fuzzy match threshold and,
        divided by 100, as the ROR score threshold.
        """
        fuzzy_match_threshold = options["ratio"]
        ror_score_threshold = fuzzy_match_threshold / 100.0
        adapter = HTTPAdapter(
            max_retries=Retry(
                total=6,
                backoff_factor=1.5,
                allowed_methods=None,
                status_forcelist=[429, 500, 502, 503, 504],
            ),
        )

        ordered_contributor_affiliations = (
            ContributorAffiliation.objects.all().order_by("content_object_id")
        )

        logger.info("Looking up affiliations against ROR API")

        # build affiliations_by_contributor_id dictionary
        contributor_affiliations = defaultdict(list)
        # many contributors share an affiliation name; query the API once per name
        best_match_by_name = {}
        # the context manager releases pooled connections when the lookups are done
        with requests.Session() as session:
            session.mount("http://", adapter)
            session.mount("https://", adapter)
            for ca in ordered_contributor_affiliations:
                if not (ca.tag and ca.tag.name and ca.content_object_id):
                    continue

                contributor_id = ca.content_object_id
                affiliation_name = ca.tag.name
                if affiliation_name not in best_match_by_name:
                    best_match_by_name[affiliation_name] = self.lookup(
                        session, affiliation_name
                    )
                new_affiliation = self.to_affiliation(
                    affiliation_name,
                    best_match_by_name[affiliation_name],
                    match_threshold=fuzzy_match_threshold,
                    ror_score_threshold=ror_score_threshold,
                )
                # register the new affiliation with this contributor
                contributor_affiliations[contributor_id].append(new_affiliation)

        # Loop through enriched affiliations and save the json_affiliations
        # on each contributor
        for contributor_id, affiliations in contributor_affiliations.items():
            logger.info(
                "updating [contributor_id: %s] affiliations=%s",
                contributor_id,
                affiliations,
            )
            Contributor.objects.filter(pk=contributor_id).update(
                json_affiliations=affiliations
            )

    def lookup(self, session, name):
        """Return the first ROR affiliation match for ``name``, or None.

        Best-effort: request failures, error statuses and unexpected payloads
        are logged as warnings and reported as "no match" (None).
        """
        try:
            # pass the name via params so requests URL-encodes it; interpolating
            # it into the URL breaks on names containing '&', '#', '+', etc.
            response = session.get(
                "https://api.ror.org/organizations",
                params={"affiliation": name},
                timeout=10,
            )
            response.raise_for_status()
            items = response.json()["items"]
        except (requests.RequestException, KeyError, TypeError, ValueError) as e:
            logger.warning("[lookup %s] ROR API lookup failed: %s", name, e)
            return None
        logger.debug("[lookup %s] found %s", name, items)
        return items[0] if items else None

    def to_affiliation(
        self, name, best_match, match_threshold=85, ror_score_threshold=1.0
    ):
        """
        Returns a new affiliation dictionary with ROR data if a good match
        or a dict of the original data { "name": name } otherwise

        A match is good when its ROR score reaches ``ror_score_threshold`` or
        the partial fuzzy ratio between the ROR name and ``name`` reaches
        ``match_threshold``.
        """
        if best_match:
            score = best_match["score"]
            organization = best_match["organization"]
            ror_name = organization["name"]
            if (
                score >= ror_score_threshold
                or fuzz.partial_ratio(ror_name, name) >= match_threshold
            ):
                new_affiliation = {
                    "name": ror_name,
                    # ror id is guaranteed if lookup was successful
                    "ror_id": organization["id"],
                }
                # acronyms and links are not guaranteed to exist, so tolerate
                # both a missing key and an empty list
                acronyms = organization.get("acronyms")
                if acronyms:
                    new_affiliation["acronym"] = acronyms[0]
                links = organization.get("links")
                if links:
                    new_affiliation["url"] = links[0]
                # FIXME: additional geodata to include from the returned ROR API data?
                # e.g., GRID id, 'country', 'aliases', 'types', etc.
                return new_affiliation
            else:
                logger.warning("No reasonable match found for %s: %s", name, best_match)

        # either no best_match or failed the match_threshold fuzz test
        return {
            "name": name,
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
20 changes: 20 additions & 0 deletions
20
django/library/migrations/0028_contributor_json_affiliations.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Generated by Django 4.2.11 on 2024-05-22 23:38 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add the ``json_affiliations`` JSON field to the ``contributor`` model."""

    dependencies = [("library", "0027_codebase_spam_moderation")]

    operations = [
        migrations.AddField(
            model_name="contributor",
            name="json_affiliations",
            # callable default: every row gets its own fresh empty list
            field=models.JSONField(
                default=list,
                help_text="JSON-LD list of affiliated institutions",
            ),
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.