Skip to content

Commit

Permalink
Merge pull request #720 from asuworks/230-cml-refactor-contributor-ed…
Browse files Browse the repository at this point in the history
…it-metadata-form-affiliations-handling

refactor contributor edit metadata form

- replace Tag based affiliations with ROR json affiliations
- clean up Contributor / ReleaseContributor creation logic and UI

closes comses/planning#230
  • Loading branch information
alee authored Jun 12, 2024
2 parents a2a39b6 + 719f05f commit 0f39e59
Show file tree
Hide file tree
Showing 27 changed files with 588 additions and 142 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django.core.management.base import BaseCommand
from fuzzywuzzy import fuzz
from rapidfuzz import fuzz
import re
import requests
import time
Expand Down
1 change: 1 addition & 0 deletions django/core/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ class Meta:
"email",
"profile_url",
"primary_affiliation_name",
"affiliations",
"tags",
"username",
)
Expand Down
4 changes: 3 additions & 1 deletion django/core/settings/test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from .dev import *
from .defaults import *

from os import path

DEPLOY_ENVIRONMENT = Environment.TEST

ALLOWED_HOSTS = ["localhost", "127.0.0.1", "server"]

EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend"

LOGGING["loggers"]["core.views"] = {
"level": "ERROR",
"handlers": ["console"],
Expand Down
28 changes: 28 additions & 0 deletions django/curator/management/commands/sync_user_contributors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import logging

from django.core.management.base import BaseCommand

from library.models import Contributor


logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Copy name, email and affiliation data from linked users onto their contributors."""

    help = """Synchronize user metadata with contributor metadata for testing / development purposes."""

    def handle(self, *args, **options):
        """Overwrite each user-linked Contributor's metadata with its User's values.

        Only contributors with a non-null ``user`` are touched. Each one is
        saved individually via ``save()``.
        """
        # cannot update local model attributes to a join field attribute; this doesn't work:
        # Contributor.objects.filter(user__isnull=False).update(given_name=F('user__first_name'), ...)
        # see
        # https://docs.djangoproject.com/en/dev/topics/db/queries/#updating-multiple-objects-at-once
        # for more details
        #
        # member_profile is read for every contributor below, so join it up
        # front instead of issuing one extra query per row.
        # NOTE(review): assumes member_profile is a one-to-one relation reachable
        # from User (it is accessed as a single object below) - confirm.
        linked_contributors = Contributor.objects.select_related(
            "user__member_profile"
        ).filter(user__isnull=False)
        for contributor in linked_contributors:
            user = contributor.user
            contributor.given_name = user.first_name
            contributor.family_name = user.last_name
            contributor.email = user.email
            contributor.json_affiliations = user.member_profile.affiliations
            contributor.save()
1 change: 0 additions & 1 deletion django/home/management/commands/add_comses_2019.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@


class Command(BaseCommand):

"""
Create CoMSES 2019 virtual conference landing page
"""
Expand Down
1 change: 0 additions & 1 deletion django/home/management/commands/add_open_code_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@


class Command(BaseCommand):

"""
Create CoMSES trusted digital repository landing page and add it as a child to the /resources/ CategoryIndexPage
"""
Expand Down
1 change: 0 additions & 1 deletion django/home/management/commands/add_repositories_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@


class Command(BaseCommand):

"""
Create CoMSES trusted digital repository landing page and add it as a child to the /resources/ CategoryIndexPage
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@


class Command(BaseCommand):

"""
create or update education index page and tutorial detail pages from markdown formatted files
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@


class Command(BaseCommand):

"""
Adjust the Community page for https://github.com/comses/comses.net/issues/584
this can't be in a data migration for arcane wagtail reasons related to CategoryIndexPage subclassing
Expand Down
1 change: 0 additions & 1 deletion django/home/management/commands/move_education_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@


class Command(BaseCommand):

"""
Move existing Education page from under Resources to a top level Page
"""
Expand Down
1 change: 0 additions & 1 deletion django/home/management/commands/setup_conference.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@


class Command(BaseCommand):

"""
Create CoMSES virtual conference landing pages, including wagtail replacement for archived CoMSES 2017 page.
See https://github.com/wagtail/wagtail/issues/742 for more details - this can't be in a data migration because the
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
from collections import defaultdict
from django.core.management.base import BaseCommand
from rapidfuzz import fuzz
from requests.adapters import HTTPAdapter
from urllib3 import Retry

import logging
import requests

from library.models import ContributorAffiliation, Contributor

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Convert tag-based contributor affiliations into JSON affiliations.

    Every ContributorAffiliation tag name is looked up against the ROR API;
    when the top result is a good enough match its ROR metadata is stored,
    otherwise only the original name is kept. The resulting list is written
    to ``Contributor.json_affiliations``.
    """

    help = """Migrate data from ContributorAffiliation Tags to Contributor.json_affiliations; attempts to augment data with a basic ROR API lookup"""

    ROR_API_URL = "https://api.ror.org/organizations"

    def add_arguments(self, parser):
        """Register the ``-r/--ratio`` matching threshold option."""
        parser.add_argument(
            "-r",
            "--ratio",
            type=int,
            # range() excludes its upper bound; 101 is needed so that the
            # advertised maximum of 100 is actually accepted
            choices=range(1, 101),
            metavar="[1-100]",
            default=80,
            help="""threshold used in fuzzy matching and ROR API score (divided by 100 for a floating point number between 0.0 and 1.0). Defaults to 80""",
        )

    def _build_session(self):
        """Return a requests Session that retries throttled / failed requests."""
        session = requests.Session()
        adapter = HTTPAdapter(
            max_retries=Retry(
                total=6,
                backoff_factor=1.5,
                allowed_methods=None,
                status_forcelist=[429, 500, 502, 503, 504],
            ),
        )
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        return session

    def handle(self, *args, **options):
        """Look up every tag affiliation and save the results per contributor."""
        fuzzy_match_threshold = options["ratio"]
        ror_score_threshold = fuzzy_match_threshold / 100.0

        # tag.name is read for every row, so fetch the tag in the same query
        ordered_contributor_affiliations = (
            ContributorAffiliation.objects.select_related("tag")
            .all()
            .order_by("content_object_id")
        )

        logger.info("Looking up affiliations against ROR API")

        # build affiliations_by_contributor_id dictionary
        contributor_affiliations = defaultdict(list)
        # many contributors share the same affiliation name; query the ROR API
        # only once per distinct name
        lookup_cache = {}
        # the context manager closes the session's connections when done
        with self._build_session() as session:
            for ca in ordered_contributor_affiliations:
                if not (ca.tag and ca.tag.name and ca.content_object_id):
                    continue

                contributor_id = ca.content_object_id
                affiliation_name = ca.tag.name
                if affiliation_name not in lookup_cache:
                    lookup_cache[affiliation_name] = self.lookup(
                        session, affiliation_name
                    )
                best_match = lookup_cache[affiliation_name]
                # to_affiliation builds a fresh dict on every call, so cached
                # lookups never cause contributors to share a mutable dict
                new_affiliation = self.to_affiliation(
                    affiliation_name,
                    best_match,
                    match_threshold=fuzzy_match_threshold,
                    ror_score_threshold=ror_score_threshold,
                )
                # register the new affiliation with this contributor
                contributor_affiliations[contributor_id].append(new_affiliation)

        # Loop through enriched affiliations and save the json_affiliations
        # on each contributor
        for contributor_id, affiliations in contributor_affiliations.items():
            logger.info(
                "updating [contributor_id: %s] affiliations=%s",
                contributor_id,
                affiliations,
            )
            Contributor.objects.filter(pk=contributor_id).update(
                json_affiliations=affiliations
            )

    def lookup(self, session, name):
        """Return the first ROR API affiliation result for ``name``, or None.

        None is returned both when the API has no results and when the request
        or response parsing fails; failures are logged, never raised.
        """
        try:
            # pass the name via params so that characters such as '&', '#' or
            # '?' are URL-encoded instead of corrupting the query string
            response = session.get(
                self.ROR_API_URL, params={"affiliation": name}, timeout=10
            )
            response.raise_for_status()
            items = response.json()["items"]
            logger.debug("[lookup %s] found %s", name, items)
            return items[0] if items else None
        except Exception as e:
            # deliberate best-effort: one failed lookup must not abort the
            # whole migration, the affiliation simply keeps its original name
            logger.warning("[lookup %s] ROR lookup failed: %s", name, e)
            return None

    def to_affiliation(
        self, name, best_match, match_threshold=85, ror_score_threshold=1.0
    ):
        """
        Returns a new affiliation dictionary with ROR data if a good match
        or a dict of the original data { "name": name } otherwise

        A match is accepted when the ROR score reaches ``ror_score_threshold``
        or the fuzzy partial ratio between the ROR name and ``name`` reaches
        ``match_threshold``.
        """
        if best_match:
            score = best_match["score"]
            organization = best_match["organization"]
            ror_name = organization["name"]
            if (
                score >= ror_score_threshold
                or fuzz.partial_ratio(ror_name, name) >= match_threshold
            ):
                new_affiliation = {
                    "name": ror_name,
                    # ror id is guaranteed if lookup was successful
                    "ror_id": organization["id"],
                }
                # acronyms and links are not guaranteed to exist, so read them
                # with .get() rather than indexing
                acronyms = organization.get("acronyms")
                if acronyms:
                    new_affiliation["acronym"] = acronyms[0]
                links = organization.get("links")
                if links:
                    new_affiliation["url"] = links[0]
                # FIXME: additional geodata to include from the returned ROR API data?
                # e.g., GRID id, 'country', 'aliases', 'types', etc.
                return new_affiliation
            else:
                logger.warning("No reasonable match found for %s: %s", name, best_match)

        # either no best_match or failed the match_threshold fuzz test
        return {
            "name": name,
        }
1 change: 0 additions & 1 deletion django/library/management/commands/update_codemeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@


class Command(BaseCommand):

"""
Updates all codemeta files for all Codebases and updates archive
"""
Expand Down
20 changes: 20 additions & 0 deletions django/library/migrations/0028_contributor_json_affiliations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 4.2.11 on 2024-05-22 23:38

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``json_affiliations`` JSON field to the ``contributor`` model."""

    dependencies = [("library", "0027_codebase_spam_moderation")]

    operations = [
        migrations.AddField(
            model_name="contributor",
            name="json_affiliations",
            field=models.JSONField(
                help_text="JSON-LD list of affiliated institutions",
                default=list,
            ),
        ),
    ]
56 changes: 52 additions & 4 deletions django/library/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from core import fs
from core.backends import add_to_comses_permission_whitelist
from core.fields import MarkdownField
from core.models import Platform, MemberProfile, ModeratedContent, SpamModeration
from core.models import Platform, MemberProfile, ModeratedContent
from core.queryset import get_viewable_objects_for_user
from core.utils import send_markdown_email
from core.view_helpers import get_search_queryset
Expand Down Expand Up @@ -136,6 +136,11 @@ class Contributor(index.Indexed, ClusterableModel):
middle_name = models.CharField(max_length=100, blank=True)
family_name = models.CharField(max_length=100, blank=True)
affiliations = ClusterTaggableManager(through=ContributorAffiliation)

json_affiliations = models.JSONField(
default=list, help_text=_("JSON-LD list of affiliated institutions")
)

type = models.CharField(
max_length=16,
choices=(("person", "person"), ("organization", "organization")),
Expand All @@ -151,6 +156,7 @@ class Contributor(index.Indexed, ClusterableModel):
index.SearchField("given_name"),
index.SearchField("family_name"),
index.RelatedFields("affiliations", [index.SearchField("name")]),
index.SearchField("json_affiliations_string"),
index.SearchField("email"),
index.RelatedFields(
"user",
Expand All @@ -163,11 +169,25 @@ class Contributor(index.Indexed, ClusterableModel):
),
]

@cached_property
def json_affiliations_string(self):
    """Return every JSON affiliation rendered as text, joined by ", "."""
    rendered_affiliations = map(self.to_affiliation_string, self.json_affiliations)
    return ", ".join(rendered_affiliations)

@classmethod
def to_affiliation_string(cls, afl):
    """Return the affiliation's name, url and acronym as one space-separated string.

    Missing or empty values are left out so that the literal text "None"
    never ends up in the resulting (searchable) string.
    """
    # e.g., "Arizona State University https://www.asu.edu ASU"
    parts = (afl.get("name"), afl.get("url"), afl.get("acronym"))
    return " ".join(str(part) for part in parts if part)

@staticmethod
def from_user(user):
"""
Returns a tuple of (object, created) based on the
https://docs.djangoproject.com/en/3.1/ref/models/querysets/#get-or-create
https://docs.djangoproject.com/en/4.2/ref/models/querysets/#get-or-create
contract
"""
try:
Expand All @@ -177,6 +197,8 @@ def from_user(user):
"given_name": user.first_name,
"family_name": user.last_name,
"email": user.email,
# FIXME: rename to affiliations eventually
"json_affiliations": user.member_profile.affiliations,
},
)
except Contributor.MultipleObjectsReturned:
Expand Down Expand Up @@ -211,10 +233,11 @@ def to_codemeta(self):
"givenName": self.given_name,
"familyName": self.family_name,
}
# FIXME: should we proxy to User / MemberProfile fields if User is available
if self.orcid_url:
codemeta["@id"] = self.orcid_url
if self.affiliations.exists():
codemeta["affiliation"] = self.formatted_affiliations
if self.json_affiliations:
codemeta["affiliation"] = self.codemeta_affiliation
if self.email:
codemeta["email"] = self.email
return codemeta
Expand Down Expand Up @@ -259,10 +282,35 @@ def _get_person_full_name(self, family_name_first=False):
def formatted_affiliations(self):
    """Return the names of all tag-based affiliations joined by ", "."""
    affiliation_names = self.affiliations.values_list("name", flat=True)
    return ", ".join(affiliation_names)

@property
def codemeta_affiliation(self):
    """
    For now codemeta affiliations appear to be a single https://schema.org/Organization

    Returns the codemeta form of the first JSON affiliation, or None when
    there are no JSON affiliations.
    """
    if not self.json_affiliations:
        return None
    first_affiliation = self.json_affiliations[0]
    return self.to_codemeta_affiliation(first_affiliation)

@property
def primary_affiliation_name(self):
    """Return the name of the first tag-based affiliation, or "" if there is none."""
    # first() already returns None for an empty result, so a separate
    # exists() check would only add a second query
    primary_affiliation = self.affiliations.first()
    return primary_affiliation.name if primary_affiliation else ""

@property
def primary_json_affiliation_name(self):
    """Return the name of the first JSON affiliation, or "" if unavailable.

    An empty string is returned when there are no JSON affiliations or when
    the first entry has no (or an empty) "name".
    """
    if not self.json_affiliations:
        return ""
    # use .get() like the other affiliation helpers so an entry without a
    # "name" key does not raise KeyError
    return self.json_affiliations[0].get("name") or ""

def to_codemeta_affiliation(self, affiliation):
    """Convert one affiliation dict into a codemeta Organization dict.

    Returns an empty dict when ``affiliation`` is empty or None. Keys missing
    from ``affiliation`` are emitted with a value of None.
    """
    if not affiliation:
        return {}
    ror_id = affiliation.get("ror_id")
    organization = {
        # FIXME: may switch to https://schema.org/ResearchOrganization at some point
        "@type": "Organization",
        "@id": ror_id,
        "name": affiliation.get("name"),
        "url": affiliation.get("url"),
        "identifier": ror_id,
        "sameAs": ror_id,
    }
    return organization

def get_profile_url(self):
user = self.user
if user:
Expand Down
Loading

0 comments on commit 0f39e59

Please sign in to comment.