Skip to content

Commit

Permalink
Merge pull request #773 from CharlesSheelam/fix-metrics
Browse files Browse the repository at this point in the history
add map of comses members' institutional affiliations to metrics

- upgrade deps 
- refactor metrics generation
- add curator management command to update ROR affiliation metadata for all active MemberProfiles with affiliations
- fix incorrect labeling of codebase + codebaserelease metrics, resolves comses/planning#258
  • Loading branch information
alee authored Dec 12, 2024
2 parents 565fdb7 + f754b12 commit 96e7743
Show file tree
Hide file tree
Showing 16 changed files with 17,346 additions and 797 deletions.
3 changes: 3 additions & 0 deletions django/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,9 @@ def public(self, **kwargs):
user__username__in=EXCLUDED_USERNAMES
)

def with_affiliations(self, **kwargs):
    """
    Return the rows whose ``affiliations`` is not an empty list, further
    narrowed by any extra lookups passed as keyword arguments to ``filter()``
    """
    non_empty_affiliations = self.exclude(affiliations=[])
    return non_empty_affiliations.filter(**kwargs)

def find_users_with_email(self, candidate_email, exclude_user=None):
"""
Return a queryset of user ids with the given email
Expand Down
3 changes: 3 additions & 0 deletions django/core/settings/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,9 @@ def is_test(self):
# NOTE(review): os.getenv returns a string, so this is the text "true" unless the
# environment overrides it -- confirm consumers parse it rather than test truthiness
DATACITE_DRY_RUN = os.getenv("DATACITE_DRY_RUN", "true")
DATACITE_API_PASSWORD = read_secret("datacite_api_password")

# Base endpoint of the ROR v2 organizations API; a ROR id is appended as a path segment
ROR_API_URL = "https://api.ror.org/v2/organizations"


SOCIALACCOUNT_PROVIDERS = {
# https://developer.github.com/apps/building-integrations/setting-up-and-registering-oauth-apps/about-scopes-for-oauth-apps/
"github": {
Expand Down
107 changes: 107 additions & 0 deletions django/curator/management/commands/ror_update_affiliation_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import argparse
import json
import logging
import requests

from django.conf import settings
from django.core.management.base import BaseCommand

from core.models import MemberProfile


# module-level logger named after this module's import path
logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Curator command that refreshes MemberProfile affiliation metadata from the ROR API.

    Iterates over ``MemberProfile.objects.public().with_affiliations()`` and, for every
    affiliation carrying a ``ror_id``, merges name, aliases, acronyms, links, types,
    location and (when available) lat/lon coordinates from the ROR record into the
    affiliation dict. A profile is saved only if at least one of its affiliations changed.

    Progress is written to the command's stdout: ``.`` for each successful lookup and
    ``E`` for each lookup that failed or returned an unusable record.
    """

    help = """Update all MemberProfile affiliations with lat/lon locations pulled from the ROR API"""

    def add_arguments(self, parser):
        """Register the ``--force`` / ``--no-force`` flag (defaults to False)."""
        parser.add_argument(
            "--force",
            action=argparse.BooleanOptionalAction,
            default=False,
            dest="force",
            help="Force update of all affiliations with geo lat/lon data, name, link, and type",
        )

    def handle(self, *args, **options):
        """
        Inspects and updates all active MemberProfiles with affiliations with lat/lon coordinate data from the ROR API

        Affiliations that already have ``coordinates`` are skipped unless ``--force`` was given.
        """
        force = options["force"]
        # a single Session is shared across all lookups in this run
        with requests.Session() as session:
            for profile in MemberProfile.objects.public().with_affiliations():
                should_update_profile = False
                for affiliation in profile.affiliations:
                    # skip missing *and* empty ids: an empty id would hit the
                    # collection endpoint instead of a single organization record
                    ror_id = affiliation.get("ror_id")
                    if not ror_id:
                        continue
                    if "coordinates" in affiliation and not force:
                        continue
                    updated_affiliation_data = self.lookup_ror_id(ror_id, session)
                    if updated_affiliation_data:
                        affiliation.update(**updated_affiliation_data)
                        should_update_profile = True

                if should_update_profile:
                    profile.save()

    def _build_ror_api_url(self, query):
        """Return the ROR API URL for ``query`` appended to ``settings.ROR_API_URL``."""
        return f"{settings.ROR_API_URL}/{query}"

    def _write_progress(self, marker):
        """Write a single progress marker to the command's stdout without a newline."""
        self.stdout.write(marker, ending="")
        self.stdout.flush()

    def lookup_ror_id(self, ror_id, session):
        """
        Fetch the ROR record for ``ror_id`` and return the extracted affiliation metadata.

        :param ror_id: ROR identifier appended to ``settings.ROR_API_URL``
        :param session: ``requests.Session`` used to issue the GET request
        :return: dict of affiliation metadata (see ``_extract_ror_metadata``), or an
            empty dict if the request failed, the body was not JSON, or the record
            did not have the expected structure
        """
        # FIXME: should consider creating a simple ROR module to handle interactions with the ROR API
        # and extraction of metadata from their schema though currently only used here,
        # populate_memberprofile_affiliations, and in the frontend ror.ts api
        # may also benefit from the pydantic schema work that @sgfost is doing with codemeticulous
        api_url = self._build_ror_api_url(ror_id)
        try:
            response = session.get(api_url, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError):
            # ValueError covers JSON decode errors on requests versions where the
            # decode error does not derive from RequestException
            self._write_progress("E")
            logger.warning("Unable to retrieve ROR data for %s", ror_id)
            return {}

        logger.debug("ROR response: %s", data)
        try:
            ror_data = self._extract_ror_metadata(data)
        except (KeyError, IndexError, TypeError):
            # one malformed or partial record must not abort the whole run
            self._write_progress("E")
            logger.warning(
                "Unexpected ROR response structure for %s", ror_id, exc_info=True
            )
            return {}

        self._write_progress(".")
        return ror_data

    @staticmethod
    def _extract_ror_metadata(data):
        """
        Build the affiliation metadata dict from a decoded ROR organization record.

        Only the first entry of ``data["locations"]`` is used. ``coordinates`` is
        included only when that location has non-empty ``geonames_details``.

        :raises KeyError, IndexError, TypeError: if ``data`` lacks the expected keys
            or has empty ``locations`` / external id lists
        """
        location = data["locations"][0]
        ror_data = {
            "name": "",
            "aliases": [],
            "acronyms": [],
            "link": "",
            "types": data["types"],
            "wikipedia_url": "",
            "wikidata": "",
            "location": location,
        }
        geonames_details = location["geonames_details"]
        if geonames_details:
            ror_data["coordinates"] = {
                "lat": geonames_details["lat"],
                # ROR/geonames uses "lng"; affiliations store it as "lon"
                "lon": geonames_details["lng"],
            }
        for name_object in data["names"]:
            name_types = name_object["types"]
            if "ror_display" in name_types:
                ror_data["name"] = name_object["value"]
            if "alias" in name_types:
                ror_data["aliases"].append(name_object)
            if "acronym" in name_types:
                ror_data["acronyms"].append(name_object)
        for link_object in data["links"]:
            if link_object["type"] == "website":
                ror_data["link"] = link_object["value"]
            if link_object["type"] == "wikipedia":
                ror_data["wikipedia_url"] = link_object["value"]
        for external_id_object in data["external_ids"]:
            if external_id_object["type"] == "wikidata":
                ror_data["wikidata"] = external_id_object["all"][0]
        return ror_data
3 changes: 1 addition & 2 deletions django/home/management/commands/cache_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@


class Command(BaseCommand):
help = """Dump active user emails for mailchimp import with filtered by is_active=True
and optional date_joined --after=yyyy-mm-dd"""
help = """generate metrics and cache in redis"""

def add_arguments(self, parser):
"""
Expand Down
Loading

0 comments on commit 96e7743

Please sign in to comment.