diff --git a/Cerebrum/modules/greg/datasource.py b/Cerebrum/modules/greg/datasource.py index affda1824..94c871b9e 100644 --- a/Cerebrum/modules/greg/datasource.py +++ b/Cerebrum/modules/greg/datasource.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021 University of Oslo, Norway +# Copyright 2021-2023 University of Oslo, Norway # # This file is part of Cerebrum. # @@ -183,7 +183,7 @@ def _parse_person_id(d): return { 'id': normalize_id(d['id']), 'person': normalize_id(d['person']), - # 'source': normalize_text(d['source']), + 'source': normalize_text(d['source'], allow_empty=True), 'type': normalize_text(d['type']), 'value': normalize_text(d['value'], allow_empty=True), 'verified': normalize_text(d['verified'], allow_empty=True), diff --git a/Cerebrum/modules/greg/importer.py b/Cerebrum/modules/greg/importer.py index 0000f194e..f9c1f7830 100644 --- a/Cerebrum/modules/greg/importer.py +++ b/Cerebrum/modules/greg/importer.py @@ -67,6 +67,15 @@ def get_import_class(cereconf=cereconf): return cls +def _is_deceased(person_obj, _today=None): + """ helper - check if a Person object is deceased. """ + today = _today or datetime.date.today() + return ( + person_obj + and person_obj.deceased_date + and date_compat.get_date(person_obj.deceased_date) < today) + + class GregImporter(object): REQUIRED_PERSON_ID = ( @@ -112,10 +121,11 @@ def get_person(self, greg_person): raise ValueError('invalid person: no external_ids') return search(self.db, criterias, required=False) - def get_ou(self, greg_orgunit): + def get_ou(self, orgunit_ids): """ Find matching ou from a Greg orgunit dict. """ search = OuMatcher() - criterias = tuple(self.mapper.get_orgunit_ids(greg_orgunit)) + criterias = tuple((id_type, id_value) + for id_type, id_value in orgunit_ids) if not criterias: raise ValueError('invalid orgunit: no external_ids') return search(self.db, criterias, required=True) @@ -143,11 +153,7 @@ def handle_object(self, greg_person, person_obj): """ greg_id = greg_person['id'] - is_deceased = ( - person_obj - and person_obj.deceased_date - and (date_compat.get_date(person_obj.deceased_date) - < datetime.date.today())) + is_deceased = _is_deceased(person_obj) if is_deceased: logger.warning('person_id=%s is marked as deceased', person_obj.entity_id) @@ -215,20 +221,23 @@ def remove(self, greg_person, person_obj): self._sync_affs(person_obj, ()) self._sync_consents(person_obj, ()) - def update(self, greg_person, person_obj): + def update(self, greg_person, person_obj, _today=None): """ Update the Person object using employee_data. """ if not greg_person: raise ValueError('update() called without greg person data!') if person_obj is None or not person_obj.entity_id: raise ValueError('update() called without cerebrum person!') + today = _today or datetime.date.today() + self._sync_name(person_obj, self.mapper.get_names(greg_person)) self._sync_ids(person_obj, self.mapper.get_person_ids(greg_person)) self._sync_cinfo(person_obj, self.mapper.get_contact_info(greg_person)) affs = ( - (aff_status, self.get_ou(ou_data).entity_id) - for aff_status, ou_data - in self.mapper.get_affiliations(greg_person) + (aff_status, self.get_ou(org_ids).entity_id) + for aff_status, org_ids, start_date, end_date + in self.mapper.get_affiliations(greg_person, + filter_active_at=today) ) self._sync_affs(person_obj, affs) self._sync_consents(person_obj, self.mapper.get_consents(greg_person)) diff --git a/Cerebrum/modules/greg/mapper.py b/Cerebrum/modules/greg/mapper.py index 058066126..6e4d38a8c 100644 --- a/Cerebrum/modules/greg/mapper.py +++ b/Cerebrum/modules/greg/mapper.py @@ -24,14 +24,6 @@ Import/update utils should use a py:class:`.GregMapper` to extract relevant Cerebrum data from a (sanitized) Greg object. - -Future changes --------------- -The Greg mappers are not configurable. If diverging business logic is required -in the future, we may opt to either: - -- Subclass GregMapper -- Add a mapper config, which we feed to the GregMapper on init. """ from __future__ import ( absolute_import, @@ -111,43 +103,47 @@ def get_names(greg_data): yield ('LAST', ln) -class GregPersonIds(object): - """ Extract external ids from greg person data. +class _GregIdentityMapper(object): + """ + Extract identity values from greg person data. - >>> person = { - ... 'id': '1', - ... 'identities': [{ - ... 'type': 'passport_number', - ... 'verified': 'automatic', - ... 'value': 'NO-123', - ... }]} - >>> get_ids = GregPersonIds() - >>> list(get_ids(person)) - [('GREG_PID', '1'), ('PASSNR', 'NO-123')] + Both external ids and contact info is extracted from this dataset. This is + the common functionality shared by both. """ - # identities.type value -> cerebrum external_id + # Map of source + type (or just type) to a cerebrum type. If a `(source, + # type)` exists in type_map, it will be chosen over just `type`. type_map = { - 'norwegian_national_id_number': 'NO_BIRTHNO', - 'passport_number': 'PASSNR', + # # external id examples + # + # ('foo', 'migration_id'): 'FOO_ID', + # 'norwegian_national_id_number': 'NO_BIRTHNO', + # 'passport_number': 'PASSNR', + + # # contact info examples + # ('feide', 'email'): 'EMAIL', + # 'private_mobile': 'PRIVATEMOBILE', } - # known identities.type that shouldn't be considered - ignore_types = set(( - 'feide_id', - 'feide_email', - 'private_email', - 'private_mobile', - )) + # Validate and/or normalize values from Greg by type. To invalidate a + # value, the callback should simply raise an exception (or return an empty + # value). + normalize_map = { + # # contact info examples + # ('feide', 'email'): (lambda v: str(v)), + # 'private_mobile': (lambda v: str(v)), + } - # values of identities.verified to accept - verified_values = set(( - 'automatic', - 'manual', - )) + # Values of identities.verified to accept. If empty, identities.verified + # won't be checked. + # + # TODO: We may want to set this by *type* - i.e. maybe we want to accept + # 'passport_number' regardless of its verified value, and we *only* want to + # accept 'norwegian_national_id_number' if `verified == 'automatic'` + verified_values = set() - # id -> cerebrum external_id - greg_id_type = 'GREG_PID' + # Include the greg object id value as this cerebrum type. + greg_id_type = None def __call__(self, greg_data): """ @@ -155,7 +151,7 @@ def __call__(self, greg_data): Sanitized greg person data (e.g. from `datasource.parse_person`) :returns generator: - Valid Cerebrum (id_type, id_value) pairs + Valid Cerebrum (crb_type, crb_value) pairs """ greg_id = greg_data['id'] @@ -163,27 +159,95 @@ def __call__(self, greg_data): yield self.greg_id_type, greg_id for id_obj in greg_data.get('identities', ()): - if id_obj['type'] in self.ignore_types: + id_type = id_obj['type'] + id_source = id_obj['source'] + id_verified = id_obj['verified'] + raw_value = id_obj['value'] + + # map and filter id source/type to external id type + if id_source and (id_source, id_type) in self.type_map: + crb_type = self.type_map[(id_source, id_type)] + elif id_type in self.type_map: + crb_type = self.type_map[id_type] + else: + # unknown id type + continue + + # map and filter verified values + if (self.verified_values + and id_verified not in self.verified_values): + logger.debug( + 'ignoring unverified source=%r type=%r for greg_id=%r', + id_source, id_type, greg_id) continue - if id_obj['type'] not in self.type_map: - logger.debug('ignoring unknown id_type=%r for greg_id=%s', - id_obj['type'], greg_id) + + # normalize and filter value + try: + if (id_source, id_type) in self.normalize_map: + value = self.normalize_map[id_source, id_type](raw_value) + elif id_type in self.normalize_map: + value = self.normalize_map[id_type](raw_value) + else: + value = raw_value + except Exception as e: + logger.warning( + "invalid source=%r type=%r for greg_id=%r: %s", + id_source, id_type, greg_id, str(e)) continue - crb_type = self.type_map[id_obj['type']] - if id_obj['verified'] not in self.verified_values: - logger.debug('ignoring unverified id_type=%r for greg_id=%s', - id_obj['type'], greg_id) + + if not value: + logger.warning( + "invalid source=%r type=%r for greg_id=%r: empty value", + id_source, id_type, greg_id) continue - value = id_obj['value'] # TODO/TBD: - # Values should already be verified by Greg, so we should be able - # to trust them. Still, we might want to validate/normalize - # values (e.g. valid fnr, passport number) here as well. + # Greg *can* have multiple values of a given type, from different + # sources. How should this be handled? Should we look for + # duplicates, and alternatively raise an error if there are + # multiple different values of a given type? + # + # Currently, we'll just crash and burn during import if we + # encounter duplicate crb_type with differing values yield crb_type, value -class GregContactInfo(object): +class GregPersonIds(_GregIdentityMapper): + """ Extract external ids from greg person data. + + >>> person = { + ... 'id': '1', + ... 'identities': [{ + ... 'type': 'passport_number', + ... 'verified': 'automatic', + ... 'value': 'NO-123', + ... }]} + >>> get_ids = GregPersonIds() + >>> list(get_ids(person)) + [('GREG_PID', '1'), ('PASSNR', 'NO-123')] + """ + + type_map = { + 'norwegian_national_id_number': 'NO_BIRTHNO', + 'passport_number': 'PASSNR', + } + + # TODO/TBD: + # Values should already be verified by Greg, so we should be able + # to trust them. Still, we might want to validate/normalize + # certain values here as well. + normalize_map = {} + + # values of identities.verified to accept + verified_values = set(( + 'automatic', + 'manual', + )) + + greg_id_type = 'GREG_PID' + + +class GregContactInfo(_GregIdentityMapper): """ Extract contanct info from greg person data. >>> person = { @@ -198,44 +262,15 @@ class GregContactInfo(object): [('PRIVATEMOBILE', '20123456')] """ - # identities.type value -> cerebrum contact_info_type type_map = { 'private_mobile': 'PRIVATEMOBILE', } - # known identities.type that shouldn't be considered - ignore_types = set(( - 'feide_id', - 'feide_email', - 'norwegian_national_id_number', - 'passport_number', - 'private_email', - )) - - def __call__(self, greg_data): - """ - :param dict greg_data: - Sanitized greg person data (e.g. from `datasource.parse_person`) - - :returns generator: - Valid Cerebrum (contact_info_type, contact_info_value) pairs - """ - greg_id = int(greg_data['id']) - for id_obj in greg_data.get('identities', ()): - if id_obj['type'] in self.ignore_types: - continue - if id_obj['type'] not in self.type_map: - logger.debug('ignoring unknown id_type=%r for greg_id=%s', - id_obj['type'], greg_id) - continue - - crb_type = self.type_map[id_obj['type']] - value = id_obj['value'] - # TODO/TBD: - # Values should already be verified by Greg, so we should be able - # to trust them. Still, we might want to validate/normalize - # values here as well. - yield crb_type, value + # TODO/TBD: + # Values should already be verified by Greg, so we should be able + # to trust them. Still, we might want to validate/normalize + # certain values here as well. + normalize_map = {} class GregConsents(object): @@ -243,11 +278,21 @@ class GregConsents(object): Extract consent data from greg person data. """ + # Map Greg consent type to internal import consent name. + # + # TODO: Do we really nedd to map this, or could this maybe just be a + # passlist of known consent types? + # + # TODO: Is this a generic consent? Or should this be moved to + # `Cerebrum.modules.no.uio.greg_import`? type_map = { 'publish': 'greg-publish', } # Consent value (choice) to bool (is-consent) + # + # TODO: We may want to map these value by type? + # E.g.: {'publish': {'yes': True, 'no': False}} value_map = { 'yes': True, 'no': False, @@ -281,72 +326,80 @@ def __call__(self, greg_data): consents.discard(greg_type) seen.add(greg_type) - if greg_value not in self.value_map: + if greg_value in self.value_map: + is_consent = self.value_map[greg_value] + logger.debug('found consent %s=%r (%s=%r) for greg_id=%s', + crb_type, is_consent, greg_type, greg_value, + greg_id) + else: # invalid consent value (choice), discard consent is_consent = False logger.warning('invalid consent %s value (%s=%r) for' ' greg_id=%s', crb_type, greg_type, greg_value, greg_id) - else: - is_consent = self.value_map[greg_value] - logger.debug('found consent %s=%r (%s=%r) for greg_id=%s', - crb_type, is_consent, greg_type, greg_value, - greg_id) if is_consent: consents.add(greg_type) else: consents.discard(greg_type) - return tuple(self.type_map[c] for c in consents) + return tuple(self.type_map[c] for c in sorted(consents)) class GregRoles(object): """ Extract affiliations from greg person roles. """ + # Greg role name -> AFFILIATION/status type_map = { - 'emeritus': 'TILKNYTTET/emeritus', - 'external-consultant': 'TILKNYTTET/ekst_partner', - 'external-partner': 'TILKNYTTET/ekst_partner', - 'guest-researcher': 'TILKNYTTET/gjesteforsker', + # example: + # + # 'emeritus': 'TILKNYTTET/emeritus', } - def __call__(self, greg_data, _today=None): + get_orgunit_ids = GregOrgunitIds() + + def __call__(self, greg_data, filter_active_at=None): """ :param dict greg_data: Sanitized greg person data (e.g. from `datasource.parse_person`) + :param datetime.date filter_active_at: + Only include roles that are active at the given date. + + If not given, the default behaviour is to include all past and + future roles. + :returns generator: - yields (affiliation, orgunit) tuples. + yields (affiliation, org-ids, start-date, end-date) tuples. - - Affiliation is an AFFILIATION/status string - - orgunit is the role orgunit value. + - affiliation is an AFFILIATION/status string + - org-ids is a sequence of orgunit (id-type, id-value) pairs + - start-/end-date are datetime.date objects """ greg_id = int(greg_data['id']) - # TODO: We may want to allow expired roles here, and filter them out in - # the is_active check, and ignore them when updating person affs. - today = _today or datetime.date.today() - for role_obj in greg_data.get('roles', ()): if role_obj['type'] not in self.type_map: logger.debug( 'ignoring unknown role type=%r, id=%r for greg_id=%s', role_obj['type'], role_obj['id'], greg_id) continue - if role_obj['start_date'] > today: + if filter_active_at and filter_active_at < role_obj['start_date']: logger.debug( 'ignoring future role type=%r, id=%r for greg_id=%s', role_obj['type'], role_obj['id'], greg_id) continue - if role_obj['end_date'] < today: + if filter_active_at and filter_active_at > role_obj['end_date']: logger.debug( - 'ignoring expired role type=%r id=%r for greg_id=%s', + 'ignoring expired role type=%r, id=%r for greg_id=%s', role_obj['type'], role_obj['id'], greg_id) continue - crb_type = self.type_map[role_obj['type']] - orgunit = role_obj['orgunit'] - yield crb_type, orgunit + yield ( + self.type_map[role_obj['type']], + tuple(self.get_orgunit_ids(role_obj['orgunit'])), + role_obj['start_date'], + role_obj['end_date'], + ) class GregMapper(object): @@ -356,7 +409,6 @@ class GregMapper(object): get_person_ids = GregPersonIds() get_affiliations = GregRoles() get_consents = GregConsents() - get_orgunit_ids = GregOrgunitIds() @classmethod def get_names(cls, greg_data): @@ -385,7 +437,7 @@ def is_active(self, greg_data, _today=None): greg_id) return False - if not list(self.get_affiliations(greg_data, _today=today)): + if not list(self.get_affiliations(greg_data, filter_active_at=today)): logger.info('no active roles for greg_id=%s', greg_id) return False diff --git a/Cerebrum/modules/no/uio/greg_import.py b/Cerebrum/modules/no/uio/greg_import.py index a68a8250f..0e2e46ca1 100644 --- a/Cerebrum/modules/no/uio/greg_import.py +++ b/Cerebrum/modules/no/uio/greg_import.py @@ -36,8 +36,29 @@ sync_greg_consent = GroupMembershipSetter(GREG_CONSENT_GROUP) -class UioGregMapper(mapper.GregMapper): - pass +class _UioGregOrgunitIds(mapper.GregOrgunitIds): + + type_map = { + ('orgreg', 'orgreg_id'): 'ORGREG_OU_ID', + ('sapuio', 'legacy_stedkode'): 'NO_SKO', + } + + +class _UioGregRoles(mapper.GregRoles): + + type_map = { + 'emeritus': 'TILKNYTTET/emeritus', + 'external-consultant': 'TILKNYTTET/ekst_partner', + 'external-partner': 'TILKNYTTET/ekst_partner', + 'guest-researcher': 'TILKNYTTET/gjesteforsker', + } + + get_orgunit_ids = _UioGregOrgunitIds() + + +class _UioGregMapper(mapper.GregMapper): + + get_affiliations = _UioGregRoles() class UioGregImporter(importer.GregImporter): @@ -46,4 +67,4 @@ class UioGregImporter(importer.GregImporter): 'greg-publish': sync_greg_consent, } - mapper = UioGregMapper() + mapper = _UioGregMapper() diff --git a/contrib/greg/greg-fetch-person.py b/contrib/greg/greg-fetch-person.py index c77f52749..09a8a6dea 100644 --- a/contrib/greg/greg-fetch-person.py +++ b/contrib/greg/greg-fetch-person.py @@ -112,7 +112,11 @@ def main(inargs=None): 'name': dict(mapper.get_names(greg_person)), 'contact_info': dict(mapper.get_contact_info(greg_person)), 'external_id': dict(mapper.get_person_ids(greg_person)), - 'affiliation': dict(mapper.get_affiliations(greg_person)), + 'affiliation': { + aff: {'at-ou': ou, 'aff-begin': str(start), 'aff-end': str(end)} + for aff, ou, start, end + in mapper.get_affiliations(greg_person) + }, } print('Person object:')