From 57cce2d3a03cbe3eebd8f39f1fa32e221c0273fd Mon Sep 17 00:00:00 2001 From: Mateusz Kulas Date: Tue, 5 Sep 2023 14:30:22 +0200 Subject: [PATCH] Fix the lack of private datasets in group entities --- odd_collector/adapters/ckan/adapter.py | 20 ++++++++++++++++--- odd_collector/adapters/ckan/mappers/group.py | 9 +-------- odd_collector/adapters/ckan/mappers/models.py | 10 ++++++---- odd_collector/adapters/ckan/utils.py | 14 ++++++++----- 4 files changed, 33 insertions(+), 20 deletions(-) diff --git a/odd_collector/adapters/ckan/adapter.py b/odd_collector/adapters/ckan/adapter.py index 2f13a251..b21899b1 100644 --- a/odd_collector/adapters/ckan/adapter.py +++ b/odd_collector/adapters/ckan/adapter.py @@ -1,15 +1,18 @@ +from collections import defaultdict + +from odd_collector.domain.plugin import CKANPlugin from odd_collector_sdk.domain.adapter import AsyncAbstractAdapter from odd_collector_sdk.errors import MappingDataError, DataSourceError from odd_models.models import DataEntity, DataEntityList from oddrn_generator import CKANGenerator -from odd_collector.domain.plugin import CKANPlugin from .client import CKANRestClient from .mappers.group import map_group from .mappers.organization import map_organization from .mappers.dataset import map_dataset from .mappers.resource import map_resource +from .utils import group_dataset_oddrns class Adapter(AsyncAbstractAdapter): @@ -23,6 +26,7 @@ def get_data_source_oddrn(self) -> str: async def get_data_entity_list(self) -> DataEntityList: organizations = await self.client.get_organizations() groups = await self.client.get_groups() + grouped_datasets_oddrns = defaultdict(list) organization_entities: list[DataEntity] = [] datasets_entities: list[DataEntity] = [] resources_entities: list[DataEntity] = [] @@ -33,12 +37,18 @@ async def get_data_entity_list(self) -> DataEntityList: datasets_entities_tmp: list[DataEntity] = [] datasets = await self.client.get_datasets(organization.id) for dataset in datasets: + resources_entities_tmp = [] self.oddrn_generator.set_oddrn_paths( organizations=organization.name, datasets=dataset.name, ) - + group_dataset_oddrns( + self.oddrn_generator, + dataset.name, + dataset.groups, + grouped_datasets_oddrns, + ) for resource in dataset.resources: fields = await self.client.get_resource_fields(resource.id) resources_entities_tmp.append( @@ -61,7 +71,11 @@ async def get_data_entity_list(self) -> DataEntityList: for group_name in groups: group = await self.client.get_group_details(group_name) - groups_entities.append(map_group(self.oddrn_generator, group)) + groups_entities.append( + map_group( + self.oddrn_generator, group, grouped_datasets_oddrns[group_name] + ) + ) except DataSourceError: raise diff --git a/odd_collector/adapters/ckan/mappers/group.py b/odd_collector/adapters/ckan/mappers/group.py index a0f9f7cc..1d95026b 100644 --- a/odd_collector/adapters/ckan/mappers/group.py +++ b/odd_collector/adapters/ckan/mappers/group.py @@ -5,15 +5,8 @@ def map_group( - oddrn_generator: CKANGenerator, - group: Group, + oddrn_generator: CKANGenerator, group: Group, datasets_oddrns: list[str] ) -> DataEntity: - datasets_oddrns: list[str] = [] - for dataset in group.datasets: - oddrn_generator.set_oddrn_paths(organizations=dataset["organization"]["name"]) - datasets_oddrns.append( - oddrn_generator.get_oddrn_by_path("datasets", dataset["name"]) - ) return DataEntity( oddrn=oddrn_generator.get_oddrn_by_path("groups", group.name), name=group.name, diff --git a/odd_collector/adapters/ckan/mappers/models.py b/odd_collector/adapters/ckan/mappers/models.py index 5ad17267..37b8cda5 100644 --- a/odd_collector/adapters/ckan/mappers/models.py +++ b/odd_collector/adapters/ckan/mappers/models.py @@ -4,7 +4,7 @@ from typing import Any from odd_collector_sdk.utils.metadata import HasMetadata -from odd_collector.adapters.ckan.utils import get_metadata, get_groups +from odd_collector.adapters.ckan.utils import get_metadata @dataclass @@ -87,6 +87,8 @@ def resources(self) -> list[Resource]: @property def odd_metadata(self) -> dict[str, Any]: - metadata = get_metadata(self.data, self.excluded_keys) - transformed = get_groups(metadata) - return transformed + return get_metadata(self.data, self.excluded_keys) + + @property + def groups(self) -> list[str]: + return [group["name"] for group in self.data["groups"]] diff --git a/odd_collector/adapters/ckan/utils.py b/odd_collector/adapters/ckan/utils.py index df445751..c18c05ce 100644 --- a/odd_collector/adapters/ckan/utils.py +++ b/odd_collector/adapters/ckan/utils.py @@ -1,12 +1,16 @@ +from collections import defaultdict from typing import Any +from oddrn_generator import CKANGenerator + def get_metadata(data: dict[str, Any], excluded_keys: list[str]) -> dict[str, Any]: return {key: data[key] for key in data if key not in excluded_keys} -def get_groups(data: dict[str, Any]) -> dict[str, Any]: - transformed_data = data.copy() - transformed_groups = {group["name"]: group for group in transformed_data["groups"]} - transformed_data["groups"] = transformed_groups - return transformed_data +def group_dataset_oddrns( + generator: CKANGenerator, dataset: str, groups: list[str], res: defaultdict +): + oddrn = generator.get_oddrn_by_path("datasets", dataset) + for group in groups: + res[group].append(oddrn)