From 3b129af7d010fdf39787cf738a8e7d2811b50c60 Mon Sep 17 00:00:00 2001 From: Robert Meissner Date: Tue, 19 Jul 2022 09:38:26 +0200 Subject: [PATCH] KBMBF-452: #37 Show ratio of oer-content OER for score --- src/app/api/api.py | 2 +- src/app/api/collections/counts.py | 9 ++-- src/app/api/quality_matrix/quality_matrix.py | 6 ++- src/app/api/score/models.py | 2 +- src/app/api/score/score.py | 57 +++++++++++++++----- src/app/core/constants.py | 3 -- 6 files changed, 56 insertions(+), 23 deletions(-) diff --git a/src/app/api/api.py b/src/app/api/api.py index 65cf4ca..95ec388 100644 --- a/src/app/api/api.py +++ b/src/app/api/api.py @@ -191,7 +191,7 @@ async def get_timestamps( @router.get( - "/collections/{node_id}/stats/score", + "/collections/{node_id}/score", response_model=ScoreOutput, status_code=HTTP_200_OK, responses={HTTP_404_NOT_FOUND: {"description": "Collection not found"}}, diff --git a/src/app/api/collections/counts.py b/src/app/api/collections/counts.py index 638fede..b6a895b 100644 --- a/src/app/api/collections/counts.py +++ b/src/app/api/collections/counts.py @@ -35,7 +35,7 @@ class AggregationMappings(str, Enum): def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) -> Search: - s = Search().base_filters().query(query_materials(node_id=node_id)) + search = Search().base_filters().query(query_materials(node_id=node_id)) material_agg = A( "terms", field="collections.nodeRef.id.keyword", size=ELASTIC_TOTAL_SIZE ) @@ -47,8 +47,9 @@ def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) -> size=ELASTIC_TOTAL_SIZE, ), ) - s.aggs.bucket(_AGGREGATION_NAME, material_agg) - s = s.source( + + search.aggs.bucket(_AGGREGATION_NAME, material_agg) + search = search.source( [ ElasticResourceAttribute.NODEREF_ID.path, CollectionAttribute.TITLE.path, @@ -56,7 +57,7 @@ def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) -> CollectionAttribute.PARENT_ID.path, ] )[:0] - return s + return search async def collection_counts( diff --git a/src/app/api/quality_matrix/quality_matrix.py b/src/app/api/quality_matrix/quality_matrix.py index 2bd6345..8df0c5e 100644 --- a/src/app/api/quality_matrix/quality_matrix.py +++ b/src/app/api/quality_matrix/quality_matrix.py @@ -11,7 +11,7 @@ from app.api.quality_matrix.utils import default_properties from app.api.score.models import required_collection_properties from app.core.config import ELASTIC_TOTAL_SIZE -from app.core.constants import COLLECTION_ROOT_ID, PROPERTIES, REPLICATION_SOURCE_ID +from app.core.constants import COLLECTION_ROOT_ID from app.core.logging import logger from app.elastic.dsl import qbool, qmatch from app.elastic.search import Search @@ -138,6 +138,10 @@ async def items_in_response(response: Response) -> dict: return response.aggregations.to_dict().items() +REPLICATION_SOURCE_ID = "ccm:replicationsource" +PROPERTIES = "properties" + + async def source_quality( node_id: uuid.UUID = COLLECTION_ROOT_ID, match_keyword: str = f"{PROPERTIES}.{REPLICATION_SOURCE_ID}", diff --git a/src/app/api/score/models.py b/src/app/api/score/models.py index 8035c42..b8a9794 100644 --- a/src/app/api/score/models.py +++ b/src/app/api/score/models.py @@ -69,7 +69,7 @@ class MissingMaterialProperties(BaseModel): class ScoreOutput(BaseModel): score: int = Field(default=0, gt=0, le=100, description="Overall score") oer_ratio: int = Field( - default=0, gt=0, le=100, description="Overall ratio of OER content" + default=0, ge=0, le=100, description="Overall ratio of OER content" ) collections: MissingCollectionProperties = Field( description="Score for specific collection properties" diff --git a/src/app/api/score/score.py b/src/app/api/score/score.py index 61b2c6e..8e25d1b 100644 --- a/src/app/api/score/score.py +++ b/src/app/api/score/score.py @@ -4,7 +4,17 @@ from elasticsearch_dsl.response import Response from fastapi import Path -import app.core.constants +from app.api.collections.counts import ( + _AGGREGATION_NAME, + AggregationMappings, + collection_counts_search, +) +from app.api.score.models import ( + MissingCollectionProperties, + MissingMaterialProperties, + ScoreOutput, +) +from app.core.constants import COLLECTION_NAME_TO_ID from app.core.models import LearningMaterialAttribute from app.elastic.dsl import afilter, amissing from app.elastic.elastic import ( @@ -53,10 +63,9 @@ def calc_weighted_score(collection_scores: dict, material_scores: dict) -> int: def get_score_search(node_id: uuid.UUID, resource_type: ResourceType) -> Search: - query, aggs = None, None if resource_type is ResourceType.COLLECTION: query, aggs = query_collections, aggs_collection_validation - elif resource_type is ResourceType.MATERIAL: + else: # ResourceType.MATERIAL query, aggs = query_materials, aggs_material_validation s = Search().base_filters().query(query(node_id=node_id)) for name, _agg in aggs.items(): @@ -81,8 +90,7 @@ def search_score(node_id: uuid.UUID, resource_type: ResourceType) -> dict: def node_id_param( - *, - node_id: uuid.UUID = Path(..., examples=app.core.constants.COLLECTION_NAME_TO_ID), + *, node_id: uuid.UUID = Path(..., examples=COLLECTION_NAME_TO_ID) ) -> uuid.UUID: return node_id @@ -126,19 +134,42 @@ def field_names_used_for_score_calculation(properties: dict) -> list[str]: } -async def get_score(node_id): +async def get_score(node_id: uuid.UUID) -> ScoreOutput: collection_stats = search_score( node_id=node_id, resource_type=ResourceType.COLLECTION ) collection_scores = calc_scores(stats=collection_stats) + material_stats = search_score(node_id=node_id, resource_type=ResourceType.MATERIAL) material_scores = calc_scores(stats=material_stats) + score_ = calc_weighted_score( - collection_scores=collection_scores, - material_scores=material_scores, + collection_scores=collection_scores, material_scores=material_scores ) - return { - "score": score_, - "collections": {"total": collection_stats["total"], **collection_scores}, - "materials": {"total": material_stats["total"], **material_scores}, - } + + oer = oer_ratio(node_id) + + collections = MissingCollectionProperties( + total=collection_stats["total"], **collection_scores + ) + materials = MissingMaterialProperties( + total=material_stats["total"], **material_scores + ) + return ScoreOutput( + score=score_, collections=collections, materials=materials, oer_ratio=oer + ) + + +def oer_ratio(node_id: uuid.UUID) -> int: + oer_statistics = collection_counts_search(node_id, AggregationMappings.license) + response = oer_statistics.execute() + oer_elements = 0 + oer_total = 0 + oer_license = ["CC_0", "PDM", "CC_BY", "CC_BY_SA"] + for data in response.aggregations[_AGGREGATION_NAME].buckets: + for bucket in data["facet"]["buckets"]: + oer_total += bucket["doc_count"] + if bucket["key"] in oer_license: + oer_elements += bucket["doc_count"] + + return round((oer_elements / oer_total) * 100) diff --git a/src/app/core/constants.py b/src/app/core/constants.py index c8cdbc5..d4cd65c 100644 --- a/src/app/core/constants.py +++ b/src/app/core/constants.py @@ -1,6 +1,3 @@ -REPLICATION_SOURCE_ID = "ccm:replicationsource" -PROPERTIES = "properties" - COLLECTION_NAME_TO_ID = { "Physik": {"value": "94f22c9b-0d3a-4c1c-8987-4c8e83f3a92e"}, "Mathematik": {"value": "bd8be6d5-0fbe-4534-a4b3-773154ba6abc"},