KBMBF-452: #37 Show ratio of oer-content

OER for score
openeduhub · Jul 19, 2022 · 3b129af · 3b129af
1 parent 8f9ed81
commit 3b129af
Show file tree

Hide file tree

Showing 6 changed files with 56 additions and 23 deletions.
diff --git a/src/app/api/api.py b/src/app/api/api.py
@@ -191,7 +191,7 @@ async def get_timestamps(
 
 
 @router.get(
-    "/collections/{node_id}/stats/score",
+    "/collections/{node_id}/score",
     response_model=ScoreOutput,
     status_code=HTTP_200_OK,
     responses={HTTP_404_NOT_FOUND: {"description": "Collection not found"}},

diff --git a/src/app/api/collections/counts.py b/src/app/api/collections/counts.py
@@ -35,7 +35,7 @@ class AggregationMappings(str, Enum):
 
 
 def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) -> Search:
-    s = Search().base_filters().query(query_materials(node_id=node_id))
+    search = Search().base_filters().query(query_materials(node_id=node_id))
     material_agg = A(
         "terms", field="collections.nodeRef.id.keyword", size=ELASTIC_TOTAL_SIZE
     )
@@ -47,16 +47,17 @@ def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) ->
             size=ELASTIC_TOTAL_SIZE,
         ),
     )
-    s.aggs.bucket(_AGGREGATION_NAME, material_agg)
-    s = s.source(
+
+    search.aggs.bucket(_AGGREGATION_NAME, material_agg)
+    search = search.source(
         [
             ElasticResourceAttribute.NODEREF_ID.path,
             CollectionAttribute.TITLE.path,
             CollectionAttribute.PATH.path,
             CollectionAttribute.PARENT_ID.path,
         ]
     )[:0]
-    return s
+    return search
 
 
 async def collection_counts(

diff --git a/src/app/api/quality_matrix/quality_matrix.py b/src/app/api/quality_matrix/quality_matrix.py
@@ -11,7 +11,7 @@
 from app.api.quality_matrix.utils import default_properties
 from app.api.score.models import required_collection_properties
 from app.core.config import ELASTIC_TOTAL_SIZE
-from app.core.constants import COLLECTION_ROOT_ID, PROPERTIES, REPLICATION_SOURCE_ID
+from app.core.constants import COLLECTION_ROOT_ID
 from app.core.logging import logger
 from app.elastic.dsl import qbool, qmatch
 from app.elastic.search import Search
@@ -138,6 +138,10 @@ async def items_in_response(response: Response) -> dict:
     return response.aggregations.to_dict().items()
 
 
+REPLICATION_SOURCE_ID = "ccm:replicationsource"
+PROPERTIES = "properties"
+
+
 async def source_quality(
     node_id: uuid.UUID = COLLECTION_ROOT_ID,
     match_keyword: str = f"{PROPERTIES}.{REPLICATION_SOURCE_ID}",

diff --git a/src/app/api/score/models.py b/src/app/api/score/models.py
@@ -69,7 +69,7 @@ class MissingMaterialProperties(BaseModel):
 class ScoreOutput(BaseModel):
     score: int = Field(default=0, gt=0, le=100, description="Overall score")
     oer_ratio: int = Field(
-        default=0, gt=0, le=100, description="Overall ratio of OER content"
+        default=0, ge=0, le=100, description="Overall ratio of OER content"
     )
     collections: MissingCollectionProperties = Field(
         description="Score for specific collection properties"

diff --git a/src/app/api/score/score.py b/src/app/api/score/score.py
@@ -4,7 +4,17 @@
 from elasticsearch_dsl.response import Response
 from fastapi import Path
 
-import app.core.constants
+from app.api.collections.counts import (
+    _AGGREGATION_NAME,
+    AggregationMappings,
+    collection_counts_search,
+)
+from app.api.score.models import (
+    MissingCollectionProperties,
+    MissingMaterialProperties,
+    ScoreOutput,
+)
+from app.core.constants import COLLECTION_NAME_TO_ID
 from app.core.models import LearningMaterialAttribute
 from app.elastic.dsl import afilter, amissing
 from app.elastic.elastic import (
@@ -53,10 +63,9 @@ def calc_weighted_score(collection_scores: dict, material_scores: dict) -> int:
 
 
 def get_score_search(node_id: uuid.UUID, resource_type: ResourceType) -> Search:
-    query, aggs = None, None
     if resource_type is ResourceType.COLLECTION:
         query, aggs = query_collections, aggs_collection_validation
-    elif resource_type is ResourceType.MATERIAL:
+    else:  # ResourceType.MATERIAL
         query, aggs = query_materials, aggs_material_validation
     s = Search().base_filters().query(query(node_id=node_id))
     for name, _agg in aggs.items():
@@ -81,8 +90,7 @@ def search_score(node_id: uuid.UUID, resource_type: ResourceType) -> dict:
 
 
 def node_id_param(
-    *,
-    node_id: uuid.UUID = Path(..., examples=app.core.constants.COLLECTION_NAME_TO_ID),
+    *, node_id: uuid.UUID = Path(..., examples=COLLECTION_NAME_TO_ID)
 ) -> uuid.UUID:
     return node_id
 
@@ -126,19 +134,42 @@ def field_names_used_for_score_calculation(properties: dict) -> list[str]:
 }
 
 
-async def get_score(node_id):
+async def get_score(node_id: uuid.UUID) -> ScoreOutput:
     collection_stats = search_score(
         node_id=node_id, resource_type=ResourceType.COLLECTION
     )
     collection_scores = calc_scores(stats=collection_stats)
+
     material_stats = search_score(node_id=node_id, resource_type=ResourceType.MATERIAL)
     material_scores = calc_scores(stats=material_stats)
+
     score_ = calc_weighted_score(
-        collection_scores=collection_scores,
-        material_scores=material_scores,
+        collection_scores=collection_scores, material_scores=material_scores
     )
-    return {
-        "score": score_,
-        "collections": {"total": collection_stats["total"], **collection_scores},
-        "materials": {"total": material_stats["total"], **material_scores},
-    }
+
+    oer = oer_ratio(node_id)
+
+    collections = MissingCollectionProperties(
+        total=collection_stats["total"], **collection_scores
+    )
+    materials = MissingMaterialProperties(
+        total=material_stats["total"], **material_scores
+    )
+    return ScoreOutput(
+        score=score_, collections=collections, materials=materials, oer_ratio=oer
+    )
+
+
+def oer_ratio(node_id: uuid.UUID) -> int:
+    """Return the rounded percentage (0-100) of counted documents whose license key is OER."""
+    oer_statistics = collection_counts_search(node_id, AggregationMappings.license)
+    response = oer_statistics.execute()
+    oer_elements = oer_total = 0
+    oer_license = ["CC_0", "PDM", "CC_BY", "CC_BY_SA"]
+    for data in response.aggregations[_AGGREGATION_NAME].buckets:
+        for bucket in data["facet"]["buckets"]:
+            oer_total += bucket["doc_count"]
+            if bucket["key"] in oer_license:
+                oer_elements += bucket["doc_count"]
+    # No license buckets at all leaves oer_total at 0; report 0 % instead of dividing by zero.
+    return round((oer_elements / oer_total) * 100) if oer_total else 0
diff --git a/src/app/core/constants.py b/src/app/core/constants.py
@@ -1,6 +1,3 @@
-REPLICATION_SOURCE_ID = "ccm:replicationsource"
-PROPERTIES = "properties"
-
 COLLECTION_NAME_TO_ID = {
     "Physik": {"value": "94f22c9b-0d3a-4c1c-8987-4c8e83f3a92e"},
     "Mathematik": {"value": "bd8be6d5-0fbe-4534-a4b3-773154ba6abc"},