KBMBF-452: #37 Show ratio of oer-content
WIP: showing overall OER in table
Robert Meissner committed Jul 19, 2022
1 parent 3b129af commit 1637950
Showing 10 changed files with 72 additions and 36 deletions.
3 changes: 3 additions & 0 deletions src/app/api/analytics/analytics.py
@@ -43,6 +43,9 @@ class Config(ElasticConfig):
class StatsResponse(ResponseModel):
derived_at: datetime
stats: dict[str, dict[str, COUNT_STATISTICS_TYPE]]
oer_ratio: int = Field(
default=0, ge=0, le=100, description="Overall ratio of OER content"
)


ValidationStatsT = TypeVar("ValidationStatsT")
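Note: a minimal, self-contained sketch of how the constraints on the new oer_ratio field behave. A plain pydantic BaseModel stands in for the project's ResponseModel; the class name and sample values are illustrative, not part of the commit.

from datetime import datetime

from pydantic import BaseModel, Field, ValidationError


class StatsSketch(BaseModel):  # illustrative stand-in for StatsResponse
    derived_at: datetime
    stats: dict
    oer_ratio: int = Field(
        default=0, ge=0, le=100, description="Overall ratio of OER content"
    )


StatsSketch(derived_at=datetime.now(), stats={})                # oer_ratio defaults to 0
StatsSketch(derived_at=datetime.now(), stats={}, oer_ratio=42)  # accepted
try:
    StatsSketch(derived_at=datetime.now(), stats={}, oer_ratio=150)
except ValidationError:
    print("ge/le bounds reject values outside 0..100")
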
4 changes: 4 additions & 0 deletions src/app/api/analytics/background_task.py
@@ -12,6 +12,7 @@
from app.api.analytics.stats import get_ids_to_iterate, search_hits_by_material_type
from app.api.analytics.storage import (
_COLLECTION_COUNT,
_COLLECTION_COUNT_OER,
_COLLECTIONS,
_MATERIALS,
_SEARCH,
@@ -105,6 +106,9 @@ def run():
app.api.analytics.storage.global_storage[_COLLECTION_COUNT] = asyncio.run(
collection_counts(COLLECTION_ROOT_ID, AggregationMappings.lrt)
)
app.api.analytics.storage.global_storage[_COLLECTION_COUNT_OER] = asyncio.run(
collection_counts(COLLECTION_ROOT_ID, AggregationMappings.lrt, oer_only=True)
)

all_collections = asyncio.run(get_ids_to_iterate(node_id=COLLECTION_ROOT_ID))
print("Tree ready to iterate. Length: ", len(all_collections))
23 changes: 15 additions & 8 deletions src/app/api/analytics/stats.py
@@ -25,6 +25,7 @@
global_storage,
)
from app.api.collections.models import CollectionNode
from app.api.collections.oer import oer_ratio
from app.api.collections.tree import collection_tree
from app.api.score.models import required_collection_properties
from app.core.config import ELASTIC_TOTAL_SIZE
@@ -139,7 +140,9 @@ def nodes(data: list[CollectionNode]) -> list:
return [Row(id=row[0], title=row[1]) for row in flatten_list(nodes(tree))]


def query_material_types(node_id: uuid.UUID) -> dict[str, COUNT_STATISTICS_TYPE]:
def query_material_types(
node_id: uuid.UUID, oer_only: bool
) -> dict[str, COUNT_STATISTICS_TYPE]:
"""
get collections with parent id equal to node_id
@@ -175,7 +178,7 @@ def filtered_collections(collections: list[Collection], node_id: uuid.UUID):


async def stats_latest(
stat_type: StatType, node_id: uuid.UUID
stat_type: StatType, node_id: uuid.UUID, oer_only: bool
) -> dict[str, COUNT_STATISTICS_TYPE]:
results = {}

@@ -185,18 +188,20 @@ async def stats_latest(
stats = search_hits_by_material_type(row.title)
results.update({str(row.id): stats})
elif stat_type is StatType.MATERIAL_TYPES:
results = query_material_types(node_id)
results = query_material_types(node_id, oer_only)
return results


async def overall_stats(node_id) -> StatsResponse:
search_stats = await stats_latest(stat_type=StatType.SEARCH, node_id=node_id)
async def overall_stats(node_id, oer_only: bool = False) -> StatsResponse:
search_stats = await stats_latest(
stat_type=StatType.SEARCH, node_id=node_id, oer_only=oer_only
)

if not search_stats:
raise StatsNotFoundException

material_types_stats = await stats_latest(
stat_type=StatType.MATERIAL_TYPES, node_id=node_id
stat_type=StatType.MATERIAL_TYPES, node_id=node_id, oer_only=oer_only
)

if not material_types_stats:
@@ -210,8 +215,10 @@ async def overall_stats(node_id) -> StatsResponse:
else:
stats_output.update({key: {"material_types": value}})

output = StatsResponse(derived_at=datetime.datetime.now(), stats=stats_output)
return output
oer = oer_ratio(node_id)
return StatsResponse(
derived_at=datetime.datetime.now(), stats=stats_output, oer_ratio=oer
)


def collections_with_missing_properties(
2 changes: 2 additions & 0 deletions src/app/api/analytics/storage.py
@@ -2,6 +2,7 @@
_MATERIALS = "materials"
_SEARCH = "search"
_COLLECTION_COUNT = "counts"
_COLLECTION_COUNT_OER = "counts_oer"

"""
A quick fix for a global storage
@@ -11,4 +12,5 @@
_MATERIALS: [],
_SEARCH: {},
_COLLECTION_COUNT: {},
_COLLECTION_COUNT_OER: {},
} # TODO: Refactor me ASAP
8 changes: 6 additions & 2 deletions src/app/api/api.py
@@ -337,8 +337,12 @@ async def material_counts_tree(
It relies on background data and is read every {BACKGROUND_TASK_TIME_INTERVAL}s.
This is the granularity of the data.""",
)
async def read_stats(*, node_id: uuid.UUID = Depends(node_ids_for_major_collections)):
return await overall_stats(node_id)
async def read_stats(
*,
node_id: uuid.UUID = Depends(node_ids_for_major_collections),
oer_only: bool = Query(default=False),
):
return await overall_stats(node_id, oer_only)


@router.get(
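With the new query parameter, OER-only statistics can be requested per call. A hypothetical client request follows; the host, port, and route path are placeholders, since the route decorator is truncated in this diff.

import requests

resp = requests.get(
    "http://localhost:8080/stats/<node_id>",  # placeholder URL; substitute the real route
    params={"oer_only": "true"},              # omit entirely for the default (false)
)
print(resp.json().get("oer_ratio"))
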
8 changes: 5 additions & 3 deletions src/app/api/collections/counts.py
@@ -34,7 +34,9 @@ class AggregationMappings(str, Enum):
license = ("properties.ccm:commonlicense_key.keyword",)


def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) -> Search:
def collection_counts_search(
node_id: uuid.UUID, facet: AggregationMappings, oer_only: bool = False
) -> Search:
search = Search().base_filters().query(query_materials(node_id=node_id))
material_agg = A(
"terms", field="collections.nodeRef.id.keyword", size=ELASTIC_TOTAL_SIZE
@@ -61,9 +63,9 @@ def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) ->


async def collection_counts(
node_id: uuid.UUID, facet: AggregationMappings
node_id: uuid.UUID, facet: AggregationMappings, oer_only: bool = False
) -> Optional[list[CollectionTreeCount]]:
response = collection_counts_search(node_id, facet).execute()
response = collection_counts_search(node_id, facet, oer_only).execute()
if response.success():
return build_counts(response)

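As the WIP label suggests, oer_only is accepted by collection_counts_search but not yet applied anywhere in the hunks shown. One plausible way to apply it — an assumption, not part of this commit — is an extra terms filter on the same license field that AggregationMappings.license aggregates over:

from elasticsearch_dsl import Search

OER_LICENSES = ["CC_0", "PDM", "CC_BY", "CC_BY_SA"]  # mirrors the list in oer.py below


def with_oer_filter(search: Search, oer_only: bool) -> Search:
    # Hypothetical helper: restrict hits to OER licenses when requested.
    if oer_only:
        search = search.filter(
            "terms", **{"properties.ccm:commonlicense_key.keyword": OER_LICENSES}
        )
    return search
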
22 changes: 22 additions & 0 deletions src/app/api/collections/oer.py
@@ -0,0 +1,22 @@
import uuid

from app.api.collections.counts import (
_AGGREGATION_NAME,
AggregationMappings,
collection_counts_search,
)


def oer_ratio(node_id: uuid.UUID) -> int:
oer_statistics = collection_counts_search(node_id, AggregationMappings.license)
response = oer_statistics.execute()
oer_elements = 0
oer_total = 0
oer_license = ["CC_0", "PDM", "CC_BY", "CC_BY_SA"]
for data in response.aggregations[_AGGREGATION_NAME].buckets:
for bucket in data["facet"]["buckets"]:
oer_total += bucket["doc_count"]
if bucket["key"] in oer_license:
oer_elements += bucket["doc_count"]

return round((oer_elements / oer_total) * 100)
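One edge case worth noting: the division by oer_total raises ZeroDivisionError when the aggregation returns no buckets. Below is a standalone sketch of the same bucket arithmetic with that case guarded; plain dicts stand in for the elasticsearch-dsl response, and the sample data is illustrative.

OER_LICENSES = {"CC_0", "PDM", "CC_BY", "CC_BY_SA"}


def compute_oer_ratio(buckets: list[dict]) -> int:
    total = sum(b["doc_count"] for b in buckets)
    oer = sum(b["doc_count"] for b in buckets if b["key"] in OER_LICENSES)
    return round(oer / total * 100) if total else 0  # avoid dividing by zero


print(compute_oer_ratio([
    {"key": "CC_BY", "doc_count": 30},
    {"key": "COPYRIGHT_FREE", "doc_count": 70},
]))  # -> 30
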
21 changes: 1 addition & 20 deletions src/app/api/score/score.py
@@ -4,11 +4,7 @@
from elasticsearch_dsl.response import Response
from fastapi import Path

from app.api.collections.counts import (
_AGGREGATION_NAME,
AggregationMappings,
collection_counts_search,
)
from app.api.collections.oer import oer_ratio
from app.api.score.models import (
MissingCollectionProperties,
MissingMaterialProperties,
@@ -158,18 +154,3 @@ async def get_score(node_id: uuid.UUID) -> ScoreOutput:
return ScoreOutput(
score=score_, collections=collections, materials=materials, oer_ratio=oer
)


def oer_ratio(node_id: uuid.UUID) -> int:
oer_statistics = collection_counts_search(node_id, AggregationMappings.license)
response = oer_statistics.execute()
oer_elements = 0
oer_total = 0
oer_license = ["CC_0", "PDM", "CC_BY", "CC_BY_SA"]
for data in response.aggregations[_AGGREGATION_NAME].buckets:
for bucket in data["facet"]["buckets"]:
oer_total += bucket["doc_count"]
if bucket["key"] in oer_license:
oer_elements += bucket["doc_count"]

return round((oer_elements / oer_total) * 100)
2 changes: 2 additions & 0 deletions src/app/core/config.py
@@ -19,3 +19,5 @@
ELASTICSEARCH_TIMEOUT = int(os.getenv("ELASTICSEARCH_TIMEOUT", 20))

BACKGROUND_TASK_TIME_INTERVAL = 10 * 60 # Time between consecutive background calls

ENABLE_DATABASE = True
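Unlike the neighboring settings, ENABLE_DATABASE is hardcoded rather than read from the environment. An env-driven variant consistent with the rest of the file might look like this (an assumption, not part of the commit; os is already imported above):

ENABLE_DATABASE = os.getenv("ENABLE_DATABASE", "true").lower() in ("1", "true", "yes")
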
15 changes: 12 additions & 3 deletions src/app/main.py
@@ -8,7 +8,14 @@

from app.api.analytics.background_task import background_task
from app.api.api import router
from app.core.config import ALLOWED_HOSTS, API_DEBUG, API_PORT, LOG_LEVEL, ROOT_PATH
from app.core.config import (
ALLOWED_HOSTS,
API_DEBUG,
API_PORT,
ENABLE_DATABASE,
LOG_LEVEL,
ROOT_PATH,
)
from app.core.constants import OPEN_API_VERSION
from app.core.errors import http_422_error_handler, http_error_handler
from app.core.logging import logger
@@ -34,9 +41,11 @@ def api() -> FastAPI:
_api.add_middleware(RawContextMiddleware)

_api.add_event_handler("startup", connect_to_elastic)
_api.add_event_handler("startup", create_start_app_handler(_api))
_api.add_event_handler("startup", background_task)
_api.add_event_handler("shutdown", create_stop_app_handler(_api))

if ENABLE_DATABASE:
_api.add_event_handler("startup", create_start_app_handler(_api))
_api.add_event_handler("shutdown", create_stop_app_handler(_api))

_api.add_exception_handler(HTTPException, http_error_handler)
_api.add_exception_handler(HTTP_422_UNPROCESSABLE_ENTITY, http_422_error_handler)