KBMBF-452: #37 Show ratio of oer-content
WIP: showing overall OER in table
Robert Meissner committed Jul 19, 2022
1 parent 3b129af commit 1637950
Showing 10 changed files with 72 additions and 36 deletions.
3 changes: 3 additions & 0 deletions src/app/api/analytics/analytics.py
@@ -43,6 +43,9 @@ class Config(ElasticConfig):
class StatsResponse(ResponseModel):
derived_at: datetime
stats: dict[str, dict[str, COUNT_STATISTICS_TYPE]]
oer_ratio: int = Field(
default=0, ge=0, le=100, description="Overall ratio of OER content"
)


ValidationStatsT = TypeVar("ValidationStatsT")
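Note: a minimal, self-contained sketch of how the constraints on the new oer_ratio field behave. A plain pydantic BaseModel stands in for the project's ResponseModel; the class name and sample values are illustrative, not part of the commit.

from datetime import datetime

from pydantic import BaseModel, Field, ValidationError


class StatsSketch(BaseModel):  # illustrative stand-in for StatsResponse
    derived_at: datetime
    stats: dict
    oer_ratio: int = Field(
        default=0, ge=0, le=100, description="Overall ratio of OER content"
    )


StatsSketch(derived_at=datetime.now(), stats={})                # oer_ratio defaults to 0
StatsSketch(derived_at=datetime.now(), stats={}, oer_ratio=42)  # accepted
try:
    StatsSketch(derived_at=datetime.now(), stats={}, oer_ratio=150)
except ValidationError:
    print("ge/le bounds reject values outside 0..100")
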
4 changes: 4 additions & 0 deletions src/app/api/analytics/background_task.py
@@ -12,6 +12,7 @@
from app.api.analytics.stats import get_ids_to_iterate, search_hits_by_material_type
from app.api.analytics.storage import (
_COLLECTION_COUNT,
_COLLECTION_COUNT_OER,
_COLLECTIONS,
_MATERIALS,
_SEARCH,
@@ -105,6 +106,9 @@ def run():
app.api.analytics.storage.global_storage[_COLLECTION_COUNT] = asyncio.run(
collection_counts(COLLECTION_ROOT_ID, AggregationMappings.lrt)
)
app.api.analytics.storage.global_storage[_COLLECTION_COUNT_OER] = asyncio.run(
collection_counts(COLLECTION_ROOT_ID, AggregationMappings.lrt, oer_only=True)
)

all_collections = asyncio.run(get_ids_to_iterate(node_id=COLLECTION_ROOT_ID))
print("Tree ready to iterate. Length: ", len(all_collections))
23 changes: 15 additions & 8 deletions src/app/api/analytics/stats.py
@@ -25,6 +25,7 @@
global_storage,
)
from app.api.collections.models import CollectionNode
from app.api.collections.oer import oer_ratio
from app.api.collections.tree import collection_tree
from app.api.score.models import required_collection_properties
from app.core.config import ELASTIC_TOTAL_SIZE
@@ -139,7 +140,9 @@ def nodes(data: list[CollectionNode]) -> list:
return [Row(id=row[0], title=row[1]) for row in flatten_list(nodes(tree))]


def query_material_types(node_id: uuid.UUID) -> dict[str, COUNT_STATISTICS_TYPE]:
def query_material_types(
node_id: uuid.UUID, oer_only: bool
) -> dict[str, COUNT_STATISTICS_TYPE]:
"""
get collections with parent id equal to node_id
@@ -175,7 +178,7 @@ def filtered_collections(collections: list[Collection], node_id: uuid.UUID):


async def stats_latest(
stat_type: StatType, node_id: uuid.UUID
stat_type: StatType, node_id: uuid.UUID, oer_only: bool
) -> dict[str, COUNT_STATISTICS_TYPE]:
results = {}

@@ -185,18 +188,20 @@ async def stats_latest(
stats = search_hits_by_material_type(row.title)
results.update({str(row.id): stats})
elif stat_type is StatType.MATERIAL_TYPES:
results = query_material_types(node_id)
results = query_material_types(node_id, oer_only)
return results


async def overall_stats(node_id) -> StatsResponse:
search_stats = await stats_latest(stat_type=StatType.SEARCH, node_id=node_id)
async def overall_stats(node_id, oer_only: bool = False) -> StatsResponse:
search_stats = await stats_latest(
stat_type=StatType.SEARCH, node_id=node_id, oer_only=oer_only
)

if not search_stats:
raise StatsNotFoundException

material_types_stats = await stats_latest(
stat_type=StatType.MATERIAL_TYPES, node_id=node_id
stat_type=StatType.MATERIAL_TYPES, node_id=node_id, oer_only=oer_only
)

if not material_types_stats:
@@ -210,8 +215,10 @@ async def overall_stats(node_id) -> StatsResponse:
else:
stats_output.update({key: {"material_types": value}})

output = StatsResponse(derived_at=datetime.datetime.now(), stats=stats_output)
return output
oer = oer_ratio(node_id)
return StatsResponse(
derived_at=datetime.datetime.now(), stats=stats_output, oer_ratio=oer
)


def collections_with_missing_properties(
2 changes: 2 additions & 0 deletions src/app/api/analytics/storage.py
@@ -2,6 +2,7 @@
_MATERIALS = "materials"
_SEARCH = "search"
_COLLECTION_COUNT = "counts"
_COLLECTION_COUNT_OER = "counts_oer"

"""
A quick fix for a global storage
@@ -11,4 +12,5 @@
_MATERIALS: [],
_SEARCH: {},
_COLLECTION_COUNT: {},
_COLLECTION_COUNT_OER: {},
} # TODO: Refactor me ASAP
8 changes: 6 additions & 2 deletions src/app/api/api.py
@@ -337,8 +337,12 @@ async def material_counts_tree(
It relies on background data and is read every {BACKGROUND_TASK_TIME_INTERVAL}s.
This is the granularity of the data.""",
)
async def read_stats(*, node_id: uuid.UUID = Depends(node_ids_for_major_collections)):
return await overall_stats(node_id)
async def read_stats(
*,
node_id: uuid.UUID = Depends(node_ids_for_major_collections),
oer_only: bool = Query(default=False),
):
return await overall_stats(node_id, oer_only)


@router.get(
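With the new query parameter, OER-only statistics can be requested per call. A hypothetical client request follows; the host, port, and route path are placeholders, since the route decorator is truncated in this diff.

import requests

resp = requests.get(
    "http://localhost:8080/stats/<node_id>",  # placeholder URL; substitute the real route
    params={"oer_only": "true"},              # omit entirely for the default (false)
)
print(resp.json().get("oer_ratio"))
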
8 changes: 5 additions & 3 deletions src/app/api/collections/counts.py
@@ -34,7 +34,9 @@ class AggregationMappings(str, Enum):
license = ("properties.ccm:commonlicense_key.keyword",)


def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) -> Search:
def collection_counts_search(
node_id: uuid.UUID, facet: AggregationMappings, oer_only: bool = False
) -> Search:
search = Search().base_filters().query(query_materials(node_id=node_id))
material_agg = A(
"terms", field="collections.nodeRef.id.keyword", size=ELASTIC_TOTAL_SIZE
@@ -61,9 +63,9 @@ def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) ->


async def collection_counts(
node_id: uuid.UUID, facet: AggregationMappings
node_id: uuid.UUID, facet: AggregationMappings, oer_only: bool = False
) -> Optional[list[CollectionTreeCount]]:
response = collection_counts_search(node_id, facet).execute()
response = collection_counts_search(node_id, facet, oer_only).execute()
if response.success():
return build_counts(response)

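As the WIP label suggests, oer_only is accepted by collection_counts_search but not yet applied anywhere in the hunks shown. One plausible way to apply it — an assumption, not part of this commit — is an extra terms filter on the same license field that AggregationMappings.license aggregates over:

from elasticsearch_dsl import Search

OER_LICENSES = ["CC_0", "PDM", "CC_BY", "CC_BY_SA"]  # mirrors the list in oer.py below


def with_oer_filter(search: Search, oer_only: bool) -> Search:
    # Hypothetical helper: restrict hits to OER licenses when requested.
    if oer_only:
        search = search.filter(
            "terms", **{"properties.ccm:commonlicense_key.keyword": OER_LICENSES}
        )
    return search
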
22 changes: 22 additions & 0 deletions src/app/api/collections/oer.py
@@ -0,0 +1,22 @@
import uuid

from app.api.collections.counts import (
_AGGREGATION_NAME,
AggregationMappings,
collection_counts_search,
)


def oer_ratio(node_id: uuid.UUID) -> int:
oer_statistics = collection_counts_search(node_id, AggregationMappings.license)
response = oer_statistics.execute()
oer_elements = 0
oer_total = 0
oer_license = ["CC_0", "PDM", "CC_BY", "CC_BY_SA"]
for data in response.aggregations[_AGGREGATION_NAME].buckets:
for bucket in data["facet"]["buckets"]:
oer_total += bucket["doc_count"]
if bucket["key"] in oer_license:
oer_elements += bucket["doc_count"]

return round((oer_elements / oer_total) * 100)
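One edge case worth noting: the division by oer_total raises ZeroDivisionError when the aggregation returns no buckets. Below is a standalone sketch of the same bucket arithmetic with that case guarded; plain dicts stand in for the elasticsearch-dsl response, and the sample data is illustrative.

OER_LICENSES = {"CC_0", "PDM", "CC_BY", "CC_BY_SA"}


def compute_oer_ratio(buckets: list[dict]) -> int:
    total = sum(b["doc_count"] for b in buckets)
    oer = sum(b["doc_count"] for b in buckets if b["key"] in OER_LICENSES)
    return round(oer / total * 100) if total else 0  # avoid dividing by zero


print(compute_oer_ratio([
    {"key": "CC_BY", "doc_count": 30},
    {"key": "COPYRIGHT_FREE", "doc_count": 70},
]))  # -> 30
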
21 changes: 1 addition & 20 deletions src/app/api/score/score.py
@@ -4,11 +4,7 @@
from elasticsearch_dsl.response import Response
from fastapi import Path

from app.api.collections.counts import (
_AGGREGATION_NAME,
AggregationMappings,
collection_counts_search,
)
from app.api.collections.oer import oer_ratio
from app.api.score.models import (
MissingCollectionProperties,
MissingMaterialProperties,
@@ -158,18 +154,3 @@ async def get_score(node_id: uuid.UUID) -> ScoreOutput:
return ScoreOutput(
score=score_, collections=collections, materials=materials, oer_ratio=oer
)


def oer_ratio(node_id: uuid.UUID) -> int:
oer_statistics = collection_counts_search(node_id, AggregationMappings.license)
response = oer_statistics.execute()
oer_elements = 0
oer_total = 0
oer_license = ["CC_0", "PDM", "CC_BY", "CC_BY_SA"]
for data in response.aggregations[_AGGREGATION_NAME].buckets:
for bucket in data["facet"]["buckets"]:
oer_total += bucket["doc_count"]
if bucket["key"] in oer_license:
oer_elements += bucket["doc_count"]

return round((oer_elements / oer_total) * 100)
2 changes: 2 additions & 0 deletions src/app/core/config.py
@@ -19,3 +19,5 @@
ELASTICSEARCH_TIMEOUT = int(os.getenv("ELASTICSEARCH_TIMEOUT", 20))

BACKGROUND_TASK_TIME_INTERVAL = 10 * 60 # Time between consecutive background calls

ENABLE_DATABASE = True
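Unlike the neighboring settings, ENABLE_DATABASE is hardcoded rather than read from the environment. An env-driven variant consistent with the rest of the file might look like this (an assumption, not part of the commit; os is already imported above):

ENABLE_DATABASE = os.getenv("ENABLE_DATABASE", "true").lower() in ("1", "true", "yes")
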
15 changes: 12 additions & 3 deletions src/app/main.py
@@ -8,7 +8,14 @@

from app.api.analytics.background_task import background_task
from app.api.api import router
from app.core.config import ALLOWED_HOSTS, API_DEBUG, API_PORT, LOG_LEVEL, ROOT_PATH
from app.core.config import (
ALLOWED_HOSTS,
API_DEBUG,
API_PORT,
ENABLE_DATABASE,
LOG_LEVEL,
ROOT_PATH,
)
from app.core.constants import OPEN_API_VERSION
from app.core.errors import http_422_error_handler, http_error_handler
from app.core.logging import logger
@@ -34,9 +41,11 @@ def api() -> FastAPI:
_api.add_middleware(RawContextMiddleware)

_api.add_event_handler("startup", connect_to_elastic)
_api.add_event_handler("startup", create_start_app_handler(_api))
_api.add_event_handler("startup", background_task)
_api.add_event_handler("shutdown", create_stop_app_handler(_api))

if ENABLE_DATABASE:
_api.add_event_handler("startup", create_start_app_handler(_api))
_api.add_event_handler("shutdown", create_stop_app_handler(_api))

_api.add_exception_handler(HTTPException, http_error_handler)
_api.add_exception_handler(HTTP_422_UNPROCESSABLE_ENTITY, http_422_error_handler)