Skip to content

Commit

Permalink
KBMBF-452: #37 Show ratio of oer-content
Browse files Browse the repository at this point in the history
OER for score
  • Loading branch information
Robert Meissner committed Jul 19, 2022
1 parent 8f9ed81 commit 3b129af
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 23 deletions.
2 changes: 1 addition & 1 deletion src/app/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ async def get_timestamps(


@router.get(
"/collections/{node_id}/stats/score",
"/collections/{node_id}/score",
response_model=ScoreOutput,
status_code=HTTP_200_OK,
responses={HTTP_404_NOT_FOUND: {"description": "Collection not found"}},
Expand Down
9 changes: 5 additions & 4 deletions src/app/api/collections/counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class AggregationMappings(str, Enum):


def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) -> Search:
s = Search().base_filters().query(query_materials(node_id=node_id))
search = Search().base_filters().query(query_materials(node_id=node_id))
material_agg = A(
"terms", field="collections.nodeRef.id.keyword", size=ELASTIC_TOTAL_SIZE
)
Expand All @@ -47,16 +47,17 @@ def collection_counts_search(node_id: uuid.UUID, facet: AggregationMappings) ->
size=ELASTIC_TOTAL_SIZE,
),
)
s.aggs.bucket(_AGGREGATION_NAME, material_agg)
s = s.source(

search.aggs.bucket(_AGGREGATION_NAME, material_agg)
search = search.source(
[
ElasticResourceAttribute.NODEREF_ID.path,
CollectionAttribute.TITLE.path,
CollectionAttribute.PATH.path,
CollectionAttribute.PARENT_ID.path,
]
)[:0]
return s
return search


async def collection_counts(
Expand Down
6 changes: 5 additions & 1 deletion src/app/api/quality_matrix/quality_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from app.api.quality_matrix.utils import default_properties
from app.api.score.models import required_collection_properties
from app.core.config import ELASTIC_TOTAL_SIZE
from app.core.constants import COLLECTION_ROOT_ID, PROPERTIES, REPLICATION_SOURCE_ID
from app.core.constants import COLLECTION_ROOT_ID
from app.core.logging import logger
from app.elastic.dsl import qbool, qmatch
from app.elastic.search import Search
Expand Down Expand Up @@ -138,6 +138,10 @@ async def items_in_response(response: Response) -> dict:
return response.aggregations.to_dict().items()


# Joined as f"{PROPERTIES}.{REPLICATION_SOURCE_ID}" to form the default
# ``match_keyword`` argument of ``source_quality`` below.
REPLICATION_SOURCE_ID = "ccm:replicationsource"
PROPERTIES = "properties"


async def source_quality(
node_id: uuid.UUID = COLLECTION_ROOT_ID,
match_keyword: str = f"{PROPERTIES}.{REPLICATION_SOURCE_ID}",
Expand Down
2 changes: 1 addition & 1 deletion src/app/api/score/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class MissingMaterialProperties(BaseModel):
class ScoreOutput(BaseModel):
score: int = Field(default=0, gt=0, le=100, description="Overall score")
oer_ratio: int = Field(
default=0, gt=0, le=100, description="Overall ratio of OER content"
default=0, ge=0, le=100, description="Overall ratio of OER content"
)
collections: MissingCollectionProperties = Field(
description="Score for specific collection properties"
Expand Down
57 changes: 44 additions & 13 deletions src/app/api/score/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,17 @@
from elasticsearch_dsl.response import Response
from fastapi import Path

import app.core.constants
from app.api.collections.counts import (
_AGGREGATION_NAME,
AggregationMappings,
collection_counts_search,
)
from app.api.score.models import (
MissingCollectionProperties,
MissingMaterialProperties,
ScoreOutput,
)
from app.core.constants import COLLECTION_NAME_TO_ID
from app.core.models import LearningMaterialAttribute
from app.elastic.dsl import afilter, amissing
from app.elastic.elastic import (
Expand Down Expand Up @@ -53,10 +63,9 @@ def calc_weighted_score(collection_scores: dict, material_scores: dict) -> int:


def get_score_search(node_id: uuid.UUID, resource_type: ResourceType) -> Search:
query, aggs = None, None
if resource_type is ResourceType.COLLECTION:
query, aggs = query_collections, aggs_collection_validation
elif resource_type is ResourceType.MATERIAL:
else: # ResourceType.MATERIAL
query, aggs = query_materials, aggs_material_validation
s = Search().base_filters().query(query(node_id=node_id))
for name, _agg in aggs.items():
Expand All @@ -81,8 +90,7 @@ def search_score(node_id: uuid.UUID, resource_type: ResourceType) -> dict:


def node_id_param(
    *, node_id: uuid.UUID = Path(..., examples=COLLECTION_NAME_TO_ID)
) -> uuid.UUID:
    """Declare ``node_id`` as a required path parameter and pass it through.

    The parameter is keyword-only and declared via ``Path(...)``, with
    ``COLLECTION_NAME_TO_ID`` supplied as its ``examples``.

    :param node_id: collection id taken from the request path.
    :return: the same ``node_id``, unchanged.
    """
    return node_id

Expand Down Expand Up @@ -126,19 +134,42 @@ def field_names_used_for_score_calculation(properties: dict) -> list[str]:
}


async def get_score(node_id: uuid.UUID) -> ScoreOutput:
    """Assemble the score report for the collection identified by ``node_id``.

    Collection and material statistics are fetched separately via
    ``search_score``, converted to per-property scores with ``calc_scores``
    and combined into one overall value by ``calc_weighted_score``. The OER
    ratio from ``oer_ratio`` is reported alongside the score.

    :param node_id: id of the collection to score.
    :return: ``ScoreOutput`` holding the overall score, the OER ratio and the
        per-property results (including totals) for collections and materials.
    """
    collection_stats = search_score(
        node_id=node_id, resource_type=ResourceType.COLLECTION
    )
    collection_scores = calc_scores(stats=collection_stats)

    material_stats = search_score(node_id=node_id, resource_type=ResourceType.MATERIAL)
    material_scores = calc_scores(stats=material_stats)

    score_ = calc_weighted_score(
        collection_scores=collection_scores, material_scores=material_scores
    )

    oer = oer_ratio(node_id)

    collections = MissingCollectionProperties(
        total=collection_stats["total"], **collection_scores
    )
    materials = MissingMaterialProperties(
        total=material_stats["total"], **material_scores
    )
    return ScoreOutput(
        score=score_, collections=collections, materials=materials, oer_ratio=oer
    )


def oer_ratio(node_id: uuid.UUID) -> int:
    """Return the percentage of OER-licensed entries counted for ``node_id``.

    Executes the license-faceted ``collection_counts_search`` and sums the
    ``doc_count`` of every license bucket. Buckets whose key is one of the
    OER licenses (``CC_0``, ``PDM``, ``CC_BY``, ``CC_BY_SA``) count as OER.

    :param node_id: id of the collection whose materials are evaluated.
    :return: rounded percentage in the range 0..100; 0 when the search
        yields no buckets at all.
    """
    oer_licenses = frozenset({"CC_0", "PDM", "CC_BY", "CC_BY_SA"})
    response = collection_counts_search(node_id, AggregationMappings.license).execute()

    oer_elements = 0
    oer_total = 0
    for data in response.aggregations[_AGGREGATION_NAME].buckets:
        for bucket in data["facet"]["buckets"]:
            count = bucket["doc_count"]
            oer_total += count
            if bucket["key"] in oer_licenses:
                oer_elements += count

    if oer_total == 0:
        # Nothing was counted (e.g. an empty collection): report 0 % instead
        # of failing with ZeroDivisionError.
        return 0
    return round((oer_elements / oer_total) * 100)
3 changes: 0 additions & 3 deletions src/app/core/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
REPLICATION_SOURCE_ID = "ccm:replicationsource"
PROPERTIES = "properties"

COLLECTION_NAME_TO_ID = {
"Physik": {"value": "94f22c9b-0d3a-4c1c-8987-4c8e83f3a92e"},
"Mathematik": {"value": "bd8be6d5-0fbe-4534-a4b3-773154ba6abc"},
Expand Down

0 comments on commit 3b129af

Please sign in to comment.