Skip to content

Commit

Permalink
#37 Ratio of OER content (#68)
Browse files Browse the repository at this point in the history
* KBMBF-452: #37 Show ratio of OER content

Fixing bug: wrong node_id parameter in score

* KBMBF-452: #37 Show ratio of OER content

Adding additional required properties for output

* KBMBF-452: #37 Show ratio of OER content

WIP: Fixing license

* KBMBF-452: #37 Show ratio of OER content

WIP: Fixing license

* KBMBF-452: #37 Show ratio of OER content

WIP: Fixing license test

* KBMBF-452: #37 Show ratio of OER content

Refactoring

* KBMBF-452: #37 Show ratio of OER content

Refactoring

* KBMBF-452: #37 Show ratio of OER content

Reverting API change for now
  • Loading branch information
RobertMeissner authored Jul 18, 2022
1 parent 1d97001 commit 2bbdae0
Show file tree
Hide file tree
Showing 16 changed files with 216 additions and 233 deletions.
12 changes: 2 additions & 10 deletions src/app/api/analytics/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from starlette.exceptions import HTTPException
from starlette.status import HTTP_404_NOT_FOUND

from app.core.models import ResponseModel


class StatType(str, Enum):
# PORTAL_TREE = "portal-tree" # Currently unused
Expand Down Expand Up @@ -35,16 +37,6 @@ class Config(ElasticConfig):
pass


class ResponseConfig:
allow_population_by_field_name = True
extra = Extra.ignore


class ResponseModel(BaseModel):
class Config(ResponseConfig):
pass


COUNT_STATISTICS_TYPE = dict[str, int]


Expand Down
9 changes: 3 additions & 6 deletions src/app/api/analytics/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,13 @@
_MATERIALS,
global_storage,
)
from app.api.collections.descendants import aterms
from app.api.collections.models import CollectionNode
from app.api.collections.tree import collection_tree
from app.api.score.models import (
LearningMaterialAttribute,
required_collection_properties,
)
from app.api.score.models import required_collection_properties
from app.core.config import ELASTIC_TOTAL_SIZE
from app.core.models import LearningMaterialAttribute
from app.elastic.dsl import ElasticField, aterms
from app.elastic.elastic import query_materials
from app.elastic.fields import ElasticField
from app.elastic.search import Search


Expand Down
29 changes: 4 additions & 25 deletions src/app/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,17 @@
from app.api.quality_matrix.quality_matrix import source_quality, store_in_timeline
from app.api.quality_matrix.timeline import timestamps
from app.api.quality_matrix.utils import transpose
from app.api.score.models import LearningMaterialAttribute, ScoreOutput
from app.api.score.models import ScoreOutput
from app.api.score.score import (
aggs_collection_validation,
aggs_material_validation,
calc_scores,
calc_weighted_score,
field_names_used_for_score_calculation,
get_score,
node_id_param,
search_score,
)
from app.core.config import API_DEBUG, BACKGROUND_TASK_TIME_INTERVAL
from app.core.constants import COLLECTION_NAME_TO_ID, COLLECTION_ROOT_ID
from app.elastic.elastic import ResourceType
from app.core.models import LearningMaterialAttribute


def get_database(request: Request) -> Database:
Expand Down Expand Up @@ -209,26 +207,7 @@ async def get_timestamps(
""",
)
async def score(*, node_id: uuid.UUID = Depends(node_id_param)):
collection_stats = search_score(
node_id=node_id, resource_type=ResourceType.COLLECTION
)

collection_scores = calc_scores(stats=collection_stats)

material_stats = search_score(node_id=node_id, resource_type=ResourceType.MATERIAL)

material_scores = calc_scores(stats=material_stats)

score_ = calc_weighted_score(
collection_scores=collection_scores,
material_scores=material_scores,
)

return {
"score": score_,
"collections": {"total": collection_stats["total"], **collection_scores},
"materials": {"total": material_stats["total"], **material_scores},
}
return await get_score(node_id)


class Ping(BaseModel):
Expand Down
69 changes: 9 additions & 60 deletions src/app/api/collections/descendants.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,23 @@
import uuid
from itertools import chain
from typing import Optional, Type, TypeVar, Union
from typing import Optional, Type, TypeVar

from elasticsearch_dsl.aggs import A, Agg
from elasticsearch_dsl.query import Query
from elasticsearch_dsl.response import Response
from glom import Coalesce, Iter, glom
from pydantic import BaseModel, Extra

from app.api.collections.missing_materials import (
ElasticResource,
EmptyStrToNone,
LearningMaterialAttribute,
)
from app.api.collections.missing_materials import ElasticResource, EmptyStrToNone
from app.api.collections.utils import all_source_fields
from app.core.config import ELASTIC_TOTAL_SIZE
from app.elastic.dsl import qbool, qmatch
from app.core.models import LearningMaterialAttribute, ResponseModel
from app.elastic.dsl import ElasticField, aterms, qbool, qmatch
from app.elastic.elastic import ResourceType, query_materials, type_filter
from app.elastic.fields import ElasticField, ElasticFieldType
from app.elastic.search import Search
from app.elastic.utils import handle_text_field
from app.models import _DESCENDANT_COLLECTIONS_MATERIALS_COUNTS
from app.models import CollectionAttribute as _CollectionAttribute
from app.models import ElasticResourceAttribute


class _CollectionAttribute(ElasticField):
TITLE = ("properties.cm:title", ElasticFieldType.TEXT)
DESCRIPTION = ("properties.cm:description", ElasticFieldType.TEXT)
PATH = ("path", ElasticFieldType.KEYWORD)
PARENT_ID = ("parentRef.id", ElasticFieldType.KEYWORD)
NODE_ID = ("nodeRef.id", ElasticFieldType.KEYWORD)


_COLLECTION = TypeVar("_COLLECTION")
# TODO: Remove duplicate (presumably of app.models.CollectionAttribute, imported above as _CollectionAttribute; confirm)
CollectionAttribute = ElasticField(
Expand All @@ -42,27 +29,12 @@ class _CollectionAttribute(ElasticField):
)


class ResponseConfig:
allow_population_by_field_name = True
extra = Extra.ignore


class ResponseModel(BaseModel):
class Config(ResponseConfig):
pass


class CollectionMaterialsCount(ResponseModel):
noderef_id: uuid.UUID
title: str
materials_count: int


_DESCENDANT_COLLECTIONS_MATERIALS_COUNTS = TypeVar(
"_DESCENDANT_COLLECTIONS_MATERIALS_COUNTS"
)


# TODO: Refactor
class DescendantCollectionsMaterialsCounts(BaseModel):
results: list[CollectionMaterialsCount]
Expand Down Expand Up @@ -91,17 +63,9 @@ def parse_elastic_response(
)


def aterms(qfield: Union[ElasticField, str], **kwargs) -> Agg:
kwargs["field"] = handle_text_field(qfield)
return A("terms", **kwargs)


def acomposite(sources: list[Union[Query, dict]], **kwargs) -> Agg:
return A("composite", sources=sources, **kwargs)


def agg_materials_by_collection(size: int = 65536) -> Agg:
return acomposite(
return A(
"composite",
sources=[
{
"noderef_id": aterms(
Expand All @@ -113,10 +77,6 @@ def agg_materials_by_collection(size: int = 65536) -> Agg:
)


def abucketsort(sort: list[Union[Query, dict]], **kwargs) -> Agg:
return A("bucket_sort", sort=sort, **kwargs)


def material_counts_by_descendant(
node_id: uuid.UUID,
) -> DescendantCollectionsMaterialsCounts:
Expand All @@ -131,7 +91,7 @@ def material_counts_search(node_id: uuid.UUID):
s = Search().base_filters().query(query_materials(node_id=node_id))
s.aggs.bucket("grouped_by_collection", agg_materials_by_collection()).pipeline(
"sorted_by_count",
abucketsort(sort=[{"_count": {"order": "asc"}}]),
A("bucket_sort", sort=[{"_count": {"order": "asc"}}]),
)
return s

Expand All @@ -143,17 +103,6 @@ class CollectionBase(ElasticResource):
path: Optional[list[uuid.UUID]] = None
parent_id: Optional[uuid.UUID] = None

source_fields = {
CollectionAttribute.NODEREF_ID,
CollectionAttribute.TYPE,
CollectionAttribute.NAME,
CollectionAttribute.TITLE,
CollectionAttribute.KEYWORDS,
CollectionAttribute.DESCRIPTION,
CollectionAttribute.PATH,
CollectionAttribute.PARENT_ID,
}

@classmethod
def parse_elastic_hit_to_dict(
cls: Type[_COLLECTION],
Expand Down
2 changes: 2 additions & 0 deletions src/app/api/collections/missing_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from app.api.collections.models import MissingMaterials
from app.api.collections.utils import all_source_fields, map_elastic_response_to_model
from app.core.config import ELASTIC_TOTAL_SIZE
from app.core.models import LearningMaterialAttribute
from app.elastic.dsl import qbool, qmatch
from app.elastic.elastic import ResourceType, type_filter
from app.elastic.search import Search
Expand All @@ -19,6 +20,7 @@
ElasticResourceAttribute.NAME,
ElasticResourceAttribute.KEYWORDS,
CollectionAttribute.DESCRIPTION,
LearningMaterialAttribute.LICENSES,
]


Expand Down
49 changes: 19 additions & 30 deletions src/app/api/collections/missing_materials.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@
from pydantic import BaseModel, Extra
from pydantic.validators import str_validator

from app.api.score.models import LearningMaterialAttribute
from app.core.config import ELASTIC_TOTAL_SIZE
from app.elastic.dsl import qbool, qmatch, qterm
from app.core.models import LearningMaterialAttribute, ResponseModel
from app.elastic.dsl import ElasticField, qbool, qmatch
from app.elastic.elastic import (
ResourceType,
query_missing_material_license,
type_filter,
)
from app.elastic.fields import ElasticField
from app.elastic.search import Search
from app.models import _ELASTIC_RESOURCE, ElasticResourceAttribute

Expand Down Expand Up @@ -131,16 +130,6 @@ def parse_elastic_hit_to_dict(
}


class ResponseConfig:
allow_population_by_field_name = True
extra = Extra.ignore


class ResponseModel(BaseModel):
class Config(ResponseConfig):
pass


class LearningMaterial(ResponseModel, LearningMaterialBase):
pass

Expand Down Expand Up @@ -170,6 +159,10 @@ def material_response_fields(
LearningMaterialAttribute.WWW_URL,
LearningMaterialAttribute.DESCRIPTION,
LearningMaterialAttribute.LICENSES,
LearningMaterialAttribute.OBJECT_TYPE,
LearningMaterialAttribute.LEARNINGRESOURCE_TYPE,
LearningMaterialAttribute.CONTAINS_ADS,
LearningMaterialAttribute.PUBLISHER,
]
],
)
Expand All @@ -185,31 +178,27 @@ def materials_filter_params(
return MissingAttributeFilter(attr=missing_attr)


base_filter = [
qterm(qfield=ElasticResourceAttribute.PERMISSION_READ, value="GROUP_EVERYONE"),
qterm(qfield=ElasticResourceAttribute.EDU_METADATASET, value="mds_oeh"),
qterm(qfield=ElasticResourceAttribute.PROTOCOL, value="workspace"),
]


def missing_attributes_search(
node_id: uuid.UUID, missing_attribute: str, max_hits: int
) -> Search:
if missing_attribute == LearningMaterialAttribute.LICENSES.path:
missing_attribute_query = {"filter": query_missing_material_license()}
else:
missing_attribute_query = {
"must_not": Q("wildcard", **{missing_attribute: {"value": "*"}})
}
query = {
"filter": [*type_filter[ResourceType.MATERIAL]],
"minimum_should_match": 1,
"should": [
qmatch(**{"path": node_id}),
qmatch(**{"nodeRef.id": node_id}),
qmatch(**{"collections.path": node_id}),
qmatch(**{"collections.nodeRef.id": node_id}),
],
**missing_attribute_query,
"filter": type_filter[
ResourceType.MATERIAL
].copy(), # copy otherwise appending the query causes mutation
}
if missing_attribute == LearningMaterialAttribute.LICENSES.path:
query["filter"].append(query_missing_material_license().to_dict())
else:
query.update(
{
"must_not": Q("wildcard", **{missing_attribute: {"value": "*"}}),
}
)

return (
Search()
Expand Down
43 changes: 3 additions & 40 deletions src/app/api/score/models.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,5 @@
from itertools import chain

from pydantic import BaseModel, Field

from app.elastic.fields import ElasticField, ElasticFieldType
from app.models import ElasticResourceAttribute


class _LearningMaterialAttribute(ElasticField):
TITLE = ("properties.cclom:title", ElasticFieldType.TEXT)
SUBJECTS = ("properties.ccm:taxonid", ElasticFieldType.TEXT)
SUBJECTS_DE = ("i18n.de_DE.ccm:taxonid", ElasticFieldType.TEXT)
WWW_URL = ("properties.ccm:wwwurl", ElasticFieldType.TEXT)
DESCRIPTION = ("properties.cclom:general_description", ElasticFieldType.TEXT)
LICENSES = ("properties.ccm:commonlicense_key", ElasticFieldType.TEXT)
COLLECTION_NODEREF_ID = ("collections.nodeRef.id", ElasticFieldType.TEXT)
COLLECTION_PATH = ("collections.path", ElasticFieldType.TEXT)
CONTENT_FULLTEXT = ("content.fulltext", ElasticFieldType.TEXT)
LEARNINGRESOURCE_TYPE = (
"properties.ccm:oeh_lrt_aggregated",
ElasticFieldType.TEXT,
)
LEARNINGRESOURCE_TYPE_DE = (
"i18n.de_DE.ccm:oeh_lrt_aggregated",
ElasticFieldType.TEXT,
)
EDUENDUSERROLE_DE = (
"i18n.de_DE.ccm:educationalintendedenduserrole",
ElasticFieldType.TEXT,
)
CONTAINS_ADS = ("properties.ccm:containsAdvertisement", ElasticFieldType.TEXT)
OBJECT_TYPE = ("properties.ccm:objecttype", ElasticFieldType.TEXT)


LearningMaterialAttribute = ElasticField(
"LearningMaterialAttribute",
[
(f.name, (f.value, f.field_type))
for f in chain(ElasticResourceAttribute, _LearningMaterialAttribute)
],
)


class MissingCollectionProperties(BaseModel):
total: int = Field(default=0, gt=0, description="Number of entries")
Expand Down Expand Up @@ -108,6 +68,9 @@ class MissingMaterialProperties(BaseModel):

class ScoreOutput(BaseModel):
score: int = Field(default=0, gt=0, le=100, description="Overall score")
oer_ratio: int = Field(
default=0, gt=0, le=100, description="Overall ratio of OER content"
)
collections: MissingCollectionProperties = Field(
description="Score for specific collection properties"
)
Expand Down
Loading

0 comments on commit 2bbdae0

Please sign in to comment.