Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

37 ratio of oer content #68

Merged
merged 9 commits into from
Jul 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 2 additions & 10 deletions src/app/api/analytics/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from starlette.exceptions import HTTPException
from starlette.status import HTTP_404_NOT_FOUND

from app.core.models import ResponseModel


class StatType(str, Enum):
# PORTAL_TREE = "portal-tree" # Currently unused
Expand Down Expand Up @@ -35,16 +37,6 @@ class Config(ElasticConfig):
pass


class ResponseConfig:
allow_population_by_field_name = True
extra = Extra.ignore


class ResponseModel(BaseModel):
class Config(ResponseConfig):
pass


COUNT_STATISTICS_TYPE = dict[str, int]


Expand Down
9 changes: 3 additions & 6 deletions src/app/api/analytics/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,13 @@
_MATERIALS,
global_storage,
)
from app.api.collections.descendants import aterms
from app.api.collections.models import CollectionNode
from app.api.collections.tree import collection_tree
from app.api.score.models import (
LearningMaterialAttribute,
required_collection_properties,
)
from app.api.score.models import required_collection_properties
from app.core.config import ELASTIC_TOTAL_SIZE
from app.core.models import LearningMaterialAttribute
from app.elastic.dsl import ElasticField, aterms
from app.elastic.elastic import query_materials
from app.elastic.fields import ElasticField
from app.elastic.search import Search


Expand Down
29 changes: 4 additions & 25 deletions src/app/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,17 @@
from app.api.quality_matrix.quality_matrix import source_quality, store_in_timeline
from app.api.quality_matrix.timeline import timestamps
from app.api.quality_matrix.utils import transpose
from app.api.score.models import LearningMaterialAttribute, ScoreOutput
from app.api.score.models import ScoreOutput
from app.api.score.score import (
aggs_collection_validation,
aggs_material_validation,
calc_scores,
calc_weighted_score,
field_names_used_for_score_calculation,
get_score,
node_id_param,
search_score,
)
from app.core.config import API_DEBUG, BACKGROUND_TASK_TIME_INTERVAL
from app.core.constants import COLLECTION_NAME_TO_ID, COLLECTION_ROOT_ID
from app.elastic.elastic import ResourceType
from app.core.models import LearningMaterialAttribute


def get_database(request: Request) -> Database:
Expand Down Expand Up @@ -209,26 +207,7 @@ async def get_timestamps(
""",
)
async def score(*, node_id: uuid.UUID = Depends(node_id_param)):
collection_stats = search_score(
node_id=node_id, resource_type=ResourceType.COLLECTION
)

collection_scores = calc_scores(stats=collection_stats)

material_stats = search_score(node_id=node_id, resource_type=ResourceType.MATERIAL)

material_scores = calc_scores(stats=material_stats)

score_ = calc_weighted_score(
collection_scores=collection_scores,
material_scores=material_scores,
)

return {
"score": score_,
"collections": {"total": collection_stats["total"], **collection_scores},
"materials": {"total": material_stats["total"], **material_scores},
}
return await get_score(node_id)


class Ping(BaseModel):
Expand Down
69 changes: 9 additions & 60 deletions src/app/api/collections/descendants.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,23 @@
import uuid
from itertools import chain
from typing import Optional, Type, TypeVar, Union
from typing import Optional, Type, TypeVar

from elasticsearch_dsl.aggs import A, Agg
from elasticsearch_dsl.query import Query
from elasticsearch_dsl.response import Response
from glom import Coalesce, Iter, glom
from pydantic import BaseModel, Extra

from app.api.collections.missing_materials import (
ElasticResource,
EmptyStrToNone,
LearningMaterialAttribute,
)
from app.api.collections.missing_materials import ElasticResource, EmptyStrToNone
from app.api.collections.utils import all_source_fields
from app.core.config import ELASTIC_TOTAL_SIZE
from app.elastic.dsl import qbool, qmatch
from app.core.models import LearningMaterialAttribute, ResponseModel
from app.elastic.dsl import ElasticField, aterms, qbool, qmatch
from app.elastic.elastic import ResourceType, query_materials, type_filter
from app.elastic.fields import ElasticField, ElasticFieldType
from app.elastic.search import Search
from app.elastic.utils import handle_text_field
from app.models import _DESCENDANT_COLLECTIONS_MATERIALS_COUNTS
from app.models import CollectionAttribute as _CollectionAttribute
from app.models import ElasticResourceAttribute


class _CollectionAttribute(ElasticField):
TITLE = ("properties.cm:title", ElasticFieldType.TEXT)
DESCRIPTION = ("properties.cm:description", ElasticFieldType.TEXT)
PATH = ("path", ElasticFieldType.KEYWORD)
PARENT_ID = ("parentRef.id", ElasticFieldType.KEYWORD)
NODE_ID = ("nodeRef.id", ElasticFieldType.KEYWORD)


_COLLECTION = TypeVar("_COLLECTION")
# TODO Remove duplicate
CollectionAttribute = ElasticField(
Expand All @@ -42,27 +29,12 @@ class _CollectionAttribute(ElasticField):
)


class ResponseConfig:
allow_population_by_field_name = True
extra = Extra.ignore


class ResponseModel(BaseModel):
class Config(ResponseConfig):
pass


class CollectionMaterialsCount(ResponseModel):
noderef_id: uuid.UUID
title: str
materials_count: int


_DESCENDANT_COLLECTIONS_MATERIALS_COUNTS = TypeVar(
"_DESCENDANT_COLLECTIONS_MATERIALS_COUNTS"
)


# TODO: Refactor
class DescendantCollectionsMaterialsCounts(BaseModel):
results: list[CollectionMaterialsCount]
Expand Down Expand Up @@ -91,17 +63,9 @@ def parse_elastic_response(
)


def aterms(qfield: Union[ElasticField, str], **kwargs) -> Agg:
kwargs["field"] = handle_text_field(qfield)
return A("terms", **kwargs)


def acomposite(sources: list[Union[Query, dict]], **kwargs) -> Agg:
return A("composite", sources=sources, **kwargs)


def agg_materials_by_collection(size: int = 65536) -> Agg:
return acomposite(
return A(
"composite",
sources=[
{
"noderef_id": aterms(
Expand All @@ -113,10 +77,6 @@ def agg_materials_by_collection(size: int = 65536) -> Agg:
)


def abucketsort(sort: list[Union[Query, dict]], **kwargs) -> Agg:
return A("bucket_sort", sort=sort, **kwargs)


def material_counts_by_descendant(
node_id: uuid.UUID,
) -> DescendantCollectionsMaterialsCounts:
Expand All @@ -131,7 +91,7 @@ def material_counts_search(node_id: uuid.UUID):
s = Search().base_filters().query(query_materials(node_id=node_id))
s.aggs.bucket("grouped_by_collection", agg_materials_by_collection()).pipeline(
"sorted_by_count",
abucketsort(sort=[{"_count": {"order": "asc"}}]),
A("bucket_sort", sort=[{"_count": {"order": "asc"}}]),
)
return s

Expand All @@ -143,17 +103,6 @@ class CollectionBase(ElasticResource):
path: Optional[list[uuid.UUID]] = None
parent_id: Optional[uuid.UUID] = None

source_fields = {
CollectionAttribute.NODEREF_ID,
CollectionAttribute.TYPE,
CollectionAttribute.NAME,
CollectionAttribute.TITLE,
CollectionAttribute.KEYWORDS,
CollectionAttribute.DESCRIPTION,
CollectionAttribute.PATH,
CollectionAttribute.PARENT_ID,
}

@classmethod
def parse_elastic_hit_to_dict(
cls: Type[_COLLECTION],
Expand Down
2 changes: 2 additions & 0 deletions src/app/api/collections/missing_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from app.api.collections.models import MissingMaterials
from app.api.collections.utils import all_source_fields, map_elastic_response_to_model
from app.core.config import ELASTIC_TOTAL_SIZE
from app.core.models import LearningMaterialAttribute
from app.elastic.dsl import qbool, qmatch
from app.elastic.elastic import ResourceType, type_filter
from app.elastic.search import Search
Expand All @@ -19,6 +20,7 @@
ElasticResourceAttribute.NAME,
ElasticResourceAttribute.KEYWORDS,
CollectionAttribute.DESCRIPTION,
LearningMaterialAttribute.LICENSES,
]


Expand Down
49 changes: 19 additions & 30 deletions src/app/api/collections/missing_materials.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@
from pydantic import BaseModel, Extra
from pydantic.validators import str_validator

from app.api.score.models import LearningMaterialAttribute
from app.core.config import ELASTIC_TOTAL_SIZE
from app.elastic.dsl import qbool, qmatch, qterm
from app.core.models import LearningMaterialAttribute, ResponseModel
from app.elastic.dsl import ElasticField, qbool, qmatch
from app.elastic.elastic import (
ResourceType,
query_missing_material_license,
type_filter,
)
from app.elastic.fields import ElasticField
from app.elastic.search import Search
from app.models import _ELASTIC_RESOURCE, ElasticResourceAttribute

Expand Down Expand Up @@ -131,16 +130,6 @@ def parse_elastic_hit_to_dict(
}


class ResponseConfig:
allow_population_by_field_name = True
extra = Extra.ignore


class ResponseModel(BaseModel):
class Config(ResponseConfig):
pass


class LearningMaterial(ResponseModel, LearningMaterialBase):
pass

Expand Down Expand Up @@ -170,6 +159,10 @@ def material_response_fields(
LearningMaterialAttribute.WWW_URL,
LearningMaterialAttribute.DESCRIPTION,
LearningMaterialAttribute.LICENSES,
LearningMaterialAttribute.OBJECT_TYPE,
LearningMaterialAttribute.LEARNINGRESOURCE_TYPE,
LearningMaterialAttribute.CONTAINS_ADS,
LearningMaterialAttribute.PUBLISHER,
]
],
)
Expand All @@ -185,31 +178,27 @@ def materials_filter_params(
return MissingAttributeFilter(attr=missing_attr)


base_filter = [
qterm(qfield=ElasticResourceAttribute.PERMISSION_READ, value="GROUP_EVERYONE"),
qterm(qfield=ElasticResourceAttribute.EDU_METADATASET, value="mds_oeh"),
qterm(qfield=ElasticResourceAttribute.PROTOCOL, value="workspace"),
]


def missing_attributes_search(
node_id: uuid.UUID, missing_attribute: str, max_hits: int
) -> Search:
if missing_attribute == LearningMaterialAttribute.LICENSES.path:
missing_attribute_query = {"filter": query_missing_material_license()}
else:
missing_attribute_query = {
"must_not": Q("wildcard", **{missing_attribute: {"value": "*"}})
}
query = {
"filter": [*type_filter[ResourceType.MATERIAL]],
"minimum_should_match": 1,
"should": [
qmatch(**{"path": node_id}),
qmatch(**{"nodeRef.id": node_id}),
qmatch(**{"collections.path": node_id}),
qmatch(**{"collections.nodeRef.id": node_id}),
],
**missing_attribute_query,
"filter": type_filter[
ResourceType.MATERIAL
].copy(), # copy otherwise appending the query causes mutation
}
if missing_attribute == LearningMaterialAttribute.LICENSES.path:
query["filter"].append(query_missing_material_license().to_dict())
else:
query.update(
{
"must_not": Q("wildcard", **{missing_attribute: {"value": "*"}}),
}
)

return (
Search()
Expand Down
43 changes: 3 additions & 40 deletions src/app/api/score/models.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,5 @@
from itertools import chain

from pydantic import BaseModel, Field

from app.elastic.fields import ElasticField, ElasticFieldType
from app.models import ElasticResourceAttribute


class _LearningMaterialAttribute(ElasticField):
TITLE = ("properties.cclom:title", ElasticFieldType.TEXT)
SUBJECTS = ("properties.ccm:taxonid", ElasticFieldType.TEXT)
SUBJECTS_DE = ("i18n.de_DE.ccm:taxonid", ElasticFieldType.TEXT)
WWW_URL = ("properties.ccm:wwwurl", ElasticFieldType.TEXT)
DESCRIPTION = ("properties.cclom:general_description", ElasticFieldType.TEXT)
LICENSES = ("properties.ccm:commonlicense_key", ElasticFieldType.TEXT)
COLLECTION_NODEREF_ID = ("collections.nodeRef.id", ElasticFieldType.TEXT)
COLLECTION_PATH = ("collections.path", ElasticFieldType.TEXT)
CONTENT_FULLTEXT = ("content.fulltext", ElasticFieldType.TEXT)
LEARNINGRESOURCE_TYPE = (
"properties.ccm:oeh_lrt_aggregated",
ElasticFieldType.TEXT,
)
LEARNINGRESOURCE_TYPE_DE = (
"i18n.de_DE.ccm:oeh_lrt_aggregated",
ElasticFieldType.TEXT,
)
EDUENDUSERROLE_DE = (
"i18n.de_DE.ccm:educationalintendedenduserrole",
ElasticFieldType.TEXT,
)
CONTAINS_ADS = ("properties.ccm:containsAdvertisement", ElasticFieldType.TEXT)
OBJECT_TYPE = ("properties.ccm:objecttype", ElasticFieldType.TEXT)


LearningMaterialAttribute = ElasticField(
"LearningMaterialAttribute",
[
(f.name, (f.value, f.field_type))
for f in chain(ElasticResourceAttribute, _LearningMaterialAttribute)
],
)


class MissingCollectionProperties(BaseModel):
total: int = Field(default=0, gt=0, description="Number of entries")
Expand Down Expand Up @@ -108,6 +68,9 @@ class MissingMaterialProperties(BaseModel):

class ScoreOutput(BaseModel):
score: int = Field(default=0, gt=0, le=100, description="Overall score")
oer_ratio: int = Field(
default=0, gt=0, le=100, description="Overall ratio of OER content"
)
collections: MissingCollectionProperties = Field(
description="Score for specific collection properties"
)
Expand Down
Loading