diff --git a/src/app/api/analytics/analytics.py b/src/app/api/analytics/analytics.py index 2691083..f822273 100644 --- a/src/app/api/analytics/analytics.py +++ b/src/app/api/analytics/analytics.py @@ -8,6 +8,8 @@ from starlette.exceptions import HTTPException from starlette.status import HTTP_404_NOT_FOUND +from app.core.models import ResponseModel + class StatType(str, Enum): # PORTAL_TREE = "portal-tree" # Currently unused @@ -35,16 +37,6 @@ class Config(ElasticConfig): pass -class ResponseConfig: - allow_population_by_field_name = True - extra = Extra.ignore - - -class ResponseModel(BaseModel): - class Config(ResponseConfig): - pass - - COUNT_STATISTICS_TYPE = dict[str, int] diff --git a/src/app/api/analytics/stats.py b/src/app/api/analytics/stats.py index bff8047..4375962 100644 --- a/src/app/api/analytics/stats.py +++ b/src/app/api/analytics/stats.py @@ -24,16 +24,13 @@ _MATERIALS, global_storage, ) -from app.api.collections.descendants import aterms from app.api.collections.models import CollectionNode from app.api.collections.tree import collection_tree -from app.api.score.models import ( - LearningMaterialAttribute, - required_collection_properties, -) +from app.api.score.models import required_collection_properties from app.core.config import ELASTIC_TOTAL_SIZE +from app.core.models import LearningMaterialAttribute +from app.elastic.dsl import ElasticField, aterms from app.elastic.elastic import query_materials -from app.elastic.fields import ElasticField from app.elastic.search import Search diff --git a/src/app/api/api.py b/src/app/api/api.py index 898b53d..65cf4ca 100644 --- a/src/app/api/api.py +++ b/src/app/api/api.py @@ -50,19 +50,17 @@ from app.api.quality_matrix.quality_matrix import source_quality, store_in_timeline from app.api.quality_matrix.timeline import timestamps from app.api.quality_matrix.utils import transpose -from app.api.score.models import LearningMaterialAttribute, ScoreOutput +from app.api.score.models import ScoreOutput from app.api.score.score import ( aggs_collection_validation, aggs_material_validation, - calc_scores, - calc_weighted_score, field_names_used_for_score_calculation, + get_score, node_id_param, - search_score, ) from app.core.config import API_DEBUG, BACKGROUND_TASK_TIME_INTERVAL from app.core.constants import COLLECTION_NAME_TO_ID, COLLECTION_ROOT_ID -from app.elastic.elastic import ResourceType +from app.core.models import LearningMaterialAttribute def get_database(request: Request) -> Database: @@ -209,26 +207,7 @@ async def get_timestamps( """, ) async def score(*, node_id: uuid.UUID = Depends(node_id_param)): - collection_stats = search_score( - node_id=node_id, resource_type=ResourceType.COLLECTION - ) - - collection_scores = calc_scores(stats=collection_stats) - - material_stats = search_score(node_id=node_id, resource_type=ResourceType.MATERIAL) - - material_scores = calc_scores(stats=material_stats) - - score_ = calc_weighted_score( - collection_scores=collection_scores, - material_scores=material_scores, - ) - - return { - "score": score_, - "collections": {"total": collection_stats["total"], **collection_scores}, - "materials": {"total": material_stats["total"], **material_scores}, - } + return await get_score(node_id) class Ping(BaseModel): diff --git a/src/app/api/collections/descendants.py b/src/app/api/collections/descendants.py index 71dcb8d..5e798e6 100644 --- a/src/app/api/collections/descendants.py +++ b/src/app/api/collections/descendants.py @@ -1,36 +1,23 @@ import uuid from itertools import chain -from typing import Optional, Type, TypeVar, Union +from typing import Optional, Type, TypeVar from elasticsearch_dsl.aggs import A, Agg -from elasticsearch_dsl.query import Query from elasticsearch_dsl.response import Response from glom import Coalesce, Iter, glom from pydantic import BaseModel, Extra -from app.api.collections.missing_materials import ( - ElasticResource, - EmptyStrToNone, - LearningMaterialAttribute, -) +from app.api.collections.missing_materials import ElasticResource, EmptyStrToNone from app.api.collections.utils import all_source_fields from app.core.config import ELASTIC_TOTAL_SIZE -from app.elastic.dsl import qbool, qmatch +from app.core.models import LearningMaterialAttribute, ResponseModel +from app.elastic.dsl import ElasticField, aterms, qbool, qmatch from app.elastic.elastic import ResourceType, query_materials, type_filter -from app.elastic.fields import ElasticField, ElasticFieldType from app.elastic.search import Search -from app.elastic.utils import handle_text_field +from app.models import _DESCENDANT_COLLECTIONS_MATERIALS_COUNTS +from app.models import CollectionAttribute as _CollectionAttribute from app.models import ElasticResourceAttribute - -class _CollectionAttribute(ElasticField): - TITLE = ("properties.cm:title", ElasticFieldType.TEXT) - DESCRIPTION = ("properties.cm:description", ElasticFieldType.TEXT) - PATH = ("path", ElasticFieldType.KEYWORD) - PARENT_ID = ("parentRef.id", ElasticFieldType.KEYWORD) - NODE_ID = ("nodeRef.id", ElasticFieldType.KEYWORD) - - _COLLECTION = TypeVar("_COLLECTION") # TODO Remove duplicate CollectionAttribute = ElasticField( @@ -42,27 +29,12 @@ class _CollectionAttribute(ElasticField): ) -class ResponseConfig: - allow_population_by_field_name = True - extra = Extra.ignore - - -class ResponseModel(BaseModel): - class Config(ResponseConfig): - pass - - class CollectionMaterialsCount(ResponseModel): noderef_id: uuid.UUID title: str materials_count: int -_DESCENDANT_COLLECTIONS_MATERIALS_COUNTS = TypeVar( - "_DESCENDANT_COLLECTIONS_MATERIALS_COUNTS" -) - - # TODO: Refactor class DescendantCollectionsMaterialsCounts(BaseModel): results: list[CollectionMaterialsCount] @@ -91,17 +63,9 @@ def parse_elastic_response( ) -def aterms(qfield: Union[ElasticField, str], **kwargs) -> Agg: - kwargs["field"] = handle_text_field(qfield) - return A("terms", **kwargs) - - -def acomposite(sources: list[Union[Query, dict]], **kwargs) -> Agg: - return A("composite", sources=sources, **kwargs) - - def agg_materials_by_collection(size: int = 65536) -> Agg: - return acomposite( + return A( + "composite", sources=[ { "noderef_id": aterms( @@ -113,10 +77,6 @@ def agg_materials_by_collection(size: int = 65536) -> Agg: ) -def abucketsort(sort: list[Union[Query, dict]], **kwargs) -> Agg: - return A("bucket_sort", sort=sort, **kwargs) - - def material_counts_by_descendant( node_id: uuid.UUID, ) -> DescendantCollectionsMaterialsCounts: @@ -131,7 +91,7 @@ def material_counts_search(node_id: uuid.UUID): s = Search().base_filters().query(query_materials(node_id=node_id)) s.aggs.bucket("grouped_by_collection", agg_materials_by_collection()).pipeline( "sorted_by_count", - abucketsort(sort=[{"_count": {"order": "asc"}}]), + A("bucket_sort", sort=[{"_count": {"order": "asc"}}]), ) return s @@ -143,17 +103,6 @@ class CollectionBase(ElasticResource): path: Optional[list[uuid.UUID]] = None parent_id: Optional[uuid.UUID] = None - source_fields = { - CollectionAttribute.NODEREF_ID, - CollectionAttribute.TYPE, - CollectionAttribute.NAME, - CollectionAttribute.TITLE, - CollectionAttribute.KEYWORDS, - CollectionAttribute.DESCRIPTION, - CollectionAttribute.PATH, - CollectionAttribute.PARENT_ID, - } - @classmethod def parse_elastic_hit_to_dict( cls: Type[_COLLECTION], diff --git a/src/app/api/collections/missing_attributes.py b/src/app/api/collections/missing_attributes.py index 036ef57..d23003a 100644 --- a/src/app/api/collections/missing_attributes.py +++ b/src/app/api/collections/missing_attributes.py @@ -9,6 +9,7 @@ from app.api.collections.models import MissingMaterials from app.api.collections.utils import all_source_fields, map_elastic_response_to_model from app.core.config import ELASTIC_TOTAL_SIZE +from app.core.models import LearningMaterialAttribute from app.elastic.dsl import qbool, qmatch from app.elastic.elastic import ResourceType, type_filter from app.elastic.search import Search @@ -19,6 +20,7 @@ ElasticResourceAttribute.NAME, ElasticResourceAttribute.KEYWORDS, CollectionAttribute.DESCRIPTION, + LearningMaterialAttribute.LICENSES, ] diff --git a/src/app/api/collections/missing_materials.py b/src/app/api/collections/missing_materials.py index 94eaf1f..2c6f18d 100644 --- a/src/app/api/collections/missing_materials.py +++ b/src/app/api/collections/missing_materials.py @@ -7,15 +7,14 @@ from pydantic import BaseModel, Extra from pydantic.validators import str_validator -from app.api.score.models import LearningMaterialAttribute from app.core.config import ELASTIC_TOTAL_SIZE -from app.elastic.dsl import qbool, qmatch, qterm +from app.core.models import LearningMaterialAttribute, ResponseModel +from app.elastic.dsl import ElasticField, qbool, qmatch from app.elastic.elastic import ( ResourceType, query_missing_material_license, type_filter, ) -from app.elastic.fields import ElasticField from app.elastic.search import Search from app.models import _ELASTIC_RESOURCE, ElasticResourceAttribute @@ -131,16 +130,6 @@ def parse_elastic_hit_to_dict( } -class ResponseConfig: - allow_population_by_field_name = True - extra = Extra.ignore - - -class ResponseModel(BaseModel): - class Config(ResponseConfig): - pass - - class LearningMaterial(ResponseModel, LearningMaterialBase): pass @@ -170,6 +159,10 @@ def material_response_fields( LearningMaterialAttribute.WWW_URL, LearningMaterialAttribute.DESCRIPTION, LearningMaterialAttribute.LICENSES, + LearningMaterialAttribute.OBJECT_TYPE, + LearningMaterialAttribute.LEARNINGRESOURCE_TYPE, + LearningMaterialAttribute.CONTAINS_ADS, + LearningMaterialAttribute.PUBLISHER, ] ], ) @@ -185,31 +178,27 @@ def materials_filter_params( return MissingAttributeFilter(attr=missing_attr) -base_filter = [ - qterm(qfield=ElasticResourceAttribute.PERMISSION_READ, value="GROUP_EVERYONE"), - qterm(qfield=ElasticResourceAttribute.EDU_METADATASET, value="mds_oeh"), - qterm(qfield=ElasticResourceAttribute.PROTOCOL, value="workspace"), -] - - def missing_attributes_search( node_id: uuid.UUID, missing_attribute: str, max_hits: int ) -> Search: - if missing_attribute == LearningMaterialAttribute.LICENSES.path: - missing_attribute_query = {"filter": query_missing_material_license()} - else: - missing_attribute_query = { - "must_not": Q("wildcard", **{missing_attribute: {"value": "*"}}) - } query = { - "filter": [*type_filter[ResourceType.MATERIAL]], "minimum_should_match": 1, "should": [ - qmatch(**{"path": node_id}), - qmatch(**{"nodeRef.id": node_id}), + qmatch(**{"collections.path": node_id}), + qmatch(**{"collections.nodeRef.id": node_id}), ], - **missing_attribute_query, + "filter": type_filter[ + ResourceType.MATERIAL + ].copy(), # copy otherwise appending the query causes mutation } + if missing_attribute == LearningMaterialAttribute.LICENSES.path: + query["filter"].append(query_missing_material_license().to_dict()) + else: + query.update( + { + "must_not": Q("wildcard", **{missing_attribute: {"value": "*"}}), + } + ) return ( Search() diff --git a/src/app/api/score/models.py b/src/app/api/score/models.py index f904d44..8035c42 100644 --- a/src/app/api/score/models.py +++ b/src/app/api/score/models.py @@ -1,45 +1,5 @@ -from itertools import chain - from pydantic import BaseModel, Field -from app.elastic.fields import ElasticField, ElasticFieldType -from app.models import ElasticResourceAttribute - - -class _LearningMaterialAttribute(ElasticField): - TITLE = ("properties.cclom:title", ElasticFieldType.TEXT) - SUBJECTS = ("properties.ccm:taxonid", ElasticFieldType.TEXT) - SUBJECTS_DE = ("i18n.de_DE.ccm:taxonid", ElasticFieldType.TEXT) - WWW_URL = ("properties.ccm:wwwurl", ElasticFieldType.TEXT) - DESCRIPTION = ("properties.cclom:general_description", ElasticFieldType.TEXT) - LICENSES = ("properties.ccm:commonlicense_key", ElasticFieldType.TEXT) - COLLECTION_NODEREF_ID = ("collections.nodeRef.id", ElasticFieldType.TEXT) - COLLECTION_PATH = ("collections.path", ElasticFieldType.TEXT) - CONTENT_FULLTEXT = ("content.fulltext", ElasticFieldType.TEXT) - LEARNINGRESOURCE_TYPE = ( - "properties.ccm:oeh_lrt_aggregated", - ElasticFieldType.TEXT, - ) - LEARNINGRESOURCE_TYPE_DE = ( - "i18n.de_DE.ccm:oeh_lrt_aggregated", - ElasticFieldType.TEXT, - ) - EDUENDUSERROLE_DE = ( - "i18n.de_DE.ccm:educationalintendedenduserrole", - ElasticFieldType.TEXT, - ) - CONTAINS_ADS = ("properties.ccm:containsAdvertisement", ElasticFieldType.TEXT) - OBJECT_TYPE = ("properties.ccm:objecttype", ElasticFieldType.TEXT) - - -LearningMaterialAttribute = ElasticField( - "LearningMaterialAttribute", - [ - (f.name, (f.value, f.field_type)) - for f in chain(ElasticResourceAttribute, _LearningMaterialAttribute) - ], -) - class MissingCollectionProperties(BaseModel): total: int = Field(default=0, gt=0, description="Number of entries") @@ -108,6 +68,9 @@ class MissingMaterialProperties(BaseModel): class ScoreOutput(BaseModel): score: int = Field(default=0, gt=0, le=100, description="Overall score") + oer_ratio: int = Field( + default=0, gt=0, le=100, description="Overall ratio of OER content" + ) collections: MissingCollectionProperties = Field( description="Score for specific collection properties" ) diff --git a/src/app/api/score/score.py b/src/app/api/score/score.py index 1b44670..61b2c6e 100644 --- a/src/app/api/score/score.py +++ b/src/app/api/score/score.py @@ -5,7 +5,7 @@ from fastapi import Path import app.core.constants -from app.api.score.models import LearningMaterialAttribute +from app.core.models import LearningMaterialAttribute from app.elastic.dsl import afilter, amissing from app.elastic.elastic import ( ResourceType, @@ -64,7 +64,7 @@ def get_score_search(node_id: uuid.UUID, resource_type: ResourceType) -> Search: return s -def score(response: Response) -> dict: +def map_response_to_output(response: Response) -> dict: return { "total": response.hits.total.value, **{k: v["doc_count"] for k, v in response.aggregations.to_dict().items()}, @@ -77,7 +77,7 @@ def search_score(node_id: uuid.UUID, resource_type: ResourceType) -> dict: response: Response = s.execute() if response.success(): - return score(response) + return map_response_to_output(response) def node_id_param( @@ -124,3 +124,21 @@ def field_names_used_for_score_calculation(properties: dict) -> list[str]: ), "missing_edu_context": amissing(qfield=ElasticResourceAttribute.EDU_CONTEXT), } + + +async def get_score(node_id): + collection_stats = search_score( + node_id=node_id, resource_type=ResourceType.COLLECTION + ) + collection_scores = calc_scores(stats=collection_stats) + material_stats = search_score(node_id=node_id, resource_type=ResourceType.MATERIAL) + material_scores = calc_scores(stats=material_stats) + score_ = calc_weighted_score( + collection_scores=collection_scores, + material_scores=material_scores, + ) + return { + "score": score_, + "collections": {"total": collection_stats["total"], **collection_scores}, + "materials": {"total": material_stats["total"], **material_scores}, + } diff --git a/src/app/core/models.py b/src/app/core/models.py new file mode 100644 index 0000000..755e4fc --- /dev/null +++ b/src/app/core/models.py @@ -0,0 +1,52 @@ +from itertools import chain + +from pydantic import BaseModel, Extra + +from app.elastic.dsl import ElasticField, ElasticFieldType +from app.models import ElasticResourceAttribute + + +class _LearningMaterialAttribute(ElasticField): + TITLE = ("properties.cclom:title", ElasticFieldType.TEXT) + SUBJECTS = ("properties.ccm:taxonid", ElasticFieldType.TEXT) + SUBJECTS_DE = ("i18n.de_DE.ccm:taxonid", ElasticFieldType.TEXT) + WWW_URL = ("properties.ccm:wwwurl", ElasticFieldType.TEXT) + DESCRIPTION = ("properties.cclom:general_description", ElasticFieldType.TEXT) + LICENSES = ("properties.ccm:commonlicense_key", ElasticFieldType.TEXT) + COLLECTION_NODEREF_ID = ("collections.nodeRef.id", ElasticFieldType.TEXT) + COLLECTION_PATH = ("collections.path", ElasticFieldType.TEXT) + CONTENT_FULLTEXT = ("content.fulltext", ElasticFieldType.TEXT) + LEARNINGRESOURCE_TYPE = ( + "properties.ccm:oeh_lrt_aggregated", + ElasticFieldType.TEXT, + ) + LEARNINGRESOURCE_TYPE_DE = ( + "i18n.de_DE.ccm:oeh_lrt_aggregated", + ElasticFieldType.TEXT, + ) + EDUENDUSERROLE_DE = ( + "i18n.de_DE.ccm:educationalintendedenduserrole", + ElasticFieldType.TEXT, + ) + CONTAINS_ADS = ("properties.ccm:containsAdvertisement", ElasticFieldType.TEXT) + OBJECT_TYPE = ("properties.ccm:objecttype", ElasticFieldType.TEXT) + PUBLISHER = ("properties.ccm:oeh_publisher_combined", ElasticFieldType.TEXT) + + +LearningMaterialAttribute = ElasticField( + "LearningMaterialAttribute", + [ + (f.name, (f.value, f.field_type)) + for f in chain(ElasticResourceAttribute, _LearningMaterialAttribute) + ], +) + + +class ResponseConfig: + allow_population_by_field_name = True + extra = Extra.ignore + + +class ResponseModel(BaseModel): + class Config(ResponseConfig): + pass diff --git a/src/app/elastic/dsl.py b/src/app/elastic/dsl.py index 2718336..df657b9 100644 --- a/src/app/elastic/dsl.py +++ b/src/app/elastic/dsl.py @@ -1,11 +1,23 @@ +from enum import Enum, auto from typing import List, Union from elasticsearch_dsl import A, Q from elasticsearch_dsl.aggs import Agg from elasticsearch_dsl.query import Query -from .fields import ElasticField -from .utils import handle_text_field + +class ElasticFieldType(str, Enum): + KEYWORD = auto() + TEXT = auto() + + +class ElasticField(str, Enum): + def __new__(cls, path: str, field_type: ElasticFieldType): + obj = str.__new__(cls, [path]) + obj._value_ = path + obj.path = path + obj.field_type = field_type + return obj def qterm(qfield: Union[ElasticField, str], value, **kwargs) -> Query: @@ -49,3 +61,18 @@ def afilter(query: Query) -> Agg: def amissing(qfield: Union[ElasticField, str]) -> Agg: return A("missing", field=handle_text_field(qfield)) + + +def aterms(qfield: Union[ElasticField, str], **kwargs) -> Agg: + kwargs["field"] = handle_text_field(qfield) + return A("terms", **kwargs) + + +def handle_text_field(qfield: Union[ElasticField, str]) -> str: + if isinstance(qfield, ElasticField): + qfield_key = qfield.path + if qfield.field_type is ElasticFieldType.TEXT: + qfield_key = f"{qfield_key}.keyword" + return qfield_key + else: + return qfield diff --git a/src/app/elastic/elastic.py b/src/app/elastic/elastic.py index e88e8d1..df95371 100644 --- a/src/app/elastic/elastic.py +++ b/src/app/elastic/elastic.py @@ -3,7 +3,7 @@ from elasticsearch_dsl.query import Query -from app.api.score.models import LearningMaterialAttribute +from app.core.models import LearningMaterialAttribute from app.elastic.dsl import qbool, qboolor, qnotexists, qterm, qterms from app.models import CollectionAttribute, ElasticResourceAttribute diff --git a/src/app/elastic/fields.py b/src/app/elastic/fields.py deleted file mode 100644 index 58744a2..0000000 --- a/src/app/elastic/fields.py +++ /dev/null @@ -1,15 +0,0 @@ -from enum import Enum, auto - - -class ElasticFieldType(str, Enum): - KEYWORD = auto() - TEXT = auto() - - -class ElasticField(str, Enum): - def __new__(cls, path: str, field_type: ElasticFieldType): - obj = str.__new__(cls, [path]) - obj._value_ = path - obj.path = path - obj.field_type = field_type - return obj diff --git a/src/app/elastic/utils.py b/src/app/elastic/utils.py index 4fafadc..4f1f1d4 100644 --- a/src/app/elastic/utils.py +++ b/src/app/elastic/utils.py @@ -1,12 +1,8 @@ -from typing import Union - from elasticsearch_dsl import connections from app.core.config import ELASTICSEARCH_TIMEOUT, ELASTICSEARCH_URL from app.core.logging import logger -from .fields import ElasticField, ElasticFieldType - async def connect_to_elastic(): logger.debug(f"Attempt to open connection: {ELASTICSEARCH_URL}") @@ -14,13 +10,3 @@ async def connect_to_elastic(): connections.create_connection( hosts=[ELASTICSEARCH_URL], timeout=ELASTICSEARCH_TIMEOUT ) - - -def handle_text_field(qfield: Union[ElasticField, str]) -> str: - if isinstance(qfield, ElasticField): - qfield_key = qfield.path - if qfield.field_type is ElasticFieldType.TEXT: - qfield_key = f"{qfield_key}.keyword" - return qfield_key - else: - return qfield diff --git a/src/app/models.py b/src/app/models.py index c45c614..9a8eb1e 100644 --- a/src/app/models.py +++ b/src/app/models.py @@ -1,6 +1,6 @@ from typing import TypeVar -from app.elastic.fields import ElasticField, ElasticFieldType +from app.elastic.dsl import ElasticField, ElasticFieldType _ELASTIC_RESOURCE = TypeVar("_ELASTIC_RESOURCE") _DESCENDANT_COLLECTIONS_MATERIALS_COUNTS = TypeVar( diff --git a/tests/unit_tests/crud/test_missing_materials.py b/tests/unit_tests/crud/test_missing_materials.py index 0f35b77..355b38a 100644 --- a/tests/unit_tests/crud/test_missing_materials.py +++ b/tests/unit_tests/crud/test_missing_materials.py @@ -1,29 +1,19 @@ -""" - import uuid -import pytest +from app.api.collections.missing_attributes import missing_attribute_filter +from app.api.collections.missing_materials import missing_attributes_search -from app.api.collections.missing_materials import ( - LearningMaterialAttribute, - MissingAttributeFilter, - MissingMaterialField, -) -# TODO: More tests cases to also enable filtering, see __call__ MissingAttributeFilter -@pytest.mark.skip(reason="Outdated") def test_missing_materials_search(): dummy_uuid = uuid.uuid4() - attribute = LearningMaterialAttribute.KEYWORDS - dummy_missing_attribute = MissingAttributeFilter( - attr=MissingMaterialField[attribute.name] - ) + dummy_attribute = "properties.cm:title" + dummy_missing_attribute = missing_attribute_filter[0].value dummy_maximum_size = 3 - search = missing_materials_search( + search = missing_attributes_search( dummy_uuid, dummy_missing_attribute, dummy_maximum_size ) actual = search.to_dict() - actual_source = actual["_source"] + actual_source = actual["_source"]["includes"] actual["_source"] = [] assert actual == { "query": { @@ -39,9 +29,7 @@ def test_missing_materials_search(): {"match": {"collections.nodeRef.id": dummy_uuid}}, ], "minimum_should_match": 1, - "must_not": [ - {"wildcard": {dummy_missing_attribute.attr.value: {"value": "*"}}} - ], + "must_not": [{"wildcard": {dummy_attribute: {"value": "*"}}}], } }, "from": 0, @@ -66,4 +54,59 @@ def test_missing_materials_search(): source_contains_equal_elements = actual_source == expected_source assert source_contains_equal_elements -""" + +def test_missing_materials_search_license(): + dummy_uuid = uuid.uuid4() + dummy_attribute = "properties.ccm:commonlicense_key" + dummy_missing_attribute = missing_attribute_filter[4].value + dummy_maximum_size = 3 + search = missing_attributes_search( + dummy_uuid, dummy_missing_attribute, dummy_maximum_size + ) + actual = search.to_dict() + actual["_source"] = [] + + assert actual == { + "query": { + "bool": { + "filter": [ + {"term": {"permissions.Read.keyword": "GROUP_EVERYONE"}}, + {"term": {"properties.cm:edu_metadataset.keyword": "mds_oeh"}}, + {"term": {"nodeRef.storeRef.protocol": "workspace"}}, + {"term": {"type": "ccm:io"}}, + { + "bool": { + "should": [ + { + "terms": { + dummy_attribute + + ".keyword": [ + "UNTERRICHTS_UND_LEHRMEDIEN", + "NONE", + "", + ] + } + }, + { + "bool": { + "must_not": [ + {"exists": {"field": dummy_attribute}} + ] + } + }, + ], + "minimum_should_match": 1, + } + }, + ], + "should": [ + {"match": {"collections.path": dummy_uuid}}, + {"match": {"collections.nodeRef.id": dummy_uuid}}, + ], + "minimum_should_match": 1, + } + }, + "from": 0, + "size": dummy_maximum_size, + "_source": [], + } diff --git a/tests/unit_tests/crud/test_score.py b/tests/unit_tests/crud/test_score.py index a0ffcf9..80d0d38 100644 --- a/tests/unit_tests/crud/test_score.py +++ b/tests/unit_tests/crud/test_score.py @@ -1,3 +1,4 @@ +import uuid from unittest.mock import MagicMock import pytest @@ -7,7 +8,7 @@ calc_scores, calc_weighted_score, get_score_search, - score, + map_response_to_output, ) from app.elastic.elastic import ResourceType @@ -22,7 +23,7 @@ def test_score_empty_hits(): mocked_response._hits = MagicMock() mocked_response.hits = MagicMock() mocked_response.hits.total.value = 0 - output = score(mocked_response) + output = map_response_to_output(mocked_response) expected_score = {"total": len(dummy_hits["hits"])} assert output == expected_score @@ -58,7 +59,7 @@ def test_score_with_hits(): mocked_response._hits = MagicMock() mocked_response.hits = MagicMock() mocked_response.hits.total.value = 0 - output = score(mocked_response) + output = map_response_to_output(mocked_response) expected_score = { "total": 0, @@ -76,7 +77,7 @@ def test_score_with_hits(): def test_score_search_material(): - noderef_id = "dummy_id" + noderef_id = uuid.uuid4() assert ResourceType.MATERIAL == "MATERIAL" resource_type = ResourceType.MATERIAL search = get_score_search(noderef_id, resource_type) @@ -151,7 +152,7 @@ def test_score_search_material(): def test_score_search_collection(): - noderef_id = "dummy_id" + noderef_id = uuid.uuid4() assert ResourceType.COLLECTION == "COLLECTION" resource_type = ResourceType.COLLECTION search = get_score_search(noderef_id, resource_type) @@ -195,7 +196,7 @@ def test_score_search_collection(): @pytest.mark.skip(reason="Unhandled exception") def test_score_search_exception(): - noderef_id = "123" + noderef_id = uuid.uuid4() resource_type = "" search = get_score_search(noderef_id, resource_type) assert search.to_dict() == {}