Skip to content

Commit

Permalink
KBMBF-452: #37 Show ratio of oer-content
Browse files Browse the repository at this point in the history
Refactoring
  • Loading branch information
Robert Meissner committed Jul 18, 2022
1 parent 6a6272c commit e75f275
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 118 deletions.
12 changes: 2 additions & 10 deletions src/app/api/analytics/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from starlette.exceptions import HTTPException
from starlette.status import HTTP_404_NOT_FOUND

from app.core.models import ResponseModel


class StatType(str, Enum):
# PORTAL_TREE = "portal-tree" # Currently unused
Expand Down Expand Up @@ -35,16 +37,6 @@ class Config(ElasticConfig):
pass


# Shared pydantic ``Config`` settings for response models.
class ResponseConfig:
# Allow a field to be populated by its attribute name as well as by its alias.
allow_population_by_field_name = True
# Ignore (rather than reject) input attributes that are not declared on the model.
extra = Extra.ignore


# Base class for response models; its nested ``Config`` inherits the settings
# declared on ``ResponseConfig``.
class ResponseModel(BaseModel):
class Config(ResponseConfig):
pass


# Type alias for count statistics: a dict mapping string keys to integer counts.
COUNT_STATISTICS_TYPE = dict[str, int]


Expand Down
3 changes: 1 addition & 2 deletions src/app/api/analytics/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,13 @@
_MATERIALS,
global_storage,
)
from app.api.collections.descendants import aterms
from app.api.collections.models import CollectionNode
from app.api.collections.tree import collection_tree
from app.api.score.models import required_collection_properties
from app.core.config import ELASTIC_TOTAL_SIZE
from app.core.models import LearningMaterialAttribute
from app.elastic.dsl import ElasticField, aterms
from app.elastic.elastic import query_materials
from app.elastic.fields import ElasticField
from app.elastic.search import Search


Expand Down
54 changes: 7 additions & 47 deletions src/app/api/collections/descendants.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
import uuid
from itertools import chain
from typing import Optional, Type, TypeVar, Union
from typing import Optional, Type, TypeVar

from elasticsearch_dsl.aggs import A, Agg
from elasticsearch_dsl.query import Query
from elasticsearch_dsl.response import Response
from glom import Coalesce, Iter, glom
from pydantic import BaseModel, Extra

from app.api.collections.missing_materials import ElasticResource, EmptyStrToNone
from app.api.collections.utils import all_source_fields
from app.core.config import ELASTIC_TOTAL_SIZE
from app.core.models import LearningMaterialAttribute
from app.elastic.dsl import qbool, qmatch
from app.core.models import LearningMaterialAttribute, ResponseModel
from app.elastic.dsl import ElasticField, aterms, qbool, qmatch
from app.elastic.elastic import ResourceType, query_materials, type_filter
from app.elastic.fields import ElasticField
from app.elastic.search import Search
from app.elastic.utils import handle_text_field
from app.models import _DESCENDANT_COLLECTIONS_MATERIALS_COUNTS
from app.models import CollectionAttribute as _CollectionAttribute
from app.models import ElasticResourceAttribute

Expand All @@ -31,27 +29,12 @@
)


# Shared pydantic ``Config`` settings for response models.
class ResponseConfig:
# Allow a field to be populated by its attribute name as well as by its alias.
allow_population_by_field_name = True
# Ignore (rather than reject) input attributes that are not declared on the model.
extra = Extra.ignore


# Base class for response models; its nested ``Config`` inherits the settings
# declared on ``ResponseConfig``.
class ResponseModel(BaseModel):
class Config(ResponseConfig):
pass


# Response model carrying a node reference id, a title and a materials count
# (presumably one entry per collection -- verify against the caller).
class CollectionMaterialsCount(ResponseModel):
noderef_id: uuid.UUID
title: str
materials_count: int


# Unconstrained type variable; presumably used to annotate classmethods that
# return an instance of the class they are called on -- TODO confirm.
_DESCENDANT_COLLECTIONS_MATERIALS_COUNTS = TypeVar(
"_DESCENDANT_COLLECTIONS_MATERIALS_COUNTS"
)


# TODO: Refactor
class DescendantCollectionsMaterialsCounts(BaseModel):
results: list[CollectionMaterialsCount]
Expand Down Expand Up @@ -80,17 +63,9 @@ def parse_elastic_response(
)


def aterms(qfield: Union[ElasticField, str], **kwargs) -> Agg:
    """Build a ``terms`` aggregation on ``qfield``.

    The field name is resolved through ``handle_text_field`` and always
    overrides any ``field`` entry passed in ``kwargs``; all remaining keyword
    arguments are forwarded to the aggregation unchanged.
    """
    options = {**kwargs, "field": handle_text_field(qfield)}
    return A("terms", **options)


def acomposite(sources: list[Union[Query, dict]], **kwargs) -> Agg:
    """Build a ``composite`` aggregation over ``sources``.

    Any additional keyword arguments are forwarded to the aggregation.
    """
    options = {"sources": sources, **kwargs}
    return A("composite", **options)


def agg_materials_by_collection(size: int = 65536) -> Agg:
return acomposite(
return A(
"composite",
sources=[
{
"noderef_id": aterms(
Expand All @@ -102,10 +77,6 @@ def agg_materials_by_collection(size: int = 65536) -> Agg:
)


def abucketsort(sort: list[Union[Query, dict]], **kwargs) -> Agg:
    """Build a ``bucket_sort`` pipeline aggregation using the given ``sort``.

    Any additional keyword arguments are forwarded to the aggregation.
    """
    options = {"sort": sort, **kwargs}
    return A("bucket_sort", **options)


def material_counts_by_descendant(
node_id: uuid.UUID,
) -> DescendantCollectionsMaterialsCounts:
Expand All @@ -120,7 +91,7 @@ def material_counts_search(node_id: uuid.UUID):
s = Search().base_filters().query(query_materials(node_id=node_id))
s.aggs.bucket("grouped_by_collection", agg_materials_by_collection()).pipeline(
"sorted_by_count",
abucketsort(sort=[{"_count": {"order": "asc"}}]),
A("bucket_sort", sort=[{"_count": {"order": "asc"}}]),
)
return s

Expand All @@ -132,17 +103,6 @@ class CollectionBase(ElasticResource):
path: Optional[list[uuid.UUID]] = None
parent_id: Optional[uuid.UUID] = None

source_fields = {
CollectionAttribute.NODEREF_ID,
CollectionAttribute.TYPE,
CollectionAttribute.NAME,
CollectionAttribute.TITLE,
CollectionAttribute.KEYWORDS,
CollectionAttribute.DESCRIPTION,
CollectionAttribute.PATH,
CollectionAttribute.PARENT_ID,
}

@classmethod
def parse_elastic_hit_to_dict(
cls: Type[_COLLECTION],
Expand Down
28 changes: 2 additions & 26 deletions src/app/api/collections/missing_materials.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@
from pydantic.validators import str_validator

from app.core.config import ELASTIC_TOTAL_SIZE
from app.core.models import LearningMaterialAttribute
from app.elastic.dsl import qbool, qmatch, qterm
from app.core.models import LearningMaterialAttribute, ResponseModel
from app.elastic.dsl import ElasticField, qbool, qmatch
from app.elastic.elastic import (
ResourceType,
query_missing_material_license,
type_filter,
)
from app.elastic.fields import ElasticField
from app.elastic.search import Search
from app.models import _ELASTIC_RESOURCE, ElasticResourceAttribute

Expand Down Expand Up @@ -131,16 +130,6 @@ def parse_elastic_hit_to_dict(
}


# Shared pydantic ``Config`` settings for response models.
class ResponseConfig:
# Allow a field to be populated by its attribute name as well as by its alias.
allow_population_by_field_name = True
# Ignore (rather than reject) input attributes that are not declared on the model.
extra = Extra.ignore


# Base class for response models; its nested ``Config`` inherits the settings
# declared on ``ResponseConfig``.
class ResponseModel(BaseModel):
class Config(ResponseConfig):
pass


# Response variant of ``LearningMaterialBase``: combines it with
# ``ResponseModel`` and declares no fields of its own.
class LearningMaterial(ResponseModel, LearningMaterialBase):
pass

Expand All @@ -157,12 +146,6 @@ def material_response_fields(
return response_fields


"""
"properties.ccm:objecttype": "object_type",
"properties.ccm:containsAdvertisement": "ads_qualifier",
"properties.cclom:oeh_lrt_aggregated": "learning_resource_type",
"""
MissingMaterialField = ElasticField(
"MissingMaterialField",
[
Expand Down Expand Up @@ -195,13 +178,6 @@ def materials_filter_params(
return MissingAttributeFilter(attr=missing_attr)


# Term queries matching documents whose read permission is "GROUP_EVERYONE",
# whose metadataset is "mds_oeh" and whose protocol is "workspace".
base_filter = [
qterm(qfield=ElasticResourceAttribute.PERMISSION_READ, value="GROUP_EVERYONE"),
qterm(qfield=ElasticResourceAttribute.EDU_METADATASET, value="mds_oeh"),
qterm(qfield=ElasticResourceAttribute.PROTOCOL, value="workspace"),
]


def missing_attributes_search(
node_id: uuid.UUID, missing_attribute: str, max_hits: int
) -> Search:
Expand Down
14 changes: 13 additions & 1 deletion src/app/core/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from itertools import chain

from app.elastic.fields import ElasticField, ElasticFieldType
from pydantic import BaseModel, Extra

from app.elastic.dsl import ElasticField, ElasticFieldType
from app.models import ElasticResourceAttribute


Expand Down Expand Up @@ -38,3 +40,13 @@ class _LearningMaterialAttribute(ElasticField):
for f in chain(ElasticResourceAttribute, _LearningMaterialAttribute)
],
)


# Shared pydantic ``Config`` settings for response models.
class ResponseConfig:
# Allow a field to be populated by its attribute name as well as by its alias.
allow_population_by_field_name = True
# Ignore (rather than reject) input attributes that are not declared on the model.
extra = Extra.ignore


# Base class for response models; its nested ``Config`` inherits the settings
# declared on ``ResponseConfig``.
class ResponseModel(BaseModel):
class Config(ResponseConfig):
pass
31 changes: 29 additions & 2 deletions src/app/elastic/dsl.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
from enum import Enum, auto
from typing import List, Union

from elasticsearch_dsl import A, Q
from elasticsearch_dsl.aggs import Agg
from elasticsearch_dsl.query import Query

from .fields import ElasticField
from .utils import handle_text_field

class ElasticFieldType(str, Enum):
    """Field kinds distinguished by the query helpers: ``KEYWORD`` and ``TEXT``."""

    KEYWORD = auto()
    TEXT = auto()


class ElasticField(str, Enum):
    """Base enum for Elasticsearch field definitions.

    Members are declared as ``(path, field_type)`` tuples. Each member is a
    ``str`` whose content and enum ``value`` are the field path, and it
    additionally exposes the ``path`` and ``field_type`` attributes.
    """

    def __new__(cls, path: str, field_type: ElasticFieldType):
        # Build the str part from the path itself. Wrapping it in a list
        # (``[path]``) would store the list's repr ("['...']") as the string
        # content, so a member would never compare equal to its own path.
        obj = str.__new__(cls, path)
        obj._value_ = path
        obj.path = path
        obj.field_type = field_type
        return obj


def qterm(qfield: Union[ElasticField, str], value, **kwargs) -> Query:
Expand Down Expand Up @@ -49,3 +61,18 @@ def afilter(query: Query) -> Agg:

def amissing(qfield: Union[ElasticField, str]) -> Agg:
    """Build a ``missing`` aggregation on ``qfield``.

    The field name is resolved through ``handle_text_field``.
    """
    field_name = handle_text_field(qfield)
    return A("missing", field=field_name)


def aterms(qfield: Union[ElasticField, str], **kwargs) -> Agg:
    """Build a ``terms`` aggregation on ``qfield``.

    The field name is resolved through ``handle_text_field`` and always
    overrides any ``field`` entry passed in ``kwargs``; all remaining keyword
    arguments are forwarded to the aggregation unchanged.
    """
    options = {**kwargs, "field": handle_text_field(qfield)}
    return A("terms", **options)


def handle_text_field(qfield: Union[ElasticField, str]) -> str:
    """Resolve ``qfield`` to the field name used in a query or aggregation.

    Anything that is not an ``ElasticField`` is returned unchanged. For an
    ``ElasticField`` the member's ``path`` is returned, with ``.keyword``
    appended when its ``field_type`` is ``ElasticFieldType.TEXT``.
    """
    if not isinstance(qfield, ElasticField):
        return qfield
    if qfield.field_type is ElasticFieldType.TEXT:
        return f"{qfield.path}.keyword"
    return qfield.path
15 changes: 0 additions & 15 deletions src/app/elastic/fields.py

This file was deleted.

14 changes: 0 additions & 14 deletions src/app/elastic/utils.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,12 @@
from typing import Union

from elasticsearch_dsl import connections

from app.core.config import ELASTICSEARCH_TIMEOUT, ELASTICSEARCH_URL
from app.core.logging import logger

from .fields import ElasticField, ElasticFieldType


async def connect_to_elastic():
    """Log the target URL, then create the elasticsearch_dsl connection.

    The connection is created for ``ELASTICSEARCH_URL`` with
    ``ELASTICSEARCH_TIMEOUT`` as its timeout.
    """
    logger.debug(f"Attempt to open connection: {ELASTICSEARCH_URL}")

    connection_options = {
        "hosts": [ELASTICSEARCH_URL],
        "timeout": ELASTICSEARCH_TIMEOUT,
    }
    connections.create_connection(**connection_options)


def handle_text_field(qfield: Union[ElasticField, str]) -> str:
    """Resolve ``qfield`` to the field name used in a query or aggregation.

    Anything that is not an ``ElasticField`` is returned unchanged. For an
    ``ElasticField`` the member's ``path`` is returned, with ``.keyword``
    appended when its ``field_type`` is ``ElasticFieldType.TEXT``.
    """
    if not isinstance(qfield, ElasticField):
        return qfield
    if qfield.field_type is ElasticFieldType.TEXT:
        return f"{qfield.path}.keyword"
    return qfield.path
2 changes: 1 addition & 1 deletion src/app/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import TypeVar

from app.elastic.fields import ElasticField, ElasticFieldType
from app.elastic.dsl import ElasticField, ElasticFieldType

_ELASTIC_RESOURCE = TypeVar("_ELASTIC_RESOURCE")
_DESCENDANT_COLLECTIONS_MATERIALS_COUNTS = TypeVar(
Expand Down

0 comments on commit e75f275

Please sign in to comment.