Skip to content

Commit

Permalink
60 prepare to merge dev to main (#63)
Browse files Browse the repository at this point in the history
* #60 Prepare to merge dev to main

Pulling missing materials endpoint over

* #60 Prepare to merge dev to main

pending materials endpoint included

* #60 Prepare to merge dev to main

Adding descendants endpoint

* #60 Prepare to merge dev to main

Adding stub analytics endpoint

* #60 Prepare to merge dev to main

Adding stub analytics endpoint

* #60 Prepare to merge dev to main

Adding stub analytics endpoint

* #60 Prepare to merge dev to main

Adding stub analytics endpoint

* #60 Prepare to merge dev to main

WIP: Adding search functionality

* #60 Prepare to merge dev to main

Adding search functionality

* #60 Prepare to merge dev to main

WIP: Adding material type functionality

* #60 Prepare to merge dev to main

Displaying only required metadata

* #60 Prepare to merge dev to main

WIP: Adding background tasks

* #60 Prepare to merge dev to main

WIP: Adding background tasks

* #60 Prepare to merge dev to main

Fixing required properties

* #60 Prepare to merge dev to main

WIP: Fixing issue with string execution

* #60 Prepare to merge dev to main

Correcting title property

* #60 Prepare to merge dev to main

WIP: Fixing str call bug

* #60 Prepare to merge dev to main

WIP: Fixing material search

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Fixing material search

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Fixing material search

#time 5m #comment Verbindung von Github und Jira für mehr Transparenz, was wir wozu bei Tickets machen.

* KBMBF-452: Adding JIRA integration (#62)

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Fixing material search

#time 5m #comment Verbindung von Github und Jira für mehr Transparenz, was wir wozu bei Tickets machen.

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Fixing material search

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Fixing material search

#time 5m #comment Verbindung von Github und Jira für mehr Transparenz, was wir wozu bei Tickets machen.

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Fixing broken data structure

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Fixing broken data structure

* KBMBF-452: #60 Prepare to merge dev to main

Fixing material search

* KBMBF-452: #60 Prepare to merge dev to main

Fixing material search

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Enabling pipeline tests

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Enabling pipeline tests

* KBMBF-452: #60 Prepare to merge dev to main

Refactoring

* KBMBF-452: #60 Prepare to merge dev to main

Adding collection validation

* KBMBF-452: #60 Prepare to merge dev to main

Adding unit test

* KBMBF-452: #60 Prepare to merge dev to main

Updating unit tests

* KBMBF-452: #60 Prepare to merge dev to main

Adding missing materials endpoint

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Adding unit tests

* KBMBF-452: #60 Prepare to merge dev to main

WIP: Adding unit tests

* KBMBF-452: #60 Prepare to merge dev to main

Adding descriptions

* KBMBF-452: #60 Prepare to merge dev to main

Adding descriptions

Refactoring

* KBMBF-452: #60 Prepare to merge dev to main

Cleanup

* KBMBF-452: #60 Prepare to merge dev to main

Cleanup

* KBMBF-452: #60 Prepare to merge dev to main

Cleanup

* KBMBF-452: #60 Prepare to merge dev to main

Refactoring and removing duplicates

* KBMBF-452: #60 Prepare to merge dev to main

Cleanup

* KBMBF-452: #60 Prepare to merge dev to main

Cleanup

* KBMBF-452: #60 Prepare to merge dev to main

Cleanup

* KBMBF-452: #60 Prepare to merge dev to main

Cleanup

* KBMBF-452: #60 Prepare to merge dev to main

Cleanup
  • Loading branch information
RobertMeissner authored Jul 12, 2022
1 parent 0948849 commit 4ebb0be
Show file tree
Hide file tree
Showing 30 changed files with 1,777 additions and 94 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Launch `index.html` in `build` directory
## For confluence

WIP: Currently not possible to automatically push to confluence.
Add token from JIRA.

```bash
./build_confluence.sh
Expand Down
Empty file.
111 changes: 111 additions & 0 deletions src/app/api/analytics/analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import uuid
from datetime import datetime
from enum import Enum
from typing import ClassVar, Generic, Optional, TypeVar

from pydantic import BaseModel, Extra, Field, validator
from pydantic.generics import GenericModel
from starlette.exceptions import HTTPException
from starlette.status import HTTP_404_NOT_FOUND


class StatType(str, Enum):
    """Kinds of statistics the analytics endpoints can serve."""

    # PORTAL_TREE = "portal-tree" # Currently unused
    SEARCH = "search"
    MATERIAL_TYPES = "material-types"
    VALIDATION_COLLECTIONS = "validation-collections"
    VALIDATION_MATERIALS = "validation-materials"  # Currently unused


class StatsNotFoundException(HTTPException):
    """HTTP 404 raised when the requested statistics are unavailable."""

    def __init__(self):
        super().__init__(status_code=HTTP_404_NOT_FOUND, detail="Stats not found")


class ElasticConfig:
    """Pydantic config for models mirroring Elasticsearch documents.

    Allows population by field name and keeps unknown (extra) fields.
    """

    allow_population_by_field_name = True
    extra = Extra.allow


class ElasticModel(BaseModel):
    """Base model for data read from Elasticsearch; keeps extra fields."""

    class Config(ElasticConfig):
        pass


class ResponseConfig:
    """Pydantic config for API response models.

    Allows population by field name and silently drops unknown fields.
    """

    allow_population_by_field_name = True
    extra = Extra.ignore


class ResponseModel(BaseModel):
    """Base model for outgoing API responses; unknown fields are ignored."""

    class Config(ResponseConfig):
        pass


# Type alias: mapping from a category label to its occurrence count.
COUNT_STATISTICS_TYPE = dict[str, int]


class StatsResponse(ResponseModel):
    """Aggregated count statistics together with their collection timestamp."""

    # When the underlying statistics were derived.
    derived_at: datetime
    # Nested counts; outer keys presumably node ids, inner keys stat
    # categories — TODO confirm against the endpoint that builds this.
    stats: dict[str, dict[str, COUNT_STATISTICS_TYPE]]


ValidationStatsT = TypeVar("ValidationStatsT")


class ValidationStatsResponse(GenericModel, Generic[ValidationStatsT]):
    """Validation statistics for a single node, generic in the stats payload."""

    noderef_id: uuid.UUID
    # Defaults to the moment the response object is created.
    derived_at: datetime = Field(default_factory=datetime.now)
    validation_stats: ValidationStatsT


ElasticFieldValidationT = TypeVar("ElasticFieldValidationT")


class ElasticValidationStats(GenericModel, Generic[ElasticFieldValidationT]):
    """Per-field validation results for the common metadata fields."""

    title: Optional[ElasticFieldValidationT]
    keywords: Optional[ElasticFieldValidationT]
    description: Optional[ElasticFieldValidationT]
    edu_context: Optional[ElasticFieldValidationT]


class OehValidationError(str, Enum):
    """Kinds of validation failures a metadata field can exhibit."""

    MISSING = "missing"
    TOO_SHORT = "too_short"
    TOO_FEW = "too_few"
    LACKS_CLARITY = "lacks_clarity"
    INVALID_SPELLING = "invalid_spelling"

    # NOTE(review): declared but never assigned in this file — presumably a
    # lookup table populated elsewhere; confirm it is actually used.
    _lut: ClassVar[dict]


class CollectionValidationStats(ElasticValidationStats[list[OehValidationError]]):
    """Collection-level stats: each common field maps to its list of errors."""

    pass


def none_to_empty_list(v: Optional[list]) -> list:
    """Coerce ``None`` to an empty list; pass any other value through unchanged.

    Intended as a pydantic ``pre=True`` validator so that optional list fields
    never surface ``None``.

    :param v: Incoming field value, possibly ``None``.
    :return: ``[]`` when *v* is ``None``, otherwise *v* itself.
    """
    # The original annotations were inverted (``v: list -> Optional[list]``);
    # the function accepts an optional value and always returns a list.
    if v is None:
        return []
    return v


class MaterialFieldValidation(BaseModel):
    """Per-error-kind lists of material ids that fail validation for one field."""

    missing: Optional[list[uuid.UUID]]
    too_short: Optional[list[uuid.UUID]]
    too_few: Optional[list[uuid.UUID]]
    lacks_clarity: Optional[list[uuid.UUID]]
    invalid_spelling: Optional[list[uuid.UUID]]

    # validators
    # Normalizes ``None`` to ``[]`` on every field before standard validation.
    _none_to_empty_list = validator("*", pre=True, allow_reuse=True)(none_to_empty_list)


class MaterialValidationStats(ElasticValidationStats[MaterialFieldValidation]):
    """Material validation stats; adds material-specific fields to the common set."""

    subjects: Optional[MaterialFieldValidation]
    license: Optional[MaterialFieldValidation]
    ads_qualifier: Optional[MaterialFieldValidation]
    material_type: Optional[MaterialFieldValidation]
    object_type: Optional[MaterialFieldValidation]
116 changes: 116 additions & 0 deletions src/app/api/analytics/background_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import asyncio
import os
from datetime import datetime

from fastapi import APIRouter
from fastapi_utils.tasks import repeat_every
from starlette.background import BackgroundTasks
from starlette.status import HTTP_202_ACCEPTED

import app.api.analytics.storage
from app.api.analytics.models import Collection
from app.api.analytics.stats import get_ids_to_iterate, search_hits_by_material_type
from app.api.analytics.storage import (
_COLLECTION_COUNT,
_COLLECTIONS,
_MATERIALS,
_SEARCH,
)
from app.api.collections.counts import AggregationMappings, collection_counts
from app.api.score.models import required_collection_properties
from app.core.config import BACKGROUND_TASK_TIME_INTERVAL
from app.core.constants import COLLECTION_ROOT_ID
from app.core.logging import logger
from app.elastic.elastic import query_collections, query_materials
from app.elastic.search import Search

# Router exposing the analytics maintenance endpoint(s).
background_router = APIRouter(tags=["Background"])


@background_router.post("/run-analytics", status_code=HTTP_202_ACCEPTED)
async def run_analytics(*, background_tasks: BackgroundTasks):
    """Schedule a full analytics import in the background; responds 202 immediately."""
    background_tasks.add_task(run)


@repeat_every(seconds=BACKGROUND_TASK_TIME_INTERVAL, logger=logger)
def background_task():
    """Re-run the analytics import every ``BACKGROUND_TASK_TIME_INTERVAL`` seconds."""
    run()


def import_collections(derived_at: datetime):
    """Load all collections below the root node from Elastic into global storage.

    Hits are deduplicated on ``nodeRef.id`` (first occurrence wins) and stored
    under ``_COLLECTIONS``.
    """
    source_fields = ["nodeRef.*", "path", *required_collection_properties.keys()]
    search = (
        Search()
        .query(query_collections(node_id=COLLECTION_ROOT_ID))
        .source(includes=source_fields)
    )

    seen_ids = set()
    collections = []
    for hit in search.scan():
        node_id = hit.nodeRef["id"]
        if node_id not in seen_ids:
            seen_ids.add(node_id)
            collections.append(
                Collection(id=str(node_id), doc=hit.to_dict(), derived_at=derived_at)
            )

    app.api.analytics.storage.global_storage[_COLLECTIONS] = collections


def import_materials(derived_at: datetime):
    """Load all materials below the root collection from Elastic into global storage.

    Hits are deduplicated on ``nodeRef.id`` (first occurrence wins) and stored
    under ``_MATERIALS``.

    :param derived_at: Timestamp recorded on every imported entry.
    """
    s = (
        Search()
        .query(query_materials(node_id=COLLECTION_ROOT_ID))
        .source(
            includes=[
                "nodeRef.*",
                "collections.nodeRef.id",
                *list(required_collection_properties.keys()),
            ]
        )
    )

    seen = set()
    # Renamed local from ``collections``: the entries are materials, and the
    # old name also shadowed the stdlib ``collections`` module name.
    materials = []
    for hit in s.scan():
        node_id = hit.nodeRef["id"]
        if node_id not in seen:
            seen.add(node_id)
            materials.append(
                # The ``Collection`` model doubles as the material record here
                # (see the rename TODO on the model itself).
                Collection(
                    id=str(node_id),
                    doc=hit.to_dict(),
                    derived_at=derived_at,
                )
            )
    app.api.analytics.storage.global_storage[_MATERIALS] = materials


def run():
    """Run the full analytics import: collections, materials, counts, search stats.

    Populates ``global_storage`` under ``_COLLECTIONS``, ``_MATERIALS``,
    ``_COLLECTION_COUNT`` and ``_SEARCH``.
    """
    derived_at = datetime.now()
    logger.info(f"{os.getpid()}: Starting analytics import at: {derived_at}")

    import_collections(derived_at=derived_at)

    import_materials(derived_at=derived_at)

    # Replaced stray print() calls with the module logger for consistent output.
    logger.info("Collection and materials imported")

    app.api.analytics.storage.global_storage[_COLLECTION_COUNT] = asyncio.run(
        collection_counts(COLLECTION_ROOT_ID, AggregationMappings.lrt)
    )

    all_collections = asyncio.run(get_ids_to_iterate(node_id=COLLECTION_ROOT_ID))
    logger.info("Tree ready to iterate. Length: %s", len(all_collections))

    # TODO Refactor, this is very expensive
    app.api.analytics.storage.global_storage[_SEARCH] = {
        row.id: search_hits_by_material_type(row.title) for row in all_collections
    }
    logger.info("Background task done")
10 changes: 10 additions & 0 deletions src/app/api/analytics/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from datetime import datetime

from pydantic import BaseModel


# TODO: Rename, as used for materials in background_task, as well
class Collection(BaseModel):
    """A cached Elasticsearch document (collection or material) plus import time."""

    # Node id as a string.
    id: str
    # Raw source document — presumably ``hit.to_dict()`` from Elastic; confirm.
    doc: dict
    # When the background import that produced this entry ran.
    derived_at: datetime
Loading

0 comments on commit 4ebb0be

Please sign in to comment.