diff --git a/backend/app/app/db/init_db.py b/backend/app/app/db/init_db.py index 9c84b2d..cca368a 100644 --- a/backend/app/app/db/init_db.py +++ b/backend/app/app/db/init_db.py @@ -36,7 +36,6 @@ def init_db(session: Session) -> None: is_active=False, ) user = User.create(session, user_in) - logging.debug(f"User {user.email} created") # type: ignore with open("/app/app/db/item.csv", "r") as file: reader = csv.reader(file) diff --git a/backend/app/app/worker.py b/backend/app/app/worker.py index 8a6b4f6..ed8b217 100644 --- a/backend/app/app/worker.py +++ b/backend/app/app/worker.py @@ -7,6 +7,7 @@ from typing import Union import gensim +import numpy as np import pandas as pd from celery import Task from celery.utils.log import get_task_logger @@ -17,7 +18,6 @@ from rectools.dataset import Dataset from rectools.metrics import MAP, MeanInvUserFreq, Serendipity, calc_metrics from rectools.models import ImplicitALSWrapperModel -from sklearn.model_selection import train_test_split from sqlmodel import Session, col, select from app import source @@ -193,6 +193,28 @@ def byte_to_list_float(byte: bytes): return list(struct.unpack("f" * (len(byte) // 4), byte)) +def train_test_split( + df: DataFrame, test_size: float = 0.2, random_state: int = 32 +): + """ + Randomly split the rows of a dataset into train and test sets. + + Args: + df (DataFrame): The dataset to split. + test_size (float): Fraction of rows placed in the test set (the row count is rounded down). + random_state (int): Seed for a private RandomState, so the split is reproducible and NumPy's global RNG is left untouched. + + Returns: + tuple: (train, test) DataFrames selected by position with iloc. 
+ """ + rng = np.random.RandomState(random_state) + shuffled_indices = rng.permutation(len(df)) + test_set_size = int(len(df) * test_size) + test_indices = shuffled_indices[:test_set_size] + train_indices = shuffled_indices[test_set_size:] + return df.iloc[train_indices], df.iloc[test_indices] + + @celery_app.task( acks_late=True, base=DatabaseTask, diff --git a/backend/backend.dockerfile b/backend/backend.dockerfile index f3c114e..3a81b6c 100644 --- a/backend/backend.dockerfile +++ b/backend/backend.dockerfile @@ -10,8 +10,7 @@ COPY ./app/pyproject.toml ./app/poetry.lock* /app/ # Allow installing dev dependencies to run tests ARG INSTALL_DEV=false -RUN if [ $INSTALL_DEV == 'true' ] ; then poetry install --no-root ; else poetry install --no-root --only main ; fi -RUN poetry run python -m pip install --no-use-pep517 rectools[lightfm] +RUN sh -c "if [ '$INSTALL_DEV' = 'true' ] ; then poetry install --no-root ; else poetry install --no-root ; fi" COPY ./app /app diff --git a/backend/celeryworker.dockerfile b/backend/celeryworker.dockerfile index 35c6ac1..0aa7e67 100644 --- a/backend/celeryworker.dockerfile +++ b/backend/celeryworker.dockerfile @@ -11,8 +11,7 @@ COPY ./app/pyproject.toml ./app/poetry.lock* /app/ # Allow installing dev dependencies to run tests ARG INSTALL_DEV=false -RUN if [ $INSTALL_DEV == 'true' ] ; then poetry install --no-root ; else poetry install --no-root --only main ; fi -RUN poetry run python -m pip install --no-use-pep517 rectools[lightfm] +RUN sh -c "if [ '$INSTALL_DEV' = 'true' ] ; then poetry install --no-root ; else poetry install --no-root ; fi" ENV C_FORCE_ROOT=1