
creating integration test cases
rafaelgreca committed Nov 13, 2024
1 parent 928b826 commit ad24076
Showing 6 changed files with 89 additions and 3 deletions.
2 changes: 1 addition & 1 deletion data/download_data.sh
@@ -19,7 +19,7 @@ function parse_yaml
}

# setting important variables
-eval $(parse_yaml ../credentials.yaml "CONFIG_")
+eval $(parse_yaml ../config/credentials.yaml "CONFIG_")

# defining important variables
export KAGGLE_USERNAME="$CONFIG_KAGGLE_USERNAME"
4 changes: 4 additions & 0 deletions src/model/inference.py
@@ -62,8 +62,12 @@ def predict(self, x: np.ndarray, transform_to_str: bool = True) -> np.ndarray:
"""
prediction = self.model.predict(x)

print(prediction.shape)

if transform_to_str:
prediction = label_encoder.inverse_transform(prediction)
else:
prediction = np.max(prediction, axis=1)

logger.info(f"Prediction: {prediction}.")
return prediction
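
For context on the new else branch: with transform_to_str=False the method now returns the row-wise maximum of the model output rather than decoded labels. Below is a minimal, self-contained sketch of the two numpy/sklearn operations involved, using an invented three-class example (the class names, probabilities, and the freshly fitted LabelEncoder are assumptions for illustration, not taken from this repository):

# Illustration only -- not the project's ModelServe.predict; it just shows what
# np.max(..., axis=1) and LabelEncoder.inverse_transform produce for a toy output.
import numpy as np
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder().fit(["class_a", "class_b", "class_c"])

# Assumed model output: one probability column per class, one row per sample.
probs = np.array([[0.1, 0.7, 0.2],
                  [0.6, 0.3, 0.1]])

confidences = np.max(probs, axis=1)                                  # [0.7, 0.6]
labels = label_encoder.inverse_transform(np.argmax(probs, axis=1))   # ["class_b", "class_a"]
print(confidences, labels)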
Empty file added tests/integration/__init__.py
30 changes: 30 additions & 0 deletions tests/integration/test_data_processing.py
@@ -0,0 +1,30 @@
import pathlib

import pandas as pd
import numpy as np

from src.config.settings import general_settings
from src.config.model import model_settings
from src.data.processing import data_processing_inference, load_dataset


# loading the raw dataset that was used to train the model
dataset = load_dataset(
    path=pathlib.Path.joinpath(
        general_settings.DATA_PATH,
        general_settings.RAW_FILE_NAME
    )
)

def test_data_processing_pipeline():
    """
    Testing the integration of the entire data processing pipeline.
    """
    _dataset = dataset.copy()
    _dataset = _dataset.drop(columns=general_settings.TARGET_COLUMN)

    X = data_processing_inference(dataframe=_dataset)

    assert isinstance(_dataset, pd.DataFrame)
    assert isinstance(X, np.ndarray)
    assert X.shape[1] == len(model_settings.FEATURES)
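
Both new integration modules read the raw dataset at import time. If the project runs these with pytest (an assumption; the runner is not shown in this commit), a session-scoped fixture in a hypothetical tests/integration/conftest.py could load the data once and share it:

# Hypothetical conftest.py sketch -- the file name and fixture are illustrative, not
# part of this commit; it reuses only imports that appear in the new test modules.
import pathlib

import pytest

from src.config.settings import general_settings
from src.data.processing import load_dataset


@pytest.fixture(scope="session")
def raw_dataset():
    # Load the raw training dataset once per test session.
    return load_dataset(
        path=pathlib.Path.joinpath(
            general_settings.DATA_PATH,
            general_settings.RAW_FILE_NAME
        )
    )

Each test would then accept raw_dataset as a parameter instead of copying the module-level dataset.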
52 changes: 52 additions & 0 deletions tests/integration/test_model_inference.py
@@ -0,0 +1,52 @@
import pathlib

import pandas as pd
import numpy as np

from src.config.settings import general_settings
from src.config.model import model_settings
from src.data.processing import data_processing_inference, load_dataset
from src.model.inference import ModelServe

# loading the raw dataset that was used to train the model
dataset = load_dataset(
    path=pathlib.Path.joinpath(
        general_settings.DATA_PATH,
        general_settings.RAW_FILE_NAME
    )
)

def test_model_inference_pipeline():
    """
    Testing the integration of the entire model inference pipeline.
    """
    _dataset = dataset.copy()
    _dataset = _dataset.drop(columns=general_settings.TARGET_COLUMN)

    X = data_processing_inference(dataframe=_dataset)

    assert isinstance(_dataset, pd.DataFrame)
    assert isinstance(X, np.ndarray)
    assert X.shape[1] == len(model_settings.FEATURES)

    loaded_model = ModelServe(
        model_name=model_settings.MODEL_NAME,
        model_flavor=model_settings.MODEL_FLAVOR,
        model_version=model_settings.VERSION,
    )
    loaded_model.load()

    assert loaded_model.model is not None

    predictions = loaded_model.predict(X, transform_to_str=False)

    assert isinstance(predictions, np.ndarray)
    assert predictions.shape[0] == X.shape[0]
    assert isinstance(predictions.dtype, type(np.dtype("float64")))

    # FIXME: fix this
    # predictions = loaded_model.predict(X, transform_to_str=True)

    # assert isinstance(predictions, List)
    # assert len(predictions) == X.shape[0]
    # assert isinstance(type(predictions[0]), str)
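
The commented-out assertions above would not pass as written: List is never imported, and isinstance(type(predictions[0]), str) tests the type object rather than the value. A sketch of how that check might look once string predictions work, assuming transform_to_str=True yields one decoded label per input row (as a list or array; this is not part of the commit):

# Hypothetical resolution of the FIXME -- assumes predict(..., transform_to_str=True)
# returns a sequence of decoded label strings, one per row of X.
predictions = loaded_model.predict(X, transform_to_str=True)

assert len(predictions) == X.shape[0]
# np.str_ elements also subclass str, so this works for string ndarrays as well.
assert all(isinstance(label, str) for label in predictions)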
4 changes: 2 additions & 2 deletions tests/unit/test_model_functions.py
@@ -104,7 +104,7 @@ def test_model_performance() -> None:
    )
    y_train = np.max(y_train, axis=1)

-    train_predictions = np.max(loaded_model.predict(X_train, transform_to_str=False), axis=1)
+    train_predictions = loaded_model.predict(X_train, transform_to_str=False)
    train_score = f1_score(y_true=y_train, y_pred=train_predictions, average="weighted")

    X_valid = load_feature(
@@ -117,7 +117,7 @@
    )
    y_valid = np.max(y_valid, axis=1)

-    valid_predictions = np.max(loaded_model.predict(X_valid, transform_to_str=False), axis=1)
+    valid_predictions = loaded_model.predict(X_valid, transform_to_str=False)
    valid_score = f1_score(y_true=y_valid, y_pred=valid_predictions, average="weighted")

    assert train_score == train_score
