From ad24076a7140403845757a3cdc9534a39483d5b8 Mon Sep 17 00:00:00 2001 From: Rafael Greca Date: Tue, 12 Nov 2024 21:29:25 -0300 Subject: [PATCH] creating integration test cases --- data/download_data.sh | 2 +- src/model/inference.py | 4 ++ tests/integration/__init__.py | 0 tests/integration/test_data_processing.py | 30 +++++++++++++ tests/integration/test_model_inference.py | 52 +++++++++++++++++++++++ tests/unit/test_model_functions.py | 4 +- 6 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/test_data_processing.py create mode 100644 tests/integration/test_model_inference.py diff --git a/data/download_data.sh b/data/download_data.sh index 30d045a..d4d2734 100644 --- a/data/download_data.sh +++ b/data/download_data.sh @@ -19,7 +19,7 @@ function parse_yaml { } # setting important variables -eval $(parse_yaml ../credentials.yaml "CONFIG_") +eval $(parse_yaml ../config/credentials.yaml "CONFIG_") # defining important variables export KAGGLE_USERNAME="$CONFIG_KAGGLE_USERNAME" diff --git a/src/model/inference.py b/src/model/inference.py index 664c68b..4409955 100644 --- a/src/model/inference.py +++ b/src/model/inference.py @@ -62,8 +62,12 @@ def predict(self, x: np.ndarray, transform_to_str: bool = True) -> np.ndarray: """ prediction = self.model.predict(x) + print(prediction.shape) + if transform_to_str: prediction = label_encoder.inverse_transform(prediction) + else: + prediction = np.max(prediction, axis=1) logger.info(f"Prediction: {prediction}.") return prediction diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/test_data_processing.py b/tests/integration/test_data_processing.py new file mode 100644 index 0000000..da3b5a2 --- /dev/null +++ b/tests/integration/test_data_processing.py @@ -0,0 +1,30 @@ +import pathlib + +import pandas as pd +import numpy as np + +from src.config.settings import general_settings +from src.config.model import model_settings +from src.data.processing import data_processing_inference, load_dataset + + +# loading the raw dataset that was used to train the model +dataset = load_dataset( + path=pathlib.Path.joinpath( + general_settings.DATA_PATH, + general_settings.RAW_FILE_NAME + ) +) + +def test_data_processing_pipeline(): + """ + Testing the integration of the entire data processing pipeline. + """ + _dataset = dataset.copy() + _dataset = _dataset.drop(columns=general_settings.TARGET_COLUMN) + + X = data_processing_inference(dataframe=_dataset) + + assert isinstance(_dataset, pd.DataFrame) + assert isinstance(X, np.ndarray) + assert X.shape[1] == len(model_settings.FEATURES) diff --git a/tests/integration/test_model_inference.py b/tests/integration/test_model_inference.py new file mode 100644 index 0000000..5a550dd --- /dev/null +++ b/tests/integration/test_model_inference.py @@ -0,0 +1,52 @@ +import pathlib + +import pandas as pd +import numpy as np + +from src.config.settings import general_settings +from src.config.model import model_settings +from src.data.processing import data_processing_inference, load_dataset +from src.model.inference import ModelServe + +# loading the raw dataset that was used to train the model +dataset = load_dataset( + path=pathlib.Path.joinpath( + general_settings.DATA_PATH, + general_settings.RAW_FILE_NAME + ) +) + +def test_model_inference_pipeline(): + """ + Testing the integration of the entire model inference pipeline. + """ + _dataset = dataset.copy() + _dataset = _dataset.drop(columns=general_settings.TARGET_COLUMN) + + X = data_processing_inference(dataframe=_dataset) + + assert isinstance(_dataset, pd.DataFrame) + assert isinstance(X, np.ndarray) + assert X.shape[1] == len(model_settings.FEATURES) + + loaded_model = ModelServe( + model_name=model_settings.MODEL_NAME, + model_flavor=model_settings.MODEL_FLAVOR, + model_version=model_settings.VERSION, + ) + loaded_model.load() + + assert loaded_model.model is not None + + predictions = loaded_model.predict(X, transform_to_str=False) + + assert isinstance(predictions, np.ndarray) + assert predictions.shape[0] == X.shape[0] + assert isinstance(predictions.dtype, type(np.dtype("float64"))) + + # FIXME: fix this + # predictions = loaded_model.predict(X, transform_to_str=True) + + # assert isinstance(predictions, List) + # assert len(predictions) == X.shape[0] + # assert isinstance(type(predictions[0]), str) diff --git a/tests/unit/test_model_functions.py b/tests/unit/test_model_functions.py index ea9d25c..2fed69a 100644 --- a/tests/unit/test_model_functions.py +++ b/tests/unit/test_model_functions.py @@ -104,7 +104,7 @@ def test_model_performance() -> None: ) y_train = np.max(y_train, axis=1) - train_predictions = np.max(loaded_model.predict(X_train, transform_to_str=False), axis=1) + train_predictions = loaded_model.predict(X_train, transform_to_str=False) train_score = f1_score(y_true=y_train, y_pred=train_predictions, average="weighted") X_valid = load_feature( @@ -117,7 +117,7 @@ def test_model_performance() -> None: ) y_valid = np.max(y_valid, axis=1) - valid_predictions = np.max(loaded_model.predict(X_valid, transform_to_str=False), axis=1) + valid_predictions = loaded_model.predict(X_valid, transform_to_str=False) valid_score = f1_score(y_true=y_valid, y_pred=valid_predictions, average="weighted") assert train_score == train_score