From ad24076a7140403845757a3cdc9534a39483d5b8 Mon Sep 17 00:00:00 2001
From: Rafael Greca <rgvieira97@gmail.com>
Date: Tue, 12 Nov 2024 21:29:25 -0300
Subject: [PATCH] creating integration test cases

---
 data/download_data.sh                     |  2 +-
 src/model/inference.py                    |  4 ++
 tests/integration/__init__.py             |  0
 tests/integration/test_data_processing.py | 30 +++++++++++++
 tests/integration/test_model_inference.py | 52 +++++++++++++++++++++++
 tests/unit/test_model_functions.py        |  4 +-
 6 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 tests/integration/__init__.py
 create mode 100644 tests/integration/test_data_processing.py
 create mode 100644 tests/integration/test_model_inference.py

diff --git a/data/download_data.sh b/data/download_data.sh
index 30d045a..d4d2734 100644
--- a/data/download_data.sh
+++ b/data/download_data.sh
@@ -19,7 +19,7 @@ function parse_yaml {
 }
 
 # setting important variables
-eval $(parse_yaml ../credentials.yaml "CONFIG_")
+eval "$(parse_yaml ../config/credentials.yaml "CONFIG_")"
 
 # defining important variables
 export KAGGLE_USERNAME="$CONFIG_KAGGLE_USERNAME"
diff --git a/src/model/inference.py b/src/model/inference.py
index 664c68b..4409955 100644
--- a/src/model/inference.py
+++ b/src/model/inference.py
@@ -62,8 +62,12 @@ def predict(self, x: np.ndarray, transform_to_str: bool = True) -> np.ndarray:
         """
         prediction = self.model.predict(x)
 
+        logger.debug(f"Raw prediction shape: {prediction.shape}.")
+
         if transform_to_str:
             prediction = label_encoder.inverse_transform(prediction)
+        else:
+            prediction = np.max(prediction, axis=1)
 
         logger.info(f"Prediction: {prediction}.")
         return prediction
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/integration/test_data_processing.py b/tests/integration/test_data_processing.py
new file mode 100644
index 0000000..da3b5a2
--- /dev/null
+++ b/tests/integration/test_data_processing.py
@@ -0,0 +1,30 @@
+import pathlib
+
+import pandas as pd
+import numpy as np
+
+from src.config.settings import general_settings
+from src.config.model import model_settings
+from src.data.processing import data_processing_inference, load_dataset
+
+
+# Raw dataset the model was trained on; read once at import time and
+# copied inside the test so the module-level frame is never mutated.
+raw_dataset_path = pathlib.Path.joinpath(
+    general_settings.DATA_PATH,
+    general_settings.RAW_FILE_NAME,
+)
+dataset = load_dataset(path=raw_dataset_path)
+
+
+def test_data_processing_pipeline():
+    """Run the inference data processing over the raw dataset.
+
+    Expects a NumPy array with one column per `model_settings.FEATURES` entry.
+    """
+    features_frame = dataset.copy().drop(columns=general_settings.TARGET_COLUMN)
+    processed = data_processing_inference(dataframe=features_frame)
+
+    assert isinstance(features_frame, pd.DataFrame)
+    assert isinstance(processed, np.ndarray)
+    assert processed.shape[1] == len(model_settings.FEATURES)
diff --git a/tests/integration/test_model_inference.py b/tests/integration/test_model_inference.py
new file mode 100644
index 0000000..5a550dd
--- /dev/null
+++ b/tests/integration/test_model_inference.py
@@ -0,0 +1,52 @@
+import pathlib
+
+import pandas as pd
+import numpy as np
+
+from src.config.settings import general_settings
+from src.config.model import model_settings
+from src.data.processing import data_processing_inference, load_dataset
+from src.model.inference import ModelServe
+
+# loading the raw dataset that was used to train the model
+dataset = load_dataset(
+    path=pathlib.Path.joinpath(
+        general_settings.DATA_PATH,
+        general_settings.RAW_FILE_NAME
+    )
+)
+
+def test_model_inference_pipeline():
+    """
+    Testing the integration of the entire model inference pipeline:
+    raw dataset -> data processing -> model loading -> numeric predictions.
+    """
+    _dataset = dataset.copy().drop(columns=general_settings.TARGET_COLUMN)
+
+    X = data_processing_inference(dataframe=_dataset)
+
+    assert isinstance(_dataset, pd.DataFrame)
+    assert isinstance(X, np.ndarray)
+    assert X.shape[1] == len(model_settings.FEATURES)
+
+    loaded_model = ModelServe(
+        model_name=model_settings.MODEL_NAME,
+        model_flavor=model_settings.MODEL_FLAVOR,
+        model_version=model_settings.VERSION,
+    )
+    loaded_model.load()
+
+    assert loaded_model.model is not None
+
+    predictions = loaded_model.predict(X, transform_to_str=False)
+
+    assert isinstance(predictions, np.ndarray)
+    assert predictions.shape[0] == X.shape[0]
+    # compare dtypes directly; `isinstance(dtype, type(np.dtype(...)))` is
+    # vacuous on NumPy versions where every dtype shares the `np.dtype` class
+    assert predictions.dtype == np.float64
+
+    # FIXME: the `transform_to_str=True` path is not covered yet; fix and enable:
+    # predictions = loaded_model.predict(X, transform_to_str=True)
+    # assert isinstance(predictions, np.ndarray) and len(predictions) == X.shape[0]
+    # assert isinstance(predictions[0], str)
diff --git a/tests/unit/test_model_functions.py b/tests/unit/test_model_functions.py
index ea9d25c..2fed69a 100644
--- a/tests/unit/test_model_functions.py
+++ b/tests/unit/test_model_functions.py
@@ -104,7 +104,7 @@ def test_model_performance() -> None:
     )
     y_train = np.max(y_train, axis=1)
 
-    train_predictions = np.max(loaded_model.predict(X_train, transform_to_str=False), axis=1)
+    train_predictions = loaded_model.predict(X_train, transform_to_str=False)
     train_score = f1_score(y_true=y_train, y_pred=train_predictions, average="weighted")
 
     X_valid = load_feature(
@@ -117,7 +117,7 @@ def test_model_performance() -> None:
     )
     y_valid = np.max(y_valid, axis=1)
 
-    valid_predictions = np.max(loaded_model.predict(X_valid, transform_to_str=False), axis=1)
+    valid_predictions = loaded_model.predict(X_valid, transform_to_str=False)
     valid_score = f1_score(y_true=y_valid, y_pred=valid_predictions, average="weighted")
 
     assert train_score == train_score