diff --git a/challenge/api.py b/challenge/api.py
index e18305a..5731c9c 100644
--- a/challenge/api.py
+++ b/challenge/api.py
@@ -126,9 +126,6 @@ async def post_predict(flight_data: FlightData) -> dict:
         preprocessed_data = model.preprocess(flight_data_df)
 
         # sorts column to feed the model
-        column_order = model._model.feature_names_in_
-        preprocessed_data = preprocessed_data[column_order]
-
         pred = model.predict(preprocessed_data)
 
         return {"predict": pred}
@@ -138,5 +135,5 @@ async def post_predict(flight_data: FlightData) -> dict:
         with open("error_logs.txt", "a") as f:
             f.write(f"{datetime.now(timezone.utc)}: encounter error {e}")
         raise HTTPException(
-            status_code=500, detail="Internal server error during prediction"
+            status_code=500, detail="Internal server error during prediction."
         )
diff --git a/challenge/model.py b/challenge/model.py
index 5358195..2b87b4d 100644
--- a/challenge/model.py
+++ b/challenge/model.py
@@ -5,62 +5,91 @@
 import numpy as np
 import pandas as pd
-from sklearn.linear_model import LogisticRegression
+from xgboost import XGBClassifier
 
-FEATURES_COLS = [
-    "OPERA_Latin American Wings",
-    "MES_7",
-    "MES_10",
-    "OPERA_Grupo LATAM",
-    "MES_12",
-    "TIPOVUELO_I",
-    "MES_4",
-    "MES_11",
-    "OPERA_Sky Airline",
-    "OPERA_Copa Air",
-]
 
+class DelayModel:
+    FEATURES_COLS = [
+        "MES_4",
+        "MES_7",
+        "MES_10",
+        "MES_11",
+        "MES_12",
+        "OPERA_Copa Air",
+        "OPERA_Grupo LATAM",
+        "OPERA_Latin American Wings",
+        "OPERA_Sky Airline",
+        "TIPOVUELO_I",
+    ]
+
+    THRESHOLD_IN_MINUTES = 15
 
-def get_min_diff(data):
-    fecha_o = datetime.strptime(data["Fecha-O"], "%Y-%m-%d %H:%M:%S")
-    fecha_i = datetime.strptime(data["Fecha-I"], "%Y-%m-%d %H:%M:%S")
-    min_diff = ((fecha_o - fecha_i).total_seconds()) / 60
-    return min_diff
+    def __init__(self):
+        self._model = XGBClassifier()
 
+    def _get_min_diff(self, data: pd.Series) -> float:
+        """
+        Auxiliary function to get the target.
 
-def get_delay_target(data: pd.DataFrame) -> pd.Series:
-    data["min_diff"] = data.apply(get_min_diff, axis=1)
-    threshold_in_minutes = 15
-    data["delay"] = np.where(data["min_diff"] > threshold_in_minutes, 1, 0)
+        Args:
+            data (pd.Series): raw data row.
 
-    return data["delay"].to_frame()
+        Returns:
+            float: difference between Fecha-O and Fecha-I in minutes.
+        """
+        fecha_o = datetime.strptime(data["Fecha-O"], "%Y-%m-%d %H:%M:%S")
+        fecha_i = datetime.strptime(data["Fecha-I"], "%Y-%m-%d %H:%M:%S")
+        min_diff = ((fecha_o - fecha_i).total_seconds()) / 60
+        return min_diff
 
+    def _get_delay_target(self, data: pd.DataFrame) -> pd.DataFrame:
+        """
+        Compute and return target to train the model with, from raw data.
 
-def get_features(data: pd.DataFrame) -> pd.DataFrame:
-    # get the one hot enconding of the columns suggested by the DS
-    features = pd.concat(
-        [
-            pd.get_dummies(data["OPERA"], prefix="OPERA"),
-            pd.get_dummies(data["TIPOVUELO"], prefix="TIPOVUELO"),
-            pd.get_dummies(data["MES"], prefix="MES"),
-        ],
-        axis=1,
-    )
-    present_features = list(set(FEATURES_COLS).intersection(set(features.columns)))
-    missing_features = list(set(FEATURES_COLS).difference(set(features.columns)))
+        Args:
+            data (pd.DataFrame): raw data.
 
-    features = features[present_features]
+        Returns:
+            pd.DataFrame: target to predict.
+        """
+        data["min_diff"] = data.apply(self._get_min_diff, axis=1)
+        data["delay"] = np.where(data["min_diff"] > self.THRESHOLD_IN_MINUTES, 1, 0)
 
-    # fill missing features with 0 due to one-hot encoding of features
-    for feature in missing_features:
-        features[feature] = 0
+        return data["delay"].to_frame()
 
-    return features
+    def _get_features(self, data: pd.DataFrame) -> pd.DataFrame:
+        """
+        Compute and return input features to feed the model from raw data.
 
+        Args:
+            data (pd.DataFrame): raw data.
 
-class DelayModel:
-    def __init__(self):
-        self._model = LogisticRegression()
+        Returns:
+            pd.DataFrame: features with columns in a specific order.
+        """
+        # get the one-hot encoding of the columns suggested by the DS
+        # the existence of these three columns is enforced by the API above this code
+        features = pd.concat(
+            [
+                pd.get_dummies(data["OPERA"], prefix="OPERA"),
+                pd.get_dummies(data["TIPOVUELO"], prefix="TIPOVUELO"),
+                pd.get_dummies(data["MES"], prefix="MES"),
+            ],
+            axis=1,
+        )
+        valid_features = list(
+            set(self.FEATURES_COLS).intersection(set(features.columns))
+        )
+        missing_features = list(
+            set(self.FEATURES_COLS).difference(set(features.columns))
+        )
+
+        # keep valid features and fill missing ones with 0 due to one-hot encoding
+        features = features[valid_features]
+        features[missing_features] = 0
+
+        # return dataframe with sorted columns
+        return features[self.FEATURES_COLS]
 
     def preprocess(
         self, data: pd.DataFrame, target_column: Optional[str] = None
@@ -78,20 +107,20 @@ def preprocess(
             pd.DataFrame: features.
         """
         # retrieve features from the data
-        x = get_features(data)
+        x = self._get_features(data)
 
         # return different sets, depending on the target
         if target_column is None:
            return x
         elif target_column == "delay":
-            y = get_delay_target(data)
+            y = self._get_delay_target(data)
             return (x, y)
         else:
             raise NotImplementedError("Only implemented 'delay' as target column")
 
     def fit(self, features: pd.DataFrame, target: pd.DataFrame) -> None:
         """
-        Fit model with preprocessed data.
+        Fit model with data preprocessed by this class.
 
         Args:
             features (pd.DataFrame): preprocessed data.
@@ -103,10 +132,11 @@ def fit(self, features: pd.DataFrame, target: pd.DataFrame) -> None:
         # get values to compensate unbalancing
         n_y0 = len(target[target[target_column] == 0])
         n_y1 = len(target[target[target_column] == 1])
+        scale = n_y0 / n_y1
 
         # instantiate model and fit
-        self._model = LogisticRegression(
-            class_weight={1: n_y0 / len(target), 0: n_y1 / len(target)}
+        self._model = XGBClassifier(
+            random_state=1, learning_rate=0.01, scale_pos_weight=scale
         )
         self._model.fit(features, target[target_column])
@@ -130,7 +160,7 @@ def load_model(self, path: str):
 
     def predict(self, features: pd.DataFrame) -> List[int]:
         """
-        Predict delays for new flights.
+        Predict delays for new flights on data preprocessed by this class.
 
         Args:
             features (pd.DataFrame): preprocessed data.
@@ -146,38 +176,20 @@ def predict(self, features: pd.DataFrame) -> List[int]:
         return pred
 
-if __name__ == "__main__":
-    from sklearn.metrics import classification_report, mean_squared_error
-    from sklearn.model_selection import train_test_split
+def main():
+    # perform a training of the model with all available data for production deployment
 
-    # perform a training of the model for production deployment
+    # get data and initial model
     model = DelayModel()
     data = pd.read_csv(filepath_or_buffer="data/data.csv")
 
+    # preprocess data and fit
     features, target = model.preprocess(data=data, target_column="delay")
-
-    _, features_validation, _, target_validation = train_test_split(
-        features, target, test_size=0.33, random_state=42
-    )
-
     model.fit(features=features, target=target)
 
-    predicted_target = model.predict(features_validation)
-
-    report = classification_report(
-        target_validation, predicted_target, output_dict=True
-    )
-
+    # save
     model.save_model("models")
 
-    # re instantiate to override model
-    model = DelayModel()
-    model.load_model("models")
-
-    predicted_target_load = model.predict(features_validation)
-    print(
-        "The difference in prediction is:"
-        f" {mean_squared_error(predicted_target, predicted_target_load)}"
-    )
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/challenge.md b/docs/challenge.md
index dffe313..e66f696 100644
--- a/docs/challenge.md
+++ b/docs/challenge.md
@@ -1,24 +1,89 @@
 # Challange Notes
 
-## Reviewing the notebook
+Notes taken during the development of the challenge.
 
-### Generalities
+## Part I Model selection and transcription
 
-In the documentation (README) it is said that the data has a column named
-DATA-I when talking about the additional DS features, but this column does not
-exist. Instead, from the code and the description, we can assume this column
-name should be FECHA-I.
+### Reviewing the DS's notebook
 
-### Feedback to the DS
+There are quite some issues with how the notebook was presented. The first
+issue is that it does not run properly, due to the absence of the `x=` and
+`y=` kwargs in the barplots. A second issue is that seaborn's `.set` method
+was deprecated in favour of `.set_theme`, which also needs to be called only
+once. And lastly, in the First Sight data analysis, some markdown subtitles
+did not correspond to the cells below them. While neither critical nor
+relevant to the result, these issues do not give a good impression of the
+care taken while summarising the DS's experiments into this notebook. These
+errors were fixed to properly execute the notebook (along with the change
+from `../data/data.csv` to `data/data.csv`).
 
-I would like to ask a few things to the DS as it seems unclear why he did some
-of the selections he did. I am supposing this is a kind of summary notebook, so
-some details may have been missed in the condensation of his/her analysis.
+#### Data analysis
+
+The colour lightblue is a really poor choice, as it gives barely any contrast
+between the graph background and the bars.
+
+The "flight by destination" graph is so cramped that it is hard to see which
+bar corresponds to which destination.
+
+#### Features generation
+
+The additional features the DS computed make sense at first sight, but the
+target does not match what was asked. If the idea was to predict the
+**probability** of a flight being delayed, then a regressor should be
+trained, and the information on "how much delay" each flight had could be
+incorporated into this encoding.
+This may have more to do with the modelling part than with the feature
+generation, but the two are very tightly coupled. Also, the value of 15
+minutes for a flight to be considered delayed is not appropriately justified.
+
+#### Data analysis II
+
+The first cell of this part shows bad coding practices.
+
+#### Training
+
+`training_data` is defined in the second cell but never used again.
+
+The selection of three features is done with no explanation whatsoever. A
+binary encoding of `TIPOVUELO` may benefit the model by reducing the input
+dimension. A cyclical encoding of the months could benefit the model both by
+reducing the input dimension and by exposing the cyclical nature of the
+months to it (a short sketch of such an encoding is included further below).
+
+In the xgboost training, the DS added an artificial threshold to its outputs;
+this was not needed, as the model already outputs 0/1. It should also be a
+red flag that the model always predicted 0, meaning that it was not capturing
+enough of the information in the dataset. This result could be used as
+evidence of the imbalance of the dataset (in addition to the Logistic
+Regression below). By no means should this trained model be used to get the
+most important features, as it would pick the features that best predict the
+0 value.
+
+Notice that the top 10 features that are selected do not match the top 10
+features of the graph (at least in this run).
+
+No mention is made of this whatsoever; such a striking result deserves a
+comment.
+
+#### Training II
+
+For a second round of training, the DS considered the most important features
+to be more relevant than the balancing, and set up the experiments as such,
+getting the expected results when no balancing is done. The results without
+balancing lose their meaning in this context.
+
+With the balancing, though, somewhat better results are obtained. But there
+is no explanation of whether this is enough for the business. I don't think
+that must be explicit in the DS's analysis, but it would be a nice thing to
+have as a conclusion of the work.
+
+#### Conclusions and next steps
+
+I would send an email back to the DS, asking for more explanation of the
+results and their interpretations, and pointing out some of the mistakes I
+have found. Something of the sort:
 
 1. I see that at the data splitting step, you decided to keep just three
 features. Why keep these features specifically? It seems to me that other
-features, as the destination, may encode some information on the delay process;
-but maybe I am missing something.
+features, such as the destination, may encode some information on the delay
+process.
 2. On the target encoding, I understood that we wanted to predict the
 probability of a delay on a specific flight. Your encoding just predicts whether
 the flight would be more than 15 minutes delayed or not. Have you came up with
@@ -34,30 +99,43 @@
 this decision.
 4. The 10 features you selected to train with, are not the top 10 I am seeing
 in the graph of feature importance from the xgboost. I think it might be due to
 some random see issue. Would you care to go over it?
+5. Are these metrics enough from a business perspective? Or are they expected
+to be improved in further iterations?
 
 Also pointed out some comments in the code, but wouldn't bother the DS with
-them, as it was not the main focus of the work.
+them, as it was not the main focus of the work, and I have not found clear
+bugs in the prediction features, which are what I will use later. Though, I
+would report any bugs in the computation of features if I found one.
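+
+As a short sketch of the cyclical month encoding suggested in the Training
+notes above (the helper name and its direct use of the raw `MES` column are
+hypothetical, not part of the current pipeline):
+
+```python
+import numpy as np
+import pandas as pd
+
+
+def encode_month_cyclically(data: pd.DataFrame) -> pd.DataFrame:
+    # map MES (1..12) onto the unit circle so month 12 sits next to month 1,
+    # replacing 12 one-hot columns with just two numeric ones
+    angle = 2 * np.pi * (data["MES"] - 1) / 12
+    data["MES_sin"] = np.sin(angle)
+    data["MES_cos"] = np.cos(angle)
+    return data
+```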
+
+##### Model selection
+
+In the first iteration of the challenge, I chose the LogisticRegression as
+the model, for its simplicity, its ease of explainability (which features it
+gives more weight to), and because it is part of scikit-learn, our training
+framework. But given that there are a lot of unsolved mysteries, and that we
+may need more predictive power, I will now choose the xgboost model, both to
+surface any additional issues early and for ease of retraining, with the
+added benefit that we do not lose that much explainability.
+
+##### Feature selection
+
+I will use the features selected by the DS as-is, because they are the best
+baseline we have and they do not compromise time-to-delivery on a first
+iteration. Keeping this list as a parameter and updating it when needed is
+not hard.
+
+##### Code-wise
 
-### How to continue
+Implemented the functions as close to what they are in the notebook as
+possible, improving as much of the code as I could.
 
-I will proceed by moving the pipeline the DS implemented here as similar as
-possible to the production pipeline, as this is the best predictions we have
-yet. Though, I will also try to make it as versatile as possible in the feature
-selection stage, that I think was the one that may need a revision from his
-side; though, without compromising the time-to-production of the system.
+Added code, executed when the script is run as `python challenge/model.py`,
+to generate and save a trained model at the path `models/model.pkl`. Pickle
+is not the best format to use, for compatibility reasons, but as I only save
+the fitted model and load it back inside the same class that generated it, I
+don't expect errors (a minimal sketch of this round trip is included below).
+And converting it to a more general format (like ONNX) and then running it
+properly is not trivial.
 
-#### On the model selection
-The final model might be modified if another stage of experimenting would be
-done by the DS but, as both linear regression and xgboost support the same
-interface for prediction, it shouldn't be that much of an issue to change it
-afterwards if implemented appropriately.
-I will go with the linear regression, as it's execution time is determined only
-by the number of features, and not on a highly tunned hyperparameter (as the
-number of trees in xgboost). Also, it has the advantage that we can limit
-ourselves to only one framework (scikit learn), and have less imcompatibility
-issues when trying to move our model to production.
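+
+A minimal sketch of that pickle round trip (paths and attribute access are
+illustrative; the actual logic lives in `DelayModel.save_model` and
+`DelayModel.load_model`):
+
+```python
+import os
+import pickle
+
+# assuming `model` is a fitted DelayModel and "models" is the target directory
+os.makedirs("models", exist_ok=True)
+with open(os.path.join("models", "model.pkl"), "wb") as f:
+    pickle.dump(model._model, f)
+
+# the inverse operation, run inside the same class, restores the estimator
+with open(os.path.join("models", "model.pkl"), "rb") as f:
+    model._model = pickle.load(f)
+```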
## Part II API developement diff --git a/poetry.lock b/poetry.lock index 6deb3aa..c29609b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1913,45 +1913,72 @@ files = [ [[package]] name = "pandas" -version = "1.3.5" +version = "2.2.2" description = "Powerful data structures for data analysis, time series, and statistics" optional = false -python-versions = ">=3.7.1" -files = [ - {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:62d5b5ce965bae78f12c1c0df0d387899dd4211ec0bdc52822373f13a3a022b9"}, - {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:adfeb11be2d54f275142c8ba9bf67acee771b7186a5745249c7d5a06c670136b"}, - {file = "pandas-1.3.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a8c055d58873ad81cae290d974d13dd479b82cbb975c3e1fa2cf1920715296"}, - {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd541ab09e1f80a2a1760032d665f6e032d8e44055d602d65eeea6e6e85498cb"}, - {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2651d75b9a167cc8cc572cf787ab512d16e316ae00ba81874b560586fa1325e0"}, - {file = "pandas-1.3.5-cp310-cp310-win_amd64.whl", hash = "sha256:aaf183a615ad790801fa3cf2fa450e5b6d23a54684fe386f7e3208f8b9bfbef6"}, - {file = "pandas-1.3.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:344295811e67f8200de2390093aeb3c8309f5648951b684d8db7eee7d1c81fb7"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:552020bf83b7f9033b57cbae65589c01e7ef1544416122da0c79140c93288f56"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cce0c6bbeb266b0e39e35176ee615ce3585233092f685b6a82362523e59e5b4"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d28a3c65463fd0d0ba8bbb7696b23073efee0510783340a44b08f5e96ffce0c"}, - {file = "pandas-1.3.5-cp37-cp37m-win32.whl", hash = "sha256:a62949c626dd0ef7de11de34b44c6475db76995c2064e2d99c6498c3dba7fe58"}, - {file = "pandas-1.3.5-cp37-cp37m-win_amd64.whl", hash = "sha256:8025750767e138320b15ca16d70d5cdc1886e8f9cc56652d89735c016cd8aea6"}, - {file = "pandas-1.3.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fe95bae4e2d579812865db2212bb733144e34d0c6785c0685329e5b60fcb85dd"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f261553a1e9c65b7a310302b9dbac31cf0049a51695c14ebe04e4bfd4a96f02"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b6dbec5f3e6d5dc80dcfee250e0a2a652b3f28663492f7dab9a24416a48ac39"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3bc49af96cd6285030a64779de5b3688633a07eb75c124b0747134a63f4c05f"}, - {file = "pandas-1.3.5-cp38-cp38-win32.whl", hash = "sha256:b6b87b2fb39e6383ca28e2829cddef1d9fc9e27e55ad91ca9c435572cdba51bf"}, - {file = "pandas-1.3.5-cp38-cp38-win_amd64.whl", hash = "sha256:a395692046fd8ce1edb4c6295c35184ae0c2bbe787ecbe384251da609e27edcb"}, - {file = "pandas-1.3.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bd971a3f08b745a75a86c00b97f3007c2ea175951286cdda6abe543e687e5f2f"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37f06b59e5bc05711a518aa10beaec10942188dccb48918bb5ae602ccbc9f1a0"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:2c21778a688d3712d35710501f8001cdbf96eb70a7c587a3d5613573299fdca6"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3345343206546545bc26a05b4602b6a24385b5ec7c75cb6059599e3d56831da2"}, - {file = "pandas-1.3.5-cp39-cp39-win32.whl", hash = "sha256:c69406a2808ba6cf580c2255bcf260b3f214d2664a3a4197d0e640f573b46fd3"}, - {file = "pandas-1.3.5-cp39-cp39-win_amd64.whl", hash = "sha256:32e1a26d5ade11b547721a72f9bfc4bd113396947606e00d5b4a5b79b3dcb006"}, - {file = "pandas-1.3.5.tar.gz", hash = "sha256:1e4285f5de1012de20ca46b188ccf33521bff61ba5c5ebd78b4fb28e5416a9f1"}, +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, + {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, + {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, + {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, + {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, + {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, ] [package.dependencies] -numpy = {version = ">=1.21.0", markers = "python_version >= \"3.10\""} -python-dateutil = ">=2.7.3" -pytz = ">=2017.3" +numpy = {version = ">=1.22.4", markers = "python_version < \"3.11\""} +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" [package.extras] -test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck 
(>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] [[package]] name = "parso" @@ -2928,6 +2955,17 @@ files = [ {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +[[package]] +name = "tzdata" +version = "2024.1" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, +] + [[package]] name = "urllib3" version = "2.2.2" @@ -3239,4 +3277,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "~3.10" -content-hash = "6bcf5787c5c5ea9f571129e0b0bf8a96277ec9c9396ae13e085ae5e3bf8f3167" +content-hash = "cdd4dc4f62d0aec436717db61682e1e92468a3ecf808a1e6eef25388229f0e6e" diff --git a/pyproject.toml b/pyproject.toml index 084b0da..de46239 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ fastapi = "^0.111.0" pydantic = "~1.10.2" uvicorn = "~0.15.0" numpy = "~1.22.4" -pandas = "~1.3.5" +pandas = "^2.0.0" scikit-learn = "~1.3.0" [tool.poetry.group.dev.dependencies]