From 02b074c035086f86af7fc1e3ae5b235d1dbd7639 Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 9 Feb 2024 16:41:23 +0000 Subject: [PATCH 01/13] feat(preprocessing): moved changes form Ethan's branch in old repo --- CHANGELOG.md | 5 + README.md | 2 +- main.tf | 21 +- .../models_template_notebook.ipynb | 1830 +++++------------ modules/s3/main.tf | 12 +- modules/s3/variables.tf | 7 +- modules/sagemaker/main.tf | 21 +- .../templates/startupscript.sh.tftpl | 6 + modules/sagemaker/variables.tf | 5 + variables.tf | 6 + 10 files changed, 621 insertions(+), 1294 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b366aa1..c4f1a00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,9 @@ # AWS-MLOps-module +## [2.0.2] - 09/02/24 +* Added functionality for passing preprocessing script + +## [2.0.1] - 02/02/24 +* Updated retraining_schedule validation ## [2.0.0] - 21/12/23 **BREAKING CHANGES** diff --git a/README.md b/README.md index 252b5dc..fbc3683 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ This repo contains a terraform module with corresponding AWS resources that enab ## Example Usage - +#TODO update ``` module "MLOps" { source = "github.com/crederauk/terraform-aws-mlops-module?ref=" diff --git a/main.tf b/main.tf index 685f88e..dcc025e 100644 --- a/main.tf +++ b/main.tf @@ -1,8 +1,9 @@ module "s3" { source = "./modules/s3" - resource_naming_prefix = var.resource_naming_prefix - tags = var.tags + resource_naming_prefix = var.resource_naming_prefix + tags = var.tags + preprocessing_script_path = var.preprocessing_script_path } module "sagemaker" { @@ -26,15 +27,15 @@ module "sagemaker" { ecr_repo_uri = "${module.ecr.repository.repository_url}:latest" # S3 - config_s3_bucket = module.s3.config_bucket.id - config_bucket_key_arn = module.s3.encryption_key.arn - data_s3_bucket = var.data_s3_bucket - data_bucket_key_arn = var.data_s3_bucket_encryption_key_arn - data_location_s3 = var.data_location_s3 - model_s3_bucket = module.s3.model_bucket.id - 
model_bucket_key_arn = module.s3.encryption_key.arn + config_s3_bucket = module.s3.config_bucket.id + config_bucket_key_arn = module.s3.encryption_key.arn + data_s3_bucket = var.data_s3_bucket + data_bucket_key_arn = var.data_s3_bucket_encryption_key_arn + data_location_s3 = var.data_location_s3 + model_s3_bucket = module.s3.model_bucket.id + model_bucket_key_arn = module.s3.encryption_key.arn + preprocessing_script_path = var.preprocessing_script_path } - module "retraining_job" { count = var.retrain_model_bool ? 1 : 0 source = "./modules/glue" diff --git a/mlops_ml_models/models_template_notebook.ipynb b/mlops_ml_models/models_template_notebook.ipynb index e9d7e71..e5690dc 100644 --- a/mlops_ml_models/models_template_notebook.ipynb +++ b/mlops_ml_models/models_template_notebook.ipynb @@ -2,181 +2,300 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: pycaret in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (3.2.0)\n", - "Requirement already satisfied: category-encoders>=2.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (2.6.3)\n", - "Requirement already satisfied: cloudpickle in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (2.2.1)\n", - "Requirement already satisfied: deprecation>=2.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (2.1.0)\n", - "Requirement already satisfied: imbalanced-learn>=0.8.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (0.11.0)\n", - "Requirement already satisfied: importlib-metadata>=4.12.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (6.8.0)\n", - "Requirement already satisfied: ipython>=5.5.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from 
pycaret) (8.16.1)\n", - "Requirement already satisfied: ipywidgets>=7.6.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (8.1.1)\n", - "Requirement already satisfied: jinja2>=1.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (3.1.2)\n", - "Requirement already satisfied: joblib>=1.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (1.3.2)\n", - "Requirement already satisfied: kaleido>=0.2.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (0.2.1)\n", - "Requirement already satisfied: lightgbm>=3.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (4.1.0)\n", - "Requirement already satisfied: markupsafe>=2.0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (2.1.3)\n", - "Requirement already satisfied: matplotlib<=3.6,>=3.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (3.6.0)\n", - "Requirement already satisfied: nbformat>=4.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (5.9.2)\n", - "Requirement already satisfied: numba>=0.55.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (0.57.1)\n", - "Requirement already satisfied: numpy<1.27,>=1.21 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (1.22.4)\n", - "Requirement already satisfied: pandas<2.0.0,>=1.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (1.5.3)\n", - "Requirement already satisfied: plotly-resampler>=0.8.3.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (0.9.1)\n", - "Requirement already satisfied: plotly>=5.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (5.18.0)\n", - "Requirement already satisfied: 
pmdarima!=1.8.1,<3.0.0,>=1.8.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (2.0.4)\n", - "Requirement already satisfied: psutil>=5.9.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (5.9.5)\n", - "Requirement already satisfied: pyod>=1.0.8 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (1.1.2)\n", - "Requirement already satisfied: requests>=2.27.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (2.31.0)\n", - "Requirement already satisfied: schemdraw==0.15 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (0.15)\n", - "Requirement already satisfied: scikit-learn<1.3.0,>=1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (1.2.2)\n", - "Requirement already satisfied: scikit-plot>=0.3.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (0.3.7)\n", - "Requirement already satisfied: scipy~=1.10.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (1.10.1)\n", - "Requirement already satisfied: sktime!=0.17.1,!=0.17.2,!=0.18.0,<0.22.0,>=0.16.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (0.21.1)\n", - "Requirement already satisfied: statsmodels>=0.12.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (0.14.0)\n", - "Requirement already satisfied: tbats>=1.1.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (1.1.3)\n", - "Requirement already satisfied: tqdm>=4.62.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (4.66.1)\n", - "Requirement already satisfied: xxhash in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (3.4.1)\n", - "Requirement already satisfied: yellowbrick>=1.4 in 
/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (1.5)\n", - "Requirement already satisfied: wurlitzer in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pycaret) (3.0.3)\n", - "Requirement already satisfied: patsy>=0.5.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from category-encoders>=2.4.0->pycaret) (0.5.3)\n", - "Requirement already satisfied: packaging in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from deprecation>=2.1.0->pycaret) (21.3)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from imbalanced-learn>=0.8.1->pycaret) (3.2.0)\n", - "Requirement already satisfied: zipp>=0.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from importlib-metadata>=4.12.0->pycaret) (3.17.0)\n", - "Requirement already satisfied: backcall in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (0.2.0)\n", - "Requirement already satisfied: decorator in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (5.1.1)\n", - "Requirement already satisfied: jedi>=0.16 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (0.18.2)\n", - "Requirement already satisfied: matplotlib-inline in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (0.1.6)\n", - "Requirement already satisfied: pickleshare in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (0.7.5)\n", - "Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (3.0.39)\n", - "Requirement already satisfied: pygments>=2.4.0 in 
/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (2.16.1)\n", - "Requirement already satisfied: stack-data in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (0.6.2)\n", - "Requirement already satisfied: traitlets>=5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (5.12.0)\n", - "Requirement already satisfied: exceptiongroup in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (1.1.3)\n", - "Requirement already satisfied: pexpect>4.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipython>=5.5.0->pycaret) (4.8.0)\n", - "Requirement already satisfied: comm>=0.1.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipywidgets>=7.6.5->pycaret) (0.1.4)\n", - "Requirement already satisfied: widgetsnbextension~=4.0.9 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipywidgets>=7.6.5->pycaret) (4.0.9)\n", - "Requirement already satisfied: jupyterlab-widgets~=3.0.9 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ipywidgets>=7.6.5->pycaret) (3.0.9)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (1.1.1)\n", - "Requirement already satisfied: cycler>=0.10 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (4.43.1)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (1.4.5)\n", - "Requirement already satisfied: 
pillow>=6.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (10.0.1)\n", - "Requirement already satisfied: pyparsing>=2.2.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (3.1.1)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (2.8.2)\n", - "Requirement already satisfied: fastjsonschema in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from nbformat>=4.2.0->pycaret) (2.18.1)\n", - "Requirement already satisfied: jsonschema>=2.6 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from nbformat>=4.2.0->pycaret) (4.19.1)\n", - "Requirement already satisfied: jupyter-core in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from nbformat>=4.2.0->pycaret) (5.4.0)\n", - "Requirement already satisfied: llvmlite<0.41,>=0.40.0dev0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from numba>=0.55.0->pycaret) (0.40.1)\n", - "Requirement already satisfied: pytz>=2020.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<2.0.0,>=1.3.0->pycaret) (2023.3.post1)\n", - "Requirement already satisfied: tenacity>=6.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from plotly>=5.0.0->pycaret) (8.2.3)\n", - "Requirement already satisfied: dash<3.0.0,>=2.11.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from plotly-resampler>=0.8.3.1->pycaret) (2.14.2)\n", - "Requirement already satisfied: orjson<4.0.0,>=3.8.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from plotly-resampler>=0.8.3.1->pycaret) (3.9.10)\n", - "Requirement already satisfied: trace-updater>=0.0.8 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from 
plotly-resampler>=0.8.3.1->pycaret) (0.0.9.1)\n", - "Requirement already satisfied: tsdownsample==0.1.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from plotly-resampler>=0.8.3.1->pycaret) (0.1.2)\n", - "Requirement already satisfied: Cython!=0.29.18,!=0.29.31,>=0.29 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pmdarima!=1.8.1,<3.0.0,>=1.8.0->pycaret) (3.0.4)\n", - "Requirement already satisfied: urllib3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pmdarima!=1.8.1,<3.0.0,>=1.8.0->pycaret) (1.26.18)\n", - "Requirement already satisfied: setuptools!=50.0.0,>=38.6.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pmdarima!=1.8.1,<3.0.0,>=1.8.0->pycaret) (68.2.2)\n", - "Requirement already satisfied: six in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pyod>=1.0.8->pycaret) (1.16.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests>=2.27.1->pycaret) (3.3.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests>=2.27.1->pycaret) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests>=2.27.1->pycaret) (2023.7.22)\n", - "Requirement already satisfied: deprecated>=1.2.13 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from sktime!=0.17.1,!=0.17.2,!=0.18.0,<0.22.0,>=0.16.1->pycaret) (1.2.14)\n", - "Requirement already satisfied: scikit-base<0.6.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from sktime!=0.17.1,!=0.17.2,!=0.18.0,<0.22.0,>=0.16.1->pycaret) (0.5.2)\n", - "Requirement already satisfied: Flask<3.1,>=1.0.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from 
dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (3.0.0)\n", - "Requirement already satisfied: Werkzeug<3.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (3.0.1)\n", - "Requirement already satisfied: dash-html-components==2.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (2.0.0)\n", - "Requirement already satisfied: dash-core-components==2.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (2.0.0)\n", - "Requirement already satisfied: dash-table==5.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (5.0.0)\n", - "Requirement already satisfied: typing-extensions>=4.1.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (4.8.0)\n", - "Requirement already satisfied: retrying in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (1.3.4)\n", - "Requirement already satisfied: ansi2html in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (1.9.1)\n", - "Requirement already satisfied: nest-asyncio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (1.5.8)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from deprecated>=1.2.13->sktime!=0.17.1,!=0.17.2,!=0.18.0,<0.22.0,>=0.16.1->pycaret) (1.15.0)\n", - "Requirement already satisfied: parso<0.9.0,>=0.8.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from 
jedi>=0.16->ipython>=5.5.0->pycaret) (0.8.3)\n", - "Requirement already satisfied: attrs>=22.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (23.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (2023.7.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (0.30.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (0.10.6)\n", - "Requirement already satisfied: ptyprocess>=0.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pexpect>4.3->ipython>=5.5.0->pycaret) (0.7.0)\n", - "Requirement already satisfied: wcwidth in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=5.5.0->pycaret) (0.2.8)\n", - "Requirement already satisfied: platformdirs>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jupyter-core->nbformat>=4.2.0->pycaret) (3.11.0)\n", - "Requirement already satisfied: executing>=1.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from stack-data->ipython>=5.5.0->pycaret) (1.2.0)\n", - "Requirement already satisfied: asttokens>=2.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from stack-data->ipython>=5.5.0->pycaret) (2.4.1)\n", - "Requirement already satisfied: pure-eval in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from stack-data->ipython>=5.5.0->pycaret) (0.2.2)\n", - "Requirement already satisfied: itsdangerous>=2.1.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages 
(from Flask<3.1,>=1.0.4->dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (2.1.2)\n", - "Requirement already satisfied: click>=8.1.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from Flask<3.1,>=1.0.4->dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (8.1.7)\n", - "Requirement already satisfied: blinker>=1.6.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from Flask<3.1,>=1.0.4->dash<3.0.0,>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (1.6.3)\n", - "Requirement already satisfied: python-dotenv in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (1.0.0)\n", - "Requirement already satisfied: ydata-profiling in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (4.6.3)\n", - "Requirement already satisfied: scipy<1.12,>=1.4.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (1.10.1)\n", - "Requirement already satisfied: pandas!=1.4.0,<3,>1.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (1.5.3)\n", - "Requirement already satisfied: matplotlib<3.9,>=3.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (3.6.0)\n", - "Requirement already satisfied: pydantic>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (2.5.2)\n", - "Requirement already satisfied: PyYAML<6.1,>=5.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (6.0.1)\n", - "Requirement already satisfied: jinja2<3.2,>=2.11.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (3.1.2)\n", - "Requirement already satisfied: visions==0.7.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from visions[type_image_path]==0.7.5->ydata-profiling) (0.7.5)\n", - "Requirement already satisfied: numpy<1.26,>=1.16.0 in 
/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (1.22.4)\n", - "Requirement already satisfied: htmlmin==0.1.12 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (0.1.12)\n", - "Requirement already satisfied: phik<0.13,>=0.11.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (0.12.3)\n", - "Requirement already satisfied: requests<3,>=2.24.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (2.31.0)\n", - "Requirement already satisfied: tqdm<5,>=4.48.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (4.66.1)\n", - "Requirement already satisfied: seaborn<0.13,>=0.10.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (0.12.2)\n", - "Requirement already satisfied: multimethod<2,>=1.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (1.10)\n", - "Requirement already satisfied: statsmodels<1,>=0.13.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (0.14.0)\n", - "Requirement already satisfied: typeguard<5,>=4.1.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (4.1.5)\n", - "Requirement already satisfied: imagehash==4.3.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (4.3.1)\n", - "Requirement already satisfied: wordcloud>=1.9.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (1.9.3)\n", - "Requirement already satisfied: dacite>=1.8 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) (1.8.1)\n", - "Requirement already satisfied: numba<0.59.0,>=0.56.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from ydata-profiling) 
(0.57.1)\n", - "Requirement already satisfied: PyWavelets in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from imagehash==4.3.1->ydata-profiling) (1.4.1)\n", - "Requirement already satisfied: pillow in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from imagehash==4.3.1->ydata-profiling) (10.0.1)\n", - "Requirement already satisfied: attrs>=19.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from visions==0.7.5->visions[type_image_path]==0.7.5->ydata-profiling) (23.1.0)\n", - "Requirement already satisfied: networkx>=2.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from visions==0.7.5->visions[type_image_path]==0.7.5->ydata-profiling) (3.2)\n", - "Requirement already satisfied: tangled-up-in-unicode>=0.0.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from visions==0.7.5->visions[type_image_path]==0.7.5->ydata-profiling) (0.2.0)\n", - "\u001b[33mWARNING: visions 0.7.5 does not provide the extra 'type-image-path'\u001b[0m\u001b[33m\n", - "\u001b[0mRequirement already satisfied: MarkupSafe>=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jinja2<3.2,>=2.11.1->ydata-profiling) (2.1.3)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (1.1.1)\n", - "Requirement already satisfied: cycler>=0.10 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (4.43.1)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (1.4.5)\n", - "Requirement already 
satisfied: packaging>=20.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (21.3)\n", - "Requirement already satisfied: pyparsing>=2.2.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (3.1.1)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (2.8.2)\n", - "Requirement already satisfied: llvmlite<0.41,>=0.40.0dev0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from numba<0.59.0,>=0.56.0->ydata-profiling) (0.40.1)\n", - "Requirement already satisfied: pytz>=2020.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas!=1.4.0,<3,>1.1->ydata-profiling) (2023.3.post1)\n", - "Requirement already satisfied: joblib>=0.14.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from phik<0.13,>=0.11.1->ydata-profiling) (1.3.2)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic>=2->ydata-profiling) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.14.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic>=2->ydata-profiling) (2.14.5)\n", - "Requirement already satisfied: typing-extensions>=4.6.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic>=2->ydata-profiling) (4.8.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.24.0->ydata-profiling) (3.3.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.24.0->ydata-profiling) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in 
/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.24.0->ydata-profiling) (1.26.18)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.24.0->ydata-profiling) (2023.7.22)\n", - "Requirement already satisfied: patsy>=0.5.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from statsmodels<1,>=0.13.2->ydata-profiling) (0.5.3)\n", - "Requirement already satisfied: six in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from patsy>=0.5.2->statsmodels<1,>=0.13.2->ydata-profiling) (1.16.0)\n", - "Requirement already satisfied: shap in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (0.43.0)\n", - "Requirement already satisfied: numpy in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from shap) (1.22.4)\n", - "Requirement already satisfied: scipy in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from shap) (1.10.1)\n", - "Requirement already satisfied: scikit-learn in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from shap) (1.2.2)\n", - "Requirement already satisfied: pandas in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from shap) (1.5.3)\n", - "Requirement already satisfied: tqdm>=4.27.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from shap) (4.66.1)\n", - "Requirement already satisfied: packaging>20.9 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from shap) (21.3)\n", - "Requirement already satisfied: slicer==0.0.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from shap) (0.0.7)\n", - "Requirement already satisfied: numba in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from shap) (0.57.1)\n", - "Requirement already satisfied: cloudpickle in 
/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from shap) (2.2.1)\n", - "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from packaging>20.9->shap) (3.1.1)\n", - "Requirement already satisfied: llvmlite<0.41,>=0.40.0dev0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from numba->shap) (0.40.1)\n", - "Requirement already satisfied: python-dateutil>=2.8.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas->shap) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas->shap) (2023.3.post1)\n", - "Requirement already satisfied: joblib>=1.1.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from scikit-learn->shap) (1.3.2)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from scikit-learn->shap) (3.2.0)\n", - "Requirement already satisfied: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas->shap) (1.16.0)\n" + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping 
/opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0mRequirement already satisfied: pycaret in /opt/homebrew/lib/python3.11/site-packages (3.2.0)\n", + "Requirement already satisfied: category-encoders>=2.4.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (2.6.3)\n", + "Requirement already satisfied: cloudpickle in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (3.0.0)\n", + "Requirement already satisfied: deprecation>=2.1.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (2.1.0)\n", + "Requirement already satisfied: imbalanced-learn>=0.8.1 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (0.11.0)\n", + "Requirement already satisfied: importlib-metadata>=4.12.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (6.8.0)\n", + "Requirement already satisfied: ipython>=5.5.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (8.20.0)\n", + "Requirement already satisfied: ipywidgets>=7.6.5 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (8.1.1)\n", + "Requirement already satisfied: jinja2>=1.2 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (3.1.2)\n", + "Requirement already satisfied: joblib>=1.2.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (1.3.2)\n", + "Requirement already satisfied: kaleido>=0.2.1 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (0.2.1)\n", + "Requirement already satisfied: lightgbm>=3.0.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (4.2.0)\n", + "Requirement already satisfied: markupsafe>=2.0.1 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (2.1.3)\n", + "Requirement already satisfied: matplotlib<=3.6,>=3.3.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (3.6.0)\n", + "Requirement already satisfied: nbformat>=4.2.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (5.9.2)\n", + 
"Requirement already satisfied: numba>=0.55.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (0.58.1)\n", + "Requirement already satisfied: numpy<1.27,>=1.21 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (1.25.2)\n", + "Requirement already satisfied: pandas<2.0.0,>=1.3.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (1.5.3)\n", + "Requirement already satisfied: plotly-resampler>=0.8.3.1 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (0.9.2)\n", + "Requirement already satisfied: plotly>=5.0.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (5.18.0)\n", + "Requirement already satisfied: pmdarima!=1.8.1,<3.0.0,>=1.8.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (2.0.4)\n", + "Requirement already satisfied: psutil>=5.9.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (5.9.6)\n", + "Requirement already satisfied: pyod>=1.0.8 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (1.1.2)\n", + "Requirement already satisfied: requests>=2.27.1 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (2.31.0)\n", + "Requirement already satisfied: schemdraw==0.15 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (0.15)\n", + "Requirement already satisfied: scikit-learn<1.3.0,>=1.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (1.2.2)\n", + "Requirement already satisfied: scikit-plot>=0.3.7 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (0.3.7)\n", + "Requirement already satisfied: scipy~=1.10.1 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (1.10.1)\n", + "Requirement already satisfied: sktime!=0.17.1,!=0.17.2,!=0.18.0,<0.22.0,>=0.16.1 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (0.21.1)\n", + "Requirement already satisfied: statsmodels>=0.12.1 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (0.14.1)\n", + "Requirement already satisfied: tbats>=1.1.3 in 
/opt/homebrew/lib/python3.11/site-packages (from pycaret) (1.1.3)\n", + "Requirement already satisfied: tqdm>=4.62.0 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (4.66.1)\n", + "Requirement already satisfied: xxhash in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (3.4.1)\n", + "Requirement already satisfied: yellowbrick>=1.4 in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (1.5)\n", + "Requirement already satisfied: wurlitzer in /opt/homebrew/lib/python3.11/site-packages (from pycaret) (3.0.3)\n", + "Requirement already satisfied: patsy>=0.5.1 in /opt/homebrew/lib/python3.11/site-packages (from category-encoders>=2.4.0->pycaret) (0.5.6)\n", + "Requirement already satisfied: packaging in /opt/homebrew/lib/python3.11/site-packages (from deprecation>=2.1.0->pycaret) (23.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/homebrew/lib/python3.11/site-packages (from imbalanced-learn>=0.8.1->pycaret) (3.2.0)\n", + "Requirement already satisfied: zipp>=0.5 in /opt/homebrew/lib/python3.11/site-packages (from importlib-metadata>=4.12.0->pycaret) (3.17.0)\n", + "Requirement already satisfied: decorator in /opt/homebrew/lib/python3.11/site-packages (from ipython>=5.5.0->pycaret) (5.1.1)\n", + "Requirement already satisfied: jedi>=0.16 in /opt/homebrew/lib/python3.11/site-packages (from ipython>=5.5.0->pycaret) (0.19.1)\n", + "Requirement already satisfied: matplotlib-inline in /opt/homebrew/lib/python3.11/site-packages (from ipython>=5.5.0->pycaret) (0.1.6)\n", + "Requirement already satisfied: prompt-toolkit<3.1.0,>=3.0.41 in /opt/homebrew/lib/python3.11/site-packages (from ipython>=5.5.0->pycaret) (3.0.43)\n", + "Requirement already satisfied: pygments>=2.4.0 in /opt/homebrew/lib/python3.11/site-packages (from ipython>=5.5.0->pycaret) (2.17.2)\n", + "Requirement already satisfied: stack-data in /opt/homebrew/lib/python3.11/site-packages (from ipython>=5.5.0->pycaret) (0.6.3)\n", + "Requirement already satisfied: 
traitlets>=5 in /opt/homebrew/lib/python3.11/site-packages (from ipython>=5.5.0->pycaret) (5.14.1)\n", + "Requirement already satisfied: pexpect>4.3 in /opt/homebrew/lib/python3.11/site-packages (from ipython>=5.5.0->pycaret) (4.9.0)\n", + "Requirement already satisfied: comm>=0.1.3 in /opt/homebrew/lib/python3.11/site-packages (from ipywidgets>=7.6.5->pycaret) (0.2.1)\n", + "Requirement already satisfied: widgetsnbextension~=4.0.9 in /opt/homebrew/lib/python3.11/site-packages (from ipywidgets>=7.6.5->pycaret) (4.0.9)\n", + "Requirement already satisfied: jupyterlab-widgets~=3.0.9 in /opt/homebrew/lib/python3.11/site-packages (from ipywidgets>=7.6.5->pycaret) (3.0.9)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (1.1.1)\n", + "Requirement already satisfied: cycler>=0.10 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (0.12.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (4.43.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (1.4.5)\n", + "Requirement already satisfied: pillow>=6.2.0 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (10.0.1)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (3.1.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<=3.6,>=3.3.0->pycaret) (2.8.2)\n", + "Requirement already satisfied: fastjsonschema in /opt/homebrew/lib/python3.11/site-packages (from nbformat>=4.2.0->pycaret) (2.19.1)\n", + "Requirement already satisfied: jsonschema>=2.6 in /opt/homebrew/lib/python3.11/site-packages (from nbformat>=4.2.0->pycaret) 
(4.19.1)\n", + "Requirement already satisfied: jupyter-core in /opt/homebrew/lib/python3.11/site-packages (from nbformat>=4.2.0->pycaret) (5.7.1)\n", + "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /opt/homebrew/lib/python3.11/site-packages (from numba>=0.55.0->pycaret) (0.41.1)\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/homebrew/lib/python3.11/site-packages (from pandas<2.0.0,>=1.3.0->pycaret) (2023.3.post1)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /opt/homebrew/lib/python3.11/site-packages (from plotly>=5.0.0->pycaret) (8.2.3)\n", + "Requirement already satisfied: dash>=2.9.0 in /opt/homebrew/lib/python3.11/site-packages (from plotly-resampler>=0.8.3.1->pycaret) (2.14.2)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.8.0 in /opt/homebrew/lib/python3.11/site-packages (from plotly-resampler>=0.8.3.1->pycaret) (3.9.10)\n", + "Requirement already satisfied: tsdownsample==0.1.2 in /opt/homebrew/lib/python3.11/site-packages (from plotly-resampler>=0.8.3.1->pycaret) (0.1.2)\n", + "Requirement already satisfied: Cython!=0.29.18,!=0.29.31,>=0.29 in /opt/homebrew/lib/python3.11/site-packages (from pmdarima!=1.8.1,<3.0.0,>=1.8.0->pycaret) (3.0.8)\n", + "Requirement already satisfied: urllib3 in /Users/ethan.charlton-harrow/Library/Python/3.11/lib/python/site-packages (from pmdarima!=1.8.1,<3.0.0,>=1.8.0->pycaret) (1.26.18)\n", + "Requirement already satisfied: setuptools!=50.0.0,>=38.6.0 in /opt/homebrew/lib/python3.11/site-packages (from pmdarima!=1.8.1,<3.0.0,>=1.8.0->pycaret) (68.2.2)\n", + "Requirement already satisfied: six in /opt/homebrew/lib/python3.11/site-packages (from pyod>=1.0.8->pycaret) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/homebrew/lib/python3.11/site-packages (from requests>=2.27.1->pycaret) (3.3.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/homebrew/lib/python3.11/site-packages (from requests>=2.27.1->pycaret) (3.4)\n", + "Requirement already 
satisfied: certifi>=2017.4.17 in /opt/homebrew/lib/python3.11/site-packages (from requests>=2.27.1->pycaret) (2023.11.17)\n", + "Requirement already satisfied: deprecated>=1.2.13 in /opt/homebrew/lib/python3.11/site-packages (from sktime!=0.17.1,!=0.17.2,!=0.18.0,<0.22.0,>=0.16.1->pycaret) (1.2.14)\n", + "Requirement already satisfied: scikit-base<0.6.0 in /opt/homebrew/lib/python3.11/site-packages (from sktime!=0.17.1,!=0.17.2,!=0.18.0,<0.22.0,>=0.16.1->pycaret) (0.5.2)\n", + "Requirement already satisfied: Flask<3.1,>=1.0.4 in /opt/homebrew/lib/python3.11/site-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (3.0.0)\n", + "Requirement already satisfied: Werkzeug<3.1 in /opt/homebrew/lib/python3.11/site-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (3.0.1)\n", + "Requirement already satisfied: dash-html-components==2.0.0 in /opt/homebrew/lib/python3.11/site-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (2.0.0)\n", + "Requirement already satisfied: dash-core-components==2.0.0 in /opt/homebrew/lib/python3.11/site-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (2.0.0)\n", + "Requirement already satisfied: dash-table==5.0.0 in /opt/homebrew/lib/python3.11/site-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (5.0.0)\n", + "Requirement already satisfied: typing-extensions>=4.1.1 in /opt/homebrew/lib/python3.11/site-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (4.8.0)\n", + "Requirement already satisfied: retrying in /opt/homebrew/lib/python3.11/site-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (1.3.4)\n", + "Requirement already satisfied: ansi2html in /opt/homebrew/lib/python3.11/site-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (1.9.1)\n", + "Requirement already satisfied: nest-asyncio in /opt/homebrew/lib/python3.11/site-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (1.5.9)\n", + "Requirement already satisfied: 
wrapt<2,>=1.10 in /opt/homebrew/lib/python3.11/site-packages (from deprecated>=1.2.13->sktime!=0.17.1,!=0.17.2,!=0.18.0,<0.22.0,>=0.16.1->pycaret) (1.16.0)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/homebrew/lib/python3.11/site-packages (from jedi>=0.16->ipython>=5.5.0->pycaret) (0.8.3)\n", + "Requirement already satisfied: attrs>=22.2.0 in /opt/homebrew/lib/python3.11/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (23.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/homebrew/lib/python3.11/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (2023.7.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/homebrew/lib/python3.11/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (0.30.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/homebrew/lib/python3.11/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (0.10.6)\n", + "Requirement already satisfied: ptyprocess>=0.5 in /opt/homebrew/lib/python3.11/site-packages (from pexpect>4.3->ipython>=5.5.0->pycaret) (0.7.0)\n", + "Requirement already satisfied: wcwidth in /Users/ethan.charlton-harrow/Library/Python/3.11/lib/python/site-packages (from prompt-toolkit<3.1.0,>=3.0.41->ipython>=5.5.0->pycaret) (0.1.9)\n", + "Requirement already satisfied: platformdirs>=2.5 in /opt/homebrew/lib/python3.11/site-packages (from jupyter-core->nbformat>=4.2.0->pycaret) (4.1.0)\n", + "Requirement already satisfied: executing>=1.2.0 in /opt/homebrew/lib/python3.11/site-packages (from stack-data->ipython>=5.5.0->pycaret) (2.0.1)\n", + "Requirement already satisfied: asttokens>=2.1.0 in /opt/homebrew/lib/python3.11/site-packages (from stack-data->ipython>=5.5.0->pycaret) (2.4.1)\n", + "Requirement already satisfied: pure-eval in /opt/homebrew/lib/python3.11/site-packages (from stack-data->ipython>=5.5.0->pycaret) (0.2.2)\n", + "Requirement already satisfied: itsdangerous>=2.1.2 in 
/opt/homebrew/lib/python3.11/site-packages (from Flask<3.1,>=1.0.4->dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (2.1.2)\n", + "Requirement already satisfied: click>=8.1.3 in /opt/homebrew/lib/python3.11/site-packages (from Flask<3.1,>=1.0.4->dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (8.1.7)\n", + "Requirement already satisfied: blinker>=1.6.2 in /opt/homebrew/lib/python3.11/site-packages (from Flask<3.1,>=1.0.4->dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (1.6.3)\n", + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To 
update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n", + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0mCollecting python-dotenv\n", + " Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n", + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0mInstalling collected packages: python-dotenv\n", + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping 
/opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0mSuccessfully installed python-dotenv-1.0.0\n", + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n", + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping 
/opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0mCollecting ydata-profiling\n", + " Downloading ydata_profiling-4.6.4-py2.py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: scipy<1.12,>=1.4.1 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (1.10.1)\n", + "Requirement already satisfied: pandas!=1.4.0,<3,>1.1 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (1.5.3)\n", + "Requirement already satisfied: matplotlib<3.9,>=3.2 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (3.6.0)\n", + "Requirement already satisfied: pydantic>=2 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (2.4.2)\n", + "Requirement already satisfied: PyYAML<6.1,>=5.0.0 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (6.0.1)\n", + "Requirement already satisfied: jinja2<3.2,>=2.11.1 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (3.1.2)\n", + "Collecting visions==0.7.5 (from visions[type_image_path]==0.7.5->ydata-profiling)\n", + " Downloading visions-0.7.5-py3-none-any.whl (102 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.7/102.7 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy<1.26,>=1.16.0 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (1.25.2)\n", + "Requirement already satisfied: htmlmin==0.1.12 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (0.1.12)\n", + "Requirement already satisfied: phik<0.13,>=0.11.1 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (0.12.3)\n", + "Requirement already satisfied: requests<3,>=2.24.0 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (2.31.0)\n", + "Requirement already satisfied: tqdm<5,>=4.48.2 in 
/opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (4.66.1)\n", + "Collecting seaborn<0.13,>=0.10.1 (from ydata-profiling)\n", + " Downloading seaborn-0.12.2-py3-none-any.whl (293 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.3/293.3 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: multimethod<2,>=1.4 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (1.10)\n", + "Requirement already satisfied: statsmodels<1,>=0.13.2 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (0.14.1)\n", + "Collecting typeguard<5,>=4.1.2 (from ydata-profiling)\n", + " Downloading typeguard-4.1.5-py3-none-any.whl.metadata (3.7 kB)\n", + "Requirement already satisfied: imagehash==4.3.1 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (4.3.1)\n", + "Collecting wordcloud>=1.9.1 (from ydata-profiling)\n", + " Downloading wordcloud-1.9.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (3.4 kB)\n", + "Collecting dacite>=1.8 (from ydata-profiling)\n", + " Downloading dacite-1.8.1-py3-none-any.whl.metadata (15 kB)\n", + "Requirement already satisfied: numba<0.59.0,>=0.56.0 in /opt/homebrew/lib/python3.11/site-packages (from ydata-profiling) (0.58.1)\n", + "Requirement already satisfied: PyWavelets in /opt/homebrew/lib/python3.11/site-packages (from imagehash==4.3.1->ydata-profiling) (1.4.1)\n", + "Requirement already satisfied: pillow in /opt/homebrew/lib/python3.11/site-packages (from imagehash==4.3.1->ydata-profiling) (10.0.1)\n", + "Requirement already satisfied: attrs>=19.3.0 in /opt/homebrew/lib/python3.11/site-packages (from visions==0.7.5->visions[type_image_path]==0.7.5->ydata-profiling) (23.1.0)\n", + "Requirement already satisfied: networkx>=2.4 in /opt/homebrew/lib/python3.11/site-packages (from visions==0.7.5->visions[type_image_path]==0.7.5->ydata-profiling) (3.1)\n", + 
"Requirement already satisfied: tangled-up-in-unicode>=0.0.4 in /opt/homebrew/lib/python3.11/site-packages (from visions==0.7.5->visions[type_image_path]==0.7.5->ydata-profiling) (0.2.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /opt/homebrew/lib/python3.11/site-packages (from jinja2<3.2,>=2.11.1->ydata-profiling) (2.1.3)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (1.1.1)\n", + "Requirement already satisfied: cycler>=0.10 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (0.12.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (4.43.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (23.2)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (3.1.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /opt/homebrew/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (2.8.2)\n", + "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /opt/homebrew/lib/python3.11/site-packages (from numba<0.59.0,>=0.56.0->ydata-profiling) (0.41.1)\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/homebrew/lib/python3.11/site-packages (from pandas!=1.4.0,<3,>1.1->ydata-profiling) (2023.3.post1)\n", + "Requirement already satisfied: joblib>=0.14.1 in /opt/homebrew/lib/python3.11/site-packages (from phik<0.13,>=0.11.1->ydata-profiling) (1.3.2)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /opt/homebrew/lib/python3.11/site-packages 
(from pydantic>=2->ydata-profiling) (0.6.0)\n", + "Requirement already satisfied: pydantic-core==2.10.1 in /opt/homebrew/lib/python3.11/site-packages (from pydantic>=2->ydata-profiling) (2.10.1)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /opt/homebrew/lib/python3.11/site-packages (from pydantic>=2->ydata-profiling) (4.8.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/homebrew/lib/python3.11/site-packages (from requests<3,>=2.24.0->ydata-profiling) (3.3.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/homebrew/lib/python3.11/site-packages (from requests<3,>=2.24.0->ydata-profiling) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/ethan.charlton-harrow/Library/Python/3.11/lib/python/site-packages (from requests<3,>=2.24.0->ydata-profiling) (1.26.18)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/homebrew/lib/python3.11/site-packages (from requests<3,>=2.24.0->ydata-profiling) (2023.11.17)\n", + "Requirement already satisfied: patsy>=0.5.4 in /opt/homebrew/lib/python3.11/site-packages (from statsmodels<1,>=0.13.2->ydata-profiling) (0.5.6)\n", + "Requirement already satisfied: six in /opt/homebrew/lib/python3.11/site-packages (from patsy>=0.5.4->statsmodels<1,>=0.13.2->ydata-profiling) (1.16.0)\n", + "Downloading ydata_profiling-4.6.4-py2.py3-none-any.whl (357 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m357.8/357.8 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading dacite-1.8.1-py3-none-any.whl (14 kB)\n", + "Downloading typeguard-4.1.5-py3-none-any.whl (34 kB)\n", + "Downloading wordcloud-1.9.3-cp311-cp311-macosx_11_0_arm64.whl (168 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m168.3/168.3 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + 
"\u001b[?25h\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0mInstalling collected packages: typeguard, dacite, wordcloud, visions, seaborn, ydata-profiling\n", + " Attempting uninstall: visions\n", + "\u001b[33m WARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m Found existing installation: visions 0.7.4\n", + " Uninstalling visions-0.7.4:\n", + " Successfully uninstalled visions-0.7.4\n", + " Attempting uninstall: seaborn\n", + "\u001b[33m WARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m Found existing installation: seaborn 0.13.0\n", + " Uninstalling seaborn-0.13.0:\n", + " Successfully uninstalled seaborn-0.13.0\n", + 
"\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 
'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "pandas-profiling 3.2.0 requires joblib~=1.1.0, but you have joblib 1.3.2 which is incompatible.\n", + "pandas-profiling 3.2.0 requires visions[type_image_path]==0.7.4, but you have visions 0.7.5 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed dacite-1.8.1 seaborn-0.12.2 typeguard-4.1.5 visions-0.7.5 wordcloud-1.9.3 ydata-profiling-4.6.4\n", + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> 
\u001b[0m\u001b[32;49m23.3.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n", + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0mCollecting shap\n", + " Downloading shap-0.44.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (24 kB)\n", + "Requirement already satisfied: numpy in /opt/homebrew/lib/python3.11/site-packages (from shap) (1.25.2)\n", + "Requirement already satisfied: scipy in /opt/homebrew/lib/python3.11/site-packages (from shap) (1.10.1)\n", + "Requirement already satisfied: scikit-learn in /opt/homebrew/lib/python3.11/site-packages (from shap) (1.2.2)\n", + "Requirement already satisfied: pandas in /opt/homebrew/lib/python3.11/site-packages (from shap) (1.5.3)\n", + "Requirement already satisfied: tqdm>=4.27.0 in /opt/homebrew/lib/python3.11/site-packages (from shap) (4.66.1)\n", + "Requirement already satisfied: packaging>20.9 in /opt/homebrew/lib/python3.11/site-packages (from shap) (23.2)\n", + "Collecting slicer==0.0.7 (from shap)\n", + " Downloading slicer-0.0.7-py3-none-any.whl (14 kB)\n", + "Requirement already 
satisfied: numba in /opt/homebrew/lib/python3.11/site-packages (from shap) (0.58.1)\n", + "Requirement already satisfied: cloudpickle in /opt/homebrew/lib/python3.11/site-packages (from shap) (3.0.0)\n", + "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /opt/homebrew/lib/python3.11/site-packages (from numba->shap) (0.41.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /opt/homebrew/lib/python3.11/site-packages (from pandas->shap) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/homebrew/lib/python3.11/site-packages (from pandas->shap) (2023.3.post1)\n", + "Requirement already satisfied: joblib>=1.1.1 in /opt/homebrew/lib/python3.11/site-packages (from scikit-learn->shap) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/homebrew/lib/python3.11/site-packages (from scikit-learn->shap) (3.2.0)\n", + "Requirement already satisfied: six>=1.5 in /opt/homebrew/lib/python3.11/site-packages (from python-dateutil>=2.8.1->pandas->shap) (1.16.0)\n", + "Downloading shap-0.44.0-cp311-cp311-macosx_11_0_arm64.whl (445 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m446.0/446.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25h\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0mInstalling collected packages: slicer, shap\n", + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 
'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/idna-3.6.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0mSuccessfully installed shap-0.44.0 slicer-0.0.7\n", + "\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/charset_normalizer-3.3.2.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Skipping /opt/homebrew/lib/python3.11/site-packages/urllib3-2.1.0.dist-info due to invalid metadata entry 'name'\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n" ] } ], @@ -224,18 +343,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, - "outputs": [ - 
{ - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "from sagemaker import get_execution_role\n", @@ -248,7 +358,8 @@ "from finalize_and_save_model import finalize_and_save_model\n", "from delete_sagemaker_endpoint import delete_sagemaker_endpoint\n", "from ydata_profiling import ProfileReport\n", - "import shap\n" + "import shap\n", + "import pandas as pd" ] }, { @@ -263,16 +374,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml\n", - "streaming-data-platform-ml-data/ethan_data.csv classification y classification-proba-endpoint banking-classification s3://streaming-data-platform-ml-data/ethan_data.csv ml.m4.xlarge 135544376709.dkr.ecr.eu-west-1.amazonaws.com/mlops-classification-repo:latest AUC\n" + "None None None None None s3://None None None None\n" ] } ], @@ -293,8 +402,10 @@ "inference_instance_count = int(os.getenv(\"inference_instance_count\"))\n", "image_uri = os.getenv(\"ecr_repo_uri\")\n", "tuning_metric = os.getenv(\"tuning_metric\")\n", + "#TODO change \n", + "os.environ['preprocessing_script_path'] = 'filename_path'\n", "\n", - "print(data_location_s3, algorithm_choice, target, endpoint_name, model_name, data_location, instance_type, image_uri, tuning_metric)\n" + "print(data_location_s3, algorithm_choice, target, endpoint_name, model_name, data_location, instance_type, image_uri, tuning_metric)" ] }, { @@ -306,21 +417,9 @@ }, { "cell_type": 
"code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Your installed version of s3fs is very old and known to cause\n", - "severe performance issues, see also https://github.com/dask/dask/issues/10276\n", - "\n", - "To fix, you should specify a lower version bound on s3fs, or\n", - "update the current installation.\n", - "\n" - ] - }, { "data": { "text/html": [ @@ -342,86 +441,153 @@ " \n", " \n", " \n", + " Unnamed: 0\n", " age\n", " job\n", + " marital\n", " education\n", " default\n", " balance\n", " housing\n", " loan\n", + " contact\n", + " day\n", + " month\n", + " duration\n", + " campaign\n", + " pdays\n", + " previous\n", + " poutcome\n", " y\n", " \n", " \n", " \n", " \n", " 0\n", - " 32\n", - " 7\n", - " 2\n", - " 1\n", - " -238\n", - " 1\n", " 0\n", + " 58\n", + " management\n", + " married\n", + " tertiary\n", + " no\n", + " 2143\n", + " yes\n", + " no\n", + " unknown\n", + " 5\n", + " may\n", + " 261\n", + " 1\n", + " -1\n", " 0\n", + " unknown\n", + " no\n", " \n", " \n", " 1\n", - " 34\n", - " 4\n", - " 2\n", - " 0\n", - " -478\n", " 1\n", + " 44\n", + " technician\n", + " single\n", + " secondary\n", + " no\n", + " 29\n", + " yes\n", + " no\n", + " unknown\n", + " 5\n", + " may\n", + " 151\n", " 1\n", + " -1\n", " 0\n", + " unknown\n", + " no\n", " \n", " \n", " 2\n", - " 32\n", - " 3\n", " 2\n", - " 0\n", - " 266\n", + " 33\n", + " entrepreneur\n", + " married\n", + " secondary\n", + " no\n", + " 2\n", + " yes\n", + " yes\n", + " unknown\n", + " 5\n", + " may\n", + " 76\n", " 1\n", + " -1\n", " 0\n", - " 0\n", + " unknown\n", + " no\n", " \n", " \n", " 3\n", - " 36\n", - " 7\n", - " 2\n", - " 1\n", - " 13\n", - " 0\n", + " 3\n", + " 47\n", + " blue-collar\n", + " married\n", + " unknown\n", + " no\n", + " 1506\n", + " yes\n", + " no\n", + " unknown\n", + " 5\n", + " may\n", + " 92\n", " 1\n", + " -1\n", " 0\n", + " unknown\n", + " no\n", " \n", " \n", 
" 4\n", - " 23\n", - " 11\n", - " 2\n", - " 0\n", - " 486\n", - " 0\n", - " 0\n", + " 4\n", + " 33\n", + " unknown\n", + " single\n", + " unknown\n", + " no\n", + " 1\n", + " no\n", + " no\n", + " unknown\n", + " 5\n", + " may\n", + " 198\n", + " 1\n", + " -1\n", " 0\n", + " unknown\n", + " no\n", " \n", " \n", "\n", "" ], "text/plain": [ - " age job education default balance housing loan y\n", - "0 32 7 2 1 -238 1 0 0\n", - "1 34 4 2 0 -478 1 1 0\n", - "2 32 3 2 0 266 1 0 0\n", - "3 36 7 2 1 13 0 1 0\n", - "4 23 11 2 0 486 0 0 0" + " Unnamed: 0 age job marital education default balance housing \\\n", + "0 0 58 management married tertiary no 2143 yes \n", + "1 1 44 technician single secondary no 29 yes \n", + "2 2 33 entrepreneur married secondary no 2 yes \n", + "3 3 47 blue-collar married unknown no 1506 yes \n", + "4 4 33 unknown single unknown no 1 no \n", + "\n", + " loan contact day month duration campaign pdays previous poutcome y \n", + "0 no unknown 5 may 261 1 -1 0 unknown no \n", + "1 no unknown 5 may 151 1 -1 0 unknown no \n", + "2 yes unknown 5 may 76 1 -1 0 unknown no \n", + "3 no unknown 5 may 92 1 -1 0 unknown no \n", + "4 no unknown 5 may 198 1 -1 0 unknown no " ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -441,64 +607,19 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 24, "metadata": {}, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cd0eeb9e5943413eb2a07921ad25a2c2", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Summarize dataset: 0%| | 0/5 [00:00 1\u001b[0m profile \u001b[38;5;241m=\u001b[39m \u001b[43mProfileReport\u001b[49m(\n\u001b[1;32m 2\u001b[0m df,\n\u001b[1;32m 3\u001b[0m sort\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 4\u001b[0m html\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstyle\u001b[39m\u001b[38;5;124m\"\u001b[39m: 
{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfull_width\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28;01mTrue\u001b[39;00m}},\n\u001b[1;32m 5\u001b[0m title\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData Exploration\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 6\u001b[0m explorative\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 7\u001b[0m )\n\u001b[1;32m 8\u001b[0m profile\u001b[38;5;241m.\u001b[39mto_widgets()\n", + "\u001b[0;31mNameError\u001b[0m: name 'ProfileReport' is not defined" + ] } ], "source": [ @@ -512,68 +633,151 @@ "profile.to_widgets()\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

6. Data Cleaning and Feature Engineering Placeholder

" + ] + }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " age job education default balance housing loan y\n", - "0 46 7 1 0 1666 1 0 0\n", - "1 30 7 2 0 3532 1 0 0\n", - "2 37 2 3 0 2905 1 0 1\n", - "3 37 3 2 0 -797 1 0 1\n", - "4 92 8 4 0 775 0 0 1\n", - "... ... ... ... ... ... ... ... ..\n", - "44649 37 3 2 0 588 1 0 0\n", - "44650 41 4 2 0 239 1 0 0\n", - "44651 65 8 1 0 543 0 0 1\n", - "44652 50 2 2 0 1716 1 0 0\n", - "44653 40 2 3 0 0 0 0 1\n", + " Unnamed: 0 age education balance day month duration campaign \\\n", + "0 0 58 2 2143 5 8 261 1 \n", + "1 1 44 1 29 5 8 151 1 \n", + "2 2 33 1 2 5 8 76 1 \n", + "3 3 47 3 1506 5 8 92 1 \n", + "4 4 33 3 1 5 8 198 1 \n", + "... ... ... ... ... ... ... ... ... \n", + "79839 41749 33 1 1222 9 10 348 1 \n", + "79840 40301 30 1 444 16 6 195 2 \n", + "79841 41479 38 1 1844 7 11 245 1 \n", + "79842 31045 58 1 37 11 3 155 1 \n", + "79843 30566 58 2 3355 5 3 330 4 \n", "\n", - "[44654 rows x 8 columns] age job education default balance housing loan y\n", - "44654 35 7 4 0 2298 0 0 0\n", - "44655 31 5 2 0 132 0 0 0\n", - "44656 50 4 2 0 1375 0 0 1\n", - "44657 30 1 3 0 734 1 0 0\n", - "44658 36 4 2 0 1305 1 0 1\n", - "... ... ... ... ... ... ... ... ..\n", - "55813 42 2 3 0 -380 1 0 0\n", - "55814 18 11 1 0 608 0 0 1\n", - "55815 40 7 1 0 105 1 0 0\n", - "55816 31 2 2 0 4150 1 0 1\n", - "55817 35 2 2 0 910 0 0 0\n", + " pdays previous job_admin. job_blue-collar job_entrepreneur \\\n", + "0 -1 0 0 0 0 \n", + "1 -1 0 0 0 0 \n", + "2 -1 0 0 0 1 \n", + "3 -1 0 0 1 0 \n", + "4 -1 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "79839 135 1 0 0 0 \n", + "79840 -1 0 0 0 0 \n", + "79841 62 0 0 0 0 \n", + "79842 -1 0 0 0 0 \n", + "79843 -1 0 0 0 0 \n", "\n", - "[11164 rows x 8 columns]\n" + " job_housemaid job_management job_retired job_self-employed \\\n", + "0 0 1 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "... ... ... ... ... \n", + "79839 0 0 0 0 \n", + "79840 0 0 0 0 \n", + "79841 0 0 0 0 \n", + "79842 0 0 0 0 \n", + "79843 0 0 0 0 \n", + "\n", + " job_services job_student job_technician job_unemployed \\\n", + "0 0 0 0 0 \n", + "1 0 0 1 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "... ... ... ... ... \n", + "79839 0 0 0 0 \n", + "79840 0 0 0 0 \n", + "79841 0 0 0 0 \n", + "79842 0 0 0 0 \n", + "79843 0 0 0 0 \n", + "\n", + " marital_divorced marital_married marital_single contact_cellular \\\n", + "0 0 1 0 0 \n", + "1 0 0 1 0 \n", + "2 0 1 0 0 \n", + "3 0 1 0 0 \n", + "4 0 0 1 0 \n", + "... ... ... ... ... \n", + "79839 0 0 0 0 \n", + "79840 0 0 0 1 \n", + "79841 0 0 0 1 \n", + "79842 0 0 0 1 \n", + "79843 0 0 0 1 \n", + "\n", + " contact_telephone default_no default_yes housing_no housing_yes \\\n", + "0 0 1 0 0 1 \n", + "1 0 1 0 0 1 \n", + "2 0 1 0 0 1 \n", + "3 0 1 0 0 1 \n", + "4 0 1 0 1 0 \n", + "... ... ... ... ... ... \n", + "79839 0 1 0 1 0 \n", + "79840 0 1 0 0 0 \n", + "79841 0 1 0 1 0 \n", + "79842 0 1 0 1 0 \n", + "79843 0 1 0 1 0 \n", + "\n", + " loan_no loan_yes y \n", + "0 1 0 0 \n", + "1 1 0 0 \n", + "2 0 1 0 \n", + "3 1 0 0 \n", + "4 1 0 0 \n", + "... ... ... .. \n", + "79839 1 0 1 \n", + "79840 0 0 1 \n", + "79841 1 0 1 \n", + "79842 1 0 1 \n", + "79843 1 0 1 \n", + "\n", + "[79844 rows x 33 columns]\n" ] } ], "source": [ - "# Split and shuffle data\n", - "train_data, test_data = split_data(df, shuffle=True)\n", - "print(train_data, test_data)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "

6. Data Cleaning and Feature Engineering Placeholder

\n" + "#TODO tidy up\n", + "data_processing_path = os.getenv(\"data_processing\")\n", + "\n", + "if data_processing_path != \"None\":\n", + " try:\n", + " from preprocess_data import preprocess_data\n", + " df = preprocess_data(df)\n", + " if isinstance(df, pd.DataFrame):\n", + " print(\"The function returned a DataFrame.\")\n", + " else:\n", + " print(\"The function did not return a DataFrame.\")\n", + "\n", + " except ImportError:\n", + " print(\"file does not exist\")\n", + "\n", + "df.head()\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Split and shuffle data\n", + "train_data, test_data = split_data(df, shuffle=True)\n", + "print(train_data, test_data)" + ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -583,142 +787,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - "
 DescriptionValue
0Session id123
1Targety
2Target typeBinary
3Original data shape(44654, 8)
4Transformed data shape(44654, 8)
5Transformed train set shape(31257, 8)
6Transformed test set shape(13397, 8)
7Numeric features7
8PreprocessTrue
9Imputation typesimple
10Numeric imputationmean
11Categorical imputationmode
12Fold GeneratorStratifiedKFold
13Fold Number10
14CPU Jobs-1
15Use GPUFalse
16Log ExperimentFalse
17Experiment Nameclf-default-name
18USI5013
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Initialize data in PyCaret with all the defined parameters\n", "pycaret.setup(data=train_data, target=target, session_id=123)\n" @@ -736,248 +807,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
 ModelAccuracyAUCRecallPrec.F1KappaMCCTT (Sec)
rfRandom Forest Classifier0.79790.87680.82460.78280.80310.59580.59681.4370
dtDecision Tree Classifier0.78550.79040.81690.76850.79190.57090.57210.0650
etExtra Trees Classifier0.77290.86110.79770.76000.77830.54580.54651.1060
lightgbmLight Gradient Boosting Machine0.76320.84010.77910.75510.76680.52630.52670.7680
knnK Neighbors Classifier0.75640.84020.86390.71100.78000.51290.52530.1020
gbcGradient Boosting Classifier0.73920.81250.76010.72970.74450.47850.47901.0040
adaAda Boost Classifier0.72520.79540.75740.71160.73380.45040.45140.3740
ridgeRidge Classifier0.71740.00000.77200.69610.73200.43490.43760.0250
ldaLinear Discriminant Analysis0.71740.75770.77200.69610.73200.43490.43760.0390
lrLogistic Regression0.71340.74860.77500.69000.73000.42680.43020.8080
nbNaive Bayes0.69810.76040.80490.66330.72720.39630.40570.0250
qdaQuadratic Discriminant Analysis0.58740.77060.96720.54960.70090.17490.26870.0260
svmSVM - Linear Kernel0.53730.00000.55610.49820.45220.07460.08870.0920
dummyDummy Classifier0.50010.50000.00000.00000.00000.00000.00000.0200
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Train and evaluate the performance of all estimators available in the model library using cross-validation.\n", "bestModel = pycaret.compare_models()\n" @@ -1003,223 +835,18 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - "
 AccuracyAUCRecallPrec.F1KappaMCC
Fold       
00.73060.80030.76630.71510.73980.46130.4625
10.71910.78660.76180.70170.73050.43830.4399
20.72710.80300.78060.70520.74100.45430.4569
30.71690.78870.76260.69870.72930.43380.4356
40.72780.80100.77670.70750.74050.45550.4577
50.72840.80760.78570.70490.74310.45680.4598
60.71940.79140.77740.69670.73480.43890.4419
70.71140.78190.77980.68580.72980.42270.4268
80.71970.79830.78040.69580.73570.43940.4427
90.70910.78080.76630.68750.72480.41830.4210
Mean0.72100.79400.77380.69990.73490.44190.4445
Std0.00700.00890.00820.00850.00580.01400.0136
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 10 folds for each of 10 candidates, totalling 100 fits\n", - "Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).\n" - ] - } - ], + "outputs": [], "source": [ "bestModel = pycaret.tune_model(bestModel, n_iter = 10, optimize = tuning_metric)\n" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n", - " criterion='gini', max_depth=None, max_features='sqrt',\n", - " max_leaf_nodes=None, max_samples=None,\n", - " min_impurity_decrease=0.0, min_samples_leaf=1,\n", - " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", - " n_estimators=100, n_jobs=-1, oob_score=False,\n", - " random_state=123, verbose=0, warm_start=False)\n" - ] - } - ], + "outputs": [], "source": [ "# View the model's hyperparameter\n", "print(bestModel)\n" @@ -1236,27 +863,12 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a80ac25ea3bb4a059f8d9e065ed7b973", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Evaluate model: Display UI analyzing Hyperparameters, Confusion Matrix, Class Report, etc.\n", - "pycaret.evaluate_model(bestModel)\n" + 
"pycaret.evaluate_model(bestModel)" ] }, { @@ -1270,19 +882,11 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Transformation Pipeline and Model Successfully Saved\n" - ] - } - ], + "outputs": [], "source": [ "# Finalising model and save the model to current directory\n", "final_model = finalize_and_save_model(algorithm_choice, bestModel, model_name)\n" @@ -1290,174 +894,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
 ModelAccuracyAUCRecallPrec.F1KappaMCC
0Random Forest Classifier0.81520.89590.83960.80080.81970.63040.6311
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agejobeducationdefaultbalancehousingloanyprediction_labelprediction_score
4465435740229800010.590
446553152013200010.960
4465650420137500100.610
446573013073410001.000
4465836420130510110.531
\n", - "
" - ], - "text/plain": [ - " age job education default balance housing loan y \\\n", - "44654 35 7 4 0 2298 0 0 0 \n", - "44655 31 5 2 0 132 0 0 0 \n", - "44656 50 4 2 0 1375 0 0 1 \n", - "44657 30 1 3 0 734 1 0 0 \n", - "44658 36 4 2 0 1305 1 0 1 \n", - "\n", - " prediction_label prediction_score \n", - "44654 1 0.590 \n", - "44655 1 0.960 \n", - "44656 0 0.610 \n", - "44657 0 1.000 \n", - "44658 1 0.531 " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "final_prediction = pycaret.predict_model(final_model, data=test_data)\n", "final_prediction.head()\n" @@ -1505,104 +944,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ExactExplainer explainer: 11165it [21:57, 8.43it/s] \n" - ] - }, - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because 
none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, 
Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - 
"findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found 
because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n", - "findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "test_features = test_data.copy().drop(target, axis=1)\n", "\n", @@ -1623,37 +967,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - " Visualization omitted, Javascript library not loaded!
\n", - " Have you run `initjs()` in this notebook? If this notebook was from another\n", - " user you must also trust this notebook (File -> Trust notebook). If you are viewing\n", - " this notebook on github the Javascript has been stripped for security. If you are using\n", - " JupyterLab this error is because a JupyterLab extension has not yet been written.\n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "shap_values_array = shap_values.values[0]\n", "\n", @@ -1671,21 +987,9 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml\n", - "-------!" - ] - } - ], + "outputs": [], "source": [ "# deploy model to sagemaker endpoint\n", "deploy_model(model_name, algorithm_choice, model_s3_bucket, instance_type, endpoint_name, role, inference_instance_count, image_uri)\n" @@ -1701,33 +1005,17 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Are you sure you want to delete the endpoint 'classification-proba-endpoint'? 
Type 'Yes' to confirm: Yes\n", - "Endpoint 'classification-proba-endpoint' and its configuration have been deleted.\n" - ] - } - ], + "outputs": [], "source": [ "delete_sagemaker_endpoint(endpoint_name)\n" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1741,9 +1029,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.6" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/modules/s3/main.tf b/modules/s3/main.tf index a4400b6..eb7a057 100644 --- a/modules/s3/main.tf +++ b/modules/s3/main.tf @@ -3,7 +3,8 @@ # The model bucket will contain the model artifact # The config-bucket is used to store ipynb files, python files and other configuration files locals { - file_path = "${path.module}/../../mlops_ml_models" + preprocessing_script_path = var.preprocessing_script_path + file_path = "${path.module}/../../mlops_ml_models" files_to_upload = concat( tolist(fileset(local.file_path, "*.ipynb")), tolist(fileset(local.file_path, "*.py")), @@ -63,3 +64,12 @@ resource "random_string" "s3_suffix" { special = false upper = false } + +resource "aws_s3_object" "preprocessing_script_path" { + count = var.preprocessing_script_path != "None" ? 
1 : 0 + bucket = aws_s3_bucket.model_buckets[1].id + key = "preprocess_data.py" + source = var.preprocessing_script_path + etag = filemd5(local.preprocessing_script_path) + tags = var.tags +} \ No newline at end of file diff --git a/modules/s3/variables.tf b/modules/s3/variables.tf index 3e5998a..c3d4caa 100644 --- a/modules/s3/variables.tf +++ b/modules/s3/variables.tf @@ -12,4 +12,9 @@ variable "tags" { type = map(string) } - +variable "preprocessing_script_path" { + description = "The path the user provides if they want to include their own data cleaning logic" + type = string + default = "None" +} +#TODO replace "None" with None \ No newline at end of file diff --git a/modules/sagemaker/main.tf b/modules/sagemaker/main.tf index cfd98e9..c965645 100644 --- a/modules/sagemaker/main.tf +++ b/modules/sagemaker/main.tf @@ -13,16 +13,17 @@ resource "aws_sagemaker_notebook_instance_lifecycle_configuration" "training_not { config_s3_bucket = var.config_s3_bucket env = { - data_location_s3 = "${var.data_s3_bucket}${var.data_location_s3}" - target = var.model_target_variable - algorithm_choice = var.algorithm_choice - endpoint_name = local.endpoint_name - model_name = local.model_name - model_s3_bucket = var.model_s3_bucket - inference_instance_type = var.inference_instance_type - inference_instance_count = var.inference_instance_count - ecr_repo_uri = var.ecr_repo_uri - tuning_metric = var.tuning_metric + data_location_s3 = "${var.data_s3_bucket}${var.data_location_s3}" + target = var.model_target_variable + algorithm_choice = var.algorithm_choice + endpoint_name = local.endpoint_name + model_name = local.model_name + model_s3_bucket = var.model_s3_bucket + inference_instance_type = var.inference_instance_type + inference_instance_count = var.inference_instance_count + ecr_repo_uri = var.ecr_repo_uri + tuning_metric = var.tuning_metric + preprocessing_script_path = var.preprocessing_script_path } } )) diff --git a/modules/sagemaker/templates/startupscript.sh.tftpl 
b/modules/sagemaker/templates/startupscript.sh.tftpl index 6be12e0..18ada0a 100644 --- a/modules/sagemaker/templates/startupscript.sh.tftpl +++ b/modules/sagemaker/templates/startupscript.sh.tftpl @@ -13,6 +13,12 @@ cat << EOF > /home/ec2-user/SageMaker/.env %{ for key, value in env ~} ${key}=${value} %{ endfor ~} +#TODO remplace "None" with None +if [ ${var.preprocessing_script_path} == "None" ]; then + echo "preprocessing_script_path" = "None" >> /home/ec2-user/SageMaker/.env +else + echo "preprocessing_script_path" = preprocess_data.py >> /home/ec2-user/SageMaker/.env +fi EOF exit 0 diff --git a/modules/sagemaker/variables.tf b/modules/sagemaker/variables.tf index f8b676a..e9a1452 100644 --- a/modules/sagemaker/variables.tf +++ b/modules/sagemaker/variables.tf @@ -88,3 +88,8 @@ variable "config_bucket_key_arn" { description = "The ARN of the KMS key using which notebook scripts are encrypted in S3." type = string } + +variable "preprocessing_script_path" { + description = "The path the user provides if they want to include their own data cleaning logic" + type = string +} \ No newline at end of file diff --git a/variables.tf b/variables.tf index 56f6e31..469e51c 100644 --- a/variables.tf +++ b/variables.tf @@ -97,3 +97,9 @@ variable "tuning_metric" { description = "The metric user want to focus when tuning hyperparameter" type = string } + +variable "preprocessing_script_path" { + description = "The path the user provides if they want to include their own data cleaning logic" + type = string + default = "None" +} \ No newline at end of file From 4e5723d9605e4010d6b1b0048ce9c71ad5395052 Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 23 Feb 2024 16:32:36 +0000 Subject: [PATCH 02/13] fix(preprocessing_script_path): variable formatting fixes --- .../models_template_notebook.ipynb | 136 ++++-------------- modules/s3/main.tf | 2 +- modules/s3/variables.tf | 5 +- .../templates/startupscript.sh.tftpl | 8 +- variables.tf | 2 +- 5 files changed, 30 
insertions(+), 123 deletions(-) diff --git a/mlops_ml_models/models_template_notebook.ipynb b/mlops_ml_models/models_template_notebook.ipynb index e5690dc..2b54e6a 100644 --- a/mlops_ml_models/models_template_notebook.ipynb +++ b/mlops_ml_models/models_template_notebook.ipynb @@ -359,7 +359,8 @@ "from delete_sagemaker_endpoint import delete_sagemaker_endpoint\n", "from ydata_profiling import ProfileReport\n", "import shap\n", - "import pandas as pd" + "import pandas as pd\n", + "import unittest" ] }, { @@ -402,8 +403,7 @@ "inference_instance_count = int(os.getenv(\"inference_instance_count\"))\n", "image_uri = os.getenv(\"ecr_repo_uri\")\n", "tuning_metric = os.getenv(\"tuning_metric\")\n", - "#TODO change \n", - "os.environ['preprocessing_script_path'] = 'filename_path'\n", + "preprocessing_script_path = os.getenv(\"preprocessing_script_path\")\n", "\n", "print(data_location_s3, algorithm_choice, target, endpoint_name, model_name, data_location, instance_type, image_uri, tuning_metric)" ] @@ -642,123 +642,37 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " Unnamed: 0 age education balance day month duration campaign \\\n", - "0 0 58 2 2143 5 8 261 1 \n", - "1 1 44 1 29 5 8 151 1 \n", - "2 2 33 1 2 5 8 76 1 \n", - "3 3 47 3 1506 5 8 92 1 \n", - "4 4 33 3 1 5 8 198 1 \n", - "... ... ... ... ... ... ... ... ... \n", - "79839 41749 33 1 1222 9 10 348 1 \n", - "79840 40301 30 1 444 16 6 195 2 \n", - "79841 41479 38 1 1844 7 11 245 1 \n", - "79842 31045 58 1 37 11 3 155 1 \n", - "79843 30566 58 2 3355 5 3 330 4 \n", - "\n", - " pdays previous job_admin. job_blue-collar job_entrepreneur \\\n", - "0 -1 0 0 0 0 \n", - "1 -1 0 0 0 0 \n", - "2 -1 0 0 0 1 \n", - "3 -1 0 0 1 0 \n", - "4 -1 0 0 0 0 \n", - "... ... ... ... ... ... 
\n", - "79839 135 1 0 0 0 \n", - "79840 -1 0 0 0 0 \n", - "79841 62 0 0 0 0 \n", - "79842 -1 0 0 0 0 \n", - "79843 -1 0 0 0 0 \n", - "\n", - " job_housemaid job_management job_retired job_self-employed \\\n", - "0 0 1 0 0 \n", - "1 0 0 0 0 \n", - "2 0 0 0 0 \n", - "3 0 0 0 0 \n", - "4 0 0 0 0 \n", - "... ... ... ... ... \n", - "79839 0 0 0 0 \n", - "79840 0 0 0 0 \n", - "79841 0 0 0 0 \n", - "79842 0 0 0 0 \n", - "79843 0 0 0 0 \n", - "\n", - " job_services job_student job_technician job_unemployed \\\n", - "0 0 0 0 0 \n", - "1 0 0 1 0 \n", - "2 0 0 0 0 \n", - "3 0 0 0 0 \n", - "4 0 0 0 0 \n", - "... ... ... ... ... \n", - "79839 0 0 0 0 \n", - "79840 0 0 0 0 \n", - "79841 0 0 0 0 \n", - "79842 0 0 0 0 \n", - "79843 0 0 0 0 \n", - "\n", - " marital_divorced marital_married marital_single contact_cellular \\\n", - "0 0 1 0 0 \n", - "1 0 0 1 0 \n", - "2 0 1 0 0 \n", - "3 0 1 0 0 \n", - "4 0 0 1 0 \n", - "... ... ... ... ... \n", - "79839 0 0 0 0 \n", - "79840 0 0 0 1 \n", - "79841 0 0 0 1 \n", - "79842 0 0 0 1 \n", - "79843 0 0 0 1 \n", - "\n", - " contact_telephone default_no default_yes housing_no housing_yes \\\n", - "0 0 1 0 0 1 \n", - "1 0 1 0 0 1 \n", - "2 0 1 0 0 1 \n", - "3 0 1 0 0 1 \n", - "4 0 1 0 1 0 \n", - "... ... ... ... ... ... \n", - "79839 0 1 0 1 0 \n", - "79840 0 1 0 0 0 \n", - "79841 0 1 0 1 0 \n", - "79842 0 1 0 1 0 \n", - "79843 0 1 0 1 0 \n", - "\n", - " loan_no loan_yes y \n", - "0 1 0 0 \n", - "1 1 0 0 \n", - "2 0 1 0 \n", - "3 1 0 0 \n", - "4 1 0 0 \n", - "... ... ... .. 
\n", - "79839 1 0 1 \n", - "79840 0 0 1 \n", - "79841 1 0 1 \n", - "79842 1 0 1 \n", - "79843 1 0 1 \n", - "\n", - "[79844 rows x 33 columns]\n" + "ename": "NameError", + "evalue": "name 'df' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Users\\KonradBachusz(Creder\\OneDrive - OneWorkplace\\Documents\\Projects\\internal\\terraform-aws-mlops-module\\mlops_ml_models\\models_template_notebook.ipynb Cell 12\u001b[0m line \u001b[0;36m1\n\u001b[0;32m 14\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mFile does not exist\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 15\u001b[0m \u001b[39mreturn\u001b[39;00m df\n\u001b[1;32m---> 17\u001b[0m df\u001b[39m.\u001b[39mhead()\n", + "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined" ] } ], "source": [ "#TODO tidy up\n", - "data_processing_path = os.getenv(\"data_processing\")\n", "\n", - "if data_processing_path != \"None\":\n", - " try:\n", - " from preprocess_data import preprocess_data\n", - " df = preprocess_data(df)\n", - " if isinstance(df, pd.DataFrame):\n", - " print(\"The function returned a DataFrame.\")\n", - " else:\n", - " print(\"The function did not return a DataFrame.\")\n", + "def preprocess_df(df):\n", + " \"\"\"\"\"\"\n", + " \n", "\n", - " except ImportError:\n", - " print(\"file does not exist\")\n", + " if preprocessing_script_path:\n", + " try:\n", + " from preprocess_data import preprocess_data\n", + " df = preprocess_data(df)\n", + " message = \"preprocess_data function didn't return a dataframe\"\n", + " assertIsInstance(df, pd.DataFrame, message)\n", + " except ImportError:\n", + " print(\"File does not exist\")\n", + " return df\n", "\n", "df.head()\n", "\n" @@ -1029,7 +943,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": 
"3.11.6" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/modules/s3/main.tf b/modules/s3/main.tf index eb7a057..5cc30cf 100644 --- a/modules/s3/main.tf +++ b/modules/s3/main.tf @@ -66,7 +66,7 @@ resource "random_string" "s3_suffix" { } resource "aws_s3_object" "preprocessing_script_path" { - count = var.preprocessing_script_path != "None" ? 1 : 0 + count = var.preprocessing_script_path != null ? 1 : 0 bucket = aws_s3_bucket.model_buckets[1].id key = "preprocess_data.py" source = var.preprocessing_script_path diff --git a/modules/s3/variables.tf b/modules/s3/variables.tf index c3d4caa..02b8e34 100644 --- a/modules/s3/variables.tf +++ b/modules/s3/variables.tf @@ -15,6 +15,5 @@ variable "tags" { variable "preprocessing_script_path" { description = "The path the user provides if they want to include their own data cleaning logic" type = string - default = "None" -} -#TODO replace "None" with None \ No newline at end of file + default = null +} \ No newline at end of file diff --git a/modules/sagemaker/templates/startupscript.sh.tftpl b/modules/sagemaker/templates/startupscript.sh.tftpl index 18ada0a..28a3865 100644 --- a/modules/sagemaker/templates/startupscript.sh.tftpl +++ b/modules/sagemaker/templates/startupscript.sh.tftpl @@ -13,12 +13,6 @@ cat << EOF > /home/ec2-user/SageMaker/.env %{ for key, value in env ~} ${key}=${value} %{ endfor ~} -#TODO remplace "None" with None -if [ ${var.preprocessing_script_path} == "None" ]; then - echo "preprocessing_script_path" = "None" >> /home/ec2-user/SageMaker/.env -else - echo "preprocessing_script_path" = preprocess_data.py >> /home/ec2-user/SageMaker/.env -fi EOF -exit 0 +exit 0 \ No newline at end of file diff --git a/variables.tf b/variables.tf index 469e51c..9e44d68 100644 --- a/variables.tf +++ b/variables.tf @@ -101,5 +101,5 @@ variable "tuning_metric" { variable "preprocessing_script_path" { description = "The path the user provides if they want to include their own data cleaning logic" type = string - 
default = "None" + default = null } \ No newline at end of file From 2beda1f2f997678ceecc0c11d60f567239e6f2c4 Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 23 Feb 2024 17:03:38 +0000 Subject: [PATCH 03/13] fix(unit test): unit test for split_data --- mlops_ml_models/delete_sagemaker_endpoint.py | 7 ++-- mlops_ml_models/deploy_model_endpoint.py | 16 +++++---- mlops_ml_models/finalize_and_save_model.py | 3 +- mlops_ml_models/load_data.py | 2 +- .../models_template_notebook.ipynb | 2 +- mlops_ml_models/save_model_to_s3.py | 8 ++--- .../{split_data.py => transfom_data.py} | 2 +- pycaret_image_files/prediction_script.py | 9 +++-- tests/test_load_data.py | 10 +++--- tests/test_transform_data.py | 35 +++++++++++++++++++ 10 files changed, 61 insertions(+), 33 deletions(-) rename mlops_ml_models/{split_data.py => transfom_data.py} (96%) create mode 100644 tests/test_transform_data.py diff --git a/mlops_ml_models/delete_sagemaker_endpoint.py b/mlops_ml_models/delete_sagemaker_endpoint.py index 7741287..fe118d7 100644 --- a/mlops_ml_models/delete_sagemaker_endpoint.py +++ b/mlops_ml_models/delete_sagemaker_endpoint.py @@ -32,11 +32,8 @@ def delete_sagemaker_endpoint(endpoint_name: str) -> None: sagemaker_client.delete_endpoint(EndpointName=endpoint_name) # Delete endpoint configuration - sagemaker_client.delete_endpoint_config( - EndpointConfigName=endpoint_name - ) + sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_name) - print(f"Endpoint '{endpoint_name}' and its configuration have " - "been deleted.") + print(f"Endpoint '{endpoint_name}' and its configuration have " "been deleted.") else: print("Endpoint deletion cancelled.") diff --git a/mlops_ml_models/deploy_model_endpoint.py b/mlops_ml_models/deploy_model_endpoint.py index ce4052e..ce3b391 100644 --- a/mlops_ml_models/deploy_model_endpoint.py +++ b/mlops_ml_models/deploy_model_endpoint.py @@ -2,10 +2,15 @@ def deploy_model( - model_name: str, model_type: str, model_s3_bucket: str, 
instance_type: str, endpoint_name, - role: str, inference_instance_count: int, image_uri: str + model_name: str, + model_type: str, + model_s3_bucket: str, + instance_type: str, + endpoint_name, + role: str, + inference_instance_count: int, + image_uri: str, ) -> None: - """This script deploys the sagemaker endpoint using the tar.gz file saved in s3. @@ -23,10 +28,7 @@ def deploy_model( image_uri=(image_uri), # The ECR image you pushed model_data=model_file, # Location of your serialized model role=role, - env={ - "MODEL_NAME": model_name, - "MODEL_TYPE": model_type - } + env={"MODEL_NAME": model_name, "MODEL_TYPE": model_type}, ) model.deploy( initial_instance_count=inference_instance_count, diff --git a/mlops_ml_models/finalize_and_save_model.py b/mlops_ml_models/finalize_and_save_model.py index acbcadc..eb67b79 100644 --- a/mlops_ml_models/finalize_and_save_model.py +++ b/mlops_ml_models/finalize_and_save_model.py @@ -1,8 +1,7 @@ import importlib -def finalize_and_save_model(algorithm_choice: str, bestModel: str, - model_name: str): +def finalize_and_save_model(algorithm_choice: str, bestModel: str, model_name: str): """ Finalizes the best model obtained from PyCaret and saves it locally. diff --git a/mlops_ml_models/load_data.py b/mlops_ml_models/load_data.py index 421ed06..c3bef86 100644 --- a/mlops_ml_models/load_data.py +++ b/mlops_ml_models/load_data.py @@ -18,7 +18,7 @@ def load_data(data_location: str) -> pd.DataFrame: df = pd.read_csv(data_location, low_memory=False) # Dropped unnamed columns. 
You should comment this portion out before # using the script if you dont have unamed columns - df = df.loc[:, ~df.columns.str.contains('^Unnamed')] + df = df.loc[:, ~df.columns.str.contains("^Unnamed")] return df except Exception as e: print(f"Error loading data: {e}") diff --git a/mlops_ml_models/models_template_notebook.ipynb b/mlops_ml_models/models_template_notebook.ipynb index 2b54e6a..7ba5362 100644 --- a/mlops_ml_models/models_template_notebook.ipynb +++ b/mlops_ml_models/models_template_notebook.ipynb @@ -351,7 +351,7 @@ "from sagemaker import get_execution_role\n", "from dotenv import load_dotenv\n", "from load_data import load_data\n", - "from split_data import split_data\n", + "from transfom_data import split_data\n", "import importlib\n", "from save_model_to_s3 import save_model_to_s3\n", "from deploy_model_endpoint import deploy_model\n", diff --git a/mlops_ml_models/save_model_to_s3.py b/mlops_ml_models/save_model_to_s3.py index 1895541..d6e4432 100644 --- a/mlops_ml_models/save_model_to_s3.py +++ b/mlops_ml_models/save_model_to_s3.py @@ -2,9 +2,7 @@ import boto3 -def save_model_to_s3( - trained_model_name: str, bucket_name: str -) -> None: +def save_model_to_s3(trained_model_name: str, bucket_name: str) -> None: """This saves the tar.gz model in an s3 bucket Args: @@ -16,5 +14,5 @@ def save_model_to_s3( s3 = boto3.client("s3") s3.upload_file( - f"{trained_model_name}.tar.gz", bucket_name, - f"{trained_model_name}.tar.gz") + f"{trained_model_name}.tar.gz", bucket_name, f"{trained_model_name}.tar.gz" + ) diff --git a/mlops_ml_models/split_data.py b/mlops_ml_models/transfom_data.py similarity index 96% rename from mlops_ml_models/split_data.py rename to mlops_ml_models/transfom_data.py index b7b8738..965e120 100644 --- a/mlops_ml_models/split_data.py +++ b/mlops_ml_models/transfom_data.py @@ -3,7 +3,7 @@ def split_data(df: pd.DataFrame, shuffle: bool) -> pd.DataFrame: """This script split the data into test_data and train_data, - with optinal shuffle 
function + with optional shuffle function Note: Remember that this function returns 2 values, therefore using, diff --git a/pycaret_image_files/prediction_script.py b/pycaret_image_files/prediction_script.py index d24b336..8ec20d5 100644 --- a/pycaret_image_files/prediction_script.py +++ b/pycaret_image_files/prediction_script.py @@ -14,8 +14,8 @@ # Instantiate Flask app app = Flask(__name__) -MODEL_NAME = os.getenv('MODEL_NAME') -MODEL_TYPE = os.getenv('MODEL_TYPE') +MODEL_NAME = os.getenv("MODEL_NAME") +MODEL_TYPE = os.getenv("MODEL_TYPE") # Define the model path # When you configure the model, you will need to specify the S3 location of @@ -32,8 +32,7 @@ @app.route("/ping", methods=["GET"]) def ping(): - return flask.Response(response="\n", status=200, - mimetype="application/json") + return flask.Response(response="\n", status=200, mimetype="application/json") # Define an endpoint for making predictions @@ -47,7 +46,7 @@ def predict(): logging.info(df) # Make predictions using the loaded model - if (MODEL_TYPE == "classification"): + if MODEL_TYPE == "classification": prediction = model.predict_proba(df) else: prediction = model.predict(df) diff --git a/tests/test_load_data.py b/tests/test_load_data.py index 2438faf..58fae9e 100644 --- a/tests/test_load_data.py +++ b/tests/test_load_data.py @@ -16,11 +16,9 @@ def mock_df() -> pd.DataFrame: pd.DataFrame: dataframe created from the script. Should be the same as in the resources. 
""" - return pd.DataFrame({ - 'col1': [1, 2, 3], - 'col2': ['A', 'B', 'C'], - 'col3': [4.5, 5.5, 6.5] - }) + return pd.DataFrame( + {"col1": [1, 2, 3], "col2": ["A", "B", "C"], "col3": [4.5, 5.5, 6.5]} + ) def test_load_data(mock_df: pd.DataFrame) -> None: @@ -30,6 +28,6 @@ def test_load_data(mock_df: pd.DataFrame) -> None: Args: mock_df (pd.DataFrame): Mock data generated from mock_df function """ - with patch('pandas.read_csv', return_value=mock_df): + with patch("pandas.read_csv", return_value=mock_df): result = load_data("mlops_ml_models/tests/resources/sample.csv") pd.testing.assert_frame_equal(result, mock_df) diff --git a/tests/test_transform_data.py b/tests/test_transform_data.py new file mode 100644 index 0000000..ee22716 --- /dev/null +++ b/tests/test_transform_data.py @@ -0,0 +1,35 @@ +from mlops_ml_models.transfom_data import split_data +import pandas as pd +import pytest +from unittest.mock import patch + + +@pytest.fixture +def mock_df() -> pd.DataFrame: + """This creates a mock dataframe based on the data + entered in the columns below. The data in the mock + dataframe is the same data that we have in the csv file in the + resources section The aim of this is to be able to test if the + load_data.py file returns a the same dataframe as what we have here. + + Returns: + pd.DataFrame: dataframe created from the script. Should be the same as + in the resources. 
+ """ + return pd.DataFrame( + { + "col1": [1, 2, 3, 1, 2, 3, 1, 2, 3, 1], + "col2": ["A", "B", "C", "A", "B", "C", "A", "B", "C", "A"], + "col3": [4.5, 5.5, 6.5, 4.5, 5.5, 6.5, 4.5, 5.5, 6.5, 6.5], + } + ) + + +def test_split_data(mock_df: pd.DataFrame, shuffle=False) -> None: + """This Test compares if the split_data correctly splits a dataframe into 80% and 20% of rows + + Args: + mock_df (pd.DataFrame): Mock data generated from mock_df function + """ + train_data, test_data = split_data(mock_df, shuffle=False) + assert len(train_data) == 8 and len(test_data) == 2 From c8c237c0003518aae8c913b28b3dd828088b659d Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 23 Feb 2024 17:17:25 +0000 Subject: [PATCH 04/13] feat(unit tests): test_split_data_shuffle --- tests/test_transform_data.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/test_transform_data.py b/tests/test_transform_data.py index ee22716..c961234 100644 --- a/tests/test_transform_data.py +++ b/tests/test_transform_data.py @@ -25,11 +25,21 @@ def mock_df() -> pd.DataFrame: ) -def test_split_data(mock_df: pd.DataFrame, shuffle=False) -> None: - """This Test compares if the split_data correctly splits a dataframe into 80% and 20% of rows +def test_split_data_shuffle(mock_df: pd.DataFrame) -> None: + """This Test compares if the split_data correctly splits a dataframe into 80% and 20% of rows with shuffling. Args: mock_df (pd.DataFrame): Mock data generated from mock_df function """ - train_data, test_data = split_data(mock_df, shuffle=False) + train_data, test_data = split_data(mock_df, shuffle=True) assert len(train_data) == 8 and len(test_data) == 2 + + +def test_split_data(mock_df: pd.DataFrame) -> None: + """This Test compares if the split_data correctly splits a dataframe into 80% and 20% of rows with no shuffling. 
+ + Args: + mock_df (pd.DataFrame): Mock data generated from mock_df function + """ + train_data, test_data = split_data(mock_df, shuffle=False) + assert list(train_data['col1']) == [1, 2, 3, 1, 2, 3, 1, 2] and list(test_data['col1']) == [3, 1] From e362b09cfaa696475e53430914fc48a318c00d31 Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 23 Feb 2024 17:18:40 +0000 Subject: [PATCH 05/13] fix(flake8): flake8 format --- tests/test_transform_data.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_transform_data.py b/tests/test_transform_data.py index c961234..1df7626 100644 --- a/tests/test_transform_data.py +++ b/tests/test_transform_data.py @@ -1,7 +1,6 @@ from mlops_ml_models.transfom_data import split_data import pandas as pd import pytest -from unittest.mock import patch @pytest.fixture @@ -42,4 +41,6 @@ def test_split_data(mock_df: pd.DataFrame) -> None: mock_df (pd.DataFrame): Mock data generated from mock_df function """ train_data, test_data = split_data(mock_df, shuffle=False) - assert list(train_data['col1']) == [1, 2, 3, 1, 2, 3, 1, 2] and list(test_data['col1']) == [3, 1] + assert list(train_data["col1"]) == [1, 2, 3, 1, 2, 3, 1, 2] and list( + test_data["col1"] + ) == [3, 1] From 5a954fc5da1873897e289d2d290f2fda80b4760a Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 23 Feb 2024 17:28:41 +0000 Subject: [PATCH 06/13] unit test commit --- mlops_ml_models/transfom_data.py | 14 ++++++++++++++ tests/test_transform_data.py | 9 ++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/mlops_ml_models/transfom_data.py b/mlops_ml_models/transfom_data.py index 965e120..1eada30 100644 --- a/mlops_ml_models/transfom_data.py +++ b/mlops_ml_models/transfom_data.py @@ -30,3 +30,17 @@ def split_data(df: pd.DataFrame, shuffle: bool) -> pd.DataFrame: return train_data, test_data except Exception as e: print(f"Error loading data: {e}") + +#TODO finish +def preprocess_df(df,preprocessing_script_path): + """""" 
+ if preprocessing_script_path: + try: + print("Loading file") + from preprocess_data import preprocess_data + df = preprocess_data(df) + message = "preprocess_data function didn't return a dataframe" + assertIsInstance(df, pd.DataFrame, message) + except ImportError: + print("File does not exist") + return df \ No newline at end of file diff --git a/tests/test_transform_data.py b/tests/test_transform_data.py index 1df7626..4a5d2b2 100644 --- a/tests/test_transform_data.py +++ b/tests/test_transform_data.py @@ -1,4 +1,4 @@ -from mlops_ml_models.transfom_data import split_data +from mlops_ml_models.transfom_data import split_data, preprocess_df import pandas as pd import pytest @@ -44,3 +44,10 @@ def test_split_data(mock_df: pd.DataFrame) -> None: assert list(train_data["col1"]) == [1, 2, 3, 1, 2, 3, 1, 2] and list( test_data["col1"] ) == [3, 1] + +def test_preprocess_df(mock_df: pd.DataFrame) -> None: + """TODO""" + preprocessing_script_path="test" + df=preprocess_df(mock_df, preprocessing_script_path) + print(df) + assert 2==2 \ No newline at end of file From b31e7fbb9cbb4573dc2cd9053f724664f70f6f42 Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 1 Mar 2024 13:41:47 +0000 Subject: [PATCH 07/13] fix(formatting): black formatting --- mlops_ml_models/transfom_data.py | 10 ++++++---- tests/test_transform_data.py | 7 ++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/mlops_ml_models/transfom_data.py b/mlops_ml_models/transfom_data.py index 1eada30..5f870fe 100644 --- a/mlops_ml_models/transfom_data.py +++ b/mlops_ml_models/transfom_data.py @@ -31,16 +31,18 @@ def split_data(df: pd.DataFrame, shuffle: bool) -> pd.DataFrame: except Exception as e: print(f"Error loading data: {e}") -#TODO finish -def preprocess_df(df,preprocessing_script_path): + +# TODO finish +def preprocess_df(df, preprocessing_script_path): """""" if preprocessing_script_path: try: print("Loading file") from preprocess_data import preprocess_data - df = preprocess_data(df) 
+ + df = preprocess_data(df) message = "preprocess_data function didn't return a dataframe" assertIsInstance(df, pd.DataFrame, message) except ImportError: print("File does not exist") - return df \ No newline at end of file + return df diff --git a/tests/test_transform_data.py b/tests/test_transform_data.py index 4a5d2b2..04013bc 100644 --- a/tests/test_transform_data.py +++ b/tests/test_transform_data.py @@ -45,9 +45,10 @@ def test_split_data(mock_df: pd.DataFrame) -> None: test_data["col1"] ) == [3, 1] + def test_preprocess_df(mock_df: pd.DataFrame) -> None: """TODO""" - preprocessing_script_path="test" - df=preprocess_df(mock_df, preprocessing_script_path) + preprocessing_script_path = "test" + df = preprocess_df(mock_df, preprocessing_script_path) print(df) - assert 2==2 \ No newline at end of file + assert 2 == 2 From 9cf9594d86e7936aff9a65208a8b95e5bb665bdc Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 1 Mar 2024 14:40:42 +0000 Subject: [PATCH 08/13] feat(preprocess_data): added a function for unit testing --- tests/preprocess_data.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/preprocess_data.py diff --git a/tests/preprocess_data.py b/tests/preprocess_data.py new file mode 100644 index 0000000..cf807ef --- /dev/null +++ b/tests/preprocess_data.py @@ -0,0 +1,18 @@ +import pandas as pd + + +def preprocess_data(df): + """This placeholder function is supposed to mock some dataframe pre-processing to b used in unit testing + Args: + df: input dataframe + Returns: + df: processed dataframe""" + + # One-hot-encode categorical columns + df = pd.get_dummies(data=df, columns=["col1", "col2"]) + + # Create some dummy columns + df["col4"] = df["col3"] + 23 + df["col5"] = (df["col3"] + 100) / df["col4"] + + return df From 6f01cf069e174cff2efb362c9da770b328d13fe1 Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 1 Mar 2024 15:06:50 +0000 Subject: [PATCH 09/13] feat(preprocessing): added unit tests to preprocessing 
function --- mlops_ml_models/transfom_data.py | 3 +-- .../preprocess_data.py => preprocess_data.py | 0 tests/test_transform_data.py | 22 +++++++++++++++---- 3 files changed, 19 insertions(+), 6 deletions(-) rename tests/preprocess_data.py => preprocess_data.py (100%) diff --git a/mlops_ml_models/transfom_data.py b/mlops_ml_models/transfom_data.py index 5f870fe..d67a15e 100644 --- a/mlops_ml_models/transfom_data.py +++ b/mlops_ml_models/transfom_data.py @@ -41,8 +41,7 @@ def preprocess_df(df, preprocessing_script_path): from preprocess_data import preprocess_data df = preprocess_data(df) - message = "preprocess_data function didn't return a dataframe" - assertIsInstance(df, pd.DataFrame, message) + assert isinstance(df, pd.DataFrame) except ImportError: print("File does not exist") return df diff --git a/tests/preprocess_data.py b/preprocess_data.py similarity index 100% rename from tests/preprocess_data.py rename to preprocess_data.py diff --git a/tests/test_transform_data.py b/tests/test_transform_data.py index 04013bc..3b87ffd 100644 --- a/tests/test_transform_data.py +++ b/tests/test_transform_data.py @@ -47,8 +47,22 @@ def test_split_data(mock_df: pd.DataFrame) -> None: def test_preprocess_df(mock_df: pd.DataFrame) -> None: - """TODO""" - preprocessing_script_path = "test" + """This test checks if the pre-processing function can be imported and execute a custom script to update the dataframe. + + Args: + mock_df: mock dataframe""" + preprocessing_script_path = "tests\\preprocess_data.py" df = preprocess_df(mock_df, preprocessing_script_path) - print(df) - assert 2 == 2 + + assert len(df.columns) == 9 + + +def test_preprocess_df_no_path(mock_df: pd.DataFrame) -> None: + """This test checks if the pre-processing function doesn't change the data if the preprocessing_script_path is not present. 
+ + Args: + mock_df: mock dataframe""" + preprocessing_script_path = None + df = preprocess_df(mock_df, preprocessing_script_path) + + assert df.equals(mock_df) From b0fcd8a5b2bc8be81721f1858f97d1cb3a3568e0 Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 1 Mar 2024 15:20:17 +0000 Subject: [PATCH 10/13] fix(formatting): flake8 fixes --- .../models_template_notebook.ipynb | 24 +++---------------- mlops_ml_models/transfom_data.py | 12 ++++++++-- setup.cfg | 2 ++ 3 files changed, 15 insertions(+), 23 deletions(-) create mode 100644 setup.cfg diff --git a/mlops_ml_models/models_template_notebook.ipynb b/mlops_ml_models/models_template_notebook.ipynb index 7ba5362..b0ff651 100644 --- a/mlops_ml_models/models_template_notebook.ipynb +++ b/mlops_ml_models/models_template_notebook.ipynb @@ -351,7 +351,7 @@ "from sagemaker import get_execution_role\n", "from dotenv import load_dotenv\n", "from load_data import load_data\n", - "from transfom_data import split_data\n", + "from transfom_data import split_data, preprocess_df\n", "import importlib\n", "from save_model_to_s3 import save_model_to_s3\n", "from deploy_model_endpoint import deploy_model\n", @@ -359,8 +359,7 @@ "from delete_sagemaker_endpoint import delete_sagemaker_endpoint\n", "from ydata_profiling import ProfileReport\n", "import shap\n", - "import pandas as pd\n", - "import unittest" + "import pandas as pd" ] }, { @@ -658,24 +657,7 @@ } ], "source": [ - "#TODO tidy up\n", - "\n", - "def preprocess_df(df):\n", - " \"\"\"\"\"\"\n", - " \n", - "\n", - " if preprocessing_script_path:\n", - " try:\n", - " from preprocess_data import preprocess_data\n", - " df = preprocess_data(df)\n", - " message = \"preprocess_data function didn't return a dataframe\"\n", - " assertIsInstance(df, pd.DataFrame, message)\n", - " except ImportError:\n", - " print(\"File does not exist\")\n", - " return df\n", - "\n", - "df.head()\n", - "\n" + "df=preprocess_df(df)" ] }, { diff --git a/mlops_ml_models/transfom_data.py 
b/mlops_ml_models/transfom_data.py index d67a15e..48776ef 100644 --- a/mlops_ml_models/transfom_data.py +++ b/mlops_ml_models/transfom_data.py @@ -32,9 +32,17 @@ def split_data(df: pd.DataFrame, shuffle: bool) -> pd.DataFrame: print(f"Error loading data: {e}") -# TODO finish def preprocess_df(df, preprocessing_script_path): - """""" + """This is a placeholder function to import the preprocess_data function + if it has been uploaded into s3 when the preprocessing_script_path is provided by the user. + Args: + df: input dataframe + preprocessing_script_path: Path to the data preprocessing script declared in user's repo + + Returns: + df: dataframe that has been processed or unchanged depending + on whether the preprocessing_script_path has been provided + """ if preprocessing_script_path: try: print("Loading file") diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..905c8bb --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 160 \ No newline at end of file From 17c36a303a322025444ac36b515e5e704f62a65f Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 1 Mar 2024 15:27:10 +0000 Subject: [PATCH 11/13] chore(readme and changelog): updated files --- CHANGELOG.md | 3 ++- README.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4f1a00..e53c6b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # AWS-MLOps-module -## [2.0.2] - 09/02/24 + +## [2.0.2] - 01/03/24 * Added functionality for passing preprocessing script ## [2.0.1] - 02/02/24 diff --git a/README.md b/README.md index fbc3683..9beb203 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ This repo contains a terraform module with corresponding AWS resources that enab ## Example Usage -#TODO update ``` module "MLOps" { source = "github.com/crederauk/terraform-aws-mlops-module?ref=" @@ -22,6 +21,7 @@ module "MLOps" { algorithm_choice = "classification" sagemaker_training_notebook_instance_type = "ml.m4.xlarge" 
inference_instance_count = 1 + preprocessing_script_path = "terraform\preprocess_data.py" tags = { my-tag-key = "my-tag-value" } @@ -67,6 +67,7 @@ No resources. | [inference\_instance\_count](#input\_inference\_instance\_count) | The initial number of instances to serve the model endpoint | `number` | `1` | no | | [inference\_instance\_type](#input\_inference\_instance\_type) | The instance type to be created for serving the model. Must be a valid EC2 instance type | `string` | `"ml.t2.medium"` | no | | [model\_target\_variable](#input\_model\_target\_variable) | The dependent variable (or 'label') that the model aims to predict. This should be a column name in the dataset. | `string` | n/a | yes | +| [preprocessing\_script\_path](#input\_preprocessing\_script\_path) | The path the user provides if they want to include their own data cleaning logic | `string` | `null` | no | | [resource\_naming\_prefix](#input\_resource\_naming\_prefix) | Naming prefix to be applied to all resources created by this module | `string` | n/a | yes | | [retrain\_model\_bool](#input\_retrain\_model\_bool) | Boolean to indicate if the retraining pipeline shoud be added | `bool` | `false` | no | | [retraining\_schedule](#input\_retraining\_schedule) | Cron expression for the model retraining frequency in the AWS format. 
See https://docs.aws.amazon.com/lambda/latest/dg/services-cloudwatchevents-expressions.html for details | `string` | `""` | no | From 39b8047a4e3885a782da32732c55cb93d7a392f0 Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 8 Mar 2024 16:39:35 +0000 Subject: [PATCH 12/13] fix(preprocessing_script_path): default as null --- modules/sagemaker/variables.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/sagemaker/variables.tf b/modules/sagemaker/variables.tf index e9a1452..b75e56f 100644 --- a/modules/sagemaker/variables.tf +++ b/modules/sagemaker/variables.tf @@ -92,4 +92,5 @@ variable "config_bucket_key_arn" { variable "preprocessing_script_path" { description = "The path the user provides if they want to include their own data cleaning logic" type = string + default = null } \ No newline at end of file From c3fb46c58e84e3960a9681d37a0fc723add0da53 Mon Sep 17 00:00:00 2001 From: Konrad Bachusz Date: Fri, 8 Mar 2024 16:40:52 +0000 Subject: [PATCH 13/13] fix(forward slash): formatting fixed --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9beb203..cd3fa7f 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ module "MLOps" { algorithm_choice = "classification" sagemaker_training_notebook_instance_type = "ml.m4.xlarge" inference_instance_count = 1 - preprocessing_script_path = "terraform\preprocess_data.py" + preprocessing_script_path = "terraform/preprocess_data.py" tags = { my-tag-key = "my-tag-value" }