From f81b8591d06a2e23382b8f22e589029a52084962 Mon Sep 17 00:00:00 2001 From: Florencia Date: Thu, 27 Jun 2024 22:58:10 +0200 Subject: [PATCH 1/3] Added bucket for model logic --- .github/workflows/cd.yml | 12 +- challenge/model.py | 15 +- challenge/terraform/main.tf | 5 + challenge/terraform/terraform.tfstate | 76 ++++++- challenge/terraform/terraform.tfstate.backup | 200 +++++++++++++++++-- challenge/terraform/variables.tf | 6 + requirements.txt | 2 + 7 files changed, 285 insertions(+), 31 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 597ce5e..7bedc36 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -3,7 +3,7 @@ name: 'Continuous Delivery' on: pull_request: #TODO: For testing branches: - - "main" + - "develop" jobs: build: @@ -26,11 +26,15 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt -r requirements-dev.txt -r requirements-test.txt - # Run training script to have the model available - - name: Run training script + # Download model from GCS + - name: Download model from GCS + env: + BUCKET_NAME: ${{ secrets.BUCKET_NAME }} + MODEL: ${{ secrets.MODEL }} run: | export PYTHONPATH=$PYTHONPATH:$(pwd) - python ./challenge/training.py + mkdir -p ./challenge/models + gsutil cp gs://$BUCKET_NAME/$MODEL.pkl ./challenge/models/$MODEL.pkl # Build and push Docker image - name: Build and push Docker image diff --git a/challenge/model.py b/challenge/model.py index aa9d479..fd33ded 100644 --- a/challenge/model.py +++ b/challenge/model.py @@ -7,12 +7,13 @@ import pandas as pd import numpy as np import xgboost as xgb +from google.cloud import storage from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix, classification_report MODEL_PATH = os.path.join('challenge', 'models', 'model.pkl') THRESHOLD_IN_MINUTES = 15 - +BUKET_NAME = 'latam-model-challenge' class DelayModel: def __init__( @@ -44,6 +45,15 @@ def get_min_diff(self, data): fecha_i = datetime.strptime(data['Fecha-I'], '%Y-%m-%d %H:%M:%S') min_diff = ((fecha_o - fecha_i).total_seconds())/60 return min_diff + + def _upload_to_gcs(self, source_file_name, destination_blob_name): + """Uploads a file to the bucket.""" + storage_client = storage.Client() + bucket = storage_client.bucket(BUKET_NAME) + blob = bucket.blob(destination_blob_name) + blob.upload_from_filename(source_file_name) + + logging.info(f"File {source_file_name} uploaded to {destination_blob_name}.") def preprocess( self, @@ -123,6 +133,9 @@ def fit( # Save the model to a file with open(MODEL_PATH, 'wb') as f: pickle.dump(self._model, f) + + # Upload the model to Google Cloud Storage + self._upload_to_gcs(MODEL_PATH, os.path.basename(MODEL_PATH)) def predict( self, diff --git a/challenge/terraform/main.tf b/challenge/terraform/main.tf index 001eda6..c177025 100644 --- a/challenge/terraform/main.tf +++ b/challenge/terraform/main.tf @@ -25,6 +25,11 @@ resource "google_cloud_run_service" "latam_challenge_service" { } } +resource "google_storage_bucket" "bucket" { + name = var.bucket_name + location = var.region +} + resource "google_cloud_run_service_iam_policy" "noauth" { location = google_cloud_run_service.latam_challenge_service.location project = google_cloud_run_service.latam_challenge_service.project diff --git a/challenge/terraform/terraform.tfstate b/challenge/terraform/terraform.tfstate index ab95f0b..a5a02b4 100644 --- a/challenge/terraform/terraform.tfstate +++ b/challenge/terraform/terraform.tfstate @@ -1,7 +1,7 @@ { "version": 4, "terraform_version": "1.5.2", - "serial": 11, + "serial": 16, "lineage": "41caa1a9-fd55-f2c3-6cf1-50daff5a262d", "outputs": { "service_url": { @@ -52,19 +52,21 @@ { "annotations": {}, "effective_annotations": { + "run.googleapis.com/client-name": "gcloud", + "run.googleapis.com/client-version": "481.0.0", "run.googleapis.com/ingress": "all", "run.googleapis.com/ingress-status": "all", - "run.googleapis.com/operation-id": "798e9ce8-f9a8-4c27-a595-6ab85ec73a63", + "run.googleapis.com/operation-id": "b406ad63-11e9-4d32-ab22-7dd6ff009f67", "serving.knative.dev/creator": "florencia@tryolabs.com", "serving.knative.dev/lastModifier": "florencia@tryolabs.com" }, "effective_labels": { "cloud.googleapis.com/location": "us-central1" }, - "generation": 1, + "generation": 6, "labels": {}, "namespace": "florencia-tryolabs-latam", - "resource_version": "AAYbzoRf3lE", + "resource_version": "AAYb5Qwfytc", "self_link": "/apis/serving.knative.dev/v1/namespaces/307648342667/services/latam-challenge", "terraform_labels": {}, "uid": "22637102-e0fa-49ea-8643-85dfc7604be4" @@ -94,14 +96,14 @@ "type": "RoutesReady" } ], - "latest_created_revision_name": "latam-challenge-00001-xqz", - "latest_ready_revision_name": "latam-challenge-00001-xqz", - "observed_generation": 1, + "latest_created_revision_name": "latam-challenge-00006-6hb", + "latest_ready_revision_name": "latam-challenge-00006-6hb", + "observed_generation": 6, "traffic": [ { "latest_revision": true, "percent": 100, - "revision_name": "latam-challenge-00001-xqz", + "revision_name": "latam-challenge-00006-6hb", "tag": "", "url": "" } @@ -114,10 +116,13 @@ "metadata": [ { "annotations": { - "autoscaling.knative.dev/maxScale": "100" + "autoscaling.knative.dev/maxScale": "100", + "run.googleapis.com/client-name": "gcloud", + "run.googleapis.com/client-version": "481.0.0" }, "generation": 0, "labels": { + "client.knative.dev/nonce": "nmztqcazkn", "run.googleapis.com/startupProbeType": "Default" }, "name": "", @@ -132,8 +137,8 @@ "container_concurrency": 80, "containers": [ { - "args": null, - "command": null, + "args": [], + "command": [], "env": [], "env_from": [], "image": "us-docker.pkg.dev/florencia-tryolabs-latam/florencia-repo-latam-challenge/latam-challenge", @@ -222,6 +227,55 @@ ] } ] + }, + { + "mode": "managed", + "type": "google_storage_bucket", + "name": "bucket", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 1, + "attributes": { + "autoclass": [], + "cors": [], + "custom_placement_config": [], + "default_event_based_hold": false, + "effective_labels": {}, + "enable_object_retention": false, + "encryption": [], + "force_destroy": false, + "id": "latam-model-challenge", + "labels": {}, + "lifecycle_rule": [], + "location": "US-CENTRAL1", + "logging": [], + "name": "latam-model-challenge", + "project": "florencia-tryolabs-latam", + "project_number": 307648342667, + "public_access_prevention": "inherited", + "requester_pays": false, + "retention_policy": [], + "rpo": null, + "self_link": "https://www.googleapis.com/storage/v1/b/latam-model-challenge", + "soft_delete_policy": [ + { + "effective_time": "2024-06-27T20:33:31.173Z", + "retention_duration_seconds": 604800 + } + ], + "storage_class": "STANDARD", + "terraform_labels": {}, + "timeouts": null, + "uniform_bucket_level_access": false, + "url": "gs://latam-model-challenge", + "versioning": [], + "website": [] + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsInJlYWQiOjI0MDAwMDAwMDAwMCwidXBkYXRlIjoyNDAwMDAwMDAwMDB9LCJzY2hlbWFfdmVyc2lvbiI6IjEifQ==" + } + ] } ], "check_results": null diff --git a/challenge/terraform/terraform.tfstate.backup b/challenge/terraform/terraform.tfstate.backup index c598144..ee6b8b7 100644 --- a/challenge/terraform/terraform.tfstate.backup +++ b/challenge/terraform/terraform.tfstate.backup @@ -1,9 +1,14 @@ { "version": 4, "terraform_version": "1.5.2", - "serial": 7, + "serial": 15, "lineage": "41caa1a9-fd55-f2c3-6cf1-50daff5a262d", - "outputs": {}, + "outputs": { + "service_url": { + "value": "https://latam-challenge-7eedzyco2q-uc.a.run.app", + "type": "string" + } + }, "resources": [ { "mode": "data", @@ -38,26 +43,102 @@ "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", "instances": [ { - "status": "tainted", "schema_version": 2, "attributes": { "autogenerate_revision_name": false, "id": "locations/us-central1/namespaces/florencia-tryolabs-latam/services/latam-challenge", "location": "us-central1", - "metadata": null, + "metadata": [ + { + "annotations": {}, + "effective_annotations": { + "run.googleapis.com/client-name": "gcloud", + "run.googleapis.com/client-version": "481.0.0", + "run.googleapis.com/ingress": "all", + "run.googleapis.com/ingress-status": "all", + "run.googleapis.com/operation-id": "b406ad63-11e9-4d32-ab22-7dd6ff009f67", + "serving.knative.dev/creator": "florencia@tryolabs.com", + "serving.knative.dev/lastModifier": "florencia@tryolabs.com" + }, + "effective_labels": { + "cloud.googleapis.com/location": "us-central1" + }, + "generation": 6, + "labels": {}, + "namespace": "florencia-tryolabs-latam", + "resource_version": "AAYb5Qwfytc", + "self_link": "/apis/serving.knative.dev/v1/namespaces/307648342667/services/latam-challenge", + "terraform_labels": {}, + "uid": "22637102-e0fa-49ea-8643-85dfc7604be4" + } + ], "name": "latam-challenge", "project": "florencia-tryolabs-latam", - "status": null, + "status": [ + { + "conditions": [ + { + "message": "", + "reason": "", + "status": "True", + "type": "Ready" + }, + { + "message": "", + "reason": "", + "status": "True", + "type": "ConfigurationsReady" + }, + { + "message": "", + "reason": "", + "status": "True", + "type": "RoutesReady" + } + ], + "latest_created_revision_name": "latam-challenge-00006-6hb", + "latest_ready_revision_name": "latam-challenge-00006-6hb", + "observed_generation": 6, + "traffic": [ + { + "latest_revision": true, + "percent": 100, + "revision_name": "latam-challenge-00006-6hb", + "tag": "", + "url": "" + } + ], + "url": "https://latam-challenge-7eedzyco2q-uc.a.run.app" + } + ], "template": [ { - "metadata": [], + "metadata": [ + { + "annotations": { + "autoscaling.knative.dev/maxScale": "100", + "run.googleapis.com/client-name": "gcloud", + "run.googleapis.com/client-version": "481.0.0" + }, + "generation": 0, + "labels": { + "client.knative.dev/nonce": "nmztqcazkn", + "run.googleapis.com/startupProbeType": "Default" + }, + "name": "", + "namespace": "", + "resource_version": "", + "self_link": "", + "uid": "" + } + ], "spec": [ { - "container_concurrency": 0, + "container_concurrency": 80, "containers": [ { - "args": null, - "command": null, + "args": [], + "command": [], "env": [], "env_from": [], "image": "us-docker.pkg.dev/florencia-tryolabs-latam/florencia-repo-latam-challenge/latam-challenge", @@ -66,19 +147,41 @@ "ports": [ { "container_port": 8000, - "name": "", + "name": "http1", "protocol": "" } ], - "resources": [], - "startup_probe": [], + "resources": [ + { + "limits": { + "cpu": "1000m", + "memory": "512Mi" + }, + "requests": {} + } + ], + "startup_probe": [ + { + "failure_threshold": 1, + "grpc": [], + "http_get": [], + "initial_delay_seconds": 0, + "period_seconds": 240, + "tcp_socket": [ + { + "port": 8000 + } + ], + "timeout_seconds": 240 + } + ], "volume_mounts": [], "working_dir": "" } ], - "service_account_name": "", + "service_account_name": "307648342667-compute@developer.gserviceaccount.com", "serving_state": "", - "timeout_seconds": 0, + "timeout_seconds": 300, "volumes": [] } ] @@ -105,7 +208,74 @@ "type": "google_cloud_run_service_iam_policy", "name": "noauth", "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", - "instances": [] + "instances": [ + { + "schema_version": 0, + "attributes": { + "etag": "BwYbzoS1QOI=", + "id": "v1/projects/florencia-tryolabs-latam/locations/us-central1/services/latam-challenge", + "location": "us-central1", + "policy_data": "{\"bindings\":[{\"members\":[\"allUsers\"],\"role\":\"roles/run.invoker\"}]}", + "project": "florencia-tryolabs-latam", + "service": "v1/projects/florencia-tryolabs-latam/locations/us-central1/services/latam-challenge" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "data.google_iam_policy.noauth", + "google_cloud_run_service.latam_challenge_service" + ] + } + ] + }, + { + "mode": "managed", + "type": "google_storage_bucket", + "name": "bucket", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 1, + "attributes": { + "autoclass": [], + "cors": [], + "custom_placement_config": [], + "default_event_based_hold": false, + "effective_labels": {}, + "enable_object_retention": false, + "encryption": [], + "force_destroy": false, + "id": "latam-model-challenge", + "labels": null, + "lifecycle_rule": [], + "location": "US-CENTRAL1", + "logging": [], + "name": "latam-model-challenge", + "project": "florencia-tryolabs-latam", + "project_number": 307648342667, + "public_access_prevention": "inherited", + "requester_pays": false, + "retention_policy": [], + "rpo": null, + "self_link": "https://www.googleapis.com/storage/v1/b/latam-model-challenge", + "soft_delete_policy": [ + { + "effective_time": "2024-06-27T20:33:31.173Z", + "retention_duration_seconds": 604800 + } + ], + "storage_class": "STANDARD", + "terraform_labels": {}, + "timeouts": null, + "uniform_bucket_level_access": false, + "url": "gs://latam-model-challenge", + "versioning": [], + "website": [] + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsInJlYWQiOjI0MDAwMDAwMDAwMCwidXBkYXRlIjoyNDAwMDAwMDAwMDB9LCJzY2hlbWFfdmVyc2lvbiI6IjEifQ==" + } + ] } ], "check_results": null diff --git a/challenge/terraform/variables.tf b/challenge/terraform/variables.tf index 88528e3..d8091da 100644 --- a/challenge/terraform/variables.tf +++ b/challenge/terraform/variables.tf @@ -15,3 +15,9 @@ variable "image_url" { type = string default = "us-docker.pkg.dev/florencia-tryolabs-latam/florencia-repo-latam-challenge/latam-challenge" } + +variable "bucket_name" { + description = "Bucket to save model" + type = string + default = "latam-model-challenge" +} diff --git a/requirements.txt b/requirements.txt index 1405c5a..16c6c7e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,5 @@ numpy~=1.22.4 pandas~=1.3.5 scikit-learn~=1.3.0 xgboost~=2.0.3 +google-cloud-storage~=2.17.0 +protobuf~=3.20.0 From f4413b68bce94d68cdd07c516a35e8def4bcb1e8 Mon Sep 17 00:00:00 2001 From: Florencia Date: Thu, 27 Jun 2024 23:02:59 +0200 Subject: [PATCH 2/3] Fix in ci --- .github/workflows/ci.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 20df210..447cc1f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,13 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt -r requirements-dev.txt -r requirements-test.txt + + # Authenticate to Google Cloud + - id: 'auth' + name: 'Authenticate to Google Cloud' + uses: 'google-github-actions/auth@v1' + with: + credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}' # Run tests - name: Run tests From d173cf9bc43abe92349ba40ef20598b5f3f8fa68 Mon Sep 17 00:00:00 2001 From: Florencia Date: Thu, 27 Jun 2024 23:15:45 +0200 Subject: [PATCH 3/3] Removed training script no longer needed --- .github/workflows/cd.yml | 4 ++-- challenge/training.py | 19 ------------------- 2 files changed, 2 insertions(+), 21 deletions(-) delete mode 100644 challenge/training.py diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 7bedc36..44814c9 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -1,9 +1,9 @@ name: 'Continuous Delivery' on: - pull_request: #TODO: For testing + pull_request: branches: - - "develop" + - "main" jobs: build: diff --git a/challenge/training.py b/challenge/training.py deleted file mode 100644 index 9819400..0000000 --- a/challenge/training.py +++ /dev/null @@ -1,19 +0,0 @@ -import pandas as pd - -from sklearn.model_selection import train_test_split -from challenge.model import DelayModel - -model = DelayModel() -data = pd.read_csv(filepath_or_buffer="data/data.csv") - -features, target = model.preprocess( - data=data, - target_column="delay" -) - -_, features_validation, _, target_validation = train_test_split(features, target, test_size = 0.33, random_state = 42) - -model.fit( - features=features, - target=target -)