diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d26fd3932..d1defc032 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -96,6 +96,9 @@ jobs:
       matrix:
         environment: [staging, prod]
 
+      # prevent a deployment failure in one environment from interrupting the other deployments
+      fail-fast: false
+
     environment: ${{ matrix.environment }}
 
     env:
@@ -103,6 +106,7 @@ jobs:
       AWS_ACCESS_KEY_ID: ${{ vars.AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
       TF_VARS_BASE64: ${{ secrets.TF_VARS_BASE64 }}
+      TF_VAR_stack_version: ${{ github.sha }}
 
     defaults:
       run:
@@ -122,15 +126,6 @@ jobs:
             | jq 'to_entries | map(.value // empty) | .[]' \
             | xargs -I{} echo '::add-mask::{}'
 
-      # `TF_VAR_*` are case sensitive and must match the case of variables
-      - name: set tf variables
-        run: |
-          echo "${TF_VARS_BASE64}" \
-            | base64 --decode \
-            | jq 'to_entries | map("TF_VAR_\(.key)=\(.value // "")") | .[]' \
-            | xargs -I{} echo '{}' \
-            >> "${GITHUB_ENV}"
-
      - name: tf init
        run: |
          terraform init \
@@ -143,8 +138,12 @@
 
      - name: tf plan
        run: |
+          trap "rm -f terraform.tfvars.json" EXIT
+          echo "${TF_VARS_BASE64}" | base64 --decode > terraform.tfvars.json
          terraform plan -input=false
 
      - name: tf apply
        run: |
+          trap "rm -f terraform.tfvars.json" EXIT
+          echo "${TF_VARS_BASE64}" | base64 --decode > terraform.tfvars.json
          terraform apply -input=false -auto-approve
diff --git a/deployment/main.tf b/deployment/main.tf
index c68561467..5cc300f3b 100644
--- a/deployment/main.tf
+++ b/deployment/main.tf
@@ -19,17 +19,6 @@ resource "scaleway_instance_security_group" "main" {
     action = "accept"
     port   = 443
   }
-  inbound_rule {
-    action = "accept"
-    port   = 5432
-  }
-}
-
-resource "scaleway_instance_volume" "main" {
-  count      = var.environment == "prod" ? 1 : 0
-  name       = "data"
-  size_in_gb = 100
-  type       = "b_ssd"
 }
 
 resource "scaleway_instance_server" "main" {
@@ -40,16 +29,47 @@ resource "scaleway_instance_server" "main" {
   security_group_id = scaleway_instance_security_group.main.id
 
   root_volume {
+    size_in_gb            = var.environment == "prod" ? 150 : 50
     delete_on_termination = false
   }
-
-  additional_volume_ids = length(scaleway_instance_volume.main) != 0 ? [scaleway_instance_volume.main[0].id] : []
 }
 
 resource "random_pet" "datalake_bucket_suffix" {}
 
 resource "scaleway_object_bucket" "main" {
   name = "data-inclusion-datalake-${var.environment}-${random_pet.datalake_bucket_suffix.id}"
+
+  lifecycle_rule {
+    id      = "archive-raw-data-after-30-days"
+    prefix  = "data/raw"
+    enabled = true
+
+    transition {
+      days          = 30
+      storage_class = "GLACIER"
+    }
+  }
+
+  lifecycle_rule {
+    id      = "archive-marts-data-after-7-days"
+    prefix  = "data/marts"
+    enabled = true
+
+    transition {
+      days          = 7
+      storage_class = "GLACIER"
+    }
+  }
+
+  lifecycle_rule {
+    id      = "expire-marts-data-after-30-days"
+    prefix  = "data/marts"
+    enabled = true
+
+    expiration {
+      days = 30
+    }
+  }
 }
 
 data "scaleway_account_project" "main" {
@@ -142,13 +162,18 @@ locals {
 }
 
 resource "scaleway_domain_record" "dns" {
-  for_each = toset([local.airflow_hostname])
+  for_each = toset(
+    [
+      "",
+      local.airflow_hostname
+    ]
+  )
 
   dns_zone = var.dns_zone
   name     = replace(each.key, ".${var.dns_zone}", "")
   type     = "A"
   data     = scaleway_instance_server.main.public_ip
-  ttl      = 60
+  ttl      = 3600
 }
 
 resource "null_resource" "up" {
@@ -172,39 +197,39 @@ resource "null_resource" "up" {
 
   provisioner "file" {
     content = sensitive(<<-EOT
-      STACK_VERSION=${var.stack_version}
-      AIRFLOW_HOSTNAME=${local.airflow_hostname}
+      STACK_VERSION='${var.stack_version}'
+      AIRFLOW_HOSTNAME='${local.airflow_hostname}'
 
       # Datawarehouse
-      DATAWAREHOUSE_DI_DATABASE=${var.datawarehouse_di_database}
-      DATAWAREHOUSE_DI_PASSWORD=${var.datawarehouse_di_password}
-      DATAWAREHOUSE_DI_USERNAME=${var.datawarehouse_di_username}
+      DATAWAREHOUSE_DI_DATABASE='${var.datawarehouse_di_database}'
+      DATAWAREHOUSE_DI_PASSWORD='${var.datawarehouse_di_password}'
+      DATAWAREHOUSE_DI_USERNAME='${var.datawarehouse_di_username}'
 
       # Airflow settings
-      AIRFLOW_WWW_USER_PASSWORD=${var.airflow_admin_password}
-      AIRFLOW__CORE__FERNET_KEY=${var.airflow__core__fernet_key}
-      AIRFLOW__SENTRY__SENTRY_DSN=${var.airflow__sentry__sentry_dsn}
-      AIRFLOW__WEBSERVER__BASE_URL=https://${local.airflow_hostname}
+      AIRFLOW_WWW_USER_PASSWORD='${var.airflow_admin_password}'
+      AIRFLOW__CORE__FERNET_KEY='${var.airflow__core__fernet_key}'
+      AIRFLOW__SENTRY__SENTRY_DSN='${var.airflow__sentry__sentry_dsn}'
+      AIRFLOW__WEBSERVER__BASE_URL='https://${local.airflow_hostname}'
 
       # Airflow connections
-      AIRFLOW_CONN_MATTERMOST=${var.airflow_conn_mattermost}
-      AIRFLOW_CONN_PG_API=${var.airflow_conn_pg_api}
-      AIRFLOW_CONN_PG=${local.airflow_conn_pg}
-      AIRFLOW_CONN_S3_SOURCES=${var.airflow_conn_s3_sources}
-      AIRFLOW_CONN_S3=${local.airflow_conn_s3}
-      AIRFLOW_CONN_SSH_API=${var.airflow_conn_ssh_api}
+      AIRFLOW_CONN_MATTERMOST='${var.airflow_conn_mattermost}'
+      AIRFLOW_CONN_PG_API='${var.airflow_conn_pg_api}'
+      AIRFLOW_CONN_PG='${local.airflow_conn_pg}'
+      AIRFLOW_CONN_S3_SOURCES='${var.airflow_conn_s3_sources}'
+      AIRFLOW_CONN_S3='${local.airflow_conn_s3}'
+      AIRFLOW_CONN_SSH_API='${var.airflow_conn_ssh_api}'
 
       # Airflow variables
-      AIRFLOW_VAR_BREVO_API_KEY=${var.brevo_api_key}
-      AIRFLOW_VAR_DATAGOUV_API_KEY=${var.datagouv_api_key}
-      AIRFLOW_VAR_DORA_API_TOKEN=${var.dora_api_token}
-      AIRFLOW_VAR_DORA_API_URL=${var.dora_api_url}
-      AIRFLOW_VAR_EMPLOIS_API_TOKEN=${var.emplois_api_token}
-      AIRFLOW_VAR_FT_API_TOKEN=${var.ft_api_token}
-      AIRFLOW_VAR_GRIST_API_TOKEN=${var.grist_api_token}
-      AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY=${var.mes_aides_airtable_key}
-      AIRFLOW_VAR_SIAO_FILE_URL=${var.siao_file_url}
-      AIRFLOW_VAR_SOLIGUIDE_API_TOKEN=${var.soliguide_api_token}
+      AIRFLOW_VAR_BREVO_API_KEY='${var.brevo_api_key}'
+      AIRFLOW_VAR_DATAGOUV_API_KEY='${var.datagouv_api_key}'
+      AIRFLOW_VAR_DORA_API_TOKEN='${var.dora_api_token}'
+      AIRFLOW_VAR_DORA_API_URL='${var.dora_api_url}'
+      AIRFLOW_VAR_EMPLOIS_API_TOKEN='${var.emplois_api_token}'
+      AIRFLOW_VAR_FT_API_TOKEN='${var.ft_api_token}'
+      AIRFLOW_VAR_GRIST_API_TOKEN='${var.grist_api_token}'
+      AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY='${var.mes_aides_airtable_key}'
+      AIRFLOW_VAR_SIAO_FILE_URL='${var.siao_file_url}'
+      AIRFLOW_VAR_SOLIGUIDE_API_TOKEN='${var.soliguide_api_token}'
      EOT
    )
    destination = "${local.work_dir}/.env"
diff --git a/pipeline/Dockerfile b/pipeline/Dockerfile
index 782fd82b9..7fa232dec 100644
--- a/pipeline/Dockerfile
+++ b/pipeline/Dockerfile
@@ -48,6 +48,9 @@ ENV PYTHONDONTWRITEBYTECODE 1
 
 ENV AIRFLOW_VAR_DBT_PROJECT_DIR /opt/airflow/dbt
 
+COPY requirements requirements
+RUN pip install -r requirements/airflow/requirements.txt
+
 USER root
 
 RUN apt-get update \
diff --git a/pipeline/defaults.env b/pipeline/defaults.env
index ef5482af4..3d6db65f5 100644
--- a/pipeline/defaults.env
+++ b/pipeline/defaults.env
@@ -21,7 +21,7 @@ AIRFLOW_VAR_INSEE_COG_DATASET_URL=https://www.insee.fr/fr/statistiques/fichier/6
 AIRFLOW_VAR_MEDNUM_API_URL=https://cartographie.societenumerique.gouv.fr/api/v0/
 AIRFLOW_VAR_MES_AIDES_AIDES_URL=https://airtable.com/appRga7C9USklxYiV/tblcAC5yMV3Ftzv5c/viwMte3unsIYXxY9a
 AIRFLOW_VAR_MES_AIDES_GARAGES_URL=https://airtable.com/appRga7C9USklxYiV/tblfhYoBpcQoJwGIv/viwoJsw0vsAnU0fAo
-AIRFLOW_VAR_MONENFANT_CRECHES_FILE_URL=https://data-inclusion-lake.s3.fr-par.scw.cloud/sources/monenfant/2023-06-14/creches.json
+AIRFLOW_VAR_MONENFANT_CRECHES_FILE_URL=https://data-inclusion-datalake-prod-grand-titmouse.s3.fr-par.scw.cloud/sources/monenfant/2023-06-14/creches.json
 AIRFLOW_VAR_ODSPEP_S3_KEY_PREFIX=sources/odspep/2023-01-23/denormalized/Exports/
 AIRFLOW_VAR_RESEAU_ALPHA_URL=https://www.reseau-alpha.org
 AIRFLOW_VAR_SIAO_FILE_URL=