From e3e9ff62de6d59107cd1df9af54b6f737fe22947 Mon Sep 17 00:00:00 2001 From: Valentin Matton Date: Mon, 16 Oct 2023 11:32:00 +0200 Subject: [PATCH] chore: set remaining variables --- .github/workflows/build.yml | 7 ++ .template.env | 100 ++---------------- deployment/main.tf | 48 +++++++++ .../modules/stack_data/docker-compose.yml | 28 ++--- deployment/modules/stack_data/main.tf | 44 ++++---- deployment/modules/stack_data/variables.tf | 41 +++++++ docker-compose.yml | 75 ++----------- pipeline/.gitignore | 2 + pipeline/Dockerfile | 4 +- pipeline/defaults.env | 76 +++++++++++++ 10 files changed, 236 insertions(+), 189 deletions(-) create mode 100644 pipeline/defaults.env diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 30b55cc91..6605bc532 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -120,7 +120,14 @@ jobs: TF_VAR_dns_subdomain: ${{ vars.DNS_SUBDOMAIN }} TF_VAR_airflow__core__fernet_key: ${{ secrets.AIRFLOW__CORE__FERNET_KEY }} TF_VAR_api_token_enabled: ${{ vars.api_token_enabled }} + TF_VAR_airflow_conn_s3_sources: ${{ secrets.AIRFLOW_CONN_S3_SOURCES }} + TF_VAR_datagouv_api_key: ${{ secrets.DATAGOUV_API_KEY }} + TF_VAR_dora_api_url: ${{ vars.DORA_API_URL }} TF_VAR_dora_api_token: ${{ secrets.DORA_API_TOKEN }} + TF_VAR_emplois_api_token: ${{ secrets.EMPLOIS_API_TOKEN }} + TF_VAR_grist_api_token: ${{ secrets.GRIST_API_TOKEN }} + TF_VAR_mes_aides_airtable_key: ${{ secrets.MES_AIDES_AIRTABLE_KEY }} + TF_VAR_soliguide_api_token: ${{ secrets.SOLIGUIDE_API_TOKEN }} ENV: ${{ vars.ENVIRONMENT }} volumes: - .:/deployment diff --git a/.template.env b/.template.env index 9bd28547d..6766d74fe 100644 --- a/.template.env +++ b/.template.env @@ -9,6 +9,16 @@ # cf https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user AIRFLOW_UID= +# airflow connections string *must* be urlencoded (using `urllib.parse.urlencode` for instance) + +AIRFLOW_CONN_S3_SOURCES= +AIRFLOW_VAR_DATAGOUV_API_KEY= +AIRFLOW_VAR_DORA_API_TOKEN= +AIRFLOW_VAR_EMPLOIS_API_TOKEN= +AIRFLOW_VAR_GRIST_API_TOKEN= +AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY= +AIRFLOW_VAR_SOLIGUIDE_API_TOKEN= + ### ### target-db ### @@ -30,93 +40,3 @@ SIRETISATION_UI_PORT=8005 SIRETISATION_UI_ENV=dev SIRETISATION_UI_DEBUG=True ANNUAIRE_ENTREPRISES_API_URL=https://recherche-entreprises.api.gouv.fr - -### -### sources -### - -# airflow connections string *must* be urlencoded (using `urllib.parse.urlencode` for instance) - -AGEFIPH_SERVICES_API_URL=https://www.agefiph.fr/jsonapi/node/aide_service -AGEFIPH_STRUCTURES_FILE_URL= -AIRFLOW_CONN_S3_SOURCES= -BAN_API_URL=https://api-adresse.data.gouv.fr -CD35_FILE_URL=https://data.ille-et-vilaine.fr/dataset/8d5ec0f0-ebe1-442d-9d99-655b37d5ad07/resource/8b781e9d-e11d-486c-98cf-0f63abfae8ed/download/annuaire_sociale_fixe.csv -CD72_STRUCTURES_FILE_URL=https://grist.incubateur.net/o/datainclusion/api/docs/dFpXXzs2fug9Kb7zZhyWyn/download/csv?tableId=Structures -CD72_SERVICES_FILE_URL=https://grist.incubateur.net/o/datainclusion/api/docs/dFpXXzs2fug9Kb7zZhyWyn/download/csv?tableId=Services -DI_EXTRA_SERVICES_FILE_URL=https://data-inclusion-lake.s3.fr-par.scw.cloud/sources/data-inclusion/2023-08-16/services.json -DI_EXTRA_STRUCTURES_FILE_URL=https://data-inclusion-lake.s3.fr-par.scw.cloud/sources/data-inclusion/2023-08-16/structures.json -DORA_API_TOKEN= -DORA_API_URL=https://api.dora.inclusion.beta.gouv.fr/api/v2/ -EMPLOIS_API_TOKEN= -EMPLOIS_API_URL=https://emplois.inclusion.beta.gouv.fr/api/v1/structures/ -ETAB_PUB_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/73302880-e4df-4d4c-8676-1a61bb997f3d -FINESS_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/3dc9b1d5-0157-440d-a7b5-c894fcfdfd45 -GRIST_API_TOKEN= -IGN_ADMIN_EXPRESS_FILE_URL=http://files.opendatarchives.fr/professionnels.ign.fr/adminexpress/ADMIN-EXPRESS-COG_3-0__SHP__FRA_WM_2021-05-19.7z -IMMERSION_FACILITEE_S3_KEY_PREFIX=sources/immersion-facilitee/2023-03-06/after-siretisation-auto/ -INSEE_FIRSTNAME_FILE_URL=https://www.insee.fr/fr/statistiques/fichier/2540004/nat2021_csv.zip -INSEE_COG_DATASET_URL=https://www.insee.fr/fr/statistiques/fichier/6800675 -MEDNUM_AIDANTS_CONNECT_DATASET_URL=https://www.data.gouv.fr/fr/datasets/64a7e1371873e4865272fbd8 -MEDNUM_ANGERS_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63d13f5ec3e0721ea91a2e5d -MEDNUM_ASSEMBLEURS_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63ca9667f0e2740ddfa1bf98 -MEDNUM_CD17_DATASET_URL=https://www.data.gouv.fr/fr/datasets/646dfe9eb1a95610f26aca49 -MEDNUM_CD23_DATASET_URL=https://www.data.gouv.fr/fr/datasets/641864db09184e5b19500970 -MEDNUM_CD28_APPUI_TERRITORIAL_DATASET_URL=https://www.data.gouv.fr/fr/datasets/6470869eb0d058e044cb5b81 -MEDNUM_CD33_DATASET_URL=https://www.data.gouv.fr/fr/datasets/645381a7d6b3815929f68b82 -MEDNUM_CD40_DATASET_URL=https://www.data.gouv.fr/fr/datasets/641860de365041f1d489eeff -MEDNUM_CD44_DATASET_URL=https://www.data.gouv.fr/fr/datasets/6413499019b0b5d458e4c145 -MEDNUM_CD49_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63ca9664c2b40e3e08c20453 -MEDNUM_CD85_DATASET_URL=https://www.data.gouv.fr/fr/datasets/64a2d7ec70a28c9b4f145be0 -MEDNUM_CD87_DATASET_URL=https://www.data.gouv.fr/fr/datasets/6446555e15560fb6d85fede1 -MEDNUM_CONSEILLER_NUMERIQUE_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63ca966a2facfcdb4dd3fa4f -MEDNUM_CONUMM_DATASET_URL=https://www.data.gouv.fr/fr/datasets/647d9dbd426d8423c5497de4 -MEDNUM_CR93_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63fcd16c3a6713f39ccfaed1 -MEDNUM_ETAPES_NUMERIQUE_DATASET_URL=https://www.data.gouv.fr/fr/datasets/645a7b7ba4f1578539412fd7 -MEDNUM_FIBRE_64_DATASET_URL=https://www.data.gouv.fr/fr/datasets/644bc30fd6f232ae075a0fc2 -MEDNUM_FRANCE_SERVICES_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63ca9666774b02d12be44596 -MEDNUM_FRANCE_TIERS_LIEUX_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63d7de199bb6eb8f4786982b -MEDNUM_FRANCILIN_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63d0fa06a3406203e0f16ed1 -MEDNUM_HINAURA_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63ca966d774b02d12be44597 -MEDNUM_HUB_ANTILLES_DATASET_URL=https://www.data.gouv.fr/fr/datasets/644bc30fce70044e21ccefca -MEDNUM_HUB_LO_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63fc697f74f28bd3729806e9 -MEDNUM_MULHOUSE_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63f771187acc8da3c1143974 -MEDNUM_RES_IN_DATASET_URL=https://www.data.gouv.fr/fr/datasets/640a0753b2587b420259932e -MEDNUM_RHINOCC_DATASET_URL=https://www.data.gouv.fr/fr/datasets/6409d8ec539b90e1f25a174b -MEDNUM_ULTRA_NUMERIQUE_DATASET_URL=https://www.data.gouv.fr/fr/datasets/642eeced5b28a9521b5be918 -MES_AIDES_AIDES_URL=https://airtable.com/appoYjASNOp90Ryy5/tblN4m8Ayzxzgxl9W/viw7HRKMxq4hR2f27 -MES_AIDES_AIRTABLE_KEY= -MES_AIDES_GARAGES_URL=https://airtable.com/appEvva5gyqqoQRnr/tblnGf4Y5EUEeVHtJ/viw9ZZAUkexq6uDaI -MONENFANT_CRECHES_FILE_URL= -ODSPEP_S3_KEY_PREFIX=sources/odspep/2023-01-23/denormalized/Exports/ -RESEAU_ALPHA_URL=https://www.reseau-alpha.org -SIAO_FILE_URL= -SIRENE_STOCK_ETAB_GEOCODE_FILE_URL=https://data.cquest.org/geo_sirene/v2019/last/StockEtablissementActif_utf8_geo.csv.gz -SIRENE_STOCK_ETAB_HIST_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/88fbb6b4-0320-443e-b739-b4376a012c32 -SIRENE_STOCK_ETAB_LIENS_SUCCESSION_URL=https://www.data.gouv.fr/fr/datasets/r/9c4d5d9c-4bbb-4b9c-837a-6155cb589e26 -SIRENE_STOCK_UNITE_LEGALE_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/825f4199-cadd-486c-ac46-a65a8ea1a047 -SOLIGUIDE_API_TOKEN= -SOLIGUIDE_API_URL=https://api.soliguide.fr/ -UN_JEUNE_UNE_SOLUTION_API_URL=https://mes-aides.1jeune1solution.beta.gouv.fr/api/ - -### -### data.gouv -### - -DATAGOUV_API_KEY= -DATAGOUV_API_URL=https://www.data.gouv.fr/api/ -DATAGOUV_DI_DATASET_ID=6233723c2c1e4a54af2f6b2d -DATAGOUV_DI_RESOURCE_IDS=' -{ - "structures": { - "json": "4fc64287-e869-4550-8fb9-b1e0b7809ffa", - "csv": "fd4cb3ef-5c31-4c99-92fe-2cd8016c0ca5", - "xlsx": "fad88958-c9a7-4914-a9b8-89d1285c210a" - }, - "services": { - "json": "0eac1faa-66f9-4e49-8fb3-f0721027d89f", - "csv": "5abc151a-5729-4055-b0a9-d5691276f461", - "xlsx": "de2eb57b-113d-48eb-95d2-59a69ba36eb1" - }, -} -' \ No newline at end of file diff --git a/deployment/main.tf b/deployment/main.tf index a5033176e..e1dc547f6 100644 --- a/deployment/main.tf +++ b/deployment/main.tf @@ -110,12 +110,53 @@ variable "airflow__core__fernet_key" { sensitive = true } +variable "airflow_conn_s3_sources" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + +variable "datagouv_api_key" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + variable "dora_api_token" { description = "Used in extraction tasks orchestrated by airflow" type = string sensitive = true } +variable "dora_api_url" { + description = "Used in extraction tasks orchestrated by airflow" + sensitive = true +} + +variable "emplois_api_token" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + +variable "grist_api_token" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + +variable "mes_aides_airtable_key" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + +variable "soliguide_api_token" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + module "stack_data" { source = "./modules/stack_data" @@ -138,7 +179,14 @@ module "stack_data" { dns_zone = var.dns_zone dns_subdomain = var.dns_subdomain airflow__core__fernet_key = var.airflow__core__fernet_key + airflow_conn_s3_sources = var.airflow_conn_s3_sources + datagouv_api_key = var.datagouv_api_key dora_api_token = var.dora_api_token + dora_api_url = var.dora_api_url + emplois_api_token = var.emplois_api_token + grist_api_token = var.grist_api_token + mes_aides_airtable_key = var.mes_aides_airtable_key + soliguide_api_token = var.soliguide_api_token api_token_enabled = var.api_token_enabled } diff --git a/deployment/modules/stack_data/docker-compose.yml b/deployment/modules/stack_data/docker-compose.yml index d76ede9bb..73f34d8e2 100644 --- a/deployment/modules/stack_data/docker-compose.yml +++ b/deployment/modules/stack_data/docker-compose.yml @@ -17,24 +17,22 @@ x-airflow-common: AIRFLOW__WEBSERVER__BASE_URL: https://${AIRFLOW_HOSTNAME} # Connections - AIRFLOW_CONN_S3: ${AIRFLOW_CONN_S3} AIRFLOW_CONN_PG: ${AIRFLOW_CONN_PG} + AIRFLOW_CONN_S3: ${AIRFLOW_CONN_S3} + AIRFLOW_CONN_S3_SOURCES: ${AIRFLOW_CONN_S3_SOURCES} # Variables - AIRFLOW_VAR_DBT_PROJECT_DIR: /opt/airflow/dbt - AIRFLOW_VAR_DBT_TARGET_PATH: /opt/airflow/dbt-runtime/target + AIRFLOW_VAR_DATAGOUV_API_KEY: ${AIRFLOW_VAR_DATAGOUV_API_KEY} + AIRFLOW_VAR_DORA_API_TOKEN: ${AIRFLOW_VAR_DORA_API_TOKEN} + AIRFLOW_VAR_EMPLOIS_API_TOKEN: ${AIRFLOW_VAR_EMPLOIS_API_TOKEN} + AIRFLOW_VAR_GRIST_API_TOKEN: ${AIRFLOW_VAR_GRIST_API_TOKEN} + AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY: ${AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY} + AIRFLOW_VAR_SOLIGUIDE_API_TOKEN: ${AIRFLOW_VAR_SOLIGUIDE_API_TOKEN} + + AIRFLOW_VAR_DORA_API_URL: ${AIRFLOW_VAR_DORA_API_URL} + AIRFLOW_VAR_DBT_LOG_PATH: /opt/airflow/dbt-runtime/logs - AIRFLOW_VAR_BAN_API_URL: ${BAN_API_URL} - AIRFLOW_VAR_DORA_API_URL: ${DORA_API_URL} - AIRFLOW_VAR_DORA_API_TOKEN: ${DORA_API_TOKEN} - AIRFLOW_VAR_IGN_ADMIN_EXPRESS_FILE_URL: ${IGN_ADMIN_EXPRESS_FILE_URL} - AIRFLOW_VAR_INSEE_FIRSTNAME_FILE_URL: ${INSEE_FIRSTNAME_FILE_URL} - AIRFLOW_VAR_INSEE_COG_DATASET_URL: ${INSEE_COG_DATASET_URL} - AIRFLOW_VAR_SIRENE_STOCK_ETAB_GEOCODE_FILE_URL: ${SIRENE_STOCK_ETAB_GEOCODE_FILE_URL} - AIRFLOW_VAR_SIRENE_STOCK_ETAB_HIST_FILE_URL: ${SIRENE_STOCK_ETAB_HIST_FILE_URL} - AIRFLOW_VAR_SIRENE_STOCK_ETAB_LIENS_SUCCESSION_URL: ${SIRENE_STOCK_ETAB_LIENS_SUCCESSION_URL} - AIRFLOW_VAR_SIRENE_STOCK_UNITE_LEGALE_FILE_URL: ${SIRENE_STOCK_UNITE_LEGALE_FILE_URL} - AIRFLOW_VAR_UN_JEUNE_UNE_SOLUTION_API_URL: ${UN_JEUNE_UNE_SOLUTION_API_URL} + AIRFLOW_VAR_DBT_TARGET_PATH: /opt/airflow/dbt-runtime/target volumes: - airflow-logs:/opt/airflow/logs @@ -109,6 +107,8 @@ services: <<: *airflow-common-depends-on airflow-init: condition: service_completed_successfully + env_file: + - ./defaults.env datawarehouse: image: rg.fr-par.scw.cloud/data-inclusion/data-inclusion-datawarehouse:${STACK_VERSION} diff --git a/deployment/modules/stack_data/main.tf b/deployment/modules/stack_data/main.tf index efe8c7e7a..b9ca906b1 100644 --- a/deployment/modules/stack_data/main.tf +++ b/deployment/modules/stack_data/main.tf @@ -157,29 +157,32 @@ resource "null_resource" "up" { provisioner "file" { content = sensitive(<<-EOT - STACK_VERSION=${var.stack_version} + API_HOSTNAME=${local.api_hostname} + API_SECRET_KEY=${var.api_secret_key} + API_TOKEN_ENABLED=${var.api_token_enabled} + + # common configuration + AIRFLOW__CORE__FERNET_KEY=${var.airflow__core__fernet_key} AIRFLOW_CONN_PG=${local.airflow_conn_pg} AIRFLOW_CONN_S3=${local.airflow_conn_s3} - AIRFLOW__CORE__FERNET_KEY=${var.airflow__core__fernet_key} + AIRFLOW_HOSTNAME=${local.airflow_hostname} AIRFLOW_WWW_USER_PASSWORD=${var.airflow_admin_password} DATAWAREHOUSE_DI_DATABASE=${var.datawarehouse_di_database} - DATAWAREHOUSE_DI_USERNAME=${var.datawarehouse_di_username} DATAWAREHOUSE_DI_PASSWORD=${var.datawarehouse_di_password} - API_SECRET_KEY=${var.api_secret_key} - BAN_API_URL=https://api-adresse.data.gouv.fr - DORA_API_URL=https://api.dora.incubateur.net/api/v2/ - DORA_API_TOKEN=${var.dora_api_token} - IGN_ADMIN_EXPRESS_FILE_URL=http://files.opendatarchives.fr/professionnels.ign.fr/adminexpress/ADMIN-EXPRESS-COG_3-0__SHP__FRA_WM_2021-05-19.7z - INSEE_FIRSTNAME_FILE_URL=https://www.insee.fr/fr/statistiques/fichier/2540004/nat2021_csv.zip - INSEE_COG_DATASET_URL=https://www.insee.fr/fr/statistiques/fichier/6800675 - SIRENE_STOCK_ETAB_GEOCODE_FILE_URL=https://data.cquest.org/geo_sirene/v2019/last/StockEtablissementActif_utf8_geo.csv.gz - SIRENE_STOCK_ETAB_HIST_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/88fbb6b4-0320-443e-b739-b4376a012c32 - SIRENE_STOCK_ETAB_LIENS_SUCCESSION_URL=https://www.data.gouv.fr/fr/datasets/r/9c4d5d9c-4bbb-4b9c-837a-6155cb589e26 - SIRENE_STOCK_UNITE_LEGALE_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/825f4199-cadd-486c-ac46-a65a8ea1a047 - UN_JEUNE_UNE_SOLUTION_API_URL=https://mes-aides.1jeune1solution.beta.gouv.fr/api/ - AIRFLOW_HOSTNAME=${local.airflow_hostname} - API_HOSTNAME=${local.api_hostname} - API_TOKEN_ENABLED=${var.api_token_enabled} + DATAWAREHOUSE_DI_USERNAME=${var.datawarehouse_di_username} + STACK_VERSION=${var.stack_version} + + # pipeline secrets + AIRFLOW_CONN_S3_SOURCES=${var.airflow_conn_s3_sources} + AIRFLOW_VAR_DATAGOUV_API_KEY=${var.datagouv_api_key} + AIRFLOW_VAR_DORA_API_TOKEN=${var.dora_api_token} + AIRFLOW_VAR_EMPLOIS_API_TOKEN=${var.emplois_api_token} + AIRFLOW_VAR_GRIST_API_TOKEN=${var.grist_api_token} + AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY=${var.mes_aides_airtable_key} + AIRFLOW_VAR_SOLIGUIDE_API_TOKEN=${var.soliguide_api_token} + + # overrides + AIRFLOW_VAR_DORA_API_URL=${var.dora_api_url} EOT ) destination = "${local.work_dir}/.env" @@ -190,6 +193,11 @@ resource "null_resource" "up" { destination = "${local.work_dir}/docker-compose.yml" } + provisioner "file" { + source = "${path.root}/../pipeline/defaults.env" + destination = "${local.work_dir}/defaults.env" + } + provisioner "remote-exec" { inline = [ "cd ${local.work_dir}", diff --git a/deployment/modules/stack_data/variables.tf b/deployment/modules/stack_data/variables.tf index 269eacfbf..d63812b8e 100644 --- a/deployment/modules/stack_data/variables.tf +++ b/deployment/modules/stack_data/variables.tf @@ -105,12 +105,53 @@ variable "airflow__core__fernet_key" { sensitive = true } +variable "airflow_conn_s3_sources" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + +variable "datagouv_api_key" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + variable "dora_api_token" { description = "Used in extraction tasks orchestrated by airflow" type = string sensitive = true } +variable "dora_api_url" { + description = "Used in extraction tasks orchestrated by airflow" + sensitive = true +} + +variable "emplois_api_token" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + +variable "grist_api_token" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + +variable "mes_aides_airtable_key" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + +variable "soliguide_api_token" { + description = "Used in extraction tasks orchestrated by airflow" + type = string + sensitive = true +} + variable "api_token_enabled" { description = "Whether to enable the api token auth or not" type = string diff --git a/docker-compose.yml b/docker-compose.yml index 80c811cef..a0799b12b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,76 +15,17 @@ x-airflow-common: AIRFLOW__WEBSERVER__WORKERS: 1 # Connections + AIRFLOW_CONN_PG: postgresql://data-inclusion:data-inclusion@target-db:5432/data-inclusion AIRFLOW_CONN_S3: aws://@/data-inclusion-lake?endpoint_url=http%3A%2F%2Fminio%3A9000&aws_access_key_id=minioadmin&aws_secret_access_key=minioadmin AIRFLOW_CONN_S3_SOURCES: ${AIRFLOW_CONN_S3_SOURCES} - AIRFLOW_CONN_PG: postgresql://data-inclusion:data-inclusion@target-db:5432/data-inclusion # Variables - AIRFLOW_VAR_AGEFIPH_SERVICES_API_URL: ${AGEFIPH_SERVICES_API_URL} - AIRFLOW_VAR_AGEFIPH_STRUCTURES_FILE_URL: ${AGEFIPH_STRUCTURES_FILE_URL} - AIRFLOW_VAR_DBT_PROJECT_DIR: /opt/airflow/dbt - AIRFLOW_VAR_BAN_API_URL: ${BAN_API_URL} - AIRFLOW_VAR_CD35_FILE_URL: ${CD35_FILE_URL} - AIRFLOW_VAR_CD72_STRUCTURES_FILE_URL: ${CD72_STRUCTURES_FILE_URL} - AIRFLOW_VAR_CD72_SERVICES_FILE_URL: ${CD72_SERVICES_FILE_URL} - AIRFLOW_VAR_DATAGOUV_API_KEY: ${DATAGOUV_API_KEY} - AIRFLOW_VAR_DATAGOUV_API_URL: ${DATAGOUV_API_URL} - AIRFLOW_VAR_DATAGOUV_DI_DATASET_ID: ${DATAGOUV_DI_DATASET_ID} - AIRFLOW_VAR_DATAGOUV_DI_RESOURCE_IDS: ${DATAGOUV_DI_RESOURCE_IDS} - AIRFLOW_VAR_DI_EXTRA_SERVICES_FILE_URL: ${DI_EXTRA_SERVICES_FILE_URL} - AIRFLOW_VAR_DI_EXTRA_STRUCTURES_FILE_URL: ${DI_EXTRA_STRUCTURES_FILE_URL} - AIRFLOW_VAR_DORA_API_TOKEN: ${DORA_API_TOKEN} - AIRFLOW_VAR_DORA_API_URL: ${DORA_API_URL} - AIRFLOW_VAR_EMPLOIS_API_TOKEN: ${EMPLOIS_API_TOKEN} - AIRFLOW_VAR_EMPLOIS_API_URL: ${EMPLOIS_API_URL} - AIRFLOW_VAR_ETAB_PUB_FILE_URL: ${ETAB_PUB_FILE_URL} - AIRFLOW_VAR_FINESS_FILE_URL: ${FINESS_FILE_URL} - AIRFLOW_VAR_GRIST_API_TOKEN: ${GRIST_API_TOKEN} - AIRFLOW_VAR_IGN_ADMIN_EXPRESS_FILE_URL: ${IGN_ADMIN_EXPRESS_FILE_URL} - AIRFLOW_VAR_IMMERSION_FACILITEE_S3_KEY_PREFIX: ${IMMERSION_FACILITEE_S3_KEY_PREFIX} - AIRFLOW_VAR_INSEE_FIRSTNAME_FILE_URL: ${INSEE_FIRSTNAME_FILE_URL} - AIRFLOW_VAR_INSEE_COG_DATASET_URL: ${INSEE_COG_DATASET_URL} - AIRFLOW_VAR_MEDNUM_AIDANTS_CONNECT_DATASET_URL: ${MEDNUM_AIDANTS_CONNECT_DATASET_URL} - AIRFLOW_VAR_MEDNUM_ANGERS_DATASET_URL: ${MEDNUM_ANGERS_DATASET_URL} - AIRFLOW_VAR_MEDNUM_ASSEMBLEURS_DATASET_URL: ${MEDNUM_ASSEMBLEURS_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CD17_DATASET_URL: ${MEDNUM_CD17_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CD23_DATASET_URL: ${MEDNUM_CD23_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CD28_APPUI_TERRITORIAL_DATASET_URL: ${MEDNUM_CD28_APPUI_TERRITORIAL_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CD33_DATASET_URL: ${MEDNUM_CD33_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CD40_DATASET_URL: ${MEDNUM_CD40_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CD44_DATASET_URL: ${MEDNUM_CD44_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CD49_DATASET_URL: ${MEDNUM_CD49_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CD85_DATASET_URL: ${MEDNUM_CD85_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CD87_DATASET_URL: ${MEDNUM_CD87_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CONSEILLER_NUMERIQUE_DATASET_URL: ${MEDNUM_CONSEILLER_NUMERIQUE_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CONUMM_DATASET_URL: ${MEDNUM_CONUMM_DATASET_URL} - AIRFLOW_VAR_MEDNUM_CR93_DATASET_URL: ${MEDNUM_CR93_DATASET_URL} - AIRFLOW_VAR_MEDNUM_ETAPES_NUMERIQUE_DATASET_URL: ${MEDNUM_ETAPES_NUMERIQUE_DATASET_URL} - AIRFLOW_VAR_MEDNUM_FIBRE_64_DATASET_URL: ${MEDNUM_FIBRE_64_DATASET_URL} - AIRFLOW_VAR_MEDNUM_FRANCE_SERVICES_DATASET_URL: ${MEDNUM_FRANCE_SERVICES_DATASET_URL} - AIRFLOW_VAR_MEDNUM_FRANCE_TIERS_LIEUX_DATASET_URL: ${MEDNUM_FRANCE_TIERS_LIEUX_DATASET_URL} - AIRFLOW_VAR_MEDNUM_FRANCILIN_DATASET_URL: ${MEDNUM_FRANCILIN_DATASET_URL} - AIRFLOW_VAR_MEDNUM_HINAURA_DATASET_URL: ${MEDNUM_HINAURA_DATASET_URL} - AIRFLOW_VAR_MEDNUM_HUB_ANTILLES_DATASET_URL: ${MEDNUM_HUB_ANTILLES_DATASET_URL} - AIRFLOW_VAR_MEDNUM_HUB_LO_DATASET_URL: ${MEDNUM_HUB_LO_DATASET_URL} - AIRFLOW_VAR_MEDNUM_MULHOUSE_DATASET_URL: ${MEDNUM_MULHOUSE_DATASET_URL} - AIRFLOW_VAR_MEDNUM_RES_IN_DATASET_URL: ${MEDNUM_RES_IN_DATASET_URL} - AIRFLOW_VAR_MEDNUM_RHINOCC_DATASET_URL: ${MEDNUM_RHINOCC_DATASET_URL} - AIRFLOW_VAR_MEDNUM_ULTRA_NUMERIQUE_DATASET_URL: ${MEDNUM_ULTRA_NUMERIQUE_DATASET_URL} - AIRFLOW_VAR_MES_AIDES_AIDES_URL: ${MES_AIDES_AIDES_URL} - AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY: ${MES_AIDES_AIRTABLE_KEY} - AIRFLOW_VAR_MES_AIDES_GARAGES_URL: ${MES_AIDES_GARAGES_URL} - AIRFLOW_VAR_MONENFANT_CRECHES_FILE_URL: ${MONENFANT_CRECHES_FILE_URL} - AIRFLOW_VAR_ODSPEP_S3_KEY_PREFIX: ${ODSPEP_S3_KEY_PREFIX} - AIRFLOW_VAR_SIAO_FILE_URL: ${SIAO_FILE_URL} - AIRFLOW_VAR_SIRENE_STOCK_ETAB_GEOCODE_FILE_URL: ${SIRENE_STOCK_ETAB_GEOCODE_FILE_URL} - AIRFLOW_VAR_SIRENE_STOCK_ETAB_HIST_FILE_URL: ${SIRENE_STOCK_ETAB_HIST_FILE_URL} - AIRFLOW_VAR_SIRENE_STOCK_ETAB_LIENS_SUCCESSION_URL: ${SIRENE_STOCK_ETAB_LIENS_SUCCESSION_URL} - AIRFLOW_VAR_SIRENE_STOCK_UNITE_LEGALE_FILE_URL: ${SIRENE_STOCK_UNITE_LEGALE_FILE_URL} - AIRFLOW_VAR_SOLIGUIDE_API_TOKEN: ${SOLIGUIDE_API_TOKEN} - AIRFLOW_VAR_SOLIGUIDE_API_URL: ${SOLIGUIDE_API_URL} - AIRFLOW_VAR_UN_JEUNE_UNE_SOLUTION_API_URL: ${UN_JEUNE_UNE_SOLUTION_API_URL} - AIRFLOW_VAR_RESEAU_ALPHA_URL: ${RESEAU_ALPHA_URL} + AIRFLOW_VAR_DATAGOUV_API_KEY: ${AIRFLOW_VAR_DATAGOUV_API_KEY} + AIRFLOW_VAR_DORA_API_TOKEN: ${AIRFLOW_VAR_DORA_API_TOKEN} + AIRFLOW_VAR_EMPLOIS_API_TOKEN: ${AIRFLOW_VAR_EMPLOIS_API_TOKEN} + AIRFLOW_VAR_GRIST_API_TOKEN: ${AIRFLOW_VAR_GRIST_API_TOKEN} + AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY: ${AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY} + AIRFLOW_VAR_SOLIGUIDE_API_TOKEN: ${AIRFLOW_VAR_SOLIGUIDE_API_TOKEN} volumes: - ./pipeline/dbt:/opt/airflow/dbt @@ -144,6 +85,8 @@ services: <<: *airflow-common-depends-on airflow-init: condition: service_completed_successfully + env_file: + - ./pipeline/defaults.env airflow-init: <<: *airflow-common diff --git a/pipeline/.gitignore b/pipeline/.gitignore index b836e2137..59c1c8b6b 100644 --- a/pipeline/.gitignore +++ b/pipeline/.gitignore @@ -1,2 +1,4 @@ # Airflow temporary artifacts logs/ + +!defaults.env \ No newline at end of file diff --git a/pipeline/Dockerfile b/pipeline/Dockerfile index 03c16d961..a85631f91 100644 --- a/pipeline/Dockerfile +++ b/pipeline/Dockerfile @@ -46,6 +46,8 @@ FROM apache/airflow:2.7.0-python3.10 ENV PYTHONUNBUFFERED 1 ENV PYTHONDONTWRITEBYTECODE 1 +ENV AIRFLOW_VAR_DBT_PROJECT_DIR /opt/airflow/dbt + USER root RUN apt-get update \ @@ -64,7 +66,7 @@ COPY --chown=airflow:0 --from=compile-image /opt/airflow/venvs /opt/airflow/venv COPY --chown=airflow:0 dags /opt/airflow/dags COPY --chown=airflow:0 src /opt/airflow/src -COPY --chown=airflow:0 dbt /opt/airflow/dbt +COPY --chown=airflow:0 dbt ${AIRFLOW_VAR_DBT_PROJECT_DIR} RUN /opt/airflow/venvs/dbt/venv/bin/dbt deps --project-dir /opt/airflow/dbt diff --git a/pipeline/defaults.env b/pipeline/defaults.env new file mode 100644 index 000000000..bae548769 --- /dev/null +++ b/pipeline/defaults.env @@ -0,0 +1,76 @@ +# Public variables +# Do not add secrets to this file + +AIRFLOW_VAR_AGEFIPH_SERVICES_API_URL=https://www.agefiph.fr/jsonapi/node/aide_service +AIRFLOW_VAR_AGEFIPH_STRUCTURES_FILE_URL= +AIRFLOW_VAR_BAN_API_URL=https://api-adresse.data.gouv.fr +AIRFLOW_VAR_CD35_FILE_URL=https://data.ille-et-vilaine.fr/dataset/8d5ec0f0-ebe1-442d-9d99-655b37d5ad07/resource/8b781e9d-e11d-486c-98cf-0f63abfae8ed/download/annuaire_sociale_fixe.csv +AIRFLOW_VAR_CD72_STRUCTURES_FILE_URL=https://grist.incubateur.net/o/datainclusion/api/docs/dFpXXzs2fug9Kb7zZhyWyn/download/csv?tableId=Structures +AIRFLOW_VAR_CD72_SERVICES_FILE_URL=https://grist.incubateur.net/o/datainclusion/api/docs/dFpXXzs2fug9Kb7zZhyWyn/download/csv?tableId=Services +AIRFLOW_VAR_DI_EXTRA_SERVICES_FILE_URL=https://data-inclusion-lake.s3.fr-par.scw.cloud/sources/data-inclusion/2023-08-16/services.json +AIRFLOW_VAR_DI_EXTRA_STRUCTURES_FILE_URL=https://data-inclusion-lake.s3.fr-par.scw.cloud/sources/data-inclusion/2023-08-16/structures.json +AIRFLOW_VAR_DORA_API_URL=https://api.dora.inclusion.beta.gouv.fr/api/v2/ +AIRFLOW_VAR_EMPLOIS_API_URL=https://emplois.inclusion.beta.gouv.fr/api/v1/structures/ +AIRFLOW_VAR_ETAB_PUB_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/73302880-e4df-4d4c-8676-1a61bb997f3d +AIRFLOW_VAR_FINESS_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/3dc9b1d5-0157-440d-a7b5-c894fcfdfd45 +AIRFLOW_VAR_IGN_ADMIN_EXPRESS_FILE_URL=http://files.opendatarchives.fr/professionnels.ign.fr/adminexpress/ADMIN-EXPRESS-COG_3-0__SHP__FRA_WM_2021-05-19.7z +AIRFLOW_VAR_IMMERSION_FACILITEE_S3_KEY_PREFIX=sources/immersion-facilitee/2023-03-06/after-siretisation-auto/ +AIRFLOW_VAR_INSEE_FIRSTNAME_FILE_URL=https://www.insee.fr/fr/statistiques/fichier/2540004/nat2021_csv.zip +AIRFLOW_VAR_INSEE_COG_DATASET_URL=https://www.insee.fr/fr/statistiques/fichier/6800675 +AIRFLOW_VAR_MEDNUM_AIDANTS_CONNECT_DATASET_URL=https://www.data.gouv.fr/fr/datasets/64a7e1371873e4865272fbd8 +AIRFLOW_VAR_MEDNUM_ANGERS_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63d13f5ec3e0721ea91a2e5d +AIRFLOW_VAR_MEDNUM_ASSEMBLEURS_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63ca9667f0e2740ddfa1bf98 +AIRFLOW_VAR_MEDNUM_CD17_DATASET_URL=https://www.data.gouv.fr/fr/datasets/646dfe9eb1a95610f26aca49 +AIRFLOW_VAR_MEDNUM_CD23_DATASET_URL=https://www.data.gouv.fr/fr/datasets/641864db09184e5b19500970 +AIRFLOW_VAR_MEDNUM_CD28_APPUI_TERRITORIAL_DATASET_URL=https://www.data.gouv.fr/fr/datasets/6470869eb0d058e044cb5b81 +AIRFLOW_VAR_MEDNUM_CD33_DATASET_URL=https://www.data.gouv.fr/fr/datasets/645381a7d6b3815929f68b82 +AIRFLOW_VAR_MEDNUM_CD40_DATASET_URL=https://www.data.gouv.fr/fr/datasets/641860de365041f1d489eeff +AIRFLOW_VAR_MEDNUM_CD44_DATASET_URL=https://www.data.gouv.fr/fr/datasets/6413499019b0b5d458e4c145 +AIRFLOW_VAR_MEDNUM_CD49_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63ca9664c2b40e3e08c20453 +AIRFLOW_VAR_MEDNUM_CD85_DATASET_URL=https://www.data.gouv.fr/fr/datasets/64a2d7ec70a28c9b4f145be0 +AIRFLOW_VAR_MEDNUM_CD87_DATASET_URL=https://www.data.gouv.fr/fr/datasets/6446555e15560fb6d85fede1 +AIRFLOW_VAR_MEDNUM_CONSEILLER_NUMERIQUE_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63ca966a2facfcdb4dd3fa4f +AIRFLOW_VAR_MEDNUM_CONUMM_DATASET_URL=https://www.data.gouv.fr/fr/datasets/647d9dbd426d8423c5497de4 +AIRFLOW_VAR_MEDNUM_CR93_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63fcd16c3a6713f39ccfaed1 +AIRFLOW_VAR_MEDNUM_ETAPES_NUMERIQUE_DATASET_URL=https://www.data.gouv.fr/fr/datasets/645a7b7ba4f1578539412fd7 +AIRFLOW_VAR_MEDNUM_FIBRE_64_DATASET_URL=https://www.data.gouv.fr/fr/datasets/644bc30fd6f232ae075a0fc2 +AIRFLOW_VAR_MEDNUM_FRANCE_SERVICES_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63ca9666774b02d12be44596 +AIRFLOW_VAR_MEDNUM_FRANCE_TIERS_LIEUX_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63d7de199bb6eb8f4786982b +AIRFLOW_VAR_MEDNUM_FRANCILIN_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63d0fa06a3406203e0f16ed1 +AIRFLOW_VAR_MEDNUM_HINAURA_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63ca966d774b02d12be44597 +AIRFLOW_VAR_MEDNUM_HUB_ANTILLES_DATASET_URL=https://www.data.gouv.fr/fr/datasets/644bc30fce70044e21ccefca +AIRFLOW_VAR_MEDNUM_HUB_LO_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63fc697f74f28bd3729806e9 +AIRFLOW_VAR_MEDNUM_MULHOUSE_DATASET_URL=https://www.data.gouv.fr/fr/datasets/63f771187acc8da3c1143974 +AIRFLOW_VAR_MEDNUM_RES_IN_DATASET_URL=https://www.data.gouv.fr/fr/datasets/640a0753b2587b420259932e +AIRFLOW_VAR_MEDNUM_RHINOCC_DATASET_URL=https://www.data.gouv.fr/fr/datasets/6409d8ec539b90e1f25a174b +AIRFLOW_VAR_MEDNUM_ULTRA_NUMERIQUE_DATASET_URL=https://www.data.gouv.fr/fr/datasets/642eeced5b28a9521b5be918 +AIRFLOW_VAR_MES_AIDES_AIDES_URL=https://airtable.com/appoYjASNOp90Ryy5/tblN4m8Ayzxzgxl9W/viw7HRKMxq4hR2f27 +AIRFLOW_VAR_MES_AIDES_GARAGES_URL=https://airtable.com/appEvva5gyqqoQRnr/tblnGf4Y5EUEeVHtJ/viw9ZZAUkexq6uDaI +AIRFLOW_VAR_MONENFANT_CRECHES_FILE_URL= +AIRFLOW_VAR_ODSPEP_S3_KEY_PREFIX=sources/odspep/2023-01-23/denormalized/Exports/ +AIRFLOW_VAR_RESEAU_ALPHA_URL=https://www.reseau-alpha.org +AIRFLOW_VAR_SIAO_FILE_URL= +AIRFLOW_VAR_SIRENE_STOCK_ETAB_GEOCODE_FILE_URL=https://data.cquest.org/geo_sirene/v2019/last/StockEtablissementActif_utf8_geo.csv.gz +AIRFLOW_VAR_SIRENE_STOCK_ETAB_HIST_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/88fbb6b4-0320-443e-b739-b4376a012c32 +AIRFLOW_VAR_SIRENE_STOCK_ETAB_LIENS_SUCCESSION_URL=https://www.data.gouv.fr/fr/datasets/r/9c4d5d9c-4bbb-4b9c-837a-6155cb589e26 +AIRFLOW_VAR_SIRENE_STOCK_UNITE_LEGALE_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/825f4199-cadd-486c-ac46-a65a8ea1a047 +AIRFLOW_VAR_SOLIGUIDE_API_URL=https://api.soliguide.fr/ +AIRFLOW_VAR_UN_JEUNE_UNE_SOLUTION_API_URL=https://mes-aides.1jeune1solution.beta.gouv.fr/api/ + + +AIRFLOW_VAR_DATAGOUV_API_URL=https://www.data.gouv.fr/api/ +AIRFLOW_VAR_DATAGOUV_DI_DATASET_ID=6233723c2c1e4a54af2f6b2d +AIRFLOW_VAR_DATAGOUV_DI_RESOURCE_IDS=' +{ + "structures": { + "json": "4fc64287-e869-4550-8fb9-b1e0b7809ffa", + "csv": "fd4cb3ef-5c31-4c99-92fe-2cd8016c0ca5", + "xlsx": "fad88958-c9a7-4914-a9b8-89d1285c210a" + }, + "services": { + "json": "0eac1faa-66f9-4e49-8fb3-f0721027d89f", + "csv": "5abc151a-5729-4055-b0a9-d5691276f461", + "xlsx": "de2eb57b-113d-48eb-95d2-59a69ba36eb1" + } +} +' \ No newline at end of file