Skip to content

Commit

Permalink
Merge branch 'main' into vmttn/feat/provision-terraform-scaleway
Browse files Browse the repository at this point in the history
  • Loading branch information
vmttn committed Sep 29, 2023
2 parents 1e5088a + 5f0faff commit cdbfd31
Show file tree
Hide file tree
Showing 123 changed files with 3,395 additions and 1,033 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
repos:
# api
- repo: https://github.com/psf/black
rev: 22.3.0
rev: 23.7.0
hooks:
- id: black
name: api|black
Expand Down Expand Up @@ -41,7 +41,7 @@ repos:

# pipeline
- repo: https://github.com/psf/black
rev: 22.10.0
rev: 23.7.0
hooks:
- id: black
name: pipeline|black
Expand Down Expand Up @@ -105,7 +105,7 @@ repos:
files: ^siretisation
exclude: ^siretisation/django(/.*)*/static/vendor
- repo: https://github.com/psf/black
rev: 22.10.0
rev: 23.7.0
hooks:
- id: black
name: siretisation|black
Expand Down
7 changes: 6 additions & 1 deletion .template.env
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,15 @@ ANNUAIRE_ENTREPRISES_API_URL=https://recherche-entreprises.api.gouv.fr
### sources
###

# airflow connection strings *must* be urlencoded (using `urllib.parse.urlencode` for instance)

AGEFIPH_SERVICES_API_URL=https://www.agefiph.fr/jsonapi/node/aide_service
AGEFIPH_STRUCTURES_FILE_URL=
AIRFLOW_CONN_S3_SOURCES=
BAN_API_URL=https://api-adresse.data.gouv.fr
CD35_FILE_URL=https://data.ille-et-vilaine.fr/dataset/8d5ec0f0-ebe1-442d-9d99-655b37d5ad07/resource/8b781e9d-e11d-486c-98cf-0f63abfae8ed/download/annuaire_sociale_fixe.csv
CD72_FILE_URL=
CD72_STRUCTURES_FILE_URL=https://grist.incubateur.net/o/datainclusion/api/docs/dFpXXzs2fug9Kb7zZhyWyn/download/csv?tableId=Structures
CD72_SERVICES_FILE_URL=https://grist.incubateur.net/o/datainclusion/api/docs/dFpXXzs2fug9Kb7zZhyWyn/download/csv?tableId=Services
DI_EXTRA_SERVICES_FILE_URL=https://data-inclusion-lake.s3.fr-par.scw.cloud/sources/data-inclusion/2023-08-16/services.json
DI_EXTRA_STRUCTURES_FILE_URL=https://data-inclusion-lake.s3.fr-par.scw.cloud/sources/data-inclusion/2023-08-16/structures.json
DORA_API_TOKEN=
Expand All @@ -49,6 +52,7 @@ EMPLOIS_API_TOKEN=
EMPLOIS_API_URL=https://emplois.inclusion.beta.gouv.fr/api/v1/structures/
ETAB_PUB_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/73302880-e4df-4d4c-8676-1a61bb997f3d
FINESS_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/3dc9b1d5-0157-440d-a7b5-c894fcfdfd45
GRIST_API_TOKEN=
IGN_ADMIN_EXPRESS_FILE_URL=http://files.opendatarchives.fr/professionnels.ign.fr/adminexpress/ADMIN-EXPRESS-COG_3-0__SHP__FRA_WM_2021-05-19.7z
IMMERSION_FACILITEE_S3_KEY_PREFIX=sources/immersion-facilitee/2023-03-06/after-siretisation-auto/
INSEE_FIRSTNAME_FILE_URL=https://www.insee.fr/fr/statistiques/fichier/2540004/nat2021_csv.zip
Expand Down Expand Up @@ -85,6 +89,7 @@ MES_AIDES_AIRTABLE_KEY=
MES_AIDES_GARAGES_URL=https://airtable.com/appEvva5gyqqoQRnr/tblnGf4Y5EUEeVHtJ/viw9ZZAUkexq6uDaI
MONENFANT_CRECHES_FILE_URL=
ODSPEP_S3_KEY_PREFIX=sources/odspep/2023-01-23/denormalized/Exports/
RESEAU_ALPHA_URL=https://www.reseau-alpha.org
SIAO_FILE_URL=
SIRENE_STOCK_ETAB_GEOCODE_FILE_URL=https://data.cquest.org/geo_sirene/v2019/last/StockEtablissementActif_utf8_geo.csv.gz
SIRENE_STOCK_ETAB_HIST_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/88fbb6b4-0320-443e-b739-b4376a012c32
Expand Down
3 changes: 3 additions & 0 deletions .vscode/data-inclusion.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
},
{
"path": ".."
},
{
"path": "../../dora-back"
}
]
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Les données collectées sont:
* enrichies via les outils développés par data·inclusion:
* l'outil de correspondance, qui permet de faire correspondre 2 jeux de données brutes,
* l'outil de sirétisation, qui permet d'attribuer un siret aux structures, afin de croiser,
* alignées sur le [schéma de données de data·inclusion](https://schema.data.gouv.fr/betagouv/data-inclusion-schema/)
* alignées sur le [schéma de données de data·inclusion](https://schema.data.gouv.fr/gip-inclusion/data-inclusion-schema/)
* publiées régulièrement en [open data sur data.gouv](https://www.data.gouv.fr/fr/datasets/referentiel-de-loffre-dinsertion-liste-des-structures-et-services-dinsertion/), la plateforme de données publiques,
* consultables via une api.

Expand Down
3 changes: 2 additions & 1 deletion analyse/.template.env
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ FINESS_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/3dc9b1d5-0157-440d-a7b5-c
CD72_FILE_URL=
CD93_FILE_URL=
CD35_FILE_URL=https://data.ille-et-vilaine.fr/dataset/8d5ec0f0-ebe1-442d-9d99-655b37d5ad07/resource/665776ae-fa25-46ab-9bfd-c4241866f03f/download/annuaire_sociale_fixe.csv
CD62_FILE_URL=
CD62_FILE_URL=
RESEAU_ALPHA_TEST_W_LOCAL_FILES=0
199 changes: 199 additions & 0 deletions analyse/notebooks/grist/template.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -e ../../../pipeline\n",
"%pip install -e ../../../../data-inclusion-schema\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import dotenv\n",
"import pandas as pd\n",
"\n",
"from data_inclusion.scripts.tasks import grist\n",
"from data_inclusion import schema"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dotenv.load_dotenv(dotenv.find_dotenv())\n",
"\n",
"GRIST_API_TOKEN = os.environ[\"GRIST_API_TOKEN\"]\n",
"GRIST_API_URL = \"https://grist.incubateur.net/api\"\n",
"WORKSPACE_ID = \"27\"\n",
"DOCUMENT_NAME = \"template\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"grist_client = grist.GristClient(base_url=GRIST_API_URL, token=GRIST_API_TOKEN)\n",
"\n",
"document_id = grist_client.create_document(\n",
" workspace_id=WORKSPACE_ID, document_name=DOCUMENT_NAME\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for referentiel in [\n",
" \"frais\",\n",
" \"labels_nationaux\",\n",
" \"modes_accueil\",\n",
" \"modes_orientation_accompagnateur\",\n",
" \"modes_orientation_beneficiaire\",\n",
" \"profils\",\n",
" \"thematiques\",\n",
" \"typologies_de_services\",\n",
" \"typologies_de_structures\",\n",
" \"zones_de_diffusion_types\",\n",
"]:\n",
" table_id = grist_client.create_table(\n",
" document_id=document_id,\n",
" table_name=referentiel.capitalize(),\n",
" columns=[\n",
" {\"id\": \"value\", \"fields\": {\"label\": \"valeur\", \"type\": \"Text\"}},\n",
" {\"id\": \"label\", \"fields\": {\"label\": \"label\", \"type\": \"Text\"}},\n",
" ],\n",
" )\n",
"\n",
" referentiel_df = pd.read_csv(\n",
" f\"../../../pipeline/dbt/seeds/schema/{referentiel}.csv\",\n",
" dtype=str,\n",
" )\n",
"\n",
" # attention: pas idempotent\n",
"\n",
" grist_client.add_records(\n",
" document_id=document_id,\n",
" table_id=table_id,\n",
" records=[\n",
" {\"fields\": value_dict}\n",
" for value_dict in referentiel_df[[\"value\", \"label\"]].to_dict(\n",
" orient=\"records\"\n",
" )\n",
" ],\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"\n",
"def get_column_type(field) -> str:\n",
" match_referentiel = re.search(\n",
" r\"data_inclusion.schema.(?P<referentiel>\\w+)\", str(field.annotation)\n",
" )\n",
"\n",
" if match_referentiel is not None:\n",
" return \"Ref:\" + match_referentiel.group(\"referentiel\").capitalize()\n",
" elif \"float\" in str(field.annotation):\n",
" return \"Numeric\"\n",
" elif \"bool\" in str(field.annotation):\n",
" return \"Bool\"\n",
" elif \"date\" in str(field.annotation):\n",
" return \"DateTime:Europe/Paris\"\n",
"\n",
" return \"Text\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"grist_columns = [\n",
" {\n",
" \"id\": field_name,\n",
" \"fields\": {\n",
" \"label\": field_name,\n",
" \"type\": get_column_type(field_info),\n",
" # \"visibleCol\": TODO\n",
" },\n",
" }\n",
" for field_name, field_info in schema.Structure.model_fields.items()\n",
"]\n",
"\n",
"grist_client.create_table(\n",
" document_id=document_id,\n",
" table_name=\"Structures\",\n",
" columns=grist_columns,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"grist_columns = [\n",
" {\n",
" \"id\": field_name,\n",
" \"fields\": {\n",
" \"label\": field_name,\n",
" \"type\": get_column_type(field_info),\n",
" # \"visibleCol\": TODO\n",
" },\n",
" }\n",
" for field_name, field_info in schema.Service.model_fields.items()\n",
"]\n",
"\n",
"grist_client.create_table(\n",
" document_id=document_id,\n",
" table_name=\"Services\",\n",
" columns=grist_columns,\n",
")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
3 changes: 3 additions & 0 deletions analyse/notebooks/reseau-alpha/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
structures
services
*.html
Loading

0 comments on commit cdbfd31

Please sign in to comment.