Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(pipeline): update xp data #178

Merged
merged 2 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions api/src/data_inclusion/api/entrypoints/fastapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def list_structures(
query = query.filter(models.Structure.source != "agefiph")
if not request.user.is_authenticated or "dora" not in request.user.username:
query = query.filter(models.Structure.source != "soliguide")
query = query.filter(models.Structure.source != "data-inclusion")

if id_ is not None:
query = query.filter_by(id=id_)
Expand Down Expand Up @@ -303,6 +304,7 @@ def list_services(
query = query.filter(models.Structure.source != "agefiph")
if not request.user.is_authenticated or "dora" not in request.user.username:
query = query.filter(models.Structure.source != "soliguide")
query = query.filter(models.Structure.source != "data-inclusion")

if departement is not None:
query = query.filter(
Expand Down Expand Up @@ -438,6 +440,7 @@ def search_services(
query = query.filter(models.Structure.source != "agefiph")
if not request.user.is_authenticated or "dora" not in request.user.username:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

j'espère que le username "adorable" n'existe pas ^^

query = query.filter(models.Structure.source != "soliguide")
query = query.filter(models.Structure.source != "data-inclusion")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit sans intérêt ici : de ce que je comprends, les données soliguide et data-inclusion sont donc disponibles si je ne suis pas authentifié ? Ou est-ce que je loupe quelque chose ?


if commune_instance is not None:
# filter by zone de diffusion
Expand Down
4 changes: 2 additions & 2 deletions pipeline/dags/dag_utils/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,12 +243,12 @@
"streams": [
{
"id": "services",
"filename": "services.json",
"filename": "services.csv",
"url": Variable.get("DI_EXTRA_SERVICES_FILE_URL", None),
},
{
"id": "structures",
"filename": "structures.json",
"filename": "structures.csv",
"url": Variable.get("DI_EXTRA_STRUCTURES_FILE_URL", None),
},
],
Expand Down
2 changes: 1 addition & 1 deletion pipeline/dags/import_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def load_from_s3_to_data_warehouse(
"annuaire-du-service-public": annuaire_du_service_public.read,
"cd35": lambda path: utils.read_csv(path, sep=";"),
"cd72": lambda path: utils.read_csv(path, sep=","),
"data-inclusion": utils.read_json,
"data-inclusion": lambda path: utils.read_csv(path, sep=","),
"dora": utils.read_json,
"emplois-de-linclusion": utils.read_json,
"finess": lambda path: utils.read_csv(path, sep=","),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,44 +16,44 @@ di_profil_by_dora_profil AS (

final AS (
SELECT
id AS "adresse_id",
TRUE AS "contact_public",
contact_nom AS "contact_nom_prenom",
courriel AS "courriel",
cumulable AS "cumulable",
date_creation::DATE AS "date_creation",
date_maj::DATE AS "date_maj",
date_suspension::DATE AS "date_suspension",
formulaire_en_ligne AS "formulaire_en_ligne",
frais_autres AS "frais_autres",
id AS "id",
justificatifs AS "justificatifs",
NULL AS "lien_source", -- ignored
modes_accueil AS "modes_accueil",
NULL::TEXT [] AS "modes_orientation_accompagnateur",
NULL AS "modes_orientation_accompagnateur_autres",
NULL::TEXT [] AS "modes_orientation_beneficiaire",
NULL AS "modes_orientation_beneficiaire_autres",
nom AS "nom",
presentation_resume AS "presentation_resume",
presentation_detail AS "presentation_detail",
prise_rdv AS "prise_rdv",
id AS "adresse_id",
contact_public AS "contact_public",
contact_nom AS "contact_nom_prenom",
courriel AS "courriel",
cumulable AS "cumulable",
date_creation::DATE AS "date_creation",
date_maj::DATE AS "date_maj",
date_suspension::DATE AS "date_suspension",
formulaire_en_ligne AS "formulaire_en_ligne",
frais_autres AS "frais_autres",
id AS "id",
justificatifs AS "justificatifs",
NULL AS "lien_source", -- ignored
modes_accueil AS "modes_accueil",
modes_orientation_accompagnateur AS "modes_orientation_accompagnateur",
modes_orientation_accompagnateur_autres AS "modes_orientation_accompagnateur_autres",
modes_orientation_beneficiaire AS "modes_orientation_beneficiaire",
modes_orientation_beneficiaire_autres AS "modes_orientation_beneficiaire_autres",
nom AS "nom",
presentation_resume AS "presentation_resume",
presentation_detail AS "presentation_detail",
prise_rdv AS "prise_rdv",
ARRAY(
SELECT di_profil_by_dora_profil.di_profil
FROM di_profil_by_dora_profil
WHERE di_profil_by_dora_profil.dora_profil = ANY(services.profils)
)::TEXT [] AS "profils",
recurrence AS "recurrence",
_di_source_id AS "source",
structure_id AS "structure_id",
NULL AS "telephone",
thematiques AS "thematiques",
types AS "types",
zone_diffusion_code AS "zone_diffusion_code",
zone_diffusion_nom AS "zone_diffusion_nom",
zone_diffusion_type AS "zone_diffusion_type",
pre_requis AS "pre_requis",
ARRAY[frais] AS "frais"
)::TEXT [] AS "profils",
recurrence AS "recurrence",
_di_source_id AS "source",
structure_id AS "structure_id",
telephone AS "telephone",
thematiques AS "thematiques",
types AS "types",
zone_diffusion_code AS "zone_diffusion_code",
zone_diffusion_nom AS "zone_diffusion_nom",
zone_diffusion_type AS "zone_diffusion_type",
pre_requis AS "pre_requis",
frais AS "frais"
FROM services
)

Expand Down
2 changes: 1 addition & 1 deletion pipeline/dbt/models/marts/opendata/opendata_services.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ final AS (
{{ obfuscate('courriel') }} AS "courriel",
{{ obfuscate('telephone') }} AS "telephone"
FROM services
WHERE services.source NOT IN ('soliguide', 'agefiph')
WHERE services.source NOT IN ('soliguide', 'agefiph', 'data-inclusion')
)

SELECT * FROM final
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ final AS (
ELSE structures.telephone
END AS "telephone"
FROM structures
WHERE structures.source NOT IN ('soliguide', 'siao', 'finess', 'agefiph')
WHERE structures.source NOT IN ('soliguide', 'siao', 'finess', 'agefiph', 'data-inclusion')
)

SELECT * FROM final
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ models:
- unique
- not_null
- dbt_utils.not_empty_string
- name: structure_id
tests:
- not_null
- relationships:
to: ref('stg_data_inclusion__structures')
field: id

- name: stg_data_inclusion__structures
columns:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,51 @@ WITH source AS (

final AS (
SELECT
_di_source_id AS "_di_source_id",
(data ->> 'contact_public')::BOOLEAN AS "contact_public",
(data ->> 'cumulable')::BOOLEAN AS "cumulable",
(data ->> 'date_creation')::TIMESTAMP WITH TIME ZONE AS "date_creation",
(data ->> 'date_maj')::TIMESTAMP WITH TIME ZONE AS "date_maj",
(data ->> 'date_suspension')::TIMESTAMP WITH TIME ZONE AS "date_suspension",
(data ->> 'latitude')::FLOAT AS "latitude",
(data ->> 'longitude')::FLOAT AS "longitude",
ARRAY(SELECT * FROM JSONB_ARRAY_ELEMENTS_TEXT(data -> 'modes_accueil'))::TEXT [] AS "modes_accueil",
ARRAY(SELECT * FROM JSONB_ARRAY_ELEMENTS_TEXT(data -> 'profils'))::TEXT [] AS "profils",
ARRAY(SELECT * FROM JSONB_ARRAY_ELEMENTS_TEXT(data -> 'thematiques'))::TEXT [] AS "thematiques",
ARRAY(SELECT * FROM JSONB_ARRAY_ELEMENTS_TEXT(data -> 'types'))::TEXT [] AS "types",
STRING_TO_ARRAY(NULLIF(TRIM(data ->> 'justificatifs'), ''), ',') AS "justificatifs",
STRING_TO_ARRAY(NULLIF(TRIM(data ->> 'pre_requis'), ''), ',') AS "pre_requis",
data ->> 'adresse' AS "adresse",
data ->> 'code_insee' AS "code_insee",
data ->> 'code_postal' AS "code_postal",
data ->> 'commune' AS "commune",
data ->> 'complement_adresse' AS "complement_adresse",
NULLIF(TRIM(data ->> 'contact_nom'), '') AS "contact_nom",
NULLIF(TRIM(data ->> 'contact_prenom'), '') AS "contact_prenom",
NULLIF(TRIM(data ->> 'courriel'), '') AS "courriel",
data ->> 'formulaire_en_ligne' AS "formulaire_en_ligne",
data ->> 'frais_autres' AS "frais_autres",
data ->> 'frais' AS "frais",
data ->> 'id' AS "id",
data ->> 'lien_source' AS "lien_source",
data ->> 'nom' AS "nom",
data ->> 'presentation_resume' AS "presentation_resume",
data ->> 'presentation_detail' AS "presentation_detail",
data ->> 'prise_rdv' AS "prise_rdv",
data ->> 'recurrence' AS "recurrence",
data ->> 'source' AS "source",
data ->> 'structure_id' AS "structure_id",
NULLIF(TRIM(data ->> 'telephone'), '') AS "telephone",
NULLIF(TRIM(data ->> 'zone_diffusion_code'), '') AS "zone_diffusion_code",
NULLIF(TRIM(data ->> 'zone_diffusion_nom'), '') AS "zone_diffusion_nom",
data ->> 'zone_diffusion_type' AS "zone_diffusion_type"
_di_source_id AS "_di_source_id",
'data-inclusion' AS "source",
CAST(data ->> 'contact_public' AS BOOLEAN) AS "contact_public",
CAST(data ->> 'cumulable' AS BOOLEAN) AS "cumulable",
TO_DATE(data ->> 'date_creation', 'DD/MM/YYYY') AS "date_creation",
TO_DATE(data ->> 'date_maj', 'DD/MM/YYYY') AS "date_maj",
TO_DATE(data ->> 'date_suspension', 'DD/MM/YYYY') AS "date_suspension",
CAST(data ->> 'latitude' AS FLOAT) AS "latitude",
CAST(data ->> 'longitude' AS FLOAT) AS "longitude",
ARRAY_REMOVE(ARRAY(SELECT value FROM JSONB_EACH_TEXT(data) WHERE key ~* 'modes_accueil.\d+'), NULL) AS "modes_accueil",
ARRAY_REMOVE(ARRAY(SELECT value FROM JSONB_EACH_TEXT(data) WHERE key ~* 'profils.\d+'), NULL) AS "profils",
ARRAY_REMOVE(ARRAY(SELECT value FROM JSONB_EACH_TEXT(data) WHERE key ~* 'thematiques.\d+'), NULL) AS "thematiques",
ARRAY_REMOVE(ARRAY(SELECT value FROM JSONB_EACH_TEXT(data) WHERE key ~* 'types.\d+'), NULL) AS "types",
ARRAY_REMOVE(ARRAY(SELECT value FROM JSONB_EACH_TEXT(data) WHERE key ~* 'justificatifs.\d+'), NULL) AS "justificatifs",
ARRAY_REMOVE(ARRAY(SELECT value FROM JSONB_EACH_TEXT(data) WHERE key ~* 'pre_requis.\d+'), NULL) AS "pre_requis",
data ->> 'adresse' AS "adresse",
data ->> 'code_insee' AS "code_insee",
data ->> 'code_postal' AS "code_postal",
data ->> 'commune' AS "commune",
data ->> 'complement_adresse' AS "complement_adresse",
data ->> 'contact_nom' AS "contact_nom",
data ->> 'contact_prenom' AS "contact_prenom",
data ->> 'courriel' AS "courriel",
data ->> 'formulaire_en_ligne' AS "formulaire_en_ligne",
data ->> 'frais_autres' AS "frais_autres",
ARRAY_REMOVE(ARRAY(SELECT value FROM JSONB_EACH_TEXT(data) WHERE key ~* 'frais.\d+'), NULL) AS "frais",
data ->> 'id' AS "id",
data ->> 'lien_source' AS "lien_source",
data ->> 'nom' AS "nom",
data ->> 'presentation_resume' AS "presentation_resume",
data ->> 'presentation_detail' AS "presentation_detail",
data ->> 'prise_rdv' AS "prise_rdv",
data ->> 'recurrence' AS "recurrence",
data ->> 'structure_id' AS "structure_id",
data ->> 'telephone' AS "telephone",
data ->> 'zone_diffusion_code' AS "zone_diffusion_code",
data ->> 'zone_diffusion_nom' AS "zone_diffusion_nom",
data ->> 'zone_diffusion_type' AS "zone_diffusion_type",
ARRAY_REMOVE(ARRAY(SELECT value FROM JSONB_EACH_TEXT(data) WHERE key ~* 'modes_orientation_beneficiaire.\d+'), NULL) AS "modes_orientation_beneficiaire",
data ->> 'modes_orientation_beneficiaire_autres' AS "modes_orientation_beneficiaire_autres",
ARRAY_REMOVE(ARRAY(SELECT value FROM JSONB_EACH_TEXT(data) WHERE key ~* 'modes_orientation_accompagnateur.\d+'), NULL) AS "modes_orientation_accompagnateur",
data ->> 'modes_orientation_accompagnateur_autres' AS "modes_orientation_accompagnateur_autres"
FROM source
WHERE
NOT COALESCE(CAST(data ->> '__ignore__' AS BOOLEAN), FALSE)
)

SELECT * FROM final
Loading
Loading