diff --git a/pipeline/dbt/models/_sources.yml b/pipeline/dbt/models/_sources.yml
index ae3b5b80..7f53ca3d 100644
--- a/pipeline/dbt/models/_sources.yml
+++ b/pipeline/dbt/models/_sources.yml
@@ -8,11 +8,17 @@ sources:
 
   - name: data_inclusion
     schema: data_inclusion
+    meta:
+      is_provider: true
     tables:
       - name: structures
         description: Entered by the data.inclusion team.
+        meta:
+          kind: structure
       - name: services
         description: Entered by the data.inclusion team.
+        meta:
+          kind: service
 
   - name: insee
     schema: insee
@@ -26,15 +32,27 @@ sources:
 
   - name: dora
     schema: dora
+    meta:
+      is_provider: true
     tables:
       - name: structures
+        meta:
+          kind: structure
       - name: services
+        meta:
+          kind: service
 
   - name: france_travail
     schema: france_travail
+    meta:
+      is_provider: true
     tables:
       - name: agences
+        meta:
+          kind: structure
       - name: services
+        meta:
+          kind: service
 
   - name: finess
     schema: finess
@@ -54,9 +72,15 @@ sources:
 
   - name: mes_aides
     schema: mes_aides
+    meta:
+      is_provider: true
     tables:
       - name: garages
+        meta:
+          kind: structure
       - name: aides
+        meta:
+          kind: service
 
   - name: annuaire_du_service_public
     schema: annuaire_du_service_public
@@ -65,29 +89,53 @@ sources:
 
   - name: cd35
     schema: cd35
+    meta:
+      is_provider: true
     tables:
       - name: organisations
+        meta:
+          kind: structure
 
   - name: cd72
     schema: cd72
+    meta:
+      is_provider: true
     tables:
       - name: structures
+        meta:
+          kind: structure
       - name: services
+        meta:
+          kind: service
 
   - name: emplois_de_linclusion
     schema: emplois_de_linclusion
+    meta:
+      is_provider: true
     tables:
       - name: siaes
+        meta:
+          kind: structure
       - name: organisations
+        meta:
+          kind: structure
 
   - name: mediation_numerique
     schema: mediation_numerique
+    meta:
+      is_provider: true
     tables:
       - name: structures
+        meta:
+          kind: structure
       - name: services
+        meta:
+          kind: service
 
   - name: odspep
     schema: odspep
+    meta:
+      is_provider: true
     tables:
       - name: DD009_ACTIONs_DEMARCHES
       - name: DD009_ADRESSE
@@ -110,27 +158,48 @@ sources:
       - name: DD009_REGION_RESSOURCE_2
       - name: DD009_REGION_SUGGESTION
       - name: DD009_RES_PARTENARIALE
+        meta:
+          kind: service
+          staging_name: res_partenariales
 
   - name: soliguide
     schema: soliguide
+    meta:
+      is_provider: true
     tables:
       - name: lieux
+        meta:
+          kind: structure
 
   - name: monenfant
     schema: monenfant
+    meta:
+      is_provider: true
     tables:
       - name: creches
+        meta:
+          kind: structure
 
   - name: agefiph
     schema: agefiph
+    meta:
+      is_provider: true
     tables:
       - name: services
+        meta:
+          kind: service
 
   - name: reseau_alpha
     schema: reseau_alpha
+    meta:
+      is_provider: true
     tables:
       - name: structures
+        meta:
+          kind: structure
       - name: formations
+        meta:
+          kind: service
 
   - name: brevo
     schema: brevo
@@ -144,6 +213,12 @@ sources:
 
   - name: action_logement
     schema: action_logement
+    meta:
+      is_provider: true
     tables:
       - name: structures
+        meta:
+          kind: structure
       - name: services
+        meta:
+          kind: service
diff --git a/pipeline/dbt/models/intermediate/quality/_quality_models.yml b/pipeline/dbt/models/intermediate/quality/_quality_models.yml
new file mode 100644
index 00000000..91a1977f
--- /dev/null
+++ b/pipeline/dbt/models/intermediate/quality/_quality_models.yml
@@ -0,0 +1,92 @@
+version: 2
+
+models:
+  - name: int_quality__stats
+    description: One row per provider stream with its row counts at the raw, staging, intermediate, marts and api stages.
+    data_tests:
+      - dbt_utils.unique_combination_of_columns:
+          combination_of_columns:
+            - source
+            - stream
+
+    columns:
+      - name: source
+        data_tests:
+          - not_null
+          - dbt_utils.not_constant
+          - accepted_values:
+              values:
+                - action_logement
+                - agefiph
+                - cd35
+                - cd72
+                - data_inclusion
+                - dora
+                - emplois_de_linclusion
+                - france_travail
+                - mediation_numerique
+                - mes_aides
+                - monenfant
+                - odspep
+                - reseau_alpha
+                - soliguide
+
+      - name: stream
+        data_tests:
+          - not_null
+          - dbt_utils.not_constant
+          - accepted_values:
+              values:
+                - agences
+                - aides
+                - creches
+                - DD009_RES_PARTENARIALE
+                - formations
+                - garages
+                - lieux
+                - organisations
+                - services
+                - siaes
+                - structures
+
+      - name: count_raw
+        data_tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+              inclusive: false
+
+      - name: count_stg
+        data_tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+              inclusive: false
+
+      - name: count_int
+        data_tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+              inclusive: false
+
+      - name: count_marts
+        data_tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+              inclusive: false
+
+      - name: count_api
+        data_tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+              inclusive: false
+
+      - name: count_contacts
+        data_tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+              inclusive: true
+
+      - name: count_addresses
+        data_tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+              inclusive: true
diff --git a/pipeline/dbt/models/intermediate/quality/int_quality__stats.sql b/pipeline/dbt/models/intermediate/quality/int_quality__stats.sql
new file mode 100644
index 00000000..4a07a6f8
--- /dev/null
+++ b/pipeline/dbt/models/intermediate/quality/int_quality__stats.sql
@@ -0,0 +1,125 @@
+/*
+Row counts of every provider stream at each stage of the pipeline.
+
+Included : all the providers that do actually end up in the marts.
+
+A source table is picked up when its source carries the meta "is_provider"
+and the table itself carries the meta "kind" in _sources.yml. The table-level
+meta "staging_name" overrides the staging model name, which otherwise
+defaults to the table name.
+*/
+
+-- depends_on: {{ ref('stg_action_logement__services') }}
+-- depends_on: {{ ref('stg_action_logement__structures') }}
+-- depends_on: {{ ref('int_action_logement__services') }}
+-- depends_on: {{ ref('int_action_logement__structures') }}
+-- depends_on: {{ ref('stg_agefiph__services') }}
+-- depends_on: {{ ref('int_agefiph__services') }}
+-- depends_on: {{ ref('stg_cd35__organisations') }}
+-- depends_on: {{ ref('int_cd35__structures') }}
+-- depends_on: {{ ref('stg_cd72__services') }}
+-- depends_on: {{ ref('stg_cd72__structures') }}
+-- depends_on: {{ ref('int_cd72__services') }}
+-- depends_on: {{ ref('int_cd72__structures') }}
+-- depends_on: {{ ref('stg_data_inclusion__services') }}
+-- depends_on: {{ ref('stg_data_inclusion__structures') }}
+-- depends_on: {{ ref('int_data_inclusion__services') }}
+-- depends_on: {{ ref('int_data_inclusion__structures') }}
+-- depends_on: {{ ref('stg_dora__services') }}
+-- depends_on: {{ ref('stg_dora__structures') }}
+-- depends_on: {{ ref('int_dora__services') }}
+-- depends_on: {{ ref('int_dora__structures') }}
+-- depends_on: {{ ref('stg_emplois_de_linclusion__organisations') }}
+-- depends_on: {{ ref('stg_emplois_de_linclusion__siaes') }}
+-- depends_on: {{ ref('int_emplois_de_linclusion__structures') }}
+-- depends_on: {{ ref('stg_france_travail__agences') }}
+-- depends_on: {{ ref('stg_france_travail__services') }}
+-- depends_on: {{ ref('int_france_travail__services') }}
+-- depends_on: {{ ref('int_france_travail__structures') }}
+-- depends_on: {{ ref('stg_mediation_numerique__services') }}
+-- depends_on: {{ ref('stg_mediation_numerique__structures') }}
+-- depends_on: {{ ref('int_mediation_numerique__services') }}
+-- depends_on: {{ ref('int_mediation_numerique__structures') }}
+-- depends_on: {{ ref('stg_mes_aides__aides') }}
+-- depends_on: {{ ref('stg_mes_aides__garages') }}
+-- depends_on: {{ ref('int_mes_aides__services') }}
+-- depends_on: {{ ref('int_mes_aides__structures') }}
+-- depends_on: {{ ref('stg_monenfant__creches') }}
+-- depends_on: {{ ref('int_monenfant__structures') }}
+-- depends_on: {{ ref('stg_odspep__res_partenariales') }}
+-- depends_on: {{ ref('int_odspep__services') }}
+-- depends_on: {{ ref('stg_reseau_alpha__formations') }}
+-- depends_on: {{ ref('stg_reseau_alpha__structures') }}
+-- depends_on: {{ ref('int_reseau_alpha__services') }}
+-- depends_on: {{ ref('int_reseau_alpha__structures') }}
+-- depends_on: {{ ref('stg_soliguide__lieux') }}
+-- depends_on: {{ ref('int_soliguide__structures') }}
+-- depends_on: {{ ref('marts_inclusion__services') }}
+-- depends_on: {{ ref('marts_inclusion__structures') }}
+
+WITH
+
+{% for source_node in graph.sources.values() if source_node.source_meta.is_provider and source_node.meta.kind %}
+
+    {% set source_name = source_node.source_name %}
+    {% set source_slug = source_name | replace("_", "-") %}
+    {% set stream_name = source_node.name %}
+    {% set staging_name = source_node.meta.staging_name or stream_name %}
+    {% set stream_kind = source_node.meta.kind ~ "s" %}
+
+    {{ source_name }}__{{ stream_name }}__tmp_marts AS (
+        SELECT * FROM {{ ref('marts_inclusion__' ~ stream_kind) }} WHERE source = '{{ source_slug }}'
+    ),
+
+    {{ source_name }}__{{ stream_name }}__tmp_api AS (
+        SELECT * FROM public.api__{{ stream_kind }} WHERE source = '{{ source_slug }}'
+    ),
+
+    {{ source_name }}__{{ stream_name }}__tmp_api_contacts AS (
+        SELECT * FROM {{ source_name }}__{{ stream_name }}__tmp_api
+        WHERE courriel IS NOT NULL AND telephone IS NOT NULL
+    ),
+
+    {# NOTE(review): this keeps the rows that LACK a full address, while the
+       contacts CTE keeps the rows that HAVE both contacts. Confirm which one
+       count_addresses is meant to report. #}
+    {{ source_name }}__{{ stream_name }}__tmp_api_adresse AS (
+        SELECT * FROM {{ source_name }}__{{ stream_name }}__tmp_api
+        WHERE NOT (adresse IS NOT NULL AND code_insee IS NOT NULL)
+    ),
+
+    {{ source_name }}__{{ stream_name }}__stats AS (
+        SELECT
+            '{{ run_started_at.strftime("%Y-%m-%d") }}' AS date_day,
+            '{{ source_name }}' AS source,
+            '{{ stream_name }}' AS stream, -- noqa: references.keywords
+            (SELECT COUNT(*) FROM {{ source(source_name, stream_name) }}) AS count_raw,
+            (SELECT COUNT(*) FROM {{ ref('stg_' ~ source_name ~ '__' ~ staging_name) }}) AS count_stg,
+            (SELECT COUNT(*) FROM {{ ref('int_' ~ source_name ~ '__' ~ stream_kind) }}) AS count_int,
+            (SELECT COUNT(*) FROM {{ source_name }}__{{ stream_name }}__tmp_marts) AS count_marts,
+            (SELECT COUNT(*) FROM {{ source_name }}__{{ stream_name }}__tmp_api) AS count_api,
+            (SELECT COUNT(*) FROM {{ source_name }}__{{ stream_name }}__tmp_api_contacts) AS count_contacts,
+            (SELECT COUNT(*) FROM {{ source_name }}__{{ stream_name }}__tmp_api_adresse) AS count_addresses
+    ),
+
+{% endfor %}
+
+final AS (
+
+    {#
+        The "kind" test must stay in the loop header: Jinja computes loop.last
+        over the filtered items only, so no UNION ALL is left dangling when the
+        last provider table iterated has no "kind".
+    #}
+    {% for source_node in graph.sources.values() if source_node.source_meta.is_provider and source_node.meta.kind %}
+
+        SELECT * FROM {{ source_node.source_name }}__{{ source_node.name }}__stats
+        {% if not loop.last %}
+            UNION ALL
+        {% endif %}
+
+    {% endfor %}
+
+)
+
+SELECT * FROM final