From 53b051fba1bfc61d96a08ae06c7d9fd634cef7f2 Mon Sep 17 00:00:00 2001 From: Victor Perron Date: Tue, 20 Aug 2024 11:59:16 +0200 Subject: [PATCH] chore(api) : Fix the import command and migrations The _di_geocodage_code_insee column was removed recently, so the import no longer worked. Also, the _di_geocodage_score column was always absent, so there was no need to keep it in the table. --- ...5237_517603187775_cleanup_unused_fields.py | 56 +++++++++++++++++++ .../api/inclusion_data/commands.py | 31 +--------- 2 files changed, 59 insertions(+), 28 deletions(-) create mode 100644 api/src/alembic/versions/20240820_115237_517603187775_cleanup_unused_fields.py diff --git a/api/src/alembic/versions/20240820_115237_517603187775_cleanup_unused_fields.py b/api/src/alembic/versions/20240820_115237_517603187775_cleanup_unused_fields.py new file mode 100644 index 00000000..aceb1ae6 --- /dev/null +++ b/api/src/alembic/versions/20240820_115237_517603187775_cleanup_unused_fields.py @@ -0,0 +1,56 @@ +"""Cleanup unused fields + +Revision ID: 517603187775 +Revises: 9f9a66546e3a +Create Date: 2024-08-20 11:52:37.705289 + +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "517603187775" +down_revision = "9f9a66546e3a" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.drop_column("api__services", "_di_geocodage_score") + op.drop_column("api__services", "_di_geocodage_code_insee") + op.drop_column("api__structures", "_di_geocodage_score") + op.drop_column("api__structures", "_di_geocodage_code_insee") + + +def downgrade() -> None: + op.add_column( + "api__structures", + sa.Column( + "_di_geocodage_code_insee", sa.VARCHAR(), autoincrement=False, nullable=True + ), + ) + op.add_column( + "api__structures", + sa.Column( + "_di_geocodage_score", + sa.DOUBLE_PRECISION(precision=53), + autoincrement=False, + nullable=True, + ), + ) + op.add_column( + "api__services", + sa.Column( + "_di_geocodage_code_insee", sa.VARCHAR(), autoincrement=False, nullable=True + ), + ) + op.add_column( + "api__services", + sa.Column( + "_di_geocodage_score", + sa.DOUBLE_PRECISION(precision=53), + autoincrement=False, + nullable=True, + ), + ) diff --git a/api/src/data_inclusion/api/inclusion_data/commands.py b/api/src/data_inclusion/api/inclusion_data/commands.py index 9f4e1fd0..0e89635a 100644 --- a/api/src/data_inclusion/api/inclusion_data/commands.py +++ b/api/src/data_inclusion/api/inclusion_data/commands.py @@ -124,41 +124,16 @@ def load_inclusion_data(): structures_df = structures_df.replace({np.nan: None}) services_df = services_df.replace({np.nan: None}) - # TODO: this must be fixed in the publication + # TODO(vperron) : To remove when we handle the city districts structures_df = structures_df.assign( code_insee=structures_df.code_insee.apply(clean_up_code_insee), - _di_geocodage_code_insee=structures_df._di_geocodage_code_insee.apply( - clean_up_code_insee - ), ) services_df = services_df.assign( code_insee=services_df.code_insee.apply(clean_up_code_insee), - _di_geocodage_code_insee=services_df._di_geocodage_code_insee.apply( - clean_up_code_insee - ), ) - # fill missing codes with geocoding results - # and 
overwrite existing ones if the geocoder is confident enough - geocoder_validity_threshold = 0.7 - structures_df = structures_df.assign( - code_insee=structures_df._di_geocodage_code_insee.where( - structures_df._di_geocodage_score > geocoder_validity_threshold, - structures_df.code_insee, - ) - ) - services_df = services_df.assign( - code_insee=services_df._di_geocodage_code_insee.where( - services_df._di_geocodage_score > geocoder_validity_threshold, - services_df.code_insee, - ) - ) - structures_df = structures_df.drop( - columns=["_di_geocodage_code_insee", "_di_geocodage_score"] - ) - services_df = services_df.drop( - columns=["_di_geocodage_code_insee", "_di_geocodage_score"] - ) + structures_df = structures_df.drop(columns=["_di_geocodage_score"]) + services_df = services_df.drop(columns=["_di_geocodage_score"]) structure_errors_df = validate_df(structures_df, model_schema=schema.Structure) service_errors_df = validate_df(services_df, model_schema=schema.Service)