From 53b051fba1bfc61d96a08ae06c7d9fd634cef7f2 Mon Sep 17 00:00:00 2001
From: Victor Perron <victor@iso3103.net>
Date: Tue, 20 Aug 2024 11:59:16 +0200
Subject: [PATCH] chore(api) : Fix the import command and migrations

The _di_geocodage_code_insee field was removed recently, so the import
no longer worked.

Also, the _di_geocodage_score column was always absent, so there was no
need to keep it in the table.
---
 ...5237_517603187775_cleanup_unused_fields.py | 56 +++++++++++++++++++
 .../api/inclusion_data/commands.py            | 31 +---------
 2 files changed, 59 insertions(+), 28 deletions(-)
 create mode 100644 api/src/alembic/versions/20240820_115237_517603187775_cleanup_unused_fields.py

diff --git a/api/src/alembic/versions/20240820_115237_517603187775_cleanup_unused_fields.py b/api/src/alembic/versions/20240820_115237_517603187775_cleanup_unused_fields.py
new file mode 100644
index 00000000..aceb1ae6
--- /dev/null
+++ b/api/src/alembic/versions/20240820_115237_517603187775_cleanup_unused_fields.py
@@ -0,0 +1,56 @@
+"""Cleanup unused fields
+
+Revision ID: 517603187775
+Revises: 9f9a66546e3a
+Create Date: 2024-08-20 11:52:37.705289
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "517603187775"
+down_revision = "9f9a66546e3a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Drop the unused geocoding columns from both tables."""
+    for table_name in ("api__services", "api__structures"):
+        for column_name in ("_di_geocodage_score", "_di_geocodage_code_insee"):
+            op.drop_column(table_name, column_name)
+
+
+def downgrade() -> None:
+    """Re-add the geocoding columns dropped by ``upgrade``.
+
+    ``api__structures`` and then ``api__services`` each get back a nullable
+    ``_di_geocodage_code_insee`` column followed by a nullable
+    ``_di_geocodage_score`` column. Only the schema is re-created: nothing
+    here repopulates the values held before the upgrade.
+    """
+    # Tables and columns are handled in the reverse of the order used by ``upgrade``.
+    table_names = ("api__structures", "api__services")
+    for table_name in table_names:
+        # Text column for the INSEE code.
+        op.add_column(
+            table_name,
+            sa.Column(
+                "_di_geocodage_code_insee",
+                sa.VARCHAR(),
+                autoincrement=False,
+                nullable=True,
+            ),
+        )
+        # Double-precision column for the geocoding score.
+        op.add_column(
+            table_name,
+            sa.Column(
+                "_di_geocodage_score",
+                sa.DOUBLE_PRECISION(precision=53),
+                autoincrement=False,
+                nullable=True,
+            ),
+        )
diff --git a/api/src/data_inclusion/api/inclusion_data/commands.py b/api/src/data_inclusion/api/inclusion_data/commands.py
index 9f4e1fd0..0e89635a 100644
--- a/api/src/data_inclusion/api/inclusion_data/commands.py
+++ b/api/src/data_inclusion/api/inclusion_data/commands.py
@@ -124,41 +124,16 @@ def load_inclusion_data():
     structures_df = structures_df.replace({np.nan: None})
     services_df = services_df.replace({np.nan: None})
 
-    # TODO: this must be fixed in the publication
+    # TODO(vperron) : To remove when we handle the city districts
     structures_df = structures_df.assign(
         code_insee=structures_df.code_insee.apply(clean_up_code_insee),
-        _di_geocodage_code_insee=structures_df._di_geocodage_code_insee.apply(
-            clean_up_code_insee
-        ),
     )
     services_df = services_df.assign(
         code_insee=services_df.code_insee.apply(clean_up_code_insee),
-        _di_geocodage_code_insee=services_df._di_geocodage_code_insee.apply(
-            clean_up_code_insee
-        ),
     )
 
-    # fill missing codes with geocoding results
-    # and overwrite existing ones if the geocoder is confident enough
-    geocoder_validity_threshold = 0.7
-    structures_df = structures_df.assign(
-        code_insee=structures_df._di_geocodage_code_insee.where(
-            structures_df._di_geocodage_score > geocoder_validity_threshold,
-            structures_df.code_insee,
-        )
-    )
-    services_df = services_df.assign(
-        code_insee=services_df._di_geocodage_code_insee.where(
-            services_df._di_geocodage_score > geocoder_validity_threshold,
-            services_df.code_insee,
-        )
-    )
-    structures_df = structures_df.drop(
-        columns=["_di_geocodage_code_insee", "_di_geocodage_score"]
-    )
-    services_df = services_df.drop(
-        columns=["_di_geocodage_code_insee", "_di_geocodage_score"]
-    )
+    structures_df = structures_df.drop(columns=["_di_geocodage_score"])
+    services_df = services_df.drop(columns=["_di_geocodage_score"])
 
     structure_errors_df = validate_df(structures_df, model_schema=schema.Structure)
     service_errors_df = validate_df(services_df, model_schema=schema.Service)