Skip to content

Commit

Permalink
chore(api) : Fix the import command and migrations
Browse files Browse the repository at this point in the history
The _di_geocodage_code_insee column was removed recently, so the import
no longer worked.

Also, the _di_geocodage_score column was always absent, so there was no
need to keep it in the table.
  • Loading branch information
vperron committed Aug 20, 2024
1 parent 92208de commit 53b051f
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 28 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Cleanup unused fields
Revision ID: 517603187775
Revises: 9f9a66546e3a
Create Date: 2024-08-20 11:52:37.705289
"""

import sqlalchemy as sa
from alembic import op

# Revision identifiers, used by Alembic.
# `revision` is this migration's ID; `down_revision` is the ID of the
# migration it revises (see the "Revision ID" / "Revises" header above).
revision = "517603187775"
down_revision = "9f9a66546e3a"
# No branch label or cross-revision dependency is declared for this migration.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Drop the geocoding columns from the services and structures tables.

    Removes ``_di_geocodage_score`` and ``_di_geocodage_code_insee`` from
    both ``api__services`` and ``api__structures``.
    """
    geocoding_columns = ("_di_geocodage_score", "_di_geocodage_code_insee")
    # Services first, then structures; within each table, score before code.
    for table_name in ("api__services", "api__structures"):
        for column_name in geocoding_columns:
            op.drop_column(table_name, column_name)


def downgrade() -> None:
    """Re-add the geocoding columns to the structures and services tables.

    Adds ``_di_geocodage_code_insee`` (VARCHAR) and ``_di_geocodage_score``
    (double precision) back to ``api__structures`` and ``api__services`` as
    nullable columns. Only the columns are re-created; nothing here
    repopulates their values.
    """
    # Structures first, then services; within each table, code before score.
    for table_name in ("api__structures", "api__services"):
        # Build fresh Column objects on every iteration rather than sharing
        # one instance between the two tables.
        code_insee_column = sa.Column(
            "_di_geocodage_code_insee",
            sa.VARCHAR(),
            autoincrement=False,
            nullable=True,
        )
        op.add_column(table_name, code_insee_column)

        score_column = sa.Column(
            "_di_geocodage_score",
            sa.DOUBLE_PRECISION(precision=53),
            autoincrement=False,
            nullable=True,
        )
        op.add_column(table_name, score_column)
31 changes: 3 additions & 28 deletions api/src/data_inclusion/api/inclusion_data/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,41 +124,16 @@ def load_inclusion_data():
structures_df = structures_df.replace({np.nan: None})
services_df = services_df.replace({np.nan: None})

# TODO: this must be fixed in the publication
# TODO(vperron) : To remove when we handle the city districts
structures_df = structures_df.assign(
code_insee=structures_df.code_insee.apply(clean_up_code_insee),
_di_geocodage_code_insee=structures_df._di_geocodage_code_insee.apply(
clean_up_code_insee
),
)
services_df = services_df.assign(
code_insee=services_df.code_insee.apply(clean_up_code_insee),
_di_geocodage_code_insee=services_df._di_geocodage_code_insee.apply(
clean_up_code_insee
),
)

# fill missing codes with geocoding results
# and overwrite existing ones if the geocoder is confident enough
geocoder_validity_threshold = 0.7
structures_df = structures_df.assign(
code_insee=structures_df._di_geocodage_code_insee.where(
structures_df._di_geocodage_score > geocoder_validity_threshold,
structures_df.code_insee,
)
)
services_df = services_df.assign(
code_insee=services_df._di_geocodage_code_insee.where(
services_df._di_geocodage_score > geocoder_validity_threshold,
services_df.code_insee,
)
)
structures_df = structures_df.drop(
columns=["_di_geocodage_code_insee", "_di_geocodage_score"]
)
services_df = services_df.drop(
columns=["_di_geocodage_code_insee", "_di_geocodage_score"]
)
structures_df = structures_df.drop(columns=["_di_geocodage_score"])
services_df = services_df.drop(columns=["_di_geocodage_score"])

structure_errors_df = validate_df(structures_df, model_schema=schema.Structure)
service_errors_df = validate_df(services_df, model_schema=schema.Service)
Expand Down

0 comments on commit 53b051f

Please sign in to comment.