From 128c70f75060cae70dfcad7aee60d52d0b2ff4f8 Mon Sep 17 00:00:00 2001 From: Ewen Corre Date: Mon, 6 Jan 2025 11:31:00 +0100 Subject: [PATCH] import_geiq: force SIRET type to integer When replacing NaN elements with None, if the siret column is not explicitly defined as integer, it is converted to float. These values, which are later converted to strings, need to be integers; otherwise they are suffixed with `.0`. --- .../management/commands/import_geiq.py | 3 +++ .../test_management_command_import_geiq.py | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/itou/companies/management/commands/import_geiq.py b/itou/companies/management/commands/import_geiq.py index 8c1811d1f5..a80e2bcb7f 100755 --- a/itou/companies/management/commands/import_geiq.py +++ b/itou/companies/management/commands/import_geiq.py @@ -34,6 +34,9 @@ def get_geiq_df(filename): } df = remap_columns(df, column_mapping=column_mapping) + # Force siret type to integer, otherwise replacing NaN elements to None blindly converts them to float. + df["siret"] = df["siret"].astype("Int64") + # Replace NaN elements with None. 
df = df.replace({np.nan: None}) diff --git a/tests/companies/test_management_command_import_geiq.py b/tests/companies/test_management_command_import_geiq.py index 93c85cc0fb..4e6e33a65f 100644 --- a/tests/companies/test_management_command_import_geiq.py +++ b/tests/companies/test_management_command_import_geiq.py @@ -132,6 +132,26 @@ def test_get_geiq_df(sftp_directory, faker): with pytest.raises(AssertionError): df, info_stats = get_geiq_df(file_path) + # Missing some sirets + rows = 185 + rows_with_empty_siret = 20 + rows_with_empty_email = 0 + data = generate_data( + rows=rows, rows_with_empty_siret=rows_with_empty_siret, rows_with_empty_email=rows_with_empty_email + ) + file_path = sftp_directory.joinpath(faker.geiq_filename()) + with open(file_path, "wb") as xlsxfile: + workbook = generate_excel_sheet(FILE_HEADERS, data) + workbook.save(xlsxfile) + df, info_stats = get_geiq_df(file_path) + assert df.shape == (rows - rows_with_empty_siret, 8) + assert info_stats == { + "rows_in_file": rows, + "rows_with_a_siret": rows - rows_with_empty_siret, + "rows_after_deduplication": rows - rows_with_empty_siret, + "rows_with_empty_email": 0, + } + # Duplicated rows rows = 250 rows_with_empty_siret = 0