diff --git a/itou/companies/management/commands/import_geiq.py b/itou/companies/management/commands/import_geiq.py
index 8c1811d1f5..a80e2bcb7f 100755
--- a/itou/companies/management/commands/import_geiq.py
+++ b/itou/companies/management/commands/import_geiq.py
@@ -34,6 +34,9 @@ def get_geiq_df(filename):
     }
     df = remap_columns(df, column_mapping=column_mapping)
 
+    # Force siret type to integer, otherwise replacing NaN elements with None blindly converts them to float.
+    df["siret"] = df["siret"].astype("Int64")
+
     # Replace NaN elements with None.
     df = df.replace({np.nan: None})
 
diff --git a/itou/utils/faker_providers.py b/itou/utils/faker_providers.py
index 7fa682ed76..6f60f46360 100644
--- a/itou/utils/faker_providers.py
+++ b/itou/utils/faker_providers.py
@@ -2,6 +2,7 @@
 import random
 
 from django.contrib.gis.geos import Point
+from django.utils import timezone
 from faker.providers import BaseProvider
 
 
@@ -13,6 +14,12 @@ def asp_ea2_filename(self, date: datetime.date = None) -> str:
         date_part = random.randint(0, 99999999) if date is None else date.strftime("%Y%m%d")
         return f"FLUX_EA2_ITOU_{date_part}.zip"
 
+    def geiq_filename(self, date: datetime.date = None) -> str:
+        if date is None:
+            date = timezone.localdate()
+        date_part = date.strftime("%Y-%m-%d")
+        return f"{date_part} - Export BDD FFGEIQ.xls"
+
     def geopoint(self) -> Point:
         return Point(
             [float(coord) for coord in self.generator.format("local_latlng", country_code="FR", coords_only=True)]
diff --git a/tests/companies/test_management_command_import_geiq.py b/tests/companies/test_management_command_import_geiq.py
new file mode 100644
index 0000000000..4e6e33a65f
--- /dev/null
+++ b/tests/companies/test_management_command_import_geiq.py
@@ -0,0 +1,177 @@
+import pytest
+from faker import Faker
+
+from itou.companies.management.commands.import_geiq import get_geiq_df
+from itou.utils.export import generate_excel_sheet
+from tests.utils.test import create_fake_postcode
+
+
+faker = Faker()
+
+FILE_HEADERS = ["Nom", "Rue", "Rue (suite)", "Code Postal", "Ville", "SIRET", "e-mail"]
+
+
+def generate_data(rows=185, rows_with_empty_siret=0, rows_with_empty_email=0, duplicated_sirets=0):
+    data = []
+    rows_count = 0
+    duplicated_sirets_count = 0
+    while rows_count < rows:
+        if rows_with_empty_siret > 0:
+            siret = ""
+            rows_with_empty_siret -= 1
+        else:
+            siret = faker.numerify("1#############")
+
+        if rows_with_empty_email > 0:
+            email = ""
+            rows_with_empty_email -= 1
+        else:
+            email = faker.email()
+
+        row = [
+            faker.name(),
+            faker.street_address(),
+            "Sous l'escalier",
+            create_fake_postcode(),
+            faker.city(),
+            siret,
+            email,
+        ]
+
+        data.append(row)
+
+        if duplicated_sirets_count < duplicated_sirets:
+            data.append(row)
+            rows_count += 1
+            duplicated_sirets_count += 1
+
+        rows_count += 1
+    return data
+
+
+def test_get_geiq_df(sftp_directory, faker):
+    # Correct data
+    rows = 185
+    rows_with_empty_siret = 0
+    rows_with_empty_email = 0
+    data = generate_data(
+        rows=rows, rows_with_empty_siret=rows_with_empty_siret, rows_with_empty_email=rows_with_empty_email
+    )
+    file_path = sftp_directory.joinpath(faker.geiq_filename())
+    with open(file_path, "wb") as xlsxfile:
+        workbook = generate_excel_sheet(FILE_HEADERS, data)
+        workbook.save(xlsxfile)
+    df, info_stats = get_geiq_df(file_path)
+    assert df.shape == (rows, 8)
+    assert info_stats == {
+        "rows_in_file": rows,
+        "rows_with_a_siret": rows,
+        "rows_after_deduplication": rows,
+        "rows_with_empty_email": rows_with_empty_email,
+    }
+
+    # File too small, needs at least 150 rows
+    rows = 140
+    rows_with_empty_siret = 0
+    rows_with_empty_email = 0
+    data = generate_data(
+        rows=rows, rows_with_empty_siret=rows_with_empty_siret, rows_with_empty_email=rows_with_empty_email
+    )
+    file_path = sftp_directory.joinpath(faker.geiq_filename())
+    with open(file_path, "wb") as xlsxfile:
+        workbook = generate_excel_sheet(FILE_HEADERS, data)
+        workbook.save(xlsxfile)
+    with pytest.raises(AssertionError):
+        df, info_stats = get_geiq_df(file_path)
+
+    # Too many missing emails
+    rows = 185
+    rows_with_empty_siret = 0
+    rows_with_empty_email = 100
+    data = generate_data(
+        rows=rows, rows_with_empty_siret=rows_with_empty_siret, rows_with_empty_email=rows_with_empty_email
+    )
+    file_path = sftp_directory.joinpath(faker.geiq_filename())
+    with open(file_path, "wb") as xlsxfile:
+        workbook = generate_excel_sheet(FILE_HEADERS, data)
+        workbook.save(xlsxfile)
+    with pytest.raises(AssertionError):
+        df, info_stats = get_geiq_df(file_path)
+
+    # Some missing emails
+    rows = 185
+    rows_with_empty_siret = 0
+    rows_with_empty_email = 20
+    data = generate_data(
+        rows=rows, rows_with_empty_siret=rows_with_empty_siret, rows_with_empty_email=rows_with_empty_email
+    )
+    file_path = sftp_directory.joinpath(faker.geiq_filename())
+    with open(file_path, "wb") as xlsxfile:
+        workbook = generate_excel_sheet(FILE_HEADERS, data)
+        workbook.save(xlsxfile)
+    df, info_stats = get_geiq_df(file_path)
+    assert df.shape == (rows - rows_with_empty_email, 8)
+    assert info_stats == {
+        "rows_in_file": rows,
+        "rows_with_a_siret": rows,
+        "rows_after_deduplication": rows,
+        "rows_with_empty_email": rows_with_empty_email,
+    }
+
+    # Too many missing sirets
+    rows = 185
+    rows_with_empty_siret = 100
+    rows_with_empty_email = 0
+    data = generate_data(
+        rows=rows, rows_with_empty_siret=rows_with_empty_siret, rows_with_empty_email=rows_with_empty_email
+    )
+    file_path = sftp_directory.joinpath(faker.geiq_filename())
+    with open(file_path, "wb") as xlsxfile:
+        workbook = generate_excel_sheet(FILE_HEADERS, data)
+        workbook.save(xlsxfile)
+    with pytest.raises(AssertionError):
+        df, info_stats = get_geiq_df(file_path)
+
+    # Missing some sirets
+    rows = 185
+    rows_with_empty_siret = 20
+    rows_with_empty_email = 0
+    data = generate_data(
+        rows=rows, rows_with_empty_siret=rows_with_empty_siret, rows_with_empty_email=rows_with_empty_email
+    )
+    file_path = sftp_directory.joinpath(faker.geiq_filename())
+    with open(file_path, "wb") as xlsxfile:
+        workbook = generate_excel_sheet(FILE_HEADERS, data)
+        workbook.save(xlsxfile)
+    df, info_stats = get_geiq_df(file_path)
+    assert df.shape == (rows - rows_with_empty_siret, 8)
+    assert info_stats == {
+        "rows_in_file": rows,
+        "rows_with_a_siret": rows - rows_with_empty_siret,
+        "rows_after_deduplication": rows - rows_with_empty_siret,
+        "rows_with_empty_email": 0,
+    }
+
+    # Duplicated rows
+    rows = 250
+    rows_with_empty_siret = 0
+    rows_with_empty_email = 0
+    duplicated_sirets = 20
+    data = generate_data(
+        rows=rows,
+        rows_with_empty_siret=rows_with_empty_siret,
+        rows_with_empty_email=rows_with_empty_email,
+        duplicated_sirets=duplicated_sirets,
+    )
+    file_path = sftp_directory.joinpath(faker.geiq_filename())
+    with open(file_path, "wb") as xlsxfile:
+        workbook = generate_excel_sheet(FILE_HEADERS, data)
+        workbook.save(xlsxfile)
+    df, info_stats = get_geiq_df(file_path)
+    assert df.shape == (rows - duplicated_sirets, 8)
+    assert info_stats == {
+        "rows_in_file": rows,
+        "rows_with_a_siret": rows,
+        "rows_after_deduplication": rows - duplicated_sirets,
+        "rows_with_empty_email": rows_with_empty_email,
+    }
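
Side note (not part of the patch): a minimal sketch of the pandas behaviour the new comment in `get_geiq_df` refers to, using an arbitrary 14-digit SIRET as sample data. A column containing a missing value is loaded as `float64`, so `replace({np.nan: None})` on its own leaves the surviving SIRETs as floats; casting to the nullable `Int64` dtype first keeps them as integers.

```python
import numpy as np
import pandas as pd

# A SIRET column with a missing value is read as float64 by default.
df = pd.DataFrame({"siret": [13002526500013, np.nan]})
assert df["siret"].dtype == "float64"

# Replacing NaN with None on the float column keeps the other values as floats.
print(df["siret"].replace({np.nan: None}).iloc[0])  # 13002526500013.0

# Casting to the nullable Int64 dtype first keeps integers; the missing value becomes <NA>.
df["siret"] = df["siret"].astype("Int64")
assert str(df["siret"].dtype) == "Int64"
print(df["siret"].iloc[0])  # 13002526500013
```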