Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GEIQ : meilleure gestion du type de la colonne SIRET du xls importé #5297

Merged
merged 2 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions itou/companies/management/commands/import_geiq.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def get_geiq_df(filename):
}
df = remap_columns(df, column_mapping=column_mapping)

# Force the siret column to a nullable integer type; otherwise replacing NaN elements with None blindly converts its values to float.
df["siret"] = df["siret"].astype("Int64")

# Replace NaN elements with None.
df = df.replace({np.nan: None})

Expand Down
7 changes: 7 additions & 0 deletions itou/utils/faker_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import random

from django.contrib.gis.geos import Point
from django.utils import timezone
from faker.providers import BaseProvider


Expand All @@ -13,6 +14,12 @@ def asp_ea2_filename(self, date: datetime.date = None) -> str:
date_part = random.randint(0, 99999999) if date is None else date.strftime("%Y%m%d")
return f"FLUX_EA2_ITOU_{date_part}.zip"

def geiq_filename(self, date: datetime.date = None) -> str:
    """Return a file name shaped like a dated FFGEIQ database export.

    Args:
        date: Day to embed in the name, formatted as ``YYYY-MM-DD``.
            When omitted, ``timezone.localdate()`` is used.

    Returns:
        ``"<YYYY-MM-DD> - Export BDD FFGEIQ.xls"``.
    """
    export_day = timezone.localdate() if date is None else date
    return f"{export_day.strftime('%Y-%m-%d')} - Export BDD FFGEIQ.xls"

def geopoint(self) -> Point:
return Point(
[float(coord) for coord in self.generator.format("local_latlng", country_code="FR", coords_only=True)]
Expand Down
177 changes: 177 additions & 0 deletions tests/companies/test_management_command_import_geiq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import pytest
from faker import Faker

from itou.companies.management.commands.import_geiq import get_geiq_df
from itou.utils.export import generate_excel_sheet
from tests.utils.test import create_fake_postcode


faker = Faker()

FILE_HEADERS = ["Nom", "Rue", "Rue (suite)", "Code Postal", "Ville", "SIRET", "e-mail"]


def generate_data(rows=185, rows_with_empty_siret=0, rows_with_empty_email=0, duplicated_sirets=0):
    """Build fake rows for a GEIQ export, in the column order of ``FILE_HEADERS``.

    Args:
        rows: Total number of rows to return, duplicates included.
        rows_with_empty_siret: Number of generated rows whose SIRET is ``""``.
            They are the first rows generated.
        rows_with_empty_email: Number of generated rows whose e-mail is ``""``.
            They are the first rows generated.
        duplicated_sirets: Number of rows to emit twice in a row. The first
            generated rows are the ones duplicated, so they overlap with the
            empty SIRET / e-mail rows when those are requested too.

    Returns:
        A list of at most ``rows`` lists (exactly ``rows`` when ``rows >= 0``),
        each one being
        ``[name, street, street complement, postcode, city, siret, email]``.
    """
    data = []
    duplicated_sirets_count = 0
    while len(data) < rows:
        if rows_with_empty_siret > 0:
            siret = ""
            rows_with_empty_siret -= 1
        else:
            # 14 characters: a literal "1" followed by 13 random digits.
            # NOTE(review): the fixed leading "1" presumably avoids a leading zero
            # being lost once the column is read as a number - confirm.
            siret = faker.numerify("1#############")

        if rows_with_empty_email > 0:
            email = ""
            rows_with_empty_email -= 1
        else:
            email = faker.email()

        row = [
            faker.name(),
            faker.street_address(),
            "Sous l'escalier",
            create_fake_postcode(),
            faker.city(),
            siret,
            email,
        ]
        data.append(row)

        # Emit the same row a second time while duplicates are still owed, but
        # only if a slot is left: without the size check a duplicate landing on
        # the last slot made the function return ``rows + 1`` rows.
        if duplicated_sirets_count < duplicated_sirets and len(data) < rows:
            data.append(row)
            duplicated_sirets_count += 1

    return data


def _write_geiq_file(directory, fake, **generate_kwargs):
    """Write a fake GEIQ export workbook into ``directory`` and return its path.

    Args:
        directory: Directory object exposing ``joinpath`` (the ``sftp_directory`` fixture).
        fake: Faker instance providing ``geiq_filename()``.
        **generate_kwargs: Forwarded unchanged to ``generate_data``.
    """
    data = generate_data(**generate_kwargs)
    file_path = directory.joinpath(fake.geiq_filename())
    with open(file_path, "wb") as xls_file:
        workbook = generate_excel_sheet(FILE_HEADERS, data)
        workbook.save(xls_file)
    return file_path


def test_get_geiq_df(sftp_directory, faker):
    """Check the dataframe and statistics returned by ``get_geiq_df`` on several fake exports.

    Each scenario overwrites the export file, then either checks the resulting
    shape and ``info_stats`` or expects ``get_geiq_df`` to raise ``AssertionError``.
    """
    # Correct data
    rows = 185
    file_path = _write_geiq_file(sftp_directory, faker, rows=rows)
    df, info_stats = get_geiq_df(file_path)
    assert df.shape == (rows, 8)
    assert info_stats == {
        "rows_in_file": rows,
        "rows_with_a_siret": rows,
        "rows_after_deduplication": rows,
        "rows_with_empty_email": 0,
    }

    # File too small, need at least 150 rows
    file_path = _write_geiq_file(sftp_directory, faker, rows=140)
    with pytest.raises(AssertionError):
        get_geiq_df(file_path)

    # Too many missing emails
    file_path = _write_geiq_file(sftp_directory, faker, rows=185, rows_with_empty_email=100)
    with pytest.raises(AssertionError):
        get_geiq_df(file_path)

    # Some missing emails
    rows = 185
    rows_with_empty_email = 20
    file_path = _write_geiq_file(sftp_directory, faker, rows=rows, rows_with_empty_email=rows_with_empty_email)
    df, info_stats = get_geiq_df(file_path)
    assert df.shape == (rows - rows_with_empty_email, 8)
    assert info_stats == {
        "rows_in_file": rows,
        "rows_with_a_siret": rows,
        "rows_after_deduplication": rows,
        "rows_with_empty_email": rows_with_empty_email,
    }

    # Too many missing sirets
    file_path = _write_geiq_file(sftp_directory, faker, rows=185, rows_with_empty_siret=100)
    with pytest.raises(AssertionError):
        get_geiq_df(file_path)

    # Missing some sirets
    rows = 185
    rows_with_empty_siret = 20
    file_path = _write_geiq_file(sftp_directory, faker, rows=rows, rows_with_empty_siret=rows_with_empty_siret)
    df, info_stats = get_geiq_df(file_path)
    assert df.shape == (rows - rows_with_empty_siret, 8)
    assert info_stats == {
        "rows_in_file": rows,
        "rows_with_a_siret": rows - rows_with_empty_siret,
        "rows_after_deduplication": rows - rows_with_empty_siret,
        "rows_with_empty_email": 0,
    }

    # Duplicated rows
    rows = 250
    duplicated_sirets = 20
    file_path = _write_geiq_file(sftp_directory, faker, rows=rows, duplicated_sirets=duplicated_sirets)
    df, info_stats = get_geiq_df(file_path)
    assert df.shape == (rows - duplicated_sirets, 8)
    assert info_stats == {
        "rows_in_file": rows,
        "rows_with_a_siret": rows,
        "rows_after_deduplication": rows - duplicated_sirets,
        "rows_with_empty_email": 0,
    }
Loading