fix(api): fix error if no error and fix schema validation
hlecuyer committed Jul 1, 2024
1 parent e182abf commit f7a536d
Showing 2 changed files with 32 additions and 13 deletions.
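The "fix error if no error" part of the change guards against empty validation-error frames. A minimal standalone sketch of the failure mode being avoided, assuming (this is an assumption, not shown in the diff below) that validate_df returns a DataFrame with no columns when nothing fails:

import pandas as pd

# Hypothetical stand-in for what validate_df might return when there are no errors:
# a frame built from zero error records has no columns at all.
errors_df = pd.DataFrame([])
print(errors_df.empty)  # True

# On such a frame, the pre-existing code paths would fail:
#   errors_df.groupby(["source", "errors.loc"])  -> KeyError: the columns do not exist
#   errors_df._di_surrogate_id                   -> AttributeError: no such column
# hence the new early return in log_errors() and the `if not ..._errors_df.empty:`
# guards around the isin() filtering in the diff below.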
14 changes: 14 additions & 0 deletions api/log.txt
@@ -0,0 +1,14 @@
_di_surrogate_id ... thematiques
0 agefiph-c7f4be8b-309e-4a6a-b562-c4f7f3bb3c5c ... [handicap]
1 france-travail-ARA0014 ... None
2 france-travail-ARA0018 ... None
3 france-travail-ARA0021 ... None
4 france-travail-ARA0024 ... None
... ... ... ...
23301 soliguide-9981 ... None
23302 soliguide-9983 ... None
23303 soliguide-9984 ... None
23304 soliguide-9995 ... None
23305 soliguide-9997 ... None

[23306 rows x 29 columns]
31 changes: 18 additions & 13 deletions api/src/data_inclusion/api/inclusion_data/commands.py
@@ -10,11 +10,10 @@
 from furl import furl
 from tqdm import tqdm
 
-from data_inclusion import schema
 from data_inclusion.api.code_officiel_geo import constants
 from data_inclusion.api.config import settings
 from data_inclusion.api.core import db
-from data_inclusion.api.inclusion_data import models
+from data_inclusion.api.inclusion_data import models, schemas
 
 logger = logging.getLogger(__name__)
 
@@ -95,6 +94,9 @@ def validate_df(df: pd.DataFrame, model_schema) -> pd.DataFrame:
 
 
 def log_errors(errors_df: pd.DataFrame):
+    if errors_df.empty:
+        logger.info("no error")
+        return
     info_str = str(
         errors_df.groupby(["source", "errors.loc"])["_di_surrogate_id"]
         .count()
@@ -129,24 +131,26 @@ def load_inclusion_data():
         code_insee=services_df.code_insee.apply(clean_up_code_insee)
     )
 
-    structure_errors_df = validate_df(structures_df, model_schema=schema.Structure)
-    service_errors_df = validate_df(services_df, model_schema=schema.Service)
+    structure_errors_df = validate_df(structures_df, model_schema=schemas.Structure)
+    service_errors_df = validate_df(services_df, model_schema=schemas.Service)
 
     logger.info("Structure validation errors:")
     log_errors(structure_errors_df)
     logger.info("Services validation errors:")
     log_errors(service_errors_df)
 
     # exclude invalid data
-    structures_df = structures_df[
-        ~structures_df._di_surrogate_id.isin(structure_errors_df._di_surrogate_id)
-    ]
-    services_df = services_df[
-        ~services_df._di_surrogate_id.isin(service_errors_df._di_surrogate_id)
-        & ~services_df._di_structure_surrogate_id.isin(
-            structure_errors_df._di_surrogate_id
-        )
-    ]
+    if not structure_errors_df.empty:
+        structures_df = structures_df[
+            ~structures_df._di_surrogate_id.isin(structure_errors_df._di_surrogate_id)
+        ]
+    if not service_errors_df.empty:
+        services_df = services_df[
+            ~services_df._di_surrogate_id.isin(service_errors_df._di_surrogate_id)
+            & ~services_df._di_structure_surrogate_id.isin(
+                structure_errors_df._di_surrogate_id
+            )
+        ]
 
     structure_data_list = structures_df.to_dict(orient="records")
     service_data_list = services_df.to_dict(orient="records")
@@ -196,4 +200,5 @@ def validate_data(model_schema, data):
     try:
         model_schema(**data)
     except pydantic.ValidationError as exc:
+        print(exc.errors())
         return exc.errors()
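The validate_data helper touched in the last hunk follows the standard pydantic pattern: constructing the model raises pydantic.ValidationError on invalid input, and exc.errors() returns a list of error dicts (which the commit now also prints). A self-contained usage sketch with a made-up Example model, not one of the project's actual schemas:

import pydantic


class Example(pydantic.BaseModel):
    # Hypothetical model, standing in for schemas.Structure / schemas.Service.
    id: str
    code_insee: str


def validate_data(model_schema, data):
    # Same shape as the helper in commands.py: returns None when valid,
    # otherwise the list of pydantic error dicts (loc, msg, type, ...).
    try:
        model_schema(**data)
    except pydantic.ValidationError as exc:
        print(exc.errors())
        return exc.errors()


validate_data(Example, {"id": "a", "code_insee": "75056"})  # returns None, prints nothing
validate_data(Example, {"id": "a"})  # prints and returns the "missing" error for code_insee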