Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import des ESATs du fichier Excel de l'ASP #948

Merged
merged 3 commits into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,3 +857,11 @@
# ------------------------------------------------------------------------------
MTCAPTCHA_PRIVATE_KEY = env.str("MTCAPTCHA_PRIVATE_KEY", "")
MTCAPTCHA_PUBLIC_KEY = env.str("MTCAPTCHA_PUBLIC_KEY", "")


# OPENAI
# ------------------------------------------------------------------------------
# Configuration of the OpenAI client, read from environment variables of the same name.
# The import_esat_from_asp management command copies these onto the `openai` module
# (organization, api_base, api_key) and uses OPENAI_MODEL as the chat model name.
# NOTE(review): the second argument ("") is presumably the fallback used when the
# variable is unset - confirm against the `env` helper.
OPENAI_ORG = env.str("OPENAI_ORG", "")
OPENAI_API_BASE = env.str("OPENAI_API_BASE", "")
OPENAI_API_KEY = env.str("OPENAI_API_KEY", "")
OPENAI_MODEL = env.str("OPENAI_MODEL", "")
7 changes: 7 additions & 0 deletions env.default.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,10 @@ export TRACKER_HOST="https://example.com"
# ########################
export MTCAPTCHA_PRIVATE_KEY=""
export MTCAPTCHA_PUBLIC_KEY=""

# OPENAI
# ########################
# Exported empty by default. The variable names match the OPENAI_* settings
# read in config/settings/base.py.
export OPENAI_ORG=""
export OPENAI_API_BASE=""
export OPENAI_API_KEY=""
export OPENAI_MODEL=""
7 changes: 7 additions & 0 deletions env.docker_default.local
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,10 @@ MAILJET_NEWSLETTER_CONTACT_LIST_BUYER_ID=
# ########################
MTCAPTCHA_PRIVATE_KEY=
MTCAPTCHA_PUBLIC_KEY=

# OPENAI
# ########################
# Same four variables as env.default.sh; they are read by the OPENAI_* settings
# in config/settings/base.py.
OPENAI_ORG=
OPENAI_API_BASE=
OPENAI_API_KEY=
OPENAI_MODEL=
153 changes: 153 additions & 0 deletions lemarche/siaes/management/commands/import_esat_from_asp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import csv
import os
import time

import openai
from django.conf import settings
from django.core.management.base import BaseCommand

from lemarche.siaes import constants as siae_constants
from lemarche.siaes.models import Siae
from lemarche.utils.apis.geocoding import get_geocoding_data
from lemarche.utils.constants import DEPARTMENT_TO_REGION, department_from_postcode
from lemarche.utils.data import rename_dict_key


# The ASP export is looked up in the directory that contains this command module.
_COMMAND_DIR = os.path.dirname(os.path.realpath(__file__))
FILE_NAME = "Annuaire_ESAT_20230717.csv"
FILE_PATH = f"{_COMMAND_DIR}/{FILE_NAME}"

# Column headers of the CSV file, in file order.
FIELD_NAME_LIST = ["Département", "Dénomination", "N° de Siret", "Adresse"]

# openai client configuration
# These assignments run at import time and set module-level state on `openai`.
aimodel = settings.OPENAI_MODEL
openai.api_key = settings.OPENAI_API_KEY
openai.api_base = settings.OPENAI_API_BASE
openai.organization = settings.OPENAI_ORG


def read_csv(file_path=None):
    """
    Load the ASP ESAT directory CSV into a list of row dicts.

    Args:
        file_path: path of the CSV file to read. Defaults to the module-level FILE_PATH.

    Returns:
        One dict per data row, keyed by the header names
        ("Département", "Dénomination", "N° de Siret", "Adresse").
    """
    if file_path is None:
        file_path = FILE_PATH

    # The headers contain accented characters, so the encoding is pinned instead of
    # depending on the platform default. newline="" lets the csv module handle line
    # endings itself, as its documentation requires.
    with open(file_path, newline="", encoding="utf-8") as csv_file:
        # Header : "Département","Dénomination","N° de Siret","Adresse"
        return list(csv.DictReader(csv_file, delimiter=","))


class Command(BaseCommand):
    """
    Import the ESATs listed in the ASP CSV file.

    For every CSV row the Siae is looked up by SIRET:
    - when it exists and its stored address differs textually from the CSV one, the OpenAI
      chat API is asked whether both addresses refer to the same place; when the answer
      starts with "no", the CSV address is geocoded and saved on the Siae
    - when it does not exist, a new ESAT Siae is created and then geocoded

    Usage: poetry run python manage.py import_esat_from_asp
    """

    # Upper bound on OpenAI calls per address, so a persistent failure (bad key, outage)
    # cannot stall the import forever.
    OPENAI_MAX_ATTEMPTS = 5
    # Seconds to wait between two OpenAI attempts.
    OPENAI_RETRY_DELAY = 3

    def handle(self, *args, **options):
        """Run the import and print a summary of existing, updated and new ESATs."""
        print("-" * 80)
        esat_list = read_csv()

        print("Importing ESAT FROM ASP...")

        already_exists = 0
        address_changes = 0
        news = 0

        for progress, esat in enumerate(esat_list, start=1):
            if (progress % 50) == 0:
                print(f"{progress}...")

            esat_siret = esat["N° de Siret"]
            esat_denom = esat["Dénomination"]
            if siae := Siae.objects.filter(siret=esat_siret).first():
                already_exists += 1

                address_in_db = f"{siae.address} {siae.post_code} {siae.city}".strip().lower()
                address_in_file = esat["Adresse"].strip().lower()

                # IA used to check if the address has really changed
                if address_in_file != address_in_db:
                    if self._address_has_changed(address_in_db, address_in_file):
                        address_changes += 1
                        try:
                            self.update_esat_address(siae, address_in_file)
                        except Exception as e:
                            # Best effort, like import_esat: report the failure and
                            # carry on with the remaining rows.
                            print(e)
                    # Printed for every textual mismatch, whether or not the model
                    # confirmed a real change.
                    print(f"{esat_denom} ({esat_siret}) addess change : {address_in_db} -> {address_in_file}")
            else:
                news += 1
                print(f"{esat_denom} ({esat_siret}) is a new !")
                self.import_esat(esat)

            # avoid DDOSing APIs
            time.sleep(1)

        print(
            f"Done with {already_exists} already_exits ({address_changes} addresses updated) and {news} new esat added."
        )

    def _address_has_changed(self, address_in_db, address_in_file):
        """
        Ask the OpenAI chat API whether two address strings designate different places.

        Returns:
            True when the model's answer starts with "no" (case-insensitive), i.e. the
            addresses do not refer to the same place. False when it answers anything else,
            or when no answer was obtained after OPENAI_MAX_ATTEMPTS attempts.
        """
        prompt = (
            f'By answering yes or no, tell me if these two addresses, "{address_in_db}" '
            f'and "{address_in_file}", refer to the same place ?'
        )
        messages = [{"role": "user", "content": prompt}]

        for _attempt in range(self.OPENAI_MAX_ATTEMPTS):
            try:
                chat_completion = openai.ChatCompletion.create(
                    model=aimodel, temperature=0.5, max_tokens=150, messages=messages, request_timeout=15
                )
                result = chat_completion.to_dict_recursive()
                answer = result["choices"][0]["message"]["content"].strip()
            except Exception as e:
                # Exception (not a bare except) so Ctrl-C still stops the command.
                # The actual error is printed instead of assuming a timeout.
                print(f"OpenAI API error, sleep before retry: {e}")
                time.sleep(self.OPENAI_RETRY_DELAY)
            else:
                return answer.lower().startswith("no")

        print(f"No answer from OpenAI after {self.OPENAI_MAX_ATTEMPTS} attempts, address left unchanged")
        return False

    def update_esat_address(self, siae, address):
        """
        Geocode `address` and store the result on `siae`.

        Sets address, post_code, city, department, region and coords from the geocoding
        result, then saves. When geocoding returns nothing, the Siae is left untouched
        and a message is printed.

        Raises:
            KeyError: when the computed department is missing from DEPARTMENT_TO_REGION.
        """
        geocoding_data = get_geocoding_data(address)
        if not geocoding_data:
            print(f"Geocoding not found,{siae.name},{address}")
            return

        print(geocoding_data)
        siae.address = geocoding_data["address"]
        siae.post_code = geocoding_data["post_code"]
        siae.city = geocoding_data["city"]
        siae.department = department_from_postcode(geocoding_data["post_code"])
        siae.region = DEPARTMENT_TO_REGION[siae.department]
        siae.coords = geocoding_data["coords"]
        siae.save()

    def import_esat(self, esat):  # noqa C901
        """
        Create a new ESAT Siae from a CSV row, then geocode its address.

        `esat` is modified in place: CSV headers are renamed or removed and default
        fields are added. Errors raised while creating or geocoding the Siae are
        printed, not propagated.
        """
        # store raw dict
        esat["import_source"] = "esat_asp"
        esat["import_raw_object"] = esat.copy()

        # defaults
        esat["kind"] = siae_constants.KIND_ESAT
        esat["source"] = Siae.SOURCE_ESAT
        esat["geo_range"] = siae_constants.GEO_RANGE_DEPARTMENT

        # basic fields
        rename_dict_key(esat, "Dénomination", "name")
        # Trim the name and collapse any run of whitespace into a single space.
        # The previous code discarded the result of strip().
        esat["name"] = " ".join(esat["name"].split())
        rename_dict_key(esat, "N° de Siret", "siret")
        esat["siret_is_valid"] = True

        full_address = esat.pop("Adresse")

        # create object
        try:
            print("Create new esat..")
            # These keys are removed before the dict is passed to Siae.objects.create.
            for key in ("import_source", "Département"):
                esat.pop(key)
            print(esat)
            siae = Siae.objects.create(**esat)
            self.update_esat_address(siae, full_address)
        except Exception as e:
            print(e)
            print(esat)

        # avoid DDOSing APIs
        time.sleep(0.3)
Loading