From c88c8370a57b1f349083443f146f59a0438d8d9d Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Mon, 27 Nov 2023 18:25:57 +0100 Subject: [PATCH 1/4] Add API_EMPLOIS_INCLUSION settings --- config/settings/base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/config/settings/base.py b/config/settings/base.py index 6e161ff68..70e22d6aa 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -559,6 +559,10 @@ # API Marché APProch MARCHE_APPROCH_TOKEN_RECETTE = env.str("MARCHE_APPROCH_TOKEN_RECETTE", "set-it") +# API Emplois de l'inclusion +API_EMPLOIS_INCLUSION_URL = "https://emplois.inclusion.beta.gouv.fr/api/v1" +API_EMPLOIS_INCLUSION_TOKEN = env.str("API_EMPLOIS_INCLUSION_TOKEN", "set-it") + # Django REST Framework (DRF) # https://www.django-rest-framework.org/ From 45ed29ed2c0d6b423824a674678f2f91652fd76c Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Mon, 27 Nov 2023 18:26:11 +0100 Subject: [PATCH 2/4] Update Sync script --- .../siaes/management/commands/sync_c1_c4.py | 77 +++++-------------- 1 file changed, 18 insertions(+), 59 deletions(-) diff --git a/lemarche/siaes/management/commands/sync_c1_c4.py b/lemarche/siaes/management/commands/sync_c1_c4.py index 920ace011..af90d8265 100644 --- a/lemarche/siaes/management/commands/sync_c1_c4.py +++ b/lemarche/siaes/management/commands/sync_c1_c4.py @@ -2,8 +2,7 @@ import re from datetime import timedelta -import psycopg2 -import psycopg2.extras +import requests from django.conf import settings from django.contrib.gis.geos import GEOSGeometry from django.core.management.base import CommandError @@ -18,6 +17,10 @@ from lemarche.utils.data import rename_dict_key +API_ENDPOINT = f"{settings.API_EMPLOIS_INCLUSION_URL}/marche" +API_HEADERS = {"Authorization": f"Token {settings.API_EMPLOIS_INCLUSION_TOKEN}"} + + UPDATE_FIELDS = [ # "name", # what happens to the slug if the name is updated? "brand", @@ -45,46 +48,6 @@ C1_EXTRA_KEYS = ["convention_is_active", "convention_asp_id"] -REQUEST_SQL = """ - SELECT - siae.id as id, - siae.siret, - siae.naf, - siae.kind, - siae.name, - siae.brand, - siae.phone, - siae.email, - siae.website, - siae.description, - siae.address_line_1, - siae.address_line_2, - siae.post_code, - siae.city, - siae.department, - siae.source, - ST_X(siae.coords::geometry) AS longitude, - ST_Y(siae.coords::geometry) AS latitude, - convention.is_active as convention_is_active, - convention.asp_id as convention_asp_id, - ad.admin_name as admin_name, - ad.admin_email as admin_email - FROM siaes_siae AS siae - LEFT OUTER JOIN siaes_siaeconvention AS convention - ON convention.id = siae.convention_id - LEFT OUTER JOIN ( - SELECT - m.siae_id as siae_id, - u.username as admin_name, - u.email as admin_email - FROM - siaes_siaemembership m - JOIN users_user u - ON m.user_id = u.id - WHERE m.is_admin = True - ) ad ON ad.siae_id = siae.id - """ - def map_siae_presta_type(siae_kind): if siae_kind: @@ -193,14 +156,20 @@ def handle(self, dry_run=False, **options): def c1_export(self): try: - conn = self.get_c1_connection() c1_list_temp = list() - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(REQUEST_SQL) - response = cur.fetchall() - for row in response: - c1_list_temp.append(dict(row)) + pagination = 0 + + # loop on API to fetch all the data + while True: + response = requests.get(f"{API_ENDPOINT}?page_size={pagination}", headers=API_HEADERS) + data = response.json() + if data["results"]: + for siae in response.json()["results"]: + c1_list_temp.append(siae) + if data["next"]: + pagination += 1000 + else: + break # clean fields c1_list_cleaned = list() @@ -253,21 +222,11 @@ def c1_export(self): self.stdout_info(f"Found {len(c1_list_cleaned)} Siae in C1") return c1_list_cleaned - except psycopg2.OperationalError as e: - raise psycopg2.OperationalError(e) except Exception as e: api_slack.send_message_to_channel("Erreur lors de la synchronisation C1 <-> C4") self.stdout_error(e) raise Exception(e) - def get_c1_connection(self): - try: - return psycopg2.connect(os.environ.get("C1_DSN")) - except psycopg2.OperationalError as e: - self.stdout_error(e) - api_slack.send_message_to_channel("Erreur de connexion à la db du C1 lors de la synchronisation C1 <-> C4") - raise psycopg2.OperationalError(e) - def filter_c1_export(self, c1_list): """ Some rules to filter out the siae that we don't want: From ad6dffb1cddc4eb297d5d1efbfdab08178e74e50 Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Mon, 27 Nov 2023 18:32:25 +0100 Subject: [PATCH 3/4] extract api call to dedicated file --- .../siaes/management/commands/sync_c1_c4.py | 24 +++--------------- lemarche/utils/apis/api_emplois_inclusion.py | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+), 21 deletions(-) create mode 100644 lemarche/utils/apis/api_emplois_inclusion.py diff --git a/lemarche/siaes/management/commands/sync_c1_c4.py b/lemarche/siaes/management/commands/sync_c1_c4.py index af90d8265..f135c5eb0 100644 --- a/lemarche/siaes/management/commands/sync_c1_c4.py +++ b/lemarche/siaes/management/commands/sync_c1_c4.py @@ -2,7 +2,6 @@ import re from datetime import timedelta -import requests from django.conf import settings from django.contrib.gis.geos import GEOSGeometry from django.core.management.base import CommandError @@ -11,16 +10,12 @@ from lemarche.siaes import constants as siae_constants from lemarche.siaes.models import Siae -from lemarche.utils.apis import api_mailjet, api_slack +from lemarche.utils.apis import api_emplois_inclusion, api_mailjet, api_slack from lemarche.utils.commands import BaseCommand from lemarche.utils.constants import DEPARTMENT_TO_REGION from lemarche.utils.data import rename_dict_key -API_ENDPOINT = f"{settings.API_EMPLOIS_INCLUSION_URL}/marche" -API_HEADERS = {"Authorization": f"Token {settings.API_EMPLOIS_INCLUSION_TOKEN}"} - - UPDATE_FIELDS = [ # "name", # what happens to the slug if the name is updated? "brand", @@ -154,22 +149,9 @@ def handle(self, dry_run=False, **options): self.stdout_messages_success(msg_success) api_slack.send_message_to_channel("\n".join(msg_success)) - def c1_export(self): + def c1_export(self): # noqa C901 try: - c1_list_temp = list() - pagination = 0 - - # loop on API to fetch all the data - while True: - response = requests.get(f"{API_ENDPOINT}?page_size={pagination}", headers=API_HEADERS) - data = response.json() - if data["results"]: - for siae in response.json()["results"]: - c1_list_temp.append(siae) - if data["next"]: - pagination += 1000 - else: - break + c1_list_temp = api_emplois_inclusion.get_siae_list() # clean fields c1_list_cleaned = list() diff --git a/lemarche/utils/apis/api_emplois_inclusion.py b/lemarche/utils/apis/api_emplois_inclusion.py new file mode 100644 index 000000000..2649170ad --- /dev/null +++ b/lemarche/utils/apis/api_emplois_inclusion.py @@ -0,0 +1,25 @@ +import requests +from django.conf import settings + + +API_ENDPOINT = f"{settings.API_EMPLOIS_INCLUSION_URL}/marche" +API_HEADERS = {"Authorization": f"Token {settings.API_EMPLOIS_INCLUSION_TOKEN}"} + + +def get_siae_list(): + siae_list = list() + pagination = 0 + + # loop on API to fetch all the data + while True: + response = requests.get(f"{API_ENDPOINT}?page_size={pagination}", headers=API_HEADERS) + data = response.json() + if data["results"]: + for siae in data["results"]: + siae_list.append(siae) + if data["next"]: + pagination += 1000 + else: + break + + return siae_list From 24c2ac0d963709d17fda4772f6f459d34afe3191 Mon Sep 17 00:00:00 2001 From: Raphael Odini Date: Thu, 30 Nov 2023 11:31:53 +0100 Subject: [PATCH 4/4] Fix pagination --- env.default.sh | 3 +++ lemarche/siaes/management/commands/sync_c1_c4.py | 7 +++++-- lemarche/utils/apis/api_emplois_inclusion.py | 13 ++++++++++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/env.default.sh b/env.default.sh index d247fa230..2c441fab5 100644 --- a/env.default.sh +++ b/env.default.sh @@ -14,6 +14,9 @@ export SECRET_KEY="coucou" export DJANGO_SETTINGS_MODULE="config.settings.dev" export TRACKER_HOST="https://example.com" +# APIs +export API_EMPLOIS_INCLUSION_TOKEN="" + # MTCAPTCHA # ######################## export MTCAPTCHA_PRIVATE_KEY="" diff --git a/lemarche/siaes/management/commands/sync_c1_c4.py b/lemarche/siaes/management/commands/sync_c1_c4.py index f135c5eb0..7b9b68ade 100644 --- a/lemarche/siaes/management/commands/sync_c1_c4.py +++ b/lemarche/siaes/management/commands/sync_c1_c4.py @@ -106,12 +106,15 @@ def add_arguments(self, parser): parser.add_argument("--dry-run", dest="dry_run", action="store_true", help="Dry run, no writes") def handle(self, dry_run=False, **options): - if not os.environ.get("C1_DSN"): - raise CommandError("Missing C1_DSN in env") + if not os.environ.get("API_EMPLOIS_INCLUSION_TOKEN"): + raise CommandError("Missing API_EMPLOIS_INCLUSION_TOKEN in env") self.stdout_info("-" * 80) self.stdout_info("Sync script between C1 & C4...") + if dry_run: + self.stdout_info("Running in dry run mode !") + self.stdout_info("-" * 80) self.stdout_info("Step 1: fetching C1 data") c1_list = self.c1_export() diff --git a/lemarche/utils/apis/api_emplois_inclusion.py b/lemarche/utils/apis/api_emplois_inclusion.py index 2649170ad..3c1cbe9f1 100644 --- a/lemarche/utils/apis/api_emplois_inclusion.py +++ b/lemarche/utils/apis/api_emplois_inclusion.py @@ -1,24 +1,31 @@ +import logging + import requests from django.conf import settings +logger = logging.getLogger(__name__) + + API_ENDPOINT = f"{settings.API_EMPLOIS_INCLUSION_URL}/marche" API_HEADERS = {"Authorization": f"Token {settings.API_EMPLOIS_INCLUSION_TOKEN}"} def get_siae_list(): siae_list = list() - pagination = 0 + pagination = 1 # loop on API to fetch all the data while True: - response = requests.get(f"{API_ENDPOINT}?page_size={pagination}", headers=API_HEADERS) + API_URL = f"{API_ENDPOINT}?page={pagination}&page_size={1000}" + logger.info(API_URL) + response = requests.get(API_URL, headers=API_HEADERS) data = response.json() if data["results"]: for siae in data["results"]: siae_list.append(siae) if data["next"]: - pagination += 1000 + pagination += 1 else: break