From ac3de01cc5175e20bed53ac65f680592a900b6c3 Mon Sep 17 00:00:00 2001 From: SebastienReuiller Date: Wed, 5 Jun 2024 12:34:02 +0200 Subject: [PATCH] =?UTF-8?q?feat(Structures):=20R=C3=A9indexation=20automat?= =?UTF-8?q?ique=20des=20structures=20dans=20l'index=20Elasticsearch=20(#12?= =?UTF-8?q?45)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...companies_update_users_and_count_fields.sh | 2 +- clevercloud/conversations_delete_outdated.sh | 2 +- clevercloud/crm_brevo_sync_companies.sh | 2 +- clevercloud/cron.json | 1 + clevercloud/siaes_export_all_siae_to_file.sh | 2 +- .../siaes_send_completion_reminder_emails.sh | 2 +- ...siaes_send_user_request_reminder_emails.sh | 2 +- clevercloud/siaes_sync_c2_c4.sh | 2 +- clevercloud/siaes_sync_elasticsearch_index.sh | 22 ++++++++++++++++++ .../siaes_sync_with_emplois_inclusion.sh | 2 +- .../siaes_update_api_entreprise_fields.sh | 2 +- clevercloud/siaes_update_api_qpv_fields.sh | 2 +- clevercloud/siaes_update_api_zrr_fields.sh | 2 +- clevercloud/siaes_update_count_fields.sh | 2 +- clevercloud/siaes_update_super_badge_field.sh | 2 +- ...stats_export_user_download_list_to_file.sh | 2 +- .../stats_export_user_search_list_to_file.sh | 2 +- ..._send_author_list_of_super_siaes_emails.sh | 2 +- ...nd_author_transactioned_question_emails.sh | 2 +- ...ers_send_siae_contacted_reminder_emails.sh | 2 +- ...rs_send_siae_interested_reminder_emails.sh | 2 +- ...send_siae_transactioned_question_emails.sh | 2 +- clevercloud/tenders_send_validated.sh | 2 +- clevercloud/tenders_update_count_fields.sh | 2 +- .../put_siaes_in_elasticsearch_index.py | 23 ++++++++++++++++--- lemarche/utils/apis/api_elasticsearch.py | 12 ++++++++++ 26 files changed, 77 insertions(+), 25 deletions(-) create mode 100755 clevercloud/siaes_sync_elasticsearch_index.sh diff --git a/clevercloud/companies_update_users_and_count_fields.sh b/clevercloud/companies_update_users_and_count_fields.sh index 41e03d631..e33a99805 100755 --- a/clevercloud/companies_update_users_and_count_fields.sh +++ b/clevercloud/companies_update_users_and_count_fields.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_UPDATE_COMPANY_USERS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/conversations_delete_outdated.sh b/clevercloud/conversations_delete_outdated.sh index b12f0889d..d139c247c 100755 --- a/clevercloud/conversations_delete_outdated.sh +++ b/clevercloud/conversations_delete_outdated.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_CONVERSATIONS_DELETE_OUTDATED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/crm_brevo_sync_companies.sh b/clevercloud/crm_brevo_sync_companies.sh index 9a9204523..7d15150bd 100644 --- a/clevercloud/crm_brevo_sync_companies.sh +++ b/clevercloud/crm_brevo_sync_companies.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_CRM_BREVO_SYNC_COMPANIES_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/cron.json b/clevercloud/cron.json index 7fd235067..56e943b09 100644 --- a/clevercloud/cron.json +++ b/clevercloud/cron.json @@ -3,6 +3,7 @@ "15 0 * * * $ROOT/clevercloud/stats_export_user_download_list_to_file.sh", "30 0 * * * $ROOT/clevercloud/stats_export_user_search_list_to_file.sh", "0 1 * * * $ROOT/clevercloud/tenders_update_count_fields.sh", + "15 1 * * 1 $ROOT/clevercloud/siaes_sync_elasticsearch_index.sh", "0 6 * * * $ROOT/clevercloud/conversations_delete_outdated.sh", "0 7 * * 1 $ROOT/clevercloud/siaes_sync_with_emplois_inclusion.sh", "5 7 * * 1 $ROOT/clevercloud/siaes_sync_c2_c4.sh", diff --git a/clevercloud/siaes_export_all_siae_to_file.sh b/clevercloud/siaes_export_all_siae_to_file.sh index 6a191961c..71441bd3a 100755 --- a/clevercloud/siaes_export_all_siae_to_file.sh +++ b/clevercloud/siaes_export_all_siae_to_file.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_SIAE_EXPORT_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/siaes_send_completion_reminder_emails.sh b/clevercloud/siaes_send_completion_reminder_emails.sh index fbd105234..f1431badc 100755 --- a/clevercloud/siaes_send_completion_reminder_emails.sh +++ b/clevercloud/siaes_send_completion_reminder_emails.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_SIAE_SEND_COMPLETION_REMINDER_EMAILS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/siaes_send_user_request_reminder_emails.sh b/clevercloud/siaes_send_user_request_reminder_emails.sh index de4ddcc69..1d2ee0c9d 100755 --- a/clevercloud/siaes_send_user_request_reminder_emails.sh +++ b/clevercloud/siaes_send_user_request_reminder_emails.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_SIAE_SEND_USER_REQUEST_REMINDER_EMAILS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/siaes_sync_c2_c4.sh b/clevercloud/siaes_sync_c2_c4.sh index 848819795..71620356d 100755 --- a/clevercloud/siaes_sync_c2_c4.sh +++ b/clevercloud/siaes_sync_c2_c4.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_SYNC_C2_C4_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/siaes_sync_elasticsearch_index.sh b/clevercloud/siaes_sync_elasticsearch_index.sh new file mode 100755 index 000000000..4f27f8019 --- /dev/null +++ b/clevercloud/siaes_sync_elasticsearch_index.sh @@ -0,0 +1,22 @@ +#!/bin/bash -l + +# Update Elasticsearch index for semantic search and IA Matching + +# Do not run if this env var is not set: +if [[ -z "$CRON_SIAES_SYNC_ELASTICSEARCH_INDEX_ENABLED" ]]; then + echo "CRON_SIAES_SYNC_ELASTICSEARCH_INDEX_ENABLED not set. Exiting..." + exit 0 +fi + +# About clever cloud cronjobs: +# https://developers.clever-cloud.com/doc/administrate/cron/ + +if [[ "$INSTANCE_NUMBER" != "0" ]]; then + echo "Instance number is ${INSTANCE_NUMBER}. Stop here." + exit 0 +fi + +# $APP_HOME is set by default by clever cloud. +cd $APP_HOME + +django-admin put_siaes_in_elasticsearch_index diff --git a/clevercloud/siaes_sync_with_emplois_inclusion.sh b/clevercloud/siaes_sync_with_emplois_inclusion.sh index 577200976..c20ad9d3f 100755 --- a/clevercloud/siaes_sync_with_emplois_inclusion.sh +++ b/clevercloud/siaes_sync_with_emplois_inclusion.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_SYNC_WITH_EMPLOIS_INCLUSION_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/siaes_update_api_entreprise_fields.sh b/clevercloud/siaes_update_api_entreprise_fields.sh index c55b29891..91cf998f2 100755 --- a/clevercloud/siaes_update_api_entreprise_fields.sh +++ b/clevercloud/siaes_update_api_entreprise_fields.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_UPDATE_API_ENTREPRISE_FIELDS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/siaes_update_api_qpv_fields.sh b/clevercloud/siaes_update_api_qpv_fields.sh index 6f0c00192..584f0741d 100755 --- a/clevercloud/siaes_update_api_qpv_fields.sh +++ b/clevercloud/siaes_update_api_qpv_fields.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_UPDATE_API_QPV_FIELDS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/siaes_update_api_zrr_fields.sh b/clevercloud/siaes_update_api_zrr_fields.sh index a706f3aba..73e01f26d 100755 --- a/clevercloud/siaes_update_api_zrr_fields.sh +++ b/clevercloud/siaes_update_api_zrr_fields.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_UPDATE_API_ZRR_FIELDS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/siaes_update_count_fields.sh b/clevercloud/siaes_update_count_fields.sh index 49134aa9b..d9de199fd 100755 --- a/clevercloud/siaes_update_count_fields.sh +++ b/clevercloud/siaes_update_count_fields.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_UPDATE_SIAE_COUNT_FIELDS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/siaes_update_super_badge_field.sh b/clevercloud/siaes_update_super_badge_field.sh index 74fd92a9f..7469166de 100755 --- a/clevercloud/siaes_update_super_badge_field.sh +++ b/clevercloud/siaes_update_super_badge_field.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_UPDATE_SIAE_SUPER_BADGE_FIELD_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/stats_export_user_download_list_to_file.sh b/clevercloud/stats_export_user_download_list_to_file.sh index 88c4d0ac0..f3b6987b2 100755 --- a/clevercloud/stats_export_user_download_list_to_file.sh +++ b/clevercloud/stats_export_user_download_list_to_file.sh @@ -7,7 +7,7 @@ if [[ -z "$CRON_SIAE_EXPORT_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/stats_export_user_search_list_to_file.sh b/clevercloud/stats_export_user_search_list_to_file.sh index 5c15dc80f..28a84dd0a 100755 --- a/clevercloud/stats_export_user_search_list_to_file.sh +++ b/clevercloud/stats_export_user_search_list_to_file.sh @@ -7,7 +7,7 @@ if [[ -z "$CRON_SIAE_EXPORT_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/tenders_send_author_list_of_super_siaes_emails.sh b/clevercloud/tenders_send_author_list_of_super_siaes_emails.sh index b9d3be2e5..a8f363de5 100755 --- a/clevercloud/tenders_send_author_list_of_super_siaes_emails.sh +++ b/clevercloud/tenders_send_author_list_of_super_siaes_emails.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_TENDER_SEND_AUTHOR_LIST_OF_SUPER_SIAES_EMAILS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/tenders_send_author_transactioned_question_emails.sh b/clevercloud/tenders_send_author_transactioned_question_emails.sh index a6c0d115e..6a42924b6 100755 --- a/clevercloud/tenders_send_author_transactioned_question_emails.sh +++ b/clevercloud/tenders_send_author_transactioned_question_emails.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_TENDER_SEND_AUTHOR_TRANSACTIONED_QUESTION_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/tenders_send_siae_contacted_reminder_emails.sh b/clevercloud/tenders_send_siae_contacted_reminder_emails.sh index 0c135f3e5..30e44a631 100755 --- a/clevercloud/tenders_send_siae_contacted_reminder_emails.sh +++ b/clevercloud/tenders_send_siae_contacted_reminder_emails.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_TENDER_SEND_SIAE_CONTACTED_REMINDER_EMAILS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/tenders_send_siae_interested_reminder_emails.sh b/clevercloud/tenders_send_siae_interested_reminder_emails.sh index e0613a3f4..182b0f330 100755 --- a/clevercloud/tenders_send_siae_interested_reminder_emails.sh +++ b/clevercloud/tenders_send_siae_interested_reminder_emails.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_TENDER_SEND_SIAE_INTERESTED_REMINDER_EMAILS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/tenders_send_siae_transactioned_question_emails.sh b/clevercloud/tenders_send_siae_transactioned_question_emails.sh index 9996cf5e2..c7bc466d0 100755 --- a/clevercloud/tenders_send_siae_transactioned_question_emails.sh +++ b/clevercloud/tenders_send_siae_transactioned_question_emails.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_TENDER_SEND_SIAE_TRANSACTIONED_QUESTION_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/tenders_send_validated.sh b/clevercloud/tenders_send_validated.sh index d53336dcb..3af37f10a 100755 --- a/clevercloud/tenders_send_validated.sh +++ b/clevercloud/tenders_send_validated.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_TENDER_SEND_VALIDATED_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/clevercloud/tenders_update_count_fields.sh b/clevercloud/tenders_update_count_fields.sh index 4e6402739..06e4733dc 100755 --- a/clevercloud/tenders_update_count_fields.sh +++ b/clevercloud/tenders_update_count_fields.sh @@ -9,7 +9,7 @@ if [[ -z "$CRON_UPDATE_TENDER_COUNT_FIELDS_ENABLED" ]]; then fi # About clever cloud cronjobs: -# https://www.clever-cloud.com/doc/tools/crons/ +# https://developers.clever-cloud.com/doc/administrate/cron/ if [[ "$INSTANCE_NUMBER" != "0" ]]; then echo "Instance number is ${INSTANCE_NUMBER}. Stop here." diff --git a/lemarche/siaes/management/commands/put_siaes_in_elasticsearch_index.py b/lemarche/siaes/management/commands/put_siaes_in_elasticsearch_index.py index 0232ad978..505c58e4e 100644 --- a/lemarche/siaes/management/commands/put_siaes_in_elasticsearch_index.py +++ b/lemarche/siaes/management/commands/put_siaes_in_elasticsearch_index.py @@ -7,7 +7,7 @@ from langchain_community.vectorstores import ElasticVectorSearch from lemarche.siaes.models import Siae -from lemarche.utils.apis.api_elasticsearch import URL_WITH_USER +from lemarche.utils.apis import api_elasticsearch, api_slack from lemarche.utils.commands import BaseCommand @@ -17,23 +17,40 @@ class Command(BaseCommand): def handle(self, *args, **options): self.stdout_success("put siae to elasticsearch index started..") + # Delete old Elasticsearch documents from siaes index before new indexing + deleted_documents = api_elasticsearch.siaes_delete_all_documents() + self.stdout_success( + f"The {deleted_documents} documents in the index {settings.ELASTICSEARCH_INDEX_SIAES} have been deleted" + ) + # Elasticsearch as a vector db embeddings = OpenAIEmbeddings() db = ElasticVectorSearch( - embedding=embeddings, elasticsearch_url=URL_WITH_USER, index_name=settings.ELASTICSEARCH_INDEX_SIAES + embedding=embeddings, + elasticsearch_url=api_elasticsearch.URL_WITH_USER, + index_name=settings.ELASTICSEARCH_INDEX_SIAES, ) # Siaes with completed description TextField.register_lookup(Length) # at least 10 characters siaes = Siae.objects.filter(description__length__gt=9).all() + created_documents = 0 for siae in siaes: db.from_texts( [siae.elasticsearch_index_text], metadatas=[siae.elasticsearch_index_metadata], embedding=embeddings, - elasticsearch_url=URL_WITH_USER, + elasticsearch_url=api_elasticsearch.URL_WITH_USER, index_name=settings.ELASTICSEARCH_INDEX_SIAES, ) time.sleep(1) self.stdout_success(f"{siae.name} added !") + created_documents += 1 + + msg_success = [ + f"----- Elasticsearch {settings.ELASTICSEARCH_INDEX_SIAES} index update -----", + f"Done! Deleted {deleted_documents} documents / created {created_documents} documents", + ] + self.stdout_messages_success(msg_success) + api_slack.send_message_to_channel("\n".join(msg_success)) diff --git a/lemarche/utils/apis/api_elasticsearch.py b/lemarche/utils/apis/api_elasticsearch.py index e41658ebd..e9066aa07 100644 --- a/lemarche/utils/apis/api_elasticsearch.py +++ b/lemarche/utils/apis/api_elasticsearch.py @@ -1,4 +1,5 @@ from django.conf import settings +from elasticsearch import Elasticsearch from langchain_community.embeddings.openai import OpenAIEmbeddings from langchain_community.vectorstores.elasticsearch import ElasticsearchStore @@ -12,6 +13,17 @@ ) +def siaes_delete_all_documents(): + """Delete all documents from the siaes index + + Returns: + int: number of deleted documents + """ + es = Elasticsearch(hosts=[URL], http_auth=(settings.ELASTICSEARCH_USERNAME, settings.ELASTICSEARCH_PASSWORD)) + result = es.delete_by_query(index=settings.ELASTICSEARCH_INDEX_SIAES, body={"query": {"match_all": {}}}) + return result["deleted"] + + def siaes_similarity_search(search_text: str, search_filter: list = [], siae_kinds: list = []): """Performs semantic search with Elasticsearch as a vector db