Skip to content

Commit

Permalink
organize settings, elasticsearch api and siae index/meta infos
Browse files: browse the repository at this point in the history
  • Loading branch information
SebastienReuiller committed Dec 27, 2023
1 parent 7be4dc3 commit 59bf5d8
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 22 deletions.
2 changes: 2 additions & 0 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,9 @@

# ELASTICSEARCH
# ------------------------------------------------------------------------------
# Connection settings for the Elasticsearch cluster, each read from the
# environment variable of the same name.
# Scheme and port fall back to "https" / "443"; the port is kept as a string.
ELASTICSEARCH_SCHEME = env.str("ELASTICSEARCH_SCHEME", "https")
# Every other setting falls back to an empty string when its variable is unset.
ELASTICSEARCH_HOST = env.str("ELASTICSEARCH_HOST", "")
ELASTICSEARCH_PORT = env.str("ELASTICSEARCH_PORT", "443")
ELASTICSEARCH_USERNAME = env.str("ELASTICSEARCH_USERNAME", "")
ELASTICSEARCH_PASSWORD = env.str("ELASTICSEARCH_PASSWORD", "")
# Name of the index that holds the Siae documents.
ELASTICSEARCH_INDEX_SIAES = env.str("ELASTICSEARCH_INDEX_SIAES", "")
Original file line number Diff line number Diff line change
@@ -1,49 +1,39 @@
import time

from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models import TextField
from django.db.models.functions import Length
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import ElasticVectorSearch

from lemarche.siaes.models import Siae
from lemarche.utils.apis.api_elasticsearch import URL_WITH_USER
from lemarche.utils.commands import BaseCommand


# Management command that pushes Siae texts and metadata into the Elasticsearch
# index named by settings.ELASTICSEARCH_INDEX_SIAES, one Siae at a time.
# NOTE(review): this listing looks like a rendered diff whose +/- markers and
# indentation were lost, so several statements appear twice (old form followed
# by new form) -- confirm against the repository before editing the code.
class Command(BaseCommand):
help = ""

def handle(self, *args, **options):
self.stdout.write(self.style.SUCCESS("put siae to elasticsearch index started.."))
self.stdout_success("put siae to elasticsearch index started..")

# Elasticsearch as a vector db
# This hand-built URL embeds username and password and hard-codes https / port 443.
url = (
f"https://{settings.ELASTICSEARCH_USERNAME}:{settings.ELASTICSEARCH_PASSWORD}@"
f"{settings.ELASTICSEARCH_HOST}:443"
)
embeddings = OpenAIEmbeddings()
db = ElasticVectorSearch(
embedding=embeddings, elasticsearch_url=url, index_name=settings.ELASTICSEARCH_INDEX_SIAES
embedding=embeddings, elasticsearch_url=URL_WITH_USER, index_name=settings.ELASTICSEARCH_INDEX_SIAES
)

# Siaes with completed description
# Registering Length on TextField is what makes the `description__length` lookup below available.
TextField.register_lookup(Length) # at least 10 characters
siaes = Siae.objects.filter(description__length__gt=9).all()

for siae in siaes:
# Indexed text: the description, followed by a "Prestations:" list when the Siae has offers.
text = siae.description
if siae.offers.count() > 0:
offers = "\n\nPrestations:\n"
for offer in siae.offers.all():
offers += f"- {offer.name}:\n{offer.description}\n\n"
text += offers

# One from_texts call per Siae, each passing a single text and a single metadata dict.
db.from_texts(
[text],
metadatas=[{"id": siae.id, "name": siae.name, "website": siae.website if siae.website else ""}],
[siae.elasticsearch_index_text],
metadatas=[siae.elasticsearch_index_metadata],
embedding=embeddings,
elasticsearch_url=url,
elasticsearch_url=URL_WITH_USER,
index_name=settings.ELASTICSEARCH_INDEX_SIAES,
)
# Pause one second between Siaes -- presumably to throttle calls to the
# embedding / Elasticsearch services; TODO confirm the reason.
time.sleep(1)
self.stdout.write(self.style.SUCCESS(f"{siae.name} added !"))
self.stdout_success(f"{siae.name} added !")
14 changes: 14 additions & 0 deletions lemarche/siaes/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,20 @@ def latest_activity_at(self):
latest_activity_at = self.updated_at
return latest_activity_at

@property
def elasticsearch_index_text(self):
    """Return the text indexed in Elasticsearch for this object.

    The text is ``self.description``. When the object has offers, a
    ``"Prestations:"`` section is appended, listing every offer as
    ``"- <name>:\\n<description>"`` followed by a blank line.

    Returns:
        str: the description alone when there are no offers, otherwise the
        description followed by the offers section.
    """
    # Fetch the offers once and reuse them for both the emptiness check and
    # the rendering, instead of calling count() and then iterating all().
    offers = list(self.offers.all())
    if not offers:
        return self.description
    # Build the section with a single join rather than repeated "+=".
    offer_lines = "".join(f"- {offer.name}:\n{offer.description}\n\n" for offer in offers)
    return self.description + "\n\nPrestations:\n" + offer_lines

@property
def elasticsearch_index_metadata(self):
    """Return the metadata dict stored alongside this object's indexed text.

    Returns:
        dict: ``id`` and ``name`` copied from the instance, plus ``website``,
        where any falsy website value (e.g. ``None`` or ``""``) becomes ``""``.
    """
    return {"id": self.id, "name": self.name, "website": self.website or ""}

def sectors_list_string(self, display_max=3):
sectors_name_list = self.sectors.form_filter_queryset().values_list("name", flat=True)
if display_max and len(sectors_name_list) > display_max:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,27 @@
from langchain.vectorstores.elasticsearch import ElasticsearchStore


# "<host>:<port>" part shared by both URLs below.
BASE_URL = f"{settings.ELASTICSEARCH_HOST}:{settings.ELASTICSEARCH_PORT}"
# Cluster URL without credentials.
URL = f"{settings.ELASTICSEARCH_SCHEME}://{BASE_URL}"
# Cluster URL with "<username>:<password>@" embedded before the host.
# NOTE(review): the credentials are interpolated as-is; a username or password
# containing characters such as "@", ":" or "/" would presumably produce a
# malformed URL -- consider percent-encoding them (urllib.parse.quote) and
# confirm how the Elasticsearch client decodes the userinfo part.
URL_WITH_USER = (
f"{settings.ELASTICSEARCH_SCHEME}://{settings.ELASTICSEARCH_USERNAME}:{settings.ELASTICSEARCH_PASSWORD}@{BASE_URL}"
)


def siaes_similarity_search(search_text):
# Elasticsearch as a vector db
url = f"https://{settings.ELASTICSEARCH_HOST}:443"
"""Performs semantic search with Elasticsearch as a vector db
Args:
search_text (str): User search query
Returns:
list: list of siaes id that match the search query
"""
db = ElasticsearchStore(
embedding=OpenAIEmbeddings(),
es_user=settings.ELASTICSEARCH_USERNAME,
es_password=settings.ELASTICSEARCH_PASSWORD,
es_url=url,
es_url=URL,
index_name=settings.ELASTICSEARCH_INDEX_SIAES,
)

Expand Down
2 changes: 1 addition & 1 deletion lemarche/www/siaes/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from lemarche.conversations.models import Conversation
from lemarche.favorites.models import FavoriteList
from lemarche.siaes.models import Siae
from lemarche.utils.elasticsearch_tools import siaes_similarity_search
from lemarche.utils.apis.api_elasticsearch import siaes_similarity_search
from lemarche.utils.export import export_siae_to_csv, export_siae_to_excel
from lemarche.utils.s3 import API_CONNECTION_DICT
from lemarche.utils.urls import get_domain_url, get_encoded_url_from_params
Expand Down

0 comments on commit 59bf5d8

Please sign in to comment.