Skip to content

Commit

Permalink
Ouverture de la recherche par mot clé (#1073)
Browse files Browse the repository at this point in the history
* add geo_range in meta

* add city field

* tests for semantic search

* factor autocomplete
  • Loading branch information
SebastienReuiller authored Feb 9, 2024
1 parent ef46d20 commit b9fb4a6
Show file tree
Hide file tree
Showing 12 changed files with 220 additions and 45 deletions.
6 changes: 6 additions & 0 deletions lemarche/api/perimeters/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ def test_should_filter_perimeters_autocomplete_by_q_code(self):
self.assertEqual(len(response.data), 1 + 1)
self.assertEqual(response.data[0]["name"], "Isère")

def test_should_filter_perimeters_autocomplete_by_q_code_only_cities(self):
    """Searching "38" with ``kind=CITY`` returns a single result named "Grenoble"."""
    endpoint = reverse("api:perimeters-autocomplete-list")
    response = self.client.get(f"{endpoint}?q=38&kind=CITY")  # anonymous user
    results = response.data
    self.assertEqual(len(results), 1)
    self.assertEqual(results[0]["name"], "Grenoble")

def test_should_filter_perimeters_autocomplete_by_q_post_code(self):
url = reverse("api:perimeters-autocomplete-list") + "?q=38100" # anonymous user
response = self.client.get(url)
Expand Down
6 changes: 6 additions & 0 deletions lemarche/api/perimeters/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ def list(self, request, *args, **kwargs):
"""
return super().list(request, args, kwargs)

def get_queryset(self):
    """Return the perimeters to list, optionally restricted by the ``kind`` query parameter.

    When ``kind`` is one of the keys of ``Perimeter.KIND_CHOICES``, only the
    perimeters of that kind are returned. A missing, empty or unknown value
    is ignored and the view's default queryset is used instead.
    """
    kind = self.request.query_params.get("kind")
    valid_kinds = {key for key, _label in Perimeter.KIND_CHOICES}
    if kind and kind in valid_kinds:
        return Perimeter.objects.filter(kind=kind)
    # Delegate to DRF instead of returning ``self.queryset`` directly: the base
    # implementation hands back a fresh ``.all()`` clone, so the class-level
    # queryset is never evaluated (and its results cached) across requests.
    return super().get_queryset()


class PerimeterKindViewSet(mixins.ListModelMixin, viewsets.GenericViewSet):
serializer_class = PerimeterChoiceSerializer
Expand Down
3 changes: 3 additions & 0 deletions lemarche/perimeters/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@


class PerimeterQuerySet(models.QuerySet):
# Restrict the queryset to the rows whose ``kind`` is "CITY".
def cities(self):
return self.filter(kind="CITY")

# Restrict the queryset to the rows whose ``kind`` is "REGION".
def regions(self):
return self.filter(kind="REGION")

Expand Down
10 changes: 10 additions & 0 deletions lemarche/siaes/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,16 @@ def elasticsearch_index_metadata(self):
"lat": self.latitude,
"lon": self.longitude,
}

if self.geo_range == siae_constants.GEO_RANGE_COUNTRY:
metadata["geo_country"] = True
elif self.geo_range == siae_constants.GEO_RANGE_REGION:
metadata["geo_reg"] = self.region
elif self.geo_range == siae_constants.GEO_RANGE_DEPARTMENT:
metadata["geo_dep"] = self.department
elif self.geo_range == siae_constants.GEO_RANGE_CUSTOM:
metadata["geo_dist"] = self.geo_range_custom_distance

return metadata

def sectors_list_string(self, display_max=3):
Expand Down
27 changes: 17 additions & 10 deletions lemarche/static/js/perimeter_autocomplete_field.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,18 @@ const debounce = (callback, wait) => {
};
}

async function fetchSource(query) {
const res = await fetch(`${API_ENDPOINT}?q=${query}&results=10`);
/**
 * Fetch perimeter suggestions from the autocomplete API.
 *
 * @param {string} query - text typed by the user, sent as the `q` parameter
 * @param {string} [kind] - optional perimeter kind (e.g. "CITY"), sent as the `kind` parameter
 * @returns {Promise<*>} the parsed JSON body of the response
 */
async function fetchSource(query, kind = undefined) {
  // URLSearchParams percent-encodes every value, so characters typed by the
  // user such as `&`, `#` or `+` can no longer break or truncate the query string.
  const params = new URLSearchParams({ q: query, results: 10 });
  if (kind) {
    params.set('kind', kind);
  }
  const res = await fetch(`${API_ENDPOINT}?${params}`);
  const data = await res.json();
  return data; // data.results
}

class PerimeterAutocomplete {
constructor(perimeter_container_name, perimeter_input_id) {
constructor(perimeter_container_name, perimeter_input_id, perimeter_placeholder='Région, ville…', perimeter_kind = undefined) {
this.perimeter_container_name= perimeter_container_name;
this.perimeter_input_id= perimeter_input_id;
this.perimeter_kind= perimeter_kind;
this.perimeter_placeholder= perimeter_placeholder;
this.perimeter_name_input_id= `${this.perimeter_input_id}_name`;
this.perimeterAutocompleteContainer = document.getElementById(perimeter_container_name);
this.perimeterInput = document.getElementById(perimeter_input_id); // hidden
Expand All @@ -53,10 +55,10 @@ class PerimeterAutocomplete {
element: this.perimeterAutocompleteContainer,
id: this.perimeter_name_input_id,
name: this.perimeter_name_input_id, // url GET param name
placeholder: 'Région, ville…', // 'Autour de (Arras, Bobigny, Strasbourg…)', 'Région, département, ville'
placeholder: this.perimeter_placeholder,
minLength: 2,
defaultValue: this.initial_value_name,
source: this.getSource,
source: this.perimeter_kind === 'CITY' ? this.getSourceCity : this.getSource,
displayMenu: 'overlay',
templates: {
inputValue: this.inputValue, // returns the string value to be inserted into the input
Expand Down Expand Up @@ -89,6 +91,11 @@ class PerimeterAutocomplete {
populateResults(res);
}

async getSourceCity(query, populateResults) {
const res = await fetchSource(query, "CITY");
populateResults(res);
}

inputValue(result) {
// strip html from suggestion
if(!result) {
Expand Down Expand Up @@ -220,7 +227,7 @@ class PerimetersMultiAutocomplete {
// Always returns an empty string, whatever `result` is.
// NOTE(review): presumably this clears the text input after a selection — confirm against the caller.
inputValue(result) {
return "";
}

inputValueHiddenField(result) {
// we want to avoid clicks outside that return 'undefined'
if (result) {
Expand All @@ -239,20 +246,20 @@ class PerimetersMultiAutocomplete {
suggestion(result) {
// display suggestions as `name (kind)`
let resultName, resultKind = '';

// build resultName & resultKind from the result object
if (typeof result === 'object') {
resultName = result.name;
resultKind = (result.kind === 'CITY') ? result.department_code : KIND_MAPPING[result.kind];
}

// Edge case: if there is an initial value
// reconstruct resultName & resultKind from the result string
if (typeof result === 'string') {
resultName = result.substring(0, result.lastIndexOf(' '));
resultKind = result.includes('(') ? result.substring(result.lastIndexOf(' ') + 2, result.length - 1) : '';
}

let nameWithKind = '<strong>' + resultName + '</strong>';
if (resultKind) {
nameWithKind += ' <small>(' + resultKind + ')</small>';
Expand All @@ -266,7 +273,7 @@ class PerimetersMultiAutocomplete {
$(`#${idRefInput}`).remove();
$(this).remove();
}

createHiddenInputPerimeter(resultId, resultName) {
let removeIcon = $('<i>', { class: "ri-close-line font-weight-bold mr-0", "aria-hidden": true });
let resultIdString = `${this.perimeter_hidden_input_selector_prefix}-${resultId}`;
Expand Down
26 changes: 16 additions & 10 deletions lemarche/templates/siaes/search_results.html
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
Recherche par SIRET / nom
</a>
</li>
{% if is_admin %}
<li class="nav-item" role="presentation">
<a class="nav-link"
id="search-semantic-tab"
Expand All @@ -62,11 +61,11 @@
role="tab"
aria-controls="search-semantic"
aria-selected="false">
Recherche sémantique
<span class="badge badge-sm badge-pill badge-important ml-2">Nouveauté</span>
Recherche par mot clé
<span class="badge badge-sm badge-pill badge-marche ml-2">Beta</span>
<span class="badge badge-sm badge-pill badge-important">Nouveauté</span>
</a>
</li>
{% endif %}
<li class="nav-item-dropdown dropdown">
<a class="nav-link dropdown-toggle"
href="#"
Expand Down Expand Up @@ -205,7 +204,6 @@
</div>
</form>
</div>
{% if is_admin %}
<div class="tab-pane fade"
id="search-semantic"
role="tabpanel"
Expand All @@ -218,12 +216,17 @@
id="text-semantic-search-form">
{% bootstrap_form_errors form_semantic type="all" %}
<div class="row">
<div class="col-12 col-lg-8">
<div class="row">
<div class="col-12">{% bootstrap_field form_semantic.search_query %}</div>
<div class="col-12 col-lg-5">
{% bootstrap_field form_semantic.search_query %}
</div>
<div class="col-12 col-lg-5">
<div class="form-group">
<label for="id_city_name">{{ form_semantic.city.label }}</label>
<div id="dir_form_city" data-input-name="{{ form_semantic.city.name }}"></div>
{{ form_semantic.city }}
</div>
</div>
<div class="col-12 col-lg-4">
<div class="col-12 col-lg-2">
<span class="mb-2 d-none d-md-inline-block">&nbsp;</span>
<button id="text-search-submit"
class="btn btn-primary btn-block btn-ico"
Expand All @@ -235,7 +238,6 @@
</div>
</form>
</div>
{% endif %}
</div>
</div>
</div>
Expand Down Expand Up @@ -363,6 +365,10 @@ <h1 class="h4 mb-0">
const locationsAutoComplete = new PerimetersMultiAutocomplete(LOCATION_AUTOCOMPLETE_ID, LOCATION_AUTOCOMPLETE_CONTAINER_SELECTOR, LOCATION_SELECTED_CONTAINER_SELECTOR, LOCATION_HIDDEN_INPUT_SELECTOR_PREFIX, LOCATION_CURRENT_ID);
locationsAutoComplete.init();
{% endif %}

// init city form fields
const cityAutoComplete = new PerimeterAutocomplete("dir_form_city", "id_city", "Ville", "CITY");
cityAutoComplete.init();
});
</script>
<script type="text/javascript">
Expand Down
11 changes: 11 additions & 0 deletions lemarche/templates/siaes/semantic_search_results.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@
{% block content %}
<div class="col-12 col-lg-8">
<div class="c-box mb-3">
<div class="d-flex align-items-center mb-3">
<div class="flex-grow-1">
<h1 class="h4 mb-0">
{% if siaes %}
{{ siaes.count }} prestataire{{ siaes.count|pluralize }} correspond{{ siaes.count|pluralize:"ent" }} à vos critères
{% else %}
Oups, aucun prestataire trouvé !
{% endif %}
</h1>
</div>
</div>
{% if siaes %}
{% for siae in siaes %}
{% include "siaes/_card_search_result.html" with siae=siae %}
Expand Down
2 changes: 1 addition & 1 deletion lemarche/tenders/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ def set_siae_found_list(self):
and self.distance_location > 0
):
# with geo distance
siae_ids = api_elasticsearch.siaes_similarity_search(
siae_ids = api_elasticsearch.siaes_similarity_search_with_geo_distance(
self.description,
geo_distance=self.distance_location,
geo_lat=self.location.coords.y,
Expand Down
57 changes: 48 additions & 9 deletions lemarche/utils/apis/api_elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.elasticsearch import ElasticsearchStore

from lemarche.perimeters.models import Perimeter


BASE_URL = f"{settings.ELASTICSEARCH_HOST}:{settings.ELASTICSEARCH_PORT}"
URL = f"{settings.ELASTICSEARCH_SCHEME}://{BASE_URL}"
Expand All @@ -10,7 +12,7 @@
)


def siaes_similarity_search(search_text: str, geo_distance: int = None, geo_lat: float = None, geo_lon: float = None):
def siaes_similarity_search(search_text: str, search_filter: dict = {}):
"""Performs semantic search with Elasticsearch as a vector db
Args:
Expand All @@ -26,7 +28,20 @@ def siaes_similarity_search(search_text: str, geo_distance: int = None, geo_lat:
es_url=URL,
index_name=settings.ELASTICSEARCH_INDEX_SIAES,
)
similar_docs = db.similarity_search_with_score(search_text, k=50, filter=search_filter)
siaes_id = []
for similar_doc, similar_score in similar_docs:
# Discussion to understand score :
# https://github.com/langchain-ai/langchain/discussions/9984#discussioncomment-6860841
if similar_score > settings.ELASTICSEARCH_MIN_SCORE:
siaes_id.append(similar_doc.metadata["id"])

return siaes_id


def siaes_similarity_search_with_geo_distance(
search_text: str, geo_distance: int = None, geo_lat: float = None, geo_lon: float = None
):
search_filter = []
if geo_distance and geo_lat and geo_lon:
search_filter = [
Expand All @@ -41,12 +56,36 @@ def siaes_similarity_search(search_text: str, geo_distance: int = None, geo_lat:
}
]

similar_docs = db.similarity_search_with_score(search_text, k=50, filter=search_filter)
siaes_id = []
for similar_doc, similar_score in similar_docs:
# Discussion to understand score :
# https://github.com/langchain-ai/langchain/discussions/9984#discussioncomment-6860841
if similar_score > settings.ELASTICSEARCH_MIN_SCORE:
siaes_id.append(similar_doc.metadata["id"])
return siaes_similarity_search(search_text, search_filter)

return siaes_id

def siaes_similarity_search_with_city(search_text: str, city: Perimeter, geo_distance: int = 50):
    """Run the semantic search restricted to documents whose declared range can cover ``city``.

    A document is kept when at least one of these clauses matches its metadata:

    - ``metadata.geo_country`` is ``True``;
    - ``metadata.geo_reg`` equals ``city.region_code``;
    - ``metadata.geo_dep`` equals ``city.department_code``;
    - ``metadata.geo_location`` lies within ``geo_distance`` km of the city coordinates.

    Args:
        search_text: Free text forwarded to ``siaes_similarity_search``.
        city: Perimeter providing ``region_code``, ``department_code``,
            ``latitude`` and ``longitude``.
        geo_distance: Radius in kilometres of the distance clause. Defaults to
            50, the value that used to be hard-coded.

    Returns:
        The result of ``siaes_similarity_search`` for the built filter.
    """

    def _match(field: str, value):
        # One "should" alternative: exact phrase match on a single metadata field.
        return {"bool": {"must": [], "filter": [{"match_phrase": {field: value}}]}}

    # NOTE(review): geo_reg / geo_dep are indexed from ``self.region`` / ``self.department``
    # in ``elasticsearch_index_metadata``; confirm they use the same format as
    # ``Perimeter.region_code`` / ``Perimeter.department_code``.
    should = [
        _match("metadata.geo_country", True),
        _match("metadata.geo_reg", city.region_code),
        _match("metadata.geo_dep", city.department_code),
    ]

    # The distance clause needs both coordinates; without them it is left out
    # so the country / region / department clauses still apply.
    if city.latitude is not None and city.longitude is not None:
        should.append(
            {
                "bool": {
                    "must": [],
                    "filter": [
                        {
                            "geo_distance": {
                                "distance": f"{geo_distance}km",
                                "metadata.geo_location": {
                                    "lat": city.latitude,
                                    "lon": city.longitude,
                                },
                            }
                        }
                    ],
                }
            }
        )

    search_filter = [{"bool": {"should": should, "minimum_should_match": 1}}]
    return siaes_similarity_search(search_text, search_filter)
23 changes: 15 additions & 8 deletions lemarche/www/siaes/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,13 +497,20 @@ def filter_queryset(self, qs=None):

class SiaeSemanticForm(forms.Form):
search_query = forms.CharField(
label="Recherche sémantique",
label="Prestation recherchée",
required=False,
widget=forms.TextInput(attrs={"placeholder": "Je cherche…"}),
help_text=" ".join(
[
"Soyez le plus précis possible (Exemple: nettoyage des locaux d'entreprise",
"/ entretien des espaces verts)",
]
),
widget=forms.TextInput(attrs={"placeholder": "Nettoyage de locaux"}),
min_length=5,
)
city = forms.ModelChoiceField(
label="Localisation de votre besoin",
queryset=Perimeter.objects.cities(),
to_field_name="slug",
required=False,
widget=forms.HiddenInput(),
)

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# this field are autocompletes
self.fields["city"].choices = []
Loading

0 comments on commit b9fb4a6

Please sign in to comment.