fix(profils): fixes after review by Victor and Antoine

gip-inclusion · Dec 18, 2024 · d65b76e · d65b76e
1 parent acbd8c5
commit d65b76e
Show file tree

Hide file tree

Showing 13 changed files with 93 additions and 27 deletions.
diff --git a/api/src/alembic/versions/20241028_172223_c947102bb23f_add_profils_autres_field_in_service.py b/api/src/alembic/versions/20241028_172223_c947102bb23f_add_profils_autres_field_in_service.py
@@ -61,8 +61,48 @@ def upgrade() -> None:
         ["searchable_index_profils_precisions"],
         postgresql_using="gin",
     )
+    op.execute("""
+        CREATE OR REPLACE FUNCTION generate_profils_searchable(
+            profils TEXT[]
+        )
+        RETURNS TSVECTOR AS $$
+        BEGIN
+            RETURN to_tsvector(
+               'french',
+                ARRAY_TO_STRING(profils, ' ')::text
+            );
+        END;
+        $$ LANGUAGE plpgsql IMMUTABLE;
+    """)
+    op.add_column(
+        "api__services",
+        sa.Column(
+            "searchable_index_profils",
+            TSVECTOR(),
+            sa.Computed(
+                "generate_profils_searchable(profils)",
+                persisted=True,
+            ),
+        ),
+    )
+    op.create_index(
+        "ix_api__services_searchable_index_profils",
+        "api__services",
+        ["searchable_index_profils"],
+        postgresql_using="gin",
+    )
+    # Create a custom text search configuration (french_di) that runs the
+    # unaccent dictionary before french_stem, so searches ignore accents
+    op.execute("""
+        CREATE EXTENSION IF NOT EXISTS unaccent;
+        CREATE TEXT SEARCH CONFIGURATION french_di ( COPY = french );
+        ALTER TEXT SEARCH CONFIGURATION french_di
+            ALTER MAPPING FOR hword, hword_part, word
+        WITH unaccent, french_stem;
+    """)
 
 
 def downgrade() -> None:
+    """Drop the columns, SQL function and text search configuration below."""
     op.drop_column("api__services", "searchable_index_profils_precisions")
     op.drop_column("api__services", "profils_precisions")
+    # Dropping the column also removes the GIN index built on it.
     op.drop_column("api__services", "searchable_index_profils")
+    # The generated column depended on this function, so it is dropped only
+    # after the column is gone.
+    op.execute("DROP FUNCTION IF EXISTS generate_profils_searchable(TEXT[])")
     op.execute("DROP TEXT SEARCH CONFIGURATION french_di")
diff --git a/api/src/data_inclusion/api/inclusion_data/models.py b/api/src/data_inclusion/api/inclusion_data/models.py
@@ -104,6 +104,10 @@ class Service(Base):
             "generate_profils_precisions(profils_precisions, profils)", persisted=True
         ),
     )
+    # Generated tsvector over `profils`. The expression must name the SQL
+    # function created by the migration: generate_profils_searchable.
+    searchable_index_profils: Mapped[str | None] = mapped_column(
+        TSVECTOR,
+        Computed("generate_profils_searchable(profils)", persisted=True),
+    )
     recurrence: Mapped[str | None]
     source: Mapped[str]
     structure_id: Mapped[str]

diff --git a/api/src/data_inclusion/api/inclusion_data/routes.py b/api/src/data_inclusion/api/inclusion_data/routes.py
@@ -379,7 +379,7 @@ def search_services_endpoint(
             """
         ),
     ] = None,
-    profils_precisions: Annotated[
+    profils_search: Annotated[
         Optional[str],
         fastapi.Query(
             description="""Une recherche elargie sur les profils.
@@ -454,7 +454,7 @@ def search_services_endpoint(
         frais=frais,
         modes_accueil=modes_accueil,
         profils=profils,
-        profils_precisions=profils_precisions,
+        profils_search=profils_search,
         types=types,
         search_point=search_point,
         include_outdated=inclure_suspendus,

diff --git a/api/src/data_inclusion/api/inclusion_data/services.py b/api/src/data_inclusion/api/inclusion_data/services.py
@@ -7,7 +7,7 @@
 
 import geoalchemy2
 import sqlalchemy as sqla
-from sqlalchemy import func, orm
+from sqlalchemy import func, or_, orm
 
 import fastapi
 
@@ -137,13 +137,20 @@ def filter_services_by_profils(
     )
 
 
-def filter_services_by_profils_precisions(
+def filter_services_by_profils_search(
     query: sqla.Select,
-    profils_precisions: str,
+    profils_search: str,
 ):
+    """Keep the services matching a free-text search on their profils.
+
+    A service is kept when at least one whitespace-separated term of
+    ``profils_search`` matches ``searchable_index_profils`` (terms are OR-ed),
+    or when the whole text, parsed as a web-search query, matches
+    ``searchable_index_profils_precisions``.
+    """
+    # split() without a separator discards the empty terms produced by
+    # repeated, leading or trailing whitespace; an empty operand such as
+    # "a |  | b" is a syntax error for to_tsquery.
+    # NOTE(review): terms are still passed to to_tsquery verbatim, so tsquery
+    # operator characters (&, !, parentheses, ...) in user input may raise a
+    # syntax error - confirm whether they should be sanitised.
+    profils_only = profils_search.split()
     return query.filter(
-        models.Service.searchable_index_profils_precisions.bool_op("@@")(
-            func.websearch_to_tsquery("french", profils_precisions)
+        or_(
+            models.Service.searchable_index_profils.bool_op("@@")(
+                func.to_tsquery("french_di", " | ".join(profils_only))
+            ),
+            models.Service.searchable_index_profils_precisions.bool_op("@@")(
+                func.websearch_to_tsquery("french_di", profils_search)
+            ),
         )
     )
 
@@ -274,7 +281,7 @@ def filter_services(
     thematiques: list[di_schema.Thematique] | None = None,
     frais: list[di_schema.Frais] | None = None,
     profils: list[di_schema.Profil] | None = None,
-    profils_precisions: str | None = None,
+    profils_search: str | None = None,
     modes_accueil: list[di_schema.ModeAccueil] | None = None,
     types: list[di_schema.TypologieService] | None = None,
     include_outdated: bool | None = False,
@@ -302,8 +309,8 @@ def filter_services(
     if not include_outdated:
         query = filter_outdated_services(query)
 
-    if profils_precisions is not None:
-        query = filter_services_by_profils_precisions(query, profils_precisions)
+    if profils_search is not None:
+        query = filter_services_by_profils_search(query, profils_search)
 
     return query
 
@@ -367,7 +374,7 @@ def search_services(
     frais: list[di_schema.Frais] | None = None,
     modes_accueil: list[di_schema.ModeAccueil] | None = None,
     profils: list[di_schema.Profil] | None = None,
-    profils_precisions: str | None = None,
+    profils_search: str | None = None,
     types: list[di_schema.TypologieService] | None = None,
     search_point: str | None = None,
     include_outdated: bool | None = False,
@@ -468,7 +475,7 @@ def search_services(
         thematiques=thematiques,
         frais=frais,
         profils=profils,
-        profils_precisions=profils_precisions,
+        profils_search=profils_search,
         modes_accueil=modes_accueil,
         types=types,
         include_outdated=include_outdated,

diff --git a/api/tests/conftest.py b/api/tests/conftest.py
@@ -89,6 +89,7 @@ def db_init():
     # Create postgis extension in test database
     with test_engine.connect() as conn:
         conn.execute(sqla.text("CREATE EXTENSION postgis;"))
+        conn.execute(sqla.text("CREATE EXTENSION unaccent;"))
         conn.commit()
 
     # Migrate the database

diff --git a/api/tests/e2e/api/__snapshots__/test_inclusion_data.ambr b/api/tests/e2e/api/__snapshots__/test_inclusion_data.ambr
@@ -653,13 +653,13 @@
               "description": "Une liste de profils.\n                Chaque résultat renvoyé a (au moins) un profil dans cette liste.\n            "
             },
             {
-              "name": "profils_precisions",
+              "name": "profils_search",
               "in": "query",
               "required": false,
               "schema": {
                 "type": "string",
                 "description": "Une recherche elargie sur les profils.\n               Chaque résultat renvoyé correspond a la recherche fulltext sur\n               ce champs.\n            ",
-                "title": "Profils Precisions"
+                "title": "Profils Search"
               },
               "description": "Une recherche elargie sur les profils.\n               Chaque résultat renvoyé correspond a la recherche fulltext sur\n               ce champs.\n            "
             },

diff --git a/api/tests/e2e/api/test_inclusion_data.py b/api/tests/e2e/api/test_inclusion_data.py
@@ -408,10 +408,10 @@ def test_list_structures_order(
         ("jeunes moins de 18 ans", "jeunes", True),
         ("jeune moins de 18 ans", "jeunes", True),
         ("jeunes et personne age", "vieux", False),
-        ("jeunes et personne age", "personne OR âgée", True),
+        ("jeunes et personne age", "personne OR vieux", True),
         ("jeunes et personne age", "personne jeune", True),
-        # FIXME: this test is failing because of the accent in the input
-        ("jeunes et personne agee", "âgée", False),
+        ("jeunes et personne age", "personne AND jeune", True),
+        ("jeunes et personne agee", "âgée", True),
     ],
 )
 @pytest.mark.with_token
@@ -424,7 +424,7 @@ def test_can_filter_resources_by_profils_precisions(
     factories.ServiceFactory(profils=None, profils_precisions="tests")
 
     response = api_client.get(
-        "/api/v0/search/services", params={"profils_precisions": input}
+        "/api/v0/search/services", params={"profils_search": input}
     )
 
     assert response.status_code == 200
@@ -442,6 +442,11 @@ def test_can_filter_resources_by_profils_precisions(
         ([schema.Profil.FEMMES.value], "femme", True),
         ([schema.Profil.JEUNES_16_26.value], "jeune", True),
         ([schema.Profil.FEMMES.value], "jeune", False),
+        (
+            [schema.Profil.DEFICIENCE_VISUELLE.value],
+            "deficience jeune difficulte",
+            True,
+        ),
     ],
 )
 @pytest.mark.with_token
@@ -452,7 +457,7 @@ def test_can_filter_resources_by_profils_precisions_with_only_profils_data(
     factories.ServiceFactory(profils=schema.Profil.RETRAITES, profils_precisions="")
 
     response = api_client.get(
-        "/api/v0/search/services", params={"profils_precisions": input}
+        "/api/v0/search/services", params={"profils_search": input}
     )
 
     assert response.status_code == 200

diff --git a/pipeline/dbt/models/intermediate/sources/soliguide/int_soliguide__services.sql b/pipeline/dbt/models/intermediate/sources/soliguide/int_soliguide__services.sql
@@ -75,21 +75,22 @@ di_thematique_by_soliguide_category_code AS (
 profils AS (
     SELECT
         publics.lieu_id,
-        ARRAY_TO_STRING(ARRAY_AGG(DISTINCT di_mapping.traduction), ',') AS traduction,
-        ARRAY_REMOVE(ARRAY_AGG(DISTINCT di_mapping.profils), NULL)      AS profils
+        ARRAY_TO_STRING(ARRAY_AGG(DISTINCT di_mapping.traduction), ', ') AS traduction,
+        ARRAY_REMOVE(ARRAY_AGG(DISTINCT di_mapping.profils), NULL)       AS profils
     FROM
         publics
     LEFT JOIN (
         VALUES
         -- administrative status
         ('regular', 'en situation régulière', NULL),
         ('asylum', 'demandeur asile', 'personnes-de-nationalite-etrangere'),
+        ('refugee', 'personne avec un status de refugiée', 'personnes-de-nationalite-etrangere'),
         ('undocumented', 'sans-papiers', 'personnes-de-nationalite-etrangere'),
         -- family status
         ('isolated', 'isolé', NULL),
         ('family', 'famille', 'familles-enfants'),
         ('couple', 'couple', 'familles-enfants'),
-        ('pregnent', 'enceinte', 'familles-enfants'),
+        ('pregnant', 'enceinte', 'familles-enfants'),
         -- gender status
         ('men', 'homme', NULL),
         ('women', 'femme', 'femmes'),

diff --git a/pipeline/dbt/models/staging/sources/soliguide/_soliguide__models.yml b/pipeline/dbt/models/staging/sources/soliguide/_soliguide__models.yml
@@ -135,7 +135,7 @@ models:
           - not_null
           - dbt_utils.not_empty_string
           - accepted_values:
-              values: ['regular', 'asylum', 'refugee', 'undocumented']
+              values: ["regular", "asylum", "refugee", "undocumented"]
   - name: stg_soliguide__lieux__publics__other
     columns:
       - name: lieu_id

diff --git a/...ne/dbt/models/staging/sources/soliguide/stg_soliguide__lieux__publics__administrative.sql b/...ne/dbt/models/staging/sources/soliguide/stg_soliguide__lieux__publics__administrative.sql
@@ -7,7 +7,9 @@ final AS (
         data ->> 'lieu_id'                                                     AS "lieu_id",
         TRIM(JSONB_ARRAY_ELEMENTS_TEXT(data -> 'publics' -> 'administrative')) AS "value"
     FROM source
-    WHERE data -> 'publics' -> 'administrative' IS NOT NULL
+    WHERE
+        data -> 'publics' -> 'administrative' IS NOT NULL
+        AND data #>> '{position,country}' = 'fr' AND NOT data -> 'sources' @> '[{"name": "dora"}]'
 )
 
 SELECT * FROM final
diff --git a/pipeline/dbt/models/staging/sources/soliguide/stg_soliguide__lieux__publics__familiale.sql b/pipeline/dbt/models/staging/sources/soliguide/stg_soliguide__lieux__publics__familiale.sql
@@ -7,7 +7,9 @@ final AS (
         data ->> 'lieu_id'                                                AS "lieu_id",
         TRIM(JSONB_ARRAY_ELEMENTS_TEXT(data -> 'publics' -> 'familiale')) AS "value"
     FROM source
-    WHERE data -> 'publics' -> 'familiale' IS NOT NULL
+    WHERE
+        data -> 'publics' -> 'familiale' IS NOT NULL
+        AND data #>> '{position,country}' = 'fr' AND NOT data -> 'sources' @> '[{"name": "dora"}]'
 )
 
 SELECT * FROM final
diff --git a/pipeline/dbt/models/staging/sources/soliguide/stg_soliguide__lieux__publics__gender.sql b/pipeline/dbt/models/staging/sources/soliguide/stg_soliguide__lieux__publics__gender.sql
@@ -7,7 +7,9 @@ final AS (
         data ->> 'lieu_id'                                             AS "lieu_id",
         TRIM(JSONB_ARRAY_ELEMENTS_TEXT(data -> 'publics' -> 'gender')) AS "value"
     FROM source
-    WHERE data -> 'publics' -> 'gender' IS NOT NULL
+    WHERE
+        data -> 'publics' -> 'gender' IS NOT NULL
+        AND data #>> '{position,country}' = 'fr' AND NOT data -> 'sources' @> '[{"name": "dora"}]'
 )
 
 SELECT * FROM final
diff --git a/pipeline/dbt/models/staging/sources/soliguide/stg_soliguide__lieux__publics__other.sql b/pipeline/dbt/models/staging/sources/soliguide/stg_soliguide__lieux__publics__other.sql
@@ -7,7 +7,9 @@ final AS (
         data ->> 'lieu_id'                                            AS "lieu_id",
         TRIM(JSONB_ARRAY_ELEMENTS_TEXT(data -> 'publics' -> 'other')) AS "value"
     FROM source
-    WHERE data -> 'publics' -> 'other' IS NOT NULL
+    WHERE
+        data -> 'publics' -> 'other' IS NOT NULL
+        AND data #>> '{position,country}' = 'fr' AND NOT data -> 'sources' @> '[{"name": "dora"}]'
 )
 
 SELECT * FROM final