Skip to content

Commit

Permalink
Merge pull request #11107 from demarches-simplifiees/better_search
Browse files Browse the repository at this point in the history
ETQ usager et instructeur, la recherche ne tient plus compte des accents
  • Loading branch information
LeSim authored Dec 5, 2024
2 parents 1137491 + cf70581 commit 9fabef0
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 194 deletions.
23 changes: 11 additions & 12 deletions app/services/dossier_search_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def self.matching_dossiers(dossiers, search_terms, with_annotations = false)
[]
else
dossier_by_exact_id(dossiers, search_terms)
.presence || dossier_by_full_text(dossiers, search_terms, with_annotations)
.presence || dossier_ids_by_full_text(dossiers, search_terms, with_annotations)
end
end

Expand All @@ -26,24 +26,23 @@ def self.dossier_by_exact_id(dossiers, search_terms)
end
end

def self.dossier_by_full_text(dossiers, search_terms, with_annotations)
ts_vector = "to_tsvector('french', #{with_annotations ? 'dossiers.search_terms || dossiers.private_search_terms' : 'dossiers.search_terms'})"
ts_query = "to_tsquery('french', #{Dossier.connection.quote(to_tsquery(search_terms))})"

dossiers
.visible_by_administration
.where("#{ts_vector} @@ #{ts_query}")
.order(Arel.sql("COALESCE(ts_rank(#{ts_vector}, #{ts_query}), 0) DESC"))
def self.dossier_ids_by_full_text(dossiers, search_terms, with_annotations)
dossier_by_full_text(dossiers.visible_by_administration, search_terms, with_annotations:)
.pluck('id')
.uniq
end

def self.dossier_by_full_text_for_user(search_terms, dossiers)
ts_vector = "to_tsvector('french', search_terms)"
ts_query = "to_tsquery('french', #{Dossier.includes(:procedure).connection.quote(to_tsquery(search_terms))})"
dossier_by_full_text(dossiers.visible_by_user, search_terms)
end

def self.dossier_by_full_text(dossiers, search_terms, with_annotations: false)
columns = with_annotations ? 'search_terms || \' \' || private_search_terms' : 'search_terms'

ts_vector = "to_tsvector('french', unaccent(#{columns}))"
ts_query = "to_tsquery('french', unaccent(#{Dossier.connection.quote(to_tsquery(search_terms))}))"

dossiers
.visible_by_user
.where("#{ts_vector} @@ #{ts_query}")
.order(Arel.sql("COALESCE(ts_rank(#{ts_vector}, #{ts_query}), 0) DESC"))
end
Expand Down
260 changes: 78 additions & 182 deletions spec/services/dossier_search_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,229 +2,125 @@

describe DossierSearchService do
describe '#matching_dossiers' do
subject { liste_dossiers }
let!(:dossiers) { Dossier.where(id: dossier.id) }

let(:liste_dossiers) do
described_class.matching_dossiers(instructeur_1.dossiers, terms)
end

let(:administrateur_1) { administrateurs(:default_admin) }
let(:administrateur_2) { administrateurs(:default_admin) }

let(:instructeur_1) { create(:instructeur, administrateurs: [administrateur_1]) }
let(:instructeur_2) { create(:instructeur, administrateurs: [administrateur_2]) }

before do
instructeur_1.assign_to_procedure(procedure_1)
instructeur_2.assign_to_procedure(procedure_2)
before { perform_enqueued_jobs(only: DossierIndexSearchTermsJob) }

# create dossier before performing jobs
# because let!() syntax is executed after "before" callback
dossier_0
dossier_1
dossier_2
dossier_3
dossier_archived

perform_enqueued_jobs(only: DossierIndexSearchTermsJob)
def searching(terms, with_annotations: false)
described_class.matching_dossiers(dossiers, terms, with_annotations)
end

let(:procedure_1) { create(:procedure, :published, administrateur: administrateur_1) }
let(:procedure_2) { create(:procedure, :published, administrateur: administrateur_2) }

let(:dossier_0) { create(:dossier, state: Dossier.states.fetch(:brouillon), procedure: procedure_1, user: create(:user, email: '[email protected]')) }

let(:etablissement_1) { create(:etablissement, entreprise_raison_sociale: 'OCTO Academy', siret: '41636169600051') }
let(:dossier_1) { create(:dossier, :en_construction, procedure: procedure_1, user: create(:user, email: '[email protected]'), etablissement: etablissement_1) }

let(:etablissement_2) { create(:etablissement, entreprise_raison_sociale: 'Plop octo', siret: '41816602300012') }
let(:dossier_2) { create(:dossier, :en_construction, procedure: procedure_1, user: create(:user, email: '[email protected]'), etablissement: etablissement_2) }
describe 'ignores brouillon' do
let(:dossier) { create(:dossier, state: :brouillon) }

let(:etablissement_3) { create(:etablissement, entreprise_raison_sociale: 'OCTO Technology', siret: '41816609600051') }
let(:dossier_3) { create(:dossier, :en_construction, procedure: procedure_2, user: create(:user, email: '[email protected]'), etablissement: etablissement_3) }

let(:dossier_archived) { create(:dossier, :en_construction, procedure: procedure_1, archived: true, user: create(:user, email: '[email protected]')) }

describe 'search is empty' do
let(:terms) { '' }

it { expect(subject.size).to eq(0) }
it { expect(searching(dossier.id.to_s)).to eq([]) }
end

describe 'search brouillon file' do
let(:terms) { 'brouillon' }
context 'with a dossier not in brouillon' do
let(:user) { create(:user, email: '[email protected]') }
let(:etablissement) { create(:etablissement, entreprise_raison_sociale: 'Direction Interministerielle Du Numérique', siret: '13002526500013') }
let(:procedure) { create(:procedure, types_de_champ_public: [{ type: :text }], types_de_champ_private: [{ type: :text }]) }
let(:dossier) do
create(:dossier, procedure:, state: :en_construction, user:, etablissement:).tap do |dossier|
dossier.project_champs_public.first.update!(value: 'Hélène mange des pommes')
dossier.project_champs_private.first.update!(value: 'annotations')
end
end

it { expect(subject.size).to eq(0) }
end
it do
expect(searching('')).to eq([])

describe 'search archived file' do
let(:terms) { 'archived' }
# by dossier id
expect(searching(dossier.id.to_s)).to eq([dossier.id])

it { expect(subject.size).to eq(1) }
end
# annotations is unsearchable by default
expect(searching('annotations')).to eq([])
# but can be searched with the with_annotations option
expect(searching('annotations', with_annotations: true)).to eq([dossier.id])

describe 'search on contact email' do
let(:terms) { 'clap' }
# by email
expect(searching('[email protected]')).to eq([dossier.id])
expect(searching('nicolas')).to eq([dossier.id])

it { expect(subject.size).to eq(0) }
end
# by SIRET
expect(searching('13002526500013')).to eq([dossier.id])
expect(searching('1300')).to eq([dossier.id])

describe 'search on SIRET' do
context 'when is part of SIRET' do
let(:terms) { '4181' }
# by raison sociale
expect(searching('Direction Interministerielle Du Numérique')).to eq([dossier.id])
expect(searching('Direction')).to eq([dossier.id])

it { expect(subject.size).to eq(1) }
end
# with multiple terms
expect(searching('Direction nicolas')).to eq([dossier.id])

context 'when is a complet SIRET' do
let(:terms) { '41816602300012' }
# with forbidden characters
expect(searching("'?\\:&!(Direction) <Interministerielle>")).to eq([dossier.id])

it { expect(subject.size).to eq(1) }
end
end
# with a single forbidden character should not crash postgres
expect(searching('? Direction')).to eq([dossier.id])

describe 'search on raison social' do
let(:terms) { 'OCTO' }
# with supirious spaces
expect(searching(" nicolas ")).to eq([dossier.id])

it { expect(subject.size).to eq(2) }
end
# with wrong case
expect(searching('direction')).to eq([dossier.id])

describe 'search terms surrounded with spurious spaces' do
let(:terms) { ' OCTO ' }
# by champ text
expect(searching('Hélène')).to eq([dossier.id])

it { expect(subject.size).to eq(2) }
end
# by singular
expect(searching('la pomme')).to eq([dossier.id])

describe 'search on multiple fields' do
let(:terms) { 'octo plop' }
# without accent
expect(searching('helene')).to eq([dossier.id])

it { expect(subject.size).to eq(1) }
# NOT WORKING YET
# with a single faulty character
expect(searching('des pammes')).to eq([])
end
end

describe 'search with characters disallowed by the tsquery parser' do
let(:terms) { "'?\\:&!(OCTO) <plop>" }
describe 'does not ignore archived dossiers' do
let(:dossier) { create(:dossier, state: :en_construction, archived: true) }

it { expect(subject.size).to eq(1) }
it { expect(searching(dossier.id.to_s)).to eq([dossier.id]) }
end
end

describe '#matching_dossiers_for_user' do
subject { liste_dossiers }

before do
dossier_0
dossier_0b
dossier_1
dossier_2
dossier_3
dossier_archived
perform_enqueued_jobs(only: DossierIndexSearchTermsJob)
end

let(:liste_dossiers) do
described_class.matching_dossiers_for_user(terms, user_1)
end

let(:user_1) { create(:user, email: '[email protected]') }
let(:user_2) { create(:user) }

let(:procedure_1) { create(:procedure, :published) }
let(:procedure_2) { create(:procedure, :published) }

let(:dossier_0) { create(:dossier, state: Dossier.states.fetch(:brouillon), procedure: procedure_1, user: user_1) }
let(:dossier_0b) { create(:dossier, state: Dossier.states.fetch(:brouillon), procedure: procedure_1, user: user_2) }

let(:etablissement_1) { create(:etablissement, entreprise_raison_sociale: 'OCTO Academy', siret: '41636169600051') }
let(:dossier_1) { create(:dossier, state: Dossier.states.fetch(:en_construction), procedure: procedure_1, user: user_1, etablissement: etablissement_1) }

let(:etablissement_2) { create(:etablissement, entreprise_raison_sociale: 'Plop octo', siret: '41816602300012') }
let(:dossier_2) { create(:dossier, state: Dossier.states.fetch(:en_construction), procedure: procedure_1, user: user_1, etablissement: etablissement_2) }

let(:etablissement_3) { create(:etablissement, entreprise_raison_sociale: 'OCTO Technology', siret: '41816609600051') }
let(:dossier_3) { create(:dossier, state: Dossier.states.fetch(:en_construction), procedure: procedure_2, user: user_1, etablissement: etablissement_3) }
let(:user) { create(:user) }
let(:another_user) { create(:user) }

let(:dossier_archived) { create(:dossier, state: Dossier.states.fetch(:en_construction), procedure: procedure_1, archived: true, user: user_1) }
before { perform_enqueued_jobs(only: DossierIndexSearchTermsJob) }

describe 'search is empty' do
let(:terms) { '' }
def searching(terms, user) = described_class.matching_dossiers_for_user(terms, user)

it { expect(subject.size).to eq(0) }
end

describe 'search by dossier id' do
context 'when the user owns the dossier' do
let(:terms) { dossier_0.id.to_s }

it { expect(subject.map(&:id)).to include(dossier_0.id) }
end

context 'when the user does not own the dossier' do
let(:terms) { dossier_0b.id.to_s }

it { expect(subject.map(&:id)).not_to include(dossier_0b.id) }
context 'when the dossier is brouillon' do
let(:procedure) { create(:procedure, types_de_champ_private: [{ type: :text }]) }
let(:dossier) do
create(:dossier, procedure:, state: :brouillon, user:).tap do |dossier|
dossier.project_champs_private.first.update!(value: 'annotations')
end
end
end

describe 'search brouillon file' do
let(:terms) { 'brouillon' }

it { expect(subject.size).to eq(0) }
end

describe 'search on contact email' do
let(:terms) { '[email protected]' }

it { expect(subject.size).to eq(5) }
end

describe 'search on contact name' do
let(:terms) { '[email protected]' }

it { expect(subject.size).to eq(5) }
end
it do
# searching its own dossier by id
expect(searching(dossier.id.to_s, user)).to eq([dossier])

describe 'search on SIRET' do
context 'when is part of SIRET' do
let(:terms) { '4181' }
# searching another dossier by id
expect(searching(dossier.id.to_s, another_user)).to eq([])

it { expect(subject.size).to eq(2) }
# annotations is unsearchable
expect(searching('annotations', user)).to eq([])
end

context 'when is a complet SIRET' do
let(:terms) { '41816602300012' }

it { expect(subject.size).to eq(1) }
end
end

describe 'search on raison social' do
let(:terms) { 'OCTO' }

it { expect(subject.size).to eq(3) }
end

describe 'search terms surrounded with spurious spaces' do
let(:terms) { ' OCTO ' }

it { expect(subject.size).to eq(3) }
end

describe 'search on multiple fields' do
let(:terms) { 'octo plop' }

it { expect(subject.size).to eq(1) }
end

describe 'search with characters disallowed by the tsquery parser' do
let(:terms) { "'?\\:&!(OCTO) <plop>" }

it { expect(subject.size).to eq(1) }
end
context 'when the user is invited on the dossier' do
let(:dossier) { create(:dossier) }

describe 'search with a single forbidden character should not crash postgres' do
let(:terms) { '? OCTO' }
before { create(:invite, dossier:, user:) }

it { expect(subject.size).to eq(3) }
it { expect(searching(dossier.id.to_s, user)).to eq([dossier]) }
end
end
end

0 comments on commit 9fabef0

Please sign in to comment.