Skip to content

Commit

Permalink
Added experimental knn option
Browse files Browse the repository at this point in the history
  • Loading branch information
ankane committed Sep 3, 2024
1 parent c95a743 commit f571a36
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## 5.4.0 (unreleased)

- Added experimental `knn` option
- Added experimental support for `_raw` to `where` option
- Added warning for `exists` with non-`true` values
- Added warning for full reindex and `:queue` mode
Expand Down
25 changes: 25 additions & 0 deletions lib/searchkick/index_options.rb
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,10 @@ def generate_settings
max_shingle_diff: 4
}

if options[:knn] && Searchkick.opensearch?
settings[:index][:knn] = true
end

if options[:case_sensitive]
settings[:analysis][:analyzer].each do |_, analyzer|
analyzer[:filter].delete("lowercase")
Expand Down Expand Up @@ -406,6 +410,27 @@ def generate_mappings
mapping[field] = shape_options.merge(type: "geo_shape")
end

(options[:knn] || []).each do |field, knn_options|
if Searchkick.opensearch?
mapping[field.to_s] = {
type: "knn_vector",
dimension: knn_options[:dimensions],
method: {
name: "hnsw",
space_type: "cosinesimil",
engine: "lucene"
}
}
else
mapping[field.to_s] = {
type: "dense_vector",
dims: knn_options[:dimensions],
index: true,
similarity: "cosine"
}
end
end

if options[:inheritance]
mapping[:type] = keyword_mapping
end
Expand Down
2 changes: 1 addition & 1 deletion lib/searchkick/model.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ def searchkick(**options)
options = Searchkick.model_options.merge(options)

unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
:filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
:filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :knn, :language,
:locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
:special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
:text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
Expand Down
38 changes: 37 additions & 1 deletion lib/searchkick/query.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class Query
def initialize(klass, term = "*", **options)
unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
:boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :explain,
:fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
:fields, :highlight, :includes, :index_name, :indices_boost, :knn, :limit, :load,
:match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
:request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
Expand Down Expand Up @@ -526,6 +526,42 @@ def prepare
end
end

# knn
if options[:knn]
if term != "*"
raise ArgumentError, "Hybrid search not supported yet"
end

if options[:where]
raise ArgumentError, "KNN search with where not supported yet"
end

if options[:knn].size != 1
raise ArgumentError, "Invalid knn option"
end

k = per_page + offset

if Searchkick.opensearch?
payload[:query].delete(:match_all)
payload[:query][:knn] = {}
options[:knn].each do |field, vector|
payload[:query][:knn][field.to_sym] = {
vector: vector,
k: k
}
end
else
options[:knn].each do |field, vector|
payload[:knn] = {
field: field,
k: k,
query_vector: vector
}
end
end
end

# pagination
pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
if !options[:body] || pagination_options
Expand Down
10 changes: 10 additions & 0 deletions test/knn_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
require_relative "test_helper"

# Covers the experimental `knn` search option using the Product test model.
class KnnTest < Minitest::Test
  # Indexes two products whose embeddings point in opposite directions, then
  # checks both the result ordering and the raw `_score` values returned for a
  # knn query on the `embedding` field.
  def test_works
    store [
      {name: "A", embedding: [1, 2, 3]},
      {name: "B", embedding: [-1, -2, -3]}
    ]

    # The query vector equals A's embedding, so A is expected to rank first.
    assert_order "*", ["A", "B"], knn: {embedding: [1, 2, 3]}

    # The expected scores depend on which search backend is in use.
    expected_scores =
      if Searchkick.opensearch?
        [1, 0]
      else
        [2, 1]
      end

    results = Product.search(knn: {embedding: [1, 2, 3]})
    actual_scores = results.hits.map { |hit| hit["_score"] }
    assert_equal expected_scores, actual_scores
  end
end
9 changes: 8 additions & 1 deletion test/models/product.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@ class Product
highlight: [:name],
filterable: [:name, :color, :description],
similarity: "BM25",
match: ENV["MATCH"] ? ENV["MATCH"].to_sym : nil
match: ENV["MATCH"] ? ENV["MATCH"].to_sym : nil,
knn: {embedding: {dimensions: 3}}

if ActiveRecord::VERSION::STRING.to_f >= 7.1
serialize :embedding, coder: JSON
else
serialize :embedding, JSON
end

attr_accessor :conversions, :user_ids, :aisle, :details

Expand Down
1 change: 1 addition & 0 deletions test/support/activerecord.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
t.decimal :longitude, precision: 10, scale: 7
t.text :description
t.text :alt_description
t.text :embedding
t.timestamps null: true
end

Expand Down

0 comments on commit f571a36

Please sign in to comment.