-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use renamed lantern access method #84
Merged
Merged
Changes from 4 commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
0a64ce5
Use renamed lantern access method
Ngalstyan4 9683724
Release v0.2.0
Ngalstyan4 bf947ad
Temporarily change lantern tag for testing before lantern is released
Ngalstyan4 4b540b3
Implement pq-quantization in external index construction
Ngalstyan4 786c6cb
Fix codebook offset bug
Ngalstyan4 5b4d024
set pq parameter in index construction when importing
Ngalstyan4 ccbe33c
Fix codebook lifetime bug in rust<->C interface
Ngalstyan4 93126c8
Prepare for release
Ngalstyan4 09be32c
Fix naming for uppercase table names, check if codebook table exists …
var77 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -114,6 +114,7 @@ pub fn create_usearch_index( | |
let full_table_name = get_full_table_name(&args.schema, &args.table); | ||
|
||
transaction.execute("SET lock_timeout='5s'", &[])?; | ||
//todo:: ask-Varik: why is this necessary? | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We lock the table to make sure no TIDs change while the index is being created. |
||
transaction.execute( | ||
&format!("LOCK TABLE ONLY {full_table_name} IN SHARE MODE"), | ||
&[], | ||
|
@@ -142,6 +143,60 @@ pub fn create_usearch_index( | |
dimensions, args.m, args.ef, args.efc | ||
)); | ||
|
||
let mut pq_codebook: *const f32 = std::ptr::null(); | ||
let mut num_centroids: usize = 0; | ||
let mut num_subvectors: usize = 0; | ||
|
||
if args.pq { | ||
let rows_c = transaction.query( | ||
&format!( | ||
"SELECT count(*) FROM _lantern_internal._codebook_{table_name}_{column_name} WHERE subvector_id = 0;", | ||
table_name = args.table, | ||
column_name = args.column, | ||
), | ||
&[], | ||
)?; | ||
let rows_sv = transaction.query( | ||
&format!( | ||
"SELECT count(*) FROM _lantern_internal._codebook_{table_name}_{column_name} WHERE centroid_id = 0;", | ||
table_name = args.table, | ||
column_name = args.column, | ||
), | ||
&[], | ||
)?; | ||
|
||
if rows_c.len() == 0 || rows_sv.len() == 0 { | ||
anyhow::bail!("Invalid codebook table"); | ||
} | ||
|
||
num_centroids = rows_c.first().unwrap().get::<usize, i64>(0) as usize; | ||
num_subvectors = rows_sv.first().unwrap().get::<usize, i64>(0) as usize; | ||
|
||
let rows = transaction.query( | ||
&format!( | ||
"SELECT subvector_id, centroid_id, c FROM _lantern_internal._codebook_{table_name}_{column_name};", | ||
table_name = args.table, | ||
column_name = args.column, | ||
), | ||
&[], | ||
)?; | ||
let mut v = vec![0.; num_centroids * dimensions]; | ||
pq_codebook = v.as_ptr(); | ||
logger.info(&format!( | ||
"codebook has {} rows - {num_centroids} centroids and {num_subvectors} subvectors", | ||
rows.len() | ||
)); | ||
|
||
for r in rows { | ||
let subvector_id: i32 = r.get(0); | ||
let centroid_id: i32 = r.get(1); | ||
let subvector: Vec<f32> = r.get(2); | ||
for i in 0..subvector.len() { | ||
v[centroid_id as usize * dimensions + subvector_id as usize + i] = subvector[i]; | ||
} | ||
} | ||
} | ||
|
||
let options = IndexOptions { | ||
dimensions, | ||
metric: args.metric_kind.value(), | ||
|
@@ -150,6 +205,20 @@ pub fn create_usearch_index( | |
connectivity: args.m, | ||
expansion_add: args.efc, | ||
expansion_search: args.ef, | ||
|
||
num_threads: 0, // automatic | ||
|
||
// note: pq_construction and pq_output distinction is not yet implemented in usearch | ||
// in the future, if pq_construction is false, we will use full vectors in memory (and | ||
// require large memory for construction) but will output pq-quantized graph | ||
// | ||
// currently, regardless of pq_construction value, as long as pq_output is true, | ||
// we construct a pq_quantized index using quantized values during construction | ||
pq_construction: args.pq, | ||
pq_output: args.pq, | ||
num_centroids, | ||
num_subvectors, | ||
codebook: pq_codebook, | ||
}; | ||
let index = Index::new(&options)?; | ||
|
||
|
@@ -331,7 +400,7 @@ pub fn create_usearch_index( | |
} | ||
|
||
transaction.execute( | ||
&format!("CREATE INDEX {idx_name} ON {table_name} USING hnsw({column_name} {op_class}) WITH (_experimental_index_path='{index_path}', ef={ef}, dim={dim}, m={m}, ef_construction={ef_construction});", index_path=args.out, table_name=&get_full_table_name(&args.schema, &args.table),column_name=&quote_ident(&args.column), m=args.m, ef=args.ef, ef_construction=args.efc, dim=dimensions), | ||
&format!("CREATE INDEX {idx_name} ON {table_name} USING lantern_hnsw({column_name} {op_class}) WITH (_experimental_index_path='{index_path}', ef={ef}, dim={dim}, m={m}, ef_construction={ef_construction});", index_path=args.out, table_name=&get_full_table_name(&args.schema, &args.table),column_name=&quote_ident(&args.column), m=args.m, ef=args.ef, ef_construction=args.efc, dim=dimensions), | ||
&[], | ||
)?; | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[package] | ||
name = "lantern_extras" | ||
version = "0.1.2" | ||
version = "0.2.0" | ||
edition = "2021" | ||
|
||
[lib] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The version I used is now upstream on the narek/pq-index branch: https://github.com/Ngalstyan4/usearch/tree/narek/pq-index