Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Product Quantization #79

Merged
merged 26 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
59f17e3
Implement product quantization in lantern cli
var77 Feb 10, 2024
3a6fd65
Process all splits in parallel
var77 Feb 11, 2024
3a32b9f
Add subvector-id argument and ability to horizontally scale the runni…
var77 Feb 11, 2024
dfb41c7
Fix progress tracking for pq
var77 Feb 11, 2024
6ccc001
Fix indexing bug
var77 Feb 11, 2024
ffa9a49
Parallelize vector compression
var77 Feb 12, 2024
41c12ec
Parallelize data fetching and export
var77 Feb 12, 2024
16011ba
Refactor and separate code parts
var77 Feb 14, 2024
d2fbdf8
Refactor code, pack arguments in a struct
var77 Feb 15, 2024
606ac54
Add gcp batch job flow
var77 Feb 15, 2024
dc6ef0f
Add tests for lantern_pq
var77 Feb 16, 2024
cfa1dab
Add action to push cli image to GCR
var77 Feb 16, 2024
4cc5977
Remove unnecessary arguments
var77 Feb 16, 2024
1155b57
Rename codebook table and params to match lantern pq
var77 Feb 19, 2024
50fdb62
Fix naming issues, add --dataset-limit argument
var77 Feb 20, 2024
d3e4575
Conditionaliy publish latest tag for cli docker image
var77 Feb 20, 2024
000e628
Use renamed lantern access method
Ngalstyan4 Feb 17, 2024
92aeef0
Release v0.2.0
Ngalstyan4 Feb 17, 2024
2a342e6
Temporarily change lantern tag for testing before lantern is released
Ngalstyan4 Feb 17, 2024
70f284c
Implement pq-quantization in external index construction
Ngalstyan4 Feb 20, 2024
19b7ba8
Fix codebook offset bug
Ngalstyan4 Feb 20, 2024
5adcf10
set pq parameter in index construction when importing
Ngalstyan4 Feb 20, 2024
a467cb0
Fix codebook lifetime bug in rust<->C interface
Ngalstyan4 Feb 21, 2024
d107f81
Prepare for release
Ngalstyan4 Feb 21, 2024
dcdfcd9
Fix naming for uppercase table names, check if codebook table exists …
var77 Feb 21, 2024
2dc901f
Add pq argument for external index reindexing
var77 Feb 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 34 additions & 3 deletions .github/workflows/publish-cli-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@ name: publish-cli-docker
on:
workflow_dispatch:
inputs:
LATEST:
type: boolean
description: "Publish as latest release"
required: false
default: false
VERSION:
type: string
description: "CLI version"
required: true
default: "0.0.38"
default: "0.0.39"
IMAGE_NAME:
type: string
description: "Container image name to tag"
Expand Down Expand Up @@ -34,12 +39,38 @@ jobs:
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build and push
- name: Login to GCR Container Registry
uses: docker/login-action@v3
with:
registry: ${{ secrets.GCP_REGION }}-docker.pkg.dev
username: _json_key_base64
password: ${{ secrets.GCP_CREDENTIALS_JSON_B64 }}
- name: Build and push without latest tags
uses: docker/build-push-action@v5
id: build_image
if: ${{ inputs.LATEST == false || inputs.LATEST == 'false' }}
with:
context: .
platforms: linux/amd64
file: Dockerfile.cli${{ (matrix.device == 'gpu' && '.cuda' || '') }}
push: true
tags: |
${{ inputs.IMAGE_NAME }}:${{ inputs.VERSION }}-${{ matrix.device }}
${{ secrets.GCP_REGION }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/${{ inputs.IMAGE_NAME }}:${{ inputs.VERSION }}-${{ matrix.device }}
- name: Build and push with latest tags
uses: docker/build-push-action@v5
id: build_image_latest
if: ${{ inputs.LATEST == true || inputs.LATEST == 'true' }}
with:
context: .
platforms: linux/amd64
file: Dockerfile.cli${{ (matrix.device == 'gpu' && '.cuda' || '') }}
push: true
# the :latest tag will refer to cpu version
tags: ${{ (matrix.device == 'cpu' && format('{0}:latest', inputs.IMAGE_NAME) || format('{0}:gpu', inputs.IMAGE_NAME)) }},${{ inputs.IMAGE_NAME }}:latest-${{ matrix.device }},${{ inputs.IMAGE_NAME }}:${{ inputs.VERSION }}-${{ matrix.device }}
tags: |
${{ (matrix.device == 'cpu' && format('{0}:latest', inputs.IMAGE_NAME) || format('{0}:gpu', inputs.IMAGE_NAME)) }}
${{ inputs.IMAGE_NAME }}:latest-${{ matrix.device }}
${{ inputs.IMAGE_NAME }}:${{ inputs.VERSION }}-${{ matrix.device }}
${{ (matrix.device == 'cpu' && format('{0}-docker.pkg.dev/{1}/{2}:latest', secrets.GCP_REGION, secrets.GCP_PROJECT_ID, inputs.IMAGE_NAME) || format('{0}-docker.pkg.dev/{1}/{2}:gpu', secrets.GCP_REGION, secrets.GCP_PROJECT_ID, inputs.IMAGE_NAME)) }}
${{ secrets.GCP_REGION }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/${{ inputs.IMAGE_NAME }}:latest-${{ matrix.device }}
${{ secrets.GCP_REGION }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/${{ inputs.IMAGE_NAME }}:${{ inputs.VERSION }}-${{ matrix.device }}
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ members = [
"lantern_cli",
"lantern_daemon",
"lantern_index_autotune",
"lantern_pq",
]

[profile.release]
Expand Down
53 changes: 53 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,56 @@ CREATE TABLE "public"."index_parameter_experiment_results" (
build_time DOUBLE PRECISION NULL
);
```

## Lantern PQ

### Description

Use external product quantization to compress table vectors using kmeans clustering.

### Usage

Run `lantern-cli pq-table --help` to show the cli options.

The job can be run both on a local instance and via GCP batch jobs, parallelizing the workload over hundreds of VMs to speed up clustering.

To run locally use:

```bash
lantern-cli pq-table --uri 'postgres://[email protected]:5432/postgres' --table sift10k --column v --clusters 256 --splits 32
```

The job will be run on current machine utilizing all available cores.

For big datasets (over 1M rows) it is convenient to run the job using GCP batch jobs.
Make sure to have GCP credentials set-up before running this command:

```bash
lantern-cli pq-table --uri 'postgres://[email protected]:5432/postgres' --table sift10k --column v --clusters 256 --splits 32 --run-on-gcp
```

If you prefer to orchestrate the task yourself on your own on-premise servers, you need to do the following 3 steps:

1. Run the setup job. This will create the necessary tables and add a `pqvec` column to the target table

```bash
lantern-cli pq-table --uri 'postgres://[email protected]:5432/postgres' --table sift10k --column v --clusters 256 --splits 32 --skip-codebook-creation --skip-vector-compression
```

2. Run the clustering job. This will create a codebook for the table and export it to a postgres table

```bash
lantern-cli pq-table --uri 'postgres://[email protected]:5432/postgres' --table sift10k --column v --clusters 256 --splits 32 --skip-table-setup --skip-vector-compression --parallel-task-count 10 --subvector-id 0
```

In this case the command should be run 32 times, once for each subvector in the range [0-31]. `--parallel-task-count` means that at most 10 tasks will run in parallel; this is used to avoid exceeding the max connection limit on postgres.

3. Run the compression job. This will compress the vectors using the generated codebook and export the results under the `pqvec` column

```bash
lantern-cli pq-table --uri 'postgres://[email protected]:5432/postgres' --table sift10k --column v --clusters 256 --splits 32 --skip-table-setup --skip-codebook-creation --parallel-task-count 10 --total-task-count 10 --compression-task-id 0
```

In this case the command should be run 10 times, once for each part of the codebook in the range [0-9]. `--parallel-task-count` means that at most 10 tasks will run in parallel; this is used to avoid exceeding the max connection limit on postgres.

The table must have a primary key in order for this job to work. If the primary key is different from `id`, provide it using the `--pk` argument.
5 changes: 3 additions & 2 deletions ci/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,15 @@ function setup_postgres() {
}

function setup_lantern() {
LANTERN_VERSION=v0.1.1
LANTERN_VERSION=main
LANTERN_VERSION=narek/pgvector-compat
git clone --recursive https://github.com/lanterndata/lantern.git /tmp/lantern
pushd /tmp/lantern
git checkout ${LANTERN_VERSION} && \
git submodule update --recursive && \
mkdir build
pushd build
cmake -DUSEARCH_NO_MARCH_NATIVE=ON .. && \
cmake -DMARCH_NATIVE=OFF -DBUILD_FOR_DSTRIBUTING=1 .. && \
make install
popd
popd
Expand Down
3 changes: 2 additions & 1 deletion lantern_cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "lantern_cli"
version = "0.0.38"
version = "0.0.39"
edition = "2021"

[[bin]]
Expand All @@ -16,3 +16,4 @@ lantern_embeddings = { path = "../lantern_embeddings" }
lantern_daemon = { path = "../lantern_daemon" }
lantern_logger = { path = "../lantern_logger" }
lantern_index_autotune = { path = "../lantern_index_autotune" }
lantern_pq = { path = "../lantern_pq" }
3 changes: 3 additions & 0 deletions lantern_cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use lantern_daemon::cli::DaemonArgs;
use lantern_embeddings::cli::{EmbeddingArgs, MeasureModelSpeedArgs, ShowModelsArgs};
use lantern_external_index::cli::CreateIndexArgs;
use lantern_index_autotune::cli::IndexAutotuneArgs;
use lantern_pq::cli::PQArgs;

#[derive(Subcommand, Debug)]
pub enum Commands {
Expand All @@ -18,6 +19,8 @@ pub enum Commands {
MeasureModelSpeed(MeasureModelSpeedArgs),
/// Autotune index
AutotuneIndex(IndexAutotuneArgs),
/// Quantize table
PQTable(PQArgs),
/// Start in daemon mode
StartDaemon(DaemonArgs),
}
Expand Down
9 changes: 9 additions & 0 deletions lantern_cli/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use std::process;

use clap::Parser;
use lantern_daemon;
use lantern_embeddings;
use lantern_external_index;
use lantern_logger::{LogLevel, Logger};
use lantern_pq;
mod cli;

fn main() {
Expand Down Expand Up @@ -46,6 +49,11 @@ fn main() {
_main_logger = Some(logger.clone());
lantern_index_autotune::autotune_index(&args, None, None, Some(logger))
}
cli::Commands::PQTable(args) => {
let logger = Logger::new("Lantern PQ", LogLevel::Debug);
_main_logger = Some(logger.clone());
lantern_pq::quantize_table(args, None, None, Some(logger))
}
cli::Commands::StartDaemon(args) => {
let logger = Logger::new("Lantern Daemon", args.log_level.value());
_main_logger = Some(logger.clone());
Expand All @@ -56,5 +64,6 @@ fn main() {
let logger = _main_logger.unwrap();
if let Err(e) = res {
logger.error(&e.to_string());
process::exit(1);
}
}
1 change: 1 addition & 0 deletions lantern_daemon/src/external_index_jobs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ async fn external_index_worker(
dims: 0,
out: index_path,
remote_database: true,
pq: false,
}, progress_callback, Some(is_canceled_clone), Some(task_logger));
futures::executor::block_on(cancel_tx_clone.send(false))?;
result
Expand Down
2 changes: 1 addition & 1 deletion lantern_external_index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ clap = { version = "4.4.0", features = ["derive"] }
cxx = "1.0.106"
postgres = "0.19.7"
postgres-types = { version = "0.2.6", features = ["derive"] }
usearch = { git = "https://github.com/Ngalstyan4/usearch.git", branch = "main-lantern" }
usearch = { git = "https://github.com/Ngalstyan4/usearch.git", branch="main-lantern" }
lantern_logger = { path = "../lantern_logger" }
lantern_utils = { path = "../lantern_utils" }
rand = "0.8.5"
4 changes: 4 additions & 0 deletions lantern_external_index/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ pub struct CreateIndexArgs {
#[arg(short, long)]
pub column: String,

/// Use already created codebook to create product-quantized binary index
#[arg(short, long, default_value_t = false)]
pub pq: bool,

/// Number of neighbours for each vector
#[arg(short, default_value_t = 16)]
pub m: usize,
Expand Down
83 changes: 82 additions & 1 deletion lantern_external_index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,71 @@ pub fn create_usearch_index(
dimensions, args.m, args.ef, args.efc
));

let mut pq_codebook: *const f32 = std::ptr::null();
let mut v: Vec<f32> = vec![];
let mut num_centroids: usize = 0;
let mut num_subvectors: usize = 0;

if args.pq {
let codebook_table_name = format!(
"_codebook_{table_name}_{column_name}",
table_name = &args.table,
column_name = &args.column
);
let full_codebook_table_name =
get_full_table_name("_lantern_internal", &codebook_table_name);

let rows_codebook_exists = transaction.query("SELECT true FROM information_schema.tables WHERE table_schema='_lantern_internal' AND table_name=$1;", &[&codebook_table_name])?;

if rows_codebook_exists.len() == 0 {
anyhow::bail!("Codebook table {full_codebook_table_name} does not exist");
}

let rows_c = transaction.query(
&format!("SELECT COUNT(*) FROM {full_codebook_table_name} WHERE subvector_id = 0;"),
&[],
)?;
let rows_sv = transaction.query(
&format!("SELECT COUNT(*) FROM {full_codebook_table_name} WHERE centroid_id = 0;"),
&[],
)?;

if rows_c.len() == 0 || rows_sv.len() == 0 {
anyhow::bail!("Invalid codebook table");
}

num_centroids = rows_c.first().unwrap().get::<usize, i64>(0) as usize;
num_subvectors = rows_sv.first().unwrap().get::<usize, i64>(0) as usize;

v.resize(num_centroids * dimensions, 0.);

let rows = transaction.query(
&format!(
"SELECT subvector_id, centroid_id, c FROM _lantern_internal._codebook_{table_name}_{column_name};",
table_name = args.table,
column_name = args.column,
),
&[],
)?;

logger.info(&format!(
"Codebook has {} rows - {num_centroids} centroids and {num_subvectors} subvectors",
rows.len()
));

for r in rows {
let subvector_id: i32 = r.get(0);
let centroid_id: i32 = r.get(1);
let subvector: Vec<f32> = r.get(2);
for i in 0..subvector.len() {
v[centroid_id as usize * dimensions
+ subvector_id as usize * subvector.len()
+ i] = subvector[i];
}
}
pq_codebook = v.as_ptr();
}

let options = IndexOptions {
dimensions,
metric: args.metric_kind.value(),
Expand All @@ -150,6 +215,20 @@ pub fn create_usearch_index(
connectivity: args.m,
expansion_add: args.efc,
expansion_search: args.ef,

num_threads: 0, // automatic

// note: pq_construction and pq_output distinction is not yet implemented in usearch
// in the future, if pq_construction is false, we will use full vectors in memory (and
// require large memory for construction) but will output pq-quantized graph
//
// currently, regardless of pq_construction value, as long as pq_output is true,
// we construct a pq_quantized index using quantized values during construction
pq_construction: args.pq,
pq_output: args.pq,
num_centroids,
num_subvectors,
codebook: pq_codebook,
};
let index = Index::new(&options)?;

Expand Down Expand Up @@ -317,6 +396,7 @@ pub fn create_usearch_index(
args.efc,
dimensions,
args.m,
args.pq,
)?;
} else {
// If job is run on the same server as database we can skip copying part
Expand All @@ -331,7 +411,8 @@ pub fn create_usearch_index(
}

transaction.execute(
&format!("CREATE INDEX {idx_name} ON {table_name} USING hnsw({column_name} {op_class}) WITH (_experimental_index_path='{index_path}', ef={ef}, dim={dim}, m={m}, ef_construction={ef_construction});", index_path=args.out, table_name=&get_full_table_name(&args.schema, &args.table),column_name=&quote_ident(&args.column), m=args.m, ef=args.ef, ef_construction=args.efc, dim=dimensions),
&format!("CREATE INDEX {idx_name} ON {table_name} USING lantern_hnsw({column_name} {op_class}) WITH (_experimental_index_path='{index_path}', pq={pq}, ef={ef}, dim={dim}, m={m}, ef_construction={ef_construction});", index_path=args.out, table_name=&get_full_table_name(&args.schema, &args.table),
column_name=&quote_ident(&args.column), pq=args.pq, m=args.m, ef=args.ef, ef_construction=args.efc, dim=dimensions),
&[],
)?;

Expand Down
4 changes: 3 additions & 1 deletion lantern_external_index/src/postgres_large_objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ impl<'a> LargeObject<'a> {
ef_construction: usize,
dim: usize,
m: usize,
pq: bool,
) -> crate::AnyhowVoidResult {
let mut transaction = self.transaction.unwrap();
transaction.execute(
Expand All @@ -57,7 +58,8 @@ impl<'a> LargeObject<'a> {
}

transaction.execute(
&format!("CREATE INDEX {idx_name} ON {table_name} USING hnsw({column_name} {op_class}) WITH (_experimental_index_path='{index_path}', ef={ef}, dim={dim}, m={m}, ef_construction={ef_construction});", index_path=self.index_path),
&format!("CREATE INDEX {idx_name} ON {table_name} USING lantern_hnsw({column_name} {op_class}) WITH (_experimental_index_path='{index_path}', pq={pq}, ef={ef}, dim={dim}, m={m}, ef_construction={ef_construction});",
index_path=self.index_path),
&[],
)?;

Expand Down
4 changes: 4 additions & 0 deletions lantern_extras/src/external_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ fn lantern_create_external_index<'a>(
m: default!(i32, 16),
ef_construction: default!(i32, 16),
ef: default!(i32, 16),
pq: default!(bool, false),
index_name: default!(&'a str, "''"),
) -> Result<(), anyhow::Error> {
validate_index_param("ef", ef, 1, 400);
Expand Down Expand Up @@ -84,6 +85,7 @@ fn lantern_create_external_index<'a>(
dims: dim as usize,
index_name,
remote_database: false,
pq,
},
None,
None,
Expand Down Expand Up @@ -111,6 +113,7 @@ mod lantern_extras {
m: i32,
ef_construction: i32,
ef: i32,
pq: default!(bool, false),
) -> Result<(), anyhow::Error> {
let index_name = index.name().to_owned();
let schema = index.namespace().to_owned();
Expand Down Expand Up @@ -152,6 +155,7 @@ mod lantern_extras {
m,
ef_construction,
ef,
pq,
&index_name,
)
}
Expand Down
Loading
Loading