Skip to content

Commit

Permalink
Fix offset bug in GCP job
Browse files Browse the repository at this point in the history
  • Loading branch information
var77 committed Mar 2, 2024
1 parent a1f50ca commit 618dabe
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish-cli-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ on:
type: string
description: "CLI version"
required: true
default: "0.1.2"
default: "0.1.3"
IMAGE_NAME:
type: string
description: "Container image name to tag"
Expand Down
4 changes: 2 additions & 2 deletions lantern_cli/src/pq/gcp_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ static QUANTIZATION_TASK_TEMPLATE: &'static str = r#"{
"memoryMib": 0
},
"maxRetryCount": 1,
"maxRunDuration": "2000s"
"maxRunDuration": "3000s"
},
"taskCount": "{gcp_quantization_task_count}",
"taskCountPerNode": 1,
Expand Down Expand Up @@ -285,7 +285,7 @@ pub fn quantize_table_on_gcp(
// Let each vm process max 100k rows
let gcp_quantization_task_count = args
.gcp_quantization_task_count
.unwrap_or(cmp::max(total_row_count / 50000, 1));
.unwrap_or(cmp::max(total_row_count / 100000, 1));

// Limit parallel task count to not exceed max connection limit
let gcp_quantization_task_parallelism = args
Expand Down
6 changes: 3 additions & 3 deletions lantern_cli/src/pq/quantization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ pub fn quantize_and_write_vectors(args: QuantizeAndWriteVectorArgs, mut client:
let progress_cb = args.progress_cb;

let mut limit_start = 0;
let mut limit_end = args.total_row_count ;
let mut limit_end = args.total_row_count;

// In batch mode each task will operate on a range of vectors from dataset
// Here we will determine the range from the task id
Expand All @@ -204,7 +204,7 @@ pub fn quantize_and_write_vectors(args: QuantizeAndWriteVectorArgs, mut client:

let chunk_per_task = limit_end / quantization_task_count;
limit_start = chunk_per_task * quantization_task_id;
limit_end = if *quantization_task_id == quantization_task_count - 1 { limit_end } else { limit_start + chunk_per_task };
limit_end = if *quantization_task_id == quantization_task_count - 1 { limit_end + 1 } else { limit_start + chunk_per_task };
}

// Read all codebook and create a hashmap from it
Expand Down Expand Up @@ -285,7 +285,7 @@ pub fn quantize_and_write_vectors(args: QuantizeAndWriteVectorArgs, mut client:
let mut client = Client::connect(&db_uri, NoTls)?;
let mut transaction = client.transaction()?;
let range_start = limit_start + (i * chunk_size);
let range_end = if i == num_cores - 1 { limit_end + 1 } else { range_start + chunk_size };
let range_end = if i == num_cores - 1 { limit_end } else { range_start + chunk_size };

let fetch_start_time = Instant::now();
let rows = transaction.query(
Expand Down

0 comments on commit 618dabe

Please sign in to comment.