From 598a73a123140e2f06468a268bc06061cfb03478 Mon Sep 17 00:00:00 2001 From: Jk Xu <54522439+Dousir9@users.noreply.github.com> Date: Thu, 12 Oct 2023 12:30:32 +0800 Subject: [PATCH 01/13] chore(query): refine hash join bitmap and support fast path for datablock take (#13213) * add set_true_validity * fast path for take --------- Co-authored-by: sundyli <543950155@qq.com> --- src/query/expression/src/kernels/take.rs | 9 ++ .../expression/src/kernels/take_chunks.rs | 13 +++ .../processors/transforms/hash_join/common.rs | 32 +++---- .../hash_join/hash_join_build_state.rs | 5 +- .../hash_join/hash_join_probe_state.rs | 31 ++----- .../hash_join/probe_join/inner_join.rs | 34 +++----- .../hash_join/probe_join/left_join.rs | 86 +++++-------------- .../hash_join/probe_join/right_join.rs | 12 +-- 8 files changed, 76 insertions(+), 146 deletions(-) diff --git a/src/query/expression/src/kernels/take.rs b/src/query/expression/src/kernels/take.rs index fa8d1d34f128..6e8f3d9dddf2 100644 --- a/src/query/expression/src/kernels/take.rs +++ b/src/query/expression/src/kernels/take.rs @@ -262,6 +262,15 @@ impl Column { pub fn take_boolean_types(col: &Bitmap, indices: &[I]) -> Bitmap where I: common_arrow::arrow::types::Index { let num_rows = indices.len(); + // Fast path: avoid iterating column to generate a new bitmap. + // If this [`Bitmap`] is all true or all false and `num_rows <= bitmap.len()``, + // we can just slice it. + if num_rows <= col.len() && (col.unset_bits() == 0 || col.unset_bits() == col.len()) { + let mut bitmap = col.clone(); + bitmap.slice(0, num_rows); + return bitmap; + } + let capacity = num_rows.saturating_add(7) / 8; let mut builder: Vec = Vec::with_capacity(capacity); let mut builder_len = 0; diff --git a/src/query/expression/src/kernels/take_chunks.rs b/src/query/expression/src/kernels/take_chunks.rs index 0c52ff886bd3..7d34875bccfa 100644 --- a/src/query/expression/src/kernels/take_chunks.rs +++ b/src/query/expression/src/kernels/take_chunks.rs @@ -771,6 +771,19 @@ impl Column { pub fn take_block_vec_boolean_types(col: &[Bitmap], indices: &[RowPtr]) -> Bitmap { let num_rows = indices.len(); + // Fast path: avoid iterating column to generate a new bitmap. + for bitmap in col.iter() { + // If this [`Bitmap`] is all true or all false and `num_rows <= bitmap.len()``, + // we can just slice it. 
+ if num_rows <= bitmap.len() + && (bitmap.unset_bits() == 0 || bitmap.unset_bits() == bitmap.len()) + { + let mut bitmap = bitmap.clone(); + bitmap.slice(0, num_rows); + return bitmap; + } + } + let capacity = num_rows.saturating_add(7) / 8; let mut builder: Vec = Vec::with_capacity(capacity); let mut builder_len = 0; diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/common.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/common.rs index 141a32061214..7a44d1cfcdd3 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/common.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/common.rs @@ -27,7 +27,6 @@ use common_expression::DataBlock; use common_expression::Evaluator; use common_expression::Expr; use common_expression::FunctionContext; -use common_expression::Scalar; use common_expression::Value; use common_functions::BUILTIN_FUNCTIONS; use common_sql::executor::cast_expr_to_non_null_boolean; @@ -187,32 +186,21 @@ impl HashJoinState { } } -pub(crate) fn set_validity(column: &BlockEntry, num_rows: usize, validity: &Bitmap) -> BlockEntry { +pub(crate) fn set_true_validity( + column: &BlockEntry, + num_rows: usize, + true_validity: &Bitmap, +) -> BlockEntry { let (value, data_type) = (&column.value, &column.data_type); let col = value.convert_to_full_column(data_type, num_rows); - - if matches!(col, Column::Null { .. }) { + if matches!(col, Column::Null { .. }) || col.as_nullable().is_some() { column.clone() - } else if let Some(col) = col.as_nullable() { - if col.len() == 0 { - return BlockEntry::new(data_type.clone(), Value::Scalar(Scalar::Null)); - } - // It's possible validity is longer than col. - let diff_len = validity.len() - col.validity.len(); - let mut new_validity = MutableBitmap::with_capacity(validity.len()); - for (b1, b2) in validity.iter().zip(col.validity.iter()) { - new_validity.push(b1 & b2); - } - new_validity.extend_constant(diff_len, false); - let col = Column::Nullable(Box::new(NullableColumn { - column: col.column.clone(), - validity: new_validity.into(), - })); - BlockEntry::new(data_type.clone(), Value::Column(col)) } else { + let mut validity = true_validity.clone(); + validity.slice(0, num_rows); let col = Column::Nullable(Box::new(NullableColumn { - column: col.clone(), - validity: validity.clone(), + column: col, + validity, })); BlockEntry::new(data_type.wrap_nullable(), Value::Column(col)) } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs index 8e22215d5618..a6fc8d3d2431 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs @@ -54,7 +54,7 @@ use log::info; use parking_lot::Mutex; use parking_lot::RwLock; -use crate::pipelines::processors::transforms::hash_join::common::set_validity; +use crate::pipelines::processors::transforms::hash_join::common::set_true_validity; use crate::pipelines::processors::transforms::hash_join::desc::MARKER_KIND_FALSE; use crate::pipelines::processors::transforms::hash_join::hash_join_state::FixedKeyHashJoinHashTable; use crate::pipelines::processors::transforms::hash_join::hash_join_state::HashJoinHashTable; @@ -171,11 +171,10 @@ impl HashJoinBuildState { let mut validity = MutableBitmap::new(); validity.extend_constant(data_block.num_rows(), true); let 
validity: Bitmap = validity.into(); - let nullable_columns = data_block .columns() .iter() - .map(|c| set_validity(c, validity.len(), &validity)) + .map(|c| set_true_validity(c, validity.len(), &validity)) .collect::>(); data_block = DataBlock::new(nullable_columns, data_block.num_rows()); } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs index 3ed563e25cac..973fe50b3372 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs @@ -18,7 +18,6 @@ use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; use std::sync::Arc; -use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::bitmap::MutableBitmap; use common_base::base::tokio::sync::Barrier; use common_catalog::table_context::TableContext; @@ -46,7 +45,7 @@ use parking_lot::Mutex; use parking_lot::RwLock; use super::ProbeState; -use crate::pipelines::processors::transforms::hash_join::common::set_validity; +use crate::pipelines::processors::transforms::hash_join::common::set_true_validity; use crate::pipelines::processors::transforms::hash_join::desc::MARKER_KIND_FALSE; use crate::pipelines::processors::transforms::hash_join::desc::MARKER_KIND_NULL; use crate::pipelines::processors::transforms::hash_join::desc::MARKER_KIND_TRUE; @@ -169,12 +168,7 @@ impl HashJoinProbeState { let nullable_columns = input .columns() .iter() - .map(|c| { - let mut validity = MutableBitmap::new(); - validity.extend_constant(input.num_rows(), true); - let validity: Bitmap = validity.into(); - set_validity(c, validity.len(), &validity) - }) + .map(|c| set_true_validity(c, input.num_rows(), &probe_state.true_validity)) .collect::>(); input = DataBlock::new(nullable_columns, input.num_rows()); } @@ -456,22 +450,11 @@ impl HashJoinProbeState { if self.hash_join_state.hash_join_desc.join_type == JoinType::Full { let num_rows = unmatched_build_block.num_rows(); - let nullable_unmatched_build_columns = if num_rows == max_block_size { - unmatched_build_block - .columns() - .iter() - .map(|c| set_validity(c, num_rows, true_validity)) - .collect::>() - } else { - let mut validity = MutableBitmap::new(); - validity.extend_constant(num_rows, true); - let validity: Bitmap = validity.into(); - unmatched_build_block - .columns() - .iter() - .map(|c| set_validity(c, num_rows, &validity)) - .collect::>() - }; + let nullable_unmatched_build_columns = unmatched_build_block + .columns() + .iter() + .map(|c| set_true_validity(c, num_rows, true_validity)) + .collect::>(); unmatched_build_block = DataBlock::new(nullable_unmatched_build_columns, num_rows); }; diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs index 0e05b85cadf5..7b30fdb97542 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs @@ -15,8 +15,6 @@ use std::iter::TrustedLen; use std::sync::atomic::Ordering; -use common_arrow::arrow::bitmap::Bitmap; -use common_arrow::arrow::bitmap::MutableBitmap; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::BooleanType; @@ -27,7 +25,7 @@ use 
common_functions::BUILTIN_FUNCTIONS; use common_hashtable::HashJoinHashtableLike; use common_sql::executor::cast_expr_to_non_null_boolean; -use crate::pipelines::processors::transforms::hash_join::common::set_validity; +use crate::pipelines::processors::transforms::hash_join::common::set_true_validity; use crate::pipelines::processors::transforms::hash_join::HashJoinProbeState; use crate::pipelines::processors::transforms::hash_join::ProbeState; @@ -120,16 +118,11 @@ impl HashJoinProbeState { (true, false) => { result_block.get_by_offset(*index).clone().remove_nullable() } - (false, true) => { - let mut validity = MutableBitmap::new(); - validity.extend_constant(result_block.num_rows(), true); - let validity: Bitmap = validity.into(); - set_validity( - result_block.get_by_offset(*index), - validity.len(), - &validity, - ) - } + (false, true) => set_true_validity( + result_block.get_by_offset(*index), + result_block.num_rows(), + &probe_state.true_validity, + ), }; result_block.add_column(entry); } @@ -195,16 +188,11 @@ impl HashJoinProbeState { (true, false) => { result_block.get_by_offset(*index).clone().remove_nullable() } - (false, true) => { - let mut validity = MutableBitmap::new(); - validity.extend_constant(result_block.num_rows(), true); - let validity: Bitmap = validity.into(); - set_validity( - result_block.get_by_offset(*index), - validity.len(), - &validity, - ) - } + (false, true) => set_true_validity( + result_block.get_by_offset(*index), + result_block.num_rows(), + &probe_state.true_validity, + ), }; result_block.add_column(entry); } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs index 0ada98786288..56f6021e2ddd 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs @@ -16,7 +16,6 @@ use std::iter::TrustedLen; use std::sync::atomic::Ordering; use common_arrow::arrow::bitmap::Bitmap; -use common_arrow::arrow::bitmap::MutableBitmap; use common_exception::ErrorCode; use common_exception::Result; use common_expression::BlockEntry; @@ -25,7 +24,7 @@ use common_expression::Scalar; use common_expression::Value; use common_hashtable::HashJoinHashtableLike; -use crate::pipelines::processors::transforms::hash_join::common::set_validity; +use crate::pipelines::processors::transforms::hash_join::common::set_true_validity; use crate::pipelines::processors::transforms::hash_join::HashJoinProbeState; use crate::pipelines::processors::transforms::hash_join::ProbeState; use crate::sql::plans::JoinType; @@ -114,6 +113,7 @@ impl HashJoinProbeState { probe_unmatched_indexes_occupied, is_probe_projected, is_build_projected, + &probe_state.true_validity, string_items_buf, )?); probe_unmatched_indexes_occupied = 0; @@ -135,22 +135,11 @@ impl HashJoinProbeState { )?; // For full join, wrap nullable for probe block if self.hash_join_state.hash_join_desc.join_type == JoinType::Full { - let nullable_probe_columns = if matched_num == max_block_size { - probe_block - .columns() - .iter() - .map(|c| set_validity(c, max_block_size, true_validity)) - .collect::>() - } else { - let mut validity = MutableBitmap::new(); - validity.extend_constant(matched_num, true); - let validity: Bitmap = validity.into(); - probe_block - .columns() - .iter() - .map(|c| set_validity(c, matched_num, &validity)) - .collect::>() - }; + let 
nullable_probe_columns = probe_block + .columns() + .iter() + .map(|c| set_true_validity(c, matched_num, true_validity)) + .collect::>(); probe_block = DataBlock::new(nullable_probe_columns, matched_num); } Some(probe_block) @@ -178,24 +167,12 @@ impl HashJoinProbeState { .collect::>(), matched_num, ) - } else if matched_num == max_block_size { - ( - build_block - .columns() - .iter() - .map(|c| set_validity(c, max_block_size, true_validity)) - .collect::>(), - max_block_size, - ) } else { - let mut validity = MutableBitmap::new(); - validity.extend_constant(matched_num, true); - let validity: Bitmap = validity.into(); ( build_block .columns() .iter() - .map(|c| set_validity(c, matched_num, &validity)) + .map(|c| set_true_validity(c, matched_num, true_validity)) .collect::>(), matched_num, ) @@ -258,6 +235,7 @@ impl HashJoinProbeState { probe_unmatched_indexes_occupied, is_probe_projected, is_build_projected, + &probe_state.true_validity, string_items_buf, )?); Ok(result_blocks) @@ -358,22 +336,11 @@ impl HashJoinProbeState { )?; // For full join, wrap nullable for probe block if self.hash_join_state.hash_join_desc.join_type == JoinType::Full { - let nullable_probe_columns = if matched_num == max_block_size { - probe_block - .columns() - .iter() - .map(|c| set_validity(c, max_block_size, true_validity)) - .collect::>() - } else { - let mut validity = MutableBitmap::new(); - validity.extend_constant(matched_num, true); - let validity: Bitmap = validity.into(); - probe_block - .columns() - .iter() - .map(|c| set_validity(c, matched_num, &validity)) - .collect::>() - }; + let nullable_probe_columns = probe_block + .columns() + .iter() + .map(|c| set_true_validity(c, matched_num, true_validity)) + .collect::>(); probe_block = DataBlock::new(nullable_probe_columns, matched_num) } Some(probe_block) @@ -401,24 +368,12 @@ impl HashJoinProbeState { .collect::>(), matched_num, ) - } else if matched_num == max_block_size { - ( - build_block - .columns() - .iter() - .map(|c| set_validity(c, max_block_size, true_validity)) - .collect::>(), - max_block_size, - ) } else { - let mut validity = MutableBitmap::new(); - validity.extend_constant(matched_num, true); - let validity: Bitmap = validity.into(); ( build_block .columns() .iter() - .map(|c| set_validity(c, matched_num, &validity)) + .map(|c| set_true_validity(c, matched_num, true_validity)) .collect::>(), matched_num, ) @@ -535,6 +490,7 @@ impl HashJoinProbeState { matched_num, is_probe_projected, is_build_projected, + &probe_state.true_validity, string_items_buf, )?); matched_num = 0; @@ -553,11 +509,13 @@ impl HashJoinProbeState { matched_num, is_probe_projected, is_build_projected, + &probe_state.true_validity, string_items_buf, )?); Ok(result_blocks) } + #[allow(clippy::too_many_arguments)] fn create_left_join_null_block( &self, input: &DataBlock, @@ -565,6 +523,7 @@ impl HashJoinProbeState { matched_num: usize, is_probe_projected: bool, is_build_projected: bool, + true_validity: &Bitmap, string_items_buf: &mut Option>, ) -> Result { let probe_block = if is_probe_projected { @@ -575,12 +534,7 @@ impl HashJoinProbeState { let nullable_probe_columns = probe_block .columns() .iter() - .map(|c| { - let mut probe_validity = MutableBitmap::new(); - probe_validity.extend_constant(matched_num, true); - let probe_validity: Bitmap = probe_validity.into(); - set_validity(c, matched_num, &probe_validity) - }) + .map(|c| set_true_validity(c, matched_num, true_validity)) .collect::>(); probe_block = DataBlock::new(nullable_probe_columns, matched_num); } diff 
--git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs index b0f0afbea9f6..a1daa8ef242a 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs @@ -17,14 +17,13 @@ use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; use common_arrow::arrow::bitmap::Bitmap; -use common_arrow::arrow::bitmap::MutableBitmap; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; use common_hashtable::HashJoinHashtableLike; use common_hashtable::RowPtr; -use crate::pipelines::processors::transforms::hash_join::common::set_validity; +use crate::pipelines::processors::transforms::hash_join::common::set_true_validity; use crate::pipelines::processors::transforms::hash_join::HashJoinProbeState; use crate::pipelines::processors::transforms::hash_join::ProbeState; use crate::sql::plans::JoinType; @@ -114,7 +113,7 @@ impl HashJoinProbeState { let nullable_columns = probe_block .columns() .iter() - .map(|c| set_validity(c, max_block_size, true_validity)) + .map(|c| set_true_validity(c, max_block_size, true_validity)) .collect::>(); Some(DataBlock::new(nullable_columns, max_block_size)) } else { @@ -253,15 +252,12 @@ impl HashJoinProbeState { )?; // The join type is right join, we need to wrap nullable for probe side. - let mut validity = MutableBitmap::new(); - validity.extend_constant(matched_num, true); - let validity: Bitmap = validity.into(); let nullable_columns = probe_block .columns() .iter() - .map(|c| set_validity(c, probe_block.num_rows(), &validity)) + .map(|c| set_true_validity(c, matched_num, true_validity)) .collect::>(); - Some(DataBlock::new(nullable_columns, validity.len())) + Some(DataBlock::new(nullable_columns, matched_num)) } else { None }; From 12b37417f1148bbb231909b34cba535297e42703 Mon Sep 17 00:00:00 2001 From: "xudong.w" Date: Thu, 12 Oct 2023 13:30:27 +0800 Subject: [PATCH 02/13] chore: disable spill test (#13224) --- tests/sqllogictests/suites/query/spill.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/sqllogictests/suites/query/spill.test b/tests/sqllogictests/suites/query/spill.test index 2c724fd4099c..51d6e7982f71 100644 --- a/tests/sqllogictests/suites/query/spill.test +++ b/tests/sqllogictests/suites/query/spill.test @@ -4,7 +4,7 @@ statement ok set disable_join_reorder = 1; statement ok -set join_spilling_threshold = 1; +set join_spilling_threshold = 0; statement ok create table t3(a int); @@ -65,7 +65,7 @@ statement ok create table t3 as select number as a from numbers(1000000); statement ok -set join_spilling_threshold = 100; +set join_spilling_threshold = 0; query I select count() from t3 inner join numbers(1000000) on t3.a = number; @@ -74,7 +74,7 @@ select count() from t3 inner join numbers(1000000) on t3.a = number; onlyif mysql statement ok -set join_spilling_threshold = 1024 * 1024 * 1; +set join_spilling_threshold = 0; onlyif mysql query I From 2b2bf8243adf0766e2b1a613034ab89fd90430ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=82=8E=E6=B3=BC?= Date: Thu, 12 Oct 2023 13:30:40 +0800 Subject: [PATCH 03/13] chore: move grpc helper functions to GrpcHelper (#13221) --- src/meta/service/src/grpc_helper.rs | 53 +++++++++++++++++++ src/meta/service/src/lib.rs | 1 + .../src/meta_service/meta_service_impl.rs | 51 
++++-------------- 3 files changed, 65 insertions(+), 40 deletions(-) create mode 100644 src/meta/service/src/grpc_helper.rs diff --git a/src/meta/service/src/grpc_helper.rs b/src/meta/service/src/grpc_helper.rs new file mode 100644 index 000000000000..1ddff1f642d8 --- /dev/null +++ b/src/meta/service/src/grpc_helper.rs @@ -0,0 +1,53 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Helper functions for handling grpc. + +use std::error::Error; + +use common_meta_types::protobuf::RaftReply; +use common_meta_types::protobuf::RaftRequest; + +pub struct GrpcHelper; + +impl GrpcHelper { + /// Parse tonic::Request and decode it into required type. + pub fn parse_req(request: tonic::Request) -> Result + where T: serde::de::DeserializeOwned { + let raft_req = request.into_inner(); + let req: T = serde_json::from_str(&raft_req.data).map_err(Self::invalid_arg)?; + Ok(req) + } + + /// Create an Ok response for raft API. + pub fn ok_response(d: D) -> Result, tonic::Status> + where D: serde::Serialize { + let data = serde_json::to_string(&d).expect("fail to serialize resp"); + let reply = RaftReply { + data, + error: "".to_string(), + }; + Ok(tonic::Response::new(reply)) + } + + /// Create a tonic::Status with invalid argument error. + pub fn invalid_arg(e: impl Error) -> tonic::Status { + tonic::Status::invalid_argument(e.to_string()) + } + + /// Create a tonic::Status with internal error. + pub fn internal_err(e: impl Error) -> tonic::Status { + tonic::Status::internal(e.to_string()) + } +} diff --git a/src/meta/service/src/lib.rs b/src/meta/service/src/lib.rs index ee367ffd0eb0..85277764f040 100644 --- a/src/meta/service/src/lib.rs +++ b/src/meta/service/src/lib.rs @@ -18,6 +18,7 @@ pub mod api; pub mod configs; pub mod export; +pub(crate) mod grpc_helper; pub mod message; pub mod meta_service; pub mod metrics; diff --git a/src/meta/service/src/meta_service/meta_service_impl.rs b/src/meta/service/src/meta_service/meta_service_impl.rs index 5da92e148349..6f51d2d08de7 100644 --- a/src/meta/service/src/meta_service/meta_service_impl.rs +++ b/src/meta/service/src/meta_service/meta_service_impl.rs @@ -15,7 +15,6 @@ //! Meta service impl a grpc server that serves both raft protocol: append_entries, vote and install_snapshot. //! It also serves RPC for user-data access. -use std::error::Error; use std::pin::Pin; use std::sync::Arc; use std::time::Instant; @@ -27,6 +26,7 @@ use common_tracing::func_name; use minitrace::prelude::*; use tonic::codegen::futures_core::Stream; +use crate::grpc_helper::GrpcHelper; use crate::message::ForwardRequest; use crate::meta_service::MetaNode; use crate::metrics::raft_metrics; @@ -50,35 +50,6 @@ impl RaftServiceImpl { raft_metrics::network::incr_recv_bytes_from_peer(addr.to_string(), bytes); } } - - /// Parse tonic::Request and decode it into required type. 
- fn parse_req(request: tonic::Request) -> Result - where T: serde::de::DeserializeOwned { - let raft_req = request.into_inner(); - let req: T = serde_json::from_str(&raft_req.data).map_err(Self::invalid_arg)?; - Ok(req) - } - - /// Create an Ok response for raft API. - fn ok_response(d: D) -> Result, tonic::Status> - where D: serde::Serialize { - let data = serde_json::to_string(&d).expect("fail to serialize resp"); - let reply = RaftReply { - data, - error: "".to_string(), - }; - Ok(tonic::Response::new(reply)) - } - - /// Create a tonic::Status with invalid argument error. - fn invalid_arg(e: impl Error) -> tonic::Status { - tonic::Status::invalid_argument(e.to_string()) - } - - /// Create a tonic::Status with internal error. - fn internal_err(e: impl Error) -> tonic::Status { - tonic::Status::internal(e.to_string()) - } } #[async_trait::async_trait] @@ -90,7 +61,7 @@ impl RaftService for RaftServiceImpl { let root = common_tracing::start_trace_for_remote_request(func_name!(), &request); async { - let forward_req: ForwardRequest = Self::parse_req(request)?; + let forward_req: ForwardRequest = GrpcHelper::parse_req(request)?; let res = self.meta_node.handle_forwardable_request(forward_req).await; @@ -111,15 +82,15 @@ impl RaftService for RaftServiceImpl { async { self.incr_meta_metrics_recv_bytes_from_peer(&request); - let ae_req = Self::parse_req(request)?; + let ae_req = GrpcHelper::parse_req(request)?; let raft = &self.meta_node.raft; let resp = raft .append_entries(ae_req) .await - .map_err(Self::internal_err)?; + .map_err(GrpcHelper::internal_err)?; - Self::ok_response(resp) + GrpcHelper::ok_response(resp) } .in_span(root) .await @@ -142,13 +113,13 @@ impl RaftService for RaftServiceImpl { self.incr_meta_metrics_recv_bytes_from_peer(&request); raft_metrics::network::incr_snapshot_recv_inflights_from_peer(addr.clone(), 1); - let is_req = Self::parse_req(request)?; + let is_req = GrpcHelper::parse_req(request)?; let raft = &self.meta_node.raft; let resp = raft .install_snapshot(is_req) .await - .map_err(Self::internal_err); + .map_err(GrpcHelper::internal_err); raft_metrics::network::sample_snapshot_recv( addr.clone(), @@ -158,7 +129,7 @@ impl RaftService for RaftServiceImpl { raft_metrics::network::incr_snapshot_recv_status_from_peer(addr.clone(), resp.is_ok()); match resp { - Ok(resp) => Self::ok_response(resp), + Ok(resp) => GrpcHelper::ok_response(resp), Err(e) => Err(e), } } @@ -175,12 +146,12 @@ impl RaftService for RaftServiceImpl { async { self.incr_meta_metrics_recv_bytes_from_peer(&request); - let v_req = Self::parse_req(request)?; + let v_req = GrpcHelper::parse_req(request)?; let raft = &self.meta_node.raft; - let resp = raft.vote(v_req).await.map_err(Self::internal_err)?; + let resp = raft.vote(v_req).await.map_err(GrpcHelper::internal_err)?; - Self::ok_response(resp) + GrpcHelper::ok_response(resp) } .in_span(root) .await From 1a8b79d7852dce100203d90fb920f4fd15c96b37 Mon Sep 17 00:00:00 2001 From: JackTan25 <60096118+JackTan25@users.noreply.github.com> Date: Thu, 12 Oct 2023 14:41:43 +0800 Subject: [PATCH 04/13] fix: fix predicate_index and not matched null cast (#13208) * fix predicate_index and not matched null cast * use u32 instead of uszie * fix test * add log * use u64 --- .../src/interpreters/interpreter_merge_into.rs | 8 ++++++-- src/query/sql/src/planner/binder/merge_into.rs | 10 ++++++++-- src/query/sql/src/planner/plans/update.rs | 1 + .../merge_into/mutator/matched_mutator.rs | 5 +++++ .../base/09_fuse_engine/09_0026_merge_into | 17 ++++++++++++++++- 5 files 
changed, 36 insertions(+), 5 deletions(-) diff --git a/src/query/service/src/interpreters/interpreter_merge_into.rs b/src/query/service/src/interpreters/interpreter_merge_into.rs index b046407d98fb..ea7d5ba70e90 100644 --- a/src/query/service/src/interpreters/interpreter_merge_into.rs +++ b/src/query/service/src/interpreters/interpreter_merge_into.rs @@ -15,6 +15,7 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Instant; +use std::u64::MAX; use common_base::runtime::GlobalIORuntime; use common_exception::ErrorCode; @@ -33,6 +34,7 @@ use common_sql::executor::PhysicalPlan; use common_sql::executor::PhysicalPlanBuilder; use common_sql::plans::MergeInto as MergePlan; use common_sql::plans::UpdatePlan; +use common_sql::IndexType; use common_sql::ScalarExpr; use common_sql::TypeCheck; use common_storages_factory::Table; @@ -51,6 +53,8 @@ use crate::pipelines::PipelineBuildResult; use crate::schedulers::build_query_pipeline_without_render_result_set; use crate::sessions::QueryContext; +// predicate_index should not be conflict with update expr's column_binding's index. +pub const PREDICATE_COLUMN_INDEX: IndexType = MAX as usize; const DUMMY_COL_INDEX: usize = 1; pub struct MergeIntoInterpreter { ctx: Arc, @@ -260,7 +264,7 @@ impl MergeIntoInterpreter { self.ctx.clone(), fuse_table.schema().into(), col_indices, - Some(join_output_schema.num_fields()), + Some(PREDICATE_COLUMN_INDEX), target_alias.is_some(), )?; let update_list = update_list @@ -274,7 +278,7 @@ impl MergeIntoInterpreter { // there will add a predicate col when we process matched clauses. // so it's not in join_output_schema for now. But it's must be added // to the tail, so let do it like below. - if name == &join_output_schema.num_fields().to_string() { + if *name == PREDICATE_COLUMN_INDEX.to_string() { join_output_schema.num_fields() } else { join_output_schema.index_of(name).unwrap() diff --git a/src/query/sql/src/planner/binder/merge_into.rs b/src/query/sql/src/planner/binder/merge_into.rs index b198a9b8648c..ac7c1e914417 100644 --- a/src/query/sql/src/planner/binder/merge_into.rs +++ b/src/query/sql/src/planner/binder/merge_into.rs @@ -408,8 +408,15 @@ impl Binder { let mut values = Vec::with_capacity(default_schema.num_fields()); let update_columns_star = update_columns_star.unwrap(); for idx in 0..default_schema.num_fields() { - values.push(update_columns_star.get(&idx).unwrap().clone()); + let scalar = update_columns_star.get(&idx).unwrap().clone(); + // cast expr + values.push(wrap_cast_scalar( + &scalar, + &scalar.data_type()?, + &DataType::from(default_schema.field(idx).data_type()), + )?); } + Ok(UnmatchedEvaluator { source_schema: Arc::new(Arc::new(default_schema).into()), condition, @@ -423,7 +430,6 @@ impl Binder { } let mut values = Vec::with_capacity(clause.insert_operation.values.len()); - // we need to get source schema, and use it for filling columns. let source_schema = if let Some(fields) = clause.insert_operation.columns.clone() { self.schema_project(&table_schema, &fields)? 
diff --git a/src/query/sql/src/planner/plans/update.rs b/src/query/sql/src/planner/plans/update.rs index 591e26571421..a9e475005f0c 100644 --- a/src/query/sql/src/planner/plans/update.rs +++ b/src/query/sql/src/planner/plans/update.rs @@ -114,6 +114,7 @@ impl UpdatePlan { let mut right = right.ok_or_else(|| ErrorCode::Internal("It's a bug"))?; let right_data_type = right.data_type()?; + // cornor case: for merge into, if target_table's fields are not null, when after bind_join, it will // change into nullable, so we need to cast this. right = wrap_cast_scalar(&right, &right_data_type, target_type)?; diff --git a/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs b/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs index 05704c975bff..bbdb8fbec0b3 100644 --- a/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs +++ b/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs @@ -211,7 +211,12 @@ impl MatchedAggregator { let permit = acquire_task_permit(self.io_request_semaphore.clone()).await?; let aggregation_ctx = self.aggregation_ctx.clone(); let segment_info = segment_infos.get(&segment_idx).unwrap(); + info!( + "merge into apply: segment_idx:{},blk_idx:{}", + segment_idx, block_idx + ); let block_idx = segment_info.blocks.len() - block_idx as usize - 1; + assert!(block_idx < segment_info.blocks.len()); // the row_id is generated by block_id, not block_idx,reference to fill_internal_column_meta() let block_meta = segment_info.blocks[block_idx].clone(); diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into b/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into index 5dd2d4acf453..81b24a93b422 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into @@ -545,6 +545,21 @@ select * from target_test order by a; 3 f 5 f +## test not match cast and predicate index statement ok -set enable_experimental_merge_into = 0; +drop table if exists test_order; + +statement ok +drop table if exists random_source; +statement ok +create table test_order(id bigint, id1 bigint, id2 bigint, id3 bigint, id4 bigint, id5 bigint, id6 bigint, id7 bigint, s1 varchar, s2 varchar, s3 varchar, s4 varchar, s5 varchar, s6 varchar, s7 varchar, s8 varchar, s9 varchar, s10 varchar, s11 varchar, s12 varchar, s13 varchar, d1 DECIMAL(20, 8), d2 DECIMAL(20, 8), d3 DECIMAL(20, 8), d4 DECIMAL(20, 8), d5 DECIMAL(20, 8), d6 DECIMAL(30, 8), d7 DECIMAL(30, 8), d8 DECIMAL(30, 8), d9 DECIMAL(30, 8), d10 DECIMAL(30, 8),insert_time datetime, insert_time1 datetime, insert_time2 datetime, insert_time3 datetime,i int) CLUSTER BY(to_yyyymmdd(insert_time), id) bloom_index_columns='insert_time,id'; + +statement ok +create table random_source(id bigint not null, id1 bigint, id2 bigint, id3 bigint, id4 bigint, id5 bigint, id6 bigint, id7 bigint,s1 varchar, s2 varchar, s3 varchar, s4 varchar, s5 varchar, s6 varchar, s7 varchar, s8 varchar, s9 varchar, s10 varchar, s11 varchar, s12 varchar, s13 varchar,d1 DECIMAL(20, 8), d2 DECIMAL(20, 8), d3 DECIMAL(20, 8), d4 DECIMAL(20, 8), d5 DECIMAL(20, 8), d6 DECIMAL(30, 8), d7 DECIMAL(30, 8), d8 DECIMAL(30, 8), d9 DECIMAL(30, 8), d10 DECIMAL(30, 8),insert_time datetime not null, insert_time1 datetime, insert_time2 datetime, insert_time3 datetime,i int) Engine = Random; + +statement ok +merge into test_order as t using (select id,34 as id1,238 as id2, id3, id4, id5, id6, id7,s1, s2, 
s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13,d1, d2, d3, d4, d5, d6, d7, d8, d9, d10,insert_time,insert_time1,insert_time2,insert_time3,i from random_source limit 1) as s on t.id = s.id and t.insert_time = s.insert_time when matched then update * when not matched then insert *; + +statement ok +set enable_experimental_merge_into = 0; From 31bf85bd10fba4d2dbf78e2013121e2a6041887d Mon Sep 17 00:00:00 2001 From: zhyass Date: Thu, 12 Oct 2023 14:46:15 +0800 Subject: [PATCH 05/13] fix: compact limit get no affect rows (#13210) * fix: compact bug * add test case * Update src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs Co-authored-by: Sky Fan <3374614481@qq.com> * update * fix lint --------- Co-authored-by: Sky Fan <3374614481@qq.com> Co-authored-by: dantengsky --- .../transform_mutation_aggregator.rs | 19 ++++- .../mutation/compact/block_compact_mutator.rs | 24 ++++-- .../09_0008_fuse_optimize_table | 80 +++++++++++++++++++ 3 files changed, 114 insertions(+), 9 deletions(-) diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs b/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs index b648d643aaef..0d6ade3d2f72 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs @@ -31,6 +31,7 @@ use common_sql::executor::MutationKind; use itertools::Itertools; use log::debug; use log::info; +use log::warn; use opendal::Operator; use storages_common_table_meta::meta::BlockMeta; use storages_common_table_meta::meta::Location; @@ -307,6 +308,7 @@ impl TableMutationAggregator { let op = self.dal.clone(); let location_gen = self.location_gen.clone(); + let mut all_perfect = false; tasks.push(async move { let (new_blocks, origin_summary) = if let Some(loc) = location { // read the old segment @@ -341,6 +343,9 @@ impl TableMutationAggregator { } else { // use by compact. assert!(segment_mutation.deleted_blocks.is_empty()); + // There are more than 1 blocks, means that the blocks can no longer be compacted. + // They can be marked as perfect blocks. + all_perfect = segment_mutation.replaced_blocks.len() > 1; let new_blocks = segment_mutation .replaced_blocks .into_iter() @@ -350,14 +355,24 @@ impl TableMutationAggregator { (new_blocks, None) }; + let location = location_gen.gen_segment_info_location(); // re-calculate the segment statistics - let new_summary = + let mut new_summary = reduce_block_metas(&new_blocks, thresholds, default_cluster_key_id); + if all_perfect { + // To fix issue #13217. + if new_summary.block_count > new_summary.perfect_block_count { + warn!( + "compact: generate new segment: {}, perfect_block_count: {}, block_count: {}", + location, new_summary.perfect_block_count, new_summary.block_count, + ); + new_summary.perfect_block_count = new_summary.block_count; + } + } // create new segment info let new_segment = SegmentInfo::new(new_blocks, new_summary.clone()); // write the segment info. 
- let location = location_gen.gen_segment_info_location(); let serialized_segment = SerializedSegment { path: location.clone(), segment: Arc::new(new_segment), diff --git a/src/query/storages/fuse/src/operations/mutation/compact/block_compact_mutator.rs b/src/query/storages/fuse/src/operations/mutation/compact/block_compact_mutator.rs index ddccd71cf9ba..f9a197bd6a1e 100644 --- a/src/query/storages/fuse/src/operations/mutation/compact/block_compact_mutator.rs +++ b/src/query/storages/fuse/src/operations/mutation/compact/block_compact_mutator.rs @@ -536,16 +536,26 @@ impl CompactTaskBuilder { // The clustering table cannot compact different level blocks. self.build_task(&mut tasks, &mut unchanged_blocks, block_idx, tail); } else { - let (index, mut blocks) = if latest_flag { - unchanged_blocks - .pop() - .map_or((0, vec![]), |(k, v)| (k, vec![v])) + let mut blocks = if latest_flag { + unchanged_blocks.pop().map_or(vec![], |(_, v)| vec![v]) } else { - tasks.pop_back().unwrap_or((0, vec![])) + tasks.pop_back().map_or(vec![], |(_, v)| v) }; - blocks.extend(tail); - tasks.push_back((index, blocks)); + let (total_rows, total_size) = + blocks.iter().chain(tail.iter()).fold((0, 0), |mut acc, x| { + acc.0 += x.row_count as usize; + acc.1 += x.block_size as usize; + acc + }); + if self.thresholds.check_for_compact(total_rows, total_size) { + blocks.extend(tail); + self.build_task(&mut tasks, &mut unchanged_blocks, block_idx, blocks); + } else { + // blocks > 2N + self.build_task(&mut tasks, &mut unchanged_blocks, block_idx, blocks); + self.build_task(&mut tasks, &mut unchanged_blocks, block_idx + 1, tail); + } } } diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0008_fuse_optimize_table b/tests/sqllogictests/suites/base/09_fuse_engine/09_0008_fuse_optimize_table index c238f4d50981..5dfdea336e97 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0008_fuse_optimize_table +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0008_fuse_optimize_table @@ -668,6 +668,83 @@ select segment_count, block_count from fuse_snapshot('db_09_0008', 't12') limit +# For PR#13210 +statement ok +create table t13(a int) row_per_block=10 block_per_segment=2; + +statement ok +insert into t13 select number from numbers(7) + +statement ok +insert into t13 select number from numbers(7) + +statement ok +insert into t13 select number from numbers(7) + +statement ok +insert into t13 select number from numbers(7) + +statement ok +optimize table t13 compact limit 2 + +query II +select block_count, row_count from fuse_segment('db_09_0008', 't13') +---- +1 14 +1 7 +1 7 + +statement ok +insert into t13 select number from numbers(7) + +statement ok +optimize table t13 compact limit 2 + +query II +select block_count, row_count from fuse_segment('db_09_0008', 't13') +---- +2 21 +1 7 +1 7 + +statement ok +optimize table t13 compact limit 2 + +query II +select block_count, row_count from fuse_segment('db_09_0008', 't13') +---- +2 21 +1 14 + +statement ok +insert into t13 select number from numbers(7) + +statement ok +optimize table t13 compact + +query I +select row_count from fuse_block('db_09_0008', 't13') +---- +14 +14 +14 + +statement ok +insert into t13 select number from numbers(7) + +statement ok +optimize table t13 compact + +query I +select row_count from fuse_block('db_09_0008', 't13') +---- +14 +14 +7 +14 + + + statement ok DROP TABLE m @@ -710,6 +787,9 @@ DROP TABLE t11 statement ok DROP TABLE t12 +statement ok +DROP TABLE t13 + statement ok DROP DATABASE db_09_0008 From 
d18c095a267978b85126dc453e257522a38db868 Mon Sep 17 00:00:00 2001 From: everpcpc Date: Thu, 12 Oct 2023 15:11:26 +0800 Subject: [PATCH 06/13] chore(ci): build python binding with self hosted runner (#13225) --- .../actions/build_bindings_python/action.yml | 20 +----- .github/workflows/bindings.python.yml | 61 ++++++++----------- .github/workflows/dev.yml | 2 + scripts/setup/dev_setup.sh | 3 +- 4 files changed, 31 insertions(+), 55 deletions(-) diff --git a/.github/actions/build_bindings_python/action.yml b/.github/actions/build_bindings_python/action.yml index d9b37216cf24..d2bee5df758e 100644 --- a/.github/actions/build_bindings_python/action.yml +++ b/.github/actions/build_bindings_python/action.yml @@ -24,32 +24,24 @@ runs: id: toolchain shell: bash run: | - bash ./scripts/setup/dev_setup.sh -yb RUST_TOOLCHAIN=$(awk -F'[ ="]+' '$1 == "channel" { print $2 }' rust-toolchain.toml) echo "RUST_TOOLCHAIN=${RUST_TOOLCHAIN}" >> $GITHUB_OUTPUT - # NOTE: for exporting ACTIONS_RUNTIME_TOKEN and ACTIONS_CACHE_URL - - name: Expose GitHub Runtime - uses: crazy-max/ghaction-github-runtime@v2 - if: env.RUNNER_PROVIDER == 'github' - - name: Get opts id: opts shell: bash run: | - echo "DOCKER_OPTS=--env RUSTC_WRAPPER=sccache --env SCCACHE_GHA_ENABLED=true" >> $GITHUB_OUTPUT - if [[ "${{ github.event_name }}" == "pull_request" ]]; then + if [[ -z "${{ inputs.version }}" ]]; then echo "BUILD_ARGS=--strip --out dist" >> $GITHUB_OUTPUT - echo "BUILD_PROFILE=debug" >> $GITHUB_ENV else echo "BUILD_ARGS=--release --strip --out dist" >> $GITHUB_OUTPUT - echo "BUILD_PROFILE=release" >> $GITHUB_ENV fi - name: Cross setup for macOS if: endsWith(inputs.target, '-darwin') shell: bash run: | + bash ./scripts/setup/dev_setup.sh -yb echo "JEMALLOC_SYS_WITH_LG_PAGE=14" >> $GITHUB_ENV echo "JEMALLOC_SYS_WITH_MALLOC_CONF=oversize_threshold:0,dirty_decay_ms:5000,muzzy_decay_ms:5000" >> $GITHUB_ENV @@ -63,7 +55,7 @@ runs: # Keep them in one line due to https://github.com/PyO3/maturin-action/issues/153 rustup-components: rust-std rustfmt args: ${{ steps.opts.outputs.BUILD_ARGS }} - docker-options: ${{ steps.opts.outputs.DOCKER_OPTS }} + docker-options: --env RUSTC_WRAPPER=sccache --env SCCACHE_GCS_RW_MODE=READ_WRITE --env SCCACHE_GCS_BUCKET=databend-ci --env SCCACHE_GCS_KEY_PREFIX=cache/sccache/ before-script-linux: ../../scripts/setup/dev_setup.sh -yb - name: Run tests @@ -74,9 +66,3 @@ runs: pip install dist/*.whl pip install pytest pyarrow pandas polars pytest -v tests/* - - - name: Upload artifact - uses: actions/upload-artifact@v3 - with: - name: dist - path: src/bendpy/dist/*.whl diff --git a/.github/workflows/bindings.python.yml b/.github/workflows/bindings.python.yml index 0398eeb2b97b..9dcb37cb9a78 100644 --- a/.github/workflows/bindings.python.yml +++ b/.github/workflows/bindings.python.yml @@ -6,6 +6,7 @@ on: - main paths: - "src/**" + - ".github/workflows/bindings.python.yml" workflow_call: inputs: tag: @@ -17,32 +18,15 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} cancel-in-progress: true -env: - RUNNER_PROVIDER: github - jobs: - build_linux: - name: build-${{ matrix.target }} - runs-on: ubuntu-latest + linux: + name: ${{ matrix.target }} + runs-on: [self-hosted, X64, Linux, 8c16g, gcp] strategy: matrix: target: - x86_64-unknown-linux-gnu steps: - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - # all of these default to 
true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: false - docker-images: true - swap-storage: true - uses: actions/checkout@v4 with: fetch-depth: 0 @@ -50,11 +34,20 @@ jobs: with: target: ${{ matrix.target }} version: ${{ inputs.tag }} + - name: Publish to PyPI + if: inputs.tag + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_PASSWORD }} + uses: PyO3/maturin-action@v1 + with: + command: upload + args: --skip-existing * + working-directory: src/bendpy - build_macos: - name: build-${{ matrix.target }} - if: github.event_name != 'pull_request' - runs-on: macos-11 + macos: + if: inputs.tag + name: ${{ matrix.target }} + runs-on: macos-latest strategy: matrix: target: @@ -68,18 +61,12 @@ jobs: with: target: ${{ matrix.target }} version: ${{ inputs.tag }} - - release: - # publish release only the version endsWith 0 - # if: endsWith(inputs.tag, '0') - if: inputs.tag - name: Publish to PyPI - needs: [build_linux, build_macos] - runs-on: ubuntu-latest - steps: - - uses: actions/download-artifact@v3 - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + if: inputs.tag + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_PASSWORD }} + uses: PyO3/maturin-action@v1 with: - password: ${{ secrets.pypi_password }} - skip-existing: true + command: upload + args: --skip-existing * + working-directory: src/bendpy diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index b5cabfd7fa72..79ce331fe2bb 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -21,6 +21,8 @@ jobs: any_src_changed: ${{ steps.src.outputs.any_changed }} steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Check Source File Changes uses: tj-actions/changed-files@v39 id: src diff --git a/scripts/setup/dev_setup.sh b/scripts/setup/dev_setup.sh index 1f7acb627c99..e3b7e4970b98 100755 --- a/scripts/setup/dev_setup.sh +++ b/scripts/setup/dev_setup.sh @@ -578,8 +578,9 @@ if [[ "$INSTALL_CHECK_TOOLS" == "true" ]]; then if [[ -f scripts/setup/rust-tools.txt ]]; then export RUSTFLAGS="-C target-feature=-crt-static" cargo install cargo-quickinstall + cargo quickinstall cargo-binstall while read -r tool; do - cargo quickinstall "$tool" + cargo binstall "$tool" done Date: Thu, 12 Oct 2023 15:21:38 +0800 Subject: [PATCH 07/13] chore(ci): fix build tool binstall arg (#13226) --- .github/actions/setup_bendsql/action.yml | 14 +++----------- scripts/setup/dev_setup.sh | 2 +- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/.github/actions/setup_bendsql/action.yml b/.github/actions/setup_bendsql/action.yml index f4d237610ec7..ac1e26f4793a 100644 --- a/.github/actions/setup_bendsql/action.yml +++ b/.github/actions/setup_bendsql/action.yml @@ -6,15 +6,7 @@ runs: - name: Download and Install shell: bash run: | - - version=$(gh release view --repo datafuselabs/bendsql --json name | jq -r '.name') - deb_version=${version/v/} - wget -q https://github.com/datafuselabs/bendsql/releases/download/${version}/bendsql_${deb_version}_amd64.deb - sudo dpkg -i bendsql_${deb_version}_amd64.deb + sudo curl -L -o /etc/apt/sources.list.d/datafuselabs.sources https://repo.databend.rs/deb/datafuselabs.sources + sudo apt update + sudo apt install -y bendsql bendsql --version - - # sudo curl -L -o /usr/share/keyrings/datafuselabs-keyring.gpg https://repo.databend.rs/deb/datafuselabs.gpg - # sudo curl -L -o /etc/apt/sources.list.d/datafuselabs.list https://repo.databend.rs/deb/datafuselabs.list - # sudo apt update - # sudo 
apt install -y bendsql - # bendsql --version diff --git a/scripts/setup/dev_setup.sh b/scripts/setup/dev_setup.sh index e3b7e4970b98..b8fd7244bd68 100755 --- a/scripts/setup/dev_setup.sh +++ b/scripts/setup/dev_setup.sh @@ -580,7 +580,7 @@ if [[ "$INSTALL_CHECK_TOOLS" == "true" ]]; then cargo install cargo-quickinstall cargo quickinstall cargo-binstall while read -r tool; do - cargo binstall "$tool" + cargo binstall -y "$tool" done Date: Thu, 12 Oct 2023 15:37:13 +0800 Subject: [PATCH 08/13] docs: aggregating index (#13218) * Update toweekofyear.md * added * update * fixed comments --- .../02-enterprise/10-enterprise-features.md | 1 + .../103-aggregating-index/_category_.json | 3 + .../create-aggregating-index.md | 38 +++++++ .../drop-aggregating-index.md | 27 +++++ .../00-ddl/103-aggregating-index/index.md | 101 ++++++++++++++++++ .../refresh-aggregating-index.md | 33 ++++++ .../30-datetime-functions/toweekofyear.md | 2 +- 7 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 docs/doc/14-sql-commands/00-ddl/103-aggregating-index/_category_.json create mode 100644 docs/doc/14-sql-commands/00-ddl/103-aggregating-index/create-aggregating-index.md create mode 100644 docs/doc/14-sql-commands/00-ddl/103-aggregating-index/drop-aggregating-index.md create mode 100644 docs/doc/14-sql-commands/00-ddl/103-aggregating-index/index.md create mode 100644 docs/doc/14-sql-commands/00-ddl/103-aggregating-index/refresh-aggregating-index.md diff --git a/docs/doc/02-enterprise/10-enterprise-features.md b/docs/doc/02-enterprise/10-enterprise-features.md index 24e2f1bebb4a..a98b6bb54d5a 100644 --- a/docs/doc/02-enterprise/10-enterprise-features.md +++ b/docs/doc/02-enterprise/10-enterprise-features.md @@ -6,6 +6,7 @@ This page provides an updated list of available enterprise features. To access t | Feature | Description | |--------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [Aggregating Index](../14-sql-commands/00-ddl/103-aggregating-index/index.md) | Elevate your query speed with aggregating indexes:
- Supercharge queries through precomputed and indexed aggregations.<br/>- Customize the index to meet your unique data analysis requirements. |
| [Masking Policy](../14-sql-commands/00-ddl/102-mask-policy/index.md) | Enhance your data security with role-based masking feature:<br/>- Safeguard sensitive information through customizable data masking.<br/>- Preserve data usability while reinforcing security. |
| [Vacuum Dropped Table](../14-sql-commands/00-ddl/20-table/91-vacuum-drop-table.md) | Optimize storage and data management for dropped tables:<br/>- Efficiently free up storage by removing dropped tables' data files.<br/>- Utilize the 'Retain N hours' option to specify a time window during which dropped table data files are retained for potential recovery.<br/>- Safely preview the removal of data files using the dry-run option. |
| [Vacuum Historical Data](../14-sql-commands/00-ddl/20-table/91-vacuum-table.md) | Deep clean your storage space:<br/>- Remove orphan segment and block files.
- Safely preview the removal of data files using the dry-run option. | diff --git a/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/_category_.json b/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/_category_.json new file mode 100644 index 000000000000..da29fd8bb992 --- /dev/null +++ b/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/_category_.json @@ -0,0 +1,3 @@ +{ + "label": "Aggregating Index" +} \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/create-aggregating-index.md b/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/create-aggregating-index.md new file mode 100644 index 000000000000..81a69b17591f --- /dev/null +++ b/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/create-aggregating-index.md @@ -0,0 +1,38 @@ +--- +title: CREATE AGGREGATING INDEX +--- + +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +import EEFeature from '@site/src/components/EEFeature'; + + + +Creates a new aggregating index in Databend. + +## Syntax + +```sql +CREATE AGGREGATING INDEX AS SELECT ... +``` + +- When creating aggregating indexes, limit their usage to standard aggregate functions (e.g., AVG, SUM, MIN, MAX, COUNT), while keeping in mind that GROUPING SETS, window functions, LIMIT, and ORDER BY are not accepted. + +- The query filter scope defined when creating aggregating indexes should either match or encompass the scope of your actual queries. + +- To confirm if an aggregating index works for a query, use the [EXPLAIN](../../90-explain-cmds/explain.md) command to analyze the query. + +## Examples + +This example creates an aggregating index named *my_agg_index* for the query "SELECT MIN(a), MAX(c) FROM agg": + +```sql +-- Prepare data +CREATE TABLE agg(a int, b int, c int); +INSERT INTO agg VALUES (1,1,4), (1,2,1), (1,2,4), (2,2,5); + +-- Create an aggregating index +CREATE AGGREGATING INDEX my_agg_index AS SELECT MIN(a), MAX(c) FROM agg; +``` \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/drop-aggregating-index.md b/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/drop-aggregating-index.md new file mode 100644 index 000000000000..a039496c2255 --- /dev/null +++ b/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/drop-aggregating-index.md @@ -0,0 +1,27 @@ +--- +title: DROP AGGREGATING INDEX +--- + +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +import EEFeature from '@site/src/components/EEFeature'; + + + +Deletes an existing aggregating index. Please note that deleting an aggregating index does NOT remove the associated storage blocks. To delete the blocks as well, use the [VACUUM TABLE](../20-table/91-vacuum-table.md) command. To disable the aggregating indexing feature, set 'enable_aggregating_index_scan' to 0. 
+ +## Syntax + +```sql +DROP AGGREGATING INDEX +``` + +## Examples + +This example deleted an aggregating index named *my_agg_index*: + +```sql +DROP AGGREGATING INDEX my_agg_index; +``` \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/index.md b/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/index.md new file mode 100644 index 000000000000..7b1226684d19 --- /dev/null +++ b/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/index.md @@ -0,0 +1,101 @@ +--- +title: AGGREGATING INDEX +--- +import IndexOverviewList from '@site/src/components/IndexOverviewList'; +import EEFeature from '@site/src/components/EEFeature'; + + + +### Why Aggregating Index? + +The primary purpose of the aggregating index is to enhance query performance, especially in scenarios involving aggregation queries such as MIN, MAX, and SUM. It achieves this by precomputing and storing query results separately in blocks, eliminating the need to scan the entire table and thereby speeding up data retrieval. + +The feature also incorporates a refresh mechanism that enables you to update and save the latest query results as needed, ensuring that the query responses consistently reflect the most current data. This manual control allows you to maintain data accuracy and reliability by refreshing the results when deemed necessary. + +Please note the following when creating aggregating indexes: + +- When creating aggregating indexes, limit their usage to standard aggregate functions (e.g., AVG, SUM, MIN, MAX, COUNT), while keeping in mind that GROUPING SETS, window functions, LIMIT, and ORDER BY are not accepted. + +- The query filter scope defined when creating aggregating indexes should either match or encompass the scope of your actual queries. + +- To confirm if an aggregating index works for a query, use the [EXPLAIN](../../90-explain-cmds/explain.md) command to analyze the query. + +Databend recommends refreshing an aggregating index before executing a query that relies on it to retrieve the most up-to-date data (while Databend Cloud automatically refreshes aggregating indexes for you). If you no longer need an aggregating index, consider deleting it. Please note that deleting an aggregating index does NOT remove the associated storage blocks. To delete the blocks as well, use the [VACUUM TABLE](../20-table/91-vacuum-table.md) command. To disable the aggregating indexing feature, set 'enable_aggregating_index_scan' to 0. + +### Implementing Aggregating Index + +Databend provides the following commands to manage aggregating indexes: + + + +### Usage Example + +This example demonstrates the utilization of aggregating indexes and illustrates their impact on the query execution plan. 
+ +```sql +-- Prepare data +CREATE TABLE agg(a int, b int, c int); +INSERT INTO agg VALUES (1,1,4), (1,2,1), (1,2,4), (2,2,5); + +-- Create an aggregating index +CREATE AGGREGATING INDEX my_agg_index AS SELECT MIN(a), MAX(c) FROM agg; + +-- Refresh the aggregating index +REFRESH AGGREGATING INDEX my_agg_index; + +-- Verify if the aggregating index works +EXPLAIN SELECT MIN(a), MAX(c) FROM agg; + +explain | +----------------------------------------------------------------------------------------------------------------------+ +AggregateFinal | +├── output columns: [MIN(a) (#8), MAX(c) (#9)] | +├── group by: [] | +├── aggregate functions: [min(a), max(c)] | +├── estimated rows: 1.00 | +└── AggregatePartial | + ├── output columns: [MIN(a) (#8), MAX(c) (#9)] | + ├── group by: [] | + ├── aggregate functions: [min(a), max(c)] | + ├── estimated rows: 1.00 | + └── TableScan | + ├── table: default.default.agg | + ├── output columns: [a (#5), c (#7)] | + ├── read rows: 4 | + ├── read bytes: 61 | + ├── partitions total: 1 | + ├── partitions scanned: 1 | + ├── pruning stats: [segments: , blocks: ]| + ├── push downs: [filters: [], limit: NONE] | + ├── aggregating index: [SELECT MIN(a), MAX(c) FROM default.agg] | + ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1)]] | + └── estimated rows: 4.00 | + +-- Delete the aggregating index +DROP AGGREGATING INDEX my_agg_index; + +EXPLAIN SELECT MIN(a), MAX(c) FROM agg; + +explain | +----------------------------------------------------------------------------------------------------------------------+ +AggregateFinal | +├── output columns: [MIN(a) (#3), MAX(c) (#4)] | +├── group by: [] | +├── aggregate functions: [min(a), max(c)] | +├── estimated rows: 1.00 | +└── AggregatePartial | + ├── output columns: [MIN(a) (#3), MAX(c) (#4)] | + ├── group by: [] | + ├── aggregate functions: [min(a), max(c)] | + ├── estimated rows: 1.00 | + └── TableScan | + ├── table: default.default.agg | + ├── output columns: [a (#0), c (#2)] | + ├── read rows: 4 | + ├── read bytes: 61 | + ├── partitions total: 1 | + ├── partitions scanned: 1 | + ├── pruning stats: [segments: , blocks: ]| + ├── push downs: [filters: [], limit: NONE] | + └── estimated rows: 4.00 | +``` \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/refresh-aggregating-index.md b/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/refresh-aggregating-index.md new file mode 100644 index 000000000000..63c4037bfaf4 --- /dev/null +++ b/docs/doc/14-sql-commands/00-ddl/103-aggregating-index/refresh-aggregating-index.md @@ -0,0 +1,33 @@ +--- +title: REFRESH AGGREGATING INDEX +--- + +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +import EEFeature from '@site/src/components/EEFeature'; + + + +Refreshes an aggregating index to update its stored results. Databend recommends refreshing an aggregating index before executing a query that relies on it to retrieve the most up-to-date data. + +:::note +When using Databend Cloud, manual execution of this refresh command is unnecessary, as the system automatically handles index updates for you. +::: + +## Syntax + +```sql +REFRESH AGGREGATING INDEX [LIMIT ] +``` + +The "LIMIT" parameter allows you to control the maximum number of blocks that can be updated with each refresh action. It is strongly recommended to use this parameter with a defined limit to optimize memory usage. Please also note that setting a limit may result in partial data updates. 
For example, if you have 100 blocks but set a limit of 10, a single refresh might not update the most recent data, potentially leaving some blocks unrefreshed. You may need to execute multiple refresh actions to ensure a complete update. + +## Examples + +This example refreshes an aggregating index named *my_agg_index*: + +```sql +REFRESH AGGREGATING INDEX my_agg_index; +``` \ No newline at end of file diff --git a/docs/doc/15-sql-functions/30-datetime-functions/toweekofyear.md b/docs/doc/15-sql-functions/30-datetime-functions/toweekofyear.md index fd8fdd6baabc..8c2e44481832 100644 --- a/docs/doc/15-sql-functions/30-datetime-functions/toweekofyear.md +++ b/docs/doc/15-sql-functions/30-datetime-functions/toweekofyear.md @@ -23,7 +23,7 @@ TO_WEEK_OF_YEAR() ## Return Type -Returns an integer that represents the week number within a year, with numbering starting from 1. +Returns an integer that represents the week number within a year, with numbering ranging from 1 to 53. ## Examples From b9ba3f0e22c941a07487a6e167a9eff7503d358e Mon Sep 17 00:00:00 2001 From: Sky Fan <3374614481@qq.com> Date: Thu, 12 Oct 2023 17:35:30 +0800 Subject: [PATCH 09/13] feat: replace support delete only when matched (#13118) * parser * bind * fix bind * fix typo * fix empty block * add test * fix conflict * make lint * fix slt * move some plan to heap * fix conflict * fix schema * fix project * fix clippy * fix column id * fix conflict * fix cluster key * remove log --- src/query/ast/src/ast/statements/replace.rs | 2 + src/query/ast/src/parser/statement.rs | 3 + src/query/expression/src/schema.rs | 2 +- .../src/interpreters/interpreter_delete.rs | 4 +- .../interpreters/interpreter_merge_into.rs | 8 +- .../src/interpreters/interpreter_replace.rs | 86 ++++++++++++++++--- .../interpreter_table_optimize.rs | 8 +- .../service/src/pipelines/pipeline_builder.rs | 41 +++++++-- .../src/schedulers/fragments/fragmenter.rs | 6 +- .../src/schedulers/fragments/plan_fragment.rs | 12 +-- src/query/sql/src/executor/physical_plan.rs | 10 +-- .../sql/src/executor/physical_plan_visitor.rs | 18 ++-- .../physical_plans/physical_deduplicate.rs | 2 + src/query/sql/src/planner/binder/replace.rs | 4 +- src/query/sql/src/planner/plans/replace.rs | 2 + .../storages/fuse/src/operations/append.rs | 16 ++-- .../storages/fuse/src/operations/compact.rs | 3 +- .../storages/fuse/src/operations/recluster.rs | 2 +- .../processors/processor_replace_into.rs | 59 ++++++++++++- .../processor_unbranched_replace_into.rs | 14 ++- .../storages/fuse/src/operations/update.rs | 2 +- .../base/09_fuse_engine/09_0023_replace_into | 61 ++++++++++++- 22 files changed, 297 insertions(+), 68 deletions(-) diff --git a/src/query/ast/src/ast/statements/replace.rs b/src/query/ast/src/ast/statements/replace.rs index d8f60b22f14e..6a8b5383d237 100644 --- a/src/query/ast/src/ast/statements/replace.rs +++ b/src/query/ast/src/ast/statements/replace.rs @@ -17,6 +17,7 @@ use std::fmt::Formatter; use crate::ast::write_comma_separated_list; use crate::ast::write_dot_separated_list; +use crate::ast::Expr; use crate::ast::Hint; use crate::ast::Identifier; use crate::ast::InsertSource; @@ -30,6 +31,7 @@ pub struct ReplaceStmt { pub on_conflict_columns: Vec, pub columns: Vec, pub source: InsertSource, + pub delete_when: Option, } impl Display for ReplaceStmt { diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index 2e2f74ad0651..0480e4deffdf 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -125,6 
+125,7 @@ pub fn statement(i: Input) -> IResult { ~ #dot_separated_idents_1_to_3 ~ ( "(" ~ #comma_separated_list1(ident) ~ ")" )? ~ (ON ~ CONFLICT? ~ "(" ~ #comma_separated_list1(ident) ~ ")") + ~ (DELETE ~ WHEN ~ ^#expr)? ~ #insert_source }, |( @@ -134,6 +135,7 @@ pub fn statement(i: Input) -> IResult { (catalog, database, table), opt_columns, (_, _, _, on_conflict_columns, _), + opt_delete_when, source, )| { Statement::Replace(ReplaceStmt { @@ -146,6 +148,7 @@ pub fn statement(i: Input) -> IResult { .map(|(_, columns, _)| columns) .unwrap_or_default(), source, + delete_when: opt_delete_when.map(|(_, _, expr)| expr), }) }, ); diff --git a/src/query/expression/src/schema.rs b/src/query/expression/src/schema.rs index 1e369bc5e032..4dc53f182138 100644 --- a/src/query/expression/src/schema.rs +++ b/src/query/expression/src/schema.rs @@ -63,7 +63,7 @@ pub fn is_internal_column_id(column_id: ColumnId) -> bool { #[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] pub struct DataSchema { - pub(crate) fields: Vec, + pub fields: Vec, pub(crate) metadata: BTreeMap, } diff --git a/src/query/service/src/interpreters/interpreter_delete.rs b/src/query/service/src/interpreters/interpreter_delete.rs index 1121da2d4a09..ae7694906ed7 100644 --- a/src/query/service/src/interpreters/interpreter_delete.rs +++ b/src/query/service/src/interpreters/interpreter_delete.rs @@ -313,14 +313,14 @@ impl DeleteInterpreter { }); } - Ok(PhysicalPlan::CommitSink(CommitSink { + Ok(PhysicalPlan::CommitSink(Box::new(CommitSink { input: Box::new(root), snapshot, table_info, catalog_info, mutation_kind: MutationKind::Delete, merge_meta, - })) + }))) } } diff --git a/src/query/service/src/interpreters/interpreter_merge_into.rs b/src/query/service/src/interpreters/interpreter_merge_into.rs index ea7d5ba70e90..0f40491ed7c8 100644 --- a/src/query/service/src/interpreters/interpreter_merge_into.rs +++ b/src/query/service/src/interpreters/interpreter_merge_into.rs @@ -310,7 +310,7 @@ impl MergeIntoInterpreter { // recv datablocks from matched upstream and unmatched upstream // transform and append dat - let merge_into = PhysicalPlan::MergeInto(MergeInto { + let merge_into = PhysicalPlan::MergeInto(Box::new(MergeInto { input: Box::new(merge_into_source), table_info: table_info.clone(), catalog_info: catalog_.info(), @@ -324,10 +324,10 @@ impl MergeIntoInterpreter { .into_iter() .enumerate() .collect(), - }); + })); // build mutation_aggregate - let physical_plan = PhysicalPlan::CommitSink(CommitSink { + let physical_plan = PhysicalPlan::CommitSink(Box::new(CommitSink { input: Box::new(merge_into), snapshot: base_snapshot, table_info: table_info.clone(), @@ -335,7 +335,7 @@ impl MergeIntoInterpreter { // let's use update first, we will do some optimizeations and select exact strategy mutation_kind: MutationKind::Update, merge_meta: false, - }); + })); Ok((physical_plan, table_info.clone())) } diff --git a/src/query/service/src/interpreters/interpreter_replace.rs b/src/query/service/src/interpreters/interpreter_replace.rs index 7f47d751474c..eae15be11261 100644 --- a/src/query/service/src/interpreters/interpreter_replace.rs +++ b/src/query/service/src/interpreters/interpreter_replace.rs @@ -19,7 +19,9 @@ use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataSchemaRef; +use common_functions::BUILTIN_FUNCTIONS; use common_meta_app::principal::StageInfo; +use common_sql::executor::cast_expr_to_non_null_boolean; use 
common_sql::executor::AsyncSourcerPlan; use common_sql::executor::CommitSink; use common_sql::executor::Deduplicate; @@ -32,9 +34,14 @@ use common_sql::executor::SelectCtx; use common_sql::plans::InsertInputSource; use common_sql::plans::Plan; use common_sql::plans::Replace; +use common_sql::BindContext; +use common_sql::Metadata; +use common_sql::NameResolutionContext; +use common_sql::ScalarBinder; use common_storage::StageFileInfo; use common_storages_factory::Table; use common_storages_fuse::FuseTable; +use parking_lot::RwLock; use storages_common_table_meta::meta::TableSnapshot; use crate::interpreters::common::check_deduplicate_label; @@ -166,7 +173,8 @@ impl ReplaceInterpreter { let table_is_empty = base_snapshot.segments.is_empty(); let table_level_range_index = base_snapshot.summary.col_stats.clone(); let mut purge_info = None; - let (mut root, select_ctx) = self + + let (mut root, select_ctx, bind_context) = self .connect_input_source( self.ctx.clone(), &self.plan.source, @@ -183,6 +191,59 @@ impl ReplaceInterpreter { )); } } + + let delete_when = if let Some(expr) = &plan.delete_when { + if bind_context.is_none() { + return Err(ErrorCode::Unimplemented( + "Delete semantic is only supported in subquery", + )); + } + let mut bind_context = bind_context.unwrap(); + let name_resolution_ctx = + NameResolutionContext::try_from(self.ctx.get_settings().as_ref())?; + let metadata = Arc::new(RwLock::new(Metadata::default())); + let mut scalar_binder = ScalarBinder::new( + &mut bind_context, + self.ctx.clone(), + &name_resolution_ctx, + metadata, + &[], + Default::default(), + Default::default(), + ); + let (scalar, _) = scalar_binder.bind(expr).await?; + let columns = scalar.used_columns(); + if columns.len() != 1 { + return Err(ErrorCode::BadArguments( + "Delete must have one column in predicate", + )); + } + let delete_column = columns.iter().next().unwrap(); + let column_bindings = &bind_context.columns; + let delete_column_binding = column_bindings.iter().find(|c| c.index == *delete_column); + if delete_column_binding.is_none() { + return Err(ErrorCode::BadArguments( + "Delete must have one column in predicate", + )); + } + let delete_column_name = delete_column_binding.unwrap().column_name.clone(); + let filter = cast_expr_to_non_null_boolean( + scalar.as_expr()?.project_column_ref(|col| col.index), + )?; + + let filter = filter.as_remote_expr(); + + let expr = filter.as_expr(&BUILTIN_FUNCTIONS); + if !expr.is_deterministic(&BUILTIN_FUNCTIONS) { + return Err(ErrorCode::Unimplemented( + "Delete must have deterministic predicate", + )); + } + Some((filter, delete_column_name)) + } else { + None + }; + // remove top exchange if let PhysicalPlan::Exchange(Exchange { input, .. 
}) = root.as_ref() { root = input.clone(); @@ -209,7 +270,7 @@ impl ReplaceInterpreter { vec![] }; - root = Box::new(PhysicalPlan::Deduplicate(Deduplicate { + root = Box::new(PhysicalPlan::Deduplicate(Box::new(Deduplicate { input: root, on_conflicts: on_conflicts.clone(), bloom_filter_column_indexes: bloom_filter_column_indexes.clone(), @@ -220,8 +281,9 @@ impl ReplaceInterpreter { table_schema: plan.schema.clone(), table_level_range_index, need_insert: true, - })); - root = Box::new(PhysicalPlan::ReplaceInto(ReplaceInto { + delete_when, + }))); + root = Box::new(PhysicalPlan::ReplaceInto(Box::new(ReplaceInto { input: root, block_thresholds: fuse_table.get_block_thresholds(), table_info: table_info.clone(), @@ -236,7 +298,7 @@ impl ReplaceInterpreter { .collect(), block_slots: None, need_insert: true, - })); + }))); if is_distributed { root = Box::new(PhysicalPlan::Exchange(Exchange { plan_id: 0, @@ -246,14 +308,14 @@ impl ReplaceInterpreter { ignore_exchange: false, })); } - root = Box::new(PhysicalPlan::CommitSink(CommitSink { + root = Box::new(PhysicalPlan::CommitSink(Box::new(CommitSink { input: root, snapshot: base_snapshot, table_info: table_info.clone(), catalog_info: catalog.info(), mutation_kind: MutationKind::Replace, merge_meta: false, - })); + }))); Ok((root, purge_info)) } @@ -273,11 +335,11 @@ impl ReplaceInterpreter { source: &'a InsertInputSource, schema: DataSchemaRef, purge_info: &mut Option<(Vec, StageInfo)>, - ) -> Result<(Box, Option)> { + ) -> Result<(Box, Option, Option)> { match source { InsertInputSource::Values { data, start } => self .connect_value_source(schema.clone(), data, *start) - .map(|x| (x, None)), + .map(|x| (x, None, None)), InsertInputSource::SelectPlan(plan) => { self.connect_query_plan_source(ctx.clone(), plan).await @@ -289,7 +351,7 @@ impl ReplaceInterpreter { let (physical_plan, files) = interpreter.build_physical_plan(©_plan).await?; *purge_info = Some((files, copy_plan.stage_table_info.stage_info.clone())); - Ok((Box::new(physical_plan), None)) + Ok((Box::new(physical_plan), None, None)) } _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"), }, @@ -317,7 +379,7 @@ impl ReplaceInterpreter { &'a self, ctx: Arc, query_plan: &Plan, - ) -> Result<(Box, Option)> { + ) -> Result<(Box, Option, Option)> { let (s_expr, metadata, bind_context, formatted_ast) = match query_plan { Plan::Query { s_expr, @@ -346,6 +408,6 @@ impl ReplaceInterpreter { select_column_bindings: bind_context.columns.clone(), select_schema: query_plan.schema(), }; - Ok((physical_plan, Some(select_ctx))) + Ok((physical_plan, Some(select_ctx), Some(*bind_context.clone()))) } } diff --git a/src/query/service/src/interpreters/interpreter_table_optimize.rs b/src/query/service/src/interpreters/interpreter_table_optimize.rs index e1f0c8fd29d5..0756398b4c94 100644 --- a/src/query/service/src/interpreters/interpreter_table_optimize.rs +++ b/src/query/service/src/interpreters/interpreter_table_optimize.rs @@ -90,12 +90,12 @@ impl OptimizeTableInterpreter { is_distributed: bool, ) -> Result { let merge_meta = parts.is_lazy; - let mut root = PhysicalPlan::CompactPartial(CompactPartial { + let mut root = PhysicalPlan::CompactPartial(Box::new(CompactPartial { parts, table_info: table_info.clone(), catalog_info: catalog_info.clone(), column_ids: snapshot.schema.to_leaf_column_id_set(), - }); + })); if is_distributed { root = PhysicalPlan::Exchange(Exchange { @@ -107,14 +107,14 @@ impl OptimizeTableInterpreter { }); } - Ok(PhysicalPlan::CommitSink(CommitSink { + 
Ok(PhysicalPlan::CommitSink(Box::new(CommitSink { input: Box::new(root), table_info, catalog_info, snapshot, mutation_kind: MutationKind::Compact, merge_meta, - })) + }))) } async fn build_pipeline( diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 5ff201c0e78d..186b17d50af5 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -331,14 +331,16 @@ impl PipelineBuilder { table_level_range_index, table_schema, need_insert, + delete_when, } = deduplicate; let tbl = self .ctx .build_table_by_table_info(catalog_info, table_info, None)?; let table = FuseTable::try_from_table(tbl.as_ref())?; - let target_schema: Arc = Arc::new(table_schema.clone().into()); self.build_pipeline(input)?; + let mut delete_column_idx = 0; + let mut opt_modified_schema = None; if let Some(SelectCtx { select_column_bindings, select_schema, @@ -351,6 +353,22 @@ impl PipelineBuilder { &mut self.main_pipeline, false, )?; + + let mut target_schema: DataSchema = table_schema.clone().into(); + if let Some((_, delete_column)) = delete_when { + delete_column_idx = select_schema.index_of(delete_column.as_str())?; + let delete_column = select_schema.field(delete_column_idx).clone(); + target_schema + .fields + .insert(delete_column_idx, delete_column); + opt_modified_schema = Some(Arc::new(target_schema.clone())); + } + let target_schema = Arc::new(target_schema.clone()); + if target_schema.fields().len() != select_schema.fields().len() { + return Err(ErrorCode::BadArguments( + "The number of columns in the target table is different from the number of columns in the SELECT clause", + )); + } if Self::check_schema_cast(select_schema.clone(), target_schema.clone())? { self.main_pipeline.add_transform( |transform_input_port, transform_output_port| { @@ -370,13 +388,14 @@ impl PipelineBuilder { self.ctx.clone(), &mut self.main_pipeline, tbl.clone(), - target_schema.clone(), + Arc::new(table_schema.clone().into()), )?; let _ = table.cluster_gen_for_append( self.ctx.clone(), &mut self.main_pipeline, table.get_block_thresholds(), + opt_modified_schema, )?; // 1. 
resize input to 1, since the UpsertTransform need to de-duplicate inputs "globally" self.main_pipeline.try_resize(1)?; @@ -395,16 +414,25 @@ impl PipelineBuilder { // (1) -> output_port_merge_into_action // the "downstream" is supposed to be connected with a processor which can process MergeIntoOperations // in our case, it is the broadcast processor + let delete_when = if let Some((remote_expr, delete_column)) = delete_when { + Some(( + remote_expr.as_expr(&BUILTIN_FUNCTIONS), + delete_column.clone(), + )) + } else { + None + }; let cluster_keys = table.cluster_keys(self.ctx.clone()); if *need_insert { let replace_into_processor = ReplaceIntoProcessor::create( - self.ctx.as_ref(), + self.ctx.clone(), on_conflicts.clone(), cluster_keys, bloom_filter_column_indexes.clone(), table_schema.as_ref(), *table_is_empty, table_level_range_index.clone(), + delete_when.map(|(expr, _)| (expr, delete_column_idx)), )?; self.main_pipeline .add_pipe(replace_into_processor.into_pipe()); @@ -417,6 +445,7 @@ impl PipelineBuilder { table_schema.as_ref(), *table_is_empty, table_level_range_index.clone(), + delete_when.map(|_| delete_column_idx), )?; self.main_pipeline .add_pipe(replace_into_processor.into_pipe()); @@ -445,7 +474,7 @@ impl PipelineBuilder { let block_thresholds = table.get_block_thresholds(); let cluster_stats_gen = - table.get_cluster_stats_gen(self.ctx.clone(), 0, block_thresholds)?; + table.get_cluster_stats_gen(self.ctx.clone(), 0, block_thresholds, None)?; // this TransformSerializeBlock is just used to get block_builder let block_builder = TransformSerializeBlock::try_create( @@ -684,7 +713,7 @@ impl PipelineBuilder { .build_table_by_table_info(catalog_info, table_info, None)?; let table = FuseTable::try_from_table(table.as_ref())?; let cluster_stats_gen = - table.get_cluster_stats_gen(self.ctx.clone(), 0, *block_thresholds)?; + table.get_cluster_stats_gen(self.ctx.clone(), 0, *block_thresholds, None)?; self.build_pipeline(input)?; // connect to broadcast processor and append transform let serialize_block_transform = TransformSerializeBlock::try_create( @@ -954,7 +983,7 @@ impl PipelineBuilder { &mut self.main_pipeline, )?; let cluster_stats_gen = - table.get_cluster_stats_gen(self.ctx.clone(), 0, table.get_block_thresholds())?; + table.get_cluster_stats_gen(self.ctx.clone(), 0, table.get_block_thresholds(), None)?; self.main_pipeline.add_transform(|input, output| { let proc = TransformSerializeBlock::try_create( self.ctx.clone(), diff --git a/src/query/service/src/schedulers/fragments/fragmenter.rs b/src/query/service/src/schedulers/fragments/fragmenter.rs index 412969f96920..cdb1b21f8f77 100644 --- a/src/query/service/src/schedulers/fragments/fragmenter.rs +++ b/src/query/service/src/schedulers/fragments/fragmenter.rs @@ -154,10 +154,10 @@ impl PhysicalPlanReplacer for Fragmenter { let input = self.replace(&plan.input)?; self.state = State::ReplaceInto; - Ok(PhysicalPlan::ReplaceInto(ReplaceInto { + Ok(PhysicalPlan::ReplaceInto(Box::new(ReplaceInto { input: Box::new(input), ..plan.clone() - })) + }))) } // TODO(Sky): remove rebudant code @@ -191,7 +191,7 @@ impl PhysicalPlanReplacer for Fragmenter { ) -> Result { self.state = State::Compact; - Ok(PhysicalPlan::CompactPartial(plan.clone())) + Ok(PhysicalPlan::CompactPartial(Box::new(plan.clone()))) } fn replace_delete_partial( diff --git a/src/query/service/src/schedulers/fragments/plan_fragment.rs b/src/query/service/src/schedulers/fragments/plan_fragment.rs index 88154be0c46b..68d388424fdd 100644 --- 
a/src/query/service/src/schedulers/fragments/plan_fragment.rs +++ b/src/query/service/src/schedulers/fragments/plan_fragment.rs @@ -428,10 +428,10 @@ struct ReplaceCompactBlock { impl PhysicalPlanReplacer for ReplaceCompactBlock { fn replace_compact_partial(&mut self, plan: &CompactPartial) -> Result { - Ok(PhysicalPlan::CompactPartial(CompactPartial { + Ok(PhysicalPlan::CompactPartial(Box::new(CompactPartial { parts: self.partitions.clone(), ..plan.clone() - })) + }))) } } @@ -458,22 +458,22 @@ struct ReplaceReplaceInto { impl PhysicalPlanReplacer for ReplaceReplaceInto { fn replace_replace_into(&mut self, plan: &ReplaceInto) -> Result { let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::ReplaceInto(ReplaceInto { + Ok(PhysicalPlan::ReplaceInto(Box::new(ReplaceInto { input: Box::new(input), need_insert: self.need_insert, segments: self.partitions.clone(), block_slots: self.slot.clone(), ..plan.clone() - })) + }))) } fn replace_deduplicate(&mut self, plan: &Deduplicate) -> Result { let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::Deduplicate(Deduplicate { + Ok(PhysicalPlan::Deduplicate(Box::new(Deduplicate { input: Box::new(input), need_insert: self.need_insert, table_is_empty: self.partitions.is_empty(), ..plan.clone() - })) + }))) } } diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs index 6da9c77bda11..a76e7390daa8 100644 --- a/src/query/sql/src/executor/physical_plan.rs +++ b/src/query/sql/src/executor/physical_plan.rs @@ -91,16 +91,16 @@ pub enum PhysicalPlan { /// Replace AsyncSourcer(AsyncSourcerPlan), - Deduplicate(Deduplicate), - ReplaceInto(ReplaceInto), + Deduplicate(Box), + ReplaceInto(Box), /// MergeInto MergeIntoSource(MergeIntoSource), - MergeInto(MergeInto), + MergeInto(Box), /// Compact - CompactPartial(CompactPartial), - CommitSink(CommitSink), + CompactPartial(Box), + CommitSink(Box), } impl PhysicalPlan { diff --git a/src/query/sql/src/executor/physical_plan_visitor.rs b/src/query/sql/src/executor/physical_plan_visitor.rs index 55c54a89b72c..26a8a76570fa 100644 --- a/src/query/sql/src/executor/physical_plan_visitor.rs +++ b/src/query/sql/src/executor/physical_plan_visitor.rs @@ -371,7 +371,7 @@ pub trait PhysicalPlanReplacer { } fn replace_compact_partial(&mut self, plan: &CompactPartial) -> Result { - Ok(PhysicalPlan::CompactPartial(plan.clone())) + Ok(PhysicalPlan::CompactPartial(Box::new(plan.clone()))) } fn replace_delete_partial(&mut self, plan: &DeletePartial) -> Result { @@ -380,10 +380,10 @@ pub trait PhysicalPlanReplacer { fn replace_commit_sink(&mut self, plan: &CommitSink) -> Result { let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::CommitSink(CommitSink { + Ok(PhysicalPlan::CommitSink(Box::new(CommitSink { input: Box::new(input), ..plan.clone() - })) + }))) } fn replace_async_sourcer(&mut self, plan: &AsyncSourcerPlan) -> Result { @@ -392,26 +392,26 @@ pub trait PhysicalPlanReplacer { fn replace_deduplicate(&mut self, plan: &Deduplicate) -> Result { let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::Deduplicate(Deduplicate { + Ok(PhysicalPlan::Deduplicate(Box::new(Deduplicate { input: Box::new(input), ..plan.clone() - })) + }))) } fn replace_replace_into(&mut self, plan: &ReplaceInto) -> Result { let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::ReplaceInto(ReplaceInto { + Ok(PhysicalPlan::ReplaceInto(Box::new(ReplaceInto { input: Box::new(input), ..plan.clone() - })) + }))) } fn replace_merge_into(&mut self, plan: &MergeInto) -> Result { let input = 
self.replace(&plan.input)?; - Ok(PhysicalPlan::MergeInto(MergeInto { + Ok(PhysicalPlan::MergeInto(Box::new(MergeInto { input: Box::new(input), ..plan.clone() - })) + }))) } fn replace_merge_into_source(&mut self, plan: &MergeIntoSource) -> Result { diff --git a/src/query/sql/src/executor/physical_plans/physical_deduplicate.rs b/src/query/sql/src/executor/physical_plans/physical_deduplicate.rs index e18fad873b40..c075491cef42 100644 --- a/src/query/sql/src/executor/physical_plans/physical_deduplicate.rs +++ b/src/query/sql/src/executor/physical_plans/physical_deduplicate.rs @@ -17,6 +17,7 @@ use std::collections::HashMap; use common_expression::ColumnId; use common_expression::DataSchemaRef; use common_expression::FieldIndex; +use common_expression::RemoteExpr; use common_expression::TableSchemaRef; use common_meta_app::schema::CatalogInfo; use common_meta_app::schema::TableInfo; @@ -38,6 +39,7 @@ pub struct Deduplicate { pub select_ctx: Option, pub table_level_range_index: HashMap, pub need_insert: bool, + pub delete_when: Option<(RemoteExpr, String)>, } #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] diff --git a/src/query/sql/src/planner/binder/replace.rs b/src/query/sql/src/planner/binder/replace.rs index 36f133e1856c..ee4a11acdadf 100644 --- a/src/query/sql/src/planner/binder/replace.rs +++ b/src/query/sql/src/planner/binder/replace.rs @@ -46,7 +46,8 @@ impl Binder { on_conflict_columns, columns, source, - .. + delete_when, + hints: _, } = stmt; let (catalog_name, database_name, table_name) = @@ -152,6 +153,7 @@ impl Binder { on_conflict_fields, schema, source: input_source?, + delete_when: delete_when.clone(), }; Ok(Plan::Replace(Box::new(plan))) diff --git a/src/query/sql/src/planner/plans/replace.rs b/src/query/sql/src/planner/plans/replace.rs index c2a927bfafa8..f64dea0b91c3 100644 --- a/src/query/sql/src/planner/plans/replace.rs +++ b/src/query/sql/src/planner/plans/replace.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use common_ast::ast::Expr; use common_expression::DataSchemaRef; use common_expression::TableField; use common_expression::TableSchemaRef; @@ -30,6 +31,7 @@ pub struct Replace { pub on_conflict_fields: Vec, pub schema: TableSchemaRef, pub source: InsertInputSource, + pub delete_when: Option, } impl PartialEq for Replace { diff --git a/src/query/storages/fuse/src/operations/append.rs b/src/query/storages/fuse/src/operations/append.rs index d675e07b7c57..dec72781f9a6 100644 --- a/src/query/storages/fuse/src/operations/append.rs +++ b/src/query/storages/fuse/src/operations/append.rs @@ -21,6 +21,7 @@ use common_catalog::table_context::TableContext; use common_exception::Result; use common_expression::BlockThresholds; use common_expression::DataField; +use common_expression::DataSchema; use common_expression::Expr; use common_expression::SortColumnDescription; use common_functions::BUILTIN_FUNCTIONS; @@ -71,7 +72,7 @@ impl FuseTable { } let cluster_stats_gen = - self.cluster_gen_for_append(ctx.clone(), pipeline, block_thresholds)?; + self.cluster_gen_for_append(ctx.clone(), pipeline, block_thresholds, None)?; pipeline.add_transform(|input, output| { let proc = TransformSerializeBlock::try_create( ctx.clone(), @@ -93,7 +94,8 @@ impl FuseTable { block_thresholds: BlockThresholds, specified_last_len: usize, ) -> Result { - let cluster_stats_gen = self.get_cluster_stats_gen(ctx.clone(), 0, block_thresholds)?; + let cluster_stats_gen = + self.get_cluster_stats_gen(ctx.clone(), 0, block_thresholds, None)?; let output_lens = pipeline.output_len(); let items1 = 
create_dummy_items(output_lens - specified_last_len, output_lens); let items2 = create_dummy_items(output_lens - specified_last_len, output_lens); @@ -149,8 +151,10 @@ impl FuseTable { ctx: Arc, pipeline: &mut Pipeline, block_thresholds: BlockThresholds, + modified_schema: Option>, ) -> Result { - let cluster_stats_gen = self.get_cluster_stats_gen(ctx.clone(), 0, block_thresholds)?; + let cluster_stats_gen = + self.get_cluster_stats_gen(ctx.clone(), 0, block_thresholds, modified_schema)?; let operators = cluster_stats_gen.operators.clone(); if !operators.is_empty() { @@ -195,15 +199,15 @@ impl FuseTable { ctx: Arc, level: i32, block_thresholds: BlockThresholds, + modified_schema: Option>, ) -> Result { let cluster_keys = self.cluster_keys(ctx.clone()); if cluster_keys.is_empty() { return Ok(ClusterStatsGenerator::default()); } - let input_schema = self.table_info.schema(); - let mut merged: Vec = - input_schema.fields().iter().map(DataField::from).collect(); + let input_schema = modified_schema.unwrap_or(DataSchema::from(self.schema()).into()); + let mut merged: Vec = input_schema.fields().clone(); let mut cluster_key_index = Vec::with_capacity(cluster_keys.len()); let mut extra_key_num = 0; diff --git a/src/query/storages/fuse/src/operations/compact.rs b/src/query/storages/fuse/src/operations/compact.rs index 4aa9bdf208af..8b0686b035a8 100644 --- a/src/query/storages/fuse/src/operations/compact.rs +++ b/src/query/storages/fuse/src/operations/compact.rs @@ -175,7 +175,8 @@ impl FuseTable { )?; // sort - let cluster_stats_gen = self.cluster_gen_for_append(ctx.clone(), pipeline, thresholds)?; + let cluster_stats_gen = + self.cluster_gen_for_append(ctx.clone(), pipeline, thresholds, None)?; pipeline.add_transform( |input: Arc, output| { let proc = TransformSerializeBlock::try_create( diff --git a/src/query/storages/fuse/src/operations/recluster.rs b/src/query/storages/fuse/src/operations/recluster.rs index f2bad1017cf3..9b31c0575fcf 100644 --- a/src/query/storages/fuse/src/operations/recluster.rs +++ b/src/query/storages/fuse/src/operations/recluster.rs @@ -220,7 +220,7 @@ impl FuseTable { self.do_read_data(ctx.clone(), &plan, pipeline)?; let cluster_stats_gen = - self.get_cluster_stats_gen(ctx.clone(), mutator.level + 1, block_thresholds)?; + self.get_cluster_stats_gen(ctx.clone(), mutator.level + 1, block_thresholds, None)?; let operators = cluster_stats_gen.operators.clone(); if !operators.is_empty() { let func_ctx2 = cluster_stats_gen.func_ctx.clone(); diff --git a/src/query/storages/fuse/src/operations/replace_into/processors/processor_replace_into.rs b/src/query/storages/fuse/src/operations/replace_into/processors/processor_replace_into.rs index 3775356fbc86..bd7d4246df89 100644 --- a/src/query/storages/fuse/src/operations/replace_into/processors/processor_replace_into.rs +++ b/src/query/storages/fuse/src/operations/replace_into/processors/processor_replace_into.rs @@ -14,16 +14,23 @@ use std::any::Any; use std::collections::HashMap; +use std::collections::HashSet; +use std::ops::Not; use std::sync::Arc; use std::time::Instant; use common_catalog::table_context::TableContext; use common_exception::Result; +use common_expression::types::BooleanType; use common_expression::ColumnId; use common_expression::DataBlock; +use common_expression::Evaluator; +use common_expression::Expr; use common_expression::FieldIndex; use common_expression::RemoteExpr; use common_expression::TableSchema; +use common_expression::Value; +use common_functions::BUILTIN_FUNCTIONS; use 
common_pipeline_core::pipe::Pipe; use common_pipeline_core::pipe::PipeItem; use common_pipeline_core::processors::port::InputPort; @@ -52,20 +59,24 @@ pub struct ReplaceIntoProcessor { output_data_append: Option, target_table_empty: bool, + delete_when: Option<(Expr, usize)>, + ctx: Arc, } impl ReplaceIntoProcessor { + #[allow(clippy::too_many_arguments)] pub fn create( - ctx: &dyn TableContext, + ctx: Arc, on_conflict_fields: Vec, cluster_keys: Vec>, bloom_filter_column_indexes: Vec, table_schema: &TableSchema, target_table_empty: bool, table_range_idx: HashMap, + delete_when: Option<(Expr, usize)>, ) -> Result { let replace_into_mutator = ReplaceIntoMutator::try_create( - ctx, + ctx.as_ref(), on_conflict_fields, cluster_keys, bloom_filter_column_indexes, @@ -85,6 +96,8 @@ impl ReplaceIntoProcessor { output_data_merge_into_action: None, output_data_append: None, target_table_empty, + delete_when, + ctx, }) } @@ -165,8 +178,34 @@ impl Processor for ReplaceIntoProcessor { } fn process(&mut self) -> Result<()> { - if let Some(data_block) = self.input_data.take() { + if let Some(mut data_block) = self.input_data.take() { let start = Instant::now(); + let mut filter = None; + let mut all_delete = false; + if let Some((expr, delete_column)) = &self.delete_when { + let expr = expr.project_column_ref(|_| *delete_column); + let func_ctx = self.ctx.get_function_context()?; + let evaluator = Evaluator::new(&data_block, &func_ctx, &BUILTIN_FUNCTIONS); + let predicates = evaluator + .run(&expr) + .map_err(|e| e.add_message("eval filter failed:"))? + .try_downcast::() + .unwrap(); + match predicates { + Value::Scalar(scalar) => { + all_delete = scalar; + } + Value::Column(column) => { + filter = Some(column.not()); + } + } + + let column_num = data_block.num_columns(); + let projections = (0..column_num) + .filter(|i| i != delete_column) + .collect::>(); + data_block = data_block.project(&projections); + }; let merge_into_action = self.replace_into_mutator.process_input_block(&data_block)?; metrics_inc_replace_process_input_block_time_ms(start.elapsed().as_millis() as u64); metrics_inc_replace_block_number_input(1); @@ -174,8 +213,20 @@ impl Processor for ReplaceIntoProcessor { self.output_data_merge_into_action = Some(DataBlock::empty_with_meta(Box::new(merge_into_action))); } + + if all_delete { + return Ok(()); + } + + if let Some(filter) = filter { + data_block = data_block.filter_with_bitmap(&filter)?; + } + metrics_inc_replace_append_blocks_rows(data_block.num_rows() as u64); - self.output_data_append = Some(data_block); + + if data_block.num_rows() > 0 { + self.output_data_append = Some(data_block); + } return Ok(()); } diff --git a/src/query/storages/fuse/src/operations/replace_into/processors/processor_unbranched_replace_into.rs b/src/query/storages/fuse/src/operations/replace_into/processors/processor_unbranched_replace_into.rs index cc1b5c3b1378..81f7131f7b70 100644 --- a/src/query/storages/fuse/src/operations/replace_into/processors/processor_unbranched_replace_into.rs +++ b/src/query/storages/fuse/src/operations/replace_into/processors/processor_unbranched_replace_into.rs @@ -14,6 +14,7 @@ use std::any::Any; use std::collections::HashMap; +use std::collections::HashSet; use std::sync::Arc; use std::time::Instant; @@ -48,9 +49,11 @@ pub struct UnbranchedReplaceIntoProcessor { output_data_merge_into_action: Option, target_table_empty: bool, + delete_column: Option, } impl UnbranchedReplaceIntoProcessor { + #[allow(clippy::too_many_arguments)] pub fn create( ctx: &dyn TableContext, 
on_conflict_fields: Vec, @@ -59,6 +62,7 @@ impl UnbranchedReplaceIntoProcessor { table_schema: &TableSchema, target_table_empty: bool, table_range_idx: HashMap, + delete_column: Option, ) -> Result { let replace_into_mutator = ReplaceIntoMutator::try_create( ctx, @@ -78,6 +82,7 @@ impl UnbranchedReplaceIntoProcessor { input_data: None, output_data_merge_into_action: None, target_table_empty, + delete_column, }) } @@ -146,8 +151,15 @@ impl Processor for UnbranchedReplaceIntoProcessor { } fn process(&mut self) -> Result<()> { - if let Some(data_block) = self.input_data.take() { + if let Some(mut data_block) = self.input_data.take() { let start = Instant::now(); + if let Some(delete_column) = self.delete_column { + let column_num = data_block.num_columns(); + let projections = (0..column_num) + .filter(|i| *i != delete_column) + .collect::>(); + data_block = data_block.project(&projections); + } let merge_into_action = self.replace_into_mutator.process_input_block(&data_block)?; metrics_inc_replace_process_input_block_time_ms(start.elapsed().as_millis() as u64); if !self.target_table_empty { diff --git a/src/query/storages/fuse/src/operations/update.rs b/src/query/storages/fuse/src/operations/update.rs index c556a71d63b1..1ee4b044398d 100644 --- a/src/query/storages/fuse/src/operations/update.rs +++ b/src/query/storages/fuse/src/operations/update.rs @@ -101,7 +101,7 @@ impl FuseTable { let block_thresholds = self.get_block_thresholds(); // sort let cluster_stats_gen = - self.cluster_gen_for_append(ctx.clone(), pipeline, block_thresholds)?; + self.cluster_gen_for_append(ctx.clone(), pipeline, block_thresholds, None)?; pipeline.add_transform(|input, output| { let proc = TransformSerializeBlock::try_create( diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0023_replace_into b/tests/sqllogictests/suites/base/09_fuse_engine/09_0023_replace_into index 593d8b6cae03..c16866b2ddb6 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0023_replace_into +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0023_replace_into @@ -433,9 +433,68 @@ select sum(id), sum(c1), sum(c2), sum(c3), sum(c4) from t; statement ok drop table t; +# delete when + +statement ok +drop table if exists t; + +statement ok +drop table if exists s; + +statement ok +create table t(c int); + +statement ok +create table s(c1 int , c2 int); + +statement ok +insert into s values(1,2), (3, 4); + +# column c1 used for delete, column c2 used for insert or update +statement ok +replace into t on(c) delete when c1 = 1 select * from s; + +query I +select * from t; +---- +4 + +statement ok +replace into t on(c) delete when c1 = 0 select * from s; + +query I +select * from t order by c; +---- +2 +4 + statement ok -DROP DATABASE db_09_0023 +replace into t on(c) delete when c1 = 3 select * from s; +query I +select * from t order by c; +---- +2 +# column c2 used for delete, column c1 used for insert or update +statement ok +replace into t on(c) delete when c2 = 0 select * from s; +query I +select * from t order by c; +---- +1 +2 +3 + +statement ok +replace into t on(c) delete when c2 = 2 select * from s; + +query I +select * from t order by c; +---- +2 +3 +statement ok +DROP DATABASE db_09_0023 \ No newline at end of file From 0dee1ba37db4ef4c5f6d4c7fe849809ab5a15305 Mon Sep 17 00:00:00 2001 From: everpcpc Date: Thu, 12 Oct 2023 20:01:46 +0800 Subject: [PATCH 10/13] chore(ci): fix publishing python binding to pypi (#13230) --- .github/workflows/bindings.python.yml | 20 ++++++++------------ 1 file changed, 8 
insertions(+), 12 deletions(-) diff --git a/.github/workflows/bindings.python.yml b/.github/workflows/bindings.python.yml index 9dcb37cb9a78..fc0e3f78f5ad 100644 --- a/.github/workflows/bindings.python.yml +++ b/.github/workflows/bindings.python.yml @@ -36,13 +36,11 @@ jobs: version: ${{ inputs.tag }} - name: Publish to PyPI if: inputs.tag - env: - MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_PASSWORD }} - uses: PyO3/maturin-action@v1 + uses: pypa/gh-action-pypi-publish@release/v1 with: - command: upload - args: --skip-existing * - working-directory: src/bendpy + skip-existing: true + password: ${{ secrets.PYPI_PASSWORD }} + packages-dir: src/bendpy/dist macos: if: inputs.tag @@ -63,10 +61,8 @@ jobs: version: ${{ inputs.tag }} - name: Publish to PyPI if: inputs.tag - env: - MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_PASSWORD }} - uses: PyO3/maturin-action@v1 + uses: pypa/gh-action-pypi-publish@release/v1 with: - command: upload - args: --skip-existing * - working-directory: src/bendpy + skip-existing: true + password: ${{ secrets.PYPI_PASSWORD }} + packages-dir: src/bendpy/dist From 055c4c6a380e99580e730526e7a0e5fb5e8dbcc8 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 12 Oct 2023 20:05:35 +0800 Subject: [PATCH 11/13] chore: partially revert "feat: tweak table data life-cycle related sql stmts (#13015)" (#13223) * Revert "feat: tweak table data life-cycle related sql stmts (#13015)" This reverts commit e14dc7ca0f71b676807e2c08b624d28fb5d4a73f. * remove `truncate ... purge` * clean up * update doc of ddl-truncate-table --- benchmark/clickbench/hits/clear.sql | 3 +- benchmark/clickbench/tpch/clear.sql | 17 +++++------ benchmark/tpcds/load_data.sh | 4 +-- .../00-ddl/20-table/20-ddl-drop-table.md | 3 ++ .../00-ddl/20-table/40-ddl-truncate-table.md | 2 +- scripts/benchmark/query/load/hits.sh | 6 +--- src/query/ast/src/ast/statements/table.rs | 5 +++- src/query/ast/src/parser/statement.rs | 5 ++-- src/query/ast/tests/it/testdata/statement.txt | 4 +++ .../tests/it/aggregating_index/index_scan.rs | 4 +-- .../interpreters/interpreter_table_drop.rs | 18 +++++++++++ .../tests/it/storages/fuse/operations/gc.rs | 6 ++-- .../it/storages/fuse/operations/purge_drop.rs | 30 +++++++++++++++++++ .../it/storages/fuse/operations/truncate.rs | 2 +- src/query/sql/src/planner/binder/ddl/table.rs | 2 ++ src/query/sql/src/planner/plans/ddl/table.rs | 1 + src/query/storages/fuse/src/fuse_table.rs | 3 +- .../block/block_reader_parquet_deserialize.rs | 2 +- .../storages/fuse/src/operations/delete.rs | 6 ++-- .../storages/fuse/src/operations/truncate.rs | 20 +++++++++++-- src/tests/sqlsmith/src/sql_gen/ddl.rs | 1 + .../base/01_system/01_0002_system_query_log | 2 +- .../01_0007_system_clustering_history | 2 +- .../base/03_common/03_0003_select_group_by | 2 +- .../suites/base/03_common/03_0025_delete_from | 14 ++++----- .../base/03_common/03_0028_copy_into_stage | 2 +- .../03_common/03_0031_copy_into_user_stage | 2 +- .../suites/base/03_common/03_0035_update | 6 ++-- .../base/05_ddl/05_0001_ddl_drop_table_full | 4 +-- .../suites/base/05_ddl/05_0023_exists_table | 2 +- ...ncate => 09_0007_func_fuse_truncate_purge} | 0 .../09_fuse_engine/09_0017_transient_table | 1 + .../12_time_travel/12_0003_time_travel_undrop | 17 +++++++++++ .../suites/base/issues/issue_10103.test | 4 +-- .../group/group_by_grouping_sets.test | 4 +-- .../mode/cluster/04_0002_explain_v2.test | 4 +-- .../mode/standalone/explain/explain.test | 4 +-- .../standalone/explain_native/explain.test | 4 +-- .../02_function/02_0012_function_datetimes | 2 +- 
.../02_function/02_0012_function_datetimes_tz | 2 +- .../query/02_function/02_0014_function_maths | 2 +- .../02_0018_function_strings_repeat | 8 ++--- ...0048_function_semi_structureds_object_keys | 2 +- tests/sqllogictests/suites/query/cte.test | 2 +- .../suites/query/render_result.test | 4 +-- tests/sqllogictests/suites/ydb/select1-1.test | 2 +- tests/sqllogictests/suites/ydb/select1-2.test | 2 +- tests/sqllogictests/suites/ydb/select1-3.test | 2 +- tests/sqllogictests/suites/ydb/select1-4.test | 2 +- tests/sqllogictests/suites/ydb/select1-5.test | 2 +- tests/sqllogictests/suites/ydb/select2-1.test | 2 +- tests/sqllogictests/suites/ydb/select2-2.test | 2 +- tests/sqllogictests/suites/ydb/select2-3.test | 2 +- tests/sqllogictests/suites/ydb/select2-4.test | 2 +- tests/sqllogictests/suites/ydb/select2-5.test | 2 +- tests/sqllogictests/suites/ydb/select3-1.test | 2 +- .../sqllogictests/suites/ydb/select3-10.test | 2 +- .../sqllogictests/suites/ydb/select3-11.test | 2 +- .../sqllogictests/suites/ydb/select3-12.test | 2 +- .../sqllogictests/suites/ydb/select3-13.test | 2 +- .../sqllogictests/suites/ydb/select3-14.test | 2 +- .../sqllogictests/suites/ydb/select3-15.test | 2 +- tests/sqllogictests/suites/ydb/select3-2.test | 2 +- tests/sqllogictests/suites/ydb/select3-3.test | 2 +- tests/sqllogictests/suites/ydb/select3-4.test | 2 +- tests/sqllogictests/suites/ydb/select3-5.test | 2 +- tests/sqllogictests/suites/ydb/select3-6.test | 2 +- tests/sqllogictests/suites/ydb/select3-7.test | 2 +- tests/sqllogictests/suites/ydb/select3-8.test | 2 +- tests/sqllogictests/suites/ydb/select3-9.test | 2 +- .../17_0002_alter_table_purge_before.sh | 4 +-- .../17_0003_alter_table_update.sh | 4 +-- .../20+_others/20_0011_purge_before.sh | 4 +-- .../20+_others/20_0012_privilege_access.sh | 6 ++-- .../04_mini_dataset/04_0000_mini_ontime.sh | 2 +- .../04_mini_dataset/04_0001_mini_hits.sh | 2 +- .../05_01_00_load_compact_copy.sh | 2 +- .../05_01_01_load_compact_streaming_load.sh | 2 +- .../05_01_02_load_compact_copy_max_size.sh | 2 +- ...5_01_02_load_compact_copy_row_per_block.sh | 2 +- 80 files changed, 206 insertions(+), 114 deletions(-) rename tests/sqllogictests/suites/base/09_fuse_engine/{09_0007_func_fuse_truncate => 09_0007_func_fuse_truncate_purge} (100%) diff --git a/benchmark/clickbench/hits/clear.sql b/benchmark/clickbench/hits/clear.sql index 81452994830f..e70c0347a8da 100644 --- a/benchmark/clickbench/hits/clear.sql +++ b/benchmark/clickbench/hits/clear.sql @@ -1,2 +1 @@ -drop table hits; -VACUUM DROP TABLE retain 0 hours; +drop table hits all; diff --git a/benchmark/clickbench/tpch/clear.sql b/benchmark/clickbench/tpch/clear.sql index 60b4ace1ff06..ded376e4a710 100644 --- a/benchmark/clickbench/tpch/clear.sql +++ b/benchmark/clickbench/tpch/clear.sql @@ -1,9 +1,8 @@ -drop table customer; -drop table lineitem; -drop table nation; -drop table orders; -drop table partsupp; -drop table part; -drop table region; -drop table supplier; -VACUUM DROP TABLE retain 0 hours; +drop table customer all; +drop table lineitem all; +drop table nation all; +drop table orders all; +drop table partsupp all; +drop table part all; +drop table region all; +drop table supplier all; diff --git a/benchmark/tpcds/load_data.sh b/benchmark/tpcds/load_data.sh index ca34a816b25c..b2f71f834d31 100755 --- a/benchmark/tpcds/load_data.sh +++ b/benchmark/tpcds/load_data.sh @@ -36,11 +36,9 @@ tables=( # Clear Data for t in ${tables[@]} do - echo "DROP TABLE IF EXISTS $t" | $MYSQL_CLIENT_CONNECT + echo "DROP TABLE IF EXISTS $t ALL" | 
$MYSQL_CLIENT_CONNECT done -echo "VACUUM DROP TABLE retain 0 hours" | $MYSQL_CLIENT_CONNECT - # Create Tables; cat "$CURDIR"/tpcds.sql | $MYSQL_CLIENT_CONNECT diff --git a/docs/doc/14-sql-commands/00-ddl/20-table/20-ddl-drop-table.md b/docs/doc/14-sql-commands/00-ddl/20-table/20-ddl-drop-table.md index 0ca931eae633..704f7253825b 100644 --- a/docs/doc/14-sql-commands/00-ddl/20-table/20-ddl-drop-table.md +++ b/docs/doc/14-sql-commands/00-ddl/20-table/20-ddl-drop-table.md @@ -18,6 +18,9 @@ DROP TABLE [IF EXISTS] [db.]name :::caution `DROP TABLE` only remove the table schema from meta service, we do not remove the underlying data from the storage. +If you want to delete the data and table all, please use: + +`DROP TABLE ALL;` ::: diff --git a/docs/doc/14-sql-commands/00-ddl/20-table/40-ddl-truncate-table.md b/docs/doc/14-sql-commands/00-ddl/20-table/40-ddl-truncate-table.md index 9f8cabac2fc2..7a1a02af3d6a 100644 --- a/docs/doc/14-sql-commands/00-ddl/20-table/40-ddl-truncate-table.md +++ b/docs/doc/14-sql-commands/00-ddl/20-table/40-ddl-truncate-table.md @@ -50,4 +50,4 @@ FROM test_truncate 0 row in 0.017 sec. Processed 0 rows, 0B (0 rows/s, 0B/s) -``` \ No newline at end of file +``` diff --git a/scripts/benchmark/query/load/hits.sh b/scripts/benchmark/query/load/hits.sh index f0c6d2a99576..9754e6020532 100755 --- a/scripts/benchmark/query/load/hits.sh +++ b/scripts/benchmark/query/load/hits.sh @@ -7,11 +7,7 @@ select version(); SQL cat <, pub database: Option, pub table: Identifier, + pub all: bool, } impl Display for DropTableStmt { @@ -271,6 +272,9 @@ impl Display for DropTableStmt { .chain(&self.database) .chain(Some(&self.table)), )?; + if self.all { + write!(f, " ALL")?; + } Ok(()) } @@ -476,7 +480,6 @@ impl Display for TruncateTableStmt { .chain(&self.database) .chain(Some(&self.table)), )?; - Ok(()) } } diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index 0480e4deffdf..17b63c8b5a76 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -578,14 +578,15 @@ pub fn statement(i: Input) -> IResult { ); let drop_table = map( rule! { - DROP ~ TABLE ~ ( IF ~ ^EXISTS )? ~ #dot_separated_idents_1_to_3 + DROP ~ TABLE ~ ( IF ~ ^EXISTS )? ~ #dot_separated_idents_1_to_3 ~ ALL? 
}, - |(_, _, opt_if_exists, (catalog, database, table))| { + |(_, _, opt_if_exists, (catalog, database, table), opt_all)| { Statement::DropTable(DropTableStmt { if_exists: opt_if_exists.is_some(), catalog, database, table, + all: opt_all.is_some(), }) }, ); diff --git a/src/query/ast/tests/it/testdata/statement.txt b/src/query/ast/tests/it/testdata/statement.txt index f456112a13c2..5687f68a24b7 100644 --- a/src/query/ast/tests/it/testdata/statement.txt +++ b/src/query/ast/tests/it/testdata/statement.txt @@ -1513,6 +1513,7 @@ DropTable( 11..12, ), }, + all: false, }, ) @@ -1544,6 +1545,7 @@ DropTable( 23..26, ), }, + all: false, }, ) @@ -2404,6 +2406,7 @@ DropTable( 11..17, ), }, + all: false, }, ) @@ -2425,6 +2428,7 @@ DropTable( 21..27, ), }, + all: false, }, ) diff --git a/src/query/ee/tests/it/aggregating_index/index_scan.rs b/src/query/ee/tests/it/aggregating_index/index_scan.rs index 6ddfa982dbab..43e1cca22a3b 100644 --- a/src/query/ee/tests/it/aggregating_index/index_scan.rs +++ b/src/query/ee/tests/it/aggregating_index/index_scan.rs @@ -1110,8 +1110,8 @@ async fn test_fuzz_impl(format: &str, spill: bool) -> Result<()> { } // Clear data - execute_sql(fixture.ctx(), "DROP TABLE rt").await?; - execute_sql(fixture.ctx(), "DROP TABLE t").await?; + execute_sql(fixture.ctx(), "DROP TABLE rt ALL").await?; + execute_sql(fixture.ctx(), "DROP TABLE t ALL").await?; } } Ok(()) diff --git a/src/query/service/src/interpreters/interpreter_table_drop.rs b/src/query/service/src/interpreters/interpreter_table_drop.rs index 362695d296e3..77527d8fb2d2 100644 --- a/src/query/service/src/interpreters/interpreter_table_drop.rs +++ b/src/query/service/src/interpreters/interpreter_table_drop.rs @@ -14,10 +14,12 @@ use std::sync::Arc; +use common_catalog::table::TableExt; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::DropTableByIdReq; use common_sql::plans::DropTablePlan; +use common_storages_fuse::FuseTable; use common_storages_share::save_share_spec; use common_storages_view::view_table::VIEW_ENGINE; @@ -77,6 +79,22 @@ impl Interpreter for DropTableInterpreter { }) .await?; + // if `plan.all`, truncate, then purge the historical data + if self.plan.all { + // the above `catalog.drop_table` operation changed the table meta version, + // thus if we do not refresh the table instance, `truncate` will fail + let latest = tbl.as_ref().refresh(self.ctx.as_ref()).await?; + let maybe_fuse_table = FuseTable::try_from_table(latest.as_ref()); + // if target table if of type FuseTable, purge its historical data + // otherwise, plain truncate + if let Ok(fuse_table) = maybe_fuse_table { + let purge = true; + fuse_table.do_truncate(self.ctx.clone(), purge).await? + } else { + latest.truncate(self.ctx.clone()).await? 
+ } + } + if let Some((spec_vec, share_table_info)) = resp.spec_vec { save_share_spec( &self.ctx.get_tenant(), diff --git a/src/query/service/tests/it/storages/fuse/operations/gc.rs b/src/query/service/tests/it/storages/fuse/operations/gc.rs index 447ddcc394c0..512436f2d875 100644 --- a/src/query/service/tests/it/storages/fuse/operations/gc.rs +++ b/src/query/service/tests/it/storages/fuse/operations/gc.rs @@ -114,9 +114,9 @@ async fn test_fuse_purge_normal_orphan_snapshot() -> Result<()> { "do_gc: there should be 1 snapshot, 0 segment/block", expected_num_of_snapshot, 0, // 0 snapshot statistic - 1, // 1 segments - 1, // 1 blocks - 1, // 1 index + 1, // 0 segments + 1, // 0 blocks + 1, // 0 index Some(()), None, ) diff --git a/src/query/service/tests/it/storages/fuse/operations/purge_drop.rs b/src/query/service/tests/it/storages/fuse/operations/purge_drop.rs index b78191acce2f..4c9e7762c5ca 100644 --- a/src/query/service/tests/it/storages/fuse/operations/purge_drop.rs +++ b/src/query/service/tests/it/storages/fuse/operations/purge_drop.rs @@ -15,6 +15,7 @@ use common_base::base::tokio; use common_exception::Result; use databend_query::test_kits::table_test_fixture::append_sample_data; +use databend_query::test_kits::table_test_fixture::check_data_dir; use databend_query::test_kits::table_test_fixture::execute_command; use databend_query::test_kits::table_test_fixture::TestFixture; @@ -33,3 +34,32 @@ async fn test_fuse_snapshot_truncate_in_drop_stmt() -> Result<()> { execute_command(ctx.clone(), qry.as_str()).await?; Ok(()) } + +#[tokio::test(flavor = "multi_thread")] +async fn test_fuse_snapshot_truncate_in_drop_all_stmt() -> Result<()> { + let fixture = TestFixture::new().await; + let db = fixture.default_db_name(); + let tbl = fixture.default_table_name(); + let ctx = fixture.ctx(); + fixture.create_default_table().await?; + + // ingests some test data + append_sample_data(1, &fixture).await?; + // let's Drop + let qry = format!("drop table {}.{} all", db, tbl); + execute_command(ctx.clone(), qry.as_str()).await?; + + check_data_dir( + &fixture, + "drop table: there should be 1 snapshot, 0 segment/block", + 1, // 1 snapshot + 0, // 0 snapshot statistic + 0, // 0 segments + 0, // 0 blocks + 0, // 0 index + None, + None, + ) + .await?; + Ok(()) +} diff --git a/src/query/service/tests/it/storages/fuse/operations/truncate.rs b/src/query/service/tests/it/storages/fuse/operations/truncate.rs index 967ae9dd6a5f..ff302fd9aaf6 100644 --- a/src/query/service/tests/it/storages/fuse/operations/truncate.rs +++ b/src/query/service/tests/it/storages/fuse/operations/truncate.rs @@ -145,7 +145,7 @@ async fn test_fuse_table_truncate_appending_concurrently() -> common_exception:: append_data(s1_table_to_be_truncated.clone()).await?; let s2_table_to_appended = fixture.latest_default_table().await?; - // 4. perform `truncate purge` operation on s1 + // 4. 
perform `truncate` operation on s1 let r = s1_table_to_be_truncated.truncate(ctx.clone()).await; // version mismatched, and `truncate purge` should result in error (but nothing should have been removed) assert!(r.is_err()); diff --git a/src/query/sql/src/planner/binder/ddl/table.rs b/src/query/sql/src/planner/binder/ddl/table.rs index 655cf3fb4785..924c72307226 100644 --- a/src/query/sql/src/planner/binder/ddl/table.rs +++ b/src/query/sql/src/planner/binder/ddl/table.rs @@ -666,6 +666,7 @@ impl Binder { catalog, database, table, + all, } = stmt; let tenant = self.ctx.get_tenant(); @@ -678,6 +679,7 @@ impl Binder { catalog, database, table, + all: *all, }))) } diff --git a/src/query/sql/src/planner/plans/ddl/table.rs b/src/query/sql/src/planner/plans/ddl/table.rs index 899eae287f8a..ea86d7575ac5 100644 --- a/src/query/sql/src/planner/plans/ddl/table.rs +++ b/src/query/sql/src/planner/plans/ddl/table.rs @@ -84,6 +84,7 @@ pub struct DropTablePlan { pub database: String, /// The table name pub table: String, + pub all: bool, } impl DropTablePlan { diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 71039e682160..f694548bb8d4 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -554,7 +554,8 @@ impl Table for FuseTable { #[minitrace::trace(name = "fuse_table_truncate")] #[async_backtrace::framed] async fn truncate(&self, ctx: Arc) -> Result<()> { - self.do_truncate(ctx).await + let purge = false; + self.do_truncate(ctx, purge).await } #[minitrace::trace(name = "fuse_table_optimize")] diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs index e83767533c59..1c7a8528965b 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs @@ -336,7 +336,7 @@ impl BlockReader { Suppose the name of table is T ~~~ create table tmp_t as select * from T; - drop table T; + drop table T all; alter table tmp_t rename to T; ~~~ Please note that the history of table T WILL BE LOST. diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index d92faa19b428..e08ef24f1dce 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -99,7 +99,8 @@ impl FuseTable { }; ctx.get_write_progress().incr(&progress_values); // deleting the whole table... just a truncate - return self.do_truncate(ctx.clone()).await.map(|_| None); + let purge = false; + return self.do_truncate(ctx.clone(), purge).await.map(|_| None); } Some(filters) => filters, }; @@ -121,7 +122,8 @@ impl FuseTable { ctx.get_write_progress().incr(&progress_values); // deleting the whole table... 
just a truncate - return self.do_truncate(ctx.clone()).await.map(|_| None); + let purge = false; + return self.do_truncate(ctx.clone(), purge).await.map(|_| None); } } Ok(Some(snapshot.clone())) diff --git a/src/query/storages/fuse/src/operations/truncate.rs b/src/query/storages/fuse/src/operations/truncate.rs index 540b714df832..25217356c288 100644 --- a/src/query/storages/fuse/src/operations/truncate.rs +++ b/src/query/storages/fuse/src/operations/truncate.rs @@ -30,7 +30,7 @@ use crate::FuseTable; impl FuseTable { #[inline] #[async_backtrace::framed] - pub async fn do_truncate(&self, ctx: Arc) -> Result<()> { + pub async fn do_truncate(&self, ctx: Arc, purge: bool) -> Result<()> { if let Some(prev_snapshot) = self.read_table_snapshot().await? { // 1. prepare new snapshot let prev_id = prev_snapshot.snapshot_id; @@ -82,7 +82,6 @@ impl FuseTable { }) .await?; - // best effort to remove the table's copied files. catalog .truncate_table(&self.table_info, TruncateTableReq { table_id, @@ -97,6 +96,23 @@ impl FuseTable { new_snapshot_loc, ) .await; + + // best effort to remove historical data. if failed, let `vacuum` to do the job. + // TODO: consider remove the `purge` option from `truncate` + // - it is not a safe operation, there is NO retention interval protection here + // - it is incompatible with time travel features + if purge { + let snapshot_files = self.list_snapshot_files().await?; + let keep_last_snapshot = false; + let ret = self + .do_purge(&ctx, snapshot_files, None, keep_last_snapshot, false) + .await; + if let Err(e) = ret { + return Err(e); + } else { + return Ok(()); + } + } } Ok(()) diff --git a/src/tests/sqlsmith/src/sql_gen/ddl.rs b/src/tests/sqlsmith/src/sql_gen/ddl.rs index c91329af1d63..ef981e2b8463 100644 --- a/src/tests/sqlsmith/src/sql_gen/ddl.rs +++ b/src/tests/sqlsmith/src/sql_gen/ddl.rs @@ -66,6 +66,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { catalog: None, database: None, table: Identifier::from_name(table_name.clone()), + all: false, }; let create_table = CreateTableStmt { if_not_exists: true, diff --git a/tests/sqllogictests/suites/base/01_system/01_0002_system_query_log b/tests/sqllogictests/suites/base/01_system/01_0002_system_query_log index 82ce2774d1d8..9ec8305f11df 100644 --- a/tests/sqllogictests/suites/base/01_system/01_0002_system_query_log +++ b/tests/sqllogictests/suites/base/01_system/01_0002_system_query_log @@ -12,7 +12,7 @@ select count(*) > 0 from system.query_log 1 statement ok -drop table if exists tbl_01_0002 +drop table if exists tbl_01_0002 all statement ok create table tbl_01_0002(a int) diff --git a/tests/sqllogictests/suites/base/01_system/01_0007_system_clustering_history b/tests/sqllogictests/suites/base/01_system/01_0007_system_clustering_history index 1072cd21c742..0294adf01e81 100644 --- a/tests/sqllogictests/suites/base/01_system/01_0007_system_clustering_history +++ b/tests/sqllogictests/suites/base/01_system/01_0007_system_clustering_history @@ -1,5 +1,5 @@ statement ok -drop table if exists tbl_01_0007 +drop table if exists tbl_01_0007 all statement ok create table tbl_01_0007(a int not null) cluster by(a) diff --git a/tests/sqllogictests/suites/base/03_common/03_0003_select_group_by b/tests/sqllogictests/suites/base/03_common/03_0003_select_group_by index 602cf6039f5d..5a57592d690f 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0003_select_group_by +++ b/tests/sqllogictests/suites/base/03_common/03_0003_select_group_by @@ -75,7 +75,7 @@ statement ok DROP table t statement ok -drop table if exists t_datetime +drop 
table if exists t_datetime all statement ok CREATE TABLE t_datetime(created_at Date, created_time DateTime, count Int32) diff --git a/tests/sqllogictests/suites/base/03_common/03_0025_delete_from b/tests/sqllogictests/suites/base/03_common/03_0025_delete_from index ec371b908bba..692e4f549964 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0025_delete_from +++ b/tests/sqllogictests/suites/base/03_common/03_0025_delete_from @@ -54,7 +54,7 @@ select count(*) = 0 from t statement ok -drop table t +drop table t all statement ok create table t (c Int null) @@ -141,7 +141,7 @@ select count(*) = 0 from t statement ok -drop table t +drop table t all statement ok create table t(c Int) CLUSTER BY(c+1) @@ -161,7 +161,7 @@ select count(*) = 2 from t 1 statement ok -drop table t +drop table t all statement ok create table t(a Int, b Int) @@ -186,7 +186,7 @@ statement ok delete from t where t.a in (select * from numbers(10)) statement ok -drop table t +drop table t all #################################### @@ -245,7 +245,7 @@ select * from t order by c; statement ok -drop table t +drop table t all #################################### # delete pruning, whole segments # @@ -279,7 +279,7 @@ select * from t order by c; 9 statement ok -drop table t +drop table t all # test large data statement ok @@ -319,7 +319,7 @@ select count(*) from t where c >= 0 and c < 1500000; 0 statement ok -drop table t +drop table t all statement ok DROP DATABASE db1 diff --git a/tests/sqllogictests/suites/base/03_common/03_0028_copy_into_stage b/tests/sqllogictests/suites/base/03_common/03_0028_copy_into_stage index c0c942e443cb..0ee2e7040cdb 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0028_copy_into_stage +++ b/tests/sqllogictests/suites/base/03_common/03_0028_copy_into_stage @@ -31,7 +31,7 @@ SELECT COUNT() FROM test_table 4 statement ok -drop table test_table +drop table test_table all statement ok drop stage test diff --git a/tests/sqllogictests/suites/base/03_common/03_0031_copy_into_user_stage b/tests/sqllogictests/suites/base/03_common/03_0031_copy_into_user_stage index 462da395ef8e..6dee4c9f4fdf 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0031_copy_into_user_stage +++ b/tests/sqllogictests/suites/base/03_common/03_0031_copy_into_user_stage @@ -28,7 +28,7 @@ SELECT COUNT() FROM test_table 4 statement ok -drop table test_table +drop table test_table all statement ok DROP DATABASE db1 diff --git a/tests/sqllogictests/suites/base/03_common/03_0035_update b/tests/sqllogictests/suites/base/03_common/03_0035_update index f347f054432a..25d88b78d7d6 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0035_update +++ b/tests/sqllogictests/suites/base/03_common/03_0035_update @@ -89,13 +89,13 @@ select a from t3 6 statement ok -drop table t1 +drop table t1 all statement ok -drop table t2 +drop table t2 all statement ok -drop table t3 +drop table t3 all statement ok create table t1(id1 int, val1 varchar(255)); diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0001_ddl_drop_table_full b/tests/sqllogictests/suites/base/05_ddl/05_0001_ddl_drop_table_full index 35cf934d75ed..5be01f171242 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0001_ddl_drop_table_full +++ b/tests/sqllogictests/suites/base/05_ddl/05_0001_ddl_drop_table_full @@ -11,13 +11,13 @@ statement ok CREATE TABLE t(c1 int) ENGINE = Null statement ok -DROP TABLE t +DROP TABLE t ALL statement ok CREATE TABLE t(c1 int) ENGINE = Fuse statement ok -DROP TABLE t +DROP TABLE t ALL statement ok DROP database db_13_0001 diff --git 
a/tests/sqllogictests/suites/base/05_ddl/05_0023_exists_table b/tests/sqllogictests/suites/base/05_ddl/05_0023_exists_table index 80bcb8cbfe49..971e21f68fbc 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0023_exists_table +++ b/tests/sqllogictests/suites/base/05_ddl/05_0023_exists_table @@ -23,7 +23,7 @@ statement ok EXISTS TABLE db_05_0023_v2.t statement ok -DROP TABLE t +DROP TABLE t ALL statement ok EXISTS TABLE db_05_0023_v2.t diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0007_func_fuse_truncate b/tests/sqllogictests/suites/base/09_fuse_engine/09_0007_func_fuse_truncate_purge similarity index 100% rename from tests/sqllogictests/suites/base/09_fuse_engine/09_0007_func_fuse_truncate rename to tests/sqllogictests/suites/base/09_fuse_engine/09_0007_func_fuse_truncate_purge diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0017_transient_table b/tests/sqllogictests/suites/base/09_fuse_engine/09_0017_transient_table index 52d2b201306b..50a3db5b52a0 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0017_transient_table +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0017_transient_table @@ -38,3 +38,4 @@ DROP TABLE t09_0016 statement ok DROP DATABASE db1 + diff --git a/tests/sqllogictests/suites/base/12_time_travel/12_0003_time_travel_undrop b/tests/sqllogictests/suites/base/12_time_travel/12_0003_time_travel_undrop index 50acc17bf61c..0f0609ace3e8 100644 --- a/tests/sqllogictests/suites/base/12_time_travel/12_0003_time_travel_undrop +++ b/tests/sqllogictests/suites/base/12_time_travel/12_0003_time_travel_undrop @@ -103,5 +103,22 @@ SELECT count(1) FROM t statement ok DROP TABLE t +statement ok +CREATE TABLE t(c int) + +statement ok +INSERT INTO t values(1) + +statement ok +DROP TABLE t ALL + +statement ok +UNDROP TABLE t + +query I +SELECT count(*) FROM t +---- +0 + statement ok DROP database db_12_0003 diff --git a/tests/sqllogictests/suites/base/issues/issue_10103.test b/tests/sqllogictests/suites/base/issues/issue_10103.test index 3be0dad32034..f617f68c50e8 100644 --- a/tests/sqllogictests/suites/base/issues/issue_10103.test +++ b/tests/sqllogictests/suites/base/issues/issue_10103.test @@ -34,10 +34,10 @@ SELECT ts FROM test_ts_table LIMIT 1 2023-02-19 11:18:01.000000 statement ok -drop table test_table +drop table test_table all statement ok -drop table test_ts_table +drop table test_ts_table all statement ok drop stage test_10103 diff --git a/tests/sqllogictests/suites/duckdb/sql/aggregate/group/group_by_grouping_sets.test b/tests/sqllogictests/suites/duckdb/sql/aggregate/group/group_by_grouping_sets.test index 4bfb3f29314e..e19ae1bf808e 100644 --- a/tests/sqllogictests/suites/duckdb/sql/aggregate/group/group_by_grouping_sets.test +++ b/tests/sqllogictests/suites/duckdb/sql/aggregate/group/group_by_grouping_sets.test @@ -245,10 +245,10 @@ a B 1 5 NULL B a A 1 5 NULL NULL statement ok -drop table t; +drop table t all; statement ok -drop table tt; +drop table tt all; statement ok drop database grouping_sets; diff --git a/tests/sqllogictests/suites/mode/cluster/04_0002_explain_v2.test b/tests/sqllogictests/suites/mode/cluster/04_0002_explain_v2.test index 3c7300f00ddd..c48c284d5f33 100644 --- a/tests/sqllogictests/suites/mode/cluster/04_0002_explain_v2.test +++ b/tests/sqllogictests/suites/mode/cluster/04_0002_explain_v2.test @@ -2,10 +2,10 @@ statement ok set prefer_broadcast_join = 0 statement ok -drop table if exists t1; +drop table if exists t1 all; statement ok -drop table if exists t2; +drop table if exists t2 all; statement 
ok create table t1(a int not null, b int not null); diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain.test b/tests/sqllogictests/suites/mode/standalone/explain/explain.test index 6a3bff0473b1..d800c2ed23bb 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/explain.test @@ -1,8 +1,8 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok -drop table if exists t2 +drop table if exists t2 all statement ok create table t1 as select number as a, number as b from numbers(1) diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test b/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test index 0a9cbd5809df..bd2f6f0459d6 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test @@ -1,8 +1,8 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok -drop table if exists t2 +drop table if exists t2 all statement ok create table t1 as select number as a, number as b from numbers(1) diff --git a/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes b/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes index 646c3355004e..9f9607c8c2e6 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes +++ b/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes @@ -1,5 +1,5 @@ statement ok -drop table if exists t +drop table if exists t all statement ok set timezone = 'UTC' diff --git a/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes_tz b/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes_tz index cd751f969ee3..765c124cbf74 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes_tz +++ b/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes_tz @@ -1,5 +1,5 @@ statement ok -drop table if exists tt +drop table if exists tt all statement ok set timezone='UTC' diff --git a/tests/sqllogictests/suites/query/02_function/02_0014_function_maths b/tests/sqllogictests/suites/query/02_function/02_0014_function_maths index 6325dc32dbbc..e88a7c4cb3f7 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0014_function_maths +++ b/tests/sqllogictests/suites/query/02_function/02_0014_function_maths @@ -1,5 +1,5 @@ statement ok -drop table if exists math_sample_numbers +drop table if exists math_sample_numbers all statement ok CREATE TABLE math_sample_numbers (timestamp UInt32, value Int32) Engine = Fuse diff --git a/tests/sqllogictests/suites/query/02_function/02_0018_function_strings_repeat b/tests/sqllogictests/suites/query/02_function/02_0018_function_strings_repeat index 7e6bcdbafe20..a2c3ba90ad7e 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0018_function_strings_repeat +++ b/tests/sqllogictests/suites/query/02_function/02_0018_function_strings_repeat @@ -1,14 +1,14 @@ statement ok -drop table if exists strings_repeat_sample_u8 +drop table if exists strings_repeat_sample_u8 all statement ok -drop table if exists strings_repeat_sample_u16 +drop table if exists strings_repeat_sample_u16 all statement ok -drop table if exists strings_repeat_sample_u32 +drop table if exists strings_repeat_sample_u32 all statement ok -drop table if exists strings_repeat_sample_u64 +drop table if exists strings_repeat_sample_u64 all statement ok CREATE TABLE 
strings_repeat_sample_u8(s String, n Uint8) Engine = Fuse diff --git a/tests/sqllogictests/suites/query/02_function/02_0048_function_semi_structureds_object_keys b/tests/sqllogictests/suites/query/02_function/02_0048_function_semi_structureds_object_keys index af71632ed5ce..d475a1a6839c 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0048_function_semi_structureds_object_keys +++ b/tests/sqllogictests/suites/query/02_function/02_0048_function_semi_structureds_object_keys @@ -1,5 +1,5 @@ statement ok -drop table if exists objects_test1 +drop table if exists objects_test1 all statement ok CREATE TABLE IF NOT EXISTS objects_test1(id TINYINT, obj VARIANT, var VARIANT) Engine = Fuse diff --git a/tests/sqllogictests/suites/query/cte.test b/tests/sqllogictests/suites/query/cte.test index 9df89c397fc6..4ffcf46b2de4 100644 --- a/tests/sqllogictests/suites/query/cte.test +++ b/tests/sqllogictests/suites/query/cte.test @@ -2,7 +2,7 @@ statement ok use default statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer, b integer, c integer, d integer, e integer) diff --git a/tests/sqllogictests/suites/query/render_result.test b/tests/sqllogictests/suites/query/render_result.test index bfb960bf447c..aef7151accd1 100644 --- a/tests/sqllogictests/suites/query/render_result.test +++ b/tests/sqllogictests/suites/query/render_result.test @@ -2,7 +2,7 @@ statement ok use default statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer, b integer, c integer, d integer, e integer) @@ -30,4 +30,4 @@ order by col1,col5,col3,col2,col4 106 1 333 1067 109 statement ok -drop table if exists t1 +drop table if exists t1 all diff --git a/tests/sqllogictests/suites/ydb/select1-1.test b/tests/sqllogictests/suites/ydb/select1-1.test index 0d0390458591..766b45362807 100644 --- a/tests/sqllogictests/suites/ydb/select1-1.test +++ b/tests/sqllogictests/suites/ydb/select1-1.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer, b integer, c integer, d integer, e integer) diff --git a/tests/sqllogictests/suites/ydb/select1-2.test b/tests/sqllogictests/suites/ydb/select1-2.test index 6093d3b5d977..b8eb36808143 100644 --- a/tests/sqllogictests/suites/ydb/select1-2.test +++ b/tests/sqllogictests/suites/ydb/select1-2.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer, b integer, c integer, d integer, e integer) diff --git a/tests/sqllogictests/suites/ydb/select1-3.test b/tests/sqllogictests/suites/ydb/select1-3.test index 7ebd1a7dc9fa..4d3b407c6f04 100644 --- a/tests/sqllogictests/suites/ydb/select1-3.test +++ b/tests/sqllogictests/suites/ydb/select1-3.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer, b integer, c integer, d integer, e integer) diff --git a/tests/sqllogictests/suites/ydb/select1-4.test b/tests/sqllogictests/suites/ydb/select1-4.test index b6906faa30d2..49a214f8502e 100644 --- a/tests/sqllogictests/suites/ydb/select1-4.test +++ b/tests/sqllogictests/suites/ydb/select1-4.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer, b integer, c integer, d integer, e integer) diff --git a/tests/sqllogictests/suites/ydb/select1-5.test b/tests/sqllogictests/suites/ydb/select1-5.test index dcf4708612d2..4be2a249bfe1 100644 --- 
a/tests/sqllogictests/suites/ydb/select1-5.test +++ b/tests/sqllogictests/suites/ydb/select1-5.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer, b integer, c integer, d integer, e integer) diff --git a/tests/sqllogictests/suites/ydb/select2-1.test b/tests/sqllogictests/suites/ydb/select2-1.test index 9b45f459106e..36cf9b00c8e9 100644 --- a/tests/sqllogictests/suites/ydb/select2-1.test +++ b/tests/sqllogictests/suites/ydb/select2-1.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select2-2.test b/tests/sqllogictests/suites/ydb/select2-2.test index 3ff660a15473..9d5471946805 100644 --- a/tests/sqllogictests/suites/ydb/select2-2.test +++ b/tests/sqllogictests/suites/ydb/select2-2.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select2-3.test b/tests/sqllogictests/suites/ydb/select2-3.test index 3309b5d1b6b8..8261386c5567 100644 --- a/tests/sqllogictests/suites/ydb/select2-3.test +++ b/tests/sqllogictests/suites/ydb/select2-3.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select2-4.test b/tests/sqllogictests/suites/ydb/select2-4.test index 2161ba020978..0cf264950b1f 100644 --- a/tests/sqllogictests/suites/ydb/select2-4.test +++ b/tests/sqllogictests/suites/ydb/select2-4.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select2-5.test b/tests/sqllogictests/suites/ydb/select2-5.test index 10b98ba17e16..c992d31c3c2d 100644 --- a/tests/sqllogictests/suites/ydb/select2-5.test +++ b/tests/sqllogictests/suites/ydb/select2-5.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-1.test b/tests/sqllogictests/suites/ydb/select3-1.test index 073074374831..22456c0cd6c9 100644 --- a/tests/sqllogictests/suites/ydb/select3-1.test +++ b/tests/sqllogictests/suites/ydb/select3-1.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-10.test b/tests/sqllogictests/suites/ydb/select3-10.test index c1f1216ab432..9866756f8909 100644 --- a/tests/sqllogictests/suites/ydb/select3-10.test +++ b/tests/sqllogictests/suites/ydb/select3-10.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-11.test b/tests/sqllogictests/suites/ydb/select3-11.test index e3e6d6b63fd7..1a446a863362 100644 --- 
a/tests/sqllogictests/suites/ydb/select3-11.test +++ b/tests/sqllogictests/suites/ydb/select3-11.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-12.test b/tests/sqllogictests/suites/ydb/select3-12.test index 708709884737..041169ab4421 100644 --- a/tests/sqllogictests/suites/ydb/select3-12.test +++ b/tests/sqllogictests/suites/ydb/select3-12.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-13.test b/tests/sqllogictests/suites/ydb/select3-13.test index ae77d1da8322..4f0d2d708804 100644 --- a/tests/sqllogictests/suites/ydb/select3-13.test +++ b/tests/sqllogictests/suites/ydb/select3-13.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-14.test b/tests/sqllogictests/suites/ydb/select3-14.test index 1b62d6d12f15..223a247e9df1 100644 --- a/tests/sqllogictests/suites/ydb/select3-14.test +++ b/tests/sqllogictests/suites/ydb/select3-14.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-15.test b/tests/sqllogictests/suites/ydb/select3-15.test index cf6c1fab47d2..4c814cf69207 100644 --- a/tests/sqllogictests/suites/ydb/select3-15.test +++ b/tests/sqllogictests/suites/ydb/select3-15.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-2.test b/tests/sqllogictests/suites/ydb/select3-2.test index f5e46aca3f4a..e6b89c287891 100644 --- a/tests/sqllogictests/suites/ydb/select3-2.test +++ b/tests/sqllogictests/suites/ydb/select3-2.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-3.test b/tests/sqllogictests/suites/ydb/select3-3.test index feb46d3a94db..bd6c1fcaa9aa 100644 --- a/tests/sqllogictests/suites/ydb/select3-3.test +++ b/tests/sqllogictests/suites/ydb/select3-3.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-4.test b/tests/sqllogictests/suites/ydb/select3-4.test index e3dd6076aa0a..0ff745810507 100644 --- a/tests/sqllogictests/suites/ydb/select3-4.test +++ b/tests/sqllogictests/suites/ydb/select3-4.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-5.test b/tests/sqllogictests/suites/ydb/select3-5.test index a9913c947404..259d23e75d5b 
100644 --- a/tests/sqllogictests/suites/ydb/select3-5.test +++ b/tests/sqllogictests/suites/ydb/select3-5.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-6.test b/tests/sqllogictests/suites/ydb/select3-6.test index 312d01a101a1..4877d8876609 100644 --- a/tests/sqllogictests/suites/ydb/select3-6.test +++ b/tests/sqllogictests/suites/ydb/select3-6.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-7.test b/tests/sqllogictests/suites/ydb/select3-7.test index 477e9ea78503..a2e5a8d5ddaa 100644 --- a/tests/sqllogictests/suites/ydb/select3-7.test +++ b/tests/sqllogictests/suites/ydb/select3-7.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-8.test b/tests/sqllogictests/suites/ydb/select3-8.test index 855703744f49..37e3828f498c 100644 --- a/tests/sqllogictests/suites/ydb/select3-8.test +++ b/tests/sqllogictests/suites/ydb/select3-8.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/sqllogictests/suites/ydb/select3-9.test b/tests/sqllogictests/suites/ydb/select3-9.test index 01b582ab3fa2..f42942c264d9 100644 --- a/tests/sqllogictests/suites/ydb/select3-9.test +++ b/tests/sqllogictests/suites/ydb/select3-9.test @@ -1,5 +1,5 @@ statement ok -drop table if exists t1 +drop table if exists t1 all statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null) diff --git a/tests/suites/0_stateless/17_altertable/17_0002_alter_table_purge_before.sh b/tests/suites/0_stateless/17_altertable/17_0002_alter_table_purge_before.sh index 93644ecfd07d..85f385032306 100755 --- a/tests/suites/0_stateless/17_altertable/17_0002_alter_table_purge_before.sh +++ b/tests/suites/0_stateless/17_altertable/17_0002_alter_table_purge_before.sh @@ -46,7 +46,7 @@ echo "checking that after purge (by snapshot id) there should be 4 rows left" echo "select count(*)=4 from t17_0002" | $MYSQL_CLIENT_CONNECT ## Drop table. -echo "drop table t17_0002" | $MYSQL_CLIENT_CONNECT +echo "drop table t17_0002 all" | $MYSQL_CLIENT_CONNECT # PURGE BEFORE TIMESTAMP @@ -90,4 +90,4 @@ echo "checking that after purge (by timestamp) there should be 5 rows left" echo "select count(*)=5 from t17_0002" | $MYSQL_CLIENT_CONNECT ## Drop table. -echo "drop table t17_0002" | $MYSQL_CLIENT_CONNECT +echo "drop table t17_0002 all" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/0_stateless/17_altertable/17_0003_alter_table_update.sh b/tests/suites/0_stateless/17_altertable/17_0003_alter_table_update.sh index 583cfb563b3a..fa884b01df13 100755 --- a/tests/suites/0_stateless/17_altertable/17_0003_alter_table_update.sh +++ b/tests/suites/0_stateless/17_altertable/17_0003_alter_table_update.sh @@ -25,7 +25,7 @@ echo "update table column" echo "update t17_0003 set c=2 where c=1" | $MYSQL_CLIENT_CONNECT ## Drop table. 
-echo "drop table t17_0003" | $MYSQL_CLIENT_CONNECT +echo "drop table t17_0003 all" | $MYSQL_CLIENT_CONNECT ## create two column table echo "create table t17_0003(a int not null, b int not null)" | $MYSQL_CLIENT_CONNECT @@ -50,4 +50,4 @@ echo "update table column" echo "update t17_0003 set a=3 where a=1" | $MYSQL_CLIENT_CONNECT ## Drop table. -echo "drop table t17_0003" | $MYSQL_CLIENT_CONNECT +echo "drop table t17_0003 all" | $MYSQL_CLIENT_CONNECT \ No newline at end of file diff --git a/tests/suites/0_stateless/20+_others/20_0011_purge_before.sh b/tests/suites/0_stateless/20+_others/20_0011_purge_before.sh index 49d136e729c4..4980aa0d9885 100755 --- a/tests/suites/0_stateless/20+_others/20_0011_purge_before.sh +++ b/tests/suites/0_stateless/20+_others/20_0011_purge_before.sh @@ -31,7 +31,7 @@ echo "checking that after purge (by snapshot id) there should be 4 rows left" echo "select count(*)=4 from t20_0011" | $MYSQL_CLIENT_CONNECT ## Drop table. -echo "drop table t20_0011" | $MYSQL_CLIENT_CONNECT +echo "drop table t20_0011 all" | $MYSQL_CLIENT_CONNECT # PURGE BEFORE TIMESTAMP @@ -58,4 +58,4 @@ echo "checking that after purge (by timestamp) there should be 4 rows left" echo "select count(*)=4 from t20_0011" | $MYSQL_CLIENT_CONNECT ## Drop table. -echo "drop table t20_0011" | $MYSQL_CLIENT_CONNECT +echo "drop table t20_0011 all" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/0_stateless/20+_others/20_0012_privilege_access.sh b/tests/suites/0_stateless/20+_others/20_0012_privilege_access.sh index c766b6005b5b..134e1f1c5efd 100755 --- a/tests/suites/0_stateless/20+_others/20_0012_privilege_access.sh +++ b/tests/suites/0_stateless/20+_others/20_0012_privilege_access.sh @@ -113,9 +113,9 @@ echo "GRANT SELECT ON system.fuse_block TO 'test-user'" | $MYSQL_CLIENT_CONNECT echo "select count(*)>=1 from fuse_block('default', 't20_0012_a')" | $TEST_USER_CONNECT ## Drop table. -echo "drop table default.t20_0012" | $MYSQL_CLIENT_CONNECT -echo "drop table default.t20_0012_a" | $MYSQL_CLIENT_CONNECT -echo "drop table default.t20_0012_b" | $MYSQL_CLIENT_CONNECT +echo "drop table default.t20_0012 all" | $MYSQL_CLIENT_CONNECT +echo "drop table default.t20_0012_a all" | $MYSQL_CLIENT_CONNECT +echo "drop table default.t20_0012_b all" | $MYSQL_CLIENT_CONNECT echo "drop view default2.v_t20_0012" | $MYSQL_CLIENT_CONNECT ## Drop database. 
diff --git a/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh b/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh index acf4a58613f7..707acc4db0d0 100755 --- a/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh +++ b/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh @@ -28,4 +28,4 @@ for i in "${ontime_statements[@]}"; do done ## Clean table -echo "drop table if exists ontime_mini;" | $MYSQL_CLIENT_CONNECT +echo "drop table if exists ontime_mini all;" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/04_mini_dataset/04_0001_mini_hits.sh b/tests/suites/1_stateful/04_mini_dataset/04_0001_mini_hits.sh index e5a7e3ffa244..291ad00c80f2 100755 --- a/tests/suites/1_stateful/04_mini_dataset/04_0001_mini_hits.sh +++ b/tests/suites/1_stateful/04_mini_dataset/04_0001_mini_hits.sh @@ -105,4 +105,4 @@ for i in "${hits_statements[@]}"; do done ## Clean up -echo "drop table if exists hits;" | $MYSQL_CLIENT_CONNECT +echo "drop table if exists hits all;" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/05_formats/05_01_compact/05_01_00_load_compact_copy.sh b/tests/suites/1_stateful/05_formats/05_01_compact/05_01_00_load_compact_copy.sh index be21f27434c9..bd847a61660b 100755 --- a/tests/suites/1_stateful/05_formats/05_01_compact/05_01_00_load_compact_copy.sh +++ b/tests/suites/1_stateful/05_formats/05_01_compact/05_01_00_load_compact_copy.sh @@ -10,7 +10,7 @@ for j in $(seq 1 1000);do printf "0123456789\n" >> "$DATA" done -echo "drop table if exists t1" | $MYSQL_CLIENT_CONNECT +echo "drop table if exists t1 all" | $MYSQL_CLIENT_CONNECT echo "CREATE TABLE t1 ( c0 string diff --git a/tests/suites/1_stateful/05_formats/05_01_compact/05_01_01_load_compact_streaming_load.sh b/tests/suites/1_stateful/05_formats/05_01_compact/05_01_01_load_compact_streaming_load.sh index ef82801b4435..2b37912014b9 100755 --- a/tests/suites/1_stateful/05_formats/05_01_compact/05_01_01_load_compact_streaming_load.sh +++ b/tests/suites/1_stateful/05_formats/05_01_compact/05_01_01_load_compact_streaming_load.sh @@ -10,7 +10,7 @@ for j in $(seq 1 1000);do printf "0123456789\n" >> "$DATA" done -echo "drop table if exists t1" | $MYSQL_CLIENT_CONNECT +echo "drop table if exists t1 all" | $MYSQL_CLIENT_CONNECT echo "CREATE TABLE t1 ( c0 string diff --git a/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_max_size.sh b/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_max_size.sh index 2c2a0c6ebda1..4f9c131b9a89 100755 --- a/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_max_size.sh +++ b/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_max_size.sh @@ -11,7 +11,7 @@ for j in $(seq 1 1000);do printf "0123456789\n" >> "$DATA" done -echo "drop table if exists t1" | $MYSQL_CLIENT_CONNECT +echo "drop table if exists t1 all" | $MYSQL_CLIENT_CONNECT echo "CREATE TABLE t1 ( c0 string diff --git a/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_row_per_block.sh b/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_row_per_block.sh index 9f8fdbfd8c1d..087fbcd550bf 100755 --- a/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_row_per_block.sh +++ b/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_row_per_block.sh @@ -11,7 +11,7 @@ for j in $(seq 1 1000);do printf "0123456789\n" >> "$DATA" done -echo "drop table if exists t1" | $MYSQL_CLIENT_CONNECT +echo "drop table 
if exists t1 all" | $MYSQL_CLIENT_CONNECT echo "CREATE TABLE t1 ( c0 string From be5d33db7aa03c7b06a922984f1d75358a14b942 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 12 Oct 2023 05:08:16 -0700 Subject: [PATCH 12/13] chore(query): improve list udf error (#13227) --- src/query/management/src/udf/udf_mgr.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/query/management/src/udf/udf_mgr.rs b/src/query/management/src/udf/udf_mgr.rs index 85c00161e3d1..3fd6ae067178 100644 --- a/src/query/management/src/udf/udf_mgr.rs +++ b/src/query/management/src/udf/udf_mgr.rs @@ -133,8 +133,10 @@ impl UdfApi for UdfMgr { let values = self.kv_api.prefix_list_kv(&self.udf_prefix).await?; let mut udfs = Vec::with_capacity(values.len()); - for (_, value) in values { - let udf = deserialize_struct(&value.data, ErrorCode::IllegalUDFFormat, || "")?; + for (name, value) in values { + let udf = deserialize_struct(&value.data, ErrorCode::IllegalUDFFormat, || { + format!("udf {name} is corrupt") + })?; udfs.push(udf); } Ok(udfs) From e04851376ff3d7279389a242891361c14090a46a Mon Sep 17 00:00:00 2001 From: "xudong.w" Date: Thu, 12 Oct 2023 20:09:02 +0800 Subject: [PATCH 13/13] feat: speed up iejoin (#13229) --- .../transforms/range_join/range_join_state.rs | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/query/service/src/pipelines/processors/transforms/range_join/range_join_state.rs b/src/query/service/src/pipelines/processors/transforms/range_join/range_join_state.rs index dc29f00d75d5..bc0e38e00080 100644 --- a/src/query/service/src/pipelines/processors/transforms/range_join/range_join_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/range_join/range_join_state.rs @@ -17,6 +17,7 @@ use std::sync::atomic::AtomicU64; use std::sync::Arc; use common_base::base::tokio::sync::Notify; +use common_catalog::table_context::TableContext; use common_exception::Result; use common_expression::types::DataType; use common_expression::types::NumberDataType; @@ -171,8 +172,29 @@ impl RangeJoinState { } pub(crate) fn partition(&self) -> Result<()> { + let max_threads = self.ctx.get_settings().get_max_threads()? as usize; let left_table = self.left_table.read(); - let right_table = self.right_table.read(); + // Right table is bigger than left table + let mut right_table = self.right_table.write(); + if !left_table.is_empty() + && !right_table.is_empty() + && left_table.len() * right_table.len() < max_threads + { + let num_parts = max_threads / left_table.len() + 1; + // Spit right_table to num_parts equally + let merged_right_table = DataBlock::concat(&right_table)?; + let mut indices = Vec::with_capacity(merged_right_table.num_rows()); + for idx in 0..merged_right_table.num_rows() { + indices.push((idx % num_parts) as u32); + } + let scatter_blocks = DataBlock::scatter(&merged_right_table, &indices, num_parts)?; + right_table.clear(); + for block in scatter_blocks.iter() { + if !block.is_empty() { + right_table.push(block.clone()); + } + } + } let mut left_sorted_blocks = self.left_sorted_blocks.write(); let mut right_sorted_blocks = self.right_sorted_blocks.write();