From 03d17886054c501e05672459939be8c838484b31 Mon Sep 17 00:00:00 2001 From: Yijun Zhao Date: Wed, 20 Sep 2023 14:20:39 +0800 Subject: [PATCH 01/21] fix reviewer comments (#12917) --- src/query/expression/src/values.rs | 8 +++++--- src/query/sql/src/planner/semantic/type_check.rs | 4 ++-- .../query/02_function/02_0061_function_array | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index 57420c733f25..ca6974ea7840 100755 --- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -829,7 +829,9 @@ impl Column { } pub fn domain(&self) -> Domain { - assert!(self.len() > 0); + if !matches!(self, Column::Array(_) | Column::Map(_)) { + assert!(self.len() > 0); + } match self { Column::Null { .. } => Domain::Nullable(NullableDomain { has_null: true, @@ -865,7 +867,7 @@ impl Column { }) } Column::Array(col) => { - if col.len() == 0 { + if col.len() == 0 || col.values.len() == 0 { Domain::Array(None) } else { let inner_domain = col.values.domain(); @@ -873,7 +875,7 @@ impl Column { } } Column::Map(col) => { - if col.len() == 0 { + if col.len() == 0 || col.values.len() == 0 { Domain::Map(None) } else { let inner_domain = col.values.domain(); diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 4f70b385b9e0..187400e9d297 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -697,14 +697,14 @@ impl<'a> TypeChecker<'a> { } // check window function legal - { + if window.is_some() { let supported_window_funcs = AggregateFunctionFactory::instance() .registered_names() .into_iter() .chain(GENERAL_WINDOW_FUNCTIONS.iter().cloned().map(str::to_string)) .collect::>(); let name = func_name.to_lowercase(); - if window.is_some() && !supported_window_funcs.contains(&name) { + if !supported_window_funcs.contains(&name) { return Err(ErrorCode::SemanticError( "only general and aggregate functions allowed in window syntax", ) diff --git a/tests/sqllogictests/suites/query/02_function/02_0061_function_array b/tests/sqllogictests/suites/query/02_function/02_0061_function_array index 157c6fe14e49..711651d5668f 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0061_function_array +++ b/tests/sqllogictests/suites/query/02_function/02_0061_function_array @@ -104,6 +104,21 @@ select array_distinct(col1), array_distinct(col2), array_distinct(col3) from t ---- [1,2,3] ['x','y','z'] ['2022-02-02'] +query T +select array_distinct([array_distinct([NULL, NULL])]) +---- +[[]] + +query T +select array_distinct(array_distinct([NULL, NULL])) +---- +[] + +query T +select array_distinct([null, null, 1, 1]) +---- +[1] + query I select array_sum(col1) from t ---- From 306b08589309a6a17d56024fe2d42c1a3ff1b3df Mon Sep 17 00:00:00 2001 From: flaneur Date: Wed, 20 Sep 2023 16:36:46 +0800 Subject: [PATCH 02/21] feat(observability): replace metrics-rs with prometheus-client (#12787) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add metrics in mod openai * add metrics in exchange * add metrics in cluster * add metrics in interpreters/common/metrics.rs * fix build * fix type * cargo check * add metrics in transform_metrics * cover transform_metrics.rs * fix metrics on mysql * fix lint * make lint * add metrics in http query * rename metrics with prefix * cover metrics in interpreter_metrics * fix lint * remove the mysql legacy 
metrics * fix check * chore: resolve session leak in background service (#12776) * docs: query node configurations (#12778) * updates * updates * Update 02-query-config.md * Update create-catalog.md * Update presign.md * fix: not allow '('/')' in stage at_string. (#12781) * chore: change partition reshuffle (#12784) * fix test (#12788) * fix test (#12799) * fix: tag styles (#12800) * chore: styles * chore: Replaced Twitter Logo * fix: styles * fix: tag styles * chore: adjust datasets for ci (#12796) * add test case (#12789) Co-authored-by: BohuTANG * chore: fix naming in call procedure (#12801) * feat: support domain calculation for decimal types. (#12683) * Support decimal calc_domain for to_decimal and compare ops. * Support decimal calc_domain for decimal to float and uncomment prune page tests. * Refactor codes. * Support decimal calc_domain for decimal arithmetic operations. * Use `passthrough_nullable`. * Fix unit test. * Fix. * Reuse methods. * Fix. * Support bool to decimal domain calc. * Reuse type cast codes. * Remove nullable for decimal functions. --------- Co-authored-by: BohuTANG * fix(query): the panic issue of (instr, position, locate, range) function (#12786) * fix(query): fix the panic issue of functions(instr, position, locate) * fix(query): panic issue of range function --------- Co-authored-by: BohuTANG * chore: fix right anti join (#12803) * fix right anti join * sqllogictest * fix(query): fix and filter panic (#12770) * fix(query): fix and filter panic * and_filters support boolean type * fix * fix * use try_reduce for and_filters * fix * fix * docs: update slugs (#12804) * updates * Update 60-optimize-table.md * fixed * Update 60-optimize-table.md * Update docs/doc/14-sql-commands/00-ddl/20-table/60-optimize-table.md Co-authored-by: zhyass * Update 60-optimize-table.md * updates * updates * Update _category_.json * updates * Update 43-data-type-variant.md * Update mindsdb.md --------- Co-authored-by: zhyass * fix(query): fix null column in group by (#12797) * fix(query): fix null column in group by * fix(query): fix null column in group by * fix(query): fix null column in group by --------- Co-authored-by: BohuTANG * refactor: let Level manage multi level data instead of using a chain of Arc (#12772) * chore: minor refactor * refactor: let Level manage multi level data instead of using a chain of Arc Using a `Vec>` simplifies kway-merge. And it adds flexibility to manage different types of `MapApi` implementation. * M src/meta/raft-store/src/sm_v002/leveled_store/leveled_map.rs * fix: fix syntax of Querying Staged Files. (#12815) * docs: fix example in querying-stage. (#12816) * chore: migrate repo to R2 (#12813) * Update 05-querying-stage.md (#12819) * fix: fields of select from csv/TSV should be nullable. (#12807) * chore: rename snapshot methods (#12810) * refactor: rename MetaApi to SysDataApi * refactor: move non-user data from state machine to `SysData` * chore: simplify snapshot_view_v002_test * chore: rename snapshot methods * feat: fill default columns for not match (#12747) * fix metrics * fill default columns without source schema * add fill default columns and stage blocks * fix typo * fix bug * fix bug * fix test * remove println * reduce duplicate codes * fix bug and add test --------- Co-authored-by: Sky Fan <3374614481@qq.com> * feat(sqlsmith): add gen_agg_func (#12783) * feat(sqlsmith): add agg func * fix: check_number has already cast the expr internal. no need cast out it. 
* fix conversation * fix ci err * chore(query): add temp files system table (#12808) * chore(query): add temp files system table * chore(query): add temp files system table * chore(query): add temp files system table * chore(query): add temp files system table * docs(weekly): add this week in databend 110 (#12822) * docs(weekly): add this week in databend 110 Signed-off-by: Chojan Shang * docs: apply suggestions from code review Co-authored-by: soyeric128 * docs(weekly): minor update Signed-off-by: Chojan Shang --------- Signed-off-by: Chojan Shang Co-authored-by: soyeric128 * fix: consider tuple type alone when checking type (#12814) * fix: consider tuple type alone when checking type * fix * Update src/query/expression/src/block.rs Co-authored-by: Andy Lok --------- Co-authored-by: Andy Lok * fix(cluster): fix exchange read source hang (#12823) * fix: fix some sqlsmith hunted bugs (#12821) * fix bitmap eq check * fix great_circle_angle panic * add sqllogictests * chore(ci): modify mysql auth py test, use pdbc-mysql (#12818) Co-authored-by: sundyli <543950155@qq.com> * feat(planner): support infer filter (#12779) * support infer filter * add Equal test * update * add NotEqual test * add LT test * add infer_filter test for native * make lint * fix * sqllogictest * fix build-aarch64 * fix * fix python bindings * docs: Insert / replace updates (#12829) * Update dml-replace.md * Update dml-replace.md * Update dml-replace.md * Update dml-insert.md * fix comments * fix: decimal unary minus. (#12817) * Fix decimal unary minus. * Avoid overflow. * fix delete contain internal columns panic (#12833) * feat: support column stats provider for PaquetTable (parquet_rs) (#12809) * parquet_rs: read meta when create parquet table and collect stats. * Store parquet meta in memory after creating table. * Lazy read parquet metas. * Skip create stats provider if is copy. * Fix deadlock. * Fix comment. * Fix leaf fields. * Satisfy comment. * Fix clippy. * Fix unit tests for leaf fields. --------- Co-authored-by: sundyli <543950155@qq.com> * fix Ontime DDL hyperlink (#12836) fix Ontime DDL link in [this page ](https://databend.rs/doc/use-cases/analyze-ontime-with-databend-on-ec2-and-s3) * feat: support stage location as a literal string. (#12830) * feat: support stage location as a literal string. * update unit tests. * add unit tests. * update test. * feat: allow `select $1 from @s1 ()` (#12838) * fix #12735: not display TableReference::Location.options if it is empty. * feat: allow `select $1 from @s1 ()`. 
* feat(query): Support inserting bitmap string values (#12825) * feat(query): Support insert bitmap values * fix * fix * display string * fix(query): rewrite view query when table_ref with None database (#12840) * fix(query): rewrite view query that table_ref with None database * fix conversation & modify some test * feat(sqlsmith): Support generating factory scalar functions (#12844) Co-authored-by: TCeason <33082201+TCeason@users.noreply.github.com> * docs: share endpoint (#12843) * fix: allow setting log-xx-on to false (#12846) * feat(planner): improve infer predicates (#12845) * improve infer predicates * add test * sqllogictest * sqllogictest * fix: fix_sort_null_panic (#12827) * fix: add_null_sort * fix: correct testing case --------- Co-authored-by: 陈侗 * feat(sqlsmith): Ignore reports of some known errors (#12858) * feat(sqlsmith): Ignore reports of some known errors * fix * fix: avoid relying on db-id-to-name mapping when dropping a table (#12847) When dropping a table, it needs to update the count of all tables belonging to a `tenant`. Before this commit, it loads the `tenent` from a reversed mapping from `database-id` to `tenant`. In this commit, `tenent` is passed in to `schema-api` by callers. So that when the reversed mapping is lacking(old data), it won't return an UnknownDatabaseId error. * feat: push down check duplicate and idempotent delete (#12780) * remove check logic for now * finish check * fix bug * add idempotent delete * fix linux check * fix bug * fix bugs * unify codes * fix * add comments * change the metrics in meta-grpc-client * add session_metrics * remove unused metrics-rs metrics * fix lint * remove the legacy init recorder * migrate the metrics_handler * fix test * fix ut * fix lint * add comment about reset_global_prometheus_registry * remove common-metrics::recorder * fix cargo.toml * remove the metrics layer in opendal * add prefix for registry * disable metrics-layer * strip prefix * strip prefix in dump_metric_samples --------- Signed-off-by: Chojan Shang Co-authored-by: zhihanz Co-authored-by: soyeric128 Co-authored-by: Yang Xiufeng Co-authored-by: zhyass Co-authored-by: JackTan25 <60096118+JackTan25@users.noreply.github.com> Co-authored-by: Quan <787025321@qq.com> Co-authored-by: everpcpc Co-authored-by: BohuTANG Co-authored-by: RinChanNOW Co-authored-by: LanceLi Co-authored-by: Jk Xu <54522439+Dousir9@users.noreply.github.com> Co-authored-by: baishen Co-authored-by: sundyli <543950155@qq.com> Co-authored-by: 张炎泼 Co-authored-by: Sky Fan <3374614481@qq.com> Co-authored-by: TCeason <33082201+TCeason@users.noreply.github.com> Co-authored-by: Winter Zhang Co-authored-by: Chojan Shang Co-authored-by: xudong.w Co-authored-by: Andy Lok Co-authored-by: Yijun Zhao Co-authored-by: Naga Co-authored-by: TONG CHEN <52696678+ct20000901@users.noreply.github.com> Co-authored-by: 陈侗 --- Cargo.lock | 9 +- Cargo.toml | 1 - src/binaries/Cargo.toml | 7 - src/binaries/meta/entry.rs | 2 - src/binaries/query/entry.rs | 2 - src/common/metrics/Cargo.toml | 3 - src/common/metrics/src/dump.rs | 15 +- src/common/metrics/src/lib.rs | 19 +- src/common/metrics/src/recorder.rs | 313 ------------------ src/common/metrics/src/registry.rs | 18 +- src/common/metrics/src/reset.rs | 26 -- src/common/metrics/tests/it/main.rs | 40 ++- src/common/openai/Cargo.toml | 2 + src/common/openai/src/metrics.rs | 14 + src/common/storage/src/metrics/copy.rs | 40 +-- src/common/storage/src/metrics/merge_into.rs | 5 - src/common/storage/src/operator.rs | 4 +- src/meta/client/Cargo.toml | 1 + 
src/meta/client/src/grpc_client.rs | 75 +---- src/meta/client/src/grpc_metrics.rs | 78 +++++ src/meta/client/src/lib.rs | 1 + src/meta/service/tests/it/api/http/metrics.rs | 3 - src/query/service/Cargo.toml | 1 + .../rpc/exchange/metrics/exchange_metrics.rs | 14 + src/query/service/src/clusters/cluster.rs | 119 ++----- src/query/service/src/clusters/metrics.rs | 78 ++++- .../src/interpreters/common/metrics.rs | 28 +- .../src/interpreters/interpreter_metrics.rs | 143 +++++--- .../service/src/metrics/metric_service.rs | 16 +- .../transforms/metrics/transform_metrics.rs | 93 ++++-- src/query/service/src/servers/http/metrics.rs | 34 +- .../servers/mysql/mysql_interactive_worker.rs | 13 +- .../src/servers/mysql/mysql_metrics.rs | 22 +- src/query/service/src/sessions/mod.rs | 1 + .../service/src/sessions/session_metrics.rs | 37 +++ src/query/service/src/sessions/session_mgr.rs | 35 +- src/query/service/tests/it/metrics.rs | 17 +- src/query/service/tests/it/storages/system.rs | 15 +- src/query/sharing/src/layer.rs | 4 +- .../storages/common/cache/src/metrics.rs | 11 - .../storages/fuse/src/metrics/fuse_metrics.rs | 78 ----- .../fuse/src/metrics/index_metrics.rs | 4 - .../storages/system/src/metrics_table.rs | 13 +- 43 files changed, 596 insertions(+), 858 deletions(-) delete mode 100644 src/common/metrics/src/recorder.rs delete mode 100644 src/common/metrics/src/reset.rs create mode 100644 src/meta/client/src/grpc_metrics.rs create mode 100644 src/query/service/src/sessions/session_metrics.rs diff --git a/Cargo.lock b/Cargo.lock index 88b66acc55ec..521ff607751d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2243,6 +2243,7 @@ dependencies = [ "common-tracing", "derive_more", "futures", + "lazy_static", "log", "logcall", "minitrace", @@ -2428,11 +2429,8 @@ dependencies = [ "anyhow", "common-exception", "lazy_static", - "log", "metrics", "metrics-exporter-prometheus", - "once_cell", - "parking_lot 0.12.1", "procfs", "prometheus-client", "prometheus-parse", @@ -2445,6 +2443,8 @@ name = "common-openai" version = "0.1.0" dependencies = [ "common-exception", + "common-metrics", + "lazy_static", "log", "metrics", "openai_api_rust", @@ -3662,7 +3662,6 @@ dependencies = [ "common-meta-sled-store", "common-meta-store", "common-meta-types", - "common-metrics", "common-storage", "common-tracing", "databend-meta", @@ -3875,6 +3874,7 @@ dependencies = [ "itertools 0.10.5", "jsonb", "jwt-simple", + "lazy_static", "log", "lz4", "maplit", @@ -8296,7 +8296,6 @@ dependencies = [ "hyper", "log", "md-5", - "metrics", "minitrace", "moka", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index b1987c7be1d9..a28467126363 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,7 +108,6 @@ members = [ sled = { git = "https://github.com/datafuse-extras/sled", tag = "v0.34.7-datafuse.1", default-features = false } opendal = { version = "0.39", features = [ "layers-minitrace", - "layers-metrics", "services-ipfs", "services-moka", "services-redis", diff --git a/src/binaries/Cargo.toml b/src/binaries/Cargo.toml index 9351051caa44..29eb0819ff1a 100644 --- a/src/binaries/Cargo.toml +++ b/src/binaries/Cargo.toml @@ -27,12 +27,6 @@ io-uring = [ "common-meta-raft-store/io-uring", ] -enable-histogram-metrics = [ - "default", - "common-metrics/enable-histogram", - "databend-query/enable-histogram-metrics", -] - [dependencies] # Workspace dependencies common-base = { path = "../common/base" } @@ -48,7 +42,6 @@ common-meta-raft-store = { path = "../meta/raft-store" } common-meta-sled-store = { path = "../meta/sled-store" } common-meta-store = { 
path = "../meta/store" } common-meta-types = { path = "../meta/types" } -common-metrics = { path = "../common/metrics" } common-storage = { path = "../common/storage" } common-tracing = { path = "../common/tracing" } databend-meta = { path = "../meta/service" } diff --git a/src/binaries/meta/entry.rs b/src/binaries/meta/entry.rs index 4633971fac0f..c4a52325b958 100644 --- a/src/binaries/meta/entry.rs +++ b/src/binaries/meta/entry.rs @@ -31,7 +31,6 @@ use common_meta_types::Cmd; use common_meta_types::LogEntry; use common_meta_types::MetaAPIError; use common_meta_types::Node; -use common_metrics::init_default_metrics_recorder; use common_tracing::init_logging; use common_tracing::set_panic_hook; use databend_meta::api::GrpcServer; @@ -91,7 +90,6 @@ pub async fn entry(conf: Config) -> anyhow::Result<()> { } init_sled_db(conf.raft_config.raft_dir.clone()); - init_default_metrics_recorder(); let single_or_join = if conf.raft_config.single { "single".to_string() diff --git a/src/binaries/query/entry.rs b/src/binaries/query/entry.rs index 37ddd5be1eb5..5d0e3758c2a9 100644 --- a/src/binaries/query/entry.rs +++ b/src/binaries/query/entry.rs @@ -25,7 +25,6 @@ use common_config::QUERY_SEMVER; use common_exception::ErrorCode; use common_exception::Result; use common_meta_client::MIN_METASRV_SEMVER; -use common_metrics::init_default_metrics_recorder; use common_tracing::set_panic_hook; use databend_query::api::HttpService; use databend_query::api::RpcService; @@ -59,7 +58,6 @@ pub async fn run_cmd(conf: &InnerConfig) -> Result { } pub async fn init_services(conf: &InnerConfig) -> Result<()> { - init_default_metrics_recorder(); set_panic_hook(); set_alloc_error_hook(); diff --git a/src/common/metrics/Cargo.toml b/src/common/metrics/Cargo.toml index f1e2838a3e75..d1094050673a 100644 --- a/src/common/metrics/Cargo.toml +++ b/src/common/metrics/Cargo.toml @@ -19,11 +19,8 @@ common-exception = { path = "../exception" } # Crates.io dependencies lazy_static = { workspace = true } -log = { workspace = true } metrics = "0.20.1" metrics-exporter-prometheus = { version = "0.11.0", default-features = false } -once_cell = "1.15.0" -parking_lot = "0.12.1" prometheus-client = { workspace = true } prometheus-parse = "0.2.3" serde = { workspace = true } diff --git a/src/common/metrics/src/dump.rs b/src/common/metrics/src/dump.rs index 1d9df2501f91..cfa86df54614 100644 --- a/src/common/metrics/src/dump.rs +++ b/src/common/metrics/src/dump.rs @@ -16,7 +16,9 @@ use std::collections::HashMap; use common_exception::ErrorCode; use common_exception::Result; -use metrics_exporter_prometheus::PrometheusHandle; +use prometheus_client::registry::Registry; + +use crate::render_prometheus_metrics; #[derive(Debug)] pub struct MetricSample { @@ -86,8 +88,8 @@ pub struct SummaryCount { pub count: f64, } -pub fn dump_metric_samples(handle: PrometheusHandle) -> Result> { - let text = handle.render(); +pub fn dump_metric_samples(registry: &Registry) -> Result> { + let text = render_prometheus_metrics(registry); let lines = text.lines().map(|s| Ok(s.to_owned())); let mut samples = prometheus_parse::Scrape::parse(lines) .map_err(|err| ErrorCode::Internal(format!("Dump prometheus metrics failed: {:?}", err)))? 
@@ -95,8 +97,13 @@ pub fn dump_metric_samples(handle: PrometheusHandle) -> Result .into_iter() .map(|s| { let value: MetricValue = s.value.into(); + let metric_name = s + .metric + .strip_prefix("databend_") + .map(|s| s.to_string()) + .unwrap_or(s.metric); MetricSample { - name: s.metric, + name: metric_name, value, labels: (*s.labels).clone(), } diff --git a/src/common/metrics/src/lib.rs b/src/common/metrics/src/lib.rs index f4295cf632b4..066ee962b0b3 100644 --- a/src/common/metrics/src/lib.rs +++ b/src/common/metrics/src/lib.rs @@ -17,9 +17,9 @@ pub mod counter; mod dump; pub mod histogram; -mod recorder; pub mod registry; -mod reset; + +pub type VecLabels = Vec<(&'static str, String)>; pub use dump::dump_metric_samples; pub use dump::HistogramCount; @@ -31,18 +31,6 @@ pub use prometheus_client::metrics::counter::Counter; pub use prometheus_client::metrics::family::Family; pub use prometheus_client::metrics::gauge::Gauge; pub use prometheus_client::metrics::histogram::Histogram; -pub use recorder::init_default_metrics_recorder; -pub use recorder::label_counter; -pub use recorder::label_counter_with_val; -pub use recorder::label_counter_with_val_and_labels; -pub use recorder::label_decrement_gauge_with_val_and_labels; -pub use recorder::label_gauge; -pub use recorder::label_gauge_with_val_and_labels; -pub use recorder::label_histogram_with_val; -pub use recorder::label_increment_gauge_with_val_and_labels; -pub use recorder::try_handle; -pub use recorder::LABEL_KEY_CLUSTER; -pub use recorder::LABEL_KEY_TENANT; pub use registry::load_global_prometheus_registry; pub use registry::register_counter; pub use registry::register_counter_family; @@ -52,4 +40,5 @@ pub use registry::register_histogram_family_in_milliseconds; pub use registry::register_histogram_family_in_seconds; pub use registry::register_histogram_in_milliseconds; pub use registry::register_histogram_in_seconds; -pub use reset::reset_metrics; +pub use registry::render_prometheus_metrics; +pub use registry::reset_global_prometheus_registry; diff --git a/src/common/metrics/src/recorder.rs b/src/common/metrics/src/recorder.rs deleted file mode 100644 index 8addaf0da91e..000000000000 --- a/src/common/metrics/src/recorder.rs +++ /dev/null @@ -1,313 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::mem::ManuallyDrop; -use std::sync::Arc; -use std::sync::Once; - -use log::warn; -use metrics::counter; -use metrics::decrement_gauge; -use metrics::gauge; -use metrics::histogram; -use metrics::increment_gauge; -use metrics::Counter; -use metrics::CounterFn; -use metrics::Gauge; -use metrics::GaugeFn; -use metrics::Histogram; -use metrics::HistogramFn; -use metrics::Key; -use metrics::KeyName; -use metrics::Recorder; -use metrics::SharedString; -use metrics::Unit; -use metrics_exporter_prometheus::PrometheusBuilder; -use metrics_exporter_prometheus::PrometheusHandle; -use metrics_exporter_prometheus::PrometheusRecorder; -use once_cell::sync::Lazy; -use parking_lot::RwLock; - -static PROMETHEUS_HANDLE: Lazy>>> = - Lazy::new(|| Arc::new(RwLock::new(None))); - -pub const LABEL_KEY_TENANT: &str = "tenant"; -pub const LABEL_KEY_CLUSTER: &str = "cluster_name"; - -#[inline] -pub fn label_histogram_with_val( - name: &'static str, - labels: &Vec<(&'static str, String)>, - val: f64, -) { - histogram!(name, val, labels); -} - -#[inline] -pub fn label_counter_with_val_and_labels( - name: &'static str, - labels: &Vec<(&'static str, String)>, - val: u64, -) { - counter!(name, val, labels); -} - -#[inline] -pub fn label_gauge_with_val_and_labels( - name: &'static str, - labels: &Vec<(&'static str, String)>, - val: f64, -) { - gauge!(name, val, labels); -} - -#[inline] -pub fn label_increment_gauge_with_val_and_labels( - name: &'static str, - labels: &Vec<(&'static str, String)>, - val: f64, -) { - increment_gauge!(name, val, labels); -} - -#[inline] -pub fn label_decrement_gauge_with_val_and_labels( - name: &'static str, - labels: &Vec<(&'static str, String)>, - val: f64, -) { - decrement_gauge!(name, val, labels); -} - -#[inline] -pub fn label_counter(name: &'static str, tenant_id: &str, cluster_id: &str) { - label_counter_with_val(name, 1, tenant_id, cluster_id) -} - -#[inline] -pub fn label_counter_with_val(name: &'static str, val: u64, tenant_id: &str, cluster_id: &str) { - let labels = [ - (LABEL_KEY_TENANT, tenant_id.to_string()), - (LABEL_KEY_CLUSTER, cluster_id.to_string()), - ]; - counter!(name, val, &labels); -} - -#[inline] -pub fn label_gauge(name: &'static str, val: f64, tenant_id: &str, cluster_id: &str) { - let labels = [ - (LABEL_KEY_TENANT, tenant_id.to_string()), - (LABEL_KEY_CLUSTER, cluster_id.to_string()), - ]; - gauge!(name, val, &labels); -} - -pub fn init_default_metrics_recorder() { - static START: Once = Once::new(); - START.call_once(init_prometheus_recorder) -} - -/// Init prometheus recorder. 
-fn init_prometheus_recorder() { - let recorder = ClearableRecorder::create(); - let mut h = PROMETHEUS_HANDLE.as_ref().write(); - *h = Some(recorder.clone()); - unsafe { - metrics::clear_recorder(); - } - match metrics::set_boxed_recorder(Box::new(recorder)) { - Ok(_) => (), - Err(err) => warn!("Install prometheus recorder failed, cause: {}", err), - }; -} - -pub fn try_handle() -> Option { - let read_guard = PROMETHEUS_HANDLE.as_ref().read(); - read_guard.as_ref().map(ClearableRecorder::handle) -} - -pub fn try_get_record() -> Option { - let read_guard = PROMETHEUS_HANDLE.as_ref().read(); - read_guard.as_ref().cloned() -} - -struct CounterFnWrap { - pub counter: ManuallyDrop, - pub holder: ManuallyDrop>, -} - -impl Drop for CounterFnWrap { - fn drop(&mut self) { - unsafe { - ManuallyDrop::drop(&mut self.counter); - ManuallyDrop::drop(&mut self.holder); - } - } -} - -impl CounterFn for CounterFnWrap { - fn increment(&self, value: u64) { - self.counter.increment(value) - } - - fn absolute(&self, value: u64) { - self.counter.absolute(value) - } -} - -struct GaugeFnWrap { - pub gauge: ManuallyDrop, - pub holder: ManuallyDrop>, -} - -impl Drop for GaugeFnWrap { - fn drop(&mut self) { - unsafe { - ManuallyDrop::drop(&mut self.gauge); - ManuallyDrop::drop(&mut self.holder); - } - } -} - -impl GaugeFn for GaugeFnWrap { - fn increment(&self, value: f64) { - self.gauge.increment(value) - } - - fn decrement(&self, value: f64) { - self.gauge.decrement(value) - } - - fn set(&self, value: f64) { - self.gauge.set(value) - } -} - -struct HistogramFnWrap { - pub histogram: std::mem::ManuallyDrop, - pub holder: ManuallyDrop>, -} - -impl Drop for HistogramFnWrap { - fn drop(&mut self) { - unsafe { - ManuallyDrop::drop(&mut self.histogram); - ManuallyDrop::drop(&mut self.holder); - } - } -} - -impl HistogramFn for HistogramFnWrap { - fn record(&self, value: f64) { - self.histogram.record(value) - } -} - -// It will be ensured that the recorder will be destroyed after all counters, gauge, histogram are destroyed -struct ArcRecorder { - pub inner: Arc, -} - -impl Recorder for ArcRecorder { - #[inline] - fn describe_counter(&self, key: KeyName, unit: Option, description: SharedString) { - self.inner.describe_counter(key, unit, description) - } - - #[inline] - fn describe_gauge(&self, key: KeyName, unit: Option, description: SharedString) { - self.inner.describe_gauge(key, unit, description) - } - - #[inline] - fn describe_histogram(&self, key: KeyName, unit: Option, description: SharedString) { - self.inner.describe_histogram(key, unit, description) - } - - fn register_counter(&self, key: &Key) -> Counter { - Counter::from_arc(Arc::new(CounterFnWrap { - counter: ManuallyDrop::new(self.inner.register_counter(key)), - holder: ManuallyDrop::new(self.inner.clone()), - })) - } - - fn register_gauge(&self, key: &Key) -> Gauge { - Gauge::from_arc(Arc::new(GaugeFnWrap { - gauge: ManuallyDrop::new(self.inner.register_gauge(key)), - holder: ManuallyDrop::new(self.inner.clone()), - })) - } - - fn register_histogram(&self, key: &Key) -> Histogram { - Histogram::from_arc(Arc::new(HistogramFnWrap { - histogram: ManuallyDrop::new(self.inner.register_histogram(key)), - holder: ManuallyDrop::new(self.inner.clone()), - })) - } -} - -// TODO: use atomic refactor rwlock -#[derive(Clone)] -pub struct ClearableRecorder { - inner: Arc>>, -} - -impl ClearableRecorder { - pub fn create() -> ClearableRecorder { - let recorder = PrometheusBuilder::new().build_recorder(); - ClearableRecorder { - inner: Arc::new(RwLock::new(ArcRecorder { 
- inner: Arc::new(recorder), - })), - } - } - - pub fn clear(&self) { - let mut inner = self.inner.write(); - let recorder = PrometheusBuilder::new().build_recorder(); - *inner = ArcRecorder { - inner: Arc::new(recorder), - }; - } - - pub fn handle(&self) -> PrometheusHandle { - self.inner.read().inner.handle() - } -} - -impl Recorder for ClearableRecorder { - fn describe_counter(&self, key: KeyName, unit: Option, description: SharedString) { - self.inner.read().describe_counter(key, unit, description) - } - - fn describe_gauge(&self, key: KeyName, unit: Option, description: SharedString) { - self.inner.read().describe_gauge(key, unit, description) - } - - fn describe_histogram(&self, key: KeyName, unit: Option, description: SharedString) { - self.inner.read().describe_histogram(key, unit, description) - } - - fn register_counter(&self, key: &Key) -> Counter { - self.inner.read().register_counter(key) - } - - fn register_gauge(&self, key: &Key) -> Gauge { - self.inner.read().register_gauge(key) - } - - fn register_histogram(&self, key: &Key) -> Histogram { - self.inner.read().register_histogram(key) - } -} diff --git a/src/common/metrics/src/registry.rs b/src/common/metrics/src/registry.rs index 160e52e4d205..a7de87108e80 100644 --- a/src/common/metrics/src/registry.rs +++ b/src/common/metrics/src/registry.rs @@ -16,6 +16,7 @@ use std::sync::Mutex; use std::sync::MutexGuard; use lazy_static::lazy_static; +use prometheus_client::encoding::text::encode as prometheus_encode; use prometheus_client::encoding::EncodeLabelSet; use prometheus_client::metrics::counter::Counter; use prometheus_client::metrics::family::Family; @@ -27,13 +28,28 @@ use crate::histogram::BUCKET_MILLISECONDS; use crate::histogram::BUCKET_SECONDS; lazy_static! { - pub static ref REGISTRY: Mutex = Mutex::new(Registry::default()); + pub static ref REGISTRY: Mutex = Mutex::new(Registry::with_prefix("databend")); } pub fn load_global_prometheus_registry() -> MutexGuard<'static, Registry> { REGISTRY.lock().unwrap() } +pub fn reset_global_prometheus_registry() { + // TODO(liyz): do nothing yet. This function would be trivial once prometheus_client + // supports iterating metrics. However it's not supported yet. I've raised an issue about + // this: https://github.com/prometheus/client_rust/issues/163 . If this feature request + // got denied, we can still wrap a customized Registry which record the metrics by itself. +} + +pub fn render_prometheus_metrics(registry: &Registry) -> String { + let mut text = String::new(); + match prometheus_encode(&mut text, registry) { + Ok(_) => text, + Err(err) => format!("Failed to encode metrics: {}", err), + } +} + pub fn register_counter(name: &str) -> Counter { let counter = Counter::default(); let mut registry = load_global_prometheus_registry(); diff --git a/src/common/metrics/src/reset.rs b/src/common/metrics/src/reset.rs deleted file mode 100644 index a81804819852..000000000000 --- a/src/common/metrics/src/reset.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -use common_exception::Result; - -use crate::recorder::try_get_record; - -/// Reset gauge metrics to 0. -pub fn reset_metrics() -> Result<()> { - if let Some(recorder) = try_get_record() { - recorder.clear(); - } - - Ok(()) -} diff --git a/src/common/metrics/tests/it/main.rs b/src/common/metrics/tests/it/main.rs index 24383d744620..60dab279f8e2 100644 --- a/src/common/metrics/tests/it/main.rs +++ b/src/common/metrics/tests/it/main.rs @@ -14,40 +14,38 @@ use std::collections::HashMap; +use common_exception::ErrorCode; use common_metrics::dump_metric_samples; -use common_metrics::init_default_metrics_recorder; -use common_metrics::try_handle; +use common_metrics::load_global_prometheus_registry; +use common_metrics::register_counter; +use common_metrics::register_histogram_in_milliseconds; use common_metrics::MetricValue; #[tokio::test(flavor = "multi_thread")] async fn test_dump_metric_samples() -> common_exception::Result<()> { - init_default_metrics_recorder(); - metrics::counter!("test.test1_count", 1); - metrics::counter!("test.test2_count", 2); + let counter1 = register_counter("test_test1_count"); + let counter2 = register_counter("test_test2_count"); + let histogram1 = register_histogram_in_milliseconds("test_test_query_usedtime"); + counter1.inc(); + counter2.inc_by(2); + histogram1.observe(2.0); - #[cfg(feature = "enable_histogram")] - metrics::histogram!("test.test_query_usedtime", 2.0); - - let handle = crate::try_handle().unwrap(); - let samples = dump_metric_samples(handle) + let registry = load_global_prometheus_registry(); + let samples = dump_metric_samples(®istry) .unwrap() .into_iter() .map(|s| (s.name.clone(), s)) .collect::>(); assert_eq!( - MetricValue::Counter(1.0), - samples.get("test_test1_count").unwrap().value + MetricValue::Untyped(1.0), + samples.get("test_test1_count_total").unwrap().value ); - #[cfg(feature = "enable_histogram")] - { - use common_exception::ErrorCode; - let summaries = match &samples.get("test_test_query_usedtime").unwrap().value { - MetricValue::Summary(summaries) => summaries, - _ => return Err(ErrorCode::Internal("test failed")), - }; - assert_eq!(7, summaries.len()); - } + let histogram = match &samples.get("test_test_query_usedtime").unwrap().value { + MetricValue::Histogram(histogram) => histogram, + _ => return Err(ErrorCode::Internal("test failed")), + }; + assert_eq!(16, histogram.len()); Ok(()) } diff --git a/src/common/openai/Cargo.toml b/src/common/openai/Cargo.toml index c9f3526b7c0c..72f2b7508246 100644 --- a/src/common/openai/Cargo.toml +++ b/src/common/openai/Cargo.toml @@ -15,10 +15,12 @@ test = false [dependencies] # Workspace dependencies common-exception = { path = "../exception" } +common-metrics = { path = "../metrics" } # GitHub dependencies # Crates.io dependencies +lazy_static = { workspace = true } log = { workspace = true } metrics = "0.20.1" openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } diff --git a/src/common/openai/src/metrics.rs b/src/common/openai/src/metrics.rs index 4391e0c81e04..e3fcc44d77ea 100644 --- a/src/common/openai/src/metrics.rs +++ b/src/common/openai/src/metrics.rs @@ -12,20 +12,34 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_metrics::register_counter; +use common_metrics::Counter; +use lazy_static::lazy_static; use metrics::counter; +lazy_static! 
{ + static ref OPENAI_COMPLETION_COUNT: Counter = register_counter("openai_completion_count"); + static ref OPENAI_COMPLETION_TOKEN: Counter = register_counter("openai_completion_token"); + static ref OPENAI_EMBEDDING_COUNT: Counter = register_counter("openai_embedding_count"); + static ref OPENAI_EMBEDDING_TOKEN: Counter = register_counter("openai_embedding_token"); +} + pub fn metrics_completion_count(c: u32) { counter!("openai_completion_count", c.into()); + OPENAI_COMPLETION_COUNT.inc_by(c as u64); } pub fn metrics_completion_token(c: u32) { counter!("openai_completion_token", c.into()); + OPENAI_COMPLETION_TOKEN.inc_by(c as u64); } pub fn metrics_embedding_count(c: u32) { counter!("openai_embedding_count", c.into()); + OPENAI_EMBEDDING_COUNT.inc_by(c as u64); } pub fn metrics_embedding_token(c: u32) { counter!("openai_embedding_token", c.into()); + OPENAI_EMBEDDING_TOKEN.inc_by(c as u64); } diff --git a/src/common/storage/src/metrics/copy.rs b/src/common/storage/src/metrics/copy.rs index e9d3dee16a52..8fc777afd766 100644 --- a/src/common/storage/src/metrics/copy.rs +++ b/src/common/storage/src/metrics/copy.rs @@ -17,76 +17,52 @@ use common_metrics::register_histogram_in_milliseconds; use common_metrics::Counter; use common_metrics::Histogram; use lazy_static::lazy_static; -use metrics::increment_gauge; - -macro_rules! key { - ($key: literal) => { - concat!("query_", $key) - }; -} lazy_static! { - static ref COPY_PURGE_FILE_COUNTER: Counter = register_counter("copy_purge_file_counter"); + static ref COPY_PURGE_FILE_COUNTER: Counter = register_counter("query_copy_purge_file_counter"); static ref COPY_PURGE_FILE_COST_MILLISECONDS: Histogram = - register_histogram_in_milliseconds("copy_purge_file_cost_milliseconds"); - static ref COPY_READ_PART_COUNTER: Counter = register_counter("copy_read_part_counter"); - static ref COPY_READ_SIZE_BYTES: Counter = register_counter("copy_read_size_bytes"); + register_histogram_in_milliseconds("query_copy_purge_file_cost_milliseconds"); + static ref COPY_READ_PART_COUNTER: Counter = register_counter("query_copy_read_part_counter"); + static ref COPY_READ_SIZE_BYTES: Counter = register_counter("query_copy_read_size_bytes"); static ref COPY_READ_PART_COST_MILLISECONDS: Histogram = - register_histogram_in_milliseconds("copy_read_part_cost_milliseconds"); + register_histogram_in_milliseconds("query_copy_read_part_cost_milliseconds"); static ref FILTER_OUT_COPIED_FILES_REQUEST_MILLISECONDS: Histogram = - register_histogram_in_milliseconds("filter_out_copied_files_request_milliseconds"); + register_histogram_in_milliseconds("query_filter_out_copied_files_request_milliseconds"); static ref FILTER_OUT_COPIED_FILES_ENTIRE_MILLISECONDS: Histogram = - register_histogram_in_milliseconds("filter_out_copied_files_entire_milliseconds"); + register_histogram_in_milliseconds("query_filter_out_copied_files_entire_milliseconds"); static ref COLLECT_FILES_GET_ALL_SOURCE_FILES_MILLISECONDS: Histogram = - register_histogram_in_milliseconds("collect_files_get_all_source_files_milliseconds"); + register_histogram_in_milliseconds("query_collect_files_get_all_source_files_milliseconds"); } /// COPY pub fn metrics_inc_copy_purge_files_counter(c: u32) { - increment_gauge!(key!("copy_purge_file_counter"), c as f64); COPY_PURGE_FILE_COUNTER.inc_by(c as u64); } pub fn metrics_inc_copy_purge_files_cost_milliseconds(c: u32) { - increment_gauge!(key!("copy_purge_file_cost_milliseconds"), c as f64); COPY_PURGE_FILE_COST_MILLISECONDS.observe(c as f64); } pub fn 
metrics_inc_copy_read_part_counter() { - increment_gauge!(key!("copy_read_part_counter"), 1.0); COPY_READ_PART_COUNTER.inc(); } pub fn metrics_inc_copy_read_size_bytes(c: u64) { - increment_gauge!(key!("copy_read_size_bytes"), c as f64); COPY_READ_SIZE_BYTES.inc_by(c); } pub fn metrics_inc_copy_read_part_cost_milliseconds(c: u64) { - increment_gauge!(key!("copy_read_part_cost_milliseconds"), c as f64); COPY_READ_PART_COST_MILLISECONDS.observe(c as f64); } pub fn metrics_inc_filter_out_copied_files_request_milliseconds(c: u64) { - increment_gauge!( - key!("filter_out_copied_files_request_milliseconds"), - c as f64 - ); FILTER_OUT_COPIED_FILES_REQUEST_MILLISECONDS.observe(c as f64); } pub fn metrics_inc_filter_out_copied_files_entire_milliseconds(c: u64) { - increment_gauge!( - key!("filter_out_copied_files_entire_milliseconds"), - c as f64 - ); FILTER_OUT_COPIED_FILES_ENTIRE_MILLISECONDS.observe(c as f64); } pub fn metrics_inc_collect_files_get_all_source_files_milliseconds(c: u64) { - increment_gauge!( - key!("collect_files_get_all_source_files_milliseconds"), - c as f64 - ); COLLECT_FILES_GET_ALL_SOURCE_FILES_MILLISECONDS.observe(c as f64); } diff --git a/src/common/storage/src/metrics/merge_into.rs b/src/common/storage/src/metrics/merge_into.rs index 9a236bea2b87..a295afbed8ec 100644 --- a/src/common/storage/src/metrics/merge_into.rs +++ b/src/common/storage/src/metrics/merge_into.rs @@ -15,7 +15,6 @@ use common_metrics::register_counter; use common_metrics::Counter; use lazy_static::lazy_static; -use metrics::increment_gauge; macro_rules! key { ($key: literal) => { @@ -34,21 +33,17 @@ lazy_static! { } pub fn metrics_inc_merge_into_replace_blocks_counter(c: u32) { - increment_gauge!(key!("merge_into_replace_blocks_counter"), c as f64); MERGE_INTO_REPLACE_BLOCKS_COUNTER.inc_by(c as u64); } pub fn metrics_inc_merge_into_append_blocks_counter(c: u32) { - increment_gauge!(key!("merge_into_append_blocks_counter"), c as f64); MERGE_INTO_APPEND_BLOCKS_COUNTER.inc_by(c as u64); } pub fn metrics_inc_merge_into_matched_rows(c: u32) { - increment_gauge!(key!("merge_into_matched_rows"), c as f64); MERGE_INTO_MATCHED_ROWS.inc_by(c as u64); } pub fn metrics_inc_merge_into_unmatched_rows(c: u32) { - increment_gauge!(key!("merge_into_unmatched_rows"), c as f64); MERGE_INTO_UNMATCHED_ROWS.inc_by(c as u64); } diff --git a/src/common/storage/src/operator.rs b/src/common/storage/src/operator.rs index 044e93faacfe..b2320fe823e5 100644 --- a/src/common/storage/src/operator.rs +++ b/src/common/storage/src/operator.rs @@ -41,7 +41,6 @@ use common_meta_app::storage::StorageWebhdfsConfig; use log::warn; use opendal::layers::ImmutableIndexLayer; use opendal::layers::LoggingLayer; -use opendal::layers::MetricsLayer; use opendal::layers::MinitraceLayer; use opendal::layers::RetryLayer; use opendal::layers::TimeoutLayer; @@ -106,12 +105,11 @@ pub fn build_operator(builder: B) -> Result { ) // Add retry .layer(RetryLayer::new().with_jitter()) - // Add metrics - .layer(MetricsLayer) // Add logging .layer(LoggingLayer::default()) // Add tracing .layer(MinitraceLayer) + // TODO(liyz): add PrometheusClientLayer .finish(); Ok(op) diff --git a/src/meta/client/Cargo.toml b/src/meta/client/Cargo.toml index 271e53c9dab9..243b853a6ca1 100644 --- a/src/meta/client/Cargo.toml +++ b/src/meta/client/Cargo.toml @@ -28,6 +28,7 @@ common-tracing = { path = "../../common/tracing" } derive_more = { workspace = true } futures = "0.3.24" +lazy_static = { workspace = true } log = { workspace = true } logcall = { workspace = true } 
minitrace = { workspace = true } diff --git a/src/meta/client/src/grpc_client.rs b/src/meta/client/src/grpc_client.rs index 29a3debd16a3..aa6b86b3d647 100644 --- a/src/meta/client/src/grpc_client.rs +++ b/src/meta/client/src/grpc_client.rs @@ -61,10 +61,6 @@ use common_meta_types::MetaHandshakeError; use common_meta_types::MetaNetworkError; use common_meta_types::TxnReply; use common_meta_types::TxnRequest; -use common_metrics::label_counter_with_val_and_labels; -use common_metrics::label_decrement_gauge_with_val_and_labels; -use common_metrics::label_histogram_with_val; -use common_metrics::label_increment_gauge_with_val_and_labels; use futures::stream::StreamExt; use log::as_debug; use log::as_display; @@ -88,6 +84,7 @@ use tonic::Status; use crate::from_digit_ver; use crate::grpc_action::RequestFor; +use crate::grpc_metrics; use crate::message; use crate::to_digit_ver; use crate::MetaGrpcReq; @@ -95,14 +92,6 @@ use crate::METACLI_COMMIT_SEMVER; use crate::MIN_METASRV_SEMVER; const AUTH_TOKEN_KEY: &str = "auth-token-bin"; -const META_GRPC_CLIENT_REQUEST_DURATION_MS: &str = "meta_grpc_client_request_duration_ms"; -const META_GRPC_CLIENT_REQUEST_INFLIGHT: &str = "meta_grpc_client_request_inflight"; -const META_GRPC_CLIENT_REQUEST_SUCCESS: &str = "meta_grpc_client_request_success"; -const META_GRPC_CLIENT_REQUEST_FAILED: &str = "meta_grpc_client_request_fail"; -const META_GRPC_MAKE_CLIENT_FAILED: &str = "meta_grpc_make_client_fail"; -const LABEL_ENDPOINT: &str = "endpoint"; -const LABEL_REQUEST: &str = "request"; -const LABEL_ERROR: &str = "error"; #[derive(Debug)] struct MetaChannelManager { @@ -180,12 +169,7 @@ impl ClientHandle { resp_tx: tx, req: req.into(), }; - - label_increment_gauge_with_val_and_labels( - META_GRPC_CLIENT_REQUEST_INFLIGHT, - &vec![], - 1.0, - ); + grpc_metrics::incr_meta_grpc_client_request_inflight(1); let res = self.req_tx.send(req).await.map_err(|e| { let cli_err = MetaClientError::ClientRuntimeError( @@ -195,32 +179,20 @@ impl ClientHandle { }); if let Err(err) = res { - label_decrement_gauge_with_val_and_labels( - META_GRPC_CLIENT_REQUEST_INFLIGHT, - &vec![], - 1.0, - ); + grpc_metrics::incr_meta_grpc_client_request_inflight(-1); return Err(err); } let res = rx.await.map_err(|e| { - label_decrement_gauge_with_val_and_labels( - META_GRPC_CLIENT_REQUEST_INFLIGHT, - &vec![], - 1.0, - ); + grpc_metrics::incr_meta_grpc_client_request_inflight(-1); MetaClientError::ClientRuntimeError( AnyError::new(&e).add_context(|| "when recv resp from MetaGrpcClient worker"), ) })?; - label_decrement_gauge_with_val_and_labels( - META_GRPC_CLIENT_REQUEST_INFLIGHT, - &vec![], - 1.0, - ); + grpc_metrics::incr_meta_grpc_client_request_inflight(-1); let res: Result = res .try_into() .map_err(|e| format!("expect: {}, got: {}", std::any::type_name::(), e)) @@ -467,12 +439,9 @@ impl MetaGrpcClient { if let Some(current_endpoint) = current_endpoint { let elapsed = start.elapsed().as_millis() as f64; - label_histogram_with_val( - META_GRPC_CLIENT_REQUEST_DURATION_MS, - &vec![ - (LABEL_ENDPOINT, current_endpoint.to_string()), - (LABEL_REQUEST, req_name.to_string()), - ], + grpc_metrics::record_meta_grpc_client_request_duration_ms( + ¤t_endpoint, + req_name, elapsed, ); if elapsed > 1000_f64 { @@ -483,24 +452,16 @@ impl MetaGrpcClient { } if let Some(err) = resp.err() { - label_counter_with_val_and_labels( - META_GRPC_CLIENT_REQUEST_FAILED, - &vec![ - (LABEL_ENDPOINT, current_endpoint.to_string()), - (LABEL_ERROR, err.to_string()), - (LABEL_REQUEST, req_name.to_string()), - ], - 1, + 
grpc_metrics::incr_meta_grpc_client_request_failed( + ¤t_endpoint, + req_name, + &err.to_string(), ); error!("MetaGrpcClient error: {:?}", err); } else { - label_counter_with_val_and_labels( - META_GRPC_CLIENT_REQUEST_SUCCESS, - &vec![ - (LABEL_ENDPOINT, current_endpoint.to_string()), - (LABEL_REQUEST, req_name.to_string()), - ], - 1, + grpc_metrics::incr_meta_grpc_client_request_success( + ¤t_endpoint, + req_name, ); } } @@ -643,11 +604,7 @@ impl MetaGrpcClient { "grpc_client create channel with {} failed, err: {:?}", addr, e ); - label_counter_with_val_and_labels( - META_GRPC_MAKE_CLIENT_FAILED, - &vec![(LABEL_ENDPOINT, addr.to_string())], - 1, - ); + grpc_metrics::incr_meta_grpc_make_client_fail(&addr); Err(e) } } diff --git a/src/meta/client/src/grpc_metrics.rs b/src/meta/client/src/grpc_metrics.rs new file mode 100644 index 000000000000..50110d8e758e --- /dev/null +++ b/src/meta/client/src/grpc_metrics.rs @@ -0,0 +1,78 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_metrics::register_counter_family; +use common_metrics::register_gauge; +use common_metrics::register_histogram_family_in_milliseconds; +use common_metrics::Counter; +use common_metrics::Family; +use common_metrics::Gauge; +use common_metrics::Histogram; +use common_metrics::VecLabels; +use lazy_static::lazy_static; + +lazy_static! 
{
+    static ref META_GRPC_CLIENT_REQUEST_DURATION_MS: Family<VecLabels, Histogram> =
+        register_histogram_family_in_milliseconds("meta_grpc_client_request_duration_ms");
+    static ref META_GRPC_CLIENT_REQUEST_INFLIGHT: Gauge =
+        register_gauge("meta_grpc_client_request_inflight");
+    static ref META_GRPC_CLIENT_REQUEST_SUCCESS: Family<VecLabels, Counter> =
+        register_counter_family("meta_grpc_client_request_success");
+    static ref META_GRPC_CLIENT_REQUEST_FAILED: Family<VecLabels, Counter> =
+        register_counter_family("meta_grpc_client_request_fail");
+    static ref META_GRPC_MAKE_CLIENT_FAIL: Family<VecLabels, Counter> =
+        register_counter_family("meta_grpc_make_client_fail");
+}
+
+const LABEL_ENDPOINT: &str = "endpoint";
+const LABEL_REQUEST: &str = "request";
+const LABEL_ERROR: &str = "error";
+
+pub fn record_meta_grpc_client_request_duration_ms(endpoint: &str, request: &str, duration: f64) {
+    let labels = vec![
+        (LABEL_ENDPOINT, endpoint.to_string()),
+        (LABEL_REQUEST, request.to_string()),
+    ];
+    META_GRPC_CLIENT_REQUEST_DURATION_MS
+        .get_or_create(&labels)
+        .observe(duration);
+}
+
+pub fn incr_meta_grpc_client_request_inflight(val: i64) {
+    META_GRPC_CLIENT_REQUEST_INFLIGHT.inc_by(val);
+}
+
+pub fn incr_meta_grpc_client_request_success(endpoint: &str, request: &str) {
+    let labels = vec![
+        (LABEL_ENDPOINT, endpoint.to_string()),
+        (LABEL_REQUEST, request.to_string()),
+    ];
+    META_GRPC_CLIENT_REQUEST_SUCCESS
+        .get_or_create(&labels)
+        .inc();
+}
+
+pub fn incr_meta_grpc_client_request_failed(endpoint: &str, request: &str, err: &str) {
+    let labels = vec![
+        (LABEL_ENDPOINT, endpoint.to_string()),
+        (LABEL_REQUEST, request.to_string()),
+        (LABEL_ERROR, err.to_string()),
+    ];
+    META_GRPC_CLIENT_REQUEST_FAILED.get_or_create(&labels).inc();
+}
+
+pub fn incr_meta_grpc_make_client_fail(endpoint: &str) {
+    let labels = vec![(LABEL_ENDPOINT, endpoint.to_string())];
+    META_GRPC_MAKE_CLIENT_FAIL.get_or_create(&labels).inc();
+}
diff --git a/src/meta/client/src/lib.rs b/src/meta/client/src/lib.rs
index a59a391bdbd5..cf1a97f3f297 100644
--- a/src/meta/client/src/lib.rs
+++ b/src/meta/client/src/lib.rs
@@ -16,6 +16,7 @@

 mod grpc_action;
 mod grpc_client;
+mod grpc_metrics;
 mod kv_api_impl;
 mod message;

diff --git a/src/meta/service/tests/it/api/http/metrics.rs b/src/meta/service/tests/it/api/http/metrics.rs
index c68be7636ffb..a5a7cbbe5cfe 100644
--- a/src/meta/service/tests/it/api/http/metrics.rs
+++ b/src/meta/service/tests/it/api/http/metrics.rs
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use common_metrics::init_default_metrics_recorder;
 use databend_meta::api::http::v1::metrics::metrics_handler;
 use databend_meta::metrics::network_metrics;
 use databend_meta::metrics::raft_metrics;
@@ -36,8 +35,6 @@ use crate::tests::meta_node::start_meta_node_cluster;
 #[test(harness = meta_service_test_harness)]
 #[minitrace::trace]
 async fn test_metrics() -> anyhow::Result<()> {
-    init_default_metrics_recorder();
-
     let (_, tcs) = start_meta_node_cluster(btreeset! {0,1,2}, btreeset!
{}).await?; let leader = tcs[0].meta_node.clone().unwrap(); diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index f601ba7b20f0..7d69c3cb242d 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -130,6 +130,7 @@ humantime = "2.1.0" indicatif = "0.17.5" itertools = "0.10.5" jwt-simple = "0.11.0" +lazy_static = { workspace = true } log = { workspace = true } lz4 = "1.24.0" match-template = "0.0.1" diff --git a/src/query/service/src/api/rpc/exchange/metrics/exchange_metrics.rs b/src/query/service/src/api/rpc/exchange/metrics/exchange_metrics.rs index a86b3b465e90..2eb7485e71fb 100644 --- a/src/query/service/src/api/rpc/exchange/metrics/exchange_metrics.rs +++ b/src/query/service/src/api/rpc/exchange/metrics/exchange_metrics.rs @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_metrics::register_counter; +use common_metrics::Counter; +use lazy_static::lazy_static; use metrics::counter; macro_rules! key { @@ -20,18 +23,29 @@ macro_rules! key { }; } +lazy_static! { + static ref EXCHANGE_WRITE_COUNT: Counter = register_counter(key!("exchange_write_count")); + static ref EXCHANGE_WRITE_BYTES: Counter = register_counter(key!("exchange_write_bytes")); + static ref EXCHANGE_READ_COUNT: Counter = register_counter(key!("exchange_read_count")); + static ref EXCHANGE_READ_BYTES: Counter = register_counter(key!("exchange_read_bytes")); +} + pub fn metrics_inc_exchange_write_count(v: usize) { counter!(key!("exchange_write_count"), v as u64); + EXCHANGE_WRITE_COUNT.inc_by(v as u64); } pub fn metrics_inc_exchange_write_bytes(c: usize) { counter!(key!("exchange_write_bytes"), c as u64); + EXCHANGE_WRITE_BYTES.inc_by(c as u64); } pub fn metrics_inc_exchange_read_count(v: usize) { counter!(key!("exchange_read_count"), v as u64); + EXCHANGE_READ_COUNT.inc_by(v as u64); } pub fn metrics_inc_exchange_read_bytes(c: usize) { counter!(key!("exchange_read_bytes"), c as u64); + EXCHANGE_READ_BYTES.inc_by(c as u64); } diff --git a/src/query/service/src/clusters/cluster.rs b/src/query/service/src/clusters/cluster.rs index 77b6c181c399..c19302277cae 100644 --- a/src/query/service/src/clusters/cluster.rs +++ b/src/query/service/src/clusters/cluster.rs @@ -44,14 +44,12 @@ use common_meta_store::MetaStore; use common_meta_store::MetaStoreProvider; use common_meta_types::MatchSeq; use common_meta_types::NodeInfo; -use common_metrics::label_counter_with_val_and_labels; use futures::future::select; use futures::future::Either; use futures::Future; use futures::StreamExt; use log::error; use log::warn; -use metrics::gauge; use rand::thread_rng; use rand::Rng; @@ -140,8 +138,6 @@ impl ClusterHelper for Cluster { } impl ClusterDiscovery { - const METRIC_LABEL_FUNCTION: &'static str = "function"; - #[async_backtrace::framed] pub async fn create_meta_client(cfg: &InnerConfig) -> Result { let meta_api_provider = MetaStoreProvider::new(cfg.meta.to_meta_grpc_client_conf()); @@ -204,31 +200,12 @@ impl ClusterDiscovery { pub async fn discover(&self, config: &InnerConfig) -> Result> { match self.api_provider.get_nodes().await { Err(cause) => { - label_counter_with_val_and_labels( - super::metrics::METRIC_CLUSTER_ERROR_COUNT, - &vec![ - ( - super::metrics::METRIC_LABEL_LOCAL_ID, - String::from(&self.local_id), - ), - ( - ClusterDiscovery::METRIC_LABEL_FUNCTION, - String::from("discover"), - ), - ( - super::metrics::METRIC_LABEL_CLUSTER_ID, - self.cluster_id.clone(), - ), - ( - 
super::metrics::METRIC_LABEL_TENANT_ID, - self.tenant_id.clone(), - ), - ( - super::metrics::METRIC_LABEL_FLIGHT_ADDRESS, - self.flight_address.clone(), - ), - ], - 1, + super::metrics::metric_incr_cluster_error_count( + &self.local_id, + "discover", + &self.cluster_id, + &self.tenant_id, + &self.flight_address, ); Err(cause.add_message_back("(while cluster api get_nodes).")) } @@ -252,29 +229,13 @@ impl ClusterDiscovery { res.push(Arc::new(node.clone())); } - gauge!( - super::metrics::METRIC_CLUSTER_DISCOVERED_NODE_GAUGE, + super::metrics::metrics_gauge_discovered_nodes( + &self.local_id, + &self.cluster_id, + &self.tenant_id, + &self.flight_address, cluster_nodes.len() as f64, - &[ - ( - super::metrics::METRIC_LABEL_LOCAL_ID, - String::from(&self.local_id) - ), - ( - super::metrics::METRIC_LABEL_CLUSTER_ID, - self.cluster_id.clone(), - ), - ( - super::metrics::METRIC_LABEL_TENANT_ID, - self.tenant_id.clone(), - ), - ( - super::metrics::METRIC_LABEL_FLIGHT_ADDRESS, - self.flight_address.clone(), - ), - ] ); - Ok(Cluster::create(res, self.local_id.clone())) } } @@ -285,31 +246,12 @@ impl ClusterDiscovery { let current_nodes_info = match self.api_provider.get_nodes().await { Ok(nodes) => nodes, Err(cause) => { - label_counter_with_val_and_labels( - super::metrics::METRIC_CLUSTER_ERROR_COUNT, - &vec![ - ( - super::metrics::METRIC_LABEL_LOCAL_ID, - String::from(&self.local_id), - ), - ( - ClusterDiscovery::METRIC_LABEL_FUNCTION, - String::from("drop_invalid_nodes.get_nodes"), - ), - ( - super::metrics::METRIC_LABEL_CLUSTER_ID, - self.cluster_id.clone(), - ), - ( - super::metrics::METRIC_LABEL_TENANT_ID, - self.tenant_id.clone(), - ), - ( - super::metrics::METRIC_LABEL_FLIGHT_ADDRESS, - self.flight_address.clone(), - ), - ], - 1, + super::metrics::metric_incr_cluster_error_count( + &self.local_id, + "drop_invalid_ndes.get_nodes", + &self.cluster_id, + &self.tenant_id, + &self.flight_address, ); return Err(cause.add_message_back("(while drop_invalid_nodes)")); } @@ -421,8 +363,6 @@ struct ClusterHeartbeat { } impl ClusterHeartbeat { - const METRIC_LABEL_RESULT: &'static str = "result"; - pub fn create( timeout: Duration, cluster_api: Arc, @@ -467,25 +407,12 @@ impl ClusterHeartbeat { shutdown_notified = new_shutdown_notified; let heartbeat = cluster_api.heartbeat(&node, MatchSeq::GE(1)); if let Err(failure) = heartbeat.await { - label_counter_with_val_and_labels( - super::metrics::METRIC_CLUSTER_HEARTBEAT_COUNT, - &vec![ - ( - super::metrics::METRIC_LABEL_LOCAL_ID, - String::from(&node.id), - ), - ( - super::metrics::METRIC_LABEL_FLIGHT_ADDRESS, - String::from(&node.flight_address), - ), - (super::metrics::METRIC_LABEL_CLUSTER_ID, cluster_id.clone()), - (super::metrics::METRIC_LABEL_TENANT_ID, tenant_id.clone()), - ( - ClusterHeartbeat::METRIC_LABEL_RESULT, - String::from("failure"), - ), - ], - 1, + super::metrics::metric_incr_cluster_heartbeat_count( + &node.id, + &node.flight_address, + &cluster_id, + &tenant_id, + "failure", ); error!("Cluster cluster api heartbeat failure: {:?}", failure); } diff --git a/src/query/service/src/clusters/metrics.rs b/src/query/service/src/clusters/metrics.rs index f264e11ded7c..ddd5c18b59eb 100644 --- a/src/query/service/src/clusters/metrics.rs +++ b/src/query/service/src/clusters/metrics.rs @@ -12,11 +12,73 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-pub static METRIC_CLUSTER_HEARTBEAT_COUNT: &str = "cluster.heartbeat.count"; -pub static METRIC_CLUSTER_ERROR_COUNT: &str = "cluster.error.count"; -pub static METRIC_CLUSTER_DISCOVERED_NODE_GAUGE: &str = "cluster.discovered_node.gauge"; - -pub static METRIC_LABEL_LOCAL_ID: &str = "local_id"; -pub static METRIC_LABEL_FLIGHT_ADDRESS: &str = "flight_address"; -pub static METRIC_LABEL_CLUSTER_ID: &str = "cluster_id"; -pub static METRIC_LABEL_TENANT_ID: &str = "tenant_id"; +use common_metrics::register_counter_family; +use common_metrics::register_gauge_family; +use common_metrics::Counter; +use common_metrics::Family; +use common_metrics::Gauge; +use lazy_static::lazy_static; + +lazy_static! { + static ref CLUSTER_CLUSTER_HEARTBEAT_COUNT: Family, Counter> = + register_counter_family("cluster_heartbeat_count"); + static ref CLUSTER_CLUSTER_ERROR_COUNT: Family, Counter> = + register_counter_family("cluster_error_count"); + static ref CLUSTER_DISCOVERED_NODE_GAUGE: Family, Gauge> = + register_gauge_family("cluster_discovered_node"); +} + +pub(crate) fn metric_incr_cluster_heartbeat_count( + local_id: &str, + flight_address: &str, + cluster_id: &str, + tenant_id: &str, + result: &str, +) { + let labels = &vec![ + ("local_id", String::from(local_id)), + ("flight_address", String::from(flight_address)), + ("cluster_id", cluster_id.to_string()), + ("tenant_id", tenant_id.to_string()), + ("result", result.to_string()), + ]; + + CLUSTER_CLUSTER_HEARTBEAT_COUNT.get_or_create(labels).inc(); +} + +pub(crate) fn metric_incr_cluster_error_count( + local_id: &str, + function: &str, + cluster_id: &str, + tenant_id: &str, + flight_address: &str, +) { + let labels = &vec![ + ("local_id", local_id.to_string()), + ("function", function.to_string()), + ("cluster_id", cluster_id.to_string()), + ("tenant_id", tenant_id.to_string()), + ("flight_address", flight_address.to_string()), + ]; + + CLUSTER_CLUSTER_ERROR_COUNT.get_or_create(labels).inc(); +} + +pub(crate) fn metrics_gauge_discovered_nodes( + local_id: &str, + cluster_id: &str, + tenant_id: &str, + flight_address: &str, + val: f64, +) { + let labels = &vec![ + ("local_id", local_id.to_string()), + ("cluster_id", cluster_id.to_string()), + ("tenant_id", tenant_id.to_string()), + ("flight_address", flight_address.to_string()), + ]; + + CLUSTER_DISCOVERED_NODE_GAUGE + .get_or_create(labels) + .set(val as i64); +} diff --git a/src/query/service/src/interpreters/common/metrics.rs b/src/query/service/src/interpreters/common/metrics.rs index 707a9217e654..9346d48687da 100644 --- a/src/query/service/src/interpreters/common/metrics.rs +++ b/src/query/service/src/interpreters/common/metrics.rs @@ -12,22 +12,32 @@ // See the License for the specific language governing permissions and // limitations under the License. -use metrics::increment_gauge; +use common_metrics::register_histogram_family_in_milliseconds; +use common_metrics::Family; +use common_metrics::Histogram; +use lazy_static::lazy_static; + +lazy_static! 
{ + static ref COMPACT_HOOK_EXECUTION_MS: Family, Histogram> = + register_histogram_family_in_milliseconds("compact_hook_execution_ms"); + static ref COMPACT_HOOK_COMPACTION_MS: Family, Histogram> = + register_histogram_family_in_milliseconds("compact_hook_compaction_ms"); +} // the time used in executing the main operation (replace-into, copy-into, etc) // metrics names with pattern `compact_hook_{operation_name}_time_execution_ms` pub fn metrics_inc_compact_hook_main_operation_time_ms(operation_name: &str, c: u64) { - increment_gauge!( - format!("compact_hook_{}_time_execution_ms", operation_name), - c as f64 - ); + let labels = &vec![("operation", operation_name.to_string())]; + COMPACT_HOOK_EXECUTION_MS + .get_or_create(labels) + .observe(c as f64); } // the time used in executing the compaction // metrics names with pattern `compact_hook_{operation_name}_time_compaction_ms` pub fn metrics_inc_compact_hook_compact_time_ms(operation_name: &str, c: u64) { - increment_gauge!( - format!("compact_hook_{}_time_compaction_ms", operation_name), - c as f64 - ); + let labels = &vec![("operation", operation_name.to_string())]; + COMPACT_HOOK_COMPACTION_MS + .get_or_create(labels) + .observe(c as f64); } diff --git a/src/query/service/src/interpreters/interpreter_metrics.rs b/src/query/service/src/interpreters/interpreter_metrics.rs index 281cd4fbc03d..0ea223831876 100644 --- a/src/query/service/src/interpreters/interpreter_metrics.rs +++ b/src/query/service/src/interpreters/interpreter_metrics.rs @@ -18,32 +18,74 @@ use std::time::UNIX_EPOCH; use common_config::GlobalConfig; use common_exception::ErrorCode; -use common_metrics::label_counter_with_val_and_labels; -use common_metrics::label_histogram_with_val; +use common_metrics::register_counter_family; +use common_metrics::register_histogram_family_in_milliseconds; +use common_metrics::Counter; +use common_metrics::Family; +use common_metrics::Histogram; +use common_metrics::VecLabels; +use lazy_static::lazy_static; use crate::sessions::QueryContext; use crate::sessions::TableContext; pub struct InterpreterMetrics; -const QUERY_START: &str = "query_start"; -const QUERY_ERROR: &str = "query_error"; -const QUERY_SUCCESS: &str = "query_success"; -const QUERY_FAILED: &str = "query_failed"; - -const QUERY_DURATION_MS: &str = "query_duration_ms"; -const QUERY_WRITE_ROWS: &str = "query_write_rows"; -const QUERY_WRITE_BYTES: &str = "query_write_bytes"; -const QUERY_WRITE_IO_BYTES: &str = "query_write_io_bytes"; -const QUERY_WRITE_IO_BYTES_COST_MS: &str = "query_write_io_bytes_cost_ms"; -const QUERY_SCAN_ROWS: &str = "query_scan_rows"; -const QUERY_SCAN_BYTES: &str = "query_scan_bytes"; -const QUERY_SCAN_IO_BYTES: &str = "query_scan_io_bytes"; -const QUERY_SCAN_IO_BYTES_COST_MS: &str = "query_scan_io_bytes_cost_ms"; -const QUERY_SCAN_PARTITIONS: &str = "query_scan_partitions"; -const QUERY_TOTAL_PARTITIONS: &str = "query_total_partitions"; -const QUERY_RESULT_ROWS: &str = "query_result_rows"; -const QUERY_RESULT_BYTES: &str = "query_result_bytes"; +const METRIC_QUERY_START: &str = "query_start"; +const METRIC_QUERY_ERROR: &str = "query_error"; +const METRIC_QUERY_SUCCESS: &str = "query_success"; +const METRIC_QUERY_FAILED: &str = "query_failed"; + +const METRIC_QUERY_DURATION_MS: &str = "query_duration_ms"; +const METRIC_QUERY_WRITE_ROWS: &str = "query_write_rows"; +const METRIC_QUERY_WRITE_BYTES: &str = "query_write_bytes"; +const METRIC_QUERY_WRITE_IO_BYTES: &str = "query_write_io_bytes"; +const METRIC_QUERY_WRITE_IO_BYTES_COST_MS: &str = 
"query_write_io_bytes_cost_ms"; +const METRIC_QUERY_SCAN_ROWS: &str = "query_scan_rows"; +const METRIC_QUERY_SCAN_BYTES: &str = "query_scan_bytes"; +const METRIC_QUERY_SCAN_IO_BYTES: &str = "query_scan_io_bytes"; +const METRIC_QUERY_SCAN_IO_BYTES_COST_MS: &str = "query_scan_io_bytes_cost_ms"; +const METRIC_QUERY_SCAN_PARTITIONS: &str = "query_scan_partitions"; +const METRIC_QUERY_TOTAL_PARTITIONS: &str = "query_total_partitions"; +const METRIC_QUERY_RESULT_ROWS: &str = "query_result_rows"; +const METRIC_QUERY_RESULT_BYTES: &str = "query_result_bytes"; + +lazy_static! { + static ref QUERY_START: Family = + register_counter_family(METRIC_QUERY_START); + static ref QUERY_ERROR: Family = + register_counter_family(METRIC_QUERY_ERROR); + static ref QUERY_SUCCESS: Family = + register_counter_family(METRIC_QUERY_SUCCESS); + static ref QUERY_FAILED: Family = + register_counter_family(METRIC_QUERY_FAILED); + static ref QUERY_DURATION_MS: Family = + register_histogram_family_in_milliseconds(METRIC_QUERY_DURATION_MS); + static ref QUERY_WRITE_ROWS: Family = + register_counter_family(METRIC_QUERY_WRITE_ROWS); + static ref QUERY_WRITE_BYTES: Family = + register_counter_family(METRIC_QUERY_WRITE_BYTES); + static ref QUERY_WRITE_IO_BYTES: Family = + register_counter_family(METRIC_QUERY_WRITE_IO_BYTES); + static ref QUERY_WRITE_IO_BYTES_COST_MS: Family = + register_histogram_family_in_milliseconds(METRIC_QUERY_WRITE_IO_BYTES_COST_MS); + static ref QUERY_SCAN_ROWS: Family = + register_counter_family(METRIC_QUERY_SCAN_ROWS); + static ref QUERY_SCAN_BYTES: Family = + register_counter_family(METRIC_QUERY_SCAN_BYTES); + static ref QUERY_SCAN_IO_BYTES: Family = + register_counter_family(METRIC_QUERY_SCAN_IO_BYTES); + static ref QUERY_SCAN_IO_BYTES_COST_MS: Family = + register_histogram_family_in_milliseconds(METRIC_QUERY_SCAN_IO_BYTES_COST_MS); + static ref QUERY_SCAN_PARTITIONS: Family = + register_counter_family(METRIC_QUERY_SCAN_PARTITIONS); + static ref QUERY_TOTAL_PARTITIONS: Family = + register_counter_family(METRIC_QUERY_TOTAL_PARTITIONS); + static ref QUERY_RESULT_ROWS: Family = + register_counter_family(METRIC_QUERY_RESULT_ROWS); + static ref QUERY_RESULT_BYTES: Family = + register_counter_family(METRIC_QUERY_RESULT_BYTES); +} const LABEL_HANDLER: &str = "handler"; const LABEL_KIND: &str = "kind"; @@ -89,39 +131,50 @@ impl InterpreterMetrics { let result_rows = ctx.get_result_progress_value().rows as u64; let result_bytes = ctx.get_result_progress_value().bytes as u64; - label_histogram_with_val(QUERY_DURATION_MS, labels, query_duration_ms); + QUERY_DURATION_MS + .get_or_create(labels) + .observe(query_duration_ms); + + QUERY_WRITE_ROWS.get_or_create(labels).inc_by(written_rows); + QUERY_WRITE_BYTES + .get_or_create(labels) + .inc_by(written_bytes); + QUERY_WRITE_IO_BYTES + .get_or_create(labels) + .inc_by(written_io_bytes); - label_counter_with_val_and_labels(QUERY_WRITE_ROWS, labels, written_rows); - label_counter_with_val_and_labels(QUERY_WRITE_BYTES, labels, written_bytes); - label_counter_with_val_and_labels(QUERY_WRITE_IO_BYTES, labels, written_io_bytes); if written_io_bytes_cost_ms > 0 { - label_histogram_with_val( - QUERY_WRITE_IO_BYTES_COST_MS, - labels, - written_io_bytes_cost_ms as f64, - ); + QUERY_WRITE_IO_BYTES_COST_MS + .get_or_create(labels) + .observe(written_io_bytes_cost_ms as f64); } - label_counter_with_val_and_labels(QUERY_SCAN_ROWS, labels, scan_rows); - label_counter_with_val_and_labels(QUERY_SCAN_BYTES, labels, scan_bytes); - 
label_counter_with_val_and_labels(QUERY_SCAN_IO_BYTES, labels, scan_io_bytes); + QUERY_SCAN_ROWS.get_or_create(labels).inc_by(scan_rows); + QUERY_SCAN_BYTES.get_or_create(labels).inc_by(scan_bytes); + QUERY_SCAN_IO_BYTES + .get_or_create(labels) + .inc_by(scan_io_bytes); if scan_io_bytes_cost_ms > 0 { - label_histogram_with_val( - QUERY_SCAN_IO_BYTES_COST_MS, - labels, - scan_io_bytes_cost_ms as f64, - ); + QUERY_SCAN_IO_BYTES_COST_MS + .get_or_create(labels) + .observe(scan_io_bytes_cost_ms as f64); } - label_counter_with_val_and_labels(QUERY_SCAN_PARTITIONS, labels, scan_partitions); - label_counter_with_val_and_labels(QUERY_TOTAL_PARTITIONS, labels, total_partitions); - label_counter_with_val_and_labels(QUERY_RESULT_ROWS, labels, result_rows); - label_counter_with_val_and_labels(QUERY_RESULT_BYTES, labels, result_bytes); + QUERY_SCAN_PARTITIONS + .get_or_create(labels) + .inc_by(scan_partitions); + QUERY_TOTAL_PARTITIONS + .get_or_create(labels) + .inc_by(total_partitions); + QUERY_RESULT_ROWS.get_or_create(labels).inc_by(result_rows); + QUERY_RESULT_BYTES + .get_or_create(labels) + .inc_by(result_bytes); } pub fn record_query_start(ctx: &QueryContext) { let labels = Self::common_labels(ctx); - label_counter_with_val_and_labels(QUERY_START, &labels, 1); + QUERY_START.get_or_create(&labels).inc(); } pub fn record_query_finished(ctx: &QueryContext, err: Option) { @@ -129,18 +182,18 @@ impl InterpreterMetrics { Self::record_query_detail(ctx, &labels); match err { None => { - label_counter_with_val_and_labels(QUERY_SUCCESS, &labels, 1); + QUERY_SUCCESS.get_or_create(&labels).inc(); } Some(err) => { labels.push((LABEL_CODE, err.code().to_string())); - label_counter_with_val_and_labels(QUERY_FAILED, &labels, 1); + QUERY_FAILED.get_or_create(&labels).inc(); } }; } pub fn record_query_error(ctx: &QueryContext) { let labels = Self::common_labels(ctx); - label_counter_with_val_and_labels(QUERY_ERROR, &labels, 1); + QUERY_ERROR.get_or_create(&labels).inc(); } } diff --git a/src/query/service/src/metrics/metric_service.rs b/src/query/service/src/metrics/metric_service.rs index b9a4d693b0f5..ff5abe1928be 100644 --- a/src/query/service/src/metrics/metric_service.rs +++ b/src/query/service/src/metrics/metric_service.rs @@ -18,9 +18,8 @@ use std::time::Duration; use common_exception::ErrorCode; use common_http::HttpError; use common_http::HttpShutdownHandler; -use common_metrics::PrometheusHandle; -use poem::web::Data; -use poem::EndpointExt; +use common_metrics::load_global_prometheus_registry; +use common_metrics::render_prometheus_metrics; use poem::IntoResponse; use crate::servers::Server; @@ -32,8 +31,9 @@ pub struct MetricService { #[allow(clippy::let_with_type_underscore)] #[poem::handler] #[async_backtrace::framed] -pub async fn metric_handler(prom_extension: Data<&PrometheusHandle>) -> impl IntoResponse { - prom_extension.0.render() +pub async fn metrics_handler() -> impl IntoResponse { + let registry = load_global_prometheus_registry(); + render_prometheus_metrics(®istry) } impl MetricService { @@ -46,11 +46,7 @@ impl MetricService { #[async_backtrace::framed] async fn start_without_tls(&mut self, listening: SocketAddr) -> Result { - let prometheus_handle = common_metrics::try_handle().unwrap(); - - let app = poem::Route::new() - .at("/metrics", poem::get(metric_handler)) - .data(prometheus_handle); + let app = poem::Route::new().at("/metrics", poem::get(metrics_handler)); let addr = self .shutdown_handler .start_service(listening, None, app, Some(Duration::from_millis(100))) diff --git 
a/src/query/service/src/pipelines/processors/transforms/metrics/transform_metrics.rs b/src/query/service/src/pipelines/processors/transforms/metrics/transform_metrics.rs index bb4119308b87..b0ffbdb3cd5d 100644 --- a/src/query/service/src/pipelines/processors/transforms/metrics/transform_metrics.rs +++ b/src/query/service/src/pipelines/processors/transforms/metrics/transform_metrics.rs @@ -12,7 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use metrics::increment_gauge; +use common_metrics::register_counter; +use common_metrics::register_counter_family; +use common_metrics::register_histogram_family_in_milliseconds; +use common_metrics::Counter; +use common_metrics::Family; +use common_metrics::Histogram; +use common_metrics::VecLabels; +use lazy_static::lazy_static; macro_rules! key { ($key: literal) => { @@ -20,67 +27,105 @@ macro_rules! key { }; } +lazy_static! { + static ref AGGREGATE_PARTIAL_SPILL_CELL_COUNT: Counter = + register_counter(key!("aggregate_partial_spill_cell_count")); + static ref AGGREGATE_PARTIAL_HASHTABLE_ALLOCATED_BYTES: Counter = + register_counter(key!("aggregate_partial_hashtable_allocated_bytes")); + static ref SPILL_COUNT: Family = + register_counter_family(key!("spill_count")); + static ref SPILL_WRITE_COUNT: Family = + register_counter_family(key!("spill_write_count")); + static ref SPILL_WRITE_BYTES: Family = + register_counter_family(key!("spill_write_bytes")); + static ref SPILL_WRITE_MILLISECONDS: Family = + register_histogram_family_in_milliseconds(key!("spill_write_milliseconds")); + static ref SPILL_READ_COUNT: Family = + register_counter_family(key!("spill_read_count")); + static ref SPILL_READ_BYTES: Family = + register_counter_family(key!("spill_read_bytes")); + static ref SPILL_READ_MILLISECONDS: Family = + register_histogram_family_in_milliseconds(key!("spill_read_milliseconds")); + static ref SPILL_DATA_DESERIALIZE_MILLISECONDS: Family = + register_histogram_family_in_milliseconds(key!("spill_data_deserialize_milliseconds")); + static ref SPILL_DATA_SERIALIZE_MILLISECONDS: Family = + register_histogram_family_in_milliseconds(key!("spill_data_serialize_milliseconds")); +} + pub fn metrics_inc_aggregate_partial_spill_count() { - increment_gauge!(key!("aggregate_partial_spill_count"), 1_f64); + let labels = &vec![("spill", "aggregate_partial_spill".to_string())]; + SPILL_COUNT.get_or_create(labels).inc(); } pub fn metrics_inc_aggregate_partial_spill_cell_count(c: u64) { - increment_gauge!(key!("aggregate_partial_spill_cell_count"), c as f64); + AGGREGATE_PARTIAL_SPILL_CELL_COUNT.inc_by(c); } pub fn metrics_inc_aggregate_partial_hashtable_allocated_bytes(c: u64) { - increment_gauge!( - key!("aggregate_partial_hashtable_allocated_bytes"), - c as f64 - ); + AGGREGATE_PARTIAL_HASHTABLE_ALLOCATED_BYTES.inc_by(c); } pub fn metrics_inc_group_by_spill_write_count() { - increment_gauge!(key!("group_by_spill_write_count"), 1_f64); + let labels = &vec![("spill", "group_by_spill".to_string())]; + SPILL_WRITE_COUNT.get_or_create(labels).inc(); } pub fn metrics_inc_group_by_spill_write_bytes(c: u64) { - increment_gauge!(key!("group_by_spill_write_bytes"), c as f64); + let labels = &vec![("spill", "group_by_spill".to_string())]; + SPILL_WRITE_BYTES.get_or_create(labels).inc_by(c); } pub fn metrics_inc_group_by_spill_write_milliseconds(c: u64) { - increment_gauge!(key!("group_by_spill_write_milliseconds"), c as f64); + let labels = &vec![("spill", "group_by_spill".to_string())]; + 
SPILL_WRITE_MILLISECONDS + .get_or_create(labels) + .observe(c as f64) } pub fn metrics_inc_aggregate_spill_write_count() { - increment_gauge!(key!("aggregate_spill_write_count"), 1_f64); + let labels = &vec![("spill", "aggregate_spill".to_string())]; + SPILL_WRITE_COUNT.get_or_create(labels).inc(); } pub fn metrics_inc_aggregate_spill_write_bytes(c: u64) { - increment_gauge!(key!("aggregate_spill_write_bytes"), c as f64); + let labels = &vec![("spill", "aggregate_spill".to_string())]; + SPILL_WRITE_BYTES.get_or_create(labels).inc_by(c); } pub fn metrics_inc_aggregate_spill_write_milliseconds(c: u64) { - increment_gauge!(key!("aggregate_spill_write_milliseconds"), c as f64); + let labels = &vec![("spill", "aggregate_spill".to_string())]; + SPILL_WRITE_MILLISECONDS + .get_or_create(labels) + .observe(c as f64); } pub fn metrics_inc_aggregate_spill_read_count() { - increment_gauge!(key!("aggregate_spill_read_count"), 1_f64); + let labels = &vec![("spill", "aggregate_spill".to_string())]; + SPILL_READ_COUNT.get_or_create(labels).inc(); } pub fn metrics_inc_aggregate_spill_read_bytes(c: u64) { - increment_gauge!(key!("aggregate_spill_read_bytes"), c as f64); + let labels = &vec![("spill", "aggregate_spill".to_string())]; + SPILL_READ_BYTES.get_or_create(labels).inc_by(c); } pub fn metrics_inc_aggregate_spill_read_milliseconds(c: u64) { - increment_gauge!(key!("aggregate_spill_read_milliseconds"), c as f64); + let labels = &vec![("spill", "aggregate_spill".to_string())]; + SPILL_READ_MILLISECONDS + .get_or_create(labels) + .observe(c as f64); } pub fn metrics_inc_aggregate_spill_data_serialize_milliseconds(c: u64) { - increment_gauge!( - key!("aggregate_spill_data_serialize_milliseconds"), - c as f64 - ); + let labels = &vec![("spill", "aggregate_spill".to_string())]; + SPILL_DATA_SERIALIZE_MILLISECONDS + .get_or_create(labels) + .observe(c as f64); } pub fn metrics_inc_aggregate_spill_data_deserialize_milliseconds(c: u64) { - increment_gauge!( - key!("aggregate_spill_data_deserialize_milliseconds"), - c as f64 - ); + let labels = &vec![("spill", "aggregate_spill".to_string())]; + SPILL_DATA_DESERIALIZE_MILLISECONDS + .get_or_create(labels) + .observe(c as f64); } diff --git a/src/query/service/src/servers/http/metrics.rs b/src/query/service/src/servers/http/metrics.rs index 4bb902e2e5f6..c51a22491363 100644 --- a/src/query/service/src/servers/http/metrics.rs +++ b/src/query/service/src/servers/http/metrics.rs @@ -12,23 +12,41 @@ // See the License for the specific language governing permissions and // limitations under the License. -use metrics::counter; +use common_metrics::register_counter; +use common_metrics::register_counter_family; +use common_metrics::Counter; +use common_metrics::Family; +use common_metrics::VecLabels; +use lazy_static::lazy_static; + +lazy_static! 
{ + static ref QUERY_HTTP_REQUESTS_COUNT: Family = + register_counter_family("query_http_requests_count"); + static ref QUERY_HTTP_SLOW_REQUESTS_COUNT: Family = + register_counter_family("query_http_slow_requests_count"); + static ref QUERY_HTTP_RESPONSE_ERRORS_COUNT: Family = + register_counter_family("query_http_response_errors_count"); + static ref QUERY_HTTP_RESPONSE_PANICS_COUNT: Counter = + register_counter("query_http_response_panics_count"); +} pub fn metrics_incr_http_request_count(method: String, api: String, status: String) { - let labels = [("method", method), ("api", api), ("status", status)]; - counter!("query_http_requests_count", 1, &labels); + let labels = vec![("method", method), ("api", api), ("status", status)]; + QUERY_HTTP_REQUESTS_COUNT.get_or_create(&labels).inc(); } pub fn metrics_incr_http_slow_request_count(method: String, api: String, status: String) { - let labels = [("method", method), ("api", api), ("status", status)]; - counter!("query_http_slow_requests_count", 1, &labels); + let labels = vec![("method", method), ("api", api), ("status", status)]; + QUERY_HTTP_SLOW_REQUESTS_COUNT.get_or_create(&labels).inc(); } pub fn metrics_incr_http_response_errors_count(err: String, code: u16) { - let labels = [("err", err), ("code", code.to_string())]; - counter!("query_http_response_errors_count", 1, &labels); + let labels = vec![("err", err), ("code", code.to_string())]; + QUERY_HTTP_RESPONSE_ERRORS_COUNT + .get_or_create(&labels) + .inc(); } pub fn metrics_incr_http_response_panics_count() { - counter!("query_http_response_panics_count", 1); + QUERY_HTTP_RESPONSE_PANICS_COUNT.inc(); } diff --git a/src/query/service/src/servers/mysql/mysql_interactive_worker.rs b/src/query/service/src/servers/mysql/mysql_interactive_worker.rs index a87f9f485c1a..559309296df8 100644 --- a/src/query/service/src/servers/mysql/mysql_interactive_worker.rs +++ b/src/query/service/src/servers/mysql/mysql_interactive_worker.rs @@ -35,7 +35,6 @@ use common_users::UserApiProvider; use futures_util::StreamExt; use log::error; use log::info; -use metrics::histogram; use minitrace::prelude::*; use opensrv_mysql::AsyncMysqlShim; use opensrv_mysql::ErrorKind; @@ -48,6 +47,7 @@ use rand::RngCore; use crate::interpreters::Interpreter; use crate::interpreters::InterpreterFactory; use crate::interpreters::InterpreterQueryLog; +use crate::servers::mysql::mysql_metrics; use crate::servers::mysql::writers::DFInitResultWriter; use crate::servers::mysql::writers::DFQueryResultWriter; use crate::servers::mysql::writers::ProgressReporter; @@ -217,11 +217,7 @@ impl AsyncMysqlShim for InteractiveWorke let suffix = format!("(while in query {})", query); write_result = Err(cause.add_message_back(suffix)); } - - histogram!( - super::mysql_metrics::METRIC_MYSQL_PROCESSOR_REQUEST_DURATION, - instant.elapsed() - ); + mysql_metrics::observe_mysql_process_request_duration(instant.elapsed()); write_result } @@ -397,10 +393,7 @@ impl InteractiveWorkerBase { let ctx = context.clone(); async move { let mut data_stream = interpreter.execute(ctx.clone()).await?; - histogram!( - super::mysql_metrics::METRIC_INTERPRETER_USEDTIME, - instant.elapsed() - ); + mysql_metrics::observe_mysql_interpreter_used_time(instant.elapsed()); // Wrap the data stream, log finish event at the end of stream let intercepted_stream = async_stream::stream! 
{ diff --git a/src/query/service/src/servers/mysql/mysql_metrics.rs b/src/query/service/src/servers/mysql/mysql_metrics.rs index 29209e1c2d7d..4ed12d9c7276 100644 --- a/src/query/service/src/servers/mysql/mysql_metrics.rs +++ b/src/query/service/src/servers/mysql/mysql_metrics.rs @@ -12,5 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub static METRIC_MYSQL_PROCESSOR_REQUEST_DURATION: &str = "mysql.process_request_duration"; -pub static METRIC_INTERPRETER_USEDTIME: &str = "interpreter.usedtime"; +use std::time::Duration; + +use common_metrics::register_histogram_in_milliseconds; +use common_metrics::Histogram; +use lazy_static::lazy_static; + +lazy_static! { + static ref MYSQL_PROCESSOR_REQUEST_DURATION: Histogram = + register_histogram_in_milliseconds("mysql_process_request_duration_ms"); + static ref MYSQL_INTERPRETER_USEDTIME: Histogram = + register_histogram_in_milliseconds("mysql_interpreter_usedtime_ms"); +} + +pub fn observe_mysql_process_request_duration(duration: Duration) { + MYSQL_PROCESSOR_REQUEST_DURATION.observe(duration.as_millis() as f64); +} + +pub fn observe_mysql_interpreter_used_time(duration: Duration) { + MYSQL_INTERPRETER_USEDTIME.observe(duration.as_millis() as f64); +} diff --git a/src/query/service/src/sessions/mod.rs b/src/query/service/src/sessions/mod.rs index 84d3d238224e..cf78ad82f172 100644 --- a/src/query/service/src/sessions/mod.rs +++ b/src/query/service/src/sessions/mod.rs @@ -18,6 +18,7 @@ mod query_ctx_shared; mod session; mod session_ctx; mod session_info; +mod session_metrics; mod session_mgr; mod session_mgr_status; mod session_status; diff --git a/src/query/service/src/sessions/session_metrics.rs b/src/query/service/src/sessions/session_metrics.rs new file mode 100644 index 000000000000..0c0d004c1a81 --- /dev/null +++ b/src/query/service/src/sessions/session_metrics.rs @@ -0,0 +1,37 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_metrics::register_counter; +use common_metrics::register_gauge; +use common_metrics::Counter; +use common_metrics::Gauge; +use lazy_static::lazy_static; + +lazy_static! 
{ + static ref SESSION_CONNECT_NUMBERS: Counter = register_counter("session_connect_numbers"); + static ref SESSION_CLOSE_NUMBERS: Counter = register_counter("session_close_numbers"); + static ref SESSION_ACTIVE_CONNECTIONS: Gauge = register_gauge("session_connections"); +} + +pub fn incr_session_connect_numbers() { + SESSION_CONNECT_NUMBERS.inc(); +} + +pub fn incr_session_close_numbers() { + SESSION_CLOSE_NUMBERS.inc(); +} + +pub fn set_session_active_connections(num: usize) { + SESSION_ACTIVE_CONNECTIONS.set(num as i64); +} diff --git a/src/query/service/src/sessions/session_mgr.rs b/src/query/service/src/sessions/session_mgr.rs index eb0353b0f41d..b698d78fd74a 100644 --- a/src/query/service/src/sessions/session_mgr.rs +++ b/src/query/service/src/sessions/session_mgr.rs @@ -29,8 +29,6 @@ use common_config::GlobalConfig; use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; -use common_metrics::label_counter; -use common_metrics::label_gauge; use common_settings::Settings; use futures::future::Either; use futures::StreamExt; @@ -38,15 +36,12 @@ use log::info; use parking_lot::RwLock; use crate::sessions::session::Session; +use crate::sessions::session_metrics; use crate::sessions::ProcessInfo; use crate::sessions::SessionContext; use crate::sessions::SessionManagerStatus; use crate::sessions::SessionType; -static METRIC_SESSION_CONNECT_NUMBERS: &str = "session_connect_numbers"; -static METRIC_SESSION_CLOSE_NUMBERS: &str = "session_close_numbers"; -static METRIC_SESSION_ACTIVE_CONNECTIONS: &str = "session_connections"; - pub struct SessionManager { pub(in crate::sessions) max_sessions: usize, pub(in crate::sessions) active_sessions: Arc>>>, @@ -134,18 +129,8 @@ impl SessionManager { self.validate_max_active_sessions(sessions.len(), "active sessions")?; } - let config = GlobalConfig::instance(); - label_counter( - METRIC_SESSION_CONNECT_NUMBERS, - &config.query.tenant_id, - &config.query.cluster_id, - ); - label_gauge( - METRIC_SESSION_ACTIVE_CONNECTIONS, - sessions.len() as f64, - &config.query.tenant_id, - &config.query.cluster_id, - ); + session_metrics::incr_session_connect_numbers(); + session_metrics::set_session_active_connections(sessions.len()); if !matches!(typ, SessionType::FlightRPC) { sessions.insert(session.get_id(), Arc::downgrade(&session)); @@ -171,13 +156,6 @@ impl SessionManager { } pub fn destroy_session(&self, session_id: &String) { - let config = GlobalConfig::instance(); - label_counter( - METRIC_SESSION_CLOSE_NUMBERS, - &config.query.tenant_id, - &config.query.cluster_id, - ); - // stop tracking session { // Make sure this write lock has been released before dropping. 
@@ -193,6 +171,13 @@ impl SessionManager { mysql_conns_map.remove(&k); } } + + let sessions_count = { + let sessions = self.active_sessions.read(); + sessions.len() + }; + session_metrics::incr_session_close_numbers(); + session_metrics::set_session_active_connections(sessions_count); } pub fn graceful_shutdown( diff --git a/src/query/service/tests/it/metrics.rs b/src/query/service/tests/it/metrics.rs index 520ea6808f79..abf2d235a6ba 100644 --- a/src/query/service/tests/it/metrics.rs +++ b/src/query/service/tests/it/metrics.rs @@ -15,16 +15,12 @@ use std::net::SocketAddr; use common_base::base::tokio; -use common_metrics::init_default_metrics_recorder; +use common_metrics::register_counter; use databend_query::metrics::MetricService; use databend_query::servers::Server; -use metrics::counter; - -pub static METRIC_TEST: &str = "metrics.test"; #[tokio::test(flavor = "multi_thread")] async fn test_metric_server() -> common_exception::Result<()> { - init_default_metrics_recorder(); let mut service = MetricService::create(); let listening = "127.0.0.1:0".parse::()?; let listening = service.start(listening).await?; @@ -34,15 +30,20 @@ async fn test_metric_server() -> common_exception::Result<()> { assert!(resp.is_ok()); let resp = resp.unwrap(); assert!(resp.status().is_success()); - assert_eq!(resp.text().await.unwrap().find("metrics_test 1"), None); - counter!(METRIC_TEST, 1); + assert_eq!( + resp.text().await.unwrap().find("unit_test_counter_total 1"), + None + ); + + let test_counter = register_counter("unit_test_counter"); + test_counter.inc(); let resp = client.get(url).send().await; assert!(resp.is_ok()); let resp = resp.unwrap(); assert!(resp.status().is_success()); let output = resp.text().await.unwrap(); - assert!(output.contains("metrics_test 1")); + assert!(output.contains("unit_test_counter_total 1")); Ok(()) } diff --git a/src/query/service/tests/it/storages/system.rs b/src/query/service/tests/it/storages/system.rs index 7dbddeea262c..dd767dc8ef89 100644 --- a/src/query/service/tests/it/storages/system.rs +++ b/src/query/service/tests/it/storages/system.rs @@ -28,7 +28,6 @@ use common_meta_app::principal::UserOption; use common_meta_app::principal::UserQuota; use common_meta_app::storage::StorageParams; use common_meta_app::storage::StorageS3Config; -use common_metrics::init_default_metrics_recorder; use common_sql::executor::table_read_plan::ToReadDataSourcePlan; use common_storages_system::BuildOptionsTable; use common_storages_system::CachesTable; @@ -268,14 +267,15 @@ async fn test_functions_table() -> Result<()> { #[tokio::test(flavor = "multi_thread")] async fn test_metrics_table() -> Result<()> { - init_default_metrics_recorder(); let (_guard, ctx) = databend_query::test_kits::create_query_context().await?; let table = MetricsTable::create(1); let source_plan = table.read_plan(ctx.clone(), None, true).await?; + let counter1 = common_metrics::register_counter("test_metrics_table_count"); + let histogram1 = + common_metrics::register_histogram_in_milliseconds("test_metrics_table_histogram"); - metrics::counter!("test.test_metrics_table_count", 1); - #[cfg(feature = "enable_histogram")] - metrics::histogram!("test.test_metrics_table_histogram", 1.0); + counter1.inc(); + histogram1.observe(2.0); let stream = table.read_data_block_stream(ctx, &source_plan).await?; let result = stream.try_collect::>().await?; @@ -284,9 +284,8 @@ async fn test_metrics_table() -> Result<()> { assert!(block.num_rows() >= 1); let output = pretty_format_blocks(result.as_slice())?; - 
assert!(output.contains("test_test_metrics_table_count")); - #[cfg(feature = "enable_histogram")] - assert!(output.contains("test_test_metrics_table_histogram")); + assert!(output.contains("test_metrics_table_count")); + assert!(output.contains("test_metrics_table_histogram")); Ok(()) } diff --git a/src/query/sharing/src/layer.rs b/src/query/sharing/src/layer.rs index 2dd407339cde..0bafa2054841 100644 --- a/src/query/sharing/src/layer.rs +++ b/src/query/sharing/src/layer.rs @@ -22,7 +22,6 @@ use http::Request; use http::Response; use http::StatusCode; use opendal::layers::LoggingLayer; -use opendal::layers::MetricsLayer; use opendal::layers::MinitraceLayer; use opendal::layers::RetryLayer; use opendal::raw::new_request_build_error; @@ -77,12 +76,11 @@ pub fn create_share_table_operator( })? // Add retry .layer(RetryLayer::new().with_jitter()) - // Add metrics - .layer(MetricsLayer) // Add logging .layer(LoggingLayer::default()) // Add tracing .layer(MinitraceLayer) + // TODO(liyz): add PrometheusClientLayer .finish() } None => { diff --git a/src/query/storages/common/cache/src/metrics.rs b/src/query/storages/common/cache/src/metrics.rs index 16df38de05ee..76cbf65b2de1 100644 --- a/src/query/storages/common/cache/src/metrics.rs +++ b/src/query/storages/common/cache/src/metrics.rs @@ -18,13 +18,8 @@ use common_metrics::Counter; use common_metrics::Family; use common_metrics::Histogram; use lazy_static::lazy_static; -use metrics::increment_gauge; use prometheus_client::encoding::EncodeLabelSet; -fn key_str(cache_name: &str, action: &str) -> String { - format!("cache_{cache_name}_{action}") -} - #[derive(Clone, Debug, EncodeLabelSet, Hash, PartialEq, Eq)] struct CacheLabels { cache_name: String, @@ -46,7 +41,6 @@ lazy_static! { } pub fn metrics_inc_cache_access_count(c: u64, cache_name: &str) { - increment_gauge!(key_str(cache_name, "access_count"), c as f64); CACHE_ACCESS_COUNT .get_or_create(&CacheLabels { cache_name: cache_name.to_string(), @@ -56,7 +50,6 @@ pub fn metrics_inc_cache_access_count(c: u64, cache_name: &str) { pub fn metrics_inc_cache_miss_count(c: u64, cache_name: &str) { // increment_gauge!(key!("memory_miss_count"), c as f64); - increment_gauge!(key_str(cache_name, "miss_count"), c as f64); CACHE_MISS_COUNT .get_or_create(&CacheLabels { cache_name: cache_name.to_string(), @@ -66,7 +59,6 @@ pub fn metrics_inc_cache_miss_count(c: u64, cache_name: &str) { // When cache miss, load time cost. 
pub fn metrics_inc_cache_miss_load_millisecond(c: u64, cache_name: &str) { - increment_gauge!(key_str(cache_name, "miss_load_millisecond"), c as f64); CACHE_MISS_LOAD_MILLISECOND .get_or_create(&CacheLabels { cache_name: cache_name.to_string(), @@ -75,7 +67,6 @@ pub fn metrics_inc_cache_miss_load_millisecond(c: u64, cache_name: &str) { } pub fn metrics_inc_cache_hit_count(c: u64, cache_name: &str) { - increment_gauge!(key_str(cache_name, "hit_count"), c as f64); CACHE_HIT_COUNT .get_or_create(&CacheLabels { cache_name: cache_name.to_string(), @@ -84,7 +75,6 @@ pub fn metrics_inc_cache_hit_count(c: u64, cache_name: &str) { } pub fn metrics_inc_cache_population_pending_count(c: i64, cache_name: &str) { - increment_gauge!(key_str(cache_name, "population_pending_count"), c as f64); CACHE_POPULATION_PENDING_COUNT .get_or_create(&CacheLabels { cache_name: cache_name.to_string(), @@ -93,7 +83,6 @@ pub fn metrics_inc_cache_population_pending_count(c: i64, cache_name: &str) { } pub fn metrics_inc_cache_population_overflow_count(c: i64, cache_name: &str) { - increment_gauge!(key_str(cache_name, "population_overflow_count"), c as f64); CACHE_POPULATION_OVERFLOW_COUNT .get_or_create(&CacheLabels { cache_name: cache_name.to_string(), diff --git a/src/query/storages/fuse/src/metrics/fuse_metrics.rs b/src/query/storages/fuse/src/metrics/fuse_metrics.rs index 9bff1ead72d8..2f772fec64fc 100644 --- a/src/query/storages/fuse/src/metrics/fuse_metrics.rs +++ b/src/query/storages/fuse/src/metrics/fuse_metrics.rs @@ -17,8 +17,6 @@ use common_metrics::register_histogram_in_milliseconds; use common_metrics::Counter; use common_metrics::Histogram; use lazy_static::lazy_static; -use metrics::counter; -use metrics::increment_gauge; macro_rules! key { ($key: literal) => { @@ -137,330 +135,254 @@ lazy_static! 
{ } pub fn metrics_inc_commit_mutation_unresolvable_conflict() { - counter!(key!("commit_mutation_unresolvable_conflict"), 1); COMMIT_MUTATION_UNRESOLVABLE_CONFLICT.inc(); } pub fn metrics_inc_commit_mutation_latest_snapshot_append_only() { - counter!(key!("commit_mutation_latest_snapshot_append_only"), 1); COMMIT_MUTATION_LATEST_SNAPSHOT_APPEND_ONLY.inc(); } pub fn metrics_inc_commit_mutation_modified_segment_exists_in_latest() { - counter!(key!("modified_segment_exists_in_latest"), 1); COMMIT_MUTATION_MODIFIED_SEGMENT_EXISTS_IN_LATEST.inc(); } pub fn metrics_inc_commit_mutation_retry() { - counter!(key!("commit_mutation_retry"), 1); COMMIT_MUTATION_RETRY.inc(); } pub fn metrics_inc_commit_mutation_success() { - counter!(key!("commit_mutation_success"), 1); COMMIT_MUTATION_SUCCESS.inc(); } pub fn metrics_inc_commit_copied_files(n: u64) { - counter!(key!("commit_copied_files"), n); COMMIT_COPIED_FILES.inc_by(n); } pub fn metrics_inc_commit_milliseconds(c: u128) { - increment_gauge!(key!("commit_milliseconds"), c as f64); COMMIT_MILLISECONDS.inc_by(c as u64); } pub fn metrics_inc_commit_aborts() { - counter!(key!("commit_aborts"), 1); COMMIT_ABORTS.inc(); } pub fn metrics_inc_remote_io_seeks(c: u64) { - increment_gauge!(key!("remote_io_seeks"), c as f64); REMOTE_IO_SEEKS.inc_by(c); } pub fn metrics_inc_remote_io_seeks_after_merged(c: u64) { - increment_gauge!(key!("remote_io_seeks_after_merged"), c as f64); REMOTE_IO_SEEKS_AFTER_MERGED.inc_by(c); } pub fn metrics_inc_remote_io_read_bytes(c: u64) { - increment_gauge!(key!("remote_io_read_bytes"), c as f64); REMOTE_IO_READ_BYTES.inc_by(c); } pub fn metrics_inc_remote_io_read_bytes_after_merged(c: u64) { - increment_gauge!(key!("remote_io_read_bytes_after_merged"), c as f64); REMOTE_IO_READ_BYTES_AFTER_MERGED.inc_by(c); } pub fn metrics_inc_remote_io_read_parts(c: u64) { - increment_gauge!(key!("remote_io_read_parts"), c as f64); REMOTE_IO_READ_PARTS.inc_by(c); } pub fn metrics_inc_remote_io_read_milliseconds(c: u64) { - increment_gauge!(key!("remote_io_read_milliseconds"), c as f64); REMOTE_IO_READ_MILLISECONDS.observe(c as f64); } pub fn metrics_inc_remote_io_deserialize_milliseconds(c: u64) { - increment_gauge!(key!("remote_io_deserialize_milliseconds"), c as f64); REMOTE_IO_DESERIALIZE_MILLISECONDS.observe(c as f64); } /// Block metrics. pub fn metrics_inc_block_write_nums(c: u64) { - increment_gauge!(key!("block_write_nums"), c as f64); BLOCK_WRITE_NUMS.inc_by(c); } pub fn metrics_inc_block_write_bytes(c: u64) { - increment_gauge!(key!("block_write_bytes"), c as f64); BLOCK_WRITE_BYTES.inc_by(c); } pub fn metrics_inc_block_write_milliseconds(c: u64) { - increment_gauge!(key!("block_write_milliseconds"), c as f64); BLOCK_WRITE_MILLISECONDS.observe(c as f64); } pub fn metrics_inc_block_index_write_nums(c: u64) { - increment_gauge!(key!("block_index_write_nums"), c as f64); BLOCK_INDEX_WRITE_NUMS.inc_by(c); } pub fn metrics_inc_block_index_write_bytes(c: u64) { - increment_gauge!(key!("block_index_write_bytes"), c as f64); BLOCK_INDEX_WRITE_BYTES.inc_by(c); } pub fn metrics_inc_block_index_write_milliseconds(c: u64) { - increment_gauge!(key!("block_index_write_milliseconds"), c as f64); BLOCK_INDEX_WRITE_MILLISECONDS.observe(c as f64); } pub fn metrics_inc_block_index_read_bytes(c: u64) { - increment_gauge!(key!("block_index_read_bytes"), c as f64); BLOCK_INDEX_READ_BYTES.inc_by(c); } /// Compact metrics. 
pub fn metrics_inc_compact_block_read_nums(c: u64) { - increment_gauge!(key!("compact_block_read_nums"), c as f64); COMPACT_BLOCK_READ_NUMS.inc_by(c); } pub fn metrics_inc_compact_block_read_bytes(c: u64) { - increment_gauge!(key!("compact_block_read_bytes"), c as f64); COMPACT_BLOCK_READ_BYTES.inc_by(c); } pub fn metrics_inc_compact_block_read_milliseconds(c: u64) { - increment_gauge!(key!("compact_block_read_milliseconds"), c as f64); COMPACT_BLOCK_READ_MILLISECONDS.observe(c as f64); } /// Pruning metrics. pub fn metrics_inc_segments_range_pruning_before(c: u64) { - increment_gauge!(key!("segments_range_pruning_before"), c as f64); SEGMENTS_RANGE_PRUNING_BEFORE.inc_by(c); } pub fn metrics_inc_segments_range_pruning_after(c: u64) { - increment_gauge!(key!("segments_range_pruning_after"), c as f64); SEGMENTS_RANGE_PRUNING_AFTER.inc_by(c); } pub fn metrics_inc_bytes_segment_range_pruning_before(c: u64) { - increment_gauge!(key!("bytes_segment_range_pruning_before"), c as f64); BYTES_SEGMENT_RANGE_PRUNING_BEFORE.inc_by(c); } pub fn metrics_inc_bytes_segment_range_pruning_after(c: u64) { - increment_gauge!(key!("bytes_segment_range_pruning_after"), c as f64); BYTES_SEGMENT_RANGE_PRUNING_AFTER.inc_by(c); } pub fn metrics_inc_blocks_range_pruning_before(c: u64) { - increment_gauge!(key!("blocks_range_pruning_before"), c as f64); BLOCKS_RANGE_PRUNING_BEFORE.inc_by(c); } pub fn metrics_inc_blocks_range_pruning_after(c: u64) { - increment_gauge!(key!("blocks_range_pruning_after"), c as f64); BLOCKS_RANGE_PRUNING_AFTER.inc_by(c); } pub fn metrics_inc_bytes_block_range_pruning_before(c: u64) { - increment_gauge!(key!("bytes_block_range_pruning_before"), c as f64); BYTES_BLOCK_BLOOM_PRUNING_BEFORE.inc_by(c); } pub fn metrics_inc_bytes_block_range_pruning_after(c: u64) { - increment_gauge!(key!("bytes_block_range_pruning_after"), c as f64); BYTES_BLOCK_BLOOM_PRUNING_AFTER.inc_by(c); } pub fn metrics_inc_blocks_bloom_pruning_before(c: u64) { - increment_gauge!(key!("blocks_bloom_pruning_before"), c as f64); BLOCKS_BLOOM_PRUNING_BEFORE.inc_by(c); } pub fn metrics_inc_blocks_bloom_pruning_after(c: u64) { - increment_gauge!(key!("blocks_bloom_pruning_after"), c as f64); BLOCKS_BLOOM_PRUNING_AFTER.inc_by(c); } pub fn metrics_inc_bytes_block_bloom_pruning_before(c: u64) { - increment_gauge!(key!("bytes_block_bloom_pruning_before"), c as f64); BYTES_BLOCK_BLOOM_PRUNING_BEFORE.inc_by(c); } pub fn metrics_inc_bytes_block_bloom_pruning_after(c: u64) { - increment_gauge!(key!("bytes_block_bloom_pruning_after"), c as f64); BYTES_BLOCK_BLOOM_PRUNING_AFTER.inc_by(c); } pub fn metrics_inc_pruning_prewhere_nums(c: u64) { - increment_gauge!(key!("pruning_prewhere_nums"), c as f64); PRUNING_PREWHERE_NUMS.inc_by(c); } pub fn metrics_inc_pruning_milliseconds(c: u64) { - increment_gauge!(key!("pruning_milliseconds"), c as f64); PRUNING_MILLISECONDS.observe(c as f64); } pub fn metrics_inc_deletion_block_range_pruned_nums(c: u64) { - increment_gauge!(key!("deletion_block_range_pruned_nums"), c as f64); DELETION_BLOCK_RANGE_PRUNED_NUMS.inc_by(c); } pub fn metrics_inc_deletion_segment_range_purned_whole_segment_nums(c: u64) { - increment_gauge!( - key!("deletion_segment_range_pruned_whole_segment_nums"), - c as f64 - ); DELETION_SEGMENT_RANGE_PRUNED_WHOLE_SEGMENT_NUMS.inc_by(c); } pub fn metrics_inc_deletion_block_range_pruned_whole_block_nums(c: u64) { - increment_gauge!( - key!("deletion_block_range_pruned_whole_block_nums"), - c as f64 - ); DELETION_BLOCK_RANGE_PRUNED_WHOLE_BLOCK_NUMS.inc_by(c); } pub fn 
metrics_inc_replace_block_number_after_pruning(c: u64) { - increment_gauge!(key!("replace_into_block_number_after_pruning"), c as f64); REPLACE_INTO_BLOCK_NUMBER_AFTER_PRUNING.inc_by(c); } pub fn metrics_inc_replace_segment_number_after_pruning(c: u64) { - increment_gauge!(key!("replace_into_segment_number_after_pruning"), c as f64); REPLACE_INTO_SEGMENT_NUMBER_AFTER_PRUNING.inc_by(c); } pub fn metrics_inc_replace_row_number_after_pruning(c: u64) { - increment_gauge!(key!("replace_into_row_number_after_pruning"), c as f64); REPLACE_INTO_ROW_NUMBER_AFTER_PRUNING.inc_by(c); } pub fn metrics_inc_replace_block_number_totally_loaded(c: u64) { - increment_gauge!(key!("replace_into_block_number_totally_loaded"), c as f64); REPLACE_INTO_BLOCK_NUMBER_TOTALLY_LOADED.inc_by(c); } pub fn metrics_inc_replace_row_number_write(c: u64) { - increment_gauge!(key!("replace_into_row_number_write"), c as f64); REPLACE_INTO_ROW_NUMBER_WRITE.inc_by(c); } pub fn metrics_inc_replace_block_number_write(c: u64) { - increment_gauge!(key!("replace_into_block_number_write"), c as f64); REPLACE_INTO_BLOCK_NUMBER_WRITE.inc_by(c); } pub fn metrics_inc_replace_row_number_totally_loaded(c: u64) { - increment_gauge!(key!("replace_into_row_number_totally_loaded"), c as f64); REPLACE_INTO_ROW_NUMBER_TOTALLY_LOADED.inc_by(c); } pub fn metrics_inc_replace_whole_block_deletion(c: u64) { - increment_gauge!( - key!("replace_into_block_number_whole_block_deletion"), - c as f64 - ); REPLACE_INTO_BLOCK_NUMBER_WHOLE_BLOCK_DELETION.inc_by(c); } pub fn metrics_inc_replace_block_of_zero_row_deleted(c: u64) { - increment_gauge!(key!("replace_into_block_number_zero_row_deleted"), c as f64); REPLACE_INTO_BLOCK_NUMBER_ZERO_ROW_DELETED.inc_by(c); } pub fn metrics_inc_replace_original_row_number(c: u64) { - increment_gauge!(key!("replace_into_row_number_source_block"), c as f64); REPLACE_INTO_ROW_NUMBER_SOURCE_BLOCK.inc_by(c); } pub fn metrics_inc_replace_row_number_after_table_level_pruning(c: u64) { - increment_gauge!( - key!("replace_into_row_number_after_table_level_pruning"), - c as f64 - ); REPLACE_INTO_ROW_NUMBER_AFTER_TABLE_LEVEL_PRUNING.inc_by(c); } pub fn metrics_inc_replace_partition_number(c: u64) { - increment_gauge!(key!("replace_into_partition_number"), c as f64); REPLACE_INTO_PARTITION_NUMBER.inc_by(c); } // time used in processing the input block pub fn metrics_inc_replace_process_input_block_time_ms(c: u64) { - increment_gauge!(key!("replace_into_time_process_input_block_ms"), c as f64); REPLACE_INTO_TIME_PROCESS_INPUT_BLOCK_MS.observe(c as f64); } // the number of accumulate_merge_action operation invoked pub fn metrics_inc_replace_number_accumulated_merge_action() { - increment_gauge!(key!("replace_into_number_accumulate_merge_action"), 1_f64); REPLACE_INTO_NUMBER_ACCUMULATED_MERGE_ACTION.inc(); } // the number of apply_deletion operation applied pub fn metrics_inc_replace_number_apply_deletion() { - increment_gauge!(key!("replace_into_number_apply_deletion"), 1_f64); REPLACE_INTO_NUMBER_APPLY_DELETION.inc(); } // time used in executing the accumulated_merge_action operation pub fn metrics_inc_replace_accumulated_merge_action_time_ms(c: u64) { - increment_gauge!( - key!("replace_into_time_accumulated_merge_action_ms"), - c as f64 - ); REPLACE_INTO_TIME_ACCUMULATED_MERGE_ACTION_MS.observe(c as f64) } // time used in executing the apply_deletion operation pub fn metrics_inc_replace_apply_deletion_time_ms(c: u64) { - increment_gauge!(key!("replace_into_time_apply_deletion_ms"), c as f64); 
REPLACE_INTO_TIME_APPLY_DELETION_MS.observe(c as f64); } // number of blocks that pruned by bloom filter pub fn metrics_inc_replace_block_number_bloom_pruned(c: u64) { - increment_gauge!(key!("replace_into_block_number_bloom_pruned"), c as f64); REPLACE_INTO_BLOCK_NUMBER_BLOOM_PRUNED.inc_by(c); } // number of blocks from upstream source pub fn metrics_inc_replace_block_number_input(c: u64) { - increment_gauge!(key!("replace_into_block_number_source"), c as f64); REPLACE_INTO_BLOCK_NUMBER_SOURCE.inc_by(c); } diff --git a/src/query/storages/fuse/src/metrics/index_metrics.rs b/src/query/storages/fuse/src/metrics/index_metrics.rs index 5be52900b00e..6e6d46756284 100644 --- a/src/query/storages/fuse/src/metrics/index_metrics.rs +++ b/src/query/storages/fuse/src/metrics/index_metrics.rs @@ -17,7 +17,6 @@ use common_metrics::register_histogram_in_milliseconds; use common_metrics::Counter; use common_metrics::Histogram; use lazy_static::lazy_static; -use metrics::increment_gauge; macro_rules! agg_index_key { ($key: literal) => { @@ -33,16 +32,13 @@ lazy_static! { } pub fn metrics_inc_agg_index_write_nums(c: u64) { - increment_gauge!(agg_index_key!("write_nums"), c as f64); AGG_INDEX_WRITE_NUMS.inc_by(c); } pub fn metrics_inc_agg_index_write_bytes(c: u64) { - increment_gauge!(agg_index_key!("write_bytes"), c as f64); AGG_INDEX_WRITE_BYTES.inc_by(c); } pub fn metrics_inc_agg_index_write_milliseconds(c: u64) { - increment_gauge!(agg_index_key!("write_milliseconds"), c as f64); AGG_INDEX_WRITE_MILLISECONDS.observe(c as f64); } diff --git a/src/query/storages/system/src/metrics_table.rs b/src/query/storages/system/src/metrics_table.rs index a70750ca88c7..d46558275e14 100644 --- a/src/query/storages/system/src/metrics_table.rs +++ b/src/query/storages/system/src/metrics_table.rs @@ -29,7 +29,7 @@ use common_expression::TableSchemaRefExt; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; -use common_metrics::reset_metrics; +use common_metrics::reset_global_prometheus_registry; use common_metrics::MetricSample; use common_metrics::MetricValue; @@ -53,11 +53,10 @@ impl SyncSystemTable for MetricsTable { fn get_full_data(&self, ctx: Arc) -> Result { let local_id = ctx.get_cluster().local_id.clone(); - let prometheus_handle = common_metrics::try_handle().ok_or_else(|| { - ErrorCode::InitPrometheusFailure("Prometheus recorder is not initialized yet.") - })?; - - let mut samples = common_metrics::dump_metric_samples(prometheus_handle)?; + let mut samples = { + let registry = common_metrics::load_global_prometheus_registry(); + common_metrics::dump_metric_samples(®istry)? + }; samples.extend(self.custom_metric_samples()?); let mut nodes: Vec> = Vec::with_capacity(samples.len()); @@ -83,7 +82,7 @@ impl SyncSystemTable for MetricsTable { } fn truncate(&self, _ctx: Arc) -> Result<()> { - reset_metrics()?; + reset_global_prometheus_registry(); Ok(()) } } From ed7433901a64855bb7fb2d1fc47cc7e4374c1bc7 Mon Sep 17 00:00:00 2001 From: RinChanNOW Date: Wed, 20 Sep 2023 17:46:28 +0800 Subject: [PATCH 03/21] fix: support aggregation arguments in grouping sets. (#12939) * Wrap grouping sets related variables into one struct. * Fix: support agg args in grouping sets. 
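Note: the problem this fixes is that `AggregateExpand` wraps the group-by columns in nullable types and pads them with NULLs for the grouping sets that mask them, so an aggregate whose argument is also a group item would otherwise read those padded values and panic. The fix duplicates the original group columns and rebinds such aggregate arguments to the duplicates. Below is a minimal, self-contained Rust sketch of that rebinding; the `GroupingSets` field names follow how they are used in this patch, but `IndexType`, the tiny `DataType` enum and the concrete indices are stand-ins made up for illustration, not the crate's real definitions.

// Hypothetical stand-ins; the real types live in the databend crates.
type IndexType = usize;

#[derive(Clone, Debug, PartialEq)]
enum DataType {
    Int64,
    String,
}

// Sketch of the struct that now bundles the grouping-sets state.
struct GroupingSets {
    sets: Vec<Vec<IndexType>>,                   // each grouping set as group-item indices
    grouping_id_index: IndexType,                // the virtual `_grouping_id` column
    dup_group_items: Vec<(IndexType, DataType)>, // duplicated group columns and their types
}

fn main() {
    // GROUP BY GROUPING SETS ((a), (a, b)) with group items a = 0, b = 1.
    let group_items: Vec<IndexType> = vec![0, 1];
    let gs = GroupingSets {
        sets: vec![vec![0], vec![0, 1]],
        grouping_id_index: 2,
        dup_group_items: vec![(3, DataType::Int64), (4, DataType::String)],
    };

    // An aggregate argument that is also a group item (e.g. sum(a)) is rebound
    // to the duplicated column, which is never NULL-padded by AggregateExpand.
    let mut arg: IndexType = 0;
    if let Some(pos) = group_items.iter().position(|g| *g == arg) {
        arg = gs.dup_group_items[pos].0;
    }
    assert_eq!(arg, 3);
}

With grouping sets ((a), (a, b)), sum(a) now reads the duplicated column (index 3 in the sketch) instead of the NULL-padded original; this mirrors the rebinding loop added to the plan builders in the diff below.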
--- .../service/src/pipelines/pipeline_builder.rs | 19 ++- .../aggregator/transform_aggregate_expand.rs | 18 ++- .../transforms/window/window_function.rs | 2 +- src/query/sql/src/executor/format.rs | 1 + src/query/sql/src/executor/physical_plan.rs | 23 ++- .../sql/src/executor/physical_plan_builder.rs | 122 ++++++++-------- .../sql/src/executor/physical_plan_display.rs | 5 +- .../sql/src/executor/physical_plan_visitor.rs | 1 - src/query/sql/src/executor/profile.rs | 1 + src/query/sql/src/planner/binder/aggregate.rs | 138 ++++++++++++++---- src/query/sql/src/planner/binder/distinct.rs | 3 +- src/query/sql/src/planner/binder/project.rs | 2 +- .../optimizer/heuristic/decorrelate.rs | 4 +- .../optimizer/heuristic/subquery_rewriter.rs | 6 +- .../rule/rewrite/agg_index/query_rewrite.rs | 2 +- src/query/sql/src/planner/plans/aggregate.rs | 18 ++- .../src/planner/semantic/grouping_check.rs | 4 +- .../group/group_by_grouping_sets.test | 101 ++++++++++++- .../explain/explain_grouping_sets.test | 12 +- .../explain_native/explain_grouping_sets.test | 12 +- 20 files changed, 350 insertions(+), 144 deletions(-) diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 0f38b147e7a6..cd7f0f79039f 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -1322,20 +1322,17 @@ impl PipelineBuilder { .group_bys .iter() .take(expand.group_bys.len() - 1) // The last group-by will be virtual column `_grouping_id` - .map(|i| { - let index = input_schema.index_of(&i.to_string())?; - let ty = input_schema.field(index).data_type(); - Ok((index, ty.clone())) - }) + .map(|i| input_schema.index_of(&i.to_string())) .collect::>>()?; let grouping_sets = expand .grouping_sets + .sets .iter() .map(|sets| { sets.iter() .map(|i| { let i = input_schema.index_of(&i.to_string())?; - let offset = group_bys.iter().position(|(j, _)| *j == i).unwrap(); + let offset = group_bys.iter().position(|j| *j == i).unwrap(); Ok(offset) }) .collect::>>() @@ -1607,7 +1604,15 @@ impl PipelineBuilder { let aggs: Vec = agg_funcs .iter() .map(|agg_func| { - agg_args.push(agg_func.args.clone()); + let args = agg_func + .arg_indices + .iter() + .map(|i| { + let index = input_schema.index_of(&i.to_string())?; + Ok(index) + }) + .collect::>>()?; + agg_args.push(args); AggregateFunctionFactory::instance().get( agg_func.sig.name.as_str(), agg_func.sig.params.clone(), diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_expand.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_expand.rs index d62e35d11368..30601e08f23b 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_expand.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_expand.rs @@ -29,7 +29,7 @@ use common_pipeline_transforms::processors::transforms::Transform; use common_pipeline_transforms::processors::transforms::Transformer; pub struct TransformExpandGroupingSets { - group_bys: Vec<(usize, DataType)>, + group_bys: Vec, grouping_ids: Vec, } @@ -37,7 +37,7 @@ impl TransformExpandGroupingSets { pub fn create( input: Arc, output: Arc, - group_bys: Vec<(usize, DataType)>, + group_bys: Vec, grouping_ids: Vec, ) -> ProcessorPtr { ProcessorPtr::create(Transformer::create( @@ -58,10 +58,15 @@ impl Transform for TransformExpandGroupingSets { let num_rows = data.num_rows(); let num_group_bys = 
self.group_bys.len(); let mut output_blocks = Vec::with_capacity(self.grouping_ids.len()); + let dup_group_by_cols = self + .group_bys + .iter() + .map(|i| data.columns()[*i].clone()) + .collect::>(); for &id in &self.grouping_ids { // Repeat data for each grouping set. - let grouping_column = BlockEntry::new( + let grouping_id_column = BlockEntry::new( DataType::Number(NumberDataType::UInt32), Value::Scalar(Scalar::Number(NumberScalar::UInt32(id as u32))), ); @@ -69,13 +74,14 @@ impl Transform for TransformExpandGroupingSets { .columns() .iter() .cloned() - .chain(vec![grouping_column]) + .chain(dup_group_by_cols.iter().cloned()) + .chain(vec![grouping_id_column]) .collect::>(); let bits = !id; for i in 0..num_group_bys { let entry = unsafe { - let offset = self.group_bys.get_unchecked(i).0; - columns.get_unchecked_mut(offset) + let offset = self.group_bys.get_unchecked(i); + columns.get_unchecked_mut(*offset) }; if bits & (1 << i) == 0 { // This column should be set to NULLs. diff --git a/src/query/service/src/pipelines/processors/transforms/window/window_function.rs b/src/query/service/src/pipelines/processors/transforms/window/window_function.rs index 49216baf0da6..d19261f0ce5b 100644 --- a/src/query/service/src/pipelines/processors/transforms/window/window_function.rs +++ b/src/query/service/src/pipelines/processors/transforms/window/window_function.rs @@ -188,7 +188,7 @@ impl WindowFunctionInfo { agg.sig.args.clone(), )?; let args = agg - .args + .arg_indices .iter() .map(|p| { let offset = schema.index_of(&p.to_string())?; diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index bf69fbc770f2..37447a2028ff 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -507,6 +507,7 @@ fn aggregate_expand_to_format_tree( ) -> Result> { let sets = plan .grouping_sets + .sets .iter() .map(|set| { set.iter() diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs index 7a0cc6af5c8f..dc1781fb593c 100644 --- a/src/query/sql/src/executor/physical_plan.rs +++ b/src/query/sql/src/executor/physical_plan.rs @@ -53,6 +53,7 @@ use crate::executor::explain::PlanStatsInfo; use crate::executor::RangeJoinCondition; use crate::optimizer::ColumnSet; use crate::plans::CopyIntoTableMode; +use crate::plans::GroupingSets; use crate::plans::JoinType; use crate::plans::RuntimeFilterId; use crate::plans::ValidationMode; @@ -288,6 +289,7 @@ impl ProjectSet { } } +/// Add dummy data before `GROUPING SETS`. #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct AggregateExpand { /// A unique id of operator in a `PhysicalPlan` tree. @@ -296,8 +298,7 @@ pub struct AggregateExpand { pub input: Box, pub group_bys: Vec, - pub grouping_id_index: IndexType, - pub grouping_sets: Vec>, + pub grouping_sets: GroupingSets, /// Only used for explain pub stat_info: Option, } @@ -306,20 +307,25 @@ impl AggregateExpand { pub fn output_schema(&self) -> Result { let input_schema = self.input.output_schema()?; let mut output_fields = input_schema.fields().clone(); + // Add virtual columns to group by. + output_fields.reserve(self.group_bys.len() + 1); - for group_by in self + for (group_by, (actual, ty)) in self .group_bys .iter() - .filter(|&index| *index != self.grouping_id_index) + .zip(self.grouping_sets.dup_group_items.iter()) { // All group by columns will wrap nullable. 
let i = input_schema.index_of(&group_by.to_string())?; let f = &mut output_fields[i]; - *f = DataField::new(f.name(), f.data_type().wrap_nullable()) + debug_assert_eq!(f.data_type(), ty); + *f = DataField::new(f.name(), f.data_type().wrap_nullable()); + let new_field = DataField::new(&actual.to_string(), ty.clone()); + output_fields.push(new_field); } output_fields.push(DataField::new( - &self.grouping_id_index.to_string(), + &self.grouping_sets.grouping_id_index.to_string(), DataType::Number(NumberDataType::UInt32), )); Ok(DataSchemaRefExt::create(output_fields)) @@ -342,7 +348,8 @@ pub struct AggregatePartial { impl AggregatePartial { pub fn output_schema(&self) -> Result { let input_schema = self.input.output_schema()?; - let mut fields = Vec::with_capacity(self.agg_funcs.len() + self.group_by.len()); + let mut fields = + Vec::with_capacity(self.agg_funcs.len() + self.group_by.is_empty() as usize); for agg in self.agg_funcs.iter() { fields.push(DataField::new( &agg.output_column.to_string(), @@ -1266,7 +1273,7 @@ impl PhysicalPlan { pub struct AggregateFunctionDesc { pub sig: AggregateFunctionSignature, pub output_column: IndexType, - pub args: Vec, + /// Bound indices of arguments. Only used in partial aggregation. pub arg_indices: Vec, } diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index 6b2d3a218ef8..4f171e2514d7 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -324,7 +324,6 @@ impl PhysicalPlanBuilder { from_distinct: agg.from_distinct, mode: agg.mode, limit: agg.limit, - grouping_id_index: agg.grouping_id_index, grouping_sets: agg.grouping_sets.clone(), }; @@ -335,7 +334,7 @@ impl PhysicalPlanBuilder { let result = match &agg.mode { AggregateMode::Partial => { - let agg_funcs: Vec = agg.aggregate_functions.iter().map(|v| { + let mut agg_funcs: Vec = agg.aggregate_functions.iter().map(|v| { if let ScalarExpr::AggregateFunction(agg) = &v.scalar { Ok(AggregateFunctionDesc { sig: AggregateFunctionSignature { @@ -352,15 +351,6 @@ impl PhysicalPlanBuilder { params: agg.params.clone(), }, output_column: v.index, - args: agg.args.iter().map(|arg| { - if let ScalarExpr::BoundColumnRef(col) = arg { - input_schema.index_of(&col.column.index.to_string()) - } else { - Err(ErrorCode::Internal( - "Aggregate function argument must be a BoundColumnRef".to_string() - )) - } - }).collect::>()?, arg_indices: agg.args.iter().map(|arg| { if let ScalarExpr::BoundColumnRef(col) = arg { Ok(col.column.index) @@ -379,35 +369,52 @@ impl PhysicalPlanBuilder { let settings = self.ctx.get_settings(); let group_by_shuffle_mode = settings.get_group_by_shuffle_mode()?; + if let Some(grouping_sets) = agg.grouping_sets.as_ref() { + assert_eq!(grouping_sets.dup_group_items.len(), group_items.len() - 1); // ignore `_grouping_id`. + // If the aggregation function argument if a group item, + // we cannot use the group item directly. + // It's because the group item will be wrapped with nullable and fill dummy NULLs (in `AggregateExpand` plan), + // which will cause panic while executing aggregation function. + // To avoid the panic, we will duplicate (`Arc::clone`) original group item columns in `AggregateExpand`, + // we should use these columns instead. 
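// For example, in `SELECT min(a) FROM t GROUP BY GROUPING SETS (a, b)` the argument of
// `min` is remapped from `a` to the duplicated column `_dup_group_item_0`, which still
// holds the original values of `a` rather than the NULL-filled copy used for grouping.
// The sqllogictest cases added for ISSUE-12852 below exercise exactly this path.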
+ for func in agg_funcs.iter_mut() { + for arg in func.arg_indices.iter_mut() { + if let Some(pos) = group_items.iter().position(|g| g == arg) { + *arg = grouping_sets.dup_group_items[pos].0; + } + } + } + } + match input { PhysicalPlan::Exchange(PhysicalExchange { input, kind, .. }) if group_by_shuffle_mode == "before_merge" => { - let aggregate_partial = if !agg.grouping_sets.is_empty() { - let expand = AggregateExpand { - plan_id: self.next_plan_id(), - input, - group_bys: group_items.clone(), - grouping_id_index: agg.grouping_id_index, - grouping_sets: agg.grouping_sets.clone(), - stat_info: Some(stat_info.clone()), + let aggregate_partial = + if let Some(grouping_sets) = agg.grouping_sets { + let expand = AggregateExpand { + plan_id: self.next_plan_id(), + input, + group_bys: group_items.clone(), + grouping_sets, + stat_info: Some(stat_info.clone()), + }; + AggregatePartial { + plan_id: self.next_plan_id(), + input: Box::new(PhysicalPlan::AggregateExpand(expand)), + agg_funcs, + group_by: group_items, + stat_info: Some(stat_info), + } + } else { + AggregatePartial { + plan_id: self.next_plan_id(), + input, + agg_funcs, + group_by: group_items, + stat_info: Some(stat_info), + } }; - AggregatePartial { - plan_id: self.next_plan_id(), - input: Box::new(PhysicalPlan::AggregateExpand(expand)), - agg_funcs, - group_by: group_items, - stat_info: Some(stat_info), - } - } else { - AggregatePartial { - plan_id: self.next_plan_id(), - input, - agg_funcs, - group_by: group_items, - stat_info: Some(stat_info), - } - }; let settings = self.ctx.get_settings(); let efficiently_memory = @@ -441,13 +448,12 @@ impl PhysicalPlanBuilder { }) } _ => { - if !agg.grouping_sets.is_empty() { + if let Some(grouping_sets) = agg.grouping_sets { let expand = AggregateExpand { plan_id: self.next_plan_id(), input: Box::new(input), group_bys: group_items.clone(), - grouping_id_index: agg.grouping_id_index, - grouping_sets: agg.grouping_sets.clone(), + grouping_sets, stat_info: Some(stat_info.clone()), }; PhysicalPlan::AggregatePartial(AggregatePartial { @@ -488,7 +494,7 @@ impl PhysicalPlanBuilder { } }; - let agg_funcs: Vec = agg.aggregate_functions.iter().map(|v| { + let mut agg_funcs: Vec = agg.aggregate_functions.iter().map(|v| { if let ScalarExpr::AggregateFunction(agg) = &v.scalar { Ok(AggregateFunctionDesc { sig: AggregateFunctionSignature { @@ -505,15 +511,6 @@ impl PhysicalPlanBuilder { params: agg.params.clone(), }, output_column: v.index, - args: agg.args.iter().map(|arg| { - if let ScalarExpr::BoundColumnRef(col) = arg { - input_schema.index_of(&col.column.index.to_string()) - } else { - Err(ErrorCode::Internal( - "Aggregate function argument must be a BoundColumnRef".to_string() - )) - } - }).collect::>()?, arg_indices: agg.args.iter().map(|arg| { if let ScalarExpr::BoundColumnRef(col) = arg { Ok(col.column.index) @@ -529,6 +526,21 @@ impl PhysicalPlanBuilder { } }).collect::>()?; + if let Some(grouping_sets) = agg.grouping_sets.as_ref() { + // The argument types are wrapped nullable due to `AggregateExpand` plan. We should recover them to original types. + assert_eq!(grouping_sets.dup_group_items.len(), group_items.len() - 1); // ignore `_grouping_id`. 
+ for func in agg_funcs.iter_mut() { + for (arg, ty) in + func.arg_indices.iter_mut().zip(func.sig.args.iter_mut()) + { + if let Some(pos) = group_items.iter().position(|g| g == arg) { + *arg = grouping_sets.dup_group_items[pos].0; + *ty = grouping_sets.dup_group_items[pos].1.clone(); + } + } + } + } + match input { PhysicalPlan::AggregatePartial(ref partial) => { let before_group_by_schema = partial.input.output_schema()?; @@ -1271,20 +1283,6 @@ impl PhysicalPlanBuilder { params: agg.params.clone(), }, output_column: w.index, - args: agg - .args - .iter() - .map(|arg| { - if let ScalarExpr::BoundColumnRef(col) = arg { - Ok(col.column.index) - } else { - Err(ErrorCode::Internal( - "Window's aggregate function argument must be a BoundColumnRef" - .to_string(), - )) - } - }) - .collect::>()?, arg_indices: agg .args .iter() diff --git a/src/query/sql/src/executor/physical_plan_display.rs b/src/query/sql/src/executor/physical_plan_display.rs index e594e4b1d9ee..c1e8a03283d7 100644 --- a/src/query/sql/src/executor/physical_plan_display.rs +++ b/src/query/sql/src/executor/physical_plan_display.rs @@ -212,6 +212,7 @@ impl Display for AggregateExpand { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let sets = self .grouping_sets + .sets .iter() .map(|set| { set.iter() @@ -242,7 +243,7 @@ impl Display for AggregateFinal { format!( "{}({})", item.sig.name, - item.args + item.arg_indices .iter() .map(|index| index.to_string()) .collect::>() @@ -275,7 +276,7 @@ impl Display for AggregatePartial { format!( "{}({})", item.sig.name, - item.args + item.arg_indices .iter() .map(|index| index.to_string()) .collect::>() diff --git a/src/query/sql/src/executor/physical_plan_visitor.rs b/src/query/sql/src/executor/physical_plan_visitor.rs index a0d99825b13b..9b990c13cbbe 100644 --- a/src/query/sql/src/executor/physical_plan_visitor.rs +++ b/src/query/sql/src/executor/physical_plan_visitor.rs @@ -145,7 +145,6 @@ pub trait PhysicalPlanReplacer { plan_id: plan.plan_id, input: Box::new(input), group_bys: plan.group_bys.clone(), - grouping_id_index: plan.grouping_id_index, grouping_sets: plan.grouping_sets.clone(), stat_info: plan.stat_info.clone(), })) diff --git a/src/query/sql/src/executor/profile.rs b/src/query/sql/src/executor/profile.rs index 031ac6a90424..af5e6499c437 100644 --- a/src/query/sql/src/executor/profile.rs +++ b/src/query/sql/src/executor/profile.rs @@ -205,6 +205,7 @@ fn flatten_plan_node_profile( attribute: OperatorAttribute::AggregateExpand(AggregateExpandAttribute { group_keys: expand .grouping_sets + .sets .iter() .map(|columns| { format!( diff --git a/src/query/sql/src/planner/binder/aggregate.rs b/src/query/sql/src/planner/binder/aggregate.rs index 6a0ce04f6fd6..e2fe06c5c657 100644 --- a/src/query/sql/src/planner/binder/aggregate.rs +++ b/src/query/sql/src/planner/binder/aggregate.rs @@ -44,6 +44,7 @@ use crate::plans::BoundColumnRef; use crate::plans::CastExpr; use crate::plans::EvalScalar; use crate::plans::FunctionCall; +use crate::plans::GroupingSets; use crate::plans::LagLeadFunction; use crate::plans::LambdaFunc; use crate::plans::NthValueFunction; @@ -57,6 +58,73 @@ use crate::BindContext; use crate::IndexType; use crate::MetadataRef; +/// Information for `GROUPING SETS`. +/// +/// `GROUPING SETS` will generate several `GROUP BY` sets, and union their results. 
For example: +/// +/// ```sql +/// SELECT min(a), b, c FROM t GROUP BY GROUPING SETS (b, c); +/// ``` +/// +/// is equal to: +/// +/// ```sql +/// (SELECT min(a), b, NULL FROM t GROUP BY b) UNION (SELECT min(a), NULL, c FROM t GROUP BY c); +/// ``` +/// +/// In Databend, we do not really rewrite the plan to a `UNION` plan. +/// We will add a new virtual column `_grouping_id` to the group by items, +/// where `_grouping_id` is the result value of function [grouping](https://databend.rs/doc/sql-functions/other-functions/grouping). +/// +/// For example, we will rewrite the SQL above to: +/// +/// ```sql +/// SELECT min(a), b, c FROM t GROUP BY (b, c, _grouping_id); +/// ``` +/// +/// To get the right result, we also need to fill dummy data for each grouping set. +/// +/// The above SQL again, if the columns' data is: +/// +/// a | b | c +/// --- | --- | --- +/// 1 | 2 | 3 +/// 4 | 5 | 6 +/// +/// We will expand the data to: +/// +/// - Grouping sets (b): +/// +/// a | b | c | grouping(b,c) +/// --- | --- | --- | --- +/// 1 | 2 | NULL | 2 (0b10) +/// 4 | 5 | NULL | 2 (0b10) +/// +/// - Grouping sets (c): +/// +/// a | b | c | grouping(b,c) +/// --- | --- | --- | --- +/// 1 | NULL | 3 | 1 (0b01) +/// 4 | NULL | 6 | 1 (0b01) +/// +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct GroupingSetsInfo { + /// Index for virtual column `grouping_id`. + pub grouping_id_column: ColumnBinding, + /// Each grouping set is a list of column indices in `group_items`. + pub sets: Vec>, + /// The indices generated to identify the duplicate group items in the execution of the `GROUPING SETS` plan (not including `_grouping_id`). + /// + /// If the aggregation function argument is an item in the grouping set, for example: + /// + /// ```sql + /// SELECT min(a) FROM t GROUP BY GROUPING SETS (a, ...); + /// ``` + /// + /// we should use the original column `a` data instead of the column data after filling dummy NULLs. + pub dup_group_items: Vec<(IndexType, DataType)>, +} + #[derive(Default, Clone, PartialEq, Eq, Debug)] pub struct AggregateInfo { /// Aggregation functions @@ -82,10 +150,8 @@ pub struct AggregateInfo { /// We will check the validity by lookup this map with display name. pub group_items_map: HashMap, - /// Index for virtual column `grouping_id`. It's valid only if `grouping_sets` is not empty. - pub grouping_id_column: Option, - /// Each grouping set is a list of column indices in `group_items`. - pub grouping_sets: Vec>, + /// Information of grouping sets + pub grouping_sets: Option, } pub(super) struct AggregateRewriter<'a> { @@ -320,13 +386,17 @@ impl<'a> AggregateRewriter<'a> { fn replace_grouping(&mut self, function: &FunctionCall) -> Result { let agg_info = &mut self.bind_context.aggregate_info; - if agg_info.grouping_id_column.is_none() { + if agg_info.grouping_sets.is_none() { return Err(ErrorCode::SemanticError( "grouping can only be called in GROUP BY GROUPING SETS clauses", )); } - let grouping_id_column = agg_info.grouping_id_column.clone().unwrap(); - + let grouping_id_column = agg_info + .grouping_sets + .as_ref() + .unwrap() + .grouping_id_column + .clone(); // Rewrite the args to params. // The params are the index offset in `grouping_id`. 
// Here is an example: @@ -494,16 +564,15 @@ impl Binder { let aggregate_plan = Aggregate { mode: AggregateMode::Initial, - group_items: bind_context.aggregate_info.group_items.clone(), - aggregate_functions: bind_context.aggregate_info.aggregate_functions.clone(), + group_items: agg_info.group_items.clone(), + aggregate_functions: agg_info.aggregate_functions.clone(), from_distinct: false, limit: None, - grouping_sets: agg_info.grouping_sets.clone(), - grouping_id_index: agg_info - .grouping_id_column - .as_ref() - .map(|g| g.index) - .unwrap_or(0), + grouping_sets: agg_info.grouping_sets.as_ref().map(|g| GroupingSets { + grouping_id_index: g.grouping_id_column.index, + sets: g.sets.clone(), + dup_group_items: g.dup_group_items.clone(), + }), }; new_expr = SExpr::create_unary(Arc::new(aggregate_plan.into()), Arc::new(new_expr)); @@ -548,28 +617,43 @@ impl Binder { }) .collect::>(); let grouping_sets = grouping_sets.into_iter().unique().collect(); - agg_info.grouping_sets = grouping_sets; + let mut dup_group_items = Vec::with_capacity(agg_info.group_items.len()); + for (i, item) in agg_info.group_items.iter().enumerate() { + // We just generate a new bound index. + let dummy = self.create_derived_column_binding( + format!("_dup_group_item_{i}"), + item.scalar.data_type()?, + ); + dup_group_items.push((dummy.index, *dummy.data_type)); + } // Add a virtual column `_grouping_id` to group items. let grouping_id_column = self.create_derived_column_binding( "_grouping_id".to_string(), DataType::Number(NumberDataType::UInt32), ); - let index = grouping_id_column.index; - agg_info.grouping_id_column = Some(grouping_id_column.clone()); + + let bound_grouping_id_col = BoundColumnRef { + span: None, + column: grouping_id_column.clone(), + }; + agg_info.group_items_map.insert( - ScalarExpr::BoundColumnRef(BoundColumnRef { - span: None, - column: grouping_id_column.clone(), - }), + bound_grouping_id_col.clone().into(), agg_info.group_items.len(), ); agg_info.group_items.push(ScalarItem { - index, - scalar: ScalarExpr::BoundColumnRef(BoundColumnRef { - span: None, - column: grouping_id_column, - }), + index: grouping_id_column.index, + scalar: bound_grouping_id_col.into(), }); + + let grouping_sets_info = GroupingSetsInfo { + grouping_id_column, + sets: grouping_sets, + dup_group_items, + }; + + agg_info.grouping_sets = Some(grouping_sets_info); + Ok(()) } diff --git a/src/query/sql/src/planner/binder/distinct.rs b/src/query/sql/src/planner/binder/distinct.rs index 03631abe0bbb..4c9f3d67ccc4 100644 --- a/src/query/sql/src/planner/binder/distinct.rs +++ b/src/query/sql/src/planner/binder/distinct.rs @@ -82,8 +82,7 @@ impl Binder { aggregate_functions: vec![], from_distinct: true, limit: None, - grouping_id_index: 0, - grouping_sets: vec![], + grouping_sets: None, }; Ok(SExpr::create_unary( diff --git a/src/query/sql/src/planner/binder/project.rs b/src/query/sql/src/planner/binder/project.rs index 1cf35f73da04..b193efa761d3 100644 --- a/src/query/sql/src/planner/binder/project.rs +++ b/src/query/sql/src/planner/binder/project.rs @@ -59,7 +59,7 @@ impl Binder { let mut scalars = HashMap::new(); for item in select_list.items.iter() { // This item is a grouping sets item, its data type should be nullable. 
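// For example, in `SELECT b, c FROM t GROUP BY GROUPING SETS (b, c)`, rows grouped only
// by `b` carry NULL for `c` and vice versa, so both projected columns must be nullable.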
- let is_grouping_sets_item = agg_info.grouping_id_column.is_some() + let is_grouping_sets_item = agg_info.grouping_sets.is_some() && agg_info.group_items_map.contains_key(&item.scalar); let mut column_binding = if let ScalarExpr::BoundColumnRef(ref column_ref) = item.scalar { diff --git a/src/query/sql/src/planner/optimizer/heuristic/decorrelate.rs b/src/query/sql/src/planner/optimizer/heuristic/decorrelate.rs index d877c1ec4b87..2ca3938670aa 100644 --- a/src/query/sql/src/planner/optimizer/heuristic/decorrelate.rs +++ b/src/query/sql/src/planner/optimizer/heuristic/decorrelate.rs @@ -490,8 +490,7 @@ impl SubqueryRewriter { aggregate_functions: vec![], from_distinct: false, limit: None, - grouping_id_index: 0, - grouping_sets: vec![], + grouping_sets: None, } .into(), ), @@ -707,7 +706,6 @@ impl SubqueryRewriter { aggregate_functions: agg_items, from_distinct: aggregate.from_distinct, limit: aggregate.limit, - grouping_id_index: aggregate.grouping_id_index, grouping_sets: aggregate.grouping_sets.clone(), } .into(), diff --git a/src/query/sql/src/planner/optimizer/heuristic/subquery_rewriter.rs b/src/query/sql/src/planner/optimizer/heuristic/subquery_rewriter.rs index 2048eeeec6cd..097c25ef6fde 100644 --- a/src/query/sql/src/planner/optimizer/heuristic/subquery_rewriter.rs +++ b/src/query/sql/src/planner/optimizer/heuristic/subquery_rewriter.rs @@ -407,8 +407,7 @@ impl SubqueryRewriter { from_distinct: false, mode: AggregateMode::Initial, limit: None, - grouping_id_index: 0, - grouping_sets: vec![], + grouping_sets: None, }; let compare = FunctionCall { @@ -610,8 +609,7 @@ impl SubqueryRewriter { ], from_distinct: false, limit: None, - grouping_id_index: 0, - grouping_sets: vec![], + grouping_sets: None, } .into(), ), diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/agg_index/query_rewrite.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/agg_index/query_rewrite.rs index 71e87b8c20cd..315904c589da 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/agg_index/query_rewrite.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/agg_index/query_rewrite.rs @@ -624,7 +624,7 @@ impl RewriteInfomartion<'_> { fn can_apply_index(&self) -> bool { if let Some((agg, _)) = self.aggregation { - if !agg.grouping_sets.is_empty() { + if agg.grouping_sets.is_some() { // Grouping sets is not supported. return false; } diff --git a/src/query/sql/src/planner/plans/aggregate.rs b/src/query/sql/src/planner/plans/aggregate.rs index a7263b82ed95..453e7339f731 100644 --- a/src/query/sql/src/planner/plans/aggregate.rs +++ b/src/query/sql/src/planner/plans/aggregate.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; +use common_expression::types::DataType; use crate::optimizer::ColumnSet; use crate::optimizer::Distribution; @@ -41,6 +42,18 @@ pub enum AggregateMode { Initial, } +/// Information for `GROUPING SETS`. +/// See the comment of [`crate::planner::binder::aggregate::GroupingSetsInfo`]. +#[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] +pub struct GroupingSets { + /// The index of the virtual column `_grouping_id`. It's valid only if `grouping_sets` is not empty. + pub grouping_id_index: IndexType, + /// See the comment in `GroupingSetsInfo`. + pub sets: Vec>, + /// See the comment in `GroupingSetsInfo`. 
+ pub dup_group_items: Vec<(IndexType, DataType)>, +} + #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Aggregate { pub mode: AggregateMode, @@ -51,10 +64,7 @@ pub struct Aggregate { // True if the plan is generated from distinct, else the plan is a normal aggregate; pub from_distinct: bool, pub limit: Option, - /// The index of the virtual column `_grouping_id`. It's valid only if `grouping_sets` is not empty. - pub grouping_id_index: IndexType, - /// The grouping sets, each grouping set is a list of `group_items` indices. - pub grouping_sets: Vec>, + pub grouping_sets: Option, } impl Aggregate { diff --git a/src/query/sql/src/planner/semantic/grouping_check.rs b/src/query/sql/src/planner/semantic/grouping_check.rs index d457b24468fa..ba0f898f2e09 100644 --- a/src/query/sql/src/planner/semantic/grouping_check.rs +++ b/src/query/sql/src/planner/semantic/grouping_check.rs @@ -56,8 +56,8 @@ impl<'a> GroupingChecker<'a> { .build() }; - if let Some(grouping_id) = &self.bind_context.aggregate_info.grouping_id_column { - if grouping_id.index != column_binding.index { + if let Some(grouping_sets) = &self.bind_context.aggregate_info.grouping_sets { + if grouping_sets.grouping_id_column.index != column_binding.index { column_binding.data_type = Box::new(column_binding.data_type.wrap_nullable()); } } diff --git a/tests/sqllogictests/suites/duckdb/sql/aggregate/group/group_by_grouping_sets.test b/tests/sqllogictests/suites/duckdb/sql/aggregate/group/group_by_grouping_sets.test index c152cdb272ee..e19ae1bf808e 100644 --- a/tests/sqllogictests/suites/duckdb/sql/aggregate/group/group_by_grouping_sets.test +++ b/tests/sqllogictests/suites/duckdb/sql/aggregate/group/group_by_grouping_sets.test @@ -148,8 +148,107 @@ a NULL 7 1 0 1 2 b NULL 11 1 0 1 2 NULL NULL 18 1 1 3 3 +# ISSUE-12852. Aggregation function argument is in grouping sets. 
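# Before this fix, an aggregate whose argument also appears in the grouping set could
# receive the NULL-filled copy of that column produced by `AggregateExpand` and panic.
# The cases below run against both the original table `t` and a NOT NULL copy `tt`.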
+query TT +SELECT arg_min(c, 10), c FROM t GROUP BY CUBE (c) ORDER BY c; +---- +1 1 +2 2 +3 3 +4 4 +5 5 +1 NULL + +query TT +SELECT min(c), c FROM t GROUP BY CUBE (c) ORDER BY c; +---- +1 1 +2 2 +3 3 +4 4 +5 5 +1 NULL + +query TT +SELECT min(c + 1), c + 1 FROM t GROUP BY CUBE (c + 1) ORDER BY c + 1; +---- +2 2 +3 3 +4 4 +5 5 +6 6 +2 NULL + +query TTTTTT +SELECT min(a), min(b), min(c), max(c), a, b FROM t GROUP BY CUBE (a, b) ORDER BY a, b; +---- +a A 1 2 a A +a B 1 3 a B +a A 1 3 a NULL +b A 1 4 b A +b B 1 5 b B +b A 1 5 b NULL +a A 1 4 NULL A +a B 1 5 NULL B +a A 1 5 NULL NULL + +statement ok +drop table if exists tt; + +statement ok +create table tt (a string not null, b string not null, c int not null); + +statement ok +insert into tt select * from t; + +query TT +SELECT arg_min(c, 10), c FROM tt GROUP BY CUBE (c) ORDER BY c; +---- +1 1 +2 2 +3 3 +4 4 +5 5 +1 NULL + +query TT +SELECT min(c), c FROM tt GROUP BY CUBE (c) ORDER BY c; +---- +1 1 +2 2 +3 3 +4 4 +5 5 +1 NULL + +query TT +SELECT min(c + 1), c + 1 FROM tt GROUP BY CUBE (c + 1) ORDER BY c + 1; +---- +2 2 +3 3 +4 4 +5 5 +6 6 +2 NULL + +query TTTTTT +SELECT min(a), min(b), min(c), max(c), a, b FROM tt GROUP BY CUBE (a, b) ORDER BY a, b; +---- +a A 1 2 a A +a B 1 3 a B +a A 1 3 a NULL +b A 1 4 b A +b B 1 5 b B +b A 1 5 b NULL +a A 1 4 NULL A +a B 1 5 NULL B +a A 1 5 NULL NULL + statement ok drop table t all; statement ok -drop database grouping_sets; \ No newline at end of file +drop table tt all; + +statement ok +drop database grouping_sets; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain_grouping_sets.test b/tests/sqllogictests/suites/mode/standalone/explain/explain_grouping_sets.test index facab39196f0..e378af9d4f79 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain_grouping_sets.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/explain_grouping_sets.test @@ -2,11 +2,11 @@ query T explain select number % 2 as a, number % 3 as b, number % 5 as c from numbers(1) group by rollup(a, b, c); ---- EvalScalar -├── output columns: [a (#5), b (#6), c (#7)] +├── output columns: [a (#8), b (#9), c (#10)] ├── expressions: [group_item (#1), group_item (#2), group_item (#3)] ├── estimated rows: 1.00 └── AggregateFinal - ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#4)] + ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#7)] ├── group by: [a, b, c, _grouping_id] ├── aggregate functions: [] ├── estimated rows: 1.00 @@ -16,7 +16,7 @@ EvalScalar ├── aggregate functions: [] ├── estimated rows: 1.00 └── AggregateExpand - ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#4)] + ├── output columns: [a (#1), b (#2), c (#3), _dup_group_item_0 (#4), _dup_group_item_1 (#5), _dup_group_item_2 (#6), _grouping_id (#7)] ├── grouping sets: [(a, b, c), (a, b), (a), ()] ├── estimated rows: 1.00 └── EvalScalar @@ -37,11 +37,11 @@ query T explain select number % 2 as a, number % 3 as b, number % 5 as c from numbers(1) group by cube(a, b, c); ---- EvalScalar -├── output columns: [a (#5), b (#6), c (#7)] +├── output columns: [a (#8), b (#9), c (#10)] ├── expressions: [group_item (#1), group_item (#2), group_item (#3)] ├── estimated rows: 1.00 └── AggregateFinal - ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#4)] + ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#7)] ├── group by: [a, b, c, _grouping_id] ├── aggregate functions: [] ├── estimated rows: 1.00 @@ -51,7 +51,7 @@ EvalScalar ├── aggregate functions: [] ├── estimated rows: 1.00 └── 
AggregateExpand - ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#4)] + ├── output columns: [a (#1), b (#2), c (#3), _dup_group_item_0 (#4), _dup_group_item_1 (#5), _dup_group_item_2 (#6), _grouping_id (#7)] ├── grouping sets: [(), (a), (b), (c), (a, b), (a, c), (b, c), (a, b, c)] ├── estimated rows: 1.00 └── EvalScalar diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/explain_grouping_sets.test b/tests/sqllogictests/suites/mode/standalone/explain_native/explain_grouping_sets.test index facab39196f0..e378af9d4f79 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/explain_grouping_sets.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/explain_grouping_sets.test @@ -2,11 +2,11 @@ query T explain select number % 2 as a, number % 3 as b, number % 5 as c from numbers(1) group by rollup(a, b, c); ---- EvalScalar -├── output columns: [a (#5), b (#6), c (#7)] +├── output columns: [a (#8), b (#9), c (#10)] ├── expressions: [group_item (#1), group_item (#2), group_item (#3)] ├── estimated rows: 1.00 └── AggregateFinal - ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#4)] + ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#7)] ├── group by: [a, b, c, _grouping_id] ├── aggregate functions: [] ├── estimated rows: 1.00 @@ -16,7 +16,7 @@ EvalScalar ├── aggregate functions: [] ├── estimated rows: 1.00 └── AggregateExpand - ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#4)] + ├── output columns: [a (#1), b (#2), c (#3), _dup_group_item_0 (#4), _dup_group_item_1 (#5), _dup_group_item_2 (#6), _grouping_id (#7)] ├── grouping sets: [(a, b, c), (a, b), (a), ()] ├── estimated rows: 1.00 └── EvalScalar @@ -37,11 +37,11 @@ query T explain select number % 2 as a, number % 3 as b, number % 5 as c from numbers(1) group by cube(a, b, c); ---- EvalScalar -├── output columns: [a (#5), b (#6), c (#7)] +├── output columns: [a (#8), b (#9), c (#10)] ├── expressions: [group_item (#1), group_item (#2), group_item (#3)] ├── estimated rows: 1.00 └── AggregateFinal - ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#4)] + ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#7)] ├── group by: [a, b, c, _grouping_id] ├── aggregate functions: [] ├── estimated rows: 1.00 @@ -51,7 +51,7 @@ EvalScalar ├── aggregate functions: [] ├── estimated rows: 1.00 └── AggregateExpand - ├── output columns: [a (#1), b (#2), c (#3), _grouping_id (#4)] + ├── output columns: [a (#1), b (#2), c (#3), _dup_group_item_0 (#4), _dup_group_item_1 (#5), _dup_group_item_2 (#6), _grouping_id (#7)] ├── grouping sets: [(), (a), (b), (c), (a, b), (a, c), (b, c), (a, b, c)] ├── estimated rows: 1.00 └── EvalScalar From be39b462785deef7cb2170ba39e3e3c4d1ae6b60 Mon Sep 17 00:00:00 2001 From: zhyass Date: Wed, 20 Sep 2023 19:55:06 +0800 Subject: [PATCH 04/21] fix: add column panic (#12946) * fix add column panic * Add sqllogc test --- src/meta/app/src/schema/table.rs | 23 +++++++++-------- .../interpreters/interpreter_table_create.rs | 11 ++++++-- .../interpreter_table_modify_column.rs | 1 + .../05_0028_ddl_alter_table_add_drop_column | 25 +++++++++++++++++++ 4 files changed, 48 insertions(+), 12 deletions(-) diff --git a/src/meta/app/src/schema/table.rs b/src/meta/app/src/schema/table.rs index e2eb77c6296f..38e5c5f352f5 100644 --- a/src/meta/app/src/schema/table.rs +++ b/src/meta/app/src/schema/table.rs @@ -243,22 +243,14 @@ pub struct TableMeta { } impl TableMeta { - pub fn add_columns(&mut self, fields: &[TableField], field_comments: 
&[String]) -> Result<()> { - let mut new_schema = self.schema.as_ref().to_owned(); - new_schema.add_columns(fields)?; - self.schema = Arc::new(new_schema); - field_comments.iter().for_each(|c| { - self.field_comments.push(c.to_owned()); - }); - Ok(()) - } - pub fn add_column( &mut self, field: &TableField, comment: &str, index: FieldIndex, ) -> Result<()> { + self.fill_field_comments(); + let mut new_schema = self.schema.as_ref().to_owned(); new_schema.add_column(field, index)?; self.schema = Arc::new(new_schema); @@ -267,12 +259,23 @@ impl TableMeta { } pub fn drop_column(&mut self, column: &str) -> Result<()> { + self.fill_field_comments(); + let mut new_schema = self.schema.as_ref().to_owned(); let index = new_schema.drop_column(column)?; self.field_comments.remove(index); self.schema = Arc::new(new_schema); Ok(()) } + + /// To fix the field comments panic. + pub fn fill_field_comments(&mut self) { + let num_fields = self.schema.num_fields(); + // If the field comments is confused, fill it with empty string. + if self.field_comments.len() < num_fields { + self.field_comments = vec!["".to_string(); num_fields]; + } + } } impl TableInfo { diff --git a/src/query/service/src/interpreters/interpreter_table_create.rs b/src/query/service/src/interpreters/interpreter_table_create.rs index 3317efc99459..f574ef453e3f 100644 --- a/src/query/service/src/interpreters/interpreter_table_create.rs +++ b/src/query/service/src/interpreters/interpreter_table_create.rs @@ -243,6 +243,11 @@ impl CreateTableInterpreter { } is_valid_column(field.name())?; } + let field_comments = if self.plan.field_comments.is_empty() { + vec!["".to_string(); fields.len()] + } else { + self.plan.field_comments.clone() + }; let schema = TableSchemaRefExt::create(fields); let mut table_meta = TableMeta { @@ -252,7 +257,7 @@ impl CreateTableInterpreter { part_prefix: self.plan.part_prefix.clone(), options: self.plan.options.clone(), default_cluster_key: None, - field_comments: self.plan.field_comments.clone(), + field_comments, drop_on: None, statistics: if let Some(stat) = statistics { stat @@ -325,6 +330,8 @@ impl CreateTableInterpreter { number_of_segments: Some(snapshot.segments.len() as u64), number_of_blocks: Some(snapshot.summary.block_count), }; + + let field_comments = vec!["".to_string(); snapshot.schema.num_fields()]; let table_meta = TableMeta { schema: Arc::new(snapshot.schema.clone()), engine: self.plan.engine.to_string(), @@ -332,7 +339,7 @@ impl CreateTableInterpreter { part_prefix: self.plan.part_prefix.clone(), options, default_cluster_key: None, - field_comments: self.plan.field_comments.clone(), + field_comments, drop_on: None, statistics: stat, ..Default::default() diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index a4d13a5c2a82..322fdd41a5e6 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -238,6 +238,7 @@ impl ModifyTableColumnInterpreter { } let mut table_info = table.get_table_info().clone(); + table_info.meta.fill_field_comments(); for (field, comment) in field_and_comments { let column = &field.name.to_string(); let data_type = &field.data_type; diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column b/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column index a657a1a8171e..b5889011da0b 100644 --- 
a/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column +++ b/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column @@ -232,5 +232,30 @@ SELECT a,c FROM `05_0028_at_t0_3` order by a statement ok ALTER TABLE `05_0028_at_t0_3` ADD COLUMN d int not null AFTER c +statement ok +CREATE TABLE `05_0028_at_t0_4` AS SELECT a,d FROM `05_0028_at_t0_3` + +statement ok +ALTER TABLE `05_0028_at_t0_4` ADD COLUMN e int COMMENT 'end' + +statement ok +ALTER TABLE `05_0028_at_t0_4` MODIFY COLUMN d uint64 COMMENT 'middle' + +query TT +SHOW CREATE TABLE `05_0028_at_t0_4` +---- +05_0028_at_t0_4 CREATE TABLE `05_0028_at_t0_4` ( `a` FLOAT, `d` BIGINT UNSIGNED NULL COMMENT 'middle', `e` INT NULL COMMENT 'end' ) ENGINE=FUSE + +query IIT +SELECT * FROM `05_0028_at_t0_4` order by a +---- +0.1 0 NULL +0.2 0 NULL +0.3 0 NULL +0.4 0 NULL + statement ok DROP TABLE IF EXISTS `05_0028_at_t0_3` + +statement ok +DROP TABLE IF EXISTS `05_0028_at_t0_4` From 49cb14d6486fdcfae16a026d085cd6c114c2c045 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Wed, 20 Sep 2023 19:55:42 +0800 Subject: [PATCH 05/21] feat(sqlsmith): add table function (#12942) * add table function * optimize code --- .../service/src/table_functions/srf/range.rs | 6 +- src/tests/sqlsmith/src/runner.rs | 5 +- src/tests/sqlsmith/src/sql_gen/func.rs | 2 +- src/tests/sqlsmith/src/sql_gen/query.rs | 123 +++++++++++++++++- 4 files changed, 128 insertions(+), 8 deletions(-) diff --git a/src/query/service/src/table_functions/srf/range.rs b/src/query/service/src/table_functions/srf/range.rs index 54e5620fd6f4..58b376585356 100644 --- a/src/query/service/src/table_functions/srf/range.rs +++ b/src/query/service/src/table_functions/srf/range.rs @@ -68,9 +68,9 @@ impl RangeTable { validate_args(&table_args.positioned, table_func_name)?; let data_type = match &table_args.positioned[0] { - Scalar::Number(_) => Int64Type::data_type(), - Scalar::Timestamp(_) => TimestampType::data_type(), - Scalar::Date(_) => DateType::data_type(), + Scalar::Number(_) => DataType::Number(NumberDataType::Int64), + Scalar::Timestamp(_) => DataType::Timestamp, + Scalar::Date(_) => DataType::Date, other => { return Err(ErrorCode::BadArguments(format!( "Unsupported data type for generate_series: {:?}", diff --git a/src/tests/sqlsmith/src/runner.rs b/src/tests/sqlsmith/src/runner.rs index 5640fb89e63d..af09186a5b9c 100644 --- a/src/tests/sqlsmith/src/runner.rs +++ b/src/tests/sqlsmith/src/runner.rs @@ -32,7 +32,7 @@ use rand::SeedableRng; use crate::sql_gen::SqlGenerator; use crate::sql_gen::Table; -const KNOWN_ERRORS: [&str; 27] = [ +const KNOWN_ERRORS: [&str; 30] = [ // Errors caused by illegal parameters "Overflow on date YMD", "timestamp is out of range", @@ -62,6 +62,9 @@ const KNOWN_ERRORS: [&str; 27] = [ "The arguments of AggregateRetention should be an expression which returns a Boolean result", "AggregateWindowFunnelFunction does not support type", "nth_value should count from 1", + "start must be less than or equal to end when step is positive vice versa", + "Expected Number, Date or Timestamp type, but got", + "Unsupported data type for generate_series", ]; pub struct Runner { diff --git a/src/tests/sqlsmith/src/sql_gen/func.rs b/src/tests/sqlsmith/src/sql_gen/func.rs index 82388e03dbe0..22209fb959cf 100644 --- a/src/tests/sqlsmith/src/sql_gen/func.rs +++ b/src/tests/sqlsmith/src/sql_gen/func.rs @@ -283,7 +283,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { (name, vec![], args_type) } DataType::Decimal(_) 
=> { - let decimal = vec!["to_float64", "to_folat32", "to_decimal", "try_to_decimal"]; + let decimal = vec!["to_float64", "to_float32", "to_decimal", "try_to_decimal"]; let name = decimal[self.rng.gen_range(0..=3)].to_string(); if name == "to_decimal" || name == "try_to_decimal" { let args_type = vec![self.gen_data_type(); 1]; diff --git a/src/tests/sqlsmith/src/sql_gen/query.rs b/src/tests/sqlsmith/src/sql_gen/query.rs index c3fb249d960f..f9bb81cbcfd6 100644 --- a/src/tests/sqlsmith/src/sql_gen/query.rs +++ b/src/tests/sqlsmith/src/sql_gen/query.rs @@ -28,6 +28,10 @@ use common_ast::ast::SelectTarget; use common_ast::ast::SetExpr; use common_ast::ast::TableReference; use common_expression::types::DataType; +use common_expression::types::NumberDataType; +use common_expression::TableDataType; +use common_expression::TableField; +use common_expression::TableSchemaRefExt; use rand::Rng; use crate::sql_gen::Column; @@ -296,21 +300,29 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } fn gen_from(&mut self) -> Vec { - match self.rng.gen_range(0..=9) { + let mut table_refs = vec![]; + // TODO: generate more table reference + // let table_ref_num = self.rng.gen_range(1..=3); + match self.rng.gen_range(0..=10) { 0..=7 => { let i = self.rng.gen_range(0..self.tables.len()); let table_ref = self.gen_table_ref(self.tables[i].clone()); - vec![table_ref] + table_refs.push(table_ref); } // join 8..=9 => { self.is_join = true; let join = self.gen_join_table_ref(); - vec![join] + table_refs.push(join); + } + 10 => { + let table_func = self.gen_table_func(); + table_refs.push(table_func); } // TODO _ => unreachable!(), } + table_refs } fn gen_table_ref(&mut self, table: Table) -> TableReference { @@ -336,6 +348,111 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } } + // Only test: + // [numbers, numbers_mt, numbers_local, generate_series, range] + // No need to test: + // [fuse_snapshot,fuse_segment, fuse_block, fuse_column, fuse_statistic, clustering_information, + // sync_crash_me, async_crash_me ,infer_schema ,list_stage, + // ai_to_sql, execute_background_job, license_info, suggested_background_tasks ,tenant_quota] + fn gen_table_func(&mut self) -> TableReference { + let tbl_func = [ + "numbers", + "numbers_mt", + "numbers_local", + "generate_series", + "range", + ]; + let name = tbl_func[self.rng.gen_range(0..=4)]; + + match name { + "numbers" | "numbers_mt" | "numbers_local" => { + let table = Table { + name: name.to_string(), + schema: TableSchemaRefExt::create(vec![TableField::new( + "number", + TableDataType::Number(NumberDataType::UInt64), + )]), + }; + self.bound_table(table); + TableReference::TableFunction { + span: None, + name: Identifier::from_name(name), + params: vec![Expr::Literal { + span: None, + lit: Literal::UInt64(self.rng.gen_range(0..=10)), + }], + named_params: vec![], + alias: None, + } + } + "generate_series" | "range" => { + let mut gen_expr = || -> (TableDataType, Expr) { + let idx = self.rng.gen_range(0..=2); + match idx { + 0 => { + let arg = Expr::Literal { + span: None, + lit: Literal::UInt64(self.rng.gen_range(0..=1000000)), + }; + (TableDataType::Timestamp, Expr::FunctionCall { + span: None, + distinct: false, + name: Identifier::from_name("to_date".to_string()), + args: vec![arg], + params: vec![], + window: None, + lambda: None, + }) + } + 1 => { + let arg = Expr::Literal { + span: None, + lit: Literal::UInt64(self.rng.gen_range(0..=10000000000000)), + }; + (TableDataType::Date, Expr::FunctionCall { + span: None, + distinct: false, + name: 
Identifier::from_name("to_timestamp".to_string()), + args: vec![arg], + params: vec![], + window: None, + lambda: None, + }) + } + 2 => ( + TableDataType::Number(NumberDataType::Int64), + Expr::Literal { + span: None, + lit: Literal::UInt64(self.rng.gen_range(0..=1000)), + }, + ), + _ => unreachable!(), + } + }; + let (ty1, param1) = gen_expr(); + let (_, param2) = gen_expr(); + let table = Table { + name: name.to_string(), + schema: TableSchemaRefExt::create(vec![TableField::new(name, ty1)]), + }; + let (_, param3) = gen_expr(); + self.bound_table(table); + + TableReference::TableFunction { + span: None, + name: Identifier::from_name(name), + params: if self.rng.gen_bool(0.5) { + vec![param1, param2] + } else { + vec![param1, param2, param3] + }, + named_params: vec![], + alias: None, + } + } + _ => unreachable!(), + } + } fn gen_join_table_ref(&mut self) -> TableReference { let i = self.rng.gen_range(0..self.tables.len()); let j = if i == self.tables.len() - 1 { 0 } else { i + 1 }; From 2add53d0b329a4df6290b2b49a0ea2860f5246ff Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Wed, 20 Sep 2023 21:00:44 +0800 Subject: [PATCH 06/21] fix: add session info in final response. (#12947) --- .../src/servers/http/v1/query/http_query.rs | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/query/service/src/servers/http/v1/query/http_query.rs b/src/query/service/src/servers/http/v1/query/http_query.rs index 5ec81be59e31..e97489cfd41b 100644 --- a/src/query/service/src/servers/http/v1/query/http_query.rs +++ b/src/query/service/src/servers/http/v1/query/http_query.rs @@ -387,28 +387,35 @@ impl HttpQuery { pub async fn get_response_page(&self, page_no: usize) -> Result { let data = Some(self.get_page(page_no).await?); let state = self.get_state().await; - let session_conf = self.request.session.clone().unwrap_or_default(); - let session_conf = if let Some(affect) = &state.affect { - Some(session_conf.apply_affect(affect)) + let session = self.request.session.clone().unwrap_or_default(); + let session = if let Some(affect) = &state.affect { + Some(session.apply_affect(affect)) } else { - Some(session_conf) + Some(session) }; Ok(HttpQueryResponseInternal { data, state, - session: session_conf, + session, session_id: self.session_id.clone(), }) } #[async_backtrace::framed] pub async fn get_response_state_only(&self) -> HttpQueryResponseInternal { + let state = self.get_state().await; + let session = self.request.session.clone().unwrap_or_default(); + let session = if let Some(affect) = &state.affect { + Some(session.apply_affect(affect)) + } else { + Some(session) + }; HttpQueryResponseInternal { data: None, session_id: self.session_id.clone(), - state: self.get_state().await, - session: None, + state, + session, } } From 321b99b26e00623a9c1a83236e2be9ef093047ec Mon Sep 17 00:00:00 2001 From: baishen Date: Wed, 20 Sep 2023 21:45:00 +0800 Subject: [PATCH 07/21] fix(query): fix add/subtract datetime with big integer panic (#12940) * fix(query): fix add/subtract datetime with big integer panic * fix * fix domain --- src/query/expression/src/utils/date_helper.rs | 7 + src/query/functions/src/scalars/datetime.rs | 102 ++++++++++---- .../functions/tests/it/scalars/datetime.rs | 18 +++ .../tests/it/scalars/testdata/datetime.txt | 130 ++++++++++++++++++ .../it/scalars/testdata/function_list.txt | 4 + .../02_function/02_0012_function_datetimes | 35 +++++ 6 files changed, 268 insertions(+), 28 deletions(-) diff --git a/src/query/expression/src/utils/date_helper.rs 
b/src/query/expression/src/utils/date_helper.rs index 528af75adfb7..0722f0097f9c 100644 --- a/src/query/expression/src/utils/date_helper.rs +++ b/src/query/expression/src/utils/date_helper.rs @@ -417,6 +417,7 @@ pub struct ToYYYYMMDD; pub struct ToYYYYMMDDHH; pub struct ToYYYYMMDDHHMMSS; pub struct ToYear; +pub struct ToQuarter; pub struct ToMonth; pub struct ToDayOfYear; pub struct ToDayOfMonth; @@ -464,6 +465,12 @@ impl ToNumber for ToYear { } } +impl ToNumber for ToQuarter { + fn to_number(dt: &DateTime) -> u8 { + (dt.month0() / 3 + 1) as u8 + } +} + impl ToNumber for ToMonth { fn to_number(dt: &DateTime) -> u8 { dt.month() as u8 diff --git a/src/query/functions/src/scalars/datetime.rs b/src/query/functions/src/scalars/datetime.rs index 62d2a4a3a85d..03b40a8f5024 100644 --- a/src/query/functions/src/scalars/datetime.rs +++ b/src/query/functions/src/scalars/datetime.rs @@ -909,6 +909,13 @@ fn register_to_number_functions(registry: &mut FunctionRegistry) { ToNumberImpl::eval_date::(val, ctx.func_ctx.tz) }), ); + registry.register_passthrough_nullable_1_arg::( + "to_quarter", + |_, _| FunctionDomain::Full, + vectorize_1_arg::(|val, ctx| { + ToNumberImpl::eval_date::(val, ctx.func_ctx.tz) + }), + ); registry.register_passthrough_nullable_1_arg::( "to_month", |_, _| FunctionDomain::Full, @@ -973,6 +980,13 @@ fn register_to_number_functions(registry: &mut FunctionRegistry) { ToNumberImpl::eval_timestamp::(val, ctx.func_ctx.tz) }), ); + registry.register_passthrough_nullable_1_arg::( + "to_quarter", + |_, _| FunctionDomain::Full, + vectorize_1_arg::(|val, ctx| { + ToNumberImpl::eval_timestamp::(val, ctx.func_ctx.tz) + }), + ); registry.register_passthrough_nullable_1_arg::( "to_month", |_, _| FunctionDomain::Full, @@ -1027,23 +1041,31 @@ fn register_to_number_functions(registry: &mut FunctionRegistry) { } fn register_timestamp_add_sub(registry: &mut FunctionRegistry) { - registry.register_2_arg::( + registry.register_passthrough_nullable_2_arg::( "plus", |_, lhs, rhs| { (|| { - let lm = lhs.max; - let ln = lhs.min; - let rm: i32 = num_traits::cast::cast(rhs.max)?; - let rn: i32 = num_traits::cast::cast(rhs.min)?; + let lm: i64 = num_traits::cast::cast(lhs.max)?; + let ln: i64 = num_traits::cast::cast(lhs.min)?; + let rm = rhs.max; + let rn = rhs.min; Some(FunctionDomain::Domain(SimpleDomain:: { - min: ln.checked_add(rn)?, - max: lm.checked_add(rm)?, + min: check_date(ln + rn).ok()?, + max: check_date(lm + rm).ok()?, })) })() - .unwrap_or(FunctionDomain::Full) + .unwrap_or(FunctionDomain::MayThrow) }, - |a, b, _| a + (b as i32), + vectorize_with_builder_2_arg::(|a, b, output, ctx| { + match check_date((a as i64) + b) { + Ok(v) => output.push(v), + Err(err) => { + ctx.set_error(output.len(), err); + output.push(0); + } + } + }), ); registry.register_2_arg::( @@ -1065,7 +1087,7 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) { |a, b, _| a + b, ); - registry.register_2_arg::( + registry.register_passthrough_nullable_2_arg::( "plus", |_, lhs, rhs| { (|| { @@ -1074,13 +1096,21 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) { let rm = rhs.max; let rn = rhs.min; Some(FunctionDomain::Domain(SimpleDomain:: { - min: ln.checked_add(rn)?, - max: lm.checked_add(rm)?, + min: check_timestamp(ln + rn).ok()?, + max: check_timestamp(lm + rm).ok()?, })) })() - .unwrap_or(FunctionDomain::Full) + .unwrap_or(FunctionDomain::MayThrow) }, - |a, b, _| a + b, + vectorize_with_builder_2_arg::( + |a, b, output, ctx| match check_timestamp(a + b) { + Ok(v) => output.push(v), + Err(err) 
=> { + ctx.set_error(output.len(), err); + output.push(0); + } + }, + ), ); registry.register_2_arg::( @@ -1101,23 +1131,31 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) { |a, b, _| a + b, ); - registry.register_2_arg::( + registry.register_passthrough_nullable_2_arg::( "minus", |_, lhs, rhs| { (|| { - let lm = lhs.max; - let ln = lhs.min; - let rm: i32 = num_traits::cast::cast(rhs.max)?; - let rn: i32 = num_traits::cast::cast(rhs.min)?; + let lm: i64 = num_traits::cast::cast(lhs.max)?; + let ln: i64 = num_traits::cast::cast(lhs.min)?; + let rm = rhs.max; + let rn = rhs.min; Some(FunctionDomain::Domain(SimpleDomain:: { - min: ln.checked_sub(rm)?, - max: lm.checked_sub(rn)?, + min: check_date(ln - rn).ok()?, + max: check_date(lm - rm).ok()?, })) })() - .unwrap_or(FunctionDomain::Full) + .unwrap_or(FunctionDomain::MayThrow) }, - |a, b, _| a - b as i32, + vectorize_with_builder_2_arg::(|a, b, output, ctx| { + match check_date((a as i64) - b) { + Ok(v) => output.push(v), + Err(err) => { + ctx.set_error(output.len(), err); + output.push(0); + } + } + }), ); registry.register_2_arg::( @@ -1139,7 +1177,7 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) { |a, b, _| a - b, ); - registry.register_2_arg::( + registry.register_passthrough_nullable_2_arg::( "minus", |_, lhs, rhs| { (|| { @@ -1149,13 +1187,21 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) { let rn = rhs.min; Some(FunctionDomain::Domain(SimpleDomain:: { - min: ln.checked_sub(rm)?, - max: lm.checked_sub(rn)?, + min: check_timestamp(ln - rn).ok()?, + max: check_timestamp(lm - rm).ok()?, })) })() - .unwrap_or(FunctionDomain::Full) + .unwrap_or(FunctionDomain::MayThrow) }, - |a, b, _| a - b, + vectorize_with_builder_2_arg::( + |a, b, output, ctx| match check_timestamp(a - b) { + Ok(v) => output.push(v), + Err(err) => { + ctx.set_error(output.len(), err); + output.push(0); + } + }, + ), ); registry.register_2_arg::( diff --git a/src/query/functions/tests/it/scalars/datetime.rs b/src/query/functions/tests/it/scalars/datetime.rs index e672a6a095bb..8f5fb8ccc028 100644 --- a/src/query/functions/tests/it/scalars/datetime.rs +++ b/src/query/functions/tests/it/scalars/datetime.rs @@ -108,10 +108,14 @@ fn test_date_add_subtract(file: &mut impl Write) { run_ast(file, "add_years(to_date(0), 100)", &[]); run_ast(file, "add_months(to_date(0), 100)", &[]); run_ast(file, "add_days(to_date(0), 100)", &[]); + run_ast(file, "add(to_date(0), 100)", &[]); + run_ast(file, "add(to_date(0), 10000000)", &[]); run_ast(file, "subtract_years(to_date(0), 100)", &[]); run_ast(file, "subtract_quarters(to_date(0), 100)", &[]); run_ast(file, "subtract_months(to_date(0), 100)", &[]); run_ast(file, "subtract_days(to_date(0), 100)", &[]); + run_ast(file, "subtract(to_date(0), 100)", &[]); + run_ast(file, "subtract(to_date(0), 10000000)", &[]); run_ast(file, "add_years(a, b)", &[ ("a", DateType::from_data(vec![-100, 0, 100])), ("b", Int32Type::from_data(vec![1, 2, 3])), @@ -155,6 +159,8 @@ fn test_timestamp_add_subtract(file: &mut impl Write) { run_ast(file, "add_hours(to_timestamp(0), 100)", &[]); run_ast(file, "add_minutes(to_timestamp(0), 100)", &[]); run_ast(file, "add_seconds(to_timestamp(0), 100)", &[]); + run_ast(file, "add(to_timestamp(0), 100000000000000)", &[]); + run_ast(file, "add(to_timestamp(0), 1000000000000000000)", &[]); run_ast(file, "subtract_years(to_timestamp(0), 100)", &[]); run_ast(file, "subtract_quarters(to_timestamp(0), 100)", &[]); run_ast(file, "subtract_months(to_timestamp(0), 100)", &[]); @@ 
-162,6 +168,8 @@ fn test_timestamp_add_subtract(file: &mut impl Write) { run_ast(file, "subtract_hours(to_timestamp(0), 100)", &[]); run_ast(file, "subtract_minutes(to_timestamp(0), 100)", &[]); run_ast(file, "subtract_seconds(to_timestamp(0), 100)", &[]); + run_ast(file, "subtract(to_timestamp(0), 100000000000000)", &[]); + run_ast(file, "subtract(to_timestamp(0), 1000000000000000000)", &[]); run_ast(file, "add_years(a, b)", &[ ("a", TimestampType::from_data(vec![-100, 0, 100])), ("b", Int32Type::from_data(vec![1, 2, 3])), @@ -462,6 +470,7 @@ fn test_to_number(file: &mut impl Write) { run_ast(file, "to_yyyymmdd(to_date(18875))", &[]); run_ast(file, "to_yyyymmddhhmmss(to_date(18875))", &[]); run_ast(file, "to_year(to_date(18875))", &[]); + run_ast(file, "to_quarter(to_date(18875))", &[]); run_ast(file, "to_month(to_date(18875))", &[]); run_ast(file, "to_day_of_year(to_date(18875))", &[]); run_ast(file, "to_day_of_month(to_date(18875))", &[]); @@ -482,6 +491,10 @@ fn test_to_number(file: &mut impl Write) { "a", DateType::from_data(vec![-100, 0, 100]), )]); + run_ast(file, "to_quarter(a)", &[( + "a", + DateType::from_data(vec![-100, 0, 100]), + )]); run_ast(file, "to_month(a)", &[( "a", DateType::from_data(vec![-100, 0, 100]), @@ -504,6 +517,7 @@ fn test_to_number(file: &mut impl Write) { run_ast(file, "to_yyyymmdd(to_timestamp(1630812366))", &[]); run_ast(file, "to_yyyymmddhhmmss(to_timestamp(1630812366))", &[]); run_ast(file, "to_year(to_timestamp(1630812366))", &[]); + run_ast(file, "to_quarter(to_timestamp(1630812366))", &[]); run_ast(file, "to_month(to_timestamp(1630812366))", &[]); run_ast(file, "to_day_of_year(to_timestamp(1630812366))", &[]); run_ast(file, "to_day_of_month(to_timestamp(1630812366))", &[]); @@ -527,6 +541,10 @@ fn test_to_number(file: &mut impl Write) { "a", TimestampType::from_data(vec![-100, 0, 100]), )]); + run_ast(file, "to_quarter(a)", &[( + "a", + TimestampType::from_data(vec![-100, 0, 100]), + )]); run_ast(file, "to_month(a)", &[( "a", TimestampType::from_data(vec![-100, 0, 100]), diff --git a/src/query/functions/tests/it/scalars/testdata/datetime.txt b/src/query/functions/tests/it/scalars/testdata/datetime.txt index 699082a4c25a..2b42bc1f3d04 100644 --- a/src/query/functions/tests/it/scalars/testdata/datetime.txt +++ b/src/query/functions/tests/it/scalars/testdata/datetime.txt @@ -355,6 +355,23 @@ output domain : {100..=100} output : '1970-04-11' +ast : add(to_date(0), 100) +raw expr : add(to_date(0), 100) +checked expr : plus(to_date(to_int64(0_u8)), to_int64(100_u8)) +optimized expr : 100 +output type : Date +output domain : {100..=100} +output : '1970-04-11' + + +error: + --> SQL:1:1 + | +1 | add(to_date(0), 10000000) + | ^^^^^^^^^^^^^^^^^^^^^^^^^ date is out of range while evaluating function `plus('1970-01-01', 10000000)` + + + ast : subtract_years(to_date(0), 100) raw expr : subtract_years(to_date(0), 100) checked expr : subtract_years(to_date(to_int64(0_u8)), to_int64(100_u8)) @@ -391,6 +408,23 @@ output domain : {-100..=-100} output : '1969-09-23' +ast : subtract(to_date(0), 100) +raw expr : subtract(to_date(0), 100) +checked expr : minus(to_date(to_int64(0_u8)), to_int64(100_u8)) +optimized expr : -100 +output type : Date +output domain : {-100..=-100} +output : '1969-09-23' + + +error: + --> SQL:1:1 + | +1 | subtract(to_date(0), 10000000) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ date is out of range while evaluating function `minus('1970-01-01', 10000000)` + + + ast : add_years(a, b) raw expr : add_years(a::Date, b::Int32) checked expr : add_years(a, 
to_int64(b)) @@ -646,6 +680,23 @@ output domain : {100000000..=100000000} output : '1970-01-01 00:01:40.000000' +ast : add(to_timestamp(0), 100000000000000) +raw expr : add(to_timestamp(0), 100000000000000) +checked expr : plus(to_timestamp(to_int64(0_u8)), to_int64(100000000000000_u64)) +optimized expr : 100000000000000 +output type : Timestamp +output domain : {100000000000000..=100000000000000} +output : '1973-03-03 09:46:40.000000' + + +error: + --> SQL:1:1 + | +1 | add(to_timestamp(0), 1000000000000000000) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ timestamp is out of range while evaluating function `plus('1970-01-01 00:00:00.000000', 1000000000000000000)` + + + ast : subtract_years(to_timestamp(0), 100) raw expr : subtract_years(to_timestamp(0), 100) checked expr : subtract_years(to_timestamp(to_int64(0_u8)), to_int64(100_u8)) @@ -709,6 +760,23 @@ output domain : {-100000000..=-100000000} output : '1969-12-31 23:58:20.000000' +ast : subtract(to_timestamp(0), 100000000000000) +raw expr : subtract(to_timestamp(0), 100000000000000) +checked expr : minus(to_timestamp(to_int64(0_u8)), to_int64(100000000000000_u64)) +optimized expr : -100000000000000 +output type : Timestamp +output domain : {-100000000000000..=-100000000000000} +output : '1966-10-31 14:13:20.000000' + + +error: + --> SQL:1:1 + | +1 | subtract(to_timestamp(0), 1000000000000000000) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ timestamp is out of range while evaluating function `minus('1970-01-01 00:00:00.000000', 1000000000000000000)` + + + ast : add_years(a, b) raw expr : add_years(a::Timestamp, b::Int32) checked expr : add_years(a, to_int64(b)) @@ -2507,6 +2575,15 @@ output domain : {2021..=2021} output : 2021 +ast : to_quarter(to_date(18875)) +raw expr : to_quarter(to_date(18875)) +checked expr : to_quarter(to_date(to_int64(18875_u16))) +optimized expr : 3_u8 +output type : UInt8 +output domain : {3..=3} +output : 3 + + ast : to_month(to_date(18875)) raw expr : to_month(to_date(18875)) checked expr : to_month(to_date(to_int64(18875_u16))) @@ -2631,6 +2708,28 @@ evaluation (internal): +--------+----------------------------+ +ast : to_quarter(a) +raw expr : to_quarter(a::Date) +checked expr : to_quarter(a) +evaluation: ++--------+--------------+-----------+ +| | a | Output | ++--------+--------------+-----------+ +| Type | Date | UInt8 | +| Domain | {-100..=100} | {0..=255} | +| Row 0 | '1969-09-23' | 3 | +| Row 1 | '1970-01-01' | 1 | +| Row 2 | '1970-04-11' | 2 | ++--------+--------------+-----------+ +evaluation (internal): ++--------+------------------+ +| Column | Data | ++--------+------------------+ +| a | [-100, 0, 100] | +| Output | UInt8([3, 1, 2]) | ++--------+------------------+ + + ast : to_month(a) raw expr : to_month(a::Date) checked expr : to_month(a) @@ -2755,6 +2854,15 @@ output domain : {2021..=2021} output : 2021 +ast : to_quarter(to_timestamp(1630812366)) +raw expr : to_quarter(to_timestamp(1630812366)) +checked expr : to_quarter(to_timestamp(to_int64(1630812366_u32))) +optimized expr : 3_u8 +output type : UInt8 +output domain : {3..=3} +output : 3 + + ast : to_month(to_timestamp(1630812366)) raw expr : to_month(to_timestamp(1630812366)) checked expr : to_month(to_timestamp(to_int64(1630812366_u32))) @@ -2906,6 +3014,28 @@ evaluation (internal): +--------+----------------------------+ +ast : to_quarter(a) +raw expr : to_quarter(a::Timestamp) +checked expr : to_quarter(a) +evaluation: ++--------+------------------------------+-----------+ +| | a | Output | 
++--------+------------------------------+-----------+ +| Type | Timestamp | UInt8 | +| Domain | {-100..=100} | {0..=255} | +| Row 0 | '1969-12-31 23:59:59.999900' | 4 | +| Row 1 | '1970-01-01 00:00:00.000000' | 1 | +| Row 2 | '1970-01-01 00:00:00.000100' | 1 | ++--------+------------------------------+-----------+ +evaluation (internal): ++--------+------------------+ +| Column | Data | ++--------+------------------+ +| a | [-100, 0, 100] | +| Output | UInt8([4, 1, 1]) | ++--------+------------------+ + + ast : to_month(a) raw expr : to_month(a::Timestamp) checked expr : to_month(a) diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 9f2d1d74966e..e898b283a63b 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -3362,6 +3362,10 @@ Functions overloads: 3 to_month(Timestamp NULL) :: UInt8 NULL 0 to_nullable(NULL) :: NULL 1 to_nullable(T0 NULL) :: T0 NULL +0 to_quarter(Date) :: UInt8 +1 to_quarter(Date NULL) :: UInt8 NULL +2 to_quarter(Timestamp) :: UInt8 +3 to_quarter(Timestamp NULL) :: UInt8 NULL 0 to_second(Timestamp) :: UInt8 1 to_second(Timestamp NULL) :: UInt8 NULL 0 to_start_of_day(Timestamp) :: Timestamp diff --git a/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes b/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes index 76b1133e7b04..b795556fdfe0 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes +++ b/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes @@ -212,6 +212,21 @@ select today() + 1 = tomorrow() ---- 1 +query B +select to_date('2023-01-01') + 100 = to_date('2023-04-11') +---- +1 + +query B +select to_date('2023-01-01') - 100 = to_date('2022-09-23') +---- +1 + +statement error 1001 +select to_date('2023-01-01') + 100000000 + +statement error 1001 +select to_date('2023-01-01') - 100000000 query B @@ -643,6 +658,21 @@ select add_seconds(to_datetime(1582970400000000), cast(61, INT32)) ---- 2020-02-29 10:01:01.000000 +query T +select to_datetime('2023-01-01 00:00:00') + 10000000000 +---- +2023-01-01 02:46:40.000000 + +query t +select to_datetime('2023-01-01 00:00:00') - 10000000000 +---- +2022-12-31 21:13:20.000000 + +statement error 1001 +select to_datetime('2023-01-01 00:00:00') + 1000000000000000000 + +statement error 1001 +select to_datetime('2023-01-01 00:00:00') - 1000000000000000000 query I select to_month(to_datetime(1633081817000000)) @@ -781,6 +811,11 @@ select to_year(to_datetime(1646404329000000)) = 2022 ---- 1 +query B +select to_quarter(to_datetime(1646404329000000)) = 1 +---- +1 + query T select date_add(QUARTER, 1, to_date('2018-01-02')) ---- From 7c1a8a86689befcd41e25809671d541ea15e5c2e Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Wed, 20 Sep 2023 23:19:16 +0800 Subject: [PATCH 08/21] feat: stage file pattern match the whole string after prefix. 
(#12935) --- src/common/storage/src/stage.rs | 8 ++++--- src/query/ast/src/ast/format/ast_format.rs | 4 ++-- src/query/ast/src/ast/statements/statement.rs | 4 ++-- src/query/ast/src/parser/statement.rs | 2 +- src/query/ast/src/visitors/visitor.rs | 2 +- src/query/ast/src/visitors/visitor_mut.rs | 2 +- src/query/ast/tests/it/testdata/statement.txt | 4 ++-- src/query/sql/src/planner/binder/binder.rs | 9 +++++++- tests/sqllogictests/suites/stage/list_stage | 4 ++-- .../suites/stage/options/pattern | 23 +++++++++++++++++++ .../00_0009_remove_internal_stage.result | 1 + 11 files changed, 48 insertions(+), 15 deletions(-) create mode 100644 tests/sqllogictests/suites/stage/options/pattern diff --git a/src/common/storage/src/stage.rs b/src/common/storage/src/stage.rs index fb809a3eaa8d..fdf714284494 100644 --- a/src/common/storage/src/stage.rs +++ b/src/common/storage/src/stage.rs @@ -92,7 +92,7 @@ pub struct StageFilesInfo { impl StageFilesInfo { fn get_pattern(&self) -> Result> { match &self.pattern { - Some(pattern) => match Regex::new(pattern) { + Some(pattern) => match Regex::new(&format!("^{pattern}$")) { Ok(r) => Ok(Some(r)), Err(e) => Err(ErrorCode::SyntaxException(format!( "Pattern format invalid, got:{}, error:{:?}", @@ -206,6 +206,7 @@ impl StageFilesInfo { first_only: bool, max_files: usize, ) -> Result> { + let prefix_len = if path == "/" { 0 } else { path.len() }; let root_meta = operator.stat(path).await; match root_meta { Ok(meta) => match meta.mode() { @@ -233,7 +234,7 @@ impl StageFilesInfo { let mut limit: usize = 0; while let Some(obj) = list.try_next().await? { let meta = operator.metadata(&obj, StageFileInfo::meta_query()).await?; - if check_file(obj.path(), meta.mode(), &pattern) { + if check_file(&obj.path()[prefix_len..], meta.mode(), &pattern) { files.push(StageFileInfo::new(obj.path().to_string(), &meta)); if first_only { return Ok(files); @@ -263,6 +264,7 @@ fn blocking_list_files_with_pattern( first_only: bool, max_files: usize, ) -> Result> { + let prefix_len = if path == "/" { 0 } else { path.len() }; let operator = operator.blocking(); let root_meta = operator.stat(path); @@ -293,7 +295,7 @@ fn blocking_list_files_with_pattern( for obj in list { let obj = obj?; let meta = operator.metadata(&obj, StageFileInfo::meta_query())?; - if check_file(obj.path(), meta.mode(), &pattern) { + if check_file(&obj.path()[prefix_len..], meta.mode(), &pattern) { files.push(StageFileInfo::new(obj.path().to_string(), &meta)); if first_only { return Ok(files); diff --git a/src/query/ast/src/ast/format/ast_format.rs b/src/query/ast/src/ast/format/ast_format.rs index 27ffead1343b..da521cc648ad 100644 --- a/src/query/ast/src/ast/format/ast_format.rs +++ b/src/query/ast/src/ast/format/ast_format.rs @@ -2179,10 +2179,10 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor { self.children.push(node); } - fn visit_list_stage(&mut self, location: &'ast str, pattern: &'ast str) { + fn visit_list_stage(&mut self, location: &'ast str, pattern: &'ast Option) { let location_format_ctx = AstFormatContext::new(format!("Location {}", location)); let location_child = FormatTreeNode::new(location_format_ctx); - let pattern_format_ctx = AstFormatContext::new(format!("Pattern {}", pattern)); + let pattern_format_ctx = AstFormatContext::new(format!("Pattern {:?}", pattern)); let pattern_child = FormatTreeNode::new(pattern_format_ctx); let name = "ListStage".to_string(); diff --git a/src/query/ast/src/ast/statements/statement.rs b/src/query/ast/src/ast/statements/statement.rs index 5e4f7ba6de3b..8ae30da9c555 
100644 --- a/src/query/ast/src/ast/statements/statement.rs +++ b/src/query/ast/src/ast/statements/statement.rs @@ -186,7 +186,7 @@ pub enum Statement { }, ListStage { location: String, - pattern: String, + pattern: Option, }, // UserDefinedFileFormat @@ -452,7 +452,7 @@ impl Display for Statement { Statement::AlterUDF(stmt) => write!(f, "{stmt}")?, Statement::ListStage { location, pattern } => { write!(f, "LIST @{location}")?; - if !pattern.is_empty() { + if let Some(pattern) = pattern { write!(f, " PATTERN = '{pattern}'")?; } } diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index e44629718c70..fd6c453694dd 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -1054,7 +1054,7 @@ pub fn statement(i: Input) -> IResult { }, |(_, location, opt_pattern)| Statement::ListStage { location, - pattern: opt_pattern.map(|v| v.2).unwrap_or_default(), + pattern: opt_pattern.map(|v| v.2), }, ); diff --git a/src/query/ast/src/visitors/visitor.rs b/src/query/ast/src/visitors/visitor.rs index 329b844de4dd..520dddc74a68 100644 --- a/src/query/ast/src/visitors/visitor.rs +++ b/src/query/ast/src/visitors/visitor.rs @@ -531,7 +531,7 @@ pub trait Visitor<'ast>: Sized { fn visit_remove_stage(&mut self, _location: &'ast str, _pattern: &'ast str) {} - fn visit_list_stage(&mut self, _location: &'ast str, _pattern: &'ast str) {} + fn visit_list_stage(&mut self, _location: &'ast str, _pattern: &'ast Option) {} fn visit_create_file_format( &mut self, diff --git a/src/query/ast/src/visitors/visitor_mut.rs b/src/query/ast/src/visitors/visitor_mut.rs index 401e39b26e6e..bee45ee26889 100644 --- a/src/query/ast/src/visitors/visitor_mut.rs +++ b/src/query/ast/src/visitors/visitor_mut.rs @@ -546,7 +546,7 @@ pub trait VisitorMut: Sized { fn visit_remove_stage(&mut self, _location: &mut String, _pattern: &mut String) {} - fn visit_list_stage(&mut self, _location: &mut String, _pattern: &mut String) {} + fn visit_list_stage(&mut self, _location: &mut String, _pattern: &mut Option) {} fn visit_create_file_format( &mut self, diff --git a/src/query/ast/tests/it/testdata/statement.txt b/src/query/ast/tests/it/testdata/statement.txt index 2cfa725c03f4..af9681a85cf0 100644 --- a/src/query/ast/tests/it/testdata/statement.txt +++ b/src/query/ast/tests/it/testdata/statement.txt @@ -7120,7 +7120,7 @@ LIST @stage_a ---------- AST ------------ ListStage { location: "stage_a", - pattern: "", + pattern: None, } @@ -7131,7 +7131,7 @@ LIST @~ ---------- AST ------------ ListStage { location: "~", - pattern: "", + pattern: None, } diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs index 34f62c7d4f94..2a8dc4f466f4 100644 --- a/src/query/sql/src/planner/binder/binder.rs +++ b/src/query/sql/src/planner/binder/binder.rs @@ -346,7 +346,14 @@ impl<'a> Binder { // Stages Statement::ShowStages => self.bind_rewrite_to_query(bind_context, "SELECT name, stage_type, number_of_files, creator, comment FROM system.stages ORDER BY name", RewriteKind::ShowStages).await?, - Statement::ListStage { location, pattern } => self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM LIST_STAGE(location => '@{location}', pattern => '{pattern}')").as_str(), RewriteKind::ListStage).await?, + Statement::ListStage { location, pattern } => { + let pattern = if let Some(pattern) = pattern { + format!(", pattern => '{pattern}'") + } else { + "".to_string() + }; + self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM 
LIST_STAGE(location => '@{location}'{pattern})").as_str(), RewriteKind::ListStage).await? + }, Statement::DescribeStage { stage_name } => self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM system.stages WHERE name = '{stage_name}'").as_str(), RewriteKind::DescribeStage).await?, Statement::CreateStage(stmt) => self.bind_create_stage(stmt).await?, Statement::DropStage { diff --git a/tests/sqllogictests/suites/stage/list_stage b/tests/sqllogictests/suites/stage/list_stage index 3ec8ca80af94..f67a81a5dec7 100644 --- a/tests/sqllogictests/suites/stage/list_stage +++ b/tests/sqllogictests/suites/stage/list_stage @@ -8,12 +8,12 @@ parquet/multi_page/multi_page_3.parquet 4020 NULL parquet/multi_page/multi_page_4.parquet 6636 NULL query -select name, size, creator from list_stage(location => '@data/parquet/', pattern => 'complex[.]*') +select name, size, creator from list_stage(location => '@data/parquet/', pattern => 'complex[.].*') ---- parquet/complex.parquet 92762 NULL query -select name, size, creator from list_stage(location => '@data/', pattern => 'parquet/complex[.]*') +select name, size, creator from list_stage(location => '@data/', pattern => 'parquet/complex[.].*') ---- parquet/complex.parquet 92762 NULL diff --git a/tests/sqllogictests/suites/stage/options/pattern b/tests/sqllogictests/suites/stage/options/pattern new file mode 100644 index 000000000000..74d25f0df3fb --- /dev/null +++ b/tests/sqllogictests/suites/stage/options/pattern @@ -0,0 +1,23 @@ +# the following 2 cases show that `pattern` only matching sub path (or suffix) after the 'parquet/' prefix +# wrong case +query +select name from list_stage(location => '@data/parquet/', pattern => 'parquet/.*_page_1.*') order by name +---- + +# right case +query +select name from list_stage(location => '@data/parquet/', pattern => 'multi_page/.*_page_1.*') order by name +---- +parquet/multi_page/multi_page_1.parquet + + +# the following 2 cases show that `pattern` need to matching match the whole suffix, it is in fact '%$' +# wrong case +query +select name from list_stage(location => '@data/parquet/', pattern => 'multi_page_1') order by name +---- + +query +select name from list_stage(location => '@data/parquet/', pattern => '.*multi_page_1.*') order by name +---- +parquet/multi_page/multi_page_1.parquet diff --git a/tests/suites/1_stateful/00_stage/00_0009_remove_internal_stage.result b/tests/suites/1_stateful/00_stage/00_0009_remove_internal_stage.result index b22c26728a3e..4218f58997c2 100644 --- a/tests/suites/1_stateful/00_stage/00_0009_remove_internal_stage.result +++ b/tests/suites/1_stateful/00_stage/00_0009_remove_internal_stage.result @@ -16,3 +16,4 @@ ontime_200.csv.zst dir/ontime_200.csv ontime_200.csv ontime_200.csv.zst +dir/ontime_200.csv From b4f9763533eba88ddb4d2d550b53e40b59a92d1f Mon Sep 17 00:00:00 2001 From: zhyass Date: Thu, 21 Sep 2023 11:14:37 +0800 Subject: [PATCH 09/21] fix: purge oom (#12950) * fix purge oom * add unit test --- .../tests/it/storages/fuse/meta/snapshot.rs | 33 +++++++++++++++++++ .../common/table-meta/src/meta/mod.rs | 2 ++ .../common/table-meta/src/meta/v2/snapshot.rs | 2 +- src/query/storages/fuse/src/operations/gc.rs | 8 ++++- 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/query/service/tests/it/storages/fuse/meta/snapshot.rs b/src/query/service/tests/it/storages/fuse/meta/snapshot.rs index 43e14d08abd9..46f94418b49e 100644 --- a/src/query/service/tests/it/storages/fuse/meta/snapshot.rs +++ b/src/query/service/tests/it/storages/fuse/meta/snapshot.rs @@ -12,9 
+12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use std::ops::Add; use common_expression::TableSchema; +use storages_common_table_meta::meta::testing::StatisticsV0; +use storages_common_table_meta::meta::testing::TableSnapshotV1; +use storages_common_table_meta::meta::testing::TableSnapshotV2; use storages_common_table_meta::meta::TableSnapshot; use uuid::Uuid; @@ -74,3 +78,32 @@ fn snapshot_timestamp_time_skew_tolerance() { let prev_ts = prev.timestamp.unwrap(); assert!(current_ts > prev_ts) } + +#[test] +fn test_snapshot_v1_to_v4() { + let summary = StatisticsV0 { + row_count: 0, + block_count: 0, + perfect_block_count: 0, + uncompressed_byte_size: 0, + compressed_byte_size: 0, + index_size: 0, + col_stats: HashMap::new(), + }; + let v1 = TableSnapshotV1::new( + Uuid::new_v4(), + &None, + None, + Default::default(), + summary, + vec![], + None, + None, + ); + assert!(v1.timestamp.is_some()); + + let v4: TableSnapshot = TableSnapshotV2::from(v1.clone()).into(); + assert_eq!(v4.format_version, v1.format_version()); + assert_eq!(v4.snapshot_id, v1.snapshot_id); + assert_eq!(v4.timestamp, v1.timestamp); +} diff --git a/src/query/storages/common/table-meta/src/meta/mod.rs b/src/query/storages/common/table-meta/src/meta/mod.rs index 7ba2f446ddfb..724a22165773 100644 --- a/src/query/storages/common/table-meta/src/meta/mod.rs +++ b/src/query/storages/common/table-meta/src/meta/mod.rs @@ -50,6 +50,8 @@ pub use versions::Versioned; // - export meta encoding to benchmarking tests pub mod testing { pub use super::format::MetaEncoding; + pub use super::v0::statistics::Statistics as StatisticsV0; + pub use super::v1::TableSnapshot as TableSnapshotV1; pub use super::v2::SegmentInfo as SegmentInfoV2; pub use super::v2::TableSnapshot as TableSnapshotV2; pub use super::v3::SegmentInfo as SegmentInfoV3; diff --git a/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs b/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs index e7759854ad78..5bcd1affc5b2 100644 --- a/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs +++ b/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs @@ -127,7 +127,7 @@ impl From for TableSnapshot { // carries the format_version of snapshot being converted. 
format_version: s.format_version, snapshot_id: s.snapshot_id, - timestamp: None, + timestamp: s.timestamp, prev_snapshot_id: s.prev_snapshot_id, schema, summary, diff --git a/src/query/storages/fuse/src/operations/gc.rs b/src/query/storages/fuse/src/operations/gc.rs index 70e2a015a0e9..c15195f14841 100644 --- a/src/query/storages/fuse/src/operations/gc.rs +++ b/src/query/storages/fuse/src/operations/gc.rs @@ -63,6 +63,12 @@ impl FuseTable { } } let root_snapshot_info = root_snapshot_info_op.unwrap(); + if root_snapshot_info.snapshot_lite.timestamp.is_none() { + return Err(ErrorCode::StorageOther(format!( + "gc: snapshot timestamp is none, snapshot location: {}", + root_snapshot_info.snapshot_location + ))); + } let snapshots_io = SnapshotsIO::create(ctx.clone(), self.operator.clone()); let location_gen = self.meta_location_generator(); @@ -116,7 +122,7 @@ impl FuseTable { let mut segments_to_be_purged = HashSet::new(); let mut ts_to_be_purged = HashSet::new(); for s in snapshots.into_iter() { - if s.timestamp >= base_timestamp { + if s.timestamp.is_some() && s.timestamp >= base_timestamp { remain_snapshots.push(s); continue; } From 25270d876e4d51832e6a361b6794110b651dcf61 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Thu, 21 Sep 2023 12:06:58 +0800 Subject: [PATCH 10/21] docs: note about pattern in copy. (#12951) * docs: note about pattern in copy. * Update dml-copy-into-table.md --------- Co-authored-by: BohuTANG --- docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md | 6 +++++- .../doc/15-sql-functions/112-table-functions/list_stage.md | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md b/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md index 5448cf61f869..53f14d49bc95 100644 --- a/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md +++ b/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md @@ -184,10 +184,14 @@ externalLocation ::= Specify a list of one or more files names (separated by commas) to be loaded. -### PATTERN = 'regex_pattern' +### PATTERN = '' A [PCRE2](https://www.pcre.org/current/doc/html/)-based regular expression pattern string, enclosed in single quotes, specifying the file names to match. Click [here](#loading-data-with-pattern-matching) to see an example. For PCRE2 syntax, see http://www.pcre.org/current/doc/html/pcre2syntax.html. +:::note +Suppose there is a file `@//`, to include it, `` needs to match `^$`. +::: + ### FILE_FORMAT See [Input & Output File Formats](../../13-sql-reference/50-file-format-options.md). diff --git a/docs/doc/15-sql-functions/112-table-functions/list_stage.md b/docs/doc/15-sql-functions/112-table-functions/list_stage.md index e5ddaeaff1ef..9843e95ea0d7 100644 --- a/docs/doc/15-sql-functions/112-table-functions/list_stage.md +++ b/docs/doc/15-sql-functions/112-table-functions/list_stage.md @@ -36,10 +36,15 @@ externalStage ::= @[/] userStage ::= @~[/] ``` +### PATTERN + +See [COPY INTO table](/14-sql-commands/10-dml/dml-copy-into-table.md). 
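To make the anchored matching concrete, here is a small illustration (the stage name `@my_stage` and the file `parquet/complex.parquet` are assumed for the example, not taken from this patch): the pattern is matched against the whole path remaining after the queried location prefix, as if wrapped in `^...$`.

```sql
-- Listing from the stage root: the pattern must also cover the sub-directory.
SELECT name FROM list_stage(location => '@my_stage/', pattern => 'parquet/complex[.]parquet');

-- Listing from the sub-directory: only the file name is left to match.
SELECT name FROM list_stage(location => '@my_stage/parquet/', pattern => 'complex[.]parquet');
```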
+ + ## Examples ```sql -SELECT * FROM list_stage(location => '@my_stage/', pattern => '.log'); +SELECT * FROM list_stage(location => '@my_stage/', pattern => '.*[.]log'); +----------------+------+------------------------------------+-------------------------------+---------+ | name | size | md5 | last_modified | creator | +----------------+------+------------------------------------+-------------------------------+---------+ From 7d268dbe3b9da663f8fb8ac7dfbcc018bcb0cdd4 Mon Sep 17 00:00:00 2001 From: Yijun Zhao Date: Thu, 21 Sep 2023 13:30:06 +0800 Subject: [PATCH 11/21] fix reviewer comments (#12948) --- src/query/functions/src/scalars/geo.rs | 55 ++++++++++++++----- .../query/02_function/02_0060_function_geo | 22 ++++++++ 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/src/query/functions/src/scalars/geo.rs b/src/query/functions/src/scalars/geo.rs index b7a8d4482b5c..4e9f79214fc5 100644 --- a/src/query/functions/src/scalars/geo.rs +++ b/src/query/functions/src/scalars/geo.rs @@ -198,7 +198,8 @@ pub fn register(registry: &mut FunctionRegistry) { // point in ellipses registry.register_function_factory("point_in_ellipses", |_, args_type| { - if args_type.len() < 6 { + // The input parameters must be 2+4*n, where n is the number of ellipses. + if args_type.len() < 6 || (args_type.len() - 2) % 4 != 0 { return None; } Some(Arc::new(Function { @@ -221,20 +222,28 @@ pub fn register(registry: &mut FunctionRegistry) { return None; } - let (arg1, arg2) = if args_type.len() == 2 { + let (arg1, arg2) = { let arg1 = match args_type.get(0)? { - DataType::Tuple(tys) => vec![DataType::Number(NumberDataType::Float64); tys.len()], + DataType::Tuple(tys) => { + if tys.len() == 2 { + vec![DataType::Number(NumberDataType::Float64); tys.len()] + } else { + return None; + } + } _ => return None, }; let arg2 = match args_type.get(1)? { DataType::Array(box DataType::Tuple(tys)) => { - vec![DataType::Number(NumberDataType::Float64); tys.len()] + if tys.len() == 2 { + vec![DataType::Number(NumberDataType::Float64); tys.len()] + } else { + return None; + } } _ => return None, }; (arg1, arg2) - } else { - (vec![], vec![]) }; Some(Arc::new(Function { @@ -260,20 +269,28 @@ pub fn register(registry: &mut FunctionRegistry) { return None; } - let (arg1, arg2) = if args_type.len() == 2 { + let (arg1, arg2) = { let arg1 = match args_type.get(0)? { - DataType::Tuple(tys) => vec![DataType::Number(NumberDataType::Float64); tys.len()], + DataType::Tuple(tys) => { + if tys.len() == 2 { + vec![DataType::Number(NumberDataType::Float64); tys.len()] + } else { + return None; + } + } _ => return None, }; let arg2 = match args_type.get(1)? { DataType::Array(box DataType::Array(box DataType::Tuple(tys))) => { - vec![DataType::Number(NumberDataType::Float64); tys.len()] + if tys.len() == 2 { + vec![DataType::Number(NumberDataType::Float64); tys.len()] + } else { + return None; + } } _ => return None, }; (arg1, arg2) - } else { - (vec![], vec![]) }; Some(Arc::new(Function { @@ -302,20 +319,30 @@ pub fn register(registry: &mut FunctionRegistry) { let mut args = vec![]; let arg1 = match args_type.get(0)? { - DataType::Tuple(tys) => vec![DataType::Number(NumberDataType::Float64); tys.len()], + DataType::Tuple(tys) => { + if tys.len() == 2 { + vec![DataType::Number(NumberDataType::Float64); tys.len()] + } else { + return None; + } + } _ => return None, }; args.push(DataType::Tuple(arg1)); let arg2: Vec = match args_type.get(1)? 
{ DataType::Array(box DataType::Tuple(tys)) => { - vec![DataType::Number(NumberDataType::Float64); tys.len()] + if tys.len() == 2 { + vec![DataType::Number(NumberDataType::Float64); tys.len()] + } else { + return None; + } } _ => return None, }; - (0..args_type.len() - 1) + (1..args_type.len()) .for_each(|_| args.push(DataType::Array(Box::new(DataType::Tuple(arg2.clone()))))); Some(Arc::new(Function { diff --git a/tests/sqllogictests/suites/query/02_function/02_0060_function_geo b/tests/sqllogictests/suites/query/02_function/02_0060_function_geo index e2a2f4bab549..c9970bf5db3d 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0060_function_geo +++ b/tests/sqllogictests/suites/query/02_function/02_0060_function_geo @@ -46,11 +46,33 @@ select geohash_encode(-5.60302734375, 42.593994140625) ---- ezs42d000000 +# form 1 query T select point_in_polygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) ---- 1 +# form 2 +query T +select point_in_polygon((1., 1.), [[(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]]) +---- +0 + +# form 3 +query T +select point_in_polygon((2.5, 2.5), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]) +---- +1 + +statement error 1065 +select point_in_polygon((3,), [(6, 0), (8, 4)]) + +statement error 1065 +select point_in_polygon((1.,), [[(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]]) + +statement error 1065 +select point_in_polygon((2.5,), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]) + query T select great_circle_angle(-2181569507.9714413, 15253014773.129665, 0.5823419941455749, 0.5823419941455749) ---- From fc6b17e159e0319b2f789a4a93992eab3e3fe56b Mon Sep 17 00:00:00 2001 From: soyeric128 Date: Thu, 21 Sep 2023 13:59:56 +0800 Subject: [PATCH 12/21] Update 99-ansi-sql.md (#12949) --- docs/doc/13-sql-reference/99-ansi-sql.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/doc/13-sql-reference/99-ansi-sql.md b/docs/doc/13-sql-reference/99-ansi-sql.md index 25beb5c1e079..7967c20450a6 100644 --- a/docs/doc/13-sql-reference/99-ansi-sql.md +++ b/docs/doc/13-sql-reference/99-ansi-sql.md @@ -95,7 +95,7 @@ Databend aims to conform to the SQL standard, with particular support for ISO/IE | E121-17 | WITH HOLD cursors | No | | | **E131** | **Null value support (nulls in lieu of values)** | Yes | | | **E141** | **Basic integrity constraints** | No | | -| E141-01 | NOT NULL constraints | Yes | Default in Databend: All columns are non-nullable (NOT NULL). | +| E141-01 | NOT NULL constraints | Yes | Default in Databend: All columns are nullable. | | E141-02 | UNIQUE constraint of NOT NULL columns | No | | | E141-03 | PRIMARY KEY constraints | No | | | E141-04 | Basic FOREIGN KEY constraint with the NO ACTION default for both referential delete action and referential update action | No | | From 7a376d89af5e1bda8c489a4453de7875a06bf08e Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Thu, 21 Sep 2023 15:20:20 +0800 Subject: [PATCH 13/21] fix: update session on each resp in sqllogic http client. 
(#12952) --- tests/sqllogictests/src/client/http_client.rs | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/tests/sqllogictests/src/client/http_client.rs b/tests/sqllogictests/src/client/http_client.rs index c2014c435446..3256cb0d7657 100644 --- a/tests/sqllogictests/src/client/http_client.rs +++ b/tests/sqllogictests/src/client/http_client.rs @@ -77,29 +77,13 @@ impl HttpClient { let start = Instant::now(); let url = "http://127.0.0.1:8000/v1/query".to_string(); - let mut response = self.response(sql, &url, true).await?; - // Set session from response to client - // Then client will same session for different queries. - - if response.session.is_some() { - self.session = response.session.clone(); - } - - if let Some(error) = response.error { - return Err(format_error(error).into()); - } - - let rows = response.data; - let mut parsed_rows = parser_rows(&rows)?; + let mut parsed_rows = vec![]; + let mut response = + self.handle_response(self.post_query(sql, &url).await?, &mut parsed_rows)?; while let Some(next_uri) = response.next_uri { - let mut url = "http://127.0.0.1:8000".to_string(); - url.push_str(&next_uri); - response = self.response(sql, &url, false).await?; - if let Some(error) = response.error { - return Err(format_error(error).into()); - } - let rows = response.data; - parsed_rows.append(&mut parser_rows(&rows)?); + let url = format!("http://127.0.0.1:8000{next_uri}"); + response = + self.handle_response(self.poll_query_result(&url).await?, &mut parsed_rows)?; } // Todo: add types to compare let mut types = vec![]; @@ -120,27 +104,32 @@ impl HttpClient { }) } + fn handle_response( + &mut self, + response: QueryResponse, + parsed_rows: &mut Vec>, + ) -> Result { + if response.session.is_some() { + self.session = response.session.clone(); + } + if let Some(error) = response.error { + Err(format_error(error).into()) + } else { + parsed_rows.append(&mut parser_rows(&response.data)?); + Ok(response) + } + } + // Send request and get response by json format - async fn response(&mut self, sql: &str, url: &str, post: bool) -> Result { + async fn post_query(&self, sql: &str, url: &str) -> Result { let mut query = HashMap::new(); query.insert("sql", serde_json::to_value(sql)?); if let Some(session) = &self.session { query.insert("session", serde_json::to_value(session)?); } - if post { - return Ok(self - .client - .post(url) - .json(&query) - .basic_auth("root", Some("")) - .send() - .await? - .json::() - .await?); - } Ok(self .client - .get(url) + .post(url) .json(&query) .basic_auth("root", Some("")) .send() @@ -148,4 +137,15 @@ impl HttpClient { .json::() .await?) } + + async fn poll_query_result(&self, url: &str) -> Result { + Ok(self + .client + .get(url) + .basic_auth("root", Some("")) + .send() + .await? + .json::() + .await?) 
+ } } From 9ca40ec8367e33cbb6d27794efa2ad002ef366a2 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 21 Sep 2023 16:32:08 +0800 Subject: [PATCH 14/21] ci: add nextest to rust-tools.txt (#12961) --- scripts/setup/rust-tools.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/setup/rust-tools.txt b/scripts/setup/rust-tools.txt index 1df282bb138f..5d68657a1398 100644 --- a/scripts/setup/rust-tools.txt +++ b/scripts/setup/rust-tools.txt @@ -2,3 +2,4 @@ cargo-audit@0.17.6 cargo-machete@0.5.0 taplo-cli@0.8.1 typos-cli@1.16.3 +nextest@0.9.58 From e7b3d00ae0dec5684f5985a8fa802f6295099161 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 21 Sep 2023 16:39:22 +0800 Subject: [PATCH 15/21] feat(sqlsmith): Support generate subquery and with clause (#12956) --- src/query/functions/src/scalars/vector.rs | 36 +++- src/tests/sqlsmith/src/sql_gen/expr.rs | 14 +- src/tests/sqlsmith/src/sql_gen/query.rs | 181 ++++++++++++++++-- .../sqlsmith/src/sql_gen/sql_generator.rs | 5 + 4 files changed, 212 insertions(+), 24 deletions(-) diff --git a/src/query/functions/src/scalars/vector.rs b/src/query/functions/src/scalars/vector.rs index cf041d642256..be4efbcecdaf 100644 --- a/src/query/functions/src/scalars/vector.rs +++ b/src/query/functions/src/scalars/vector.rs @@ -90,8 +90,23 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - let data = std::str::from_utf8(data).unwrap(); + let data = match std::str::from_utf8(data) { + Ok(data) => data, + Err(_) => { + ctx.set_error( + output.len(), + format!("Invalid data: {:?}", String::from_utf8_lossy(data)), + ); + output.push(vec![F32::from(0.0)].into()); + return; + } + }; + if ctx.func_ctx.openai_api_key.is_empty() { + ctx.set_error(output.len(), "openai_api_key is empty".to_string()); + output.push(vec![F32::from(0.0)].into()); + return; + } let api_base = ctx.func_ctx.openai_api_embedding_base_url.clone(); let api_key = ctx.func_ctx.openai_api_key.clone(); let api_version = ctx.func_ctx.openai_api_version.clone(); @@ -140,7 +155,24 @@ pub fn register(registry: &mut FunctionRegistry) { } } - let data = std::str::from_utf8(data).unwrap(); + let data = match std::str::from_utf8(data) { + Ok(data) => data, + Err(_) => { + ctx.set_error( + output.len(), + format!("Invalid data: {:?}", String::from_utf8_lossy(data)), + ); + output.put_str(""); + output.commit_row(); + return; + } + }; + if ctx.func_ctx.openai_api_key.is_empty() { + ctx.set_error(output.len(), "openai_api_key is empty".to_string()); + output.put_str(""); + output.commit_row(); + return; + } let api_base = ctx.func_ctx.openai_api_chat_base_url.clone(); let api_key = ctx.func_ctx.openai_api_key.clone(); let api_version = ctx.func_ctx.openai_api_version.clone(); diff --git a/src/tests/sqlsmith/src/sql_gen/expr.rs b/src/tests/sqlsmith/src/sql_gen/expr.rs index 77bc5f9a880f..9290cd72ba6f 100644 --- a/src/tests/sqlsmith/src/sql_gen/expr.rs +++ b/src/tests/sqlsmith/src/sql_gen/expr.rs @@ -45,6 +45,14 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } } + pub(crate) fn gen_simple_expr(&mut self, ty: &DataType) -> Expr { + if self.rng.gen_bool(0.6) { + self.gen_column(ty) + } else { + self.gen_scalar_value(ty) + } + } + fn gen_column(&mut self, ty: &DataType) -> Expr { for bound_column in &self.bound_columns { if bound_column.data_type == *ty { @@ -389,7 +397,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } 7 => { let not = self.rng.gen_bool(0.5); - let subquery = self.gen_subquery(); + let (subquery, _) = self.gen_subquery(false); Expr::Exists { span: None, not, @@ -404,7 +412,7 @@ impl<'a, R: 
Rng> SqlGenerator<'a, R> { 3 => Some(SubqueryModifier::Some), _ => unreachable!(), }; - let subquery = self.gen_subquery(); + let (subquery, _) = self.gen_subquery(true); Expr::Subquery { span: None, modifier, @@ -415,7 +423,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { let expr_ty = self.gen_simple_data_type(); let expr = self.gen_expr(&expr_ty); let not = self.rng.gen_bool(0.5); - let subquery = self.gen_subquery(); + let (subquery, _) = self.gen_subquery(true); Expr::InSubquery { span: None, expr: Box::new(expr), diff --git a/src/tests/sqlsmith/src/sql_gen/query.rs b/src/tests/sqlsmith/src/sql_gen/query.rs index f9bb81cbcfd6..cd3d652eb7cc 100644 --- a/src/tests/sqlsmith/src/sql_gen/query.rs +++ b/src/tests/sqlsmith/src/sql_gen/query.rs @@ -26,12 +26,18 @@ use common_ast::ast::Query; use common_ast::ast::SelectStmt; use common_ast::ast::SelectTarget; use common_ast::ast::SetExpr; +use common_ast::ast::TableAlias; use common_ast::ast::TableReference; +use common_ast::ast::With; +use common_ast::ast::CTE; +use common_expression::infer_schema_type; use common_expression::types::DataType; use common_expression::types::NumberDataType; use common_expression::TableDataType; use common_expression::TableField; +use common_expression::TableSchemaRef; use common_expression::TableSchemaRefExt; +use rand::distributions::Alphanumeric; use rand::Rng; use crate::sql_gen::Column; @@ -40,10 +46,12 @@ use crate::sql_gen::Table; impl<'a, R: Rng> SqlGenerator<'a, R> { pub(crate) fn gen_query(&mut self) -> Query { - self.bound_columns.clear(); + self.cte_tables.clear(); self.bound_tables.clear(); + self.bound_columns.clear(); self.is_join = false; + let with = self.gen_with(); let body = self.gen_set_expr(); let limit = self.gen_limit(); let offset = self.gen_offset(limit.len()); @@ -51,8 +59,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { Query { span: None, - // TODO - with: None, + with, body, order_by, limit, @@ -61,7 +68,10 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } } - pub(crate) fn gen_subquery(&mut self) -> Query { + // Scalar, IN / NOT IN, ANY / SOME / ALL Subquery must return only one column + // EXISTS / NOT EXISTS Subquery can return any columns + pub(crate) fn gen_subquery(&mut self, one_column: bool) -> (Query, TableSchemaRef) { + let current_cte_tables = mem::take(&mut self.cte_tables); let current_bound_tables = mem::take(&mut self.bound_tables); let current_bound_columns = mem::take(&mut self.bound_columns); let current_is_join = self.is_join; @@ -70,13 +80,101 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { self.bound_columns = vec![]; self.is_join = false; - let query = self.gen_query(); + // Only generate simple subquery + // TODO: complex subquery + let from = self.gen_from(); + + let len = if one_column { + 1 + } else { + self.rng.gen_range(1..=5) + }; + + let name: String = (0..3) + .map(|_| self.rng.sample(Alphanumeric) as char) + .collect(); + let mut fields = Vec::with_capacity(len); + let mut select_list = Vec::with_capacity(len); + for i in 0..len { + let ty = self.gen_simple_data_type(); + let expr = self.gen_simple_expr(&ty); + let col_name = format!("c{}{}", name, i); + let table_type = infer_schema_type(&ty).unwrap(); + let field = TableField::new(&col_name, table_type); + fields.push(field); + let alias = Identifier::from_name(col_name); + let target = SelectTarget::AliasedExpr { + expr: Box::new(expr), + alias: Some(alias), + }; + select_list.push(target); + } + let schema = TableSchemaRefExt::create(fields); + let select = SelectStmt { + span: None, + hints: None, + distinct: false, + 
select_list, + from, + selection: None, + group_by: None, + having: None, + window_list: None, + }; + let body = SetExpr::Select(Box::new(select)); + + let query = Query { + span: None, + with: None, + body, + order_by: vec![], + limit: vec![], + offset: None, + ignore_result: false, + }; + + self.cte_tables = current_cte_tables; self.bound_tables = current_bound_tables; self.bound_columns = current_bound_columns; self.is_join = current_is_join; - query + (query, schema) + } + + fn gen_with(&mut self) -> Option { + if self.rng.gen_bool(0.8) { + return None; + } + + let len = self.rng.gen_range(1..=3); + let mut ctes = Vec::with_capacity(len); + for _ in 0..len { + let cte = self.gen_cte(); + ctes.push(cte); + } + + Some(With { + span: None, + recursive: false, + ctes, + }) + } + + fn gen_cte(&mut self) -> CTE { + let (subquery, schema) = self.gen_subquery(false); + + let (table, alias) = self.gen_subquery_table(schema); + self.cte_tables.push(table); + + let materialized = self.rng.gen_bool(0.5); + + CTE { + span: None, + alias, + materialized, + query: Box::new(subquery), + } } fn gen_set_expr(&mut self) -> SetExpr { @@ -304,17 +402,21 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { // TODO: generate more table reference // let table_ref_num = self.rng.gen_range(1..=3); match self.rng.gen_range(0..=10) { - 0..=7 => { - let i = self.rng.gen_range(0..self.tables.len()); - let table_ref = self.gen_table_ref(self.tables[i].clone()); + 0..=6 => { + let (table_ref, _) = self.gen_table_ref(); table_refs.push(table_ref); } // join - 8..=9 => { + 7..=8 => { self.is_join = true; let join = self.gen_join_table_ref(); table_refs.push(join); } + // subquery + 9 => { + let subquery = self.gen_subquery_table_ref(); + table_refs.push(subquery); + } 10 => { let table_func = self.gen_table_func(); table_refs.push(table_func); @@ -325,12 +427,21 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { table_refs } - fn gen_table_ref(&mut self, table: Table) -> TableReference { + fn gen_table_ref(&mut self) -> (TableReference, TableSchemaRef) { + let len = self.tables.len() + self.cte_tables.len(); + let i = self.rng.gen_range(0..len); + + let table = if i < self.tables.len() { + self.tables[i].clone() + } else { + self.cte_tables[len - i - 1].clone() + }; + let schema = table.schema.clone(); let table_name = Identifier::from_name(table.name.clone()); self.bound_table(table); - TableReference::Table { + let table_ref = TableReference::Table { span: None, // TODO catalog: None, @@ -345,7 +456,8 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { pivot: None, // TODO unpivot: None, - } + }; + (table_ref, schema) } // Only test: @@ -453,11 +565,10 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { _ => unreachable!(), } } + fn gen_join_table_ref(&mut self) -> TableReference { - let i = self.rng.gen_range(0..self.tables.len()); - let j = if i == self.tables.len() - 1 { 0 } else { i + 1 }; - let left_table = self.gen_table_ref(self.tables[i].clone()); - let right_table = self.gen_table_ref(self.tables[j].clone()); + let (left_table, left_schema) = self.gen_table_ref(); + let (right_table, right_schema) = self.gen_table_ref(); let op = match self.rng.gen_range(0..=8) { 0 => JoinOperator::Inner, @@ -479,8 +590,8 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { JoinCondition::On(Box::new(expr)) } 1 => { - let left_fields = self.tables[i].schema.fields(); - let right_fields = self.tables[j].schema.fields(); + let left_fields = left_schema.fields(); + let right_fields = right_schema.fields(); let mut names = Vec::new(); for left_field in left_fields { @@ -534,6 
+645,19 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { TableReference::Join { span: None, join } } + fn gen_subquery_table_ref(&mut self) -> TableReference { + let (subquery, schema) = self.gen_subquery(false); + + let (table, alias) = self.gen_subquery_table(schema); + self.bound_table(table); + + TableReference::Subquery { + span: None, + subquery: Box::new(subquery), + alias: Some(alias), + } + } + fn gen_selection(&mut self) -> Option { match self.rng.gen_range(0..=9) { 0..=5 => Some(self.gen_expr(&DataType::Boolean)), @@ -545,6 +669,25 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } } + fn gen_subquery_table(&mut self, schema: TableSchemaRef) -> (Table, TableAlias) { + let name: String = (0..4) + .map(|_| self.rng.sample(Alphanumeric) as char) + .collect(); + let table_name = format!("t{}", name); + let mut columns = Vec::with_capacity(schema.num_fields()); + for field in schema.fields() { + let column = Identifier::from_name(field.name.clone()); + columns.push(column); + } + let alias = TableAlias { + name: Identifier::from_name(table_name.clone()), + columns, + }; + let table = Table::new(table_name, schema); + + (table, alias) + } + fn bound_table(&mut self, table: Table) { for (i, field) in table.schema.fields().iter().enumerate() { let column = Column { diff --git a/src/tests/sqlsmith/src/sql_gen/sql_generator.rs b/src/tests/sqlsmith/src/sql_gen/sql_generator.rs index cc65518ee091..cfc50082fbde 100644 --- a/src/tests/sqlsmith/src/sql_gen/sql_generator.rs +++ b/src/tests/sqlsmith/src/sql_gen/sql_generator.rs @@ -41,6 +41,7 @@ pub(crate) struct Column { pub(crate) struct SqlGenerator<'a, R: Rng> { pub(crate) tables: Vec, + pub(crate) cte_tables: Vec
<Table>,
    pub(crate) bound_tables: Vec<Table>
, pub(crate) bound_columns: Vec, pub(crate) is_join: bool, @@ -54,6 +55,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { let mut scalar_func_sigs = Vec::new(); for (name, func_list) in BUILTIN_FUNCTIONS.funcs.iter() { // Ignore unsupported binary functions, avoid parse binary operator failure + // Ignore ai functions, avoid timeouts on http calls if name == "div" || name == "and" || name == "or" @@ -61,6 +63,8 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { || name == "like" || name == "regexp" || name == "rlike" + || name == "ai_embedding_vector" + || name == "ai_text_completion" { continue; } @@ -71,6 +75,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { SqlGenerator { tables: vec![], + cte_tables: vec![], bound_tables: vec![], bound_columns: vec![], is_join: false, From fa7be804837d6c2f55033d5154f277563120655d Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 21 Sep 2023 18:17:50 +0800 Subject: [PATCH 16/21] chore: move parquet_rs uts to `databend-query::it` (#12954) * refact: move parquet_rs uts to `databend-query::it` * fix: oops, remove dev-dependency `databend-query` from `common-storage-parquet` * clean up --- Cargo.lock | 2 +- src/query/service/Cargo.toml | 1 + src/query/service/tests/it/main.rs | 1 + .../tests/it/parquet_rs/data.rs | 0 .../tests/it/parquet_rs/mod.rs | 0 .../tests/it/parquet_rs/prune_pages.rs | 0 .../tests/it/parquet_rs/prune_row_groups.rs | 0 .../tests/it/parquet_rs/utils.rs | 0 src/query/storages/parquet/Cargo.toml | 1 - src/query/storages/parquet/tests/it/main.rs | 15 --------------- 10 files changed, 3 insertions(+), 17 deletions(-) rename src/query/{storages/parquet => service}/tests/it/parquet_rs/data.rs (100%) rename src/query/{storages/parquet => service}/tests/it/parquet_rs/mod.rs (100%) rename src/query/{storages/parquet => service}/tests/it/parquet_rs/prune_pages.rs (100%) rename src/query/{storages/parquet => service}/tests/it/parquet_rs/prune_row_groups.rs (100%) rename src/query/{storages/parquet => service}/tests/it/parquet_rs/utils.rs (100%) delete mode 100644 src/query/storages/parquet/tests/it/main.rs diff --git a/Cargo.lock b/Cargo.lock index 521ff607751d..d78802ff087b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2920,7 +2920,6 @@ dependencies = [ "common-pipeline-sources", "common-sql", "common-storage", - "databend-query", "ethnum", "futures", "log", @@ -3890,6 +3889,7 @@ dependencies = [ "ordered-float 3.7.0", "p256 0.13.0", "parking_lot 0.12.1", + "parquet", "paste", "petgraph", "pin-project-lite", diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index 7d69c3cb242d..a60e0c79653c 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -141,6 +141,7 @@ once_cell = "1.15.0" opendal = { workspace = true } opensrv-mysql = { version = "0.4.1", features = ["tls"] } parking_lot = "0.12.1" +parquet = { version = "46.0.0", features = ["async"] } paste = "1.0.9" petgraph = "0.6.2" pin-project-lite = "0.2.9" diff --git a/src/query/service/tests/it/main.rs b/src/query/service/tests/it/main.rs index 1bf22b18d2ac..30f478e714a4 100644 --- a/src/query/service/tests/it/main.rs +++ b/src/query/service/tests/it/main.rs @@ -28,6 +28,7 @@ mod databases; mod frame; mod interpreters; mod metrics; +mod parquet_rs; mod pipelines; mod servers; mod sessions; diff --git a/src/query/storages/parquet/tests/it/parquet_rs/data.rs b/src/query/service/tests/it/parquet_rs/data.rs similarity index 100% rename from src/query/storages/parquet/tests/it/parquet_rs/data.rs rename to src/query/service/tests/it/parquet_rs/data.rs diff --git 
a/src/query/storages/parquet/tests/it/parquet_rs/mod.rs b/src/query/service/tests/it/parquet_rs/mod.rs similarity index 100% rename from src/query/storages/parquet/tests/it/parquet_rs/mod.rs rename to src/query/service/tests/it/parquet_rs/mod.rs diff --git a/src/query/storages/parquet/tests/it/parquet_rs/prune_pages.rs b/src/query/service/tests/it/parquet_rs/prune_pages.rs similarity index 100% rename from src/query/storages/parquet/tests/it/parquet_rs/prune_pages.rs rename to src/query/service/tests/it/parquet_rs/prune_pages.rs diff --git a/src/query/storages/parquet/tests/it/parquet_rs/prune_row_groups.rs b/src/query/service/tests/it/parquet_rs/prune_row_groups.rs similarity index 100% rename from src/query/storages/parquet/tests/it/parquet_rs/prune_row_groups.rs rename to src/query/service/tests/it/parquet_rs/prune_row_groups.rs diff --git a/src/query/storages/parquet/tests/it/parquet_rs/utils.rs b/src/query/service/tests/it/parquet_rs/utils.rs similarity index 100% rename from src/query/storages/parquet/tests/it/parquet_rs/utils.rs rename to src/query/service/tests/it/parquet_rs/utils.rs diff --git a/src/query/storages/parquet/Cargo.toml b/src/query/storages/parquet/Cargo.toml index 9c46e8d4c2d6..803026960075 100644 --- a/src/query/storages/parquet/Cargo.toml +++ b/src/query/storages/parquet/Cargo.toml @@ -44,5 +44,4 @@ typetag = "0.2.3" [dev-dependencies] common-sql = { path = "../../sql" } -databend-query = { path = "../../service" } tempfile = "3.4.0" diff --git a/src/query/storages/parquet/tests/it/main.rs b/src/query/storages/parquet/tests/it/main.rs deleted file mode 100644 index bff09cbf2b3c..000000000000 --- a/src/query/storages/parquet/tests/it/main.rs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -mod parquet_rs; From fdbbcea80657f55503bc81e34dd9d5462198d564 Mon Sep 17 00:00:00 2001 From: soyeric128 Date: Thu, 21 Sep 2023 18:18:13 +0800 Subject: [PATCH 17/21] docs: udf (#12938) * added * added * added * Update ddl-create-function.md * format --- .../00-ddl/50-udf/_category_.json | 6 +- .../00-ddl/50-udf/ddl-alter-function.md | 27 ++-- .../00-ddl/50-udf/ddl-create-function.md | 118 ++++++++++++++++- .../00-ddl/50-udf/ddl-drop-function.md | 6 +- .../14-sql-commands/00-ddl/50-udf/index.md | 125 ++++++++++++++++++ 5 files changed, 262 insertions(+), 20 deletions(-) create mode 100644 docs/doc/14-sql-commands/00-ddl/50-udf/index.md diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json b/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json index fccc400f3f6b..0b9b999caf48 100644 --- a/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json +++ b/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json @@ -1,7 +1,3 @@ { - "label": "User-Defined Function", - "link": { - "type": "generated-index", - "slug": "/sql-commands/ddl/udf" - } + "label": "User-Defined Function" } \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md index 1a727c2b02d2..a196a7689aa9 100644 --- a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md +++ b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md @@ -3,22 +3,33 @@ title: ALTER FUNCTION description: Modifies the properties for an existing user-defined function. --- +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +Alters a user-defined function. ## Syntax ```sql -CREATE FUNCTION AS ([ argname ]) -> '' +-- Alter UDF created with lambda expression +ALTER FUNCTION [IF NOT EXISTS] + AS () -> + [DESC=''] + +-- Alter UDF created with UDF server +ALTER FUNCTION [IF NOT EXISTS] + AS () RETURNS LANGUAGE + HANDLER = '' ADDRESS = '' + [DESC=''] ``` ## Examples ```sql +CREATE FUNCTION a_plus_3 AS (a) -> a+3+3; ALTER FUNCTION a_plus_3 AS (a) -> a+3; -SELECT a_plus_3(2); -+---------+ -| (2 + 3) | -+---------+ -| 5 | -+---------+ -``` +CREATE FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd' ADDRESS = 'http://0.0.0.0:8815'; +ALTER FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd_new' ADDRESS = 'http://0.0.0.0:8815'; +``` \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md index 6e3d91a01412..303eff27943b 100644 --- a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md +++ b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md @@ -3,20 +3,44 @@ title: CREATE FUNCTION description: Create a new user-defined scalar function. --- +import FunctionDescription from '@site/src/components/FunctionDescription'; + -## CREATE FUNCTION - -Creates a new UDF (user-defined function), the UDF can contain an SQL expression. +Creates a user-defined function. ## Syntax ```sql -CREATE FUNCTION [ IF NOT EXISTS ] AS ([ argname ]) -> '' +-- Create with lambda expression +CREATE FUNCTION [IF NOT EXISTS] + AS () -> + [DESC=''] + + +-- Create with UDF server +CREATE FUNCTION [IF NOT EXISTS] + AS () RETURNS LANGUAGE + HANDLER = '' ADDRESS = '' + [DESC=''] ``` +| Parameter | Description | +|-----------------------|---------------------------------------------------------------------------------------------------| +| `` | The name of the function. 
| +| `` | The lambda expression or code snippet defining the function's behavior. | +| `DESC=''` | Description of the UDF.| +| `<`| A list of input parameter names. Separated by comma.| +| `<`| A list of input parameter types. Separated by comma.| +| `` | The return type of the function. | +| `LANGUAGE` | Specifies the language used to write the function. Available values: `python`. | +| `HANDLER = ''` | Specifies the name of the function's handler. | +| `ADDRESS = ''` | Specifies the address of the UDF server. | + ## Examples +### Creating UDF with Lambda Expression + ```sql CREATE FUNCTION a_plus_3 AS (a) -> a+3; @@ -53,3 +77,89 @@ DROP FUNCTION get_v2; DROP TABLE json_table; ``` + +### Creating UDF with UDF Server (Python) + +This example demonstrates how to enable and configure a UDF server in Python: + +1. Enable UDF server support by adding the following parameters to the [query] section in the [databend-query.toml](https://github.com/datafuselabs/databend/blob/main/scripts/distribution/configs/databend-query.toml) configuration file. + +```toml title='databend-query.toml' +[query] +... +enable_udf_server = true +# List the allowed UDF server addresses, separating multiple addresses with commas. +# For example, ['http://0.0.0.0:8815', 'http://example.com'] +udf_server_allow_list = ['http://0.0.0.0:8815'] +... +``` + +2. Define your function. This code defines and runs a UDF server in Python, which exposes a custom function *gcd* for calculating the greatest common divisor of two integers and allows remote execution of this function: + +:::note +The SDK package is not yet available. Prior to its release, please download the 'udf.py' file from https://github.com/datafuselabs/databend/blob/main/tests/udf-server/udf.py and ensure it is saved in the same directory as this Python script. This step is essential for the code to function correctly. +::: + +```python title='udf_server.py' +from udf import * + +@udf( + input_types=["INT", "INT"], + result_type="INT", + skip_null=True, +) +def gcd(x: int, y: int) -> int: + while y != 0: + (x, y) = (y, x % y) + return x + +if __name__ == '__main__': + # create a UDF server listening at '0.0.0.0:8815' + server = UdfServer("0.0.0.0:8815") + # add defined functions + server.add_function(gcd) + # start the UDF server + server.serve() +``` + +`@udf` is a decorator used for defining UDFs in Databend, supporting the following parameters: + +| Parameter | Description | +|--------------|-----------------------------------------------------------------------------------------------------| +| input_types | A list of strings or Arrow data types that specify the input data types. | +| result_type | A string or an Arrow data type that specifies the return value type. | +| name | An optional string specifying the function name. If not provided, the original name will be used. | +| io_threads | Number of I/O threads used per data chunk for I/O bound functions. | +| skip_null | A boolean value specifying whether to skip NULL values. If set to True, NULL values will not be passed to the function, and the corresponding return value is set to NULL. Default is False. 
| + +This table illustrates the correspondence between Databend data types and their corresponding Python equivalents: + +| Databend Type | Python Type | +|-----------------------|-----------------------| +| BOOLEAN | bool | +| TINYINT (UNSIGNED) | int | +| SMALLINT (UNSIGNED) | int | +| INT (UNSIGNED) | int | +| BIGINT (UNSIGNED) | int | +| FLOAT | float | +| DOUBLE | float | +| DECIMAL | decimal.Decimal | +| DATE | datetime.date | +| TIMESTAMP | datetime.datetime | +| VARCHAR | str | +| VARIANT | any | +| MAP(K,V) | dict | +| ARRAY(T) | list[T] | +| TUPLE(T...) | tuple(T...) | + +3. Run the Python file to start the UDF server: + +```shell +python3 udf_server.py +``` + +4. Register the function *gcd* with the [CREATE FUNCTION](ddl-create-function.md) in Databend: + +```sql +CREATE FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd' ADDRESS = 'http://0.0.0.0:8815'; +``` \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md index b93365d5f630..5650295b770d 100644 --- a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md +++ b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md @@ -4,12 +4,12 @@ description: Drop an existing user-defined function. --- -Drop an existing user-defined function. +Drops a user-defined function. ## Syntax ```sql -DROP FUNCTION [IF EXISTS] +DROP FUNCTION [IF EXISTS] ``` ## Examples @@ -19,4 +19,4 @@ DROP FUNCTION a_plus_3; SELECT a_plus_3(2); ERROR 1105 (HY000): Code: 2602, Text = Unknown Function a_plus_3 (while in analyze select projection). -``` +``` \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/index.md b/docs/doc/14-sql-commands/00-ddl/50-udf/index.md new file mode 100644 index 000000000000..27ceaed90510 --- /dev/null +++ b/docs/doc/14-sql-commands/00-ddl/50-udf/index.md @@ -0,0 +1,125 @@ +--- +title: User-Defined Function +--- +import IndexOverviewList from '@site/src/components/IndexOverviewList'; + +## What are UDFs? + +User-Defined Functions (UDFs) enable you to define their own custom operations to process data within Databend. They are typically written using lambda expressions or implemented via a UDF server with programming languages such as Python and are executed as part of Databend's query processing pipeline. Advantages of using UDFs include: + +- Customized Data Transformations: UDFs empower you to perform data transformations that may not be achievable through built-in Databend functions alone. This customization is particularly valuable for handling unique data formats or business logic. + +- Performance Optimization: UDFs provide the flexibility to define and fine-tune your own custom functions, enabling you to optimize data processing to meet precise performance requirements. This means you can tailor the code for maximum efficiency, ensuring that your data processing tasks run as efficiently as possible. + +- Code Reusability: UDFs can be reused across multiple queries, saving time and effort in coding and maintaining data processing logic. + +## Managing UDFs + +To manage UDFs in Databend, use the following commands: + + + +## Usage Examples + +This section demonstrates two UDF implementation methods within Databend: one by creating UDFs with lambda expressions and the other by utilizing UDF servers in conjunction with Python. For additional examples of defining UDFs in various programming languages, see [CREATE FUNCTION](ddl-create-function.md). 
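Whichever implementation method is used, the resulting function is called like any built-in scalar function. As a quick preview (assuming the `a_plus_3` and `gcd` functions created in the examples that follow):

```sql
SELECT a_plus_3(2);   -- returns 5, evaluated by the lambda-based UDF
SELECT gcd(48, 18);   -- returns 6, evaluated by the Python UDF server
```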
+ +### UDF Implementation with Lambda Expression + +This example implements a UDF named *a_plus_3* using a lambda expression: + +```sql +CREATE FUNCTION a_plus_3 AS (a) -> a+3; + +SELECT a_plus_3(2); ++---------+ +| (2 + 3) | ++---------+ +| 5 | ++---------+ +``` + +### UDF Implementation via UDF Server + +This example demonstrates how to enable and configure a UDF server in Python: + +1. Enable UDF server support by adding the following parameters to the [query] section in the [databend-query.toml](https://github.com/datafuselabs/databend/blob/main/scripts/distribution/configs/databend-query.toml) configuration file. + +```toml title='databend-query.toml' +[query] +... +enable_udf_server = true +# List the allowed UDF server addresses, separating multiple addresses with commas. +# For example, ['http://0.0.0.0:8815', 'http://example.com'] +udf_server_allow_list = ['http://0.0.0.0:8815'] +... +``` + +2. Define your function. This code defines and runs a UDF server in Python, which exposes a custom function *gcd* for calculating the greatest common divisor of two integers and allows remote execution of this function: + +:::note +The SDK package is not yet available. Prior to its release, please download the 'udf.py' file from https://github.com/datafuselabs/databend/blob/main/tests/udf-server/udf.py and ensure it is saved in the same directory as this Python script. This step is essential for the code to function correctly. +::: + +```python title='udf_server.py' +from udf import * + +@udf( + input_types=["INT", "INT"], + result_type="INT", + skip_null=True, +) +def gcd(x: int, y: int) -> int: + while y != 0: + (x, y) = (y, x % y) + return x + +if __name__ == '__main__': + # create a UDF server listening at '0.0.0.0:8815' + server = UdfServer("0.0.0.0:8815") + # add defined functions + server.add_function(gcd) + # start the UDF server + server.serve() +``` + +`@udf` is a decorator used for defining UDFs in Databend, supporting the following parameters: + +| Parameter | Description | +|--------------|-----------------------------------------------------------------------------------------------------| +| input_types | A list of strings or Arrow data types that specify the input data types. | +| result_type | A string or an Arrow data type that specifies the return value type. | +| name | An optional string specifying the function name. If not provided, the original name will be used. | +| io_threads | Number of I/O threads used per data chunk for I/O bound functions. | +| skip_null | A boolean value specifying whether to skip NULL values. If set to True, NULL values will not be passed to the function, and the corresponding return value is set to NULL. Default is False. | + +This table illustrates the correspondence between Databend data types and their corresponding Python equivalents: + +| Databend Type | Python Type | +|-----------------------|-----------------------| +| BOOLEAN | bool | +| TINYINT (UNSIGNED) | int | +| SMALLINT (UNSIGNED) | int | +| INT (UNSIGNED) | int | +| BIGINT (UNSIGNED) | int | +| FLOAT | float | +| DOUBLE | float | +| DECIMAL | decimal.Decimal | +| DATE | datetime.date | +| TIMESTAMP | datetime.datetime | +| VARCHAR | str | +| VARIANT | any | +| MAP(K,V) | dict | +| ARRAY(T) | list[T] | +| TUPLE(T...) | tuple(T...) | + +3. Run the Python file to start the UDF server: + +```shell +python3 udf_server.py +``` + +4. 
Register the function *gcd* with the [CREATE FUNCTION](ddl-create-function.md) in Databend: + +```sql +CREATE FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd' ADDRESS = 'http://0.0.0.0:8815'; +``` \ No newline at end of file From 510a6dae02af3c8f5571c0186f66a8c695a7517a Mon Sep 17 00:00:00 2001 From: JackTan25 <60096118+JackTan25@users.noreply.github.com> Date: Thu, 21 Sep 2023 18:23:13 +0800 Subject: [PATCH 18/21] feat: support star "*" for merge into (#12906) * add test first * support * and add more tests * fix clippy * fix check * fix check --- .../ast/src/ast/statements/merge_into.rs | 19 +- src/query/ast/src/parser/statement.rs | 85 ++++-- .../interpreters/interpreter_merge_into.rs | 5 +- .../sql/src/planner/binder/merge_into.rs | 256 +++++++++++++----- .../processor_merge_into_not_matched.rs | 3 +- .../base/09_fuse_engine/09_0026_merge_into | 56 ++++ 6 files changed, 327 insertions(+), 97 deletions(-) diff --git a/src/query/ast/src/ast/statements/merge_into.rs b/src/query/ast/src/ast/statements/merge_into.rs index 83291fa1b8ea..caa273e4ebe2 100644 --- a/src/query/ast/src/ast/statements/merge_into.rs +++ b/src/query/ast/src/ast/statements/merge_into.rs @@ -52,7 +52,10 @@ impl Display for MergeUpdateExpr { #[derive(Debug, Clone, PartialEq)] pub enum MatchOperation { - Update { update_list: Vec }, + Update { + update_list: Vec, + is_star: bool, + }, Delete, } @@ -66,6 +69,7 @@ pub struct MatchedClause { pub struct InsertOperation { pub columns: Option>, pub values: Vec, + pub is_star: bool, } #[derive(Debug, Clone, PartialEq)] @@ -116,9 +120,16 @@ impl Display for MergeIntoStmt { write!(f, " THEN ")?; match &match_clause.operation { - MatchOperation::Update { update_list } => { - write!(f, " UPDATE SET ")?; - write_comma_separated_list(f, update_list)?; + MatchOperation::Update { + update_list, + is_star, + } => { + if *is_star { + write!(f, " UPDATE * ")?; + } else { + write!(f, " UPDATE SET ")?; + write_comma_separated_list(f, update_list)?; + } } MatchOperation::Delete => { write!(f, " DELETE ")?; diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index fd6c453694dd..c3bc3e1f4856 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -2267,40 +2267,75 @@ fn match_operation(i: Input) -> IResult { rule! { UPDATE ~ SET ~ ^#comma_separated_list1(merge_update_expr) }, - |(_, _, update_list)| MatchOperation::Update { update_list }, + |(_, _, update_list)| MatchOperation::Update { + update_list, + is_star: false, + }, + ), + map( + rule! { + UPDATE ~ "*" + }, + |(_, _)| MatchOperation::Update { + update_list: Vec::new(), + is_star: true, + }, ), ))(i) } pub fn unmatch_clause(i: Input) -> IResult { - map( - rule! { - WHEN ~ NOT ~ MATCHED ~ (AND ~ ^#expr)? ~ THEN ~ INSERT ~ ( "(" ~ ^#comma_separated_list1(ident) ~ ^")" )? - ~ VALUES ~ ^#row_values - }, - |(_, _, _, expr_op, _, _, columns_op, _, values)| { - let selection = match expr_op { - Some(e) => Some(e.1), - None => None, - }; - match columns_op { - Some(columns) => MergeOption::Unmatch(UnmatchedClause { - insert_operation: InsertOperation { - columns: Some(columns.1), - values, - }, - selection, - }), - None => MergeOption::Unmatch(UnmatchedClause { + alt(( + map( + rule! { + WHEN ~ NOT ~ MATCHED ~ (AND ~ ^#expr)? ~ THEN ~ INSERT ~ ( "(" ~ ^#comma_separated_list1(ident) ~ ^")" )? 
+ ~ VALUES ~ ^#row_values + }, + |(_, _, _, expr_op, _, _, columns_op, _, values)| { + let selection = match expr_op { + Some(e) => Some(e.1), + None => None, + }; + match columns_op { + Some(columns) => MergeOption::Unmatch(UnmatchedClause { + insert_operation: InsertOperation { + columns: Some(columns.1), + values, + is_star: false, + }, + selection, + }), + None => MergeOption::Unmatch(UnmatchedClause { + insert_operation: InsertOperation { + columns: None, + values, + is_star: false, + }, + selection, + }), + } + }, + ), + map( + rule! { + WHEN ~ NOT ~ MATCHED ~ (AND ~ ^#expr)? ~ THEN ~ INSERT ~ "*" + }, + |(_, _, _, expr_op, _, _, _)| { + let selection = match expr_op { + Some(e) => Some(e.1), + None => None, + }; + MergeOption::Unmatch(UnmatchedClause { insert_operation: InsertOperation { columns: None, - values, + values: Vec::new(), + is_star: true, }, selection, - }), - } - }, - )(i) + }) + }, + ), + ))(i) } pub fn add_column_option(i: Input) -> IResult { diff --git a/src/query/service/src/interpreters/interpreter_merge_into.rs b/src/query/service/src/interpreters/interpreter_merge_into.rs index b3590091cd62..d9f2142f5fc2 100644 --- a/src/query/service/src/interpreters/interpreter_merge_into.rs +++ b/src/query/service/src/interpreters/interpreter_merge_into.rs @@ -179,6 +179,7 @@ impl MergeIntoInterpreter { } else { None }; + let mut values_exprs = Vec::::with_capacity(item.values.len()); for scalar_expr in &item.values { @@ -208,6 +209,7 @@ impl MergeIntoInterpreter { } else { None }; + // update let update_list = if let Some(update_list) = &item.update { // use update_plan to get exprs @@ -224,7 +226,7 @@ impl MergeIntoInterpreter { let col_indices = if item.condition.is_none() { vec![] } else { - // we don't need to real col_indices here, just give a + // we don't need real col_indices here, just give a // dummy index, that's ok. vec![DUMMY_COL_INDEX] }; @@ -235,7 +237,6 @@ impl MergeIntoInterpreter { col_indices, Some(join_output_schema.num_fields()), )?; - let update_list = update_list .iter() .map(|(idx, remote_expr)| { diff --git a/src/query/sql/src/planner/binder/merge_into.rs b/src/query/sql/src/planner/binder/merge_into.rs index b5f967d65c83..987192f7c266 100644 --- a/src/query/sql/src/planner/binder/merge_into.rs +++ b/src/query/sql/src/planner/binder/merge_into.rs @@ -29,6 +29,7 @@ use common_catalog::plan::InternalColumnType; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; +use common_expression::FieldIndex; use common_expression::TableSchemaRef; use common_expression::ROW_ID_COL_NAME; use indexmap::IndexMap; @@ -38,15 +39,18 @@ use crate::binder::Binder; use crate::binder::InternalColumnBinding; use crate::normalize_identifier; use crate::optimizer::SExpr; +use crate::plans::BoundColumnRef; use crate::plans::MatchedEvaluator; use crate::plans::MergeInto; use crate::plans::Plan; use crate::plans::UnmatchedEvaluator; use crate::BindContext; +use crate::ColumnBindingBuilder; use crate::ColumnEntry; use crate::IndexType; use crate::ScalarBinder; use crate::ScalarExpr; +use crate::Visibility; // implementation of merge into for now: // use an left outer join for target_source and source. 
@@ -124,8 +128,66 @@ impl Binder { .await?; // add all left source columns for read + // todo: (JackTan25) do column prune after finish "split expr for target and source" let mut columns_set = left_context.column_set(); + let update_columns_star = if self.has_star_clause(&matched_clauses, &unmatched_clauses) { + // when there are "update *"/"insert *", we need to get the index of correlated columns in source. + let default_target_table_schema = table.schema().remove_computed_fields(); + let mut update_columns = HashMap::with_capacity( + default_target_table_schema + .remove_computed_fields() + .num_fields(), + ); + let source_output_columns = &left_context.columns; + // we use Vec as the value, because if there could be duplicate names + let mut name_map = HashMap::>::new(); + for column in source_output_columns { + name_map + .entry(column.column_name.clone()) + .or_insert_with(|| vec![]) + .push(column.index); + } + + for (field_idx, field) in default_target_table_schema.fields.iter().enumerate() { + let index = match name_map.get(field.name()) { + None => { + return Err(ErrorCode::SemanticError( + format!("can't find {} in source output", field.name).to_string(), + )); + } + Some(indices) => { + if indices.len() != 1 { + return Err(ErrorCode::SemanticError( + format!( + "there should be only one {} in source output,but we get {}", + field.name, + indices.len() + ) + .to_string(), + )); + } else { + indices[0] + } + } + }; + let column = ColumnBindingBuilder::new( + field.name.to_string(), + index, + Box::new(field.data_type().into()), + Visibility::Visible, + ) + .build(); + let col = ScalarExpr::BoundColumnRef(BoundColumnRef { span: None, column }); + update_columns.insert(field_idx, col); + } + Some(update_columns) + } else { + None + }; + + // Todo: (JackTan25) Maybe we can remove bind target_table + // when the target table has been binded in bind_merge_into_source // bind table for target table let (mut target_expr, mut right_context) = self .bind_single_table(&mut left_context, &target_table) @@ -193,6 +255,7 @@ impl Binder { .union(&scalar_binder.bind(join_expr).await?.0.used_columns()) .cloned() .collect(); + let column_entries = self.metadata.read().columns_by_table_index(table_index); let mut field_index_map = HashMap::::new(); // if true, read all columns of target table @@ -204,6 +267,7 @@ impl Binder { field_index_map.insert(idx, used_idx.to_string()); } } + // bind matched clause columns and add update fields and exprs for clause in &matched_clauses { matched_evaluators.push( @@ -212,6 +276,7 @@ impl Binder { clause, &mut columns_set, table_schema.clone(), + update_columns_star.clone(), ) .await?, ); @@ -225,6 +290,7 @@ impl Binder { clause, &mut columns_set, table_schema.clone(), + update_columns_star.clone(), ) .await?, ); @@ -252,6 +318,7 @@ impl Binder { clause: &MatchedClause, columns: &mut HashSet, schema: TableSchemaRef, + update_columns_star: Option>, ) -> Result { let condition = if let Some(expr) = &clause.selection { let (scalar_expr, _) = scalar_binder.bind(expr).await?; @@ -263,42 +330,54 @@ impl Binder { None }; - if let MatchOperation::Update { update_list } = &clause.operation { - let mut update_columns = HashMap::with_capacity(update_list.len()); - for update_expr in update_list { - let (scalar_expr, _) = scalar_binder.bind(&update_expr.expr).await?; - let col_name = - normalize_identifier(&update_expr.name, &self.name_resolution_ctx).name; - let index = schema.index_of(&col_name)?; - - if update_columns.contains_key(&index) { - return 
Err(ErrorCode::BadArguments(format!( - "Multiple assignments in the single statement to column `{}`", - col_name - ))); - } - - let field = schema.field(index); - if field.computed_expr().is_some() { - return Err(ErrorCode::BadArguments(format!( - "The value specified for computed column '{}' is not allowed", - field.name() - ))); + if let MatchOperation::Update { + update_list, + is_star, + } = &clause.operation + { + if *is_star { + Ok(MatchedEvaluator { + condition, + update: update_columns_star, + }) + } else { + let mut update_columns = HashMap::with_capacity(update_list.len()); + for update_expr in update_list { + let (scalar_expr, _) = scalar_binder.bind(&update_expr.expr).await?; + let col_name = + normalize_identifier(&update_expr.name, &self.name_resolution_ctx).name; + let index = schema.index_of(&col_name)?; + + if update_columns.contains_key(&index) { + return Err(ErrorCode::BadArguments(format!( + "Multiple assignments in the single statement to column `{}`", + col_name + ))); + } + + let field = schema.field(index); + if field.computed_expr().is_some() { + return Err(ErrorCode::BadArguments(format!( + "The value specified for computed column '{}' is not allowed", + field.name() + ))); + } + + if matches!(scalar_expr, ScalarExpr::SubqueryExpr(_)) { + return Err(ErrorCode::Internal( + "update_list in update clause does not support subquery temporarily", + )); + } + update_columns.insert(index, scalar_expr.clone()); } - if matches!(scalar_expr, ScalarExpr::SubqueryExpr(_)) { - return Err(ErrorCode::Internal( - "update_list in update clause does not support subquery temporarily", - )); - } - update_columns.insert(index, scalar_expr.clone()); + Ok(MatchedEvaluator { + condition, + update: Some(update_columns), + }) } - - Ok(MatchedEvaluator { - condition, - update: Some(update_columns), - }) } else { + // delete Ok(MatchedEvaluator { condition, update: None, @@ -312,6 +391,7 @@ impl Binder { clause: &UnmatchedClause, columns: &mut HashSet, table_schema: TableSchemaRef, + update_columns_star: Option>, ) -> Result { let condition = if let Some(expr) = &clause.selection { let (scalar_expr, _) = scalar_binder.bind(expr).await?; @@ -322,42 +402,59 @@ impl Binder { } else { None }; - - if clause.insert_operation.values.is_empty() { - return Err(ErrorCode::SemanticError( - "Values lists must have at least one row".to_string(), - )); - } - - let mut values = Vec::with_capacity(clause.insert_operation.values.len()); - - // we need to get source schema, and use it for filling columns. - let source_schema = if let Some(fields) = clause.insert_operation.columns.clone() { - self.schema_project(&table_schema, &fields)? 
+ if clause.insert_operation.is_star { + let default_schema = table_schema.remove_computed_fields(); + let mut values = Vec::with_capacity(default_schema.num_fields()); + let update_columns_star = update_columns_star.unwrap(); + for idx in 0..default_schema.num_fields() { + values.push(update_columns_star.get(&idx).unwrap().clone()); + } + Ok(UnmatchedEvaluator { + source_schema: Arc::new(Arc::new(default_schema).into()), + condition, + values, + }) } else { - table_schema.clone() - }; - - for (idx, expr) in clause.insert_operation.values.iter().enumerate() { - let (mut scalar_expr, _) = scalar_binder.bind(expr).await?; - // type cast - scalar_expr = wrap_cast_scalar( - &scalar_expr, - &scalar_expr.data_type()?, - &DataType::from(source_schema.field(idx).data_type()), - )?; + if clause.insert_operation.values.is_empty() { + return Err(ErrorCode::SemanticError( + "Values lists must have at least one row".to_string(), + )); + } - values.push(scalar_expr.clone()); - for idx in scalar_expr.used_columns() { - columns.insert(idx); + let mut values = Vec::with_capacity(clause.insert_operation.values.len()); + + // we need to get source schema, and use it for filling columns. + let source_schema = if let Some(fields) = clause.insert_operation.columns.clone() { + self.schema_project(&table_schema, &fields)? + } else { + table_schema.clone() + }; + if clause.insert_operation.values.len() != source_schema.num_fields() { + return Err(ErrorCode::SemanticError( + "insert columns and values are not matched".to_string(), + )); + } + for (idx, expr) in clause.insert_operation.values.iter().enumerate() { + let (mut scalar_expr, _) = scalar_binder.bind(expr).await?; + // type cast + scalar_expr = wrap_cast_scalar( + &scalar_expr, + &scalar_expr.data_type()?, + &DataType::from(source_schema.field(idx).data_type()), + )?; + + values.push(scalar_expr.clone()); + for idx in scalar_expr.used_columns() { + columns.insert(idx); + } } - } - Ok(UnmatchedEvaluator { - source_schema: Arc::new(source_schema.into()), - condition, - values, - }) + Ok(UnmatchedEvaluator { + source_schema: Arc::new(source_schema.into()), + condition, + values, + }) + } } fn find_column_index( @@ -378,7 +475,36 @@ impl Binder { fn has_update(&self, matched_clauses: &Vec) -> bool { for clause in matched_clauses { - if let MatchOperation::Update { update_list: _ } = clause.operation { + if let MatchOperation::Update { + update_list: _, + is_star: _, + } = clause.operation + { + return true; + } + } + false + } + + fn has_star_clause( + &self, + matched_clauses: &Vec, + unmatched_clauses: &Vec, + ) -> bool { + for item in matched_clauses { + if let MatchOperation::Update { + update_list: _, + is_star, + } = item.operation + { + if is_star { + return true; + } + } + } + + for item in unmatched_clauses { + if item.insert_operation.is_star { return true; } } diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs b/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs index b316fb4a64f1..c8e4d55cb095 100644 --- a/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs +++ b/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs @@ -34,7 +34,7 @@ use common_storage::metrics::merge_into::metrics_inc_merge_into_append_blocks_co use itertools::Itertools; use crate::operations::merge_into::mutator::SplitByExprMutator; - +// (source_schema,condition,values_exprs) type 
UnMatchedExprs = Vec<(DataSchemaRef, Option, Vec)>; struct InsertDataBlockMutation { @@ -65,6 +65,7 @@ impl MergeIntoNotMatchedProcessor { for (idx, item) in unmatched.iter().enumerate() { let eval_projections: HashSet = (input_schema.num_fields()..input_schema.num_fields() + item.2.len()).collect(); + println!("data_schema: {:?}", item.0.clone()); data_schemas.insert(idx, item.0.clone()); ops.push(InsertDataBlockMutation { op: BlockOperator::Map { diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into b/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into index 0381d8a835b9..d42d90d7ae16 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into @@ -191,5 +191,61 @@ select * from t1 order by a,b,c; ---- 1 b1 c_5 +## test star for merge into +statement ok +truncate table t1; + +statement ok +truncate table t2; + +query I +select count(*) from t1; +---- +0 + +query I +select count(*) from t2; +---- +0 + +statement ok +insert into t1 values(1,'b1','c1'),(2,'b2','c2'); + +query TTT +select * from t1 order by a,b,c; +---- +1 b1 c1 +2 b2 c2 + +statement ok +insert into t2 values(1,'b3','c3'),(3,'b4','c4'); + +query TTT +select * from t2 order by a,b,c; +---- +1 b3 c3 +3 b4 c4 + +## test insert columns mismatch +statement error 1065 +merge into t1 using (select * from t2 as t2) on t1.a = t2.a when not matched then insert values(t2.a,t2.c); + +statement ok +merge into t1 using (select * from t2 as t2) on t1.a = t2.a when matched then update * when not matched then insert *; + +query TTT +select * from t1 order by a,b,c; +---- +1 b3 c3 +2 b2 c2 +3 b4 c4 + +## test multi same name for star +statement error 1065 +merge into t1 using (select a,b,c,a from t2 as t2) on t1.a = t2.a when matched then update *; + +statement error 1065 +merge into t1 using (select a,b,c,a,b from t2 as t2) on t1.a = t2.a when not matched then insert *; + statement ok set enable_experimental_merge_into = 0; \ No newline at end of file From 80d9e783b40a408aacea65a86288705397dc90e1 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Thu, 21 Sep 2023 23:20:50 +0800 Subject: [PATCH 19/21] chore(sqlsmith): support select stmt having && modify some err msg (#12959) * chore(sqlsmith): support select stmt having && modify some err msg * fix conversation * sqlsmith support generate window_list * fix ci err --- src/query/expression/src/type_check.rs | 7 +- .../sql/src/planner/semantic/type_check.rs | 93 +++++++++++++------ src/tests/sqlsmith/src/sql_gen/func.rs | 26 +++++- src/tests/sqlsmith/src/sql_gen/query.rs | 37 ++++++-- .../sqlsmith/src/sql_gen/sql_generator.rs | 2 + .../02_0000_function_aggregate_mix | 2 +- 6 files changed, 126 insertions(+), 41 deletions(-) diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index 412c0284fa4f..9c1ee5830be4 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -282,12 +282,7 @@ pub fn check_number( ErrorCode::InvalidArgument(format!("Expect {}, but got {}", T::data_type(), origin_ty)) .set_span(span) }), - _ => Err(ErrorCode::InvalidArgument(format!( - "Expect {}, but got {}", - T::data_type(), - origin_ty - )) - .set_span(span)), + _ => Err(ErrorCode::InvalidArgument("Need constant number.").set_span(span)), } } diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 
187400e9d297..f248b154e0ad 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -54,6 +54,7 @@ use common_expression::types::NumberDataType; use common_expression::types::NumberScalar; use common_expression::ColumnIndex; use common_expression::ConstantFolder; +use common_expression::Expr as EExpr; use common_expression::FunctionContext; use common_expression::FunctionKind; use common_expression::RawExpr; @@ -1410,19 +1411,28 @@ impl<'a> TypeChecker<'a> { arg_types: &[DataType], ) -> Result { if args.is_empty() || args.len() > 3 { - return Err(ErrorCode::InvalidArgument( - "Argument number is invalid".to_string(), - )); + return Err(ErrorCode::InvalidArgument(format!( + "Function {:?} only support 1 to 3 arguments", + func_name + ))); } let offset = if args.len() >= 2 { let off = args[1].as_expr()?; - Some(check_number::<_, i64>( - off.span(), - &self.func_ctx, - &off, - &BUILTIN_FUNCTIONS, - )?) + match off { + EExpr::Constant { .. } => Some(check_number::<_, i64>( + off.span(), + &self.func_ctx, + &off, + &BUILTIN_FUNCTIONS, + )?), + _ => { + return Err(ErrorCode::InvalidArgument(format!( + "The second argument to the function {:?} must be a constant", + func_name + ))); + } + } } else { None }; @@ -1473,9 +1483,10 @@ impl<'a> TypeChecker<'a> { Ok(match func_name { "first_value" | "first" => { if args.len() != 1 { - return Err(ErrorCode::InvalidArgument( - "Argument number is invalid".to_string(), - )); + return Err(ErrorCode::InvalidArgument(format!( + "The function {:?} must take one argument", + func_name + ))); } let return_type = arg_types[0].wrap_nullable(); WindowFuncType::NthValue(NthValueFunction { @@ -1486,9 +1497,10 @@ impl<'a> TypeChecker<'a> { } "last_value" | "last" => { if args.len() != 1 { - return Err(ErrorCode::InvalidArgument( - "Argument number is invalid".to_string(), - )); + return Err(ErrorCode::InvalidArgument(format!( + "The function {:?} must take one argument", + func_name + ))); } let return_type = arg_types[0].wrap_nullable(); WindowFuncType::NthValue(NthValueFunction { @@ -1501,17 +1513,24 @@ impl<'a> TypeChecker<'a> { // nth_value if args.len() != 2 { return Err(ErrorCode::InvalidArgument( - "Argument number is invalid".to_string(), + "The function nth_value must take two arguments".to_string(), )); } let return_type = arg_types[0].wrap_nullable(); let n_expr = args[1].as_expr()?; - let n = check_number::<_, u64>( - n_expr.span(), - &self.func_ctx, - &n_expr, - &BUILTIN_FUNCTIONS, - )?; + let n = match n_expr { + EExpr::Constant { .. } => check_number::<_, u64>( + n_expr.span(), + &self.func_ctx, + &n_expr, + &BUILTIN_FUNCTIONS, + )?, + _ => { + return Err(ErrorCode::InvalidArgument( + "The count of `nth_value` must be constant positive integer", + )); + } + }; if n == 0 { return Err(ErrorCode::InvalidArgument( "nth_value should count from 1".to_string(), @@ -1534,12 +1553,21 @@ impl<'a> TypeChecker<'a> { ) -> Result { if args.len() != 1 { return Err(ErrorCode::InvalidArgument( - "Argument number is invalid".to_string(), + "Function ntile can only take one argument".to_string(), )); } let n_expr = args[0].as_expr()?; let return_type = DataType::Number(NumberDataType::UInt64); - let n = check_number::<_, u64>(n_expr.span(), &self.func_ctx, &n_expr, &BUILTIN_FUNCTIONS)?; + let n = match n_expr { + EExpr::Constant { .. } => { + check_number::<_, u64>(n_expr.span(), &self.func_ctx, &n_expr, &BUILTIN_FUNCTIONS)? 
+ } + _ => { + return Err(ErrorCode::InvalidArgument( + "The argument of `ntile` must be constant".to_string(), + )); + } + }; if n == 0 { return Err(ErrorCode::InvalidArgument( "ntile buckets must be greater than 0".to_string(), @@ -1981,7 +2009,7 @@ impl<'a> TypeChecker<'a> { ) .await } - _ => Err(ErrorCode::SemanticError("Only these interval types are currently supported: [year, month, day, hour, minute, second]".to_string()).set_span(span)), + _ => Err(ErrorCode::SemanticError("Only these interval types are currently supported: [year, quarter, month, day, hour, minute, second]".to_string()).set_span(span)), } } @@ -2247,7 +2275,20 @@ impl<'a> TypeChecker<'a> { let box (scalar, _) = self.resolve(args[0]).await?; let expr = scalar.as_expr()?; - check_number::<_, i64>(span, &self.func_ctx, &expr, &BUILTIN_FUNCTIONS)? + match expr { + EExpr::Constant { .. } => check_number::<_, i64>( + span, + &self.func_ctx, + &expr, + &BUILTIN_FUNCTIONS, + )?, + _ => { + return Some(Err(ErrorCode::BadArguments( + "last_query_id argument only support constant", + ) + .set_span(span))); + } + } } }; diff --git a/src/tests/sqlsmith/src/sql_gen/func.rs b/src/tests/sqlsmith/src/sql_gen/func.rs index 22209fb959cf..76b8f6fcaf61 100644 --- a/src/tests/sqlsmith/src/sql_gen/func.rs +++ b/src/tests/sqlsmith/src/sql_gen/func.rs @@ -21,6 +21,7 @@ use common_ast::ast::Window; use common_ast::ast::WindowFrame; use common_ast::ast::WindowFrameBound; use common_ast::ast::WindowFrameUnits; +use common_ast::ast::WindowRef; use common_ast::ast::WindowSpec; use common_expression::types::DataType; use common_expression::types::DecimalDataType::Decimal128; @@ -615,6 +616,27 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } fn gen_window(&mut self) -> Option { + if self.rng.gen_bool(0.2) && !self.windows_name.is_empty() { + let len = self.windows_name.len(); + let name = if len == 1 { + self.windows_name[0].to_string() + } else { + self.windows_name[self.rng.gen_range(0..=len - 1)].to_string() + }; + Some(Window::WindowReference(WindowRef { + window_name: Identifier { + name, + quote: None, + span: None, + }, + })) + } else { + let window_spec = self.gen_window_spec(); + Some(Window::WindowSpec(window_spec)) + } + } + + pub(crate) fn gen_window_spec(&mut self) -> WindowSpec { let ty = self.gen_data_type(); let expr1 = self.gen_scalar_value(&ty); let expr2 = self.gen_scalar_value(&ty); @@ -633,7 +655,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { nulls_first: Some(true), }, ]; - Some(Window::WindowSpec(WindowSpec { + WindowSpec { existing_window_name: None, partition_by: vec![expr3, expr4], order_by, @@ -646,7 +668,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { end_bound: WindowFrameBound::CurrentRow, }) }, - })) + } } fn gen_func( diff --git a/src/tests/sqlsmith/src/sql_gen/query.rs b/src/tests/sqlsmith/src/sql_gen/query.rs index cd3d652eb7cc..20b61ecd2572 100644 --- a/src/tests/sqlsmith/src/sql_gen/query.rs +++ b/src/tests/sqlsmith/src/sql_gen/query.rs @@ -28,6 +28,7 @@ use common_ast::ast::SelectTarget; use common_ast::ast::SetExpr; use common_ast::ast::TableAlias; use common_ast::ast::TableReference; +use common_ast::ast::WindowDefinition; use common_ast::ast::With; use common_ast::ast::CTE; use common_expression::infer_schema_type; @@ -289,28 +290,52 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } fn gen_select(&mut self) -> SelectStmt { + self.windows_name.clear(); let from = self.gen_from(); let group_by = self.gen_group_by(); self.group_by = group_by.clone(); + let window_list = self.gen_window_list(); + if let Some(window_list) = 
window_list { + for window in window_list { + self.windows_name.push(window.name.name) + } + } let select_list = self.gen_select_list(&group_by); let selection = self.gen_selection(); SelectStmt { span: None, // TODO hints: None, - // TODO - distinct: false, + distinct: self.rng.gen_bool(0.7), select_list, from, selection, group_by, - // TODO - having: None, - // TODO - window_list: None, + having: self.gen_selection(), + window_list: self.gen_window_list(), } } + fn gen_window_list(&mut self) -> Option> { + if self.rng.gen_bool(0.1) { + let mut res = vec![]; + for _ in 0..self.rng.gen_range(1..3) { + let name: String = (0..4) + .map(|_| self.rng.sample(Alphanumeric) as char) + .collect(); + let window_name = format!("w_{}", name); + let spec = self.gen_window_spec(); + let window_def = WindowDefinition { + name: Identifier::from_name(window_name), + spec, + }; + res.push(window_def); + } + return Some(res); + } + None + } + fn gen_group_by(&mut self) -> Option { if self.rng.gen_bool(0.8) { return None; diff --git a/src/tests/sqlsmith/src/sql_gen/sql_generator.rs b/src/tests/sqlsmith/src/sql_gen/sql_generator.rs index cfc50082fbde..b7a9b00001e8 100644 --- a/src/tests/sqlsmith/src/sql_gen/sql_generator.rs +++ b/src/tests/sqlsmith/src/sql_gen/sql_generator.rs @@ -48,6 +48,7 @@ pub(crate) struct SqlGenerator<'a, R: Rng> { pub(crate) scalar_func_sigs: Vec, pub(crate) rng: &'a mut R, pub(crate) group_by: Option, + pub(crate) windows_name: Vec, } impl<'a, R: Rng> SqlGenerator<'a, R> { @@ -82,6 +83,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { scalar_func_sigs, rng, group_by: None, + windows_name: vec![], } } } diff --git a/tests/sqllogictests/suites/query/02_function/02_0000_function_aggregate_mix b/tests/sqllogictests/suites/query/02_function/02_0000_function_aggregate_mix index 8cce74ab1dfb..62d74fdf39dd 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0000_function_aggregate_mix +++ b/tests/sqllogictests/suites/query/02_function/02_0000_function_aggregate_mix @@ -323,7 +323,7 @@ select group_array_moving_avg(k), group_array_moving_avg(2)(v) from aggr; ---- [0.09090909090909091,0.2727272727272727,0.45454545454545453,0.6363636363636364,0.8181818181818182,1.0,1.1818181818181819,1.3636363636363635,1.5454545454545454,1.7272727272727273,1.9090909090909092] [5.0,10.0,10.0,10.0,15.0,20.0,22.5,27.5,30.0,30.0,30.0] -statement error Expect UInt64, but got String +statement error Need constant number SELECT group_array_moving_sum('x')(-1130932975.87767); query TTT From 2d61b2f6cee34977598e247226b9c4f7950334e3 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 21 Sep 2023 17:46:12 -0700 Subject: [PATCH 20/21] feat(query): support decimal to int (#12967) * feat(query): support decimal to int * feat(query): test --- src/query/expression/src/types/decimal.rs | 14 + src/query/functions/src/scalars/arithmetic.rs | 137 ++++----- src/query/functions/src/scalars/decimal.rs | 167 ++++++++++- .../it/scalars/testdata/function_list.txt | 278 ++++++++++-------- .../11_data_type/11_0006_data_type_decimal | 11 + 5 files changed, 415 insertions(+), 192 deletions(-) diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index 6e0375d65f5f..4c1f07904483 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -22,6 +22,7 @@ use common_exception::Result; use enum_as_inner::EnumAsInner; use ethnum::i256; use itertools::Itertools; +use num_traits::NumCast; use num_traits::ToPrimitive; use 
serde::Deserialize; use serde::Serialize; @@ -285,6 +286,8 @@ pub trait Decimal: fn to_float32(self, scale: u8) -> f32; fn to_float64(self, scale: u8) -> f64; + fn to_int(self, scale: u8) -> Option; + fn try_downcast_column(column: &Column) -> Option<(Buffer, DecimalSize)>; fn try_downcast_builder<'a>(builder: &'a mut ColumnBuilder) -> Option<&'a mut Vec>; @@ -414,6 +417,11 @@ impl Decimal for i128 { self as f64 / div } + fn to_int(self, scale: u8) -> Option { + let div = 10i128.checked_pow(scale as u32)?; + num_traits::cast(self / div) + } + fn to_scalar(self, size: DecimalSize) -> DecimalScalar { DecimalScalar::Decimal128(self, size) } @@ -563,6 +571,12 @@ impl Decimal for i256 { self.as_f64() / div } + fn to_int(self, scale: u8) -> Option { + let div = i256::from(10).checked_pow(scale as u32)?; + let (h, l) = (self / div).into_words(); + if h > 0 { None } else { l.to_int(scale) } + } + fn to_scalar(self, size: DecimalSize) -> DecimalScalar { DecimalScalar::Decimal256(self, size) } diff --git a/src/query/functions/src/scalars/arithmetic.rs b/src/query/functions/src/scalars/arithmetic.rs index 129b6f76c990..6238364b4881 100644 --- a/src/query/functions/src/scalars/arithmetic.rs +++ b/src/query/functions/src/scalars/arithmetic.rs @@ -74,6 +74,7 @@ use num_traits::AsPrimitive; use super::arithmetic_modulo::vectorize_modulo; use super::decimal::register_decimal_to_float32; use super::decimal::register_decimal_to_float64; +use super::decimal::register_decimal_to_int; use crate::scalars::decimal::register_decimal_arithmetic; pub fn register(registry: &mut FunctionRegistry) { @@ -582,92 +583,96 @@ pub fn register_number_to_number(registry: &mut FunctionRegistry) { let name = format!("to_{dest_type}").to_lowercase(); if src_type.can_lossless_cast_to(*dest_type) { registry.register_1_arg::, NumberType, _, _>( - &name, - |_, domain| { - let (domain, overflowing) = domain.overflow_cast(); - debug_assert!(!overflowing); - FunctionDomain::Domain(domain) - }, - |val, _| { - val.as_() - }, - ); + &name, + |_, domain| { + let (domain, overflowing) = domain.overflow_cast(); + debug_assert!(!overflowing); + FunctionDomain::Domain(domain) + }, + |val, _| { + val.as_() + }, + ); } else { registry.register_passthrough_nullable_1_arg::, NumberType, _, _>( - &name, - |_, domain| { - let (domain, overflowing) = domain.overflow_cast(); - if overflowing { - FunctionDomain::MayThrow - } else { - FunctionDomain::Domain(domain) - } - }, - vectorize_with_builder_1_arg::, NumberType>( - move |val, output, ctx| { - match num_traits::cast::cast(val) { - Some(val) => output.push(val), - None => { - ctx.set_error(output.len(),"number overflowed"); - output.push(DEST_TYPE::default()); + &name, + |_, domain| { + let (domain, overflowing) = domain.overflow_cast(); + if overflowing { + FunctionDomain::MayThrow + } else { + FunctionDomain::Domain(domain) + } }, - } - } - ), - ); + vectorize_with_builder_1_arg::, NumberType>( + move |val, output, ctx| { + match num_traits::cast::cast(val) { + Some(val) => output.push(val), + None => { + ctx.set_error(output.len(),"number overflowed"); + output.push(DEST_TYPE::default()); + }, + } + } + ), + ); } let name = format!("try_to_{dest_type}").to_lowercase(); if src_type.can_lossless_cast_to(*dest_type) { registry.register_combine_nullable_1_arg::, NumberType, _, _>( - &name, - |_, domain| { - let (domain, overflowing) = domain.overflow_cast(); - debug_assert!(!overflowing); - FunctionDomain::Domain(NullableDomain { - has_null: false, - value: Some(Box::new( - domain, - )), - }) - }, - 
vectorize_1_arg::, NullableType>>(|val, _| { - Some(val.as_()) - }) - ); + &name, + |_, domain| { + let (domain, overflowing) = domain.overflow_cast(); + debug_assert!(!overflowing); + FunctionDomain::Domain(NullableDomain { + has_null: false, + value: Some(Box::new( + domain, + )), + }) + }, + vectorize_1_arg::, NullableType>>(|val, _| { + Some(val.as_()) + }) + ); } else { registry.register_combine_nullable_1_arg::, NumberType, _, _>( - &name, - |_, domain| { - let (domain, overflowing) = domain.overflow_cast(); - FunctionDomain::Domain(NullableDomain { - has_null: overflowing, - value: Some(Box::new( - domain, - )), - }) - }, - vectorize_with_builder_1_arg::, NullableType>>( - |val, output, _| { - if let Some(new_val) = num_traits::cast::cast(val) { - output.push(new_val); - } else { - output.push_null(); - } - } - ), - ); + &name, + |_, domain| { + let (domain, overflowing) = domain.overflow_cast(); + FunctionDomain::Domain(NullableDomain { + has_null: overflowing, + value: Some(Box::new( + domain, + )), + }) + }, + vectorize_with_builder_1_arg::, NullableType>>( + |val, output, _| { + if let Some(new_val) = num_traits::cast::cast(val) { + output.push(new_val); + } else { + output.push_null(); + } + } + ), + ); } } }), NumberClass::Decimal128 => { - // todo(youngsofun): add decimal try_cast and decimal to int + // todo(youngsofun): add decimal try_cast and decimal to int and float if matches!(dest_type, NumberDataType::Float32) { register_decimal_to_float32(registry); } if matches!(dest_type, NumberDataType::Float64) { register_decimal_to_float64(registry); } + + with_number_mapped_type!(|DEST_TYPE| match dest_type { + NumberDataType::DEST_TYPE => register_decimal_to_int::(registry), + }) } NumberClass::Decimal256 => { // already registered in Decimal128 branch diff --git a/src/query/functions/src/scalars/decimal.rs b/src/query/functions/src/scalars/decimal.rs index 8538e9c480cf..ee5ae09d755e 100644 --- a/src/query/functions/src/scalars/decimal.rs +++ b/src/query/functions/src/scalars/decimal.rs @@ -627,7 +627,7 @@ pub fn register(registry: &mut FunctionRegistry) { } pub(crate) fn register_decimal_to_float64(registry: &mut FunctionRegistry) { - registry.register_function_factory("to_float64", |_params, args_type| { + let factory = |_params: &[usize], args_type: &[DataType]| { if args_type.len() != 1 { return None; } @@ -638,7 +638,7 @@ pub(crate) fn register_decimal_to_float64(registry: &mut FunctionRegistry) { return None; } - Some(Arc::new(Function { + let function = Function { signature: FunctionSignature { name: "to_float64".to_string(), args_type: vec![arg_type.clone()], @@ -661,12 +661,32 @@ pub(crate) fn register_decimal_to_float64(registry: &mut FunctionRegistry) { }), eval: Box::new(move |args, tx| decimal_to_float64(&args[0], arg_type.clone(), tx)), }, - })) + }; + + Some(function) + }; + + registry.register_function_factory("to_float64", move |params, args_type| { + Some(Arc::new(factory(params, args_type)?)) + }); + registry.register_function_factory("to_float64", move |params, args_type| { + let f = factory(params, args_type)?; + Some(Arc::new(f.passthrough_nullable())) + }); + registry.register_function_factory("try_to_float64", move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = "try_to_float64".to_string(); + Some(Arc::new(f.error_to_null())) + }); + registry.register_function_factory("try_to_float64", move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = "try_to_float64".to_string(); + 
Some(Arc::new(f.error_to_null().passthrough_nullable())) }); } pub(crate) fn register_decimal_to_float32(registry: &mut FunctionRegistry) { - registry.register_function_factory("to_float32", |_params, args_type| { + let factory = |_params: &[usize], args_type: &[DataType]| { if args_type.len() != 1 { return None; } @@ -676,7 +696,7 @@ pub(crate) fn register_decimal_to_float32(registry: &mut FunctionRegistry) { return None; } - Some(Arc::new(Function { + let function = Function { signature: FunctionSignature { name: "to_float32".to_string(), args_type: vec![arg_type.clone()], @@ -699,7 +719,79 @@ pub(crate) fn register_decimal_to_float32(registry: &mut FunctionRegistry) { }), eval: Box::new(move |args, tx| decimal_to_float32(&args[0], arg_type.clone(), tx)), }, - })) + }; + + Some(function) + }; + + registry.register_function_factory("to_float32", move |params, args_type| { + Some(Arc::new(factory(params, args_type)?)) + }); + registry.register_function_factory("to_float32", move |params, args_type| { + let f = factory(params, args_type)?; + Some(Arc::new(f.passthrough_nullable())) + }); + registry.register_function_factory("try_to_float32", move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = "try_to_float32".to_string(); + Some(Arc::new(f.error_to_null())) + }); + registry.register_function_factory("try_to_float32", move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = "try_to_float32".to_string(); + Some(Arc::new(f.error_to_null().passthrough_nullable())) + }); +} + +pub(crate) fn register_decimal_to_int(registry: &mut FunctionRegistry) { + if T::data_type().is_float() { + return; + } + let name = format!("to_{}", T::data_type().to_string().to_lowercase()); + let try_name = format!("try_to_{}", T::data_type().to_string().to_lowercase()); + + let factory = |_params: &[usize], args_type: &[DataType]| { + if args_type.len() != 1 { + return None; + } + + let name = format!("to_{}", T::data_type().to_string().to_lowercase()); + let arg_type = args_type[0].remove_nullable(); + if !arg_type.is_decimal() { + return None; + } + + let function = Function { + signature: FunctionSignature { + name, + args_type: vec![arg_type.clone()], + return_type: DataType::Number(T::data_type()), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::MayThrow), + eval: Box::new(move |args, tx| decimal_to_int::(&args[0], arg_type.clone(), tx)), + }, + }; + + Some(function) + }; + + registry.register_function_factory(&name, move |params, args_type| { + Some(Arc::new(factory(params, args_type)?)) + }); + registry.register_function_factory(&name, move |params, args_type| { + let f = factory(params, args_type)?; + Some(Arc::new(f.passthrough_nullable())) + }); + registry.register_function_factory(&try_name, move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = format!("try_to_{}", T::data_type().to_string().to_lowercase()); + Some(Arc::new(f.error_to_null())) + }); + registry.register_function_factory(&try_name, move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = format!("try_to_{}", T::data_type().to_string().to_lowercase()); + Some(Arc::new(f.error_to_null().passthrough_nullable())) }); } @@ -1309,3 +1401,66 @@ fn decimal_to_float32( Value::Column(result) } } + +fn decimal_to_int( + arg: &ValueRef, + from_type: DataType, + ctx: &mut EvalContext, +) -> Value { + let mut is_scalar = false; + let column = match arg { + ValueRef::Column(column) 
=> column.clone(), + ValueRef::Scalar(s) => { + is_scalar = true; + let builder = ColumnBuilder::repeat(s, 1, &from_type); + builder.build() + } + }; + + let from_type = from_type.as_decimal().unwrap(); + + let result = match from_type { + DecimalDataType::Decimal128(_) => { + let (buffer, from_size) = i128::try_downcast_column(&column).unwrap(); + + let mut values = Vec::with_capacity(ctx.num_rows); + + for (i, x) in buffer.iter().enumerate() { + let x = x.to_int(from_size.scale); + match x { + Some(x) => values.push(x), + None => { + ctx.set_error(i, "decimal cast to int overflow"); + values.push(T::default()) + } + } + } + + NumberType::::upcast_column(Buffer::from(values)) + } + + DecimalDataType::Decimal256(_) => { + let (buffer, from_size) = i256::try_downcast_column(&column).unwrap(); + let mut values = Vec::with_capacity(ctx.num_rows); + + for (i, x) in buffer.iter().enumerate() { + let x = x.to_int(from_size.scale); + match x { + Some(x) => values.push(x), + None => { + ctx.set_error(i, "decimal cast to int overflow"); + values.push(T::default()) + } + } + } + NumberType::::upcast_column(Buffer::from(values)) + } + }; + + if is_scalar { + let scalar = result.index(0).unwrap(); + Value::Scalar(scalar.to_owned()) + } else { + Value::Column(result) + } +} diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index e898b283a63b..cc2fc8b735b7 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -3219,10 +3219,11 @@ Functions overloads: 18 to_float32(Int64) :: Float32 19 to_float32(Int64 NULL) :: Float32 NULL 20 to_float32 FACTORY -21 to_float32(Float64) :: Float32 -22 to_float32(Float64 NULL) :: Float32 NULL -23 to_float32(Boolean) :: Float32 -24 to_float32(Boolean NULL) :: Float32 NULL +21 to_float32 FACTORY +22 to_float32(Float64) :: Float32 +23 to_float32(Float64 NULL) :: Float32 NULL +24 to_float32(Boolean) :: Float32 +25 to_float32(Boolean NULL) :: Float32 NULL 0 to_float64(Variant) :: Float64 1 to_float64(Variant NULL) :: Float64 NULL 2 to_float64(String) :: Float64 @@ -3244,10 +3245,11 @@ Functions overloads: 18 to_float64(Int64) :: Float64 19 to_float64(Int64 NULL) :: Float64 NULL 20 to_float64 FACTORY -21 to_float64(Float32) :: Float64 -22 to_float64(Float32 NULL) :: Float64 NULL -23 to_float64(Boolean) :: Float64 -24 to_float64(Boolean NULL) :: Float64 NULL +21 to_float64 FACTORY +22 to_float64(Float32) :: Float64 +23 to_float64(Float32 NULL) :: Float64 NULL +24 to_float64(Boolean) :: Float64 +25 to_float64(Boolean NULL) :: Float64 NULL 0 to_hour(Timestamp) :: UInt8 1 to_hour(Timestamp NULL) :: UInt8 NULL 0 to_int16(Variant) :: Int16 @@ -3268,12 +3270,14 @@ Functions overloads: 15 to_int16(UInt64 NULL) :: Int16 NULL 16 to_int16(Int64) :: Int16 17 to_int16(Int64 NULL) :: Int16 NULL -18 to_int16(Float32) :: Int16 -19 to_int16(Float32 NULL) :: Int16 NULL -20 to_int16(Float64) :: Int16 -21 to_int16(Float64 NULL) :: Int16 NULL -22 to_int16(Boolean) :: Int16 -23 to_int16(Boolean NULL) :: Int16 NULL +18 to_int16 FACTORY +19 to_int16 FACTORY +20 to_int16(Float32) :: Int16 +21 to_int16(Float32 NULL) :: Int16 NULL +22 to_int16(Float64) :: Int16 +23 to_int16(Float64 NULL) :: Int16 NULL +24 to_int16(Boolean) :: Int16 +25 to_int16(Boolean NULL) :: Int16 NULL 0 to_int32(Variant) :: Int32 1 to_int32(Variant NULL) :: Int32 NULL 2 to_int32(String) :: Int32 @@ -3292,12 +3296,14 @@ Functions overloads: 15 
to_int32(UInt64 NULL) :: Int32 NULL 16 to_int32(Int64) :: Int32 17 to_int32(Int64 NULL) :: Int32 NULL -18 to_int32(Float32) :: Int32 -19 to_int32(Float32 NULL) :: Int32 NULL -20 to_int32(Float64) :: Int32 -21 to_int32(Float64 NULL) :: Int32 NULL -22 to_int32(Boolean) :: Int32 -23 to_int32(Boolean NULL) :: Int32 NULL +18 to_int32 FACTORY +19 to_int32 FACTORY +20 to_int32(Float32) :: Int32 +21 to_int32(Float32 NULL) :: Int32 NULL +22 to_int32(Float64) :: Int32 +23 to_int32(Float64 NULL) :: Int32 NULL +24 to_int32(Boolean) :: Int32 +25 to_int32(Boolean NULL) :: Int32 NULL 0 to_int64(Variant) :: Int64 1 to_int64(Variant NULL) :: Int64 NULL 2 to_int64(String) :: Int64 @@ -3316,16 +3322,18 @@ Functions overloads: 15 to_int64(Int32 NULL) :: Int64 NULL 16 to_int64(UInt64) :: Int64 17 to_int64(UInt64 NULL) :: Int64 NULL -18 to_int64(Float32) :: Int64 -19 to_int64(Float32 NULL) :: Int64 NULL -20 to_int64(Float64) :: Int64 -21 to_int64(Float64 NULL) :: Int64 NULL -22 to_int64(Boolean) :: Int64 -23 to_int64(Boolean NULL) :: Int64 NULL -24 to_int64(Date) :: Int64 -25 to_int64(Date NULL) :: Int64 NULL -26 to_int64(Timestamp) :: Int64 -27 to_int64(Timestamp NULL) :: Int64 NULL +18 to_int64 FACTORY +19 to_int64 FACTORY +20 to_int64(Float32) :: Int64 +21 to_int64(Float32 NULL) :: Int64 NULL +22 to_int64(Float64) :: Int64 +23 to_int64(Float64 NULL) :: Int64 NULL +24 to_int64(Boolean) :: Int64 +25 to_int64(Boolean NULL) :: Int64 NULL +26 to_int64(Date) :: Int64 +27 to_int64(Date NULL) :: Int64 NULL +28 to_int64(Timestamp) :: Int64 +29 to_int64(Timestamp NULL) :: Int64 NULL 0 to_int8(Variant) :: Int8 1 to_int8(Variant NULL) :: Int8 NULL 2 to_int8(String) :: Int8 @@ -3344,12 +3352,14 @@ Functions overloads: 15 to_int8(UInt64 NULL) :: Int8 NULL 16 to_int8(Int64) :: Int8 17 to_int8(Int64 NULL) :: Int8 NULL -18 to_int8(Float32) :: Int8 -19 to_int8(Float32 NULL) :: Int8 NULL -20 to_int8(Float64) :: Int8 -21 to_int8(Float64 NULL) :: Int8 NULL -22 to_int8(Boolean) :: Int8 -23 to_int8(Boolean NULL) :: Int8 NULL +18 to_int8 FACTORY +19 to_int8 FACTORY +20 to_int8(Float32) :: Int8 +21 to_int8(Float32 NULL) :: Int8 NULL +22 to_int8(Float64) :: Int8 +23 to_int8(Float64 NULL) :: Int8 NULL +24 to_int8(Boolean) :: Int8 +25 to_int8(Boolean NULL) :: Int8 NULL 0 to_minute(Timestamp) :: UInt8 1 to_minute(Timestamp NULL) :: UInt8 NULL 0 to_monday(Date) :: Date @@ -3467,12 +3477,14 @@ Functions overloads: 15 to_uint16(UInt64 NULL) :: UInt16 NULL 16 to_uint16(Int64) :: UInt16 17 to_uint16(Int64 NULL) :: UInt16 NULL -18 to_uint16(Float32) :: UInt16 -19 to_uint16(Float32 NULL) :: UInt16 NULL -20 to_uint16(Float64) :: UInt16 -21 to_uint16(Float64 NULL) :: UInt16 NULL -22 to_uint16(Boolean) :: UInt16 -23 to_uint16(Boolean NULL) :: UInt16 NULL +18 to_uint16 FACTORY +19 to_uint16 FACTORY +20 to_uint16(Float32) :: UInt16 +21 to_uint16(Float32 NULL) :: UInt16 NULL +22 to_uint16(Float64) :: UInt16 +23 to_uint16(Float64 NULL) :: UInt16 NULL +24 to_uint16(Boolean) :: UInt16 +25 to_uint16(Boolean NULL) :: UInt16 NULL 0 to_uint32(Variant) :: UInt32 1 to_uint32(Variant NULL) :: UInt32 NULL 2 to_uint32(String) :: UInt32 @@ -3491,12 +3503,14 @@ Functions overloads: 15 to_uint32(UInt64 NULL) :: UInt32 NULL 16 to_uint32(Int64) :: UInt32 17 to_uint32(Int64 NULL) :: UInt32 NULL -18 to_uint32(Float32) :: UInt32 -19 to_uint32(Float32 NULL) :: UInt32 NULL -20 to_uint32(Float64) :: UInt32 -21 to_uint32(Float64 NULL) :: UInt32 NULL -22 to_uint32(Boolean) :: UInt32 -23 to_uint32(Boolean NULL) :: UInt32 NULL +18 to_uint32 FACTORY +19 to_uint32 FACTORY +20 
to_uint32(Float32) :: UInt32 +21 to_uint32(Float32 NULL) :: UInt32 NULL +22 to_uint32(Float64) :: UInt32 +23 to_uint32(Float64 NULL) :: UInt32 NULL +24 to_uint32(Boolean) :: UInt32 +25 to_uint32(Boolean NULL) :: UInt32 NULL 0 to_uint64(Variant) :: UInt64 1 to_uint64(Variant NULL) :: UInt64 NULL 2 to_uint64(String) :: UInt64 @@ -3515,12 +3529,14 @@ Functions overloads: 15 to_uint64(Int32 NULL) :: UInt64 NULL 16 to_uint64(Int64) :: UInt64 17 to_uint64(Int64 NULL) :: UInt64 NULL -18 to_uint64(Float32) :: UInt64 -19 to_uint64(Float32 NULL) :: UInt64 NULL -20 to_uint64(Float64) :: UInt64 -21 to_uint64(Float64 NULL) :: UInt64 NULL -22 to_uint64(Boolean) :: UInt64 -23 to_uint64(Boolean NULL) :: UInt64 NULL +18 to_uint64 FACTORY +19 to_uint64 FACTORY +20 to_uint64(Float32) :: UInt64 +21 to_uint64(Float32 NULL) :: UInt64 NULL +22 to_uint64(Float64) :: UInt64 +23 to_uint64(Float64 NULL) :: UInt64 NULL +24 to_uint64(Boolean) :: UInt64 +25 to_uint64(Boolean NULL) :: UInt64 NULL 0 to_uint8(Variant) :: UInt8 1 to_uint8(Variant NULL) :: UInt8 NULL 2 to_uint8(String) :: UInt8 @@ -3539,12 +3555,14 @@ Functions overloads: 15 to_uint8(UInt64 NULL) :: UInt8 NULL 16 to_uint8(Int64) :: UInt8 17 to_uint8(Int64 NULL) :: UInt8 NULL -18 to_uint8(Float32) :: UInt8 -19 to_uint8(Float32 NULL) :: UInt8 NULL -20 to_uint8(Float64) :: UInt8 -21 to_uint8(Float64 NULL) :: UInt8 NULL -22 to_uint8(Boolean) :: UInt8 -23 to_uint8(Boolean NULL) :: UInt8 NULL +18 to_uint8 FACTORY +19 to_uint8 FACTORY +20 to_uint8(Float32) :: UInt8 +21 to_uint8(Float32 NULL) :: UInt8 NULL +22 to_uint8(Float64) :: UInt8 +23 to_uint8(Float64 NULL) :: UInt8 NULL +24 to_uint8(Boolean) :: UInt8 +25 to_uint8(Boolean NULL) :: UInt8 NULL 0 to_unix_timestamp(Timestamp) :: Int64 1 to_unix_timestamp(Timestamp NULL) :: Int64 NULL 0 to_variant(T0) :: Variant @@ -3681,10 +3699,12 @@ Functions overloads: 17 try_to_float32(UInt64 NULL) :: Float32 NULL 18 try_to_float32(Int64) :: Float32 NULL 19 try_to_float32(Int64 NULL) :: Float32 NULL -20 try_to_float32(Float64) :: Float32 NULL -21 try_to_float32(Float64 NULL) :: Float32 NULL -22 try_to_float32(Boolean) :: Float32 NULL -23 try_to_float32(Boolean NULL) :: Float32 NULL +20 try_to_float32 FACTORY +21 try_to_float32 FACTORY +22 try_to_float32(Float64) :: Float32 NULL +23 try_to_float32(Float64 NULL) :: Float32 NULL +24 try_to_float32(Boolean) :: Float32 NULL +25 try_to_float32(Boolean NULL) :: Float32 NULL 0 try_to_float64(Variant) :: Float64 NULL 1 try_to_float64(Variant NULL) :: Float64 NULL 2 try_to_float64(String) :: Float64 NULL @@ -3705,10 +3725,12 @@ Functions overloads: 17 try_to_float64(UInt64 NULL) :: Float64 NULL 18 try_to_float64(Int64) :: Float64 NULL 19 try_to_float64(Int64 NULL) :: Float64 NULL -20 try_to_float64(Float32) :: Float64 NULL -21 try_to_float64(Float32 NULL) :: Float64 NULL -22 try_to_float64(Boolean) :: Float64 NULL -23 try_to_float64(Boolean NULL) :: Float64 NULL +20 try_to_float64 FACTORY +21 try_to_float64 FACTORY +22 try_to_float64(Float32) :: Float64 NULL +23 try_to_float64(Float32 NULL) :: Float64 NULL +24 try_to_float64(Boolean) :: Float64 NULL +25 try_to_float64(Boolean NULL) :: Float64 NULL 0 try_to_int16(Variant) :: Int16 NULL 1 try_to_int16(Variant NULL) :: Int16 NULL 2 try_to_int16(String) :: Int16 NULL @@ -3727,12 +3749,14 @@ Functions overloads: 15 try_to_int16(UInt64 NULL) :: Int16 NULL 16 try_to_int16(Int64) :: Int16 NULL 17 try_to_int16(Int64 NULL) :: Int16 NULL -18 try_to_int16(Float32) :: Int16 NULL -19 try_to_int16(Float32 NULL) :: Int16 NULL -20 try_to_int16(Float64) 
:: Int16 NULL -21 try_to_int16(Float64 NULL) :: Int16 NULL -22 try_to_int16(Boolean) :: Int16 NULL -23 try_to_int16(Boolean NULL) :: Int16 NULL +18 try_to_int16 FACTORY +19 try_to_int16 FACTORY +20 try_to_int16(Float32) :: Int16 NULL +21 try_to_int16(Float32 NULL) :: Int16 NULL +22 try_to_int16(Float64) :: Int16 NULL +23 try_to_int16(Float64 NULL) :: Int16 NULL +24 try_to_int16(Boolean) :: Int16 NULL +25 try_to_int16(Boolean NULL) :: Int16 NULL 0 try_to_int32(Variant) :: Int32 NULL 1 try_to_int32(Variant NULL) :: Int32 NULL 2 try_to_int32(String) :: Int32 NULL @@ -3751,12 +3775,14 @@ Functions overloads: 15 try_to_int32(UInt64 NULL) :: Int32 NULL 16 try_to_int32(Int64) :: Int32 NULL 17 try_to_int32(Int64 NULL) :: Int32 NULL -18 try_to_int32(Float32) :: Int32 NULL -19 try_to_int32(Float32 NULL) :: Int32 NULL -20 try_to_int32(Float64) :: Int32 NULL -21 try_to_int32(Float64 NULL) :: Int32 NULL -22 try_to_int32(Boolean) :: Int32 NULL -23 try_to_int32(Boolean NULL) :: Int32 NULL +18 try_to_int32 FACTORY +19 try_to_int32 FACTORY +20 try_to_int32(Float32) :: Int32 NULL +21 try_to_int32(Float32 NULL) :: Int32 NULL +22 try_to_int32(Float64) :: Int32 NULL +23 try_to_int32(Float64 NULL) :: Int32 NULL +24 try_to_int32(Boolean) :: Int32 NULL +25 try_to_int32(Boolean NULL) :: Int32 NULL 0 try_to_int64(Variant) :: Int64 NULL 1 try_to_int64(Variant NULL) :: Int64 NULL 2 try_to_int64(String) :: Int64 NULL @@ -3775,16 +3801,18 @@ Functions overloads: 15 try_to_int64(Int32 NULL) :: Int64 NULL 16 try_to_int64(UInt64) :: Int64 NULL 17 try_to_int64(UInt64 NULL) :: Int64 NULL -18 try_to_int64(Float32) :: Int64 NULL -19 try_to_int64(Float32 NULL) :: Int64 NULL -20 try_to_int64(Float64) :: Int64 NULL -21 try_to_int64(Float64 NULL) :: Int64 NULL -22 try_to_int64(Boolean) :: Int64 NULL -23 try_to_int64(Boolean NULL) :: Int64 NULL -24 try_to_int64(Date) :: Int64 NULL -25 try_to_int64(Date NULL) :: Int64 NULL -26 try_to_int64(Timestamp) :: Int64 NULL -27 try_to_int64(Timestamp NULL) :: Int64 NULL +18 try_to_int64 FACTORY +19 try_to_int64 FACTORY +20 try_to_int64(Float32) :: Int64 NULL +21 try_to_int64(Float32 NULL) :: Int64 NULL +22 try_to_int64(Float64) :: Int64 NULL +23 try_to_int64(Float64 NULL) :: Int64 NULL +24 try_to_int64(Boolean) :: Int64 NULL +25 try_to_int64(Boolean NULL) :: Int64 NULL +26 try_to_int64(Date) :: Int64 NULL +27 try_to_int64(Date NULL) :: Int64 NULL +28 try_to_int64(Timestamp) :: Int64 NULL +29 try_to_int64(Timestamp NULL) :: Int64 NULL 0 try_to_int8(Variant) :: Int8 NULL 1 try_to_int8(Variant NULL) :: Int8 NULL 2 try_to_int8(String) :: Int8 NULL @@ -3803,12 +3831,14 @@ Functions overloads: 15 try_to_int8(UInt64 NULL) :: Int8 NULL 16 try_to_int8(Int64) :: Int8 NULL 17 try_to_int8(Int64 NULL) :: Int8 NULL -18 try_to_int8(Float32) :: Int8 NULL -19 try_to_int8(Float32 NULL) :: Int8 NULL -20 try_to_int8(Float64) :: Int8 NULL -21 try_to_int8(Float64 NULL) :: Int8 NULL -22 try_to_int8(Boolean) :: Int8 NULL -23 try_to_int8(Boolean NULL) :: Int8 NULL +18 try_to_int8 FACTORY +19 try_to_int8 FACTORY +20 try_to_int8(Float32) :: Int8 NULL +21 try_to_int8(Float32 NULL) :: Int8 NULL +22 try_to_int8(Float64) :: Int8 NULL +23 try_to_int8(Float64 NULL) :: Int8 NULL +24 try_to_int8(Boolean) :: Int8 NULL +25 try_to_int8(Boolean NULL) :: Int8 NULL 0 try_to_string(Variant) :: String NULL 1 try_to_string(Variant NULL) :: String NULL 2 try_to_string(UInt8) :: String NULL @@ -3863,12 +3893,14 @@ Functions overloads: 15 try_to_uint16(UInt64 NULL) :: UInt16 NULL 16 try_to_uint16(Int64) :: UInt16 NULL 17 
try_to_uint16(Int64 NULL) :: UInt16 NULL -18 try_to_uint16(Float32) :: UInt16 NULL -19 try_to_uint16(Float32 NULL) :: UInt16 NULL -20 try_to_uint16(Float64) :: UInt16 NULL -21 try_to_uint16(Float64 NULL) :: UInt16 NULL -22 try_to_uint16(Boolean) :: UInt16 NULL -23 try_to_uint16(Boolean NULL) :: UInt16 NULL +18 try_to_uint16 FACTORY +19 try_to_uint16 FACTORY +20 try_to_uint16(Float32) :: UInt16 NULL +21 try_to_uint16(Float32 NULL) :: UInt16 NULL +22 try_to_uint16(Float64) :: UInt16 NULL +23 try_to_uint16(Float64 NULL) :: UInt16 NULL +24 try_to_uint16(Boolean) :: UInt16 NULL +25 try_to_uint16(Boolean NULL) :: UInt16 NULL 0 try_to_uint32(Variant) :: UInt32 NULL 1 try_to_uint32(Variant NULL) :: UInt32 NULL 2 try_to_uint32(String) :: UInt32 NULL @@ -3887,12 +3919,14 @@ Functions overloads: 15 try_to_uint32(UInt64 NULL) :: UInt32 NULL 16 try_to_uint32(Int64) :: UInt32 NULL 17 try_to_uint32(Int64 NULL) :: UInt32 NULL -18 try_to_uint32(Float32) :: UInt32 NULL -19 try_to_uint32(Float32 NULL) :: UInt32 NULL -20 try_to_uint32(Float64) :: UInt32 NULL -21 try_to_uint32(Float64 NULL) :: UInt32 NULL -22 try_to_uint32(Boolean) :: UInt32 NULL -23 try_to_uint32(Boolean NULL) :: UInt32 NULL +18 try_to_uint32 FACTORY +19 try_to_uint32 FACTORY +20 try_to_uint32(Float32) :: UInt32 NULL +21 try_to_uint32(Float32 NULL) :: UInt32 NULL +22 try_to_uint32(Float64) :: UInt32 NULL +23 try_to_uint32(Float64 NULL) :: UInt32 NULL +24 try_to_uint32(Boolean) :: UInt32 NULL +25 try_to_uint32(Boolean NULL) :: UInt32 NULL 0 try_to_uint64(Variant) :: UInt64 NULL 1 try_to_uint64(Variant NULL) :: UInt64 NULL 2 try_to_uint64(String) :: UInt64 NULL @@ -3911,12 +3945,14 @@ Functions overloads: 15 try_to_uint64(Int32 NULL) :: UInt64 NULL 16 try_to_uint64(Int64) :: UInt64 NULL 17 try_to_uint64(Int64 NULL) :: UInt64 NULL -18 try_to_uint64(Float32) :: UInt64 NULL -19 try_to_uint64(Float32 NULL) :: UInt64 NULL -20 try_to_uint64(Float64) :: UInt64 NULL -21 try_to_uint64(Float64 NULL) :: UInt64 NULL -22 try_to_uint64(Boolean) :: UInt64 NULL -23 try_to_uint64(Boolean NULL) :: UInt64 NULL +18 try_to_uint64 FACTORY +19 try_to_uint64 FACTORY +20 try_to_uint64(Float32) :: UInt64 NULL +21 try_to_uint64(Float32 NULL) :: UInt64 NULL +22 try_to_uint64(Float64) :: UInt64 NULL +23 try_to_uint64(Float64 NULL) :: UInt64 NULL +24 try_to_uint64(Boolean) :: UInt64 NULL +25 try_to_uint64(Boolean NULL) :: UInt64 NULL 0 try_to_uint8(Variant) :: UInt8 NULL 1 try_to_uint8(Variant NULL) :: UInt8 NULL 2 try_to_uint8(String) :: UInt8 NULL @@ -3935,12 +3971,14 @@ Functions overloads: 15 try_to_uint8(UInt64 NULL) :: UInt8 NULL 16 try_to_uint8(Int64) :: UInt8 NULL 17 try_to_uint8(Int64 NULL) :: UInt8 NULL -18 try_to_uint8(Float32) :: UInt8 NULL -19 try_to_uint8(Float32 NULL) :: UInt8 NULL -20 try_to_uint8(Float64) :: UInt8 NULL -21 try_to_uint8(Float64 NULL) :: UInt8 NULL -22 try_to_uint8(Boolean) :: UInt8 NULL -23 try_to_uint8(Boolean NULL) :: UInt8 NULL +18 try_to_uint8 FACTORY +19 try_to_uint8 FACTORY +20 try_to_uint8(Float32) :: UInt8 NULL +21 try_to_uint8(Float32 NULL) :: UInt8 NULL +22 try_to_uint8(Float64) :: UInt8 NULL +23 try_to_uint8(Float64 NULL) :: UInt8 NULL +24 try_to_uint8(Boolean) :: UInt8 NULL +25 try_to_uint8(Boolean NULL) :: UInt8 NULL 0 try_to_variant(T0) :: Variant NULL 1 try_to_variant(T0 NULL) :: Variant NULL 0 tuple FACTORY diff --git a/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal b/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal index b9db0fedde04..65908c4e1e45 100644 --- 
a/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal +++ b/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal @@ -942,5 +942,16 @@ select sum(-b) as s from t group by a order by s; -0.01683589 0.01683589 +statement ok +truncate table t; + +statement ok +insert into t values(1, 355327908309); + +query TTT +select to_uint64(b), b::uint64, b from t; +---- +355327908309 355327908309 355327908309.00000000 + statement ok drop table t From 14819b3d95660d8ba03d92c2dfd97ef27e28dbd4 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 21 Sep 2023 18:38:08 -0700 Subject: [PATCH 21/21] feat(query): support inverted_filter to omit the filter executor (#12934) * feat(query): support inverted_filter to omit the filter executor * feat(query): support inverted_filter to omit the filter executor * feat(query): support inverted_filter to omit the filter executor * feat(query): support inverted_filter to omit the filter executor * feat(query): support inverted_filter to omit the filter executor * feat(query): address comments --- src/common/storage/src/metrics/common.rs | 30 +++++ src/common/storage/src/metrics/mod.rs | 1 + src/query/catalog/src/plan/pushdown.rs | 10 +- src/query/catalog/src/table.rs | 8 -- src/query/expression/src/expression.rs | 8 +- .../service/src/interpreters/common/mod.rs | 1 + .../service/src/interpreters/common/util.rs | 25 +++++ .../src/interpreters/interpreter_delete.rs | 38 +------ .../interpreter_table_recluster.rs | 24 +++- .../table_functions/numbers/numbers_table.rs | 2 +- .../tests/it/parquet_rs/prune_row_groups.rs | 2 +- .../service/tests/it/storages/fuse/pruning.rs | 10 +- src/query/sql/src/executor/format.rs | 4 +- .../sql/src/executor/physical_plan_builder.rs | 62 +++++----- src/query/sql/src/executor/table_read_plan.rs | 11 +- .../sql/src/planner/expression_parser.rs | 21 +++- .../storages/fuse/src/operations/delete.rs | 14 +-- .../processors/processor_matched_split.rs | 1 + .../fuse/src/operations/read_partitions.rs | 4 +- .../storages/fuse/src/operations/update.rs | 20 +++- .../storages/fuse/src/pruning/fuse_pruner.rs | 13 ++- .../storages/hive/hive/src/hive_table.rs | 14 +-- src/query/storages/iceberg/src/table.rs | 9 +- .../src/parquet2/parquet_table/partition.rs | 24 +++- .../storages/parquet/src/parquet2/pruning.rs | 9 +- .../src/parquet_rs/parquet_reader/reader.rs | 106 ++++++++++++------ .../parquet_rs/parquet_reader/row_group.rs | 4 + .../src/parquet_rs/parquet_table/partition.rs | 12 +- .../parquet/src/parquet_rs/partition.rs | 1 + .../parquet/src/parquet_rs/pruning.rs | 52 ++++++--- .../storages/system/src/columns_table.rs | 2 +- src/query/storages/system/src/tables_table.rs | 2 +- .../suites/base/03_common/03_0025_delete_from | 11 ++ 33 files changed, 363 insertions(+), 192 deletions(-) create mode 100644 src/common/storage/src/metrics/common.rs diff --git a/src/common/storage/src/metrics/common.rs b/src/common/storage/src/metrics/common.rs new file mode 100644 index 000000000000..2596ceaf82b7 --- /dev/null +++ b/src/common/storage/src/metrics/common.rs @@ -0,0 +1,30 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_metrics::register_counter; +use common_metrics::Counter; +use lazy_static::lazy_static; + +lazy_static! { + static ref OMIT_FILTER_ROWGROUPS: Counter = register_counter("omit_filter_rowgroups"); + static ref OMIT_FILTER_ROWS: Counter = register_counter("omit_filter_rows"); +} + +pub fn metrics_inc_omit_filter_rowgroups(c: u64) { + OMIT_FILTER_ROWGROUPS.inc_by(c); +} + +pub fn metrics_inc_omit_filter_rows(c: u64) { + OMIT_FILTER_ROWS.inc_by(c); +} diff --git a/src/common/storage/src/metrics/mod.rs b/src/common/storage/src/metrics/mod.rs index 890e46a7413d..7d5d075ca8d4 100644 --- a/src/common/storage/src/metrics/mod.rs +++ b/src/common/storage/src/metrics/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod common; pub mod copy; pub mod merge_into; mod storage_metrics; diff --git a/src/query/catalog/src/plan/pushdown.rs b/src/query/catalog/src/plan/pushdown.rs index 318e3a873171..e74e9bf227b8 100644 --- a/src/query/catalog/src/plan/pushdown.rs +++ b/src/query/catalog/src/plan/pushdown.rs @@ -77,9 +77,9 @@ pub struct PushDownInfo { /// The difference with `projection` is the removal of the source columns /// which were only used to generate virtual columns. pub output_columns: Option, - /// Optional filter expression plan + /// Optional filter and reverse filter expression plan /// Assumption: expression's data type must be `DataType::Boolean`. - pub filter: Option>, + pub filters: Option, pub is_deterministic: bool, /// Optional prewhere information /// used for prewhere optimization @@ -96,6 +96,12 @@ pub struct PushDownInfo { pub agg_index: Option, } +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] +pub struct Filters { + pub filter: RemoteExpr, + pub inverted_filter: RemoteExpr, +} + /// TopK is a wrapper for topk push down items. /// We only take the first column in order_by as the topk column. #[derive(Debug, Clone)] diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index 6a59816a15bd..e78036bcc467 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -465,14 +465,6 @@ pub struct NavigationDescriptor { pub point: NavigationPoint, } -#[derive(Debug, Clone)] -pub struct DeletionFilters { - // the filter expression for the deletion - pub filter: RemoteExpr, - // just "not(filter)" - pub inverted_filter: RemoteExpr, -} - use std::collections::HashMap; #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Default)] diff --git a/src/query/expression/src/expression.rs b/src/query/expression/src/expression.rs index a9fd71538fd9..1762063491ab 100644 --- a/src/query/expression/src/expression.rs +++ b/src/query/expression/src/expression.rs @@ -124,14 +124,17 @@ pub enum Expr { /// /// The remote node will recover the `Arc` pointer within `FunctionCall` by looking /// up the function registry with the `FunctionID`. 
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Educe, Serialize, Deserialize)] +#[educe(PartialEq, Eq, Hash)] pub enum RemoteExpr { Constant { + #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))] span: Span, scalar: Scalar, data_type: DataType, }, ColumnRef { + #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))] span: Span, id: Index, data_type: DataType, @@ -140,12 +143,14 @@ pub enum RemoteExpr { display_name: String, }, Cast { + #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))] span: Span, is_try: bool, expr: Box>, dest_type: DataType, }, FunctionCall { + #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))] span: Span, id: FunctionID, generics: Vec, @@ -153,6 +158,7 @@ pub enum RemoteExpr { return_type: DataType, }, UDFServerCall { + #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))] span: Span, func_name: String, server_addr: String, diff --git a/src/query/service/src/interpreters/common/mod.rs b/src/query/service/src/interpreters/common/mod.rs index 75c0539a6c74..96a74433eea2 100644 --- a/src/query/service/src/interpreters/common/mod.rs +++ b/src/query/service/src/interpreters/common/mod.rs @@ -25,5 +25,6 @@ pub use refresh_aggregating_index::hook_refresh_agg_index; pub use refresh_aggregating_index::RefreshAggIndexDesc; pub use table::check_referenced_computed_columns; pub use util::check_deduplicate_label; +pub use util::create_push_down_filters; pub use self::metrics::*; diff --git a/src/query/service/src/interpreters/common/util.rs b/src/query/service/src/interpreters/common/util.rs index 886fe1a58d1d..b57fcf183a92 100644 --- a/src/query/service/src/interpreters/common/util.rs +++ b/src/query/service/src/interpreters/common/util.rs @@ -14,11 +14,17 @@ use std::sync::Arc; +use common_catalog::plan::Filters; use common_catalog::table_context::TableContext; use common_exception::Result; +use common_expression::type_check::check_function; +use common_functions::BUILTIN_FUNCTIONS; use common_meta_kvapi::kvapi::KVApi; use common_users::UserApiProvider; +use crate::sql::executor::cast_expr_to_non_null_boolean; +use crate::sql::ScalarExpr; + /// Checks if a duplicate label exists in the meta store. /// /// # Arguments @@ -41,3 +47,22 @@ pub async fn check_deduplicate_label(ctx: Arc) -> Result } } } + +pub fn create_push_down_filters(scalar: &ScalarExpr) -> Result { + let filter = cast_expr_to_non_null_boolean( + scalar + .as_expr()? 
+ .project_column_ref(|col| col.column_name.clone()), + )?; + + let remote_filter = filter.as_remote_expr(); + + // prepare the inverse filter expression + let remote_inverted_filter = + check_function(None, "not", &[], &[filter], &BUILTIN_FUNCTIONS)?.as_remote_expr(); + + Ok(Filters { + filter: remote_filter, + inverted_filter: remote_inverted_filter, + }) +} diff --git a/src/query/service/src/interpreters/interpreter_delete.rs b/src/query/service/src/interpreters/interpreter_delete.rs index ff7013d260c7..716ad266faa6 100644 --- a/src/query/service/src/interpreters/interpreter_delete.rs +++ b/src/query/service/src/interpreters/interpreter_delete.rs @@ -18,7 +18,6 @@ use std::sync::Arc; use common_base::runtime::GlobalIORuntime; use common_catalog::plan::Partitions; -use common_catalog::table::DeletionFilters; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; @@ -30,7 +29,6 @@ use common_functions::BUILTIN_FUNCTIONS; use common_meta_app::schema::CatalogInfo; use common_meta_app::schema::TableInfo; use common_sql::binder::ColumnBindingBuilder; -use common_sql::executor::cast_expr_to_non_null_boolean; use common_sql::executor::DeletePartial; use common_sql::executor::Exchange; use common_sql::executor::FragmentKind; @@ -60,6 +58,7 @@ use log::debug; use storages_common_table_meta::meta::TableSnapshot; use table_lock::TableLockHandlerWrapper; +use crate::interpreters::common::create_push_down_filters; use crate::interpreters::Interpreter; use crate::interpreters::SelectInterpreter; use crate::pipelines::executor::ExecutorSettings; @@ -164,36 +163,15 @@ impl Interpreter for DeleteInterpreter { let (filters, col_indices) = if let Some(scalar) = selection { // prepare the filter expression - let filter = cast_expr_to_non_null_boolean( - scalar - .as_expr()? - .project_column_ref(|col| col.column_name.clone()), - )? - .as_remote_expr(); - - let expr = filter.as_expr(&BUILTIN_FUNCTIONS); + let filters = create_push_down_filters(&scalar)?; + + let expr = filters.filter.as_expr(&BUILTIN_FUNCTIONS); if !expr.is_deterministic(&BUILTIN_FUNCTIONS) { return Err(ErrorCode::Unimplemented( "Delete must have deterministic predicate", )); } - // prepare the inverse filter expression - let inverted_filter = { - let inverse = ScalarExpr::FunctionCall(common_sql::planner::plans::FunctionCall { - span: None, - func_name: "not".to_string(), - params: vec![], - arguments: vec![scalar.clone()], - }); - cast_expr_to_non_null_boolean( - inverse - .as_expr()? - .project_column_ref(|col| col.column_name.clone()), - )? 
- .as_remote_expr() - }; - let col_indices: Vec = if !self.plan.subquery_desc.is_empty() { let mut col_indices = HashSet::new(); for subquery_desc in &self.plan.subquery_desc { @@ -203,13 +181,7 @@ impl Interpreter for DeleteInterpreter { } else { scalar.used_columns().into_iter().collect() }; - ( - Some(DeletionFilters { - filter, - inverted_filter, - }), - col_indices, - ) + (Some(filters), col_indices) } else { (None, vec![]) }; diff --git a/src/query/service/src/interpreters/interpreter_table_recluster.rs b/src/query/service/src/interpreters/interpreter_table_recluster.rs index 620d3efddbd7..98ff1891c245 100644 --- a/src/query/service/src/interpreters/interpreter_table_recluster.rs +++ b/src/query/service/src/interpreters/interpreter_table_recluster.rs @@ -16,9 +16,12 @@ use std::sync::Arc; use std::time::Duration; use std::time::SystemTime; +use common_catalog::plan::Filters; use common_catalog::plan::PushDownInfo; use common_exception::ErrorCode; use common_exception::Result; +use common_expression::type_check::check_function; +use common_functions::BUILTIN_FUNCTIONS; use log::error; use log::info; use log::warn; @@ -31,6 +34,7 @@ use crate::pipelines::Pipeline; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; use crate::sessions::TableContext; +use crate::sql::executor::cast_expr_to_non_null_boolean; use crate::sql::plans::ReclusterTablePlan; pub struct ReclusterTableInterpreter { @@ -68,13 +72,23 @@ impl Interpreter for ReclusterTableInterpreter { // Build extras via push down scalar let extras = if let Some(scalar) = &plan.push_downs { - let filter = scalar - .as_expr()? - .project_column_ref(|col| col.column_name.clone()) - .as_remote_expr(); + // prepare the filter expression + let filter = cast_expr_to_non_null_boolean( + scalar + .as_expr()? + .project_column_ref(|col| col.column_name.clone()), + )?; + // prepare the inverse filter expression + let inverted_filter = + check_function(None, "not", &[], &[filter.clone()], &BUILTIN_FUNCTIONS)?; + + let filters = Filters { + filter: filter.as_remote_expr(), + inverted_filter: inverted_filter.as_remote_expr(), + }; Some(PushDownInfo { - filter: Some(filter), + filters: Some(filters), ..PushDownInfo::default() }) } else { diff --git a/src/query/service/src/table_functions/numbers/numbers_table.rs b/src/query/service/src/table_functions/numbers/numbers_table.rs index 02d76448e692..c188bc7a2332 100644 --- a/src/query/service/src/table_functions/numbers/numbers_table.rs +++ b/src/query/service/src/table_functions/numbers/numbers_table.rs @@ -137,7 +137,7 @@ impl Table for NumbersTable { let mut limit = None; if let Some(extras) = &push_downs { - if extras.limit.is_some() && extras.filter.is_none() && extras.order_by.is_empty() { + if extras.limit.is_some() && extras.filters.is_none() && extras.order_by.is_empty() { // It is allowed to have an error when we can't get sort columns from the expression. For // example 'select number from numbers(10) order by number+4 limit 10', the column 'number+4' // doesn't exist in the numbers table. 
diff --git a/src/query/service/tests/it/parquet_rs/prune_row_groups.rs b/src/query/service/tests/it/parquet_rs/prune_row_groups.rs index 4103d3a01b60..9fbd651cd2f5 100644 --- a/src/query/service/tests/it/parquet_rs/prune_row_groups.rs +++ b/src/query/service/tests/it/parquet_rs/prune_row_groups.rs @@ -57,7 +57,7 @@ async fn test_impl(scenario: Scenario, predicate: &str, expected_rgs: Vec ) .unwrap(); - let rgs = pruner.prune_row_groups(&parquet_meta, None).unwrap(); + let (rgs, _) = pruner.prune_row_groups(&parquet_meta, None).unwrap(); assert_eq!( expected_rgs, rgs, diff --git a/src/query/service/tests/it/storages/fuse/pruning.rs b/src/query/service/tests/it/storages/fuse/pruning.rs index 2b27124a8f0e..dfd0b5498ca4 100644 --- a/src/query/service/tests/it/storages/fuse/pruning.rs +++ b/src/query/service/tests/it/storages/fuse/pruning.rs @@ -29,7 +29,7 @@ use common_expression::TableDataType; use common_expression::TableField; use common_expression::TableSchemaRef; use common_expression::TableSchemaRefExt; -use common_sql::parse_to_remote_string_expr; +use common_sql::parse_to_filters; use common_sql::plans::CreateTablePlan; use common_sql::BloomIndexColumns; use common_storages_fuse::pruning::create_segment_location_vector; @@ -172,11 +172,7 @@ async fn test_block_pruner() -> Result<()> { // nothing is pruned let e1 = PushDownInfo { - filter: Some(parse_to_remote_string_expr( - ctx.clone(), - table.clone(), - "a > 3", - )?), + filters: Some(parse_to_filters(ctx.clone(), table.clone(), "a > 3")?), ..Default::default() }; @@ -184,7 +180,7 @@ async fn test_block_pruner() -> Result<()> { let mut e2 = PushDownInfo::default(); let max_val_of_b = 6u64; - e2.filter = Some(parse_to_remote_string_expr( + e2.filters = Some(parse_to_filters( ctx.clone(), table.clone(), "a > 0 and b > 6", diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index 37447a2028ff..f218b8e03e20 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -269,9 +269,9 @@ fn table_scan_to_format_tree( .as_ref() .and_then(|extras| { extras - .filter + .filters .as_ref() - .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|filters| filters.filter.as_expr(&BUILTIN_FUNCTIONS).sql_display()) }) .unwrap_or_default(); diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index 4f171e2514d7..e342abf515bb 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use common_catalog::catalog::CatalogManager; use common_catalog::catalog_kind::CATALOG_DEFAULT; use common_catalog::plan::AggIndexInfo; +use common_catalog::plan::Filters; use common_catalog::plan::PrewhereInfo; use common_catalog::plan::Projection; use common_catalog::plan::PushDownInfo; @@ -1886,37 +1887,35 @@ impl PhysicalPlanBuilder { .push_down_predicates .as_ref() .filter(|p| !p.is_empty()) - .map( - |predicates: &Vec| -> Result> { - let predicates = predicates - .iter() - .map(|p| { - Ok(p.as_expr()? - .project_column_ref(|col| col.column_name.clone())) - }) - .collect::>>()?; - - let expr = predicates - .into_iter() - .try_reduce(|lhs, rhs| { - check_function( - None, - "and_filters", - &[], - &[lhs, rhs], - &BUILTIN_FUNCTIONS, - ) - })? - .unwrap(); + .map(|predicates: &Vec| -> Result { + let predicates = predicates + .iter() + .map(|p| { + Ok(p.as_expr()? 
+ .project_column_ref(|col| col.column_name.clone())) + }) + .collect::>>()?; + + let expr = predicates + .into_iter() + .try_reduce(|lhs, rhs| { + check_function(None, "and_filters", &[], &[lhs, rhs], &BUILTIN_FUNCTIONS) + })? + .unwrap(); - let expr = cast_expr_to_non_null_boolean(expr)?; - let (expr, _) = ConstantFolder::fold(&expr, &self.func_ctx, &BUILTIN_FUNCTIONS); + let expr = cast_expr_to_non_null_boolean(expr)?; + let (expr, _) = ConstantFolder::fold(&expr, &self.func_ctx, &BUILTIN_FUNCTIONS); - is_deterministic = expr.is_deterministic(&BUILTIN_FUNCTIONS); + is_deterministic = expr.is_deterministic(&BUILTIN_FUNCTIONS); - Ok(expr.as_remote_expr()) - }, - ) + let inverted_filter = + check_function(None, "not", &[], &[expr.clone()], &BUILTIN_FUNCTIONS)?; + + Ok(Filters { + filter: expr.as_remote_expr(), + inverted_filter: inverted_filter.as_remote_expr(), + }) + }) .transpose()?; let prewhere_info = scan @@ -1970,12 +1969,13 @@ impl PhysicalPlanBuilder { }) }) .expect("there should be at least one predicate in prewhere"); + let filter = cast_expr_to_non_null_boolean( predicate .as_expr()? .project_column_ref(|col| col.column_name.clone()), - )? - .as_remote_expr(); + )?; + let filter = filter.as_remote_expr(); let virtual_columns = self.build_virtual_columns(&prewhere.prewhere_columns); Ok::(PrewhereInfo { @@ -2039,7 +2039,7 @@ impl PhysicalPlanBuilder { Ok(PushDownInfo { projection: Some(projection), output_columns, - filter: push_down_filter, + filters: push_down_filter, is_deterministic, prewhere: prewhere_info, limit: scan.limit, diff --git a/src/query/sql/src/executor/table_read_plan.rs b/src/query/sql/src/executor/table_read_plan.rs index d35ceb36acdf..176225bdd062 100644 --- a/src/query/sql/src/executor/table_read_plan.rs +++ b/src/query/sql/src/executor/table_read_plan.rs @@ -21,6 +21,7 @@ use common_ast::Dialect; use common_base::base::ProgressValues; use common_catalog::plan::DataSourceInfo; use common_catalog::plan::DataSourcePlan; +use common_catalog::plan::Filters; use common_catalog::plan::InternalColumn; use common_catalog::plan::PartStatistics; use common_catalog::plan::Partitions; @@ -88,9 +89,13 @@ impl ToReadDataSourcePlan for dyn Table { let catalog_info = ctx.get_catalog(&catalog).await?.info(); let (statistics, parts) = if let Some(PushDownInfo { - filter: - Some(RemoteExpr::Constant { - scalar: Scalar::Boolean(false), + filters: + Some(Filters { + filter: + RemoteExpr::Constant { + scalar: Scalar::Boolean(false), + .. + }, .. }), .. 
diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs index 7779500b9407..545cf7c28df6 100644 --- a/src/query/sql/src/planner/expression_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -22,12 +22,14 @@ use common_ast::Dialect; use common_base::base::tokio::runtime::Handle; use common_base::base::tokio::task::block_in_place; use common_catalog::catalog::CATALOG_DEFAULT; +use common_catalog::plan::Filters; use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; use common_expression::infer_schema_type; use common_expression::infer_table_schema; +use common_expression::type_check::check_function; use common_expression::types::DataType; use common_expression::ConstantFolder; use common_expression::DataBlock; @@ -137,11 +139,11 @@ pub fn parse_exprs( Ok(exprs) } -pub fn parse_to_remote_string_expr( +pub fn parse_to_filters( ctx: Arc, table_meta: Arc, sql: &str, -) -> Result> { +) -> Result { let schema = table_meta.schema(); let exprs = parse_exprs(ctx, table_meta, sql)?; let exprs: Vec> = exprs @@ -153,7 +155,20 @@ pub fn parse_to_remote_string_expr( .collect(); if exprs.len() == 1 { - Ok(exprs[0].clone()) + let filter = exprs[0].clone(); + + let inverted_filter = check_function( + None, + "not", + &[], + &[filter.as_expr(&BUILTIN_FUNCTIONS)], + &BUILTIN_FUNCTIONS, + )?; + + Ok(Filters { + filter, + inverted_filter: inverted_filter.as_remote_expr(), + }) } else { Err(ErrorCode::BadDataValueType(format!( "Expected single expr, but got {}", diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 4a9e34049213..bf92e9ab2649 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -15,13 +15,13 @@ use std::sync::Arc; use common_base::base::ProgressValues; +use common_catalog::plan::Filters; use common_catalog::plan::PartInfoPtr; use common_catalog::plan::Partitions; use common_catalog::plan::PartitionsShuffleKind; use common_catalog::plan::Projection; use common_catalog::plan::PruningStatistics; use common_catalog::plan::PushDownInfo; -use common_catalog::table::DeletionFilters; use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_exception::Result; @@ -72,7 +72,7 @@ impl FuseTable { pub async fn fast_delete( &self, ctx: Arc, - filters: Option, + filters: Option, col_indices: Vec, query_row_id_col: bool, ) -> Result)>> { @@ -131,8 +131,7 @@ impl FuseTable { let (partitions, info) = self .do_mutation_block_pruning( ctx.clone(), - Some(deletion_filters.filter), - Some(deletion_filters.inverted_filter), + Some(deletion_filters), projection, &snapshot, true, @@ -280,8 +279,7 @@ impl FuseTable { pub async fn do_mutation_block_pruning( &self, ctx: Arc, - filter: Option>, - inverted_filter: Option>, + filters: Option, projection: Projection, base_snapshot: &TableSnapshot, with_origin: bool, @@ -289,7 +287,7 @@ impl FuseTable { ) -> Result<(Partitions, MutationTaskInfo)> { let push_down = Some(PushDownInfo { projection: Some(projection), - filter: filter.clone(), + filters: filters.clone(), ..PushDownInfo::default() }); @@ -304,7 +302,7 @@ impl FuseTable { let segment_locations = create_segment_location_vector(segment_locations, None); - if let Some(inverse) = inverted_filter { + if let Some(inverse) = filters.map(|f| f.inverted_filter) { // now the `block_metas` refers to the 
blocks that need to be deleted completely or partially. // // let's try pruning the blocks further to get the blocks that need to be deleted completely, so that diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs b/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs index d26ca27c9080..39e7b19d3e9c 100644 --- a/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs +++ b/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs @@ -243,6 +243,7 @@ impl Processor for MatchedSplitProcessor { let (stage_block, mut row_ids) = delete_mutation .delete_mutator .delete_by_expr(current_block)?; + if stage_block.is_empty() { // delete all if !row_ids.is_empty() { diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index 6ab120c22183..dbbd89ae90de 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -267,7 +267,7 @@ impl FuseTable { ) -> (PartStatistics, Partitions) { let limit = push_downs .as_ref() - .filter(|p| p.order_by.is_empty() && p.filter.is_none()) + .filter(|p| p.order_by.is_empty() && p.filters.is_none()) .and_then(|p| p.limit) .unwrap_or(usize::MAX); @@ -333,7 +333,7 @@ impl FuseTable { fn is_exact(push_downs: &Option) -> bool { push_downs .as_ref() - .map_or(true, |extra| extra.filter.is_none()) + .map_or(true, |extra| extra.filters.is_none()) } fn all_columns_partitions( diff --git a/src/query/storages/fuse/src/operations/update.rs b/src/query/storages/fuse/src/operations/update.rs index 4a740018cee5..a2b765981c83 100644 --- a/src/query/storages/fuse/src/operations/update.rs +++ b/src/query/storages/fuse/src/operations/update.rs @@ -15,10 +15,12 @@ use std::collections::BTreeMap; use std::sync::Arc; +use common_catalog::plan::Filters; use common_catalog::plan::Projection; use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_exception::Result; +use common_expression::type_check::check_function; use common_expression::types::NumberDataType; use common_expression::FieldIndex; use common_expression::RemoteExpr; @@ -241,14 +243,25 @@ impl FuseTable { ); } let remain_reader = Arc::new(remain_reader); - let (filter_expr, filter) = if let Some(remote_expr) = filter { + let (filter_expr, filters) = if let Some(remote_expr) = filter { + let reverted_expr = check_function( + None, + "not", + &[], + &[remote_expr.as_expr(&BUILTIN_FUNCTIONS)], + &BUILTIN_FUNCTIONS, + )?; + ( Arc::new(Some( remote_expr .as_expr(&BUILTIN_FUNCTIONS) .project_column_ref(|name| schema.index_of(name).unwrap()), )), - Some(remote_expr), + Some(Filters { + filter: remote_expr, + inverted_filter: reverted_expr.as_remote_expr(), + }), ) } else { (Arc::new(None), None) @@ -257,8 +270,7 @@ impl FuseTable { let (parts, part_info) = self .do_mutation_block_pruning( ctx.clone(), - filter, - None, + filters, projection, base_snapshot, false, diff --git a/src/query/storages/fuse/src/pruning/fuse_pruner.rs b/src/query/storages/fuse/src/pruning/fuse_pruner.rs index aacaca3b1689..a6e0a8a4bf4b 100644 --- a/src/query/storages/fuse/src/pruning/fuse_pruner.rs +++ b/src/query/storages/fuse/src/pruning/fuse_pruner.rs @@ -81,15 +81,18 @@ impl PruningContext { ) -> Result> { let func_ctx = ctx.get_function_context()?; - let filter_expr = push_down - .as_ref() - .and_then(|extra| 
extra.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS))); + let filter_expr = push_down.as_ref().and_then(|extra| { + extra + .filters + .as_ref() + .map(|f| f.filter.as_expr(&BUILTIN_FUNCTIONS)) + }); // Limit pruner. // if there are ordering/filter clause, ignore limit, even it has been pushed down let limit = push_down .as_ref() - .filter(|p| p.order_by.is_empty() && p.filter.is_none()) + .filter(|p| p.order_by.is_empty() && p.filters.is_none()) .and_then(|p| p.limit); // prepare the limiter. in case that limit is none, an unlimited limiter will be returned let limit_pruner = LimiterPrunerCreator::create(limit); @@ -378,7 +381,7 @@ impl FusePruner { let push_down = self.push_down.clone(); if push_down .as_ref() - .filter(|p| !p.order_by.is_empty() && p.limit.is_some() && p.filter.is_none()) + .filter(|p| !p.order_by.is_empty() && p.limit.is_some() && p.filters.is_none()) .is_some() { let schema = self.table_schema.clone(); diff --git a/src/query/storages/hive/hive/src/hive_table.rs b/src/query/storages/hive/hive/src/hive_table.rs index f033d7e3adcf..afbb29cc3352 100644 --- a/src/query/storages/hive/hive/src/hive_table.rs +++ b/src/query/storages/hive/hive/src/hive_table.rs @@ -114,9 +114,9 @@ impl HiveTable { let filter_expression = push_downs.as_ref().and_then(|extra| { extra - .filter + .filters .as_ref() - .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS)) + .map(|filter| filter.filter.as_expr(&BUILTIN_FUNCTIONS)) }); let range_filter = match filter_expression { @@ -242,7 +242,7 @@ impl HiveTable { fn is_simple_select_query(&self, plan: &DataSourcePlan) -> bool { // couldn't get groupby order by info if let Some(PushDownInfo { - filter, + filters, limit: Some(lm), .. }) = &plan.push_downs @@ -253,10 +253,10 @@ impl HiveTable { // filter out the partition column related expressions let partition_keys = self.get_partition_key_sets(); - let columns = filter + let columns = filters .as_ref() .map(|f| { - let expr = f.as_expr(&BUILTIN_FUNCTIONS); + let expr = f.filter.as_expr(&BUILTIN_FUNCTIONS); expr.column_refs().keys().cloned().collect::>() }) .unwrap_or_default(); @@ -460,9 +460,9 @@ impl HiveTable { if let Some(partition_keys) = &self.table_options.partition_keys { if !partition_keys.is_empty() { let filter_expression = push_downs.as_ref().and_then(|p| { - p.filter + p.filters .as_ref() - .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS)) + .map(|filter| filter.filter.as_expr(&BUILTIN_FUNCTIONS)) }); return self diff --git a/src/query/storages/iceberg/src/table.rs b/src/query/storages/iceberg/src/table.rs index 8468b397d12a..57d9c1a1cd75 100644 --- a/src/query/storages/iceberg/src/table.rs +++ b/src/query/storages/iceberg/src/table.rs @@ -209,9 +209,12 @@ impl IcebergTable { ErrorCode::ReadTableDataError(format!("Cannot get current data files: {e:?}")) })?; - let filter = push_downs - .as_ref() - .and_then(|extra| extra.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS))); + let filter = push_downs.as_ref().and_then(|extra| { + extra + .filters + .as_ref() + .map(|f| f.filter.as_expr(&BUILTIN_FUNCTIONS)) + }); let schema = self.schema(); diff --git a/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs b/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs index eaaf5f755f5a..50431a37d017 100644 --- a/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs +++ b/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs @@ -78,9 +78,19 @@ impl Parquet2Table { project_parquet_schema(&self.arrow_schema, &self.schema_descr, &projection)?; let 
schema = Arc::new(arrow_to_table_schema(projected_arrow_schema)); - let filter = push_down - .as_ref() - .and_then(|extra| extra.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS))); + let filter = push_down.as_ref().and_then(|extra| { + extra + .filters + .as_ref() + .map(|f| f.filter.as_expr(&BUILTIN_FUNCTIONS)) + }); + + let inverted_filter = push_down.as_ref().and_then(|extra| { + extra + .filters + .as_ref() + .map(|f| f.inverted_filter.as_expr(&BUILTIN_FUNCTIONS)) + }); let top_k = top_k.map(|top_k| { let offset = projected_column_nodes @@ -94,11 +104,13 @@ impl Parquet2Table { let func_ctx = ctx.get_function_context()?; let row_group_pruner = if self.read_options.prune_row_groups() { - Some(RangePrunerCreator::try_create( + let p1 = RangePrunerCreator::try_create(func_ctx.clone(), &schema, filter.as_ref())?; + let p2 = RangePrunerCreator::try_create( func_ctx.clone(), &schema, - filter.as_ref(), - )?) + inverted_filter.as_ref(), + )?; + Some((p1, p2)) } else { None }; diff --git a/src/query/storages/parquet/src/parquet2/pruning.rs b/src/query/storages/parquet/src/parquet2/pruning.rs index cbf7565df367..4c3fa5365ca3 100644 --- a/src/query/storages/parquet/src/parquet2/pruning.rs +++ b/src/query/storages/parquet/src/parquet2/pruning.rs @@ -62,8 +62,11 @@ pub struct PartitionPruner { pub schema: TableSchemaRef, pub schema_descr: SchemaDescriptor, pub schema_from: String, - /// Pruner to prune row groups. - pub row_group_pruner: Option>, + /// Pruner to prune row groups. (filter & inverted filter) + pub row_group_pruner: Option<( + Arc, + Arc, + )>, /// Pruners to prune pages. pub page_pruners: Option, /// The projected column indices. @@ -120,7 +123,7 @@ impl PartitionPruner { let row_group_stats = if no_stats { None } else if self.row_group_pruner.is_some() && !self.skip_pruning { - let pruner = self.row_group_pruner.as_ref().unwrap(); + let (pruner, _) = self.row_group_pruner.as_ref().unwrap(); // If collecting stats fails or `should_keep` is true, we still read the row group. // Otherwise, the row group will be pruned. if let Ok(row_group_stats) = diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs b/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs index 03bbfe6ea97c..51e72d9602e4 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs @@ -33,6 +33,8 @@ use common_expression::TableSchema; use common_expression::TableSchemaRef; use common_expression::TopKSorter; use common_functions::BUILTIN_FUNCTIONS; +use common_storage::metrics::common::metrics_inc_omit_filter_rowgroups; +use common_storage::metrics::common::metrics_inc_omit_filter_rows; use futures::StreamExt; use opendal::Operator; use opendal::Reader; @@ -242,30 +244,41 @@ impl ParquetRSReader { .with_projection(self.projection.clone()) .with_batch_size(self.batch_size); - // Prune row groups. - let file_meta = builder.metadata(); + let mut full_match = false; + + let file_meta = builder.metadata().clone(); + // Prune row groups. 
if let Some(pruner) = &self.pruner { - let selected_row_groups = pruner.prune_row_groups(file_meta, None)?; - let row_selection = pruner.prune_pages(file_meta, &selected_row_groups)?; + let (selected_row_groups, omits) = pruner.prune_row_groups(&file_meta, None)?; + full_match = omits.iter().all(|x| *x); + builder = builder.with_row_groups(selected_row_groups.clone()); - builder = builder.with_row_groups(selected_row_groups); - if let Some(row_selection) = row_selection { - builder = builder.with_row_selection(row_selection); + if !full_match { + let row_selection = pruner.prune_pages(&file_meta, &selected_row_groups)?; + + if let Some(row_selection) = row_selection { + builder = builder.with_row_selection(row_selection); + } + } else { + metrics_inc_omit_filter_rowgroups(file_meta.num_row_groups() as u64); + metrics_inc_omit_filter_rows(file_meta.file_metadata().num_rows() as u64); } } - if let Some(predicate) = self.predicate.as_ref() { - let projection = predicate.projection().clone(); - let predicate = predicate.clone(); - let predicate_fn = move |batch| { - predicate - .evaluate(&batch) - .map_err(|e| ArrowError::from_external_error(Box::new(e))) - }; - builder = builder.with_row_filter(RowFilter::new(vec![Box::new( - ArrowPredicateFn::new(projection, predicate_fn), - )])); + if !full_match { + if let Some(predicate) = self.predicate.as_ref() { + let projection = predicate.projection().clone(); + let predicate = predicate.clone(); + let predicate_fn = move |batch| { + predicate + .evaluate(&batch) + .map_err(|e| ArrowError::from_external_error(Box::new(e))) + }; + builder = builder.with_row_filter(RowFilter::new(vec![Box::new( + ArrowPredicateFn::new(projection, predicate_fn), + )])); + } } Ok(builder.build()?) @@ -319,29 +332,40 @@ impl ParquetRSReader { .with_batch_size(self.batch_size); // Prune row groups. 
- let file_meta = builder.metadata(); + let file_meta = builder.metadata().clone(); + let mut full_match = false; if let Some(pruner) = &self.pruner { - let selected_row_groups = pruner.prune_row_groups(file_meta, None)?; - let row_selection = pruner.prune_pages(file_meta, &selected_row_groups)?; + let (selected_row_groups, omits) = pruner.prune_row_groups(&file_meta, None)?; + + full_match = omits.iter().all(|x| *x); + builder = builder.with_row_groups(selected_row_groups.clone()); - builder = builder.with_row_groups(selected_row_groups); - if let Some(row_selection) = row_selection { - builder = builder.with_row_selection(row_selection); + if !full_match { + let row_selection = pruner.prune_pages(&file_meta, &selected_row_groups)?; + + if let Some(row_selection) = row_selection { + builder = builder.with_row_selection(row_selection); + } + } else { + metrics_inc_omit_filter_rowgroups(file_meta.num_row_groups() as u64); + metrics_inc_omit_filter_rows(file_meta.file_metadata().num_rows() as u64); } } - if let Some(predicate) = self.predicate.as_ref() { - let projection = predicate.projection().clone(); - let predicate = predicate.clone(); - let predicate_fn = move |batch| { - predicate - .evaluate(&batch) - .map_err(|e| ArrowError::from_external_error(Box::new(e))) - }; - builder = builder.with_row_filter(RowFilter::new(vec![Box::new( - ArrowPredicateFn::new(projection, predicate_fn), - )])); + if !full_match { + if let Some(predicate) = self.predicate.as_ref() { + let projection = predicate.projection().clone(); + let predicate = predicate.clone(); + let predicate_fn = move |batch| { + predicate + .evaluate(&batch) + .map_err(|e| ArrowError::from_external_error(Box::new(e))) + }; + builder = builder.with_row_filter(RowFilter::new(vec![Box::new( + ArrowPredicateFn::new(projection, predicate_fn), + )])); + } } let reader = builder.build()?; @@ -385,13 +409,23 @@ impl ParquetRSReader { }); // TODO(parquet): cache deserilaized columns to avoid deserialize multiple times. let mut row_group = InMemoryRowGroup::new(&part.meta, page_locations.as_deref()); + let mut selection = part .selectors .as_ref() .map(|x| x.iter().map(RowSelector::from).collect::>()) .map(RowSelection::from); - if let Some(predicate) = &self.predicate { + let mut predicate = self.predicate.as_ref(); + if part.omit_filter { + predicate = None; + selection = None; + + metrics_inc_omit_filter_rowgroups(1); + metrics_inc_omit_filter_rows(row_group.row_count() as u64); + } + + if let Some(predicate) = predicate { // Fetch columns used for eval predicate (prewhere). row_group .fetch( diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs b/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs index e95d0c849345..0ac6d50e2288 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs @@ -109,6 +109,10 @@ impl<'a> InMemoryRowGroup<'a> { } } + pub fn row_count(&self) -> usize { + self.row_count + } + /// Fetches the necessary column data into memory /// /// If call `fetch` multiple times, it will only fetch the data that has not been fetched. 
diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs index 54c33d98d549..546b019564cb 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs @@ -356,11 +356,16 @@ fn prune_and_generate_partitions( .. } = meta.as_ref(); part_stats.partitions_total += meta.num_row_groups(); - let rgs = pruner.prune_row_groups(meta, row_group_level_stats.as_deref())?; - let mut row_selections = pruner.prune_pages(meta, &rgs)?; + let (rgs, omits) = pruner.prune_row_groups(meta, row_group_level_stats.as_deref())?; + let mut row_selections = if omits.iter().all(|x| *x) { + None + } else { + pruner.prune_pages(meta, &rgs)? + }; + let mut rows_read = 0; // Rows read in current file. - for rg in rgs { + for (rg, omit) in rgs.into_iter().zip(omits.into_iter()) { let rg_meta = meta.row_group(rg); let num_rows = rg_meta.num_rows() as usize; // Split rows belonging to current row group. @@ -412,6 +417,7 @@ fn prune_and_generate_partitions( compressed_size, uncompressed_size, sort_min_max, + omit_filter: omit, }); } diff --git a/src/query/storages/parquet/src/parquet_rs/partition.rs b/src/query/storages/parquet/src/parquet_rs/partition.rs index 1e152583b4be..517398585d83 100644 --- a/src/query/storages/parquet/src/parquet_rs/partition.rs +++ b/src/query/storages/parquet/src/parquet_rs/partition.rs @@ -100,6 +100,7 @@ pub struct ParquetRSRowGroupPart { pub uncompressed_size: u64, pub compressed_size: u64, pub sort_min_max: Option<(Scalar, Scalar)>, + pub omit_filter: bool, } impl Eq for ParquetRSRowGroupPart {} diff --git a/src/query/storages/parquet/src/parquet_rs/pruning.rs b/src/query/storages/parquet/src/parquet_rs/pruning.rs index 6210085c302e..ffcda9935aeb 100644 --- a/src/query/storages/parquet/src/parquet_rs/pruning.rs +++ b/src/query/storages/parquet/src/parquet_rs/pruning.rs @@ -38,7 +38,10 @@ use crate::parquet_rs::statistics::convert_index_to_column_statistics; /// We can use this pruner to compute row groups and pages to skip. pub struct ParquetRSPruner { leaf_fields: Arc>, - range_pruner: Option>, + range_pruner: Option<( + Arc, + Arc, + )>, prune_row_groups: bool, prune_pages: bool, @@ -55,16 +58,19 @@ impl ParquetRSPruner { options: ParquetReadOptions, ) -> Result { // Build `RangePruner` by `filter`. 
- let filter = push_down - .as_ref() - .and_then(|p| p.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS))); + let filter = push_down.as_ref().and_then(|p| p.filters.as_ref()); let mut predicate_columns = vec![]; let range_pruner = if filter.is_some() && (options.prune_row_groups() || options.prune_pages()) { - predicate_columns = filter + let filter_expr = filter.as_ref().unwrap().filter.as_expr(&BUILTIN_FUNCTIONS); + let inverted_filter_expr = filter .as_ref() .unwrap() + .inverted_filter + .as_expr(&BUILTIN_FUNCTIONS); + + predicate_columns = filter_expr .column_refs() .into_keys() .map(|name| { @@ -75,8 +81,11 @@ impl ParquetRSPruner { }) .collect::>(); predicate_columns.sort(); - let pruner = RangePrunerCreator::try_create(func_ctx, &schema, filter.as_ref())?; - Some(pruner) + let pruner = + RangePrunerCreator::try_create(func_ctx.clone(), &schema, Some(&filter_expr))?; + let inverted_pruner = + RangePrunerCreator::try_create(func_ctx, &schema, Some(&inverted_filter_expr))?; + Some((pruner, inverted_pruner)) } else { None }; @@ -92,28 +101,36 @@ impl ParquetRSPruner { /// Prune row groups of a parquet file. /// - /// Return the selected row groups' indices in the meta. + /// Return the selected row groups' indices in the meta and omit filter flags. /// /// If `stats` is not [None], we use this statistics to prune but not collect again. pub fn prune_row_groups( &self, meta: &ParquetMetaData, stats: Option<&[StatisticsOfColumns]>, - ) -> Result> { + ) -> Result<(Vec, Vec)> { + let default_selection = (0..meta.num_row_groups()).collect(); + let default_omits = vec![false; meta.num_row_groups()]; if !self.prune_row_groups { - return Ok((0..meta.num_row_groups()).collect()); + return Ok((default_selection, default_omits)); } + match &self.range_pruner { - None => Ok((0..meta.num_row_groups()).collect()), - Some(pruner) => { + None => Ok((default_selection, default_omits)), + + Some((pruner, inverted_pruner)) => { let mut selection = Vec::with_capacity(meta.num_row_groups()); + let mut omits = Vec::with_capacity(meta.num_row_groups()); if let Some(row_group_stats) = stats { for (i, row_group) in row_group_stats.iter().enumerate() { if pruner.should_keep(row_group, None) { selection.push(i); + + let omit = !inverted_pruner.should_keep(row_group, None); + omits.push(omit); } } - Ok(selection) + Ok((selection, omits)) } else if let Some(row_group_stats) = collect_row_group_stats( meta.row_groups(), &self.leaf_fields, @@ -122,11 +139,14 @@ impl ParquetRSPruner { for (i, row_group) in row_group_stats.iter().enumerate() { if pruner.should_keep(row_group, None) { selection.push(i); + + let omit = !inverted_pruner.should_keep(row_group, None); + omits.push(omit); } } - Ok(selection) + Ok((selection, omits)) } else { - Ok((0..meta.num_row_groups()).collect()) + Ok((default_selection, default_omits)) } } } @@ -145,7 +165,7 @@ impl ParquetRSPruner { } match &self.range_pruner { None => Ok(None), - Some(pruner) => { + Some((pruner, _)) => { // Only if the file has page level statistics, we can use them to prune. 
if meta.column_index().is_none() || meta.offset_index().is_none() { return Ok(None); diff --git a/src/query/storages/system/src/columns_table.rs b/src/query/storages/system/src/columns_table.rs index 1dd15dcde79a..6e3e9f7a4ca3 100644 --- a/src/query/storages/system/src/columns_table.rs +++ b/src/query/storages/system/src/columns_table.rs @@ -155,7 +155,7 @@ impl ColumnsTable { let mut databases = Vec::new(); if let Some(push_downs) = push_downs { - if let Some(filter) = push_downs.filter { + if let Some(filter) = push_downs.filters.as_ref().map(|f| &f.filter) { let expr = filter.as_expr(&BUILTIN_FUNCTIONS); find_eq_filter(&expr, &mut |col_name, scalar| { if col_name == "database" { diff --git a/src/query/storages/system/src/tables_table.rs b/src/query/storages/system/src/tables_table.rs index cf1dc720389b..82282abfe815 100644 --- a/src/query/storages/system/src/tables_table.rs +++ b/src/query/storages/system/src/tables_table.rs @@ -124,7 +124,7 @@ where TablesTable: HistoryAware let mut dbs = Vec::new(); if let Some(push_downs) = &push_downs { let mut db_name = Vec::new(); - if let Some(filter) = &push_downs.filter { + if let Some(filter) = push_downs.filters.as_ref().map(|f| &f.filter) { let expr = filter.as_expr(&BUILTIN_FUNCTIONS); find_eq_filter(&expr, &mut |col_name, scalar| { if col_name == "database" { diff --git a/tests/sqllogictests/suites/base/03_common/03_0025_delete_from b/tests/sqllogictests/suites/base/03_common/03_0025_delete_from index 6804f58a1cf1..692e4f549964 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0025_delete_from +++ b/tests/sqllogictests/suites/base/03_common/03_0025_delete_from @@ -114,6 +114,17 @@ select count(*) = 0 from t ---- 1 +statement ok +insert into t values (1), (NULL) + +statement ok +delete from t where c >= 1 + + +query T +select * from t +---- +NULL statement ok