diff --git a/Cargo.lock b/Cargo.lock
index 521ff607751d..d78802ff087b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2920,7 +2920,6 @@ dependencies = [
"common-pipeline-sources",
"common-sql",
"common-storage",
- "databend-query",
"ethnum",
"futures",
"log",
@@ -3890,6 +3889,7 @@ dependencies = [
"ordered-float 3.7.0",
"p256 0.13.0",
"parking_lot 0.12.1",
+ "parquet",
"paste",
"petgraph",
"pin-project-lite",
diff --git a/docs/doc/13-sql-reference/99-ansi-sql.md b/docs/doc/13-sql-reference/99-ansi-sql.md
index 25beb5c1e079..7967c20450a6 100644
--- a/docs/doc/13-sql-reference/99-ansi-sql.md
+++ b/docs/doc/13-sql-reference/99-ansi-sql.md
@@ -95,7 +95,7 @@ Databend aims to conform to the SQL standard, with particular support for ISO/IE
| E121-17 | WITH HOLD cursors | No | |
| **E131** | **Null value support (nulls in lieu of values)** | Yes | |
| **E141** | **Basic integrity constraints** | No | |
-| E141-01 | NOT NULL constraints | Yes | Default in Databend: All columns are non-nullable (NOT NULL). |
+| E141-01 | NOT NULL constraints | Yes | Default in Databend: All columns are nullable. |
| E141-02 | UNIQUE constraint of NOT NULL columns | No | |
| E141-03 | PRIMARY KEY constraints | No | |
| E141-04 | Basic FOREIGN KEY constraint with the NO ACTION default for both referential delete action and referential update action | No | |
diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json b/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json
index fccc400f3f6b..0b9b999caf48 100644
--- a/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json
+++ b/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json
@@ -1,7 +1,3 @@
{
- "label": "User-Defined Function",
- "link": {
- "type": "generated-index",
- "slug": "/sql-commands/ddl/udf"
- }
+ "label": "User-Defined Function"
}
\ No newline at end of file
diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md
index 1a727c2b02d2..a196a7689aa9 100644
--- a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md
+++ b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md
@@ -3,22 +3,33 @@ title: ALTER FUNCTION
description:
Modifies the properties for an existing user-defined function.
---
+import FunctionDescription from '@site/src/components/FunctionDescription';
+
+
+
+Alters a user-defined function.
## Syntax
```sql
-CREATE FUNCTION AS ([ argname ]) -> ''
+-- Alter UDF created with lambda expression
+ALTER FUNCTION [IF NOT EXISTS] <function_name>
+    AS (<input_param_names>) -> <lambda_expression>
+    [DESC='<description>']
+
+-- Alter UDF created with UDF server
+ALTER FUNCTION [IF NOT EXISTS] <function_name>
+    AS (<input_param_types>) RETURNS <return_type> LANGUAGE <language_name>
+    HANDLER = '<handler_name>' ADDRESS = '<udf_server_address>'
+    [DESC='<description>']
```
## Examples
```sql
+CREATE FUNCTION a_plus_3 AS (a) -> a+3+3;
ALTER FUNCTION a_plus_3 AS (a) -> a+3;
-SELECT a_plus_3(2);
-+---------+
-| (2 + 3) |
-+---------+
-| 5 |
-+---------+
-```
+CREATE FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd' ADDRESS = 'http://0.0.0.0:8815';
+ALTER FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd_new' ADDRESS = 'http://0.0.0.0:8815';
+```
\ No newline at end of file
diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md
index 6e3d91a01412..303eff27943b 100644
--- a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md
+++ b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md
@@ -3,20 +3,44 @@ title: CREATE FUNCTION
description:
Create a new user-defined scalar function.
---
+import FunctionDescription from '@site/src/components/FunctionDescription';
+
-## CREATE FUNCTION
-
-Creates a new UDF (user-defined function), the UDF can contain an SQL expression.
+Creates a user-defined function.
## Syntax
```sql
-CREATE FUNCTION [ IF NOT EXISTS ] AS ([ argname ]) -> ''
+-- Create with lambda expression
+CREATE FUNCTION [IF NOT EXISTS] <function_name>
+    AS (<input_param_names>) -> <lambda_expression>
+    [DESC='<description>']
+
+
+-- Create with UDF server
+CREATE FUNCTION [IF NOT EXISTS] <function_name>
+    AS (<input_param_types>) RETURNS <return_type> LANGUAGE <language_name>
+    HANDLER = '<handler_name>' ADDRESS = '<udf_server_address>'
+    [DESC='<description>']
```
+| Parameter | Description |
+|-----------------------|---------------------------------------------------------------------------------------------------|
+| `<function_name>` | The name of the function. |
+| `<lambda_expression>` | The lambda expression or code snippet defining the function's behavior. |
+| `DESC='<description>'` | Description of the UDF.|
+| `<input_param_names>`| A list of input parameter names. Separated by comma.|
+| `<input_param_types>`| A list of input parameter types. Separated by comma.|
+| `<return_type>` | The return type of the function. |
+| `LANGUAGE <language_name>` | Specifies the language used to write the function. Available values: `python`. |
+| `HANDLER = '<handler_name>'` | Specifies the name of the function's handler. |
+| `ADDRESS = '<udf_server_address>'` | Specifies the address of the UDF server. |
+
## Examples
+### Creating UDF with Lambda Expression
+
```sql
CREATE FUNCTION a_plus_3 AS (a) -> a+3;
@@ -53,3 +77,89 @@ DROP FUNCTION get_v2;
DROP TABLE json_table;
```
+
+### Creating UDF with UDF Server (Python)
+
+This example demonstrates how to enable and configure a UDF server in Python:
+
+1. Enable UDF server support by adding the following parameters to the [query] section in the [databend-query.toml](https://github.com/datafuselabs/databend/blob/main/scripts/distribution/configs/databend-query.toml) configuration file.
+
+```toml title='databend-query.toml'
+[query]
+...
+enable_udf_server = true
+# List the allowed UDF server addresses, separating multiple addresses with commas.
+# For example, ['http://0.0.0.0:8815', 'http://example.com']
+udf_server_allow_list = ['http://0.0.0.0:8815']
+...
+```
+
+2. Define your function. This code defines and runs a UDF server in Python, which exposes a custom function *gcd* for calculating the greatest common divisor of two integers and allows remote execution of this function:
+
+:::note
+The SDK package is not yet available. Prior to its release, please download the 'udf.py' file from https://github.com/datafuselabs/databend/blob/main/tests/udf-server/udf.py and ensure it is saved in the same directory as this Python script. This step is essential for the code to function correctly.
+:::
+
+```python title='udf_server.py'
+from udf import *
+
+@udf(
+ input_types=["INT", "INT"],
+ result_type="INT",
+ skip_null=True,
+)
+def gcd(x: int, y: int) -> int:
+ while y != 0:
+ (x, y) = (y, x % y)
+ return x
+
+if __name__ == '__main__':
+ # create a UDF server listening at '0.0.0.0:8815'
+ server = UdfServer("0.0.0.0:8815")
+ # add defined functions
+ server.add_function(gcd)
+ # start the UDF server
+ server.serve()
+```
+
+`@udf` is a decorator used for defining UDFs in Databend, supporting the following parameters:
+
+| Parameter | Description |
+|--------------|-----------------------------------------------------------------------------------------------------|
+| input_types | A list of strings or Arrow data types that specify the input data types. |
+| result_type | A string or an Arrow data type that specifies the return value type. |
+| name | An optional string specifying the function name. If not provided, the original name will be used. |
+| io_threads | Number of I/O threads used per data chunk for I/O bound functions. |
+| skip_null | A boolean value specifying whether to skip NULL values. If set to True, NULL values will not be passed to the function, and the corresponding return value is set to NULL. Default is False. |
+
+This table illustrates the correspondence between Databend data types and their corresponding Python equivalents:
+
+| Databend Type | Python Type |
+|-----------------------|-----------------------|
+| BOOLEAN | bool |
+| TINYINT (UNSIGNED) | int |
+| SMALLINT (UNSIGNED) | int |
+| INT (UNSIGNED) | int |
+| BIGINT (UNSIGNED) | int |
+| FLOAT | float |
+| DOUBLE | float |
+| DECIMAL | decimal.Decimal |
+| DATE | datetime.date |
+| TIMESTAMP | datetime.datetime |
+| VARCHAR | str |
+| VARIANT | any |
+| MAP(K,V) | dict |
+| ARRAY(T) | list[T] |
+| TUPLE(T...) | tuple(T...) |
+
+3. Run the Python file to start the UDF server:
+
+```shell
+python3 udf_server.py
+```
+
+4. Register the function *gcd* with the [CREATE FUNCTION](ddl-create-function.md) in Databend:
+
+```sql
+CREATE FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd' ADDRESS = 'http://0.0.0.0:8815';
+```
\ No newline at end of file
diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md
index b93365d5f630..5650295b770d 100644
--- a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md
+++ b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md
@@ -4,12 +4,12 @@ description:
Drop an existing user-defined function.
---
-Drop an existing user-defined function.
+Drops a user-defined function.
## Syntax
```sql
-DROP FUNCTION [IF EXISTS]
+DROP FUNCTION [IF EXISTS] <function_name>
```
## Examples
@@ -19,4 +19,4 @@ DROP FUNCTION a_plus_3;
SELECT a_plus_3(2);
ERROR 1105 (HY000): Code: 2602, Text = Unknown Function a_plus_3 (while in analyze select projection).
-```
+```
\ No newline at end of file
diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/index.md b/docs/doc/14-sql-commands/00-ddl/50-udf/index.md
new file mode 100644
index 000000000000..27ceaed90510
--- /dev/null
+++ b/docs/doc/14-sql-commands/00-ddl/50-udf/index.md
@@ -0,0 +1,125 @@
+---
+title: User-Defined Function
+---
+import IndexOverviewList from '@site/src/components/IndexOverviewList';
+
+## What are UDFs?
+
+User-Defined Functions (UDFs) enable you to define your own custom operations to process data within Databend. They are typically written using lambda expressions or implemented via a UDF server with programming languages such as Python and are executed as part of Databend's query processing pipeline. Advantages of using UDFs include:
+
+- Customized Data Transformations: UDFs empower you to perform data transformations that may not be achievable through built-in Databend functions alone. This customization is particularly valuable for handling unique data formats or business logic.
+
+- Performance Optimization: UDFs provide the flexibility to define and fine-tune your own custom functions, enabling you to optimize data processing to meet precise performance requirements. This means you can tailor the code for maximum efficiency, ensuring that your data processing tasks run as efficiently as possible.
+
+- Code Reusability: UDFs can be reused across multiple queries, saving time and effort in coding and maintaining data processing logic.
+
+## Managing UDFs
+
+To manage UDFs in Databend, use the following commands:
+
+<IndexOverviewList />
+
+## Usage Examples
+
+This section demonstrates two UDF implementation methods within Databend: one by creating UDFs with lambda expressions and the other by utilizing UDF servers in conjunction with Python. For additional examples of defining UDFs in various programming languages, see [CREATE FUNCTION](ddl-create-function.md).
+
+### UDF Implementation with Lambda Expression
+
+This example implements a UDF named *a_plus_3* using a lambda expression:
+
+```sql
+CREATE FUNCTION a_plus_3 AS (a) -> a+3;
+
+SELECT a_plus_3(2);
++---------+
+| (2 + 3) |
++---------+
+| 5 |
++---------+
+```
+
+### UDF Implementation via UDF Server
+
+This example demonstrates how to enable and configure a UDF server in Python:
+
+1. Enable UDF server support by adding the following parameters to the [query] section in the [databend-query.toml](https://github.com/datafuselabs/databend/blob/main/scripts/distribution/configs/databend-query.toml) configuration file.
+
+```toml title='databend-query.toml'
+[query]
+...
+enable_udf_server = true
+# List the allowed UDF server addresses, separating multiple addresses with commas.
+# For example, ['http://0.0.0.0:8815', 'http://example.com']
+udf_server_allow_list = ['http://0.0.0.0:8815']
+...
+```
+
+2. Define your function. This code defines and runs a UDF server in Python, which exposes a custom function *gcd* for calculating the greatest common divisor of two integers and allows remote execution of this function:
+
+:::note
+The SDK package is not yet available. Prior to its release, please download the 'udf.py' file from https://github.com/datafuselabs/databend/blob/main/tests/udf-server/udf.py and ensure it is saved in the same directory as this Python script. This step is essential for the code to function correctly.
+:::
+
+```python title='udf_server.py'
+from udf import *
+
+@udf(
+ input_types=["INT", "INT"],
+ result_type="INT",
+ skip_null=True,
+)
+def gcd(x: int, y: int) -> int:
+ while y != 0:
+ (x, y) = (y, x % y)
+ return x
+
+if __name__ == '__main__':
+ # create a UDF server listening at '0.0.0.0:8815'
+ server = UdfServer("0.0.0.0:8815")
+ # add defined functions
+ server.add_function(gcd)
+ # start the UDF server
+ server.serve()
+```
+
+`@udf` is a decorator used for defining UDFs in Databend, supporting the following parameters:
+
+| Parameter | Description |
+|--------------|-----------------------------------------------------------------------------------------------------|
+| input_types | A list of strings or Arrow data types that specify the input data types. |
+| result_type | A string or an Arrow data type that specifies the return value type. |
+| name | An optional string specifying the function name. If not provided, the original name will be used. |
+| io_threads | Number of I/O threads used per data chunk for I/O bound functions. |
+| skip_null | A boolean value specifying whether to skip NULL values. If set to True, NULL values will not be passed to the function, and the corresponding return value is set to NULL. Default is False. |
+
+This table illustrates the correspondence between Databend data types and their corresponding Python equivalents:
+
+| Databend Type | Python Type |
+|-----------------------|-----------------------|
+| BOOLEAN | bool |
+| TINYINT (UNSIGNED) | int |
+| SMALLINT (UNSIGNED) | int |
+| INT (UNSIGNED) | int |
+| BIGINT (UNSIGNED) | int |
+| FLOAT | float |
+| DOUBLE | float |
+| DECIMAL | decimal.Decimal |
+| DATE | datetime.date |
+| TIMESTAMP | datetime.datetime |
+| VARCHAR | str |
+| VARIANT | any |
+| MAP(K,V) | dict |
+| ARRAY(T) | list[T] |
+| TUPLE(T...) | tuple(T...) |
+
+3. Run the Python file to start the UDF server:
+
+```shell
+python3 udf_server.py
+```
+
+4. Register the function *gcd* with the [CREATE FUNCTION](ddl-create-function.md) in Databend:
+
+```sql
+CREATE FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd' ADDRESS = 'http://0.0.0.0:8815';
+```
\ No newline at end of file
diff --git a/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md b/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md
index 5448cf61f869..53f14d49bc95 100644
--- a/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md
+++ b/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md
@@ -184,10 +184,14 @@ externalLocation ::=
Specify a list of one or more files names (separated by commas) to be loaded.
-### PATTERN = 'regex_pattern'
+### PATTERN = '<regex_pattern>'
A [PCRE2](https://www.pcre.org/current/doc/html/)-based regular expression pattern string, enclosed in single quotes, specifying the file names to match. Click [here](#loading-data-with-pattern-matching) to see an example. For PCRE2 syntax, see http://www.pcre.org/current/doc/html/pcre2syntax.html.
+:::note
+Suppose there is a file `@<stage>/<path>/<file_name>`; to include it, `<regex_pattern>` needs to match `^<path>/<file_name>$`.
+:::
+
### FILE_FORMAT
See [Input & Output File Formats](../../13-sql-reference/50-file-format-options.md).
diff --git a/docs/doc/15-sql-functions/112-table-functions/list_stage.md b/docs/doc/15-sql-functions/112-table-functions/list_stage.md
index e5ddaeaff1ef..9843e95ea0d7 100644
--- a/docs/doc/15-sql-functions/112-table-functions/list_stage.md
+++ b/docs/doc/15-sql-functions/112-table-functions/list_stage.md
@@ -36,10 +36,15 @@ externalStage ::= @[/]
userStage ::= @~[/]
```
+### PATTERN
+
+See [COPY INTO table](/14-sql-commands/10-dml/dml-copy-into-table.md).
+
+
## Examples
```sql
-SELECT * FROM list_stage(location => '@my_stage/', pattern => '.log');
+SELECT * FROM list_stage(location => '@my_stage/', pattern => '.*[.]log');
+----------------+------+------------------------------------+-------------------------------+---------+
| name | size | md5 | last_modified | creator |
+----------------+------+------------------------------------+-------------------------------+---------+
diff --git a/scripts/setup/rust-tools.txt b/scripts/setup/rust-tools.txt
index 1df282bb138f..5d68657a1398 100644
--- a/scripts/setup/rust-tools.txt
+++ b/scripts/setup/rust-tools.txt
@@ -2,3 +2,4 @@ cargo-audit@0.17.6
cargo-machete@0.5.0
taplo-cli@0.8.1
typos-cli@1.16.3
+nextest@0.9.58
diff --git a/src/query/storages/parquet/tests/it/main.rs b/src/common/storage/src/metrics/common.rs
similarity index 56%
rename from src/query/storages/parquet/tests/it/main.rs
rename to src/common/storage/src/metrics/common.rs
index bff09cbf2b3c..2596ceaf82b7 100644
--- a/src/query/storages/parquet/tests/it/main.rs
+++ b/src/common/storage/src/metrics/common.rs
@@ -12,4 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-mod parquet_rs;
+use common_metrics::register_counter;
+use common_metrics::Counter;
+use lazy_static::lazy_static;
+
+lazy_static! {
+ static ref OMIT_FILTER_ROWGROUPS: Counter = register_counter("omit_filter_rowgroups");
+ static ref OMIT_FILTER_ROWS: Counter = register_counter("omit_filter_rows");
+}
+
+pub fn metrics_inc_omit_filter_rowgroups(c: u64) {
+ OMIT_FILTER_ROWGROUPS.inc_by(c);
+}
+
+pub fn metrics_inc_omit_filter_rows(c: u64) {
+ OMIT_FILTER_ROWS.inc_by(c);
+}
diff --git a/src/common/storage/src/metrics/mod.rs b/src/common/storage/src/metrics/mod.rs
index 890e46a7413d..7d5d075ca8d4 100644
--- a/src/common/storage/src/metrics/mod.rs
+++ b/src/common/storage/src/metrics/mod.rs
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+pub mod common;
pub mod copy;
pub mod merge_into;
mod storage_metrics;
diff --git a/src/common/storage/src/stage.rs b/src/common/storage/src/stage.rs
index fb809a3eaa8d..fdf714284494 100644
--- a/src/common/storage/src/stage.rs
+++ b/src/common/storage/src/stage.rs
@@ -92,7 +92,7 @@ pub struct StageFilesInfo {
impl StageFilesInfo {
fn get_pattern(&self) -> Result> {
match &self.pattern {
- Some(pattern) => match Regex::new(pattern) {
+ Some(pattern) => match Regex::new(&format!("^{pattern}$")) {
Ok(r) => Ok(Some(r)),
Err(e) => Err(ErrorCode::SyntaxException(format!(
"Pattern format invalid, got:{}, error:{:?}",
@@ -206,6 +206,7 @@ impl StageFilesInfo {
first_only: bool,
max_files: usize,
) -> Result> {
+ let prefix_len = if path == "/" { 0 } else { path.len() };
let root_meta = operator.stat(path).await;
match root_meta {
Ok(meta) => match meta.mode() {
@@ -233,7 +234,7 @@ impl StageFilesInfo {
let mut limit: usize = 0;
while let Some(obj) = list.try_next().await? {
let meta = operator.metadata(&obj, StageFileInfo::meta_query()).await?;
- if check_file(obj.path(), meta.mode(), &pattern) {
+ if check_file(&obj.path()[prefix_len..], meta.mode(), &pattern) {
files.push(StageFileInfo::new(obj.path().to_string(), &meta));
if first_only {
return Ok(files);
@@ -263,6 +264,7 @@ fn blocking_list_files_with_pattern(
first_only: bool,
max_files: usize,
) -> Result> {
+ let prefix_len = if path == "/" { 0 } else { path.len() };
let operator = operator.blocking();
let root_meta = operator.stat(path);
@@ -293,7 +295,7 @@ fn blocking_list_files_with_pattern(
for obj in list {
let obj = obj?;
let meta = operator.metadata(&obj, StageFileInfo::meta_query())?;
- if check_file(obj.path(), meta.mode(), &pattern) {
+ if check_file(&obj.path()[prefix_len..], meta.mode(), &pattern) {
files.push(StageFileInfo::new(obj.path().to_string(), &meta));
if first_only {
return Ok(files);
diff --git a/src/meta/app/src/schema/table.rs b/src/meta/app/src/schema/table.rs
index e2eb77c6296f..38e5c5f352f5 100644
--- a/src/meta/app/src/schema/table.rs
+++ b/src/meta/app/src/schema/table.rs
@@ -243,22 +243,14 @@ pub struct TableMeta {
}
impl TableMeta {
- pub fn add_columns(&mut self, fields: &[TableField], field_comments: &[String]) -> Result<()> {
- let mut new_schema = self.schema.as_ref().to_owned();
- new_schema.add_columns(fields)?;
- self.schema = Arc::new(new_schema);
- field_comments.iter().for_each(|c| {
- self.field_comments.push(c.to_owned());
- });
- Ok(())
- }
-
pub fn add_column(
&mut self,
field: &TableField,
comment: &str,
index: FieldIndex,
) -> Result<()> {
+ self.fill_field_comments();
+
let mut new_schema = self.schema.as_ref().to_owned();
new_schema.add_column(field, index)?;
self.schema = Arc::new(new_schema);
@@ -267,12 +259,23 @@ impl TableMeta {
}
pub fn drop_column(&mut self, column: &str) -> Result<()> {
+ self.fill_field_comments();
+
let mut new_schema = self.schema.as_ref().to_owned();
let index = new_schema.drop_column(column)?;
self.field_comments.remove(index);
self.schema = Arc::new(new_schema);
Ok(())
}
+
+ /// To fix the field comments panic.
+ pub fn fill_field_comments(&mut self) {
+ let num_fields = self.schema.num_fields();
+ // If the field comments is confused, fill it with empty string.
+ if self.field_comments.len() < num_fields {
+ self.field_comments = vec!["".to_string(); num_fields];
+ }
+ }
}
impl TableInfo {
diff --git a/src/query/ast/src/ast/format/ast_format.rs b/src/query/ast/src/ast/format/ast_format.rs
index 27ffead1343b..da521cc648ad 100644
--- a/src/query/ast/src/ast/format/ast_format.rs
+++ b/src/query/ast/src/ast/format/ast_format.rs
@@ -2179,10 +2179,10 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor {
self.children.push(node);
}
- fn visit_list_stage(&mut self, location: &'ast str, pattern: &'ast str) {
+ fn visit_list_stage(&mut self, location: &'ast str, pattern: &'ast Option) {
let location_format_ctx = AstFormatContext::new(format!("Location {}", location));
let location_child = FormatTreeNode::new(location_format_ctx);
- let pattern_format_ctx = AstFormatContext::new(format!("Pattern {}", pattern));
+ let pattern_format_ctx = AstFormatContext::new(format!("Pattern {:?}", pattern));
let pattern_child = FormatTreeNode::new(pattern_format_ctx);
let name = "ListStage".to_string();
diff --git a/src/query/ast/src/ast/statements/merge_into.rs b/src/query/ast/src/ast/statements/merge_into.rs
index 83291fa1b8ea..caa273e4ebe2 100644
--- a/src/query/ast/src/ast/statements/merge_into.rs
+++ b/src/query/ast/src/ast/statements/merge_into.rs
@@ -52,7 +52,10 @@ impl Display for MergeUpdateExpr {
#[derive(Debug, Clone, PartialEq)]
pub enum MatchOperation {
- Update { update_list: Vec },
+ Update {
+ update_list: Vec,
+ is_star: bool,
+ },
Delete,
}
@@ -66,6 +69,7 @@ pub struct MatchedClause {
pub struct InsertOperation {
pub columns: Option>,
pub values: Vec,
+ pub is_star: bool,
}
#[derive(Debug, Clone, PartialEq)]
@@ -116,9 +120,16 @@ impl Display for MergeIntoStmt {
write!(f, " THEN ")?;
match &match_clause.operation {
- MatchOperation::Update { update_list } => {
- write!(f, " UPDATE SET ")?;
- write_comma_separated_list(f, update_list)?;
+ MatchOperation::Update {
+ update_list,
+ is_star,
+ } => {
+ if *is_star {
+ write!(f, " UPDATE * ")?;
+ } else {
+ write!(f, " UPDATE SET ")?;
+ write_comma_separated_list(f, update_list)?;
+ }
}
MatchOperation::Delete => {
write!(f, " DELETE ")?;
diff --git a/src/query/ast/src/ast/statements/statement.rs b/src/query/ast/src/ast/statements/statement.rs
index 5e4f7ba6de3b..8ae30da9c555 100644
--- a/src/query/ast/src/ast/statements/statement.rs
+++ b/src/query/ast/src/ast/statements/statement.rs
@@ -186,7 +186,7 @@ pub enum Statement {
},
ListStage {
location: String,
- pattern: String,
+ pattern: Option,
},
// UserDefinedFileFormat
@@ -452,7 +452,7 @@ impl Display for Statement {
Statement::AlterUDF(stmt) => write!(f, "{stmt}")?,
Statement::ListStage { location, pattern } => {
write!(f, "LIST @{location}")?;
- if !pattern.is_empty() {
+ if let Some(pattern) = pattern {
write!(f, " PATTERN = '{pattern}'")?;
}
}
diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs
index e44629718c70..c3bc3e1f4856 100644
--- a/src/query/ast/src/parser/statement.rs
+++ b/src/query/ast/src/parser/statement.rs
@@ -1054,7 +1054,7 @@ pub fn statement(i: Input) -> IResult {
},
|(_, location, opt_pattern)| Statement::ListStage {
location,
- pattern: opt_pattern.map(|v| v.2).unwrap_or_default(),
+ pattern: opt_pattern.map(|v| v.2),
},
);
@@ -2267,40 +2267,75 @@ fn match_operation(i: Input) -> IResult {
rule! {
UPDATE ~ SET ~ ^#comma_separated_list1(merge_update_expr)
},
- |(_, _, update_list)| MatchOperation::Update { update_list },
+ |(_, _, update_list)| MatchOperation::Update {
+ update_list,
+ is_star: false,
+ },
+ ),
+ map(
+ rule! {
+ UPDATE ~ "*"
+ },
+ |(_, _)| MatchOperation::Update {
+ update_list: Vec::new(),
+ is_star: true,
+ },
),
))(i)
}
pub fn unmatch_clause(i: Input) -> IResult {
- map(
- rule! {
- WHEN ~ NOT ~ MATCHED ~ (AND ~ ^#expr)? ~ THEN ~ INSERT ~ ( "(" ~ ^#comma_separated_list1(ident) ~ ^")" )?
- ~ VALUES ~ ^#row_values
- },
- |(_, _, _, expr_op, _, _, columns_op, _, values)| {
- let selection = match expr_op {
- Some(e) => Some(e.1),
- None => None,
- };
- match columns_op {
- Some(columns) => MergeOption::Unmatch(UnmatchedClause {
- insert_operation: InsertOperation {
- columns: Some(columns.1),
- values,
- },
- selection,
- }),
- None => MergeOption::Unmatch(UnmatchedClause {
+ alt((
+ map(
+ rule! {
+ WHEN ~ NOT ~ MATCHED ~ (AND ~ ^#expr)? ~ THEN ~ INSERT ~ ( "(" ~ ^#comma_separated_list1(ident) ~ ^")" )?
+ ~ VALUES ~ ^#row_values
+ },
+ |(_, _, _, expr_op, _, _, columns_op, _, values)| {
+ let selection = match expr_op {
+ Some(e) => Some(e.1),
+ None => None,
+ };
+ match columns_op {
+ Some(columns) => MergeOption::Unmatch(UnmatchedClause {
+ insert_operation: InsertOperation {
+ columns: Some(columns.1),
+ values,
+ is_star: false,
+ },
+ selection,
+ }),
+ None => MergeOption::Unmatch(UnmatchedClause {
+ insert_operation: InsertOperation {
+ columns: None,
+ values,
+ is_star: false,
+ },
+ selection,
+ }),
+ }
+ },
+ ),
+ map(
+ rule! {
+ WHEN ~ NOT ~ MATCHED ~ (AND ~ ^#expr)? ~ THEN ~ INSERT ~ "*"
+ },
+ |(_, _, _, expr_op, _, _, _)| {
+ let selection = match expr_op {
+ Some(e) => Some(e.1),
+ None => None,
+ };
+ MergeOption::Unmatch(UnmatchedClause {
insert_operation: InsertOperation {
columns: None,
- values,
+ values: Vec::new(),
+ is_star: true,
},
selection,
- }),
- }
- },
- )(i)
+ })
+ },
+ ),
+ ))(i)
}
pub fn add_column_option(i: Input) -> IResult {
diff --git a/src/query/ast/src/visitors/visitor.rs b/src/query/ast/src/visitors/visitor.rs
index 329b844de4dd..520dddc74a68 100644
--- a/src/query/ast/src/visitors/visitor.rs
+++ b/src/query/ast/src/visitors/visitor.rs
@@ -531,7 +531,7 @@ pub trait Visitor<'ast>: Sized {
fn visit_remove_stage(&mut self, _location: &'ast str, _pattern: &'ast str) {}
- fn visit_list_stage(&mut self, _location: &'ast str, _pattern: &'ast str) {}
+ fn visit_list_stage(&mut self, _location: &'ast str, _pattern: &'ast Option) {}
fn visit_create_file_format(
&mut self,
diff --git a/src/query/ast/src/visitors/visitor_mut.rs b/src/query/ast/src/visitors/visitor_mut.rs
index 401e39b26e6e..bee45ee26889 100644
--- a/src/query/ast/src/visitors/visitor_mut.rs
+++ b/src/query/ast/src/visitors/visitor_mut.rs
@@ -546,7 +546,7 @@ pub trait VisitorMut: Sized {
fn visit_remove_stage(&mut self, _location: &mut String, _pattern: &mut String) {}
- fn visit_list_stage(&mut self, _location: &mut String, _pattern: &mut String) {}
+ fn visit_list_stage(&mut self, _location: &mut String, _pattern: &mut Option) {}
fn visit_create_file_format(
&mut self,
diff --git a/src/query/ast/tests/it/testdata/statement.txt b/src/query/ast/tests/it/testdata/statement.txt
index 2cfa725c03f4..af9681a85cf0 100644
--- a/src/query/ast/tests/it/testdata/statement.txt
+++ b/src/query/ast/tests/it/testdata/statement.txt
@@ -7120,7 +7120,7 @@ LIST @stage_a
---------- AST ------------
ListStage {
location: "stage_a",
- pattern: "",
+ pattern: None,
}
@@ -7131,7 +7131,7 @@ LIST @~
---------- AST ------------
ListStage {
location: "~",
- pattern: "",
+ pattern: None,
}
diff --git a/src/query/catalog/src/plan/pushdown.rs b/src/query/catalog/src/plan/pushdown.rs
index 318e3a873171..e74e9bf227b8 100644
--- a/src/query/catalog/src/plan/pushdown.rs
+++ b/src/query/catalog/src/plan/pushdown.rs
@@ -77,9 +77,9 @@ pub struct PushDownInfo {
/// The difference with `projection` is the removal of the source columns
/// which were only used to generate virtual columns.
pub output_columns: Option,
- /// Optional filter expression plan
+ /// Optional filter and reverse filter expression plan
/// Assumption: expression's data type must be `DataType::Boolean`.
- pub filter: Option>,
+ pub filters: Option,
pub is_deterministic: bool,
/// Optional prewhere information
/// used for prewhere optimization
@@ -96,6 +96,12 @@ pub struct PushDownInfo {
pub agg_index: Option,
}
+#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)]
+pub struct Filters {
+ pub filter: RemoteExpr,
+ pub inverted_filter: RemoteExpr,
+}
+
/// TopK is a wrapper for topk push down items.
/// We only take the first column in order_by as the topk column.
#[derive(Debug, Clone)]
diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs
index 6a59816a15bd..e78036bcc467 100644
--- a/src/query/catalog/src/table.rs
+++ b/src/query/catalog/src/table.rs
@@ -465,14 +465,6 @@ pub struct NavigationDescriptor {
pub point: NavigationPoint,
}
-#[derive(Debug, Clone)]
-pub struct DeletionFilters {
- // the filter expression for the deletion
- pub filter: RemoteExpr,
- // just "not(filter)"
- pub inverted_filter: RemoteExpr,
-}
-
use std::collections::HashMap;
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Default)]
diff --git a/src/query/expression/src/expression.rs b/src/query/expression/src/expression.rs
index a9fd71538fd9..1762063491ab 100644
--- a/src/query/expression/src/expression.rs
+++ b/src/query/expression/src/expression.rs
@@ -124,14 +124,17 @@ pub enum Expr {
///
/// The remote node will recover the `Arc` pointer within `FunctionCall` by looking
/// up the function registry with the `FunctionID`.
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Debug, Clone, Educe, Serialize, Deserialize)]
+#[educe(PartialEq, Eq, Hash)]
pub enum RemoteExpr {
Constant {
+ #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))]
span: Span,
scalar: Scalar,
data_type: DataType,
},
ColumnRef {
+ #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))]
span: Span,
id: Index,
data_type: DataType,
@@ -140,12 +143,14 @@ pub enum RemoteExpr {
display_name: String,
},
Cast {
+ #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))]
span: Span,
is_try: bool,
expr: Box>,
dest_type: DataType,
},
FunctionCall {
+ #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))]
span: Span,
id: FunctionID,
generics: Vec,
@@ -153,6 +158,7 @@ pub enum RemoteExpr {
return_type: DataType,
},
UDFServerCall {
+ #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))]
span: Span,
func_name: String,
server_addr: String,
diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs
index 412c0284fa4f..9c1ee5830be4 100755
--- a/src/query/expression/src/type_check.rs
+++ b/src/query/expression/src/type_check.rs
@@ -282,12 +282,7 @@ pub fn check_number(
ErrorCode::InvalidArgument(format!("Expect {}, but got {}", T::data_type(), origin_ty))
.set_span(span)
}),
- _ => Err(ErrorCode::InvalidArgument(format!(
- "Expect {}, but got {}",
- T::data_type(),
- origin_ty
- ))
- .set_span(span)),
+ _ => Err(ErrorCode::InvalidArgument("Need constant number.").set_span(span)),
}
}
diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs
index 6e0375d65f5f..4c1f07904483 100644
--- a/src/query/expression/src/types/decimal.rs
+++ b/src/query/expression/src/types/decimal.rs
@@ -22,6 +22,7 @@ use common_exception::Result;
use enum_as_inner::EnumAsInner;
use ethnum::i256;
use itertools::Itertools;
+use num_traits::NumCast;
use num_traits::ToPrimitive;
use serde::Deserialize;
use serde::Serialize;
@@ -285,6 +286,8 @@ pub trait Decimal:
fn to_float32(self, scale: u8) -> f32;
fn to_float64(self, scale: u8) -> f64;
+ fn to_int(self, scale: u8) -> Option;
+
fn try_downcast_column(column: &Column) -> Option<(Buffer, DecimalSize)>;
fn try_downcast_builder<'a>(builder: &'a mut ColumnBuilder) -> Option<&'a mut Vec>;
@@ -414,6 +417,11 @@ impl Decimal for i128 {
self as f64 / div
}
+ fn to_int(self, scale: u8) -> Option {
+ let div = 10i128.checked_pow(scale as u32)?;
+ num_traits::cast(self / div)
+ }
+
fn to_scalar(self, size: DecimalSize) -> DecimalScalar {
DecimalScalar::Decimal128(self, size)
}
@@ -563,6 +571,12 @@ impl Decimal for i256 {
self.as_f64() / div
}
+ fn to_int(self, scale: u8) -> Option {
+ let div = i256::from(10).checked_pow(scale as u32)?;
+ let (h, l) = (self / div).into_words();
+ if h > 0 { None } else { l.to_int(scale) }
+ }
+
fn to_scalar(self, size: DecimalSize) -> DecimalScalar {
DecimalScalar::Decimal256(self, size)
}
diff --git a/src/query/expression/src/utils/date_helper.rs b/src/query/expression/src/utils/date_helper.rs
index 528af75adfb7..0722f0097f9c 100644
--- a/src/query/expression/src/utils/date_helper.rs
+++ b/src/query/expression/src/utils/date_helper.rs
@@ -417,6 +417,7 @@ pub struct ToYYYYMMDD;
pub struct ToYYYYMMDDHH;
pub struct ToYYYYMMDDHHMMSS;
pub struct ToYear;
+pub struct ToQuarter;
pub struct ToMonth;
pub struct ToDayOfYear;
pub struct ToDayOfMonth;
@@ -464,6 +465,12 @@ impl ToNumber for ToYear {
}
}
+impl ToNumber for ToQuarter {
+ fn to_number(dt: &DateTime) -> u8 {
+ (dt.month0() / 3 + 1) as u8
+ }
+}
+
impl ToNumber for ToMonth {
fn to_number(dt: &DateTime) -> u8 {
dt.month() as u8
diff --git a/src/query/functions/src/scalars/arithmetic.rs b/src/query/functions/src/scalars/arithmetic.rs
index 129b6f76c990..6238364b4881 100644
--- a/src/query/functions/src/scalars/arithmetic.rs
+++ b/src/query/functions/src/scalars/arithmetic.rs
@@ -74,6 +74,7 @@ use num_traits::AsPrimitive;
use super::arithmetic_modulo::vectorize_modulo;
use super::decimal::register_decimal_to_float32;
use super::decimal::register_decimal_to_float64;
+use super::decimal::register_decimal_to_int;
use crate::scalars::decimal::register_decimal_arithmetic;
pub fn register(registry: &mut FunctionRegistry) {
@@ -582,92 +583,96 @@ pub fn register_number_to_number(registry: &mut FunctionRegistry) {
let name = format!("to_{dest_type}").to_lowercase();
if src_type.can_lossless_cast_to(*dest_type) {
registry.register_1_arg::, NumberType, _, _>(
- &name,
- |_, domain| {
- let (domain, overflowing) = domain.overflow_cast();
- debug_assert!(!overflowing);
- FunctionDomain::Domain(domain)
- },
- |val, _| {
- val.as_()
- },
- );
+ &name,
+ |_, domain| {
+ let (domain, overflowing) = domain.overflow_cast();
+ debug_assert!(!overflowing);
+ FunctionDomain::Domain(domain)
+ },
+ |val, _| {
+ val.as_()
+ },
+ );
} else {
registry.register_passthrough_nullable_1_arg::, NumberType, _, _>(
- &name,
- |_, domain| {
- let (domain, overflowing) = domain.overflow_cast();
- if overflowing {
- FunctionDomain::MayThrow
- } else {
- FunctionDomain::Domain(domain)
- }
- },
- vectorize_with_builder_1_arg::, NumberType>(
- move |val, output, ctx| {
- match num_traits::cast::cast(val) {
- Some(val) => output.push(val),
- None => {
- ctx.set_error(output.len(),"number overflowed");
- output.push(DEST_TYPE::default());
+ &name,
+ |_, domain| {
+ let (domain, overflowing) = domain.overflow_cast();
+ if overflowing {
+ FunctionDomain::MayThrow
+ } else {
+ FunctionDomain::Domain(domain)
+ }
},
- }
- }
- ),
- );
+ vectorize_with_builder_1_arg::, NumberType>(
+ move |val, output, ctx| {
+ match num_traits::cast::cast(val) {
+ Some(val) => output.push(val),
+ None => {
+ ctx.set_error(output.len(),"number overflowed");
+ output.push(DEST_TYPE::default());
+ },
+ }
+ }
+ ),
+ );
}
let name = format!("try_to_{dest_type}").to_lowercase();
if src_type.can_lossless_cast_to(*dest_type) {
registry.register_combine_nullable_1_arg::, NumberType, _, _>(
- &name,
- |_, domain| {
- let (domain, overflowing) = domain.overflow_cast();
- debug_assert!(!overflowing);
- FunctionDomain::Domain(NullableDomain {
- has_null: false,
- value: Some(Box::new(
- domain,
- )),
- })
- },
- vectorize_1_arg::, NullableType>>(|val, _| {
- Some(val.as_())
- })
- );
+ &name,
+ |_, domain| {
+ let (domain, overflowing) = domain.overflow_cast();
+ debug_assert!(!overflowing);
+ FunctionDomain::Domain(NullableDomain {
+ has_null: false,
+ value: Some(Box::new(
+ domain,
+ )),
+ })
+ },
+ vectorize_1_arg::, NullableType>>(|val, _| {
+ Some(val.as_())
+ })
+ );
} else {
registry.register_combine_nullable_1_arg::, NumberType, _, _>(
- &name,
- |_, domain| {
- let (domain, overflowing) = domain.overflow_cast();
- FunctionDomain::Domain(NullableDomain {
- has_null: overflowing,
- value: Some(Box::new(
- domain,
- )),
- })
- },
- vectorize_with_builder_1_arg::, NullableType>>(
- |val, output, _| {
- if let Some(new_val) = num_traits::cast::cast(val) {
- output.push(new_val);
- } else {
- output.push_null();
- }
- }
- ),
- );
+ &name,
+ |_, domain| {
+ let (domain, overflowing) = domain.overflow_cast();
+ FunctionDomain::Domain(NullableDomain {
+ has_null: overflowing,
+ value: Some(Box::new(
+ domain,
+ )),
+ })
+ },
+ vectorize_with_builder_1_arg::, NullableType>>(
+ |val, output, _| {
+ if let Some(new_val) = num_traits::cast::cast(val) {
+ output.push(new_val);
+ } else {
+ output.push_null();
+ }
+ }
+ ),
+ );
}
}
}),
NumberClass::Decimal128 => {
- // todo(youngsofun): add decimal try_cast and decimal to int
+ // todo(youngsofun): add decimal try_cast and decimal to int and float
if matches!(dest_type, NumberDataType::Float32) {
register_decimal_to_float32(registry);
}
if matches!(dest_type, NumberDataType::Float64) {
register_decimal_to_float64(registry);
}
+
+ with_number_mapped_type!(|DEST_TYPE| match dest_type {
+ NumberDataType::DEST_TYPE => register_decimal_to_int::(registry),
+ })
}
NumberClass::Decimal256 => {
// already registered in Decimal128 branch
diff --git a/src/query/functions/src/scalars/datetime.rs b/src/query/functions/src/scalars/datetime.rs
index 62d2a4a3a85d..03b40a8f5024 100644
--- a/src/query/functions/src/scalars/datetime.rs
+++ b/src/query/functions/src/scalars/datetime.rs
@@ -909,6 +909,13 @@ fn register_to_number_functions(registry: &mut FunctionRegistry) {
ToNumberImpl::eval_date::(val, ctx.func_ctx.tz)
}),
);
+ registry.register_passthrough_nullable_1_arg::(
+ "to_quarter",
+ |_, _| FunctionDomain::Full,
+ vectorize_1_arg::(|val, ctx| {
+ ToNumberImpl::eval_date::(val, ctx.func_ctx.tz)
+ }),
+ );
registry.register_passthrough_nullable_1_arg::(
"to_month",
|_, _| FunctionDomain::Full,
@@ -973,6 +980,13 @@ fn register_to_number_functions(registry: &mut FunctionRegistry) {
ToNumberImpl::eval_timestamp::(val, ctx.func_ctx.tz)
}),
);
+ registry.register_passthrough_nullable_1_arg::(
+ "to_quarter",
+ |_, _| FunctionDomain::Full,
+ vectorize_1_arg::(|val, ctx| {
+ ToNumberImpl::eval_timestamp::(val, ctx.func_ctx.tz)
+ }),
+ );
registry.register_passthrough_nullable_1_arg::(
"to_month",
|_, _| FunctionDomain::Full,
@@ -1027,23 +1041,31 @@ fn register_to_number_functions(registry: &mut FunctionRegistry) {
}
fn register_timestamp_add_sub(registry: &mut FunctionRegistry) {
- registry.register_2_arg::(
+ registry.register_passthrough_nullable_2_arg::(
"plus",
|_, lhs, rhs| {
(|| {
- let lm = lhs.max;
- let ln = lhs.min;
- let rm: i32 = num_traits::cast::cast(rhs.max)?;
- let rn: i32 = num_traits::cast::cast(rhs.min)?;
+ let lm: i64 = num_traits::cast::cast(lhs.max)?;
+ let ln: i64 = num_traits::cast::cast(lhs.min)?;
+ let rm = rhs.max;
+ let rn = rhs.min;
Some(FunctionDomain::Domain(SimpleDomain:: {
- min: ln.checked_add(rn)?,
- max: lm.checked_add(rm)?,
+ min: check_date(ln + rn).ok()?,
+ max: check_date(lm + rm).ok()?,
}))
})()
- .unwrap_or(FunctionDomain::Full)
+ .unwrap_or(FunctionDomain::MayThrow)
},
- |a, b, _| a + (b as i32),
+ vectorize_with_builder_2_arg::(|a, b, output, ctx| {
+ match check_date((a as i64) + b) {
+ Ok(v) => output.push(v),
+ Err(err) => {
+ ctx.set_error(output.len(), err);
+ output.push(0);
+ }
+ }
+ }),
);
registry.register_2_arg::(
@@ -1065,7 +1087,7 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) {
|a, b, _| a + b,
);
- registry.register_2_arg::(
+ registry.register_passthrough_nullable_2_arg::(
"plus",
|_, lhs, rhs| {
(|| {
@@ -1074,13 +1096,21 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) {
let rm = rhs.max;
let rn = rhs.min;
Some(FunctionDomain::Domain(SimpleDomain:: {
- min: ln.checked_add(rn)?,
- max: lm.checked_add(rm)?,
+ min: check_timestamp(ln + rn).ok()?,
+ max: check_timestamp(lm + rm).ok()?,
}))
})()
- .unwrap_or(FunctionDomain::Full)
+ .unwrap_or(FunctionDomain::MayThrow)
},
- |a, b, _| a + b,
+ vectorize_with_builder_2_arg::(
+ |a, b, output, ctx| match check_timestamp(a + b) {
+ Ok(v) => output.push(v),
+ Err(err) => {
+ ctx.set_error(output.len(), err);
+ output.push(0);
+ }
+ },
+ ),
);
registry.register_2_arg::(
@@ -1101,23 +1131,31 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) {
|a, b, _| a + b,
);
- registry.register_2_arg::(
+ registry.register_passthrough_nullable_2_arg::(
"minus",
|_, lhs, rhs| {
(|| {
- let lm = lhs.max;
- let ln = lhs.min;
- let rm: i32 = num_traits::cast::cast(rhs.max)?;
- let rn: i32 = num_traits::cast::cast(rhs.min)?;
+ let lm: i64 = num_traits::cast::cast(lhs.max)?;
+ let ln: i64 = num_traits::cast::cast(lhs.min)?;
+ let rm = rhs.max;
+ let rn = rhs.min;
Some(FunctionDomain::Domain(SimpleDomain:: {
- min: ln.checked_sub(rm)?,
- max: lm.checked_sub(rn)?,
+ min: check_date(ln - rn).ok()?,
+ max: check_date(lm - rm).ok()?,
}))
})()
- .unwrap_or(FunctionDomain::Full)
+ .unwrap_or(FunctionDomain::MayThrow)
},
- |a, b, _| a - b as i32,
+ vectorize_with_builder_2_arg::(|a, b, output, ctx| {
+ match check_date((a as i64) - b) {
+ Ok(v) => output.push(v),
+ Err(err) => {
+ ctx.set_error(output.len(), err);
+ output.push(0);
+ }
+ }
+ }),
);
registry.register_2_arg::(
@@ -1139,7 +1177,7 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) {
|a, b, _| a - b,
);
- registry.register_2_arg::(
+ registry.register_passthrough_nullable_2_arg::(
"minus",
|_, lhs, rhs| {
(|| {
@@ -1149,13 +1187,21 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) {
let rn = rhs.min;
Some(FunctionDomain::Domain(SimpleDomain:: {
- min: ln.checked_sub(rm)?,
- max: lm.checked_sub(rn)?,
+ min: check_timestamp(ln - rn).ok()?,
+ max: check_timestamp(lm - rm).ok()?,
}))
})()
- .unwrap_or(FunctionDomain::Full)
+ .unwrap_or(FunctionDomain::MayThrow)
},
- |a, b, _| a - b,
+ vectorize_with_builder_2_arg::(
+ |a, b, output, ctx| match check_timestamp(a - b) {
+ Ok(v) => output.push(v),
+ Err(err) => {
+ ctx.set_error(output.len(), err);
+ output.push(0);
+ }
+ },
+ ),
);
registry.register_2_arg::(
diff --git a/src/query/functions/src/scalars/decimal.rs b/src/query/functions/src/scalars/decimal.rs
index 8538e9c480cf..ee5ae09d755e 100644
--- a/src/query/functions/src/scalars/decimal.rs
+++ b/src/query/functions/src/scalars/decimal.rs
@@ -627,7 +627,7 @@ pub fn register(registry: &mut FunctionRegistry) {
}
pub(crate) fn register_decimal_to_float64(registry: &mut FunctionRegistry) {
- registry.register_function_factory("to_float64", |_params, args_type| {
+ let factory = |_params: &[usize], args_type: &[DataType]| {
if args_type.len() != 1 {
return None;
}
@@ -638,7 +638,7 @@ pub(crate) fn register_decimal_to_float64(registry: &mut FunctionRegistry) {
return None;
}
- Some(Arc::new(Function {
+ let function = Function {
signature: FunctionSignature {
name: "to_float64".to_string(),
args_type: vec![arg_type.clone()],
@@ -661,12 +661,32 @@ pub(crate) fn register_decimal_to_float64(registry: &mut FunctionRegistry) {
}),
eval: Box::new(move |args, tx| decimal_to_float64(&args[0], arg_type.clone(), tx)),
},
- }))
+ };
+
+ Some(function)
+ };
+
+ registry.register_function_factory("to_float64", move |params, args_type| {
+ Some(Arc::new(factory(params, args_type)?))
+ });
+ registry.register_function_factory("to_float64", move |params, args_type| {
+ let f = factory(params, args_type)?;
+ Some(Arc::new(f.passthrough_nullable()))
+ });
+ registry.register_function_factory("try_to_float64", move |params, args_type| {
+ let mut f = factory(params, args_type)?;
+ f.signature.name = "try_to_float64".to_string();
+ Some(Arc::new(f.error_to_null()))
+ });
+ registry.register_function_factory("try_to_float64", move |params, args_type| {
+ let mut f = factory(params, args_type)?;
+ f.signature.name = "try_to_float64".to_string();
+ Some(Arc::new(f.error_to_null().passthrough_nullable()))
});
}
pub(crate) fn register_decimal_to_float32(registry: &mut FunctionRegistry) {
- registry.register_function_factory("to_float32", |_params, args_type| {
+ let factory = |_params: &[usize], args_type: &[DataType]| {
if args_type.len() != 1 {
return None;
}
@@ -676,7 +696,7 @@ pub(crate) fn register_decimal_to_float32(registry: &mut FunctionRegistry) {
return None;
}
- Some(Arc::new(Function {
+ let function = Function {
signature: FunctionSignature {
name: "to_float32".to_string(),
args_type: vec![arg_type.clone()],
@@ -699,7 +719,79 @@ pub(crate) fn register_decimal_to_float32(registry: &mut FunctionRegistry) {
}),
eval: Box::new(move |args, tx| decimal_to_float32(&args[0], arg_type.clone(), tx)),
},
- }))
+ };
+
+ Some(function)
+ };
+
+ registry.register_function_factory("to_float32", move |params, args_type| {
+ Some(Arc::new(factory(params, args_type)?))
+ });
+ registry.register_function_factory("to_float32", move |params, args_type| {
+ let f = factory(params, args_type)?;
+ Some(Arc::new(f.passthrough_nullable()))
+ });
+ registry.register_function_factory("try_to_float32", move |params, args_type| {
+ let mut f = factory(params, args_type)?;
+ f.signature.name = "try_to_float32".to_string();
+ Some(Arc::new(f.error_to_null()))
+ });
+ registry.register_function_factory("try_to_float32", move |params, args_type| {
+ let mut f = factory(params, args_type)?;
+ f.signature.name = "try_to_float32".to_string();
+ Some(Arc::new(f.error_to_null().passthrough_nullable()))
+ });
+}
+
+pub(crate) fn register_decimal_to_int(registry: &mut FunctionRegistry) {
+ if T::data_type().is_float() {
+ return;
+ }
+ let name = format!("to_{}", T::data_type().to_string().to_lowercase());
+ let try_name = format!("try_to_{}", T::data_type().to_string().to_lowercase());
+
+ let factory = |_params: &[usize], args_type: &[DataType]| {
+ if args_type.len() != 1 {
+ return None;
+ }
+
+ let name = format!("to_{}", T::data_type().to_string().to_lowercase());
+ let arg_type = args_type[0].remove_nullable();
+ if !arg_type.is_decimal() {
+ return None;
+ }
+
+ let function = Function {
+ signature: FunctionSignature {
+ name,
+ args_type: vec![arg_type.clone()],
+ return_type: DataType::Number(T::data_type()),
+ },
+ eval: FunctionEval::Scalar {
+ calc_domain: Box::new(|_, _| FunctionDomain::MayThrow),
+ eval: Box::new(move |args, tx| decimal_to_int::(&args[0], arg_type.clone(), tx)),
+ },
+ };
+
+ Some(function)
+ };
+
+ registry.register_function_factory(&name, move |params, args_type| {
+ Some(Arc::new(factory(params, args_type)?))
+ });
+ registry.register_function_factory(&name, move |params, args_type| {
+ let f = factory(params, args_type)?;
+ Some(Arc::new(f.passthrough_nullable()))
+ });
+ registry.register_function_factory(&try_name, move |params, args_type| {
+ let mut f = factory(params, args_type)?;
+ f.signature.name = format!("try_to_{}", T::data_type().to_string().to_lowercase());
+ Some(Arc::new(f.error_to_null()))
+ });
+ registry.register_function_factory(&try_name, move |params, args_type| {
+ let mut f = factory(params, args_type)?;
+ f.signature.name = format!("try_to_{}", T::data_type().to_string().to_lowercase());
+ Some(Arc::new(f.error_to_null().passthrough_nullable()))
});
}
@@ -1309,3 +1401,66 @@ fn decimal_to_float32(
Value::Column(result)
}
}
+
+fn decimal_to_int(
+ arg: &ValueRef,
+ from_type: DataType,
+ ctx: &mut EvalContext,
+) -> Value {
+ let mut is_scalar = false;
+ let column = match arg {
+ ValueRef::Column(column) => column.clone(),
+ ValueRef::Scalar(s) => {
+ is_scalar = true;
+ let builder = ColumnBuilder::repeat(s, 1, &from_type);
+ builder.build()
+ }
+ };
+
+ let from_type = from_type.as_decimal().unwrap();
+
+ let result = match from_type {
+ DecimalDataType::Decimal128(_) => {
+ let (buffer, from_size) = i128::try_downcast_column(&column).unwrap();
+
+ let mut values = Vec::with_capacity(ctx.num_rows);
+
+ for (i, x) in buffer.iter().enumerate() {
+ let x = x.to_int(from_size.scale);
+ match x {
+ Some(x) => values.push(x),
+ None => {
+ ctx.set_error(i, "decimal cast to int overflow");
+ values.push(T::default())
+ }
+ }
+ }
+
+ NumberType::::upcast_column(Buffer::from(values))
+ }
+
+ DecimalDataType::Decimal256(_) => {
+ let (buffer, from_size) = i256::try_downcast_column(&column).unwrap();
+ let mut values = Vec::with_capacity(ctx.num_rows);
+
+ for (i, x) in buffer.iter().enumerate() {
+ let x = x.to_int(from_size.scale);
+ match x {
+ Some(x) => values.push(x),
+ None => {
+ ctx.set_error(i, "decimal cast to int overflow");
+ values.push(T::default())
+ }
+ }
+ }
+ NumberType::::upcast_column(Buffer::from(values))
+ }
+ };
+
+ if is_scalar {
+ let scalar = result.index(0).unwrap();
+ Value::Scalar(scalar.to_owned())
+ } else {
+ Value::Column(result)
+ }
+}
diff --git a/src/query/functions/src/scalars/geo.rs b/src/query/functions/src/scalars/geo.rs
index b7a8d4482b5c..4e9f79214fc5 100644
--- a/src/query/functions/src/scalars/geo.rs
+++ b/src/query/functions/src/scalars/geo.rs
@@ -198,7 +198,8 @@ pub fn register(registry: &mut FunctionRegistry) {
// point in ellipses
registry.register_function_factory("point_in_ellipses", |_, args_type| {
- if args_type.len() < 6 {
+ // The input parameters must be 2+4*n, where n is the number of ellipses.
+ if args_type.len() < 6 || (args_type.len() - 2) % 4 != 0 {
return None;
}
Some(Arc::new(Function {
@@ -221,20 +222,28 @@ pub fn register(registry: &mut FunctionRegistry) {
return None;
}
- let (arg1, arg2) = if args_type.len() == 2 {
+ let (arg1, arg2) = {
let arg1 = match args_type.get(0)? {
- DataType::Tuple(tys) => vec![DataType::Number(NumberDataType::Float64); tys.len()],
+ DataType::Tuple(tys) => {
+ if tys.len() == 2 {
+ vec![DataType::Number(NumberDataType::Float64); tys.len()]
+ } else {
+ return None;
+ }
+ }
_ => return None,
};
let arg2 = match args_type.get(1)? {
DataType::Array(box DataType::Tuple(tys)) => {
- vec![DataType::Number(NumberDataType::Float64); tys.len()]
+ if tys.len() == 2 {
+ vec![DataType::Number(NumberDataType::Float64); tys.len()]
+ } else {
+ return None;
+ }
}
_ => return None,
};
(arg1, arg2)
- } else {
- (vec![], vec![])
};
Some(Arc::new(Function {
@@ -260,20 +269,28 @@ pub fn register(registry: &mut FunctionRegistry) {
return None;
}
- let (arg1, arg2) = if args_type.len() == 2 {
+ let (arg1, arg2) = {
let arg1 = match args_type.get(0)? {
- DataType::Tuple(tys) => vec![DataType::Number(NumberDataType::Float64); tys.len()],
+ DataType::Tuple(tys) => {
+ if tys.len() == 2 {
+ vec![DataType::Number(NumberDataType::Float64); tys.len()]
+ } else {
+ return None;
+ }
+ }
_ => return None,
};
let arg2 = match args_type.get(1)? {
DataType::Array(box DataType::Array(box DataType::Tuple(tys))) => {
- vec![DataType::Number(NumberDataType::Float64); tys.len()]
+ if tys.len() == 2 {
+ vec![DataType::Number(NumberDataType::Float64); tys.len()]
+ } else {
+ return None;
+ }
}
_ => return None,
};
(arg1, arg2)
- } else {
- (vec![], vec![])
};
Some(Arc::new(Function {
@@ -302,20 +319,30 @@ pub fn register(registry: &mut FunctionRegistry) {
let mut args = vec![];
let arg1 = match args_type.get(0)? {
- DataType::Tuple(tys) => vec![DataType::Number(NumberDataType::Float64); tys.len()],
+ DataType::Tuple(tys) => {
+ if tys.len() == 2 {
+ vec![DataType::Number(NumberDataType::Float64); tys.len()]
+ } else {
+ return None;
+ }
+ }
_ => return None,
};
args.push(DataType::Tuple(arg1));
let arg2: Vec = match args_type.get(1)? {
DataType::Array(box DataType::Tuple(tys)) => {
- vec![DataType::Number(NumberDataType::Float64); tys.len()]
+ if tys.len() == 2 {
+ vec![DataType::Number(NumberDataType::Float64); tys.len()]
+ } else {
+ return None;
+ }
}
_ => return None,
};
- (0..args_type.len() - 1)
+ (1..args_type.len())
.for_each(|_| args.push(DataType::Array(Box::new(DataType::Tuple(arg2.clone())))));
Some(Arc::new(Function {
diff --git a/src/query/functions/src/scalars/vector.rs b/src/query/functions/src/scalars/vector.rs
index cf041d642256..be4efbcecdaf 100644
--- a/src/query/functions/src/scalars/vector.rs
+++ b/src/query/functions/src/scalars/vector.rs
@@ -90,8 +90,23 @@ pub fn register(registry: &mut FunctionRegistry) {
return;
}
}
- let data = std::str::from_utf8(data).unwrap();
+ let data = match std::str::from_utf8(data) {
+ Ok(data) => data,
+ Err(_) => {
+ ctx.set_error(
+ output.len(),
+ format!("Invalid data: {:?}", String::from_utf8_lossy(data)),
+ );
+ output.push(vec![F32::from(0.0)].into());
+ return;
+ }
+ };
+ if ctx.func_ctx.openai_api_key.is_empty() {
+ ctx.set_error(output.len(), "openai_api_key is empty".to_string());
+ output.push(vec![F32::from(0.0)].into());
+ return;
+ }
let api_base = ctx.func_ctx.openai_api_embedding_base_url.clone();
let api_key = ctx.func_ctx.openai_api_key.clone();
let api_version = ctx.func_ctx.openai_api_version.clone();
@@ -140,7 +155,24 @@ pub fn register(registry: &mut FunctionRegistry) {
}
}
- let data = std::str::from_utf8(data).unwrap();
+ let data = match std::str::from_utf8(data) {
+ Ok(data) => data,
+ Err(_) => {
+ ctx.set_error(
+ output.len(),
+ format!("Invalid data: {:?}", String::from_utf8_lossy(data)),
+ );
+ output.put_str("");
+ output.commit_row();
+ return;
+ }
+ };
+ if ctx.func_ctx.openai_api_key.is_empty() {
+ ctx.set_error(output.len(), "openai_api_key is empty".to_string());
+ output.put_str("");
+ output.commit_row();
+ return;
+ }
let api_base = ctx.func_ctx.openai_api_chat_base_url.clone();
let api_key = ctx.func_ctx.openai_api_key.clone();
let api_version = ctx.func_ctx.openai_api_version.clone();
diff --git a/src/query/functions/tests/it/scalars/datetime.rs b/src/query/functions/tests/it/scalars/datetime.rs
index e672a6a095bb..8f5fb8ccc028 100644
--- a/src/query/functions/tests/it/scalars/datetime.rs
+++ b/src/query/functions/tests/it/scalars/datetime.rs
@@ -108,10 +108,14 @@ fn test_date_add_subtract(file: &mut impl Write) {
run_ast(file, "add_years(to_date(0), 100)", &[]);
run_ast(file, "add_months(to_date(0), 100)", &[]);
run_ast(file, "add_days(to_date(0), 100)", &[]);
+ run_ast(file, "add(to_date(0), 100)", &[]);
+ run_ast(file, "add(to_date(0), 10000000)", &[]);
run_ast(file, "subtract_years(to_date(0), 100)", &[]);
run_ast(file, "subtract_quarters(to_date(0), 100)", &[]);
run_ast(file, "subtract_months(to_date(0), 100)", &[]);
run_ast(file, "subtract_days(to_date(0), 100)", &[]);
+ run_ast(file, "subtract(to_date(0), 100)", &[]);
+ run_ast(file, "subtract(to_date(0), 10000000)", &[]);
run_ast(file, "add_years(a, b)", &[
("a", DateType::from_data(vec![-100, 0, 100])),
("b", Int32Type::from_data(vec![1, 2, 3])),
@@ -155,6 +159,8 @@ fn test_timestamp_add_subtract(file: &mut impl Write) {
run_ast(file, "add_hours(to_timestamp(0), 100)", &[]);
run_ast(file, "add_minutes(to_timestamp(0), 100)", &[]);
run_ast(file, "add_seconds(to_timestamp(0), 100)", &[]);
+ run_ast(file, "add(to_timestamp(0), 100000000000000)", &[]);
+ run_ast(file, "add(to_timestamp(0), 1000000000000000000)", &[]);
run_ast(file, "subtract_years(to_timestamp(0), 100)", &[]);
run_ast(file, "subtract_quarters(to_timestamp(0), 100)", &[]);
run_ast(file, "subtract_months(to_timestamp(0), 100)", &[]);
@@ -162,6 +168,8 @@ fn test_timestamp_add_subtract(file: &mut impl Write) {
run_ast(file, "subtract_hours(to_timestamp(0), 100)", &[]);
run_ast(file, "subtract_minutes(to_timestamp(0), 100)", &[]);
run_ast(file, "subtract_seconds(to_timestamp(0), 100)", &[]);
+ run_ast(file, "subtract(to_timestamp(0), 100000000000000)", &[]);
+ run_ast(file, "subtract(to_timestamp(0), 1000000000000000000)", &[]);
run_ast(file, "add_years(a, b)", &[
("a", TimestampType::from_data(vec![-100, 0, 100])),
("b", Int32Type::from_data(vec![1, 2, 3])),
@@ -462,6 +470,7 @@ fn test_to_number(file: &mut impl Write) {
run_ast(file, "to_yyyymmdd(to_date(18875))", &[]);
run_ast(file, "to_yyyymmddhhmmss(to_date(18875))", &[]);
run_ast(file, "to_year(to_date(18875))", &[]);
+ run_ast(file, "to_quarter(to_date(18875))", &[]);
run_ast(file, "to_month(to_date(18875))", &[]);
run_ast(file, "to_day_of_year(to_date(18875))", &[]);
run_ast(file, "to_day_of_month(to_date(18875))", &[]);
@@ -482,6 +491,10 @@ fn test_to_number(file: &mut impl Write) {
"a",
DateType::from_data(vec![-100, 0, 100]),
)]);
+ run_ast(file, "to_quarter(a)", &[(
+ "a",
+ DateType::from_data(vec![-100, 0, 100]),
+ )]);
run_ast(file, "to_month(a)", &[(
"a",
DateType::from_data(vec![-100, 0, 100]),
@@ -504,6 +517,7 @@ fn test_to_number(file: &mut impl Write) {
run_ast(file, "to_yyyymmdd(to_timestamp(1630812366))", &[]);
run_ast(file, "to_yyyymmddhhmmss(to_timestamp(1630812366))", &[]);
run_ast(file, "to_year(to_timestamp(1630812366))", &[]);
+ run_ast(file, "to_quarter(to_timestamp(1630812366))", &[]);
run_ast(file, "to_month(to_timestamp(1630812366))", &[]);
run_ast(file, "to_day_of_year(to_timestamp(1630812366))", &[]);
run_ast(file, "to_day_of_month(to_timestamp(1630812366))", &[]);
@@ -527,6 +541,10 @@ fn test_to_number(file: &mut impl Write) {
"a",
TimestampType::from_data(vec![-100, 0, 100]),
)]);
+ run_ast(file, "to_quarter(a)", &[(
+ "a",
+ TimestampType::from_data(vec![-100, 0, 100]),
+ )]);
run_ast(file, "to_month(a)", &[(
"a",
TimestampType::from_data(vec![-100, 0, 100]),
diff --git a/src/query/functions/tests/it/scalars/testdata/datetime.txt b/src/query/functions/tests/it/scalars/testdata/datetime.txt
index 699082a4c25a..2b42bc1f3d04 100644
--- a/src/query/functions/tests/it/scalars/testdata/datetime.txt
+++ b/src/query/functions/tests/it/scalars/testdata/datetime.txt
@@ -355,6 +355,23 @@ output domain : {100..=100}
output : '1970-04-11'
+ast : add(to_date(0), 100)
+raw expr : add(to_date(0), 100)
+checked expr : plus(to_date(to_int64(0_u8)), to_int64(100_u8))
+optimized expr : 100
+output type : Date
+output domain : {100..=100}
+output : '1970-04-11'
+
+
+error:
+ --> SQL:1:1
+ |
+1 | add(to_date(0), 10000000)
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^ date is out of range while evaluating function `plus('1970-01-01', 10000000)`
+
+
+
ast : subtract_years(to_date(0), 100)
raw expr : subtract_years(to_date(0), 100)
checked expr : subtract_years(to_date(to_int64(0_u8)), to_int64(100_u8))
@@ -391,6 +408,23 @@ output domain : {-100..=-100}
output : '1969-09-23'
+ast : subtract(to_date(0), 100)
+raw expr : subtract(to_date(0), 100)
+checked expr : minus(to_date(to_int64(0_u8)), to_int64(100_u8))
+optimized expr : -100
+output type : Date
+output domain : {-100..=-100}
+output : '1969-09-23'
+
+
+error:
+ --> SQL:1:1
+ |
+1 | subtract(to_date(0), 10000000)
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ date is out of range while evaluating function `minus('1970-01-01', 10000000)`
+
+
+
ast : add_years(a, b)
raw expr : add_years(a::Date, b::Int32)
checked expr : add_years(a, to_int64(b))
@@ -646,6 +680,23 @@ output domain : {100000000..=100000000}
output : '1970-01-01 00:01:40.000000'
+ast : add(to_timestamp(0), 100000000000000)
+raw expr : add(to_timestamp(0), 100000000000000)
+checked expr : plus(to_timestamp(to_int64(0_u8)), to_int64(100000000000000_u64))
+optimized expr : 100000000000000
+output type : Timestamp
+output domain : {100000000000000..=100000000000000}
+output : '1973-03-03 09:46:40.000000'
+
+
+error:
+ --> SQL:1:1
+ |
+1 | add(to_timestamp(0), 1000000000000000000)
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ timestamp is out of range while evaluating function `plus('1970-01-01 00:00:00.000000', 1000000000000000000)`
+
+
+
ast : subtract_years(to_timestamp(0), 100)
raw expr : subtract_years(to_timestamp(0), 100)
checked expr : subtract_years(to_timestamp(to_int64(0_u8)), to_int64(100_u8))
@@ -709,6 +760,23 @@ output domain : {-100000000..=-100000000}
output : '1969-12-31 23:58:20.000000'
+ast : subtract(to_timestamp(0), 100000000000000)
+raw expr : subtract(to_timestamp(0), 100000000000000)
+checked expr : minus(to_timestamp(to_int64(0_u8)), to_int64(100000000000000_u64))
+optimized expr : -100000000000000
+output type : Timestamp
+output domain : {-100000000000000..=-100000000000000}
+output : '1966-10-31 14:13:20.000000'
+
+
+error:
+ --> SQL:1:1
+ |
+1 | subtract(to_timestamp(0), 1000000000000000000)
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ timestamp is out of range while evaluating function `minus('1970-01-01 00:00:00.000000', 1000000000000000000)`
+
+
+
ast : add_years(a, b)
raw expr : add_years(a::Timestamp, b::Int32)
checked expr : add_years(a, to_int64(b))
@@ -2507,6 +2575,15 @@ output domain : {2021..=2021}
output : 2021
+ast : to_quarter(to_date(18875))
+raw expr : to_quarter(to_date(18875))
+checked expr : to_quarter(to_date(to_int64(18875_u16)))
+optimized expr : 3_u8
+output type : UInt8
+output domain : {3..=3}
+output : 3
+
+
ast : to_month(to_date(18875))
raw expr : to_month(to_date(18875))
checked expr : to_month(to_date(to_int64(18875_u16)))
@@ -2631,6 +2708,28 @@ evaluation (internal):
+--------+----------------------------+
+ast : to_quarter(a)
+raw expr : to_quarter(a::Date)
+checked expr : to_quarter(a)
+evaluation:
++--------+--------------+-----------+
+| | a | Output |
++--------+--------------+-----------+
+| Type | Date | UInt8 |
+| Domain | {-100..=100} | {0..=255} |
+| Row 0 | '1969-09-23' | 3 |
+| Row 1 | '1970-01-01' | 1 |
+| Row 2 | '1970-04-11' | 2 |
++--------+--------------+-----------+
+evaluation (internal):
++--------+------------------+
+| Column | Data |
++--------+------------------+
+| a | [-100, 0, 100] |
+| Output | UInt8([3, 1, 2]) |
++--------+------------------+
+
+
ast : to_month(a)
raw expr : to_month(a::Date)
checked expr : to_month(a)
@@ -2755,6 +2854,15 @@ output domain : {2021..=2021}
output : 2021
+ast : to_quarter(to_timestamp(1630812366))
+raw expr : to_quarter(to_timestamp(1630812366))
+checked expr : to_quarter(to_timestamp(to_int64(1630812366_u32)))
+optimized expr : 3_u8
+output type : UInt8
+output domain : {3..=3}
+output : 3
+
+
ast : to_month(to_timestamp(1630812366))
raw expr : to_month(to_timestamp(1630812366))
checked expr : to_month(to_timestamp(to_int64(1630812366_u32)))
@@ -2906,6 +3014,28 @@ evaluation (internal):
+--------+----------------------------+
+ast : to_quarter(a)
+raw expr : to_quarter(a::Timestamp)
+checked expr : to_quarter(a)
+evaluation:
++--------+------------------------------+-----------+
+| | a | Output |
++--------+------------------------------+-----------+
+| Type | Timestamp | UInt8 |
+| Domain | {-100..=100} | {0..=255} |
+| Row 0 | '1969-12-31 23:59:59.999900' | 4 |
+| Row 1 | '1970-01-01 00:00:00.000000' | 1 |
+| Row 2 | '1970-01-01 00:00:00.000100' | 1 |
++--------+------------------------------+-----------+
+evaluation (internal):
++--------+------------------+
+| Column | Data |
++--------+------------------+
+| a | [-100, 0, 100] |
+| Output | UInt8([4, 1, 1]) |
++--------+------------------+
+
+
ast : to_month(a)
raw expr : to_month(a::Timestamp)
checked expr : to_month(a)
diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt
index 9f2d1d74966e..cc2fc8b735b7 100644
--- a/src/query/functions/tests/it/scalars/testdata/function_list.txt
+++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt
@@ -3219,10 +3219,11 @@ Functions overloads:
18 to_float32(Int64) :: Float32
19 to_float32(Int64 NULL) :: Float32 NULL
20 to_float32 FACTORY
-21 to_float32(Float64) :: Float32
-22 to_float32(Float64 NULL) :: Float32 NULL
-23 to_float32(Boolean) :: Float32
-24 to_float32(Boolean NULL) :: Float32 NULL
+21 to_float32 FACTORY
+22 to_float32(Float64) :: Float32
+23 to_float32(Float64 NULL) :: Float32 NULL
+24 to_float32(Boolean) :: Float32
+25 to_float32(Boolean NULL) :: Float32 NULL
0 to_float64(Variant) :: Float64
1 to_float64(Variant NULL) :: Float64 NULL
2 to_float64(String) :: Float64
@@ -3244,10 +3245,11 @@ Functions overloads:
18 to_float64(Int64) :: Float64
19 to_float64(Int64 NULL) :: Float64 NULL
20 to_float64 FACTORY
-21 to_float64(Float32) :: Float64
-22 to_float64(Float32 NULL) :: Float64 NULL
-23 to_float64(Boolean) :: Float64
-24 to_float64(Boolean NULL) :: Float64 NULL
+21 to_float64 FACTORY
+22 to_float64(Float32) :: Float64
+23 to_float64(Float32 NULL) :: Float64 NULL
+24 to_float64(Boolean) :: Float64
+25 to_float64(Boolean NULL) :: Float64 NULL
0 to_hour(Timestamp) :: UInt8
1 to_hour(Timestamp NULL) :: UInt8 NULL
0 to_int16(Variant) :: Int16
@@ -3268,12 +3270,14 @@ Functions overloads:
15 to_int16(UInt64 NULL) :: Int16 NULL
16 to_int16(Int64) :: Int16
17 to_int16(Int64 NULL) :: Int16 NULL
-18 to_int16(Float32) :: Int16
-19 to_int16(Float32 NULL) :: Int16 NULL
-20 to_int16(Float64) :: Int16
-21 to_int16(Float64 NULL) :: Int16 NULL
-22 to_int16(Boolean) :: Int16
-23 to_int16(Boolean NULL) :: Int16 NULL
+18 to_int16 FACTORY
+19 to_int16 FACTORY
+20 to_int16(Float32) :: Int16
+21 to_int16(Float32 NULL) :: Int16 NULL
+22 to_int16(Float64) :: Int16
+23 to_int16(Float64 NULL) :: Int16 NULL
+24 to_int16(Boolean) :: Int16
+25 to_int16(Boolean NULL) :: Int16 NULL
0 to_int32(Variant) :: Int32
1 to_int32(Variant NULL) :: Int32 NULL
2 to_int32(String) :: Int32
@@ -3292,12 +3296,14 @@ Functions overloads:
15 to_int32(UInt64 NULL) :: Int32 NULL
16 to_int32(Int64) :: Int32
17 to_int32(Int64 NULL) :: Int32 NULL
-18 to_int32(Float32) :: Int32
-19 to_int32(Float32 NULL) :: Int32 NULL
-20 to_int32(Float64) :: Int32
-21 to_int32(Float64 NULL) :: Int32 NULL
-22 to_int32(Boolean) :: Int32
-23 to_int32(Boolean NULL) :: Int32 NULL
+18 to_int32 FACTORY
+19 to_int32 FACTORY
+20 to_int32(Float32) :: Int32
+21 to_int32(Float32 NULL) :: Int32 NULL
+22 to_int32(Float64) :: Int32
+23 to_int32(Float64 NULL) :: Int32 NULL
+24 to_int32(Boolean) :: Int32
+25 to_int32(Boolean NULL) :: Int32 NULL
0 to_int64(Variant) :: Int64
1 to_int64(Variant NULL) :: Int64 NULL
2 to_int64(String) :: Int64
@@ -3316,16 +3322,18 @@ Functions overloads:
15 to_int64(Int32 NULL) :: Int64 NULL
16 to_int64(UInt64) :: Int64
17 to_int64(UInt64 NULL) :: Int64 NULL
-18 to_int64(Float32) :: Int64
-19 to_int64(Float32 NULL) :: Int64 NULL
-20 to_int64(Float64) :: Int64
-21 to_int64(Float64 NULL) :: Int64 NULL
-22 to_int64(Boolean) :: Int64
-23 to_int64(Boolean NULL) :: Int64 NULL
-24 to_int64(Date) :: Int64
-25 to_int64(Date NULL) :: Int64 NULL
-26 to_int64(Timestamp) :: Int64
-27 to_int64(Timestamp NULL) :: Int64 NULL
+18 to_int64 FACTORY
+19 to_int64 FACTORY
+20 to_int64(Float32) :: Int64
+21 to_int64(Float32 NULL) :: Int64 NULL
+22 to_int64(Float64) :: Int64
+23 to_int64(Float64 NULL) :: Int64 NULL
+24 to_int64(Boolean) :: Int64
+25 to_int64(Boolean NULL) :: Int64 NULL
+26 to_int64(Date) :: Int64
+27 to_int64(Date NULL) :: Int64 NULL
+28 to_int64(Timestamp) :: Int64
+29 to_int64(Timestamp NULL) :: Int64 NULL
0 to_int8(Variant) :: Int8
1 to_int8(Variant NULL) :: Int8 NULL
2 to_int8(String) :: Int8
@@ -3344,12 +3352,14 @@ Functions overloads:
15 to_int8(UInt64 NULL) :: Int8 NULL
16 to_int8(Int64) :: Int8
17 to_int8(Int64 NULL) :: Int8 NULL
-18 to_int8(Float32) :: Int8
-19 to_int8(Float32 NULL) :: Int8 NULL
-20 to_int8(Float64) :: Int8
-21 to_int8(Float64 NULL) :: Int8 NULL
-22 to_int8(Boolean) :: Int8
-23 to_int8(Boolean NULL) :: Int8 NULL
+18 to_int8 FACTORY
+19 to_int8 FACTORY
+20 to_int8(Float32) :: Int8
+21 to_int8(Float32 NULL) :: Int8 NULL
+22 to_int8(Float64) :: Int8
+23 to_int8(Float64 NULL) :: Int8 NULL
+24 to_int8(Boolean) :: Int8
+25 to_int8(Boolean NULL) :: Int8 NULL
0 to_minute(Timestamp) :: UInt8
1 to_minute(Timestamp NULL) :: UInt8 NULL
0 to_monday(Date) :: Date
@@ -3362,6 +3372,10 @@ Functions overloads:
3 to_month(Timestamp NULL) :: UInt8 NULL
0 to_nullable(NULL) :: NULL
1 to_nullable(T0 NULL) :: T0 NULL
+0 to_quarter(Date) :: UInt8
+1 to_quarter(Date NULL) :: UInt8 NULL
+2 to_quarter(Timestamp) :: UInt8
+3 to_quarter(Timestamp NULL) :: UInt8 NULL
0 to_second(Timestamp) :: UInt8
1 to_second(Timestamp NULL) :: UInt8 NULL
0 to_start_of_day(Timestamp) :: Timestamp
@@ -3463,12 +3477,14 @@ Functions overloads:
15 to_uint16(UInt64 NULL) :: UInt16 NULL
16 to_uint16(Int64) :: UInt16
17 to_uint16(Int64 NULL) :: UInt16 NULL
-18 to_uint16(Float32) :: UInt16
-19 to_uint16(Float32 NULL) :: UInt16 NULL
-20 to_uint16(Float64) :: UInt16
-21 to_uint16(Float64 NULL) :: UInt16 NULL
-22 to_uint16(Boolean) :: UInt16
-23 to_uint16(Boolean NULL) :: UInt16 NULL
+18 to_uint16 FACTORY
+19 to_uint16 FACTORY
+20 to_uint16(Float32) :: UInt16
+21 to_uint16(Float32 NULL) :: UInt16 NULL
+22 to_uint16(Float64) :: UInt16
+23 to_uint16(Float64 NULL) :: UInt16 NULL
+24 to_uint16(Boolean) :: UInt16
+25 to_uint16(Boolean NULL) :: UInt16 NULL
0 to_uint32(Variant) :: UInt32
1 to_uint32(Variant NULL) :: UInt32 NULL
2 to_uint32(String) :: UInt32
@@ -3487,12 +3503,14 @@ Functions overloads:
15 to_uint32(UInt64 NULL) :: UInt32 NULL
16 to_uint32(Int64) :: UInt32
17 to_uint32(Int64 NULL) :: UInt32 NULL
-18 to_uint32(Float32) :: UInt32
-19 to_uint32(Float32 NULL) :: UInt32 NULL
-20 to_uint32(Float64) :: UInt32
-21 to_uint32(Float64 NULL) :: UInt32 NULL
-22 to_uint32(Boolean) :: UInt32
-23 to_uint32(Boolean NULL) :: UInt32 NULL
+18 to_uint32 FACTORY
+19 to_uint32 FACTORY
+20 to_uint32(Float32) :: UInt32
+21 to_uint32(Float32 NULL) :: UInt32 NULL
+22 to_uint32(Float64) :: UInt32
+23 to_uint32(Float64 NULL) :: UInt32 NULL
+24 to_uint32(Boolean) :: UInt32
+25 to_uint32(Boolean NULL) :: UInt32 NULL
0 to_uint64(Variant) :: UInt64
1 to_uint64(Variant NULL) :: UInt64 NULL
2 to_uint64(String) :: UInt64
@@ -3511,12 +3529,14 @@ Functions overloads:
15 to_uint64(Int32 NULL) :: UInt64 NULL
16 to_uint64(Int64) :: UInt64
17 to_uint64(Int64 NULL) :: UInt64 NULL
-18 to_uint64(Float32) :: UInt64
-19 to_uint64(Float32 NULL) :: UInt64 NULL
-20 to_uint64(Float64) :: UInt64
-21 to_uint64(Float64 NULL) :: UInt64 NULL
-22 to_uint64(Boolean) :: UInt64
-23 to_uint64(Boolean NULL) :: UInt64 NULL
+18 to_uint64 FACTORY
+19 to_uint64 FACTORY
+20 to_uint64(Float32) :: UInt64
+21 to_uint64(Float32 NULL) :: UInt64 NULL
+22 to_uint64(Float64) :: UInt64
+23 to_uint64(Float64 NULL) :: UInt64 NULL
+24 to_uint64(Boolean) :: UInt64
+25 to_uint64(Boolean NULL) :: UInt64 NULL
0 to_uint8(Variant) :: UInt8
1 to_uint8(Variant NULL) :: UInt8 NULL
2 to_uint8(String) :: UInt8
@@ -3535,12 +3555,14 @@ Functions overloads:
15 to_uint8(UInt64 NULL) :: UInt8 NULL
16 to_uint8(Int64) :: UInt8
17 to_uint8(Int64 NULL) :: UInt8 NULL
-18 to_uint8(Float32) :: UInt8
-19 to_uint8(Float32 NULL) :: UInt8 NULL
-20 to_uint8(Float64) :: UInt8
-21 to_uint8(Float64 NULL) :: UInt8 NULL
-22 to_uint8(Boolean) :: UInt8
-23 to_uint8(Boolean NULL) :: UInt8 NULL
+18 to_uint8 FACTORY
+19 to_uint8 FACTORY
+20 to_uint8(Float32) :: UInt8
+21 to_uint8(Float32 NULL) :: UInt8 NULL
+22 to_uint8(Float64) :: UInt8
+23 to_uint8(Float64 NULL) :: UInt8 NULL
+24 to_uint8(Boolean) :: UInt8
+25 to_uint8(Boolean NULL) :: UInt8 NULL
0 to_unix_timestamp(Timestamp) :: Int64
1 to_unix_timestamp(Timestamp NULL) :: Int64 NULL
0 to_variant(T0) :: Variant
@@ -3677,10 +3699,12 @@ Functions overloads:
17 try_to_float32(UInt64 NULL) :: Float32 NULL
18 try_to_float32(Int64) :: Float32 NULL
19 try_to_float32(Int64 NULL) :: Float32 NULL
-20 try_to_float32(Float64) :: Float32 NULL
-21 try_to_float32(Float64 NULL) :: Float32 NULL
-22 try_to_float32(Boolean) :: Float32 NULL
-23 try_to_float32(Boolean NULL) :: Float32 NULL
+20 try_to_float32 FACTORY
+21 try_to_float32 FACTORY
+22 try_to_float32(Float64) :: Float32 NULL
+23 try_to_float32(Float64 NULL) :: Float32 NULL
+24 try_to_float32(Boolean) :: Float32 NULL
+25 try_to_float32(Boolean NULL) :: Float32 NULL
0 try_to_float64(Variant) :: Float64 NULL
1 try_to_float64(Variant NULL) :: Float64 NULL
2 try_to_float64(String) :: Float64 NULL
@@ -3701,10 +3725,12 @@ Functions overloads:
17 try_to_float64(UInt64 NULL) :: Float64 NULL
18 try_to_float64(Int64) :: Float64 NULL
19 try_to_float64(Int64 NULL) :: Float64 NULL
-20 try_to_float64(Float32) :: Float64 NULL
-21 try_to_float64(Float32 NULL) :: Float64 NULL
-22 try_to_float64(Boolean) :: Float64 NULL
-23 try_to_float64(Boolean NULL) :: Float64 NULL
+20 try_to_float64 FACTORY
+21 try_to_float64 FACTORY
+22 try_to_float64(Float32) :: Float64 NULL
+23 try_to_float64(Float32 NULL) :: Float64 NULL
+24 try_to_float64(Boolean) :: Float64 NULL
+25 try_to_float64(Boolean NULL) :: Float64 NULL
0 try_to_int16(Variant) :: Int16 NULL
1 try_to_int16(Variant NULL) :: Int16 NULL
2 try_to_int16(String) :: Int16 NULL
@@ -3723,12 +3749,14 @@ Functions overloads:
15 try_to_int16(UInt64 NULL) :: Int16 NULL
16 try_to_int16(Int64) :: Int16 NULL
17 try_to_int16(Int64 NULL) :: Int16 NULL
-18 try_to_int16(Float32) :: Int16 NULL
-19 try_to_int16(Float32 NULL) :: Int16 NULL
-20 try_to_int16(Float64) :: Int16 NULL
-21 try_to_int16(Float64 NULL) :: Int16 NULL
-22 try_to_int16(Boolean) :: Int16 NULL
-23 try_to_int16(Boolean NULL) :: Int16 NULL
+18 try_to_int16 FACTORY
+19 try_to_int16 FACTORY
+20 try_to_int16(Float32) :: Int16 NULL
+21 try_to_int16(Float32 NULL) :: Int16 NULL
+22 try_to_int16(Float64) :: Int16 NULL
+23 try_to_int16(Float64 NULL) :: Int16 NULL
+24 try_to_int16(Boolean) :: Int16 NULL
+25 try_to_int16(Boolean NULL) :: Int16 NULL
0 try_to_int32(Variant) :: Int32 NULL
1 try_to_int32(Variant NULL) :: Int32 NULL
2 try_to_int32(String) :: Int32 NULL
@@ -3747,12 +3775,14 @@ Functions overloads:
15 try_to_int32(UInt64 NULL) :: Int32 NULL
16 try_to_int32(Int64) :: Int32 NULL
17 try_to_int32(Int64 NULL) :: Int32 NULL
-18 try_to_int32(Float32) :: Int32 NULL
-19 try_to_int32(Float32 NULL) :: Int32 NULL
-20 try_to_int32(Float64) :: Int32 NULL
-21 try_to_int32(Float64 NULL) :: Int32 NULL
-22 try_to_int32(Boolean) :: Int32 NULL
-23 try_to_int32(Boolean NULL) :: Int32 NULL
+18 try_to_int32 FACTORY
+19 try_to_int32 FACTORY
+20 try_to_int32(Float32) :: Int32 NULL
+21 try_to_int32(Float32 NULL) :: Int32 NULL
+22 try_to_int32(Float64) :: Int32 NULL
+23 try_to_int32(Float64 NULL) :: Int32 NULL
+24 try_to_int32(Boolean) :: Int32 NULL
+25 try_to_int32(Boolean NULL) :: Int32 NULL
0 try_to_int64(Variant) :: Int64 NULL
1 try_to_int64(Variant NULL) :: Int64 NULL
2 try_to_int64(String) :: Int64 NULL
@@ -3771,16 +3801,18 @@ Functions overloads:
15 try_to_int64(Int32 NULL) :: Int64 NULL
16 try_to_int64(UInt64) :: Int64 NULL
17 try_to_int64(UInt64 NULL) :: Int64 NULL
-18 try_to_int64(Float32) :: Int64 NULL
-19 try_to_int64(Float32 NULL) :: Int64 NULL
-20 try_to_int64(Float64) :: Int64 NULL
-21 try_to_int64(Float64 NULL) :: Int64 NULL
-22 try_to_int64(Boolean) :: Int64 NULL
-23 try_to_int64(Boolean NULL) :: Int64 NULL
-24 try_to_int64(Date) :: Int64 NULL
-25 try_to_int64(Date NULL) :: Int64 NULL
-26 try_to_int64(Timestamp) :: Int64 NULL
-27 try_to_int64(Timestamp NULL) :: Int64 NULL
+18 try_to_int64 FACTORY
+19 try_to_int64 FACTORY
+20 try_to_int64(Float32) :: Int64 NULL
+21 try_to_int64(Float32 NULL) :: Int64 NULL
+22 try_to_int64(Float64) :: Int64 NULL
+23 try_to_int64(Float64 NULL) :: Int64 NULL
+24 try_to_int64(Boolean) :: Int64 NULL
+25 try_to_int64(Boolean NULL) :: Int64 NULL
+26 try_to_int64(Date) :: Int64 NULL
+27 try_to_int64(Date NULL) :: Int64 NULL
+28 try_to_int64(Timestamp) :: Int64 NULL
+29 try_to_int64(Timestamp NULL) :: Int64 NULL
0 try_to_int8(Variant) :: Int8 NULL
1 try_to_int8(Variant NULL) :: Int8 NULL
2 try_to_int8(String) :: Int8 NULL
@@ -3799,12 +3831,14 @@ Functions overloads:
15 try_to_int8(UInt64 NULL) :: Int8 NULL
16 try_to_int8(Int64) :: Int8 NULL
17 try_to_int8(Int64 NULL) :: Int8 NULL
-18 try_to_int8(Float32) :: Int8 NULL
-19 try_to_int8(Float32 NULL) :: Int8 NULL
-20 try_to_int8(Float64) :: Int8 NULL
-21 try_to_int8(Float64 NULL) :: Int8 NULL
-22 try_to_int8(Boolean) :: Int8 NULL
-23 try_to_int8(Boolean NULL) :: Int8 NULL
+18 try_to_int8 FACTORY
+19 try_to_int8 FACTORY
+20 try_to_int8(Float32) :: Int8 NULL
+21 try_to_int8(Float32 NULL) :: Int8 NULL
+22 try_to_int8(Float64) :: Int8 NULL
+23 try_to_int8(Float64 NULL) :: Int8 NULL
+24 try_to_int8(Boolean) :: Int8 NULL
+25 try_to_int8(Boolean NULL) :: Int8 NULL
0 try_to_string(Variant) :: String NULL
1 try_to_string(Variant NULL) :: String NULL
2 try_to_string(UInt8) :: String NULL
@@ -3859,12 +3893,14 @@ Functions overloads:
15 try_to_uint16(UInt64 NULL) :: UInt16 NULL
16 try_to_uint16(Int64) :: UInt16 NULL
17 try_to_uint16(Int64 NULL) :: UInt16 NULL
-18 try_to_uint16(Float32) :: UInt16 NULL
-19 try_to_uint16(Float32 NULL) :: UInt16 NULL
-20 try_to_uint16(Float64) :: UInt16 NULL
-21 try_to_uint16(Float64 NULL) :: UInt16 NULL
-22 try_to_uint16(Boolean) :: UInt16 NULL
-23 try_to_uint16(Boolean NULL) :: UInt16 NULL
+18 try_to_uint16 FACTORY
+19 try_to_uint16 FACTORY
+20 try_to_uint16(Float32) :: UInt16 NULL
+21 try_to_uint16(Float32 NULL) :: UInt16 NULL
+22 try_to_uint16(Float64) :: UInt16 NULL
+23 try_to_uint16(Float64 NULL) :: UInt16 NULL
+24 try_to_uint16(Boolean) :: UInt16 NULL
+25 try_to_uint16(Boolean NULL) :: UInt16 NULL
0 try_to_uint32(Variant) :: UInt32 NULL
1 try_to_uint32(Variant NULL) :: UInt32 NULL
2 try_to_uint32(String) :: UInt32 NULL
@@ -3883,12 +3919,14 @@ Functions overloads:
15 try_to_uint32(UInt64 NULL) :: UInt32 NULL
16 try_to_uint32(Int64) :: UInt32 NULL
17 try_to_uint32(Int64 NULL) :: UInt32 NULL
-18 try_to_uint32(Float32) :: UInt32 NULL
-19 try_to_uint32(Float32 NULL) :: UInt32 NULL
-20 try_to_uint32(Float64) :: UInt32 NULL
-21 try_to_uint32(Float64 NULL) :: UInt32 NULL
-22 try_to_uint32(Boolean) :: UInt32 NULL
-23 try_to_uint32(Boolean NULL) :: UInt32 NULL
+18 try_to_uint32 FACTORY
+19 try_to_uint32 FACTORY
+20 try_to_uint32(Float32) :: UInt32 NULL
+21 try_to_uint32(Float32 NULL) :: UInt32 NULL
+22 try_to_uint32(Float64) :: UInt32 NULL
+23 try_to_uint32(Float64 NULL) :: UInt32 NULL
+24 try_to_uint32(Boolean) :: UInt32 NULL
+25 try_to_uint32(Boolean NULL) :: UInt32 NULL
0 try_to_uint64(Variant) :: UInt64 NULL
1 try_to_uint64(Variant NULL) :: UInt64 NULL
2 try_to_uint64(String) :: UInt64 NULL
@@ -3907,12 +3945,14 @@ Functions overloads:
15 try_to_uint64(Int32 NULL) :: UInt64 NULL
16 try_to_uint64(Int64) :: UInt64 NULL
17 try_to_uint64(Int64 NULL) :: UInt64 NULL
-18 try_to_uint64(Float32) :: UInt64 NULL
-19 try_to_uint64(Float32 NULL) :: UInt64 NULL
-20 try_to_uint64(Float64) :: UInt64 NULL
-21 try_to_uint64(Float64 NULL) :: UInt64 NULL
-22 try_to_uint64(Boolean) :: UInt64 NULL
-23 try_to_uint64(Boolean NULL) :: UInt64 NULL
+18 try_to_uint64 FACTORY
+19 try_to_uint64 FACTORY
+20 try_to_uint64(Float32) :: UInt64 NULL
+21 try_to_uint64(Float32 NULL) :: UInt64 NULL
+22 try_to_uint64(Float64) :: UInt64 NULL
+23 try_to_uint64(Float64 NULL) :: UInt64 NULL
+24 try_to_uint64(Boolean) :: UInt64 NULL
+25 try_to_uint64(Boolean NULL) :: UInt64 NULL
0 try_to_uint8(Variant) :: UInt8 NULL
1 try_to_uint8(Variant NULL) :: UInt8 NULL
2 try_to_uint8(String) :: UInt8 NULL
@@ -3931,12 +3971,14 @@ Functions overloads:
15 try_to_uint8(UInt64 NULL) :: UInt8 NULL
16 try_to_uint8(Int64) :: UInt8 NULL
17 try_to_uint8(Int64 NULL) :: UInt8 NULL
-18 try_to_uint8(Float32) :: UInt8 NULL
-19 try_to_uint8(Float32 NULL) :: UInt8 NULL
-20 try_to_uint8(Float64) :: UInt8 NULL
-21 try_to_uint8(Float64 NULL) :: UInt8 NULL
-22 try_to_uint8(Boolean) :: UInt8 NULL
-23 try_to_uint8(Boolean NULL) :: UInt8 NULL
+18 try_to_uint8 FACTORY
+19 try_to_uint8 FACTORY
+20 try_to_uint8(Float32) :: UInt8 NULL
+21 try_to_uint8(Float32 NULL) :: UInt8 NULL
+22 try_to_uint8(Float64) :: UInt8 NULL
+23 try_to_uint8(Float64 NULL) :: UInt8 NULL
+24 try_to_uint8(Boolean) :: UInt8 NULL
+25 try_to_uint8(Boolean NULL) :: UInt8 NULL
0 try_to_variant(T0) :: Variant NULL
1 try_to_variant(T0 NULL) :: Variant NULL
0 tuple FACTORY
diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml
index 7d69c3cb242d..a60e0c79653c 100644
--- a/src/query/service/Cargo.toml
+++ b/src/query/service/Cargo.toml
@@ -141,6 +141,7 @@ once_cell = "1.15.0"
opendal = { workspace = true }
opensrv-mysql = { version = "0.4.1", features = ["tls"] }
parking_lot = "0.12.1"
+parquet = { version = "46.0.0", features = ["async"] }
paste = "1.0.9"
petgraph = "0.6.2"
pin-project-lite = "0.2.9"
diff --git a/src/query/service/src/interpreters/common/mod.rs b/src/query/service/src/interpreters/common/mod.rs
index 75c0539a6c74..96a74433eea2 100644
--- a/src/query/service/src/interpreters/common/mod.rs
+++ b/src/query/service/src/interpreters/common/mod.rs
@@ -25,5 +25,6 @@ pub use refresh_aggregating_index::hook_refresh_agg_index;
pub use refresh_aggregating_index::RefreshAggIndexDesc;
pub use table::check_referenced_computed_columns;
pub use util::check_deduplicate_label;
+pub use util::create_push_down_filters;
pub use self::metrics::*;
diff --git a/src/query/service/src/interpreters/common/util.rs b/src/query/service/src/interpreters/common/util.rs
index 886fe1a58d1d..b57fcf183a92 100644
--- a/src/query/service/src/interpreters/common/util.rs
+++ b/src/query/service/src/interpreters/common/util.rs
@@ -14,11 +14,17 @@
use std::sync::Arc;
+use common_catalog::plan::Filters;
use common_catalog::table_context::TableContext;
use common_exception::Result;
+use common_expression::type_check::check_function;
+use common_functions::BUILTIN_FUNCTIONS;
use common_meta_kvapi::kvapi::KVApi;
use common_users::UserApiProvider;
+use crate::sql::executor::cast_expr_to_non_null_boolean;
+use crate::sql::ScalarExpr;
+
/// Checks if a duplicate label exists in the meta store.
///
/// # Arguments
@@ -41,3 +47,22 @@ pub async fn check_deduplicate_label(ctx: Arc) -> Result
}
}
}
+
+ let filter = cast_expr_to_non_null_boolean(
+ scalar
+ .as_expr()?
+ .project_column_ref(|col| col.column_name.clone()),
+ )?;
+
+ let remote_filter = filter.as_remote_expr();
+
+ // prepare the inverse filter expression
+ let remote_inverted_filter =
+ check_function(None, "not", &[], &[filter], &BUILTIN_FUNCTIONS)?.as_remote_expr();
+
+ Ok(Filters {
+ filter: remote_filter,
+ inverted_filter: remote_inverted_filter,
+ })
+}
diff --git a/src/query/service/src/interpreters/interpreter_delete.rs b/src/query/service/src/interpreters/interpreter_delete.rs
index ff7013d260c7..716ad266faa6 100644
--- a/src/query/service/src/interpreters/interpreter_delete.rs
+++ b/src/query/service/src/interpreters/interpreter_delete.rs
@@ -18,7 +18,6 @@ use std::sync::Arc;
use common_base::runtime::GlobalIORuntime;
use common_catalog::plan::Partitions;
-use common_catalog::table::DeletionFilters;
use common_exception::ErrorCode;
use common_exception::Result;
use common_expression::types::DataType;
@@ -30,7 +29,6 @@ use common_functions::BUILTIN_FUNCTIONS;
use common_meta_app::schema::CatalogInfo;
use common_meta_app::schema::TableInfo;
use common_sql::binder::ColumnBindingBuilder;
-use common_sql::executor::cast_expr_to_non_null_boolean;
use common_sql::executor::DeletePartial;
use common_sql::executor::Exchange;
use common_sql::executor::FragmentKind;
@@ -60,6 +58,7 @@ use log::debug;
use storages_common_table_meta::meta::TableSnapshot;
use table_lock::TableLockHandlerWrapper;
+use crate::interpreters::common::create_push_down_filters;
use crate::interpreters::Interpreter;
use crate::interpreters::SelectInterpreter;
use crate::pipelines::executor::ExecutorSettings;
@@ -164,36 +163,15 @@ impl Interpreter for DeleteInterpreter {
let (filters, col_indices) = if let Some(scalar) = selection {
// prepare the filter expression
- let filter = cast_expr_to_non_null_boolean(
- scalar
- .as_expr()?
- .project_column_ref(|col| col.column_name.clone()),
- )?
- .as_remote_expr();
-
- let expr = filter.as_expr(&BUILTIN_FUNCTIONS);
+ let filters = create_push_down_filters(&scalar)?;
+
+ let expr = filters.filter.as_expr(&BUILTIN_FUNCTIONS);
if !expr.is_deterministic(&BUILTIN_FUNCTIONS) {
return Err(ErrorCode::Unimplemented(
"Delete must have deterministic predicate",
));
}
- // prepare the inverse filter expression
- let inverted_filter = {
- let inverse = ScalarExpr::FunctionCall(common_sql::planner::plans::FunctionCall {
- span: None,
- func_name: "not".to_string(),
- params: vec![],
- arguments: vec![scalar.clone()],
- });
- cast_expr_to_non_null_boolean(
- inverse
- .as_expr()?
- .project_column_ref(|col| col.column_name.clone()),
- )?
- .as_remote_expr()
- };
-
let col_indices: Vec<usize> = if !self.plan.subquery_desc.is_empty() {
let mut col_indices = HashSet::new();
for subquery_desc in &self.plan.subquery_desc {
@@ -203,13 +181,7 @@ impl Interpreter for DeleteInterpreter {
} else {
scalar.used_columns().into_iter().collect()
};
- (
- Some(DeletionFilters {
- filter,
- inverted_filter,
- }),
- col_indices,
- )
+ (Some(filters), col_indices)
} else {
(None, vec![])
};
diff --git a/src/query/service/src/interpreters/interpreter_merge_into.rs b/src/query/service/src/interpreters/interpreter_merge_into.rs
index b3590091cd62..d9f2142f5fc2 100644
--- a/src/query/service/src/interpreters/interpreter_merge_into.rs
+++ b/src/query/service/src/interpreters/interpreter_merge_into.rs
@@ -179,6 +179,7 @@ impl MergeIntoInterpreter {
} else {
None
};
+
let mut values_exprs = Vec::::with_capacity(item.values.len());
for scalar_expr in &item.values {
@@ -208,6 +209,7 @@ impl MergeIntoInterpreter {
} else {
None
};
+
// update
let update_list = if let Some(update_list) = &item.update {
// use update_plan to get exprs
@@ -224,7 +226,7 @@ impl MergeIntoInterpreter {
let col_indices = if item.condition.is_none() {
vec![]
} else {
- // we don't need to real col_indices here, just give a
+ // we don't need real col_indices here, just give a
// dummy index, that's ok.
vec![DUMMY_COL_INDEX]
};
@@ -235,7 +237,6 @@ impl MergeIntoInterpreter {
col_indices,
Some(join_output_schema.num_fields()),
)?;
-
let update_list = update_list
.iter()
.map(|(idx, remote_expr)| {
diff --git a/src/query/service/src/interpreters/interpreter_table_create.rs b/src/query/service/src/interpreters/interpreter_table_create.rs
index 3317efc99459..f574ef453e3f 100644
--- a/src/query/service/src/interpreters/interpreter_table_create.rs
+++ b/src/query/service/src/interpreters/interpreter_table_create.rs
@@ -243,6 +243,11 @@ impl CreateTableInterpreter {
}
is_valid_column(field.name())?;
}
+ let field_comments = if self.plan.field_comments.is_empty() {
+ vec!["".to_string(); fields.len()]
+ } else {
+ self.plan.field_comments.clone()
+ };
let schema = TableSchemaRefExt::create(fields);
let mut table_meta = TableMeta {
@@ -252,7 +257,7 @@ impl CreateTableInterpreter {
part_prefix: self.plan.part_prefix.clone(),
options: self.plan.options.clone(),
default_cluster_key: None,
- field_comments: self.plan.field_comments.clone(),
+ field_comments,
drop_on: None,
statistics: if let Some(stat) = statistics {
stat
@@ -325,6 +330,8 @@ impl CreateTableInterpreter {
number_of_segments: Some(snapshot.segments.len() as u64),
number_of_blocks: Some(snapshot.summary.block_count),
};
+
+ let field_comments = vec!["".to_string(); snapshot.schema.num_fields()];
let table_meta = TableMeta {
schema: Arc::new(snapshot.schema.clone()),
engine: self.plan.engine.to_string(),
@@ -332,7 +339,7 @@ impl CreateTableInterpreter {
part_prefix: self.plan.part_prefix.clone(),
options,
default_cluster_key: None,
- field_comments: self.plan.field_comments.clone(),
+ field_comments,
drop_on: None,
statistics: stat,
..Default::default()
diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs
index a4d13a5c2a82..322fdd41a5e6 100644
--- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs
+++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs
@@ -238,6 +238,7 @@ impl ModifyTableColumnInterpreter {
}
let mut table_info = table.get_table_info().clone();
+ table_info.meta.fill_field_comments();
for (field, comment) in field_and_comments {
let column = &field.name.to_string();
let data_type = &field.data_type;
diff --git a/src/query/service/src/interpreters/interpreter_table_recluster.rs b/src/query/service/src/interpreters/interpreter_table_recluster.rs
index 620d3efddbd7..98ff1891c245 100644
--- a/src/query/service/src/interpreters/interpreter_table_recluster.rs
+++ b/src/query/service/src/interpreters/interpreter_table_recluster.rs
@@ -16,9 +16,12 @@ use std::sync::Arc;
use std::time::Duration;
use std::time::SystemTime;
+use common_catalog::plan::Filters;
use common_catalog::plan::PushDownInfo;
use common_exception::ErrorCode;
use common_exception::Result;
+use common_expression::type_check::check_function;
+use common_functions::BUILTIN_FUNCTIONS;
use log::error;
use log::info;
use log::warn;
@@ -31,6 +34,7 @@ use crate::pipelines::Pipeline;
use crate::pipelines::PipelineBuildResult;
use crate::sessions::QueryContext;
use crate::sessions::TableContext;
+use crate::sql::executor::cast_expr_to_non_null_boolean;
use crate::sql::plans::ReclusterTablePlan;
pub struct ReclusterTableInterpreter {
@@ -68,13 +72,23 @@ impl Interpreter for ReclusterTableInterpreter {
// Build extras via push down scalar
let extras = if let Some(scalar) = &plan.push_downs {
- let filter = scalar
- .as_expr()?
- .project_column_ref(|col| col.column_name.clone())
- .as_remote_expr();
+ // prepare the filter expression
+ let filter = cast_expr_to_non_null_boolean(
+ scalar
+ .as_expr()?
+ .project_column_ref(|col| col.column_name.clone()),
+ )?;
+ // prepare the inverse filter expression
+ let inverted_filter =
+ check_function(None, "not", &[], &[filter.clone()], &BUILTIN_FUNCTIONS)?;
+
+ let filters = Filters {
+ filter: filter.as_remote_expr(),
+ inverted_filter: inverted_filter.as_remote_expr(),
+ };
Some(PushDownInfo {
- filter: Some(filter),
+ filters: Some(filters),
..PushDownInfo::default()
})
} else {
diff --git a/src/query/service/src/servers/http/v1/query/http_query.rs b/src/query/service/src/servers/http/v1/query/http_query.rs
index 5ec81be59e31..e97489cfd41b 100644
--- a/src/query/service/src/servers/http/v1/query/http_query.rs
+++ b/src/query/service/src/servers/http/v1/query/http_query.rs
@@ -387,28 +387,35 @@ impl HttpQuery {
pub async fn get_response_page(&self, page_no: usize) -> Result<HttpQueryResponseInternal> {
let data = Some(self.get_page(page_no).await?);
let state = self.get_state().await;
- let session_conf = self.request.session.clone().unwrap_or_default();
- let session_conf = if let Some(affect) = &state.affect {
- Some(session_conf.apply_affect(affect))
+ let session = self.request.session.clone().unwrap_or_default();
+ let session = if let Some(affect) = &state.affect {
+ Some(session.apply_affect(affect))
} else {
- Some(session_conf)
+ Some(session)
};
Ok(HttpQueryResponseInternal {
data,
state,
- session: session_conf,
+ session,
session_id: self.session_id.clone(),
})
}
#[async_backtrace::framed]
pub async fn get_response_state_only(&self) -> HttpQueryResponseInternal {
+ let state = self.get_state().await;
+ let session = self.request.session.clone().unwrap_or_default();
+ let session = if let Some(affect) = &state.affect {
+ Some(session.apply_affect(affect))
+ } else {
+ Some(session)
+ };
HttpQueryResponseInternal {
data: None,
session_id: self.session_id.clone(),
- state: self.get_state().await,
- session: None,
+ state,
+ session,
}
}
diff --git a/src/query/service/src/table_functions/numbers/numbers_table.rs b/src/query/service/src/table_functions/numbers/numbers_table.rs
index 02d76448e692..c188bc7a2332 100644
--- a/src/query/service/src/table_functions/numbers/numbers_table.rs
+++ b/src/query/service/src/table_functions/numbers/numbers_table.rs
@@ -137,7 +137,7 @@ impl Table for NumbersTable {
let mut limit = None;
if let Some(extras) = &push_downs {
- if extras.limit.is_some() && extras.filter.is_none() && extras.order_by.is_empty() {
+ if extras.limit.is_some() && extras.filters.is_none() && extras.order_by.is_empty() {
// It is allowed to have an error when we can't get sort columns from the expression. For
// example 'select number from numbers(10) order by number+4 limit 10', the column 'number+4'
// doesn't exist in the numbers table.
diff --git a/src/query/service/src/table_functions/srf/range.rs b/src/query/service/src/table_functions/srf/range.rs
index 54e5620fd6f4..58b376585356 100644
--- a/src/query/service/src/table_functions/srf/range.rs
+++ b/src/query/service/src/table_functions/srf/range.rs
@@ -68,9 +68,9 @@ impl RangeTable {
validate_args(&table_args.positioned, table_func_name)?;
let data_type = match &table_args.positioned[0] {
- Scalar::Number(_) => Int64Type::data_type(),
- Scalar::Timestamp(_) => TimestampType::data_type(),
- Scalar::Date(_) => DateType::data_type(),
+ Scalar::Number(_) => DataType::Number(NumberDataType::Int64),
+ Scalar::Timestamp(_) => DataType::Timestamp,
+ Scalar::Date(_) => DataType::Date,
other => {
return Err(ErrorCode::BadArguments(format!(
"Unsupported data type for generate_series: {:?}",
diff --git a/src/query/service/tests/it/main.rs b/src/query/service/tests/it/main.rs
index 1bf22b18d2ac..30f478e714a4 100644
--- a/src/query/service/tests/it/main.rs
+++ b/src/query/service/tests/it/main.rs
@@ -28,6 +28,7 @@ mod databases;
mod frame;
mod interpreters;
mod metrics;
+mod parquet_rs;
mod pipelines;
mod servers;
mod sessions;
diff --git a/src/query/storages/parquet/tests/it/parquet_rs/data.rs b/src/query/service/tests/it/parquet_rs/data.rs
similarity index 100%
rename from src/query/storages/parquet/tests/it/parquet_rs/data.rs
rename to src/query/service/tests/it/parquet_rs/data.rs
diff --git a/src/query/storages/parquet/tests/it/parquet_rs/mod.rs b/src/query/service/tests/it/parquet_rs/mod.rs
similarity index 100%
rename from src/query/storages/parquet/tests/it/parquet_rs/mod.rs
rename to src/query/service/tests/it/parquet_rs/mod.rs
diff --git a/src/query/storages/parquet/tests/it/parquet_rs/prune_pages.rs b/src/query/service/tests/it/parquet_rs/prune_pages.rs
similarity index 100%
rename from src/query/storages/parquet/tests/it/parquet_rs/prune_pages.rs
rename to src/query/service/tests/it/parquet_rs/prune_pages.rs
diff --git a/src/query/storages/parquet/tests/it/parquet_rs/prune_row_groups.rs b/src/query/service/tests/it/parquet_rs/prune_row_groups.rs
similarity index 99%
rename from src/query/storages/parquet/tests/it/parquet_rs/prune_row_groups.rs
rename to src/query/service/tests/it/parquet_rs/prune_row_groups.rs
index 4103d3a01b60..9fbd651cd2f5 100644
--- a/src/query/storages/parquet/tests/it/parquet_rs/prune_row_groups.rs
+++ b/src/query/service/tests/it/parquet_rs/prune_row_groups.rs
@@ -57,7 +57,7 @@ async fn test_impl(scenario: Scenario, predicate: &str, expected_rgs: Vec
)
.unwrap();
- let rgs = pruner.prune_row_groups(&parquet_meta, None).unwrap();
+ let (rgs, _) = pruner.prune_row_groups(&parquet_meta, None).unwrap();
assert_eq!(
expected_rgs, rgs,
diff --git a/src/query/storages/parquet/tests/it/parquet_rs/utils.rs b/src/query/service/tests/it/parquet_rs/utils.rs
similarity index 100%
rename from src/query/storages/parquet/tests/it/parquet_rs/utils.rs
rename to src/query/service/tests/it/parquet_rs/utils.rs
diff --git a/src/query/service/tests/it/storages/fuse/meta/snapshot.rs b/src/query/service/tests/it/storages/fuse/meta/snapshot.rs
index 43e14d08abd9..46f94418b49e 100644
--- a/src/query/service/tests/it/storages/fuse/meta/snapshot.rs
+++ b/src/query/service/tests/it/storages/fuse/meta/snapshot.rs
@@ -12,9 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+use std::collections::HashMap;
use std::ops::Add;
use common_expression::TableSchema;
+use storages_common_table_meta::meta::testing::StatisticsV0;
+use storages_common_table_meta::meta::testing::TableSnapshotV1;
+use storages_common_table_meta::meta::testing::TableSnapshotV2;
use storages_common_table_meta::meta::TableSnapshot;
use uuid::Uuid;
@@ -74,3 +78,32 @@ fn snapshot_timestamp_time_skew_tolerance() {
let prev_ts = prev.timestamp.unwrap();
assert!(current_ts > prev_ts)
}
+
+#[test]
+fn test_snapshot_v1_to_v4() {
+ let summary = StatisticsV0 {
+ row_count: 0,
+ block_count: 0,
+ perfect_block_count: 0,
+ uncompressed_byte_size: 0,
+ compressed_byte_size: 0,
+ index_size: 0,
+ col_stats: HashMap::new(),
+ };
+ let v1 = TableSnapshotV1::new(
+ Uuid::new_v4(),
+ &None,
+ None,
+ Default::default(),
+ summary,
+ vec![],
+ None,
+ None,
+ );
+ assert!(v1.timestamp.is_some());
+
+ let v4: TableSnapshot = TableSnapshotV2::from(v1.clone()).into();
+ assert_eq!(v4.format_version, v1.format_version());
+ assert_eq!(v4.snapshot_id, v1.snapshot_id);
+ assert_eq!(v4.timestamp, v1.timestamp);
+}
diff --git a/src/query/service/tests/it/storages/fuse/pruning.rs b/src/query/service/tests/it/storages/fuse/pruning.rs
index 2b27124a8f0e..dfd0b5498ca4 100644
--- a/src/query/service/tests/it/storages/fuse/pruning.rs
+++ b/src/query/service/tests/it/storages/fuse/pruning.rs
@@ -29,7 +29,7 @@ use common_expression::TableDataType;
use common_expression::TableField;
use common_expression::TableSchemaRef;
use common_expression::TableSchemaRefExt;
-use common_sql::parse_to_remote_string_expr;
+use common_sql::parse_to_filters;
use common_sql::plans::CreateTablePlan;
use common_sql::BloomIndexColumns;
use common_storages_fuse::pruning::create_segment_location_vector;
@@ -172,11 +172,7 @@ async fn test_block_pruner() -> Result<()> {
// nothing is pruned
let e1 = PushDownInfo {
- filter: Some(parse_to_remote_string_expr(
- ctx.clone(),
- table.clone(),
- "a > 3",
- )?),
+ filters: Some(parse_to_filters(ctx.clone(), table.clone(), "a > 3")?),
..Default::default()
};
@@ -184,7 +180,7 @@ async fn test_block_pruner() -> Result<()> {
let mut e2 = PushDownInfo::default();
let max_val_of_b = 6u64;
- e2.filter = Some(parse_to_remote_string_expr(
+ e2.filters = Some(parse_to_filters(
ctx.clone(),
table.clone(),
"a > 0 and b > 6",
diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs
index 37447a2028ff..f218b8e03e20 100644
--- a/src/query/sql/src/executor/format.rs
+++ b/src/query/sql/src/executor/format.rs
@@ -269,9 +269,9 @@ fn table_scan_to_format_tree(
.as_ref()
.and_then(|extras| {
extras
- .filter
+ .filters
.as_ref()
- .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS).sql_display())
+ .map(|filters| filters.filter.as_expr(&BUILTIN_FUNCTIONS).sql_display())
})
.unwrap_or_default();
diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs
index 4f171e2514d7..e342abf515bb 100644
--- a/src/query/sql/src/executor/physical_plan_builder.rs
+++ b/src/query/sql/src/executor/physical_plan_builder.rs
@@ -21,6 +21,7 @@ use std::sync::Arc;
use common_catalog::catalog::CatalogManager;
use common_catalog::catalog_kind::CATALOG_DEFAULT;
use common_catalog::plan::AggIndexInfo;
+use common_catalog::plan::Filters;
use common_catalog::plan::PrewhereInfo;
use common_catalog::plan::Projection;
use common_catalog::plan::PushDownInfo;
@@ -1886,37 +1887,35 @@ impl PhysicalPlanBuilder {
.push_down_predicates
.as_ref()
.filter(|p| !p.is_empty())
- .map(
- |predicates: &Vec| -> Result> {
- let predicates = predicates
- .iter()
- .map(|p| {
- Ok(p.as_expr()?
- .project_column_ref(|col| col.column_name.clone()))
- })
- .collect::>>()?;
-
- let expr = predicates
- .into_iter()
- .try_reduce(|lhs, rhs| {
- check_function(
- None,
- "and_filters",
- &[],
- &[lhs, rhs],
- &BUILTIN_FUNCTIONS,
- )
- })?
- .unwrap();
+ .map(|predicates: &Vec| -> Result {
+ let predicates = predicates
+ .iter()
+ .map(|p| {
+ Ok(p.as_expr()?
+ .project_column_ref(|col| col.column_name.clone()))
+ })
+ .collect::>>()?;
+
+ let expr = predicates
+ .into_iter()
+ .try_reduce(|lhs, rhs| {
+ check_function(None, "and_filters", &[], &[lhs, rhs], &BUILTIN_FUNCTIONS)
+ })?
+ .unwrap();
- let expr = cast_expr_to_non_null_boolean(expr)?;
- let (expr, _) = ConstantFolder::fold(&expr, &self.func_ctx, &BUILTIN_FUNCTIONS);
+ let expr = cast_expr_to_non_null_boolean(expr)?;
+ let (expr, _) = ConstantFolder::fold(&expr, &self.func_ctx, &BUILTIN_FUNCTIONS);
- is_deterministic = expr.is_deterministic(&BUILTIN_FUNCTIONS);
+ is_deterministic = expr.is_deterministic(&BUILTIN_FUNCTIONS);
- Ok(expr.as_remote_expr())
- },
- )
+ let inverted_filter =
+ check_function(None, "not", &[], &[expr.clone()], &BUILTIN_FUNCTIONS)?;
+
+ Ok(Filters {
+ filter: expr.as_remote_expr(),
+ inverted_filter: inverted_filter.as_remote_expr(),
+ })
+ })
.transpose()?;
let prewhere_info = scan
@@ -1970,12 +1969,13 @@ impl PhysicalPlanBuilder {
})
})
.expect("there should be at least one predicate in prewhere");
+
let filter = cast_expr_to_non_null_boolean(
predicate
.as_expr()?
.project_column_ref(|col| col.column_name.clone()),
- )?
- .as_remote_expr();
+ )?;
+ let filter = filter.as_remote_expr();
let virtual_columns = self.build_virtual_columns(&prewhere.prewhere_columns);
Ok::(PrewhereInfo {
@@ -2039,7 +2039,7 @@ impl PhysicalPlanBuilder {
Ok(PushDownInfo {
projection: Some(projection),
output_columns,
- filter: push_down_filter,
+ filters: push_down_filter,
is_deterministic,
prewhere: prewhere_info,
limit: scan.limit,
diff --git a/src/query/sql/src/executor/table_read_plan.rs b/src/query/sql/src/executor/table_read_plan.rs
index d35ceb36acdf..176225bdd062 100644
--- a/src/query/sql/src/executor/table_read_plan.rs
+++ b/src/query/sql/src/executor/table_read_plan.rs
@@ -21,6 +21,7 @@ use common_ast::Dialect;
use common_base::base::ProgressValues;
use common_catalog::plan::DataSourceInfo;
use common_catalog::plan::DataSourcePlan;
+use common_catalog::plan::Filters;
use common_catalog::plan::InternalColumn;
use common_catalog::plan::PartStatistics;
use common_catalog::plan::Partitions;
@@ -88,9 +89,13 @@ impl ToReadDataSourcePlan for dyn Table {
let catalog_info = ctx.get_catalog(&catalog).await?.info();
let (statistics, parts) = if let Some(PushDownInfo {
- filter:
- Some(RemoteExpr::Constant {
- scalar: Scalar::Boolean(false),
+ filters:
+ Some(Filters {
+ filter:
+ RemoteExpr::Constant {
+ scalar: Scalar::Boolean(false),
+ ..
+ },
..
}),
..
diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs
index 34f62c7d4f94..2a8dc4f466f4 100644
--- a/src/query/sql/src/planner/binder/binder.rs
+++ b/src/query/sql/src/planner/binder/binder.rs
@@ -346,7 +346,14 @@ impl<'a> Binder {
// Stages
Statement::ShowStages => self.bind_rewrite_to_query(bind_context, "SELECT name, stage_type, number_of_files, creator, comment FROM system.stages ORDER BY name", RewriteKind::ShowStages).await?,
- Statement::ListStage { location, pattern } => self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM LIST_STAGE(location => '@{location}', pattern => '{pattern}')").as_str(), RewriteKind::ListStage).await?,
+ Statement::ListStage { location, pattern } => {
+ let pattern = if let Some(pattern) = pattern {
+ format!(", pattern => '{pattern}'")
+ } else {
+ "".to_string()
+ };
+ self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM LIST_STAGE(location => '@{location}'{pattern})").as_str(), RewriteKind::ListStage).await?
+ },
Statement::DescribeStage { stage_name } => self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM system.stages WHERE name = '{stage_name}'").as_str(), RewriteKind::DescribeStage).await?,
Statement::CreateStage(stmt) => self.bind_create_stage(stmt).await?,
Statement::DropStage {
diff --git a/src/query/sql/src/planner/binder/merge_into.rs b/src/query/sql/src/planner/binder/merge_into.rs
index b5f967d65c83..987192f7c266 100644
--- a/src/query/sql/src/planner/binder/merge_into.rs
+++ b/src/query/sql/src/planner/binder/merge_into.rs
@@ -29,6 +29,7 @@ use common_catalog::plan::InternalColumnType;
use common_exception::ErrorCode;
use common_exception::Result;
use common_expression::types::DataType;
+use common_expression::FieldIndex;
use common_expression::TableSchemaRef;
use common_expression::ROW_ID_COL_NAME;
use indexmap::IndexMap;
@@ -38,15 +39,18 @@ use crate::binder::Binder;
use crate::binder::InternalColumnBinding;
use crate::normalize_identifier;
use crate::optimizer::SExpr;
+use crate::plans::BoundColumnRef;
use crate::plans::MatchedEvaluator;
use crate::plans::MergeInto;
use crate::plans::Plan;
use crate::plans::UnmatchedEvaluator;
use crate::BindContext;
+use crate::ColumnBindingBuilder;
use crate::ColumnEntry;
use crate::IndexType;
use crate::ScalarBinder;
use crate::ScalarExpr;
+use crate::Visibility;
// implementation of merge into for now:
// use an left outer join for target_source and source.
@@ -124,8 +128,66 @@ impl Binder {
.await?;
// add all left source columns for read
+ // TODO: (JackTan25) do column pruning after finishing "split expr for target and source"
let mut columns_set = left_context.column_set();
+ let update_columns_star = if self.has_star_clause(&matched_clauses, &unmatched_clauses) {
+ // when there are "update *"/"insert *", we need to get the index of correlated columns in source.
+ let default_target_table_schema = table.schema().remove_computed_fields();
+ let mut update_columns = HashMap::with_capacity(
+ default_target_table_schema
+ .remove_computed_fields()
+ .num_fields(),
+ );
+ let source_output_columns = &left_context.columns;
+ // we use Vec as the value, because there could be duplicate names
+ let mut name_map = HashMap::>::new();
+ for column in source_output_columns {
+ name_map
+ .entry(column.column_name.clone())
+ .or_insert_with(|| vec![])
+ .push(column.index);
+ }
+
+ for (field_idx, field) in default_target_table_schema.fields.iter().enumerate() {
+ let index = match name_map.get(field.name()) {
+ None => {
+ return Err(ErrorCode::SemanticError(
+ format!("can't find {} in source output", field.name).to_string(),
+ ));
+ }
+ Some(indices) => {
+ if indices.len() != 1 {
+ return Err(ErrorCode::SemanticError(
+ format!(
+ "there should be only one {} in source output,but we get {}",
+ field.name,
+ indices.len()
+ )
+ .to_string(),
+ ));
+ } else {
+ indices[0]
+ }
+ }
+ };
+ let column = ColumnBindingBuilder::new(
+ field.name.to_string(),
+ index,
+ Box::new(field.data_type().into()),
+ Visibility::Visible,
+ )
+ .build();
+ let col = ScalarExpr::BoundColumnRef(BoundColumnRef { span: None, column });
+ update_columns.insert(field_idx, col);
+ }
+ Some(update_columns)
+ } else {
+ None
+ };
+
+ // Todo: (JackTan25) Maybe we can remove bind target_table
+ // when the target table has been bound in bind_merge_into_source
// bind table for target table
let (mut target_expr, mut right_context) = self
.bind_single_table(&mut left_context, &target_table)
@@ -193,6 +255,7 @@ impl Binder {
.union(&scalar_binder.bind(join_expr).await?.0.used_columns())
.cloned()
.collect();
+
let column_entries = self.metadata.read().columns_by_table_index(table_index);
let mut field_index_map = HashMap::::new();
// if true, read all columns of target table
@@ -204,6 +267,7 @@ impl Binder {
field_index_map.insert(idx, used_idx.to_string());
}
}
+
// bind matched clause columns and add update fields and exprs
for clause in &matched_clauses {
matched_evaluators.push(
@@ -212,6 +276,7 @@ impl Binder {
clause,
&mut columns_set,
table_schema.clone(),
+ update_columns_star.clone(),
)
.await?,
);
@@ -225,6 +290,7 @@ impl Binder {
clause,
&mut columns_set,
table_schema.clone(),
+ update_columns_star.clone(),
)
.await?,
);
@@ -252,6 +318,7 @@ impl Binder {
clause: &MatchedClause,
columns: &mut HashSet,
schema: TableSchemaRef,
+ update_columns_star: Option>,
) -> Result {
let condition = if let Some(expr) = &clause.selection {
let (scalar_expr, _) = scalar_binder.bind(expr).await?;
@@ -263,42 +330,54 @@ impl Binder {
None
};
- if let MatchOperation::Update { update_list } = &clause.operation {
- let mut update_columns = HashMap::with_capacity(update_list.len());
- for update_expr in update_list {
- let (scalar_expr, _) = scalar_binder.bind(&update_expr.expr).await?;
- let col_name =
- normalize_identifier(&update_expr.name, &self.name_resolution_ctx).name;
- let index = schema.index_of(&col_name)?;
-
- if update_columns.contains_key(&index) {
- return Err(ErrorCode::BadArguments(format!(
- "Multiple assignments in the single statement to column `{}`",
- col_name
- )));
- }
-
- let field = schema.field(index);
- if field.computed_expr().is_some() {
- return Err(ErrorCode::BadArguments(format!(
- "The value specified for computed column '{}' is not allowed",
- field.name()
- )));
+ if let MatchOperation::Update {
+ update_list,
+ is_star,
+ } = &clause.operation
+ {
+ if *is_star {
+ Ok(MatchedEvaluator {
+ condition,
+ update: update_columns_star,
+ })
+ } else {
+ let mut update_columns = HashMap::with_capacity(update_list.len());
+ for update_expr in update_list {
+ let (scalar_expr, _) = scalar_binder.bind(&update_expr.expr).await?;
+ let col_name =
+ normalize_identifier(&update_expr.name, &self.name_resolution_ctx).name;
+ let index = schema.index_of(&col_name)?;
+
+ if update_columns.contains_key(&index) {
+ return Err(ErrorCode::BadArguments(format!(
+ "Multiple assignments in the single statement to column `{}`",
+ col_name
+ )));
+ }
+
+ let field = schema.field(index);
+ if field.computed_expr().is_some() {
+ return Err(ErrorCode::BadArguments(format!(
+ "The value specified for computed column '{}' is not allowed",
+ field.name()
+ )));
+ }
+
+ if matches!(scalar_expr, ScalarExpr::SubqueryExpr(_)) {
+ return Err(ErrorCode::Internal(
+ "update_list in update clause does not support subquery temporarily",
+ ));
+ }
+ update_columns.insert(index, scalar_expr.clone());
}
- if matches!(scalar_expr, ScalarExpr::SubqueryExpr(_)) {
- return Err(ErrorCode::Internal(
- "update_list in update clause does not support subquery temporarily",
- ));
- }
- update_columns.insert(index, scalar_expr.clone());
+ Ok(MatchedEvaluator {
+ condition,
+ update: Some(update_columns),
+ })
}
-
- Ok(MatchedEvaluator {
- condition,
- update: Some(update_columns),
- })
} else {
+ // delete
Ok(MatchedEvaluator {
condition,
update: None,
@@ -312,6 +391,7 @@ impl Binder {
clause: &UnmatchedClause,
columns: &mut HashSet,
table_schema: TableSchemaRef,
+ update_columns_star: Option>,
) -> Result {
let condition = if let Some(expr) = &clause.selection {
let (scalar_expr, _) = scalar_binder.bind(expr).await?;
@@ -322,42 +402,59 @@ impl Binder {
} else {
None
};
-
- if clause.insert_operation.values.is_empty() {
- return Err(ErrorCode::SemanticError(
- "Values lists must have at least one row".to_string(),
- ));
- }
-
- let mut values = Vec::with_capacity(clause.insert_operation.values.len());
-
- // we need to get source schema, and use it for filling columns.
- let source_schema = if let Some(fields) = clause.insert_operation.columns.clone() {
- self.schema_project(&table_schema, &fields)?
+ if clause.insert_operation.is_star {
+ let default_schema = table_schema.remove_computed_fields();
+ let mut values = Vec::with_capacity(default_schema.num_fields());
+ let update_columns_star = update_columns_star.unwrap();
+ for idx in 0..default_schema.num_fields() {
+ values.push(update_columns_star.get(&idx).unwrap().clone());
+ }
+ Ok(UnmatchedEvaluator {
+ source_schema: Arc::new(Arc::new(default_schema).into()),
+ condition,
+ values,
+ })
} else {
- table_schema.clone()
- };
-
- for (idx, expr) in clause.insert_operation.values.iter().enumerate() {
- let (mut scalar_expr, _) = scalar_binder.bind(expr).await?;
- // type cast
- scalar_expr = wrap_cast_scalar(
- &scalar_expr,
- &scalar_expr.data_type()?,
- &DataType::from(source_schema.field(idx).data_type()),
- )?;
+ if clause.insert_operation.values.is_empty() {
+ return Err(ErrorCode::SemanticError(
+ "Values lists must have at least one row".to_string(),
+ ));
+ }
- values.push(scalar_expr.clone());
- for idx in scalar_expr.used_columns() {
- columns.insert(idx);
+ let mut values = Vec::with_capacity(clause.insert_operation.values.len());
+
+ // we need to get source schema, and use it for filling columns.
+ let source_schema = if let Some(fields) = clause.insert_operation.columns.clone() {
+ self.schema_project(&table_schema, &fields)?
+ } else {
+ table_schema.clone()
+ };
+ if clause.insert_operation.values.len() != source_schema.num_fields() {
+ return Err(ErrorCode::SemanticError(
+ "insert columns and values are not matched".to_string(),
+ ));
+ }
+ for (idx, expr) in clause.insert_operation.values.iter().enumerate() {
+ let (mut scalar_expr, _) = scalar_binder.bind(expr).await?;
+ // type cast
+ scalar_expr = wrap_cast_scalar(
+ &scalar_expr,
+ &scalar_expr.data_type()?,
+ &DataType::from(source_schema.field(idx).data_type()),
+ )?;
+
+ values.push(scalar_expr.clone());
+ for idx in scalar_expr.used_columns() {
+ columns.insert(idx);
+ }
}
- }
- Ok(UnmatchedEvaluator {
- source_schema: Arc::new(source_schema.into()),
- condition,
- values,
- })
+ Ok(UnmatchedEvaluator {
+ source_schema: Arc::new(source_schema.into()),
+ condition,
+ values,
+ })
+ }
}
fn find_column_index(
@@ -378,7 +475,36 @@ impl Binder {
fn has_update(&self, matched_clauses: &Vec) -> bool {
for clause in matched_clauses {
- if let MatchOperation::Update { update_list: _ } = clause.operation {
+ if let MatchOperation::Update {
+ update_list: _,
+ is_star: _,
+ } = clause.operation
+ {
+ return true;
+ }
+ }
+ false
+ }
+
+ fn has_star_clause(
+ &self,
+ matched_clauses: &Vec,
+ unmatched_clauses: &Vec,
+ ) -> bool {
+ for item in matched_clauses {
+ if let MatchOperation::Update {
+ update_list: _,
+ is_star,
+ } = item.operation
+ {
+ if is_star {
+ return true;
+ }
+ }
+ }
+
+ for item in unmatched_clauses {
+ if item.insert_operation.is_star {
return true;
}
}
diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs
index 7779500b9407..545cf7c28df6 100644
--- a/src/query/sql/src/planner/expression_parser.rs
+++ b/src/query/sql/src/planner/expression_parser.rs
@@ -22,12 +22,14 @@ use common_ast::Dialect;
use common_base::base::tokio::runtime::Handle;
use common_base::base::tokio::task::block_in_place;
use common_catalog::catalog::CATALOG_DEFAULT;
+use common_catalog::plan::Filters;
use common_catalog::table::Table;
use common_catalog::table_context::TableContext;
use common_exception::ErrorCode;
use common_exception::Result;
use common_expression::infer_schema_type;
use common_expression::infer_table_schema;
+use common_expression::type_check::check_function;
use common_expression::types::DataType;
use common_expression::ConstantFolder;
use common_expression::DataBlock;
@@ -137,11 +139,11 @@ pub fn parse_exprs(
Ok(exprs)
}
-pub fn parse_to_remote_string_expr(
+pub fn parse_to_filters(
ctx: Arc,
table_meta: Arc,
sql: &str,
-) -> Result> {
+) -> Result {
let schema = table_meta.schema();
let exprs = parse_exprs(ctx, table_meta, sql)?;
let exprs: Vec> = exprs
@@ -153,7 +155,20 @@ pub fn parse_to_remote_string_expr(
.collect();
if exprs.len() == 1 {
- Ok(exprs[0].clone())
+ let filter = exprs[0].clone();
+
+ let inverted_filter = check_function(
+ None,
+ "not",
+ &[],
+ &[filter.as_expr(&BUILTIN_FUNCTIONS)],
+ &BUILTIN_FUNCTIONS,
+ )?;
+
+ Ok(Filters {
+ filter,
+ inverted_filter: inverted_filter.as_remote_expr(),
+ })
} else {
Err(ErrorCode::BadDataValueType(format!(
"Expected single expr, but got {}",
diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs
index 187400e9d297..f248b154e0ad 100644
--- a/src/query/sql/src/planner/semantic/type_check.rs
+++ b/src/query/sql/src/planner/semantic/type_check.rs
@@ -54,6 +54,7 @@ use common_expression::types::NumberDataType;
use common_expression::types::NumberScalar;
use common_expression::ColumnIndex;
use common_expression::ConstantFolder;
+use common_expression::Expr as EExpr;
use common_expression::FunctionContext;
use common_expression::FunctionKind;
use common_expression::RawExpr;
@@ -1410,19 +1411,28 @@ impl<'a> TypeChecker<'a> {
arg_types: &[DataType],
) -> Result {
if args.is_empty() || args.len() > 3 {
- return Err(ErrorCode::InvalidArgument(
- "Argument number is invalid".to_string(),
- ));
+ return Err(ErrorCode::InvalidArgument(format!(
+ "Function {:?} only support 1 to 3 arguments",
+ func_name
+ )));
}
let offset = if args.len() >= 2 {
let off = args[1].as_expr()?;
- Some(check_number::<_, i64>(
- off.span(),
- &self.func_ctx,
- &off,
- &BUILTIN_FUNCTIONS,
- )?)
+ match off {
+ EExpr::Constant { .. } => Some(check_number::<_, i64>(
+ off.span(),
+ &self.func_ctx,
+ &off,
+ &BUILTIN_FUNCTIONS,
+ )?),
+ _ => {
+ return Err(ErrorCode::InvalidArgument(format!(
+ "The second argument to the function {:?} must be a constant",
+ func_name
+ )));
+ }
+ }
} else {
None
};
@@ -1473,9 +1483,10 @@ impl<'a> TypeChecker<'a> {
Ok(match func_name {
"first_value" | "first" => {
if args.len() != 1 {
- return Err(ErrorCode::InvalidArgument(
- "Argument number is invalid".to_string(),
- ));
+ return Err(ErrorCode::InvalidArgument(format!(
+ "The function {:?} must take one argument",
+ func_name
+ )));
}
let return_type = arg_types[0].wrap_nullable();
WindowFuncType::NthValue(NthValueFunction {
@@ -1486,9 +1497,10 @@ impl<'a> TypeChecker<'a> {
}
"last_value" | "last" => {
if args.len() != 1 {
- return Err(ErrorCode::InvalidArgument(
- "Argument number is invalid".to_string(),
- ));
+ return Err(ErrorCode::InvalidArgument(format!(
+ "The function {:?} must take one argument",
+ func_name
+ )));
}
let return_type = arg_types[0].wrap_nullable();
WindowFuncType::NthValue(NthValueFunction {
@@ -1501,17 +1513,24 @@ impl<'a> TypeChecker<'a> {
// nth_value
if args.len() != 2 {
return Err(ErrorCode::InvalidArgument(
- "Argument number is invalid".to_string(),
+ "The function nth_value must take two arguments".to_string(),
));
}
let return_type = arg_types[0].wrap_nullable();
let n_expr = args[1].as_expr()?;
- let n = check_number::<_, u64>(
- n_expr.span(),
- &self.func_ctx,
- &n_expr,
- &BUILTIN_FUNCTIONS,
- )?;
+ let n = match n_expr {
+ EExpr::Constant { .. } => check_number::<_, u64>(
+ n_expr.span(),
+ &self.func_ctx,
+ &n_expr,
+ &BUILTIN_FUNCTIONS,
+ )?,
+ _ => {
+ return Err(ErrorCode::InvalidArgument(
+ "The count of `nth_value` must be constant positive integer",
+ ));
+ }
+ };
if n == 0 {
return Err(ErrorCode::InvalidArgument(
"nth_value should count from 1".to_string(),
@@ -1534,12 +1553,21 @@ impl<'a> TypeChecker<'a> {
) -> Result {
if args.len() != 1 {
return Err(ErrorCode::InvalidArgument(
- "Argument number is invalid".to_string(),
+ "Function ntile can only take one argument".to_string(),
));
}
let n_expr = args[0].as_expr()?;
let return_type = DataType::Number(NumberDataType::UInt64);
- let n = check_number::<_, u64>(n_expr.span(), &self.func_ctx, &n_expr, &BUILTIN_FUNCTIONS)?;
+ let n = match n_expr {
+ EExpr::Constant { .. } => {
+ check_number::<_, u64>(n_expr.span(), &self.func_ctx, &n_expr, &BUILTIN_FUNCTIONS)?
+ }
+ _ => {
+ return Err(ErrorCode::InvalidArgument(
+ "The argument of `ntile` must be constant".to_string(),
+ ));
+ }
+ };
if n == 0 {
return Err(ErrorCode::InvalidArgument(
"ntile buckets must be greater than 0".to_string(),
@@ -1981,7 +2009,7 @@ impl<'a> TypeChecker<'a> {
)
.await
}
- _ => Err(ErrorCode::SemanticError("Only these interval types are currently supported: [year, month, day, hour, minute, second]".to_string()).set_span(span)),
+ _ => Err(ErrorCode::SemanticError("Only these interval types are currently supported: [year, quarter, month, day, hour, minute, second]".to_string()).set_span(span)),
}
}
@@ -2247,7 +2275,20 @@ impl<'a> TypeChecker<'a> {
let box (scalar, _) = self.resolve(args[0]).await?;
let expr = scalar.as_expr()?;
- check_number::<_, i64>(span, &self.func_ctx, &expr, &BUILTIN_FUNCTIONS)?
+ match expr {
+ EExpr::Constant { .. } => check_number::<_, i64>(
+ span,
+ &self.func_ctx,
+ &expr,
+ &BUILTIN_FUNCTIONS,
+ )?,
+ _ => {
+ return Some(Err(ErrorCode::BadArguments(
+ "last_query_id argument only support constant",
+ )
+ .set_span(span)));
+ }
+ }
}
};
diff --git a/src/query/storages/common/table-meta/src/meta/mod.rs b/src/query/storages/common/table-meta/src/meta/mod.rs
index 7ba2f446ddfb..724a22165773 100644
--- a/src/query/storages/common/table-meta/src/meta/mod.rs
+++ b/src/query/storages/common/table-meta/src/meta/mod.rs
@@ -50,6 +50,8 @@ pub use versions::Versioned;
// - export meta encoding to benchmarking tests
pub mod testing {
pub use super::format::MetaEncoding;
+ pub use super::v0::statistics::Statistics as StatisticsV0;
+ pub use super::v1::TableSnapshot as TableSnapshotV1;
pub use super::v2::SegmentInfo as SegmentInfoV2;
pub use super::v2::TableSnapshot as TableSnapshotV2;
pub use super::v3::SegmentInfo as SegmentInfoV3;
diff --git a/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs b/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs
index e7759854ad78..5bcd1affc5b2 100644
--- a/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs
+++ b/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs
@@ -127,7 +127,7 @@ impl From for TableSnapshot {
// carries the format_version of snapshot being converted.
format_version: s.format_version,
snapshot_id: s.snapshot_id,
- timestamp: None,
+ timestamp: s.timestamp,
prev_snapshot_id: s.prev_snapshot_id,
schema,
summary,
diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs
index 4a9e34049213..bf92e9ab2649 100644
--- a/src/query/storages/fuse/src/operations/delete.rs
+++ b/src/query/storages/fuse/src/operations/delete.rs
@@ -15,13 +15,13 @@
use std::sync::Arc;
use common_base::base::ProgressValues;
+use common_catalog::plan::Filters;
use common_catalog::plan::PartInfoPtr;
use common_catalog::plan::Partitions;
use common_catalog::plan::PartitionsShuffleKind;
use common_catalog::plan::Projection;
use common_catalog::plan::PruningStatistics;
use common_catalog::plan::PushDownInfo;
-use common_catalog::table::DeletionFilters;
use common_catalog::table::Table;
use common_catalog::table_context::TableContext;
use common_exception::Result;
@@ -72,7 +72,7 @@ impl FuseTable {
pub async fn fast_delete(
&self,
ctx: Arc,
- filters: Option,
+ filters: Option,
col_indices: Vec,
query_row_id_col: bool,
) -> Result)>> {
@@ -131,8 +131,7 @@ impl FuseTable {
let (partitions, info) = self
.do_mutation_block_pruning(
ctx.clone(),
- Some(deletion_filters.filter),
- Some(deletion_filters.inverted_filter),
+ Some(deletion_filters),
projection,
&snapshot,
true,
@@ -280,8 +279,7 @@ impl FuseTable {
pub async fn do_mutation_block_pruning(
&self,
ctx: Arc,
- filter: Option>,
- inverted_filter: Option>,
+ filters: Option,
projection: Projection,
base_snapshot: &TableSnapshot,
with_origin: bool,
@@ -289,7 +287,7 @@ impl FuseTable {
) -> Result<(Partitions, MutationTaskInfo)> {
let push_down = Some(PushDownInfo {
projection: Some(projection),
- filter: filter.clone(),
+ filters: filters.clone(),
..PushDownInfo::default()
});
@@ -304,7 +302,7 @@ impl FuseTable {
let segment_locations = create_segment_location_vector(segment_locations, None);
- if let Some(inverse) = inverted_filter {
+ if let Some(inverse) = filters.map(|f| f.inverted_filter) {
// now the `block_metas` refers to the blocks that need to be deleted completely or partially.
//
// let's try pruning the blocks further to get the blocks that need to be deleted completely, so that
diff --git a/src/query/storages/fuse/src/operations/gc.rs b/src/query/storages/fuse/src/operations/gc.rs
index 70e2a015a0e9..c15195f14841 100644
--- a/src/query/storages/fuse/src/operations/gc.rs
+++ b/src/query/storages/fuse/src/operations/gc.rs
@@ -63,6 +63,12 @@ impl FuseTable {
}
}
let root_snapshot_info = root_snapshot_info_op.unwrap();
+ if root_snapshot_info.snapshot_lite.timestamp.is_none() {
+ return Err(ErrorCode::StorageOther(format!(
+ "gc: snapshot timestamp is none, snapshot location: {}",
+ root_snapshot_info.snapshot_location
+ )));
+ }
let snapshots_io = SnapshotsIO::create(ctx.clone(), self.operator.clone());
let location_gen = self.meta_location_generator();
@@ -116,7 +122,7 @@ impl FuseTable {
let mut segments_to_be_purged = HashSet::new();
let mut ts_to_be_purged = HashSet::new();
for s in snapshots.into_iter() {
- if s.timestamp >= base_timestamp {
+ if s.timestamp.is_some() && s.timestamp >= base_timestamp {
remain_snapshots.push(s);
continue;
}
diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs b/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs
index d26ca27c9080..39e7b19d3e9c 100644
--- a/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs
+++ b/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs
@@ -243,6 +243,7 @@ impl Processor for MatchedSplitProcessor {
let (stage_block, mut row_ids) = delete_mutation
.delete_mutator
.delete_by_expr(current_block)?;
+
if stage_block.is_empty() {
// delete all
if !row_ids.is_empty() {
diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs b/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs
index b316fb4a64f1..c8e4d55cb095 100644
--- a/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs
+++ b/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs
@@ -34,7 +34,7 @@ use common_storage::metrics::merge_into::metrics_inc_merge_into_append_blocks_co
use itertools::Itertools;
use crate::operations::merge_into::mutator::SplitByExprMutator;
-
+// (source_schema,condition,values_exprs)
type UnMatchedExprs = Vec<(DataSchemaRef, Option, Vec)>;
struct InsertDataBlockMutation {
@@ -65,6 +65,7 @@ impl MergeIntoNotMatchedProcessor {
for (idx, item) in unmatched.iter().enumerate() {
let eval_projections: HashSet =
(input_schema.num_fields()..input_schema.num_fields() + item.2.len()).collect();
+ println!("data_schema: {:?}", item.0.clone());
data_schemas.insert(idx, item.0.clone());
ops.push(InsertDataBlockMutation {
op: BlockOperator::Map {
diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs
index 6ab120c22183..dbbd89ae90de 100644
--- a/src/query/storages/fuse/src/operations/read_partitions.rs
+++ b/src/query/storages/fuse/src/operations/read_partitions.rs
@@ -267,7 +267,7 @@ impl FuseTable {
) -> (PartStatistics, Partitions) {
let limit = push_downs
.as_ref()
- .filter(|p| p.order_by.is_empty() && p.filter.is_none())
+ .filter(|p| p.order_by.is_empty() && p.filters.is_none())
.and_then(|p| p.limit)
.unwrap_or(usize::MAX);
@@ -333,7 +333,7 @@ impl FuseTable {
fn is_exact(push_downs: &Option) -> bool {
push_downs
.as_ref()
- .map_or(true, |extra| extra.filter.is_none())
+ .map_or(true, |extra| extra.filters.is_none())
}
fn all_columns_partitions(
diff --git a/src/query/storages/fuse/src/operations/update.rs b/src/query/storages/fuse/src/operations/update.rs
index 4a740018cee5..a2b765981c83 100644
--- a/src/query/storages/fuse/src/operations/update.rs
+++ b/src/query/storages/fuse/src/operations/update.rs
@@ -15,10 +15,12 @@
use std::collections::BTreeMap;
use std::sync::Arc;
+use common_catalog::plan::Filters;
use common_catalog::plan::Projection;
use common_catalog::table::Table;
use common_catalog::table_context::TableContext;
use common_exception::Result;
+use common_expression::type_check::check_function;
use common_expression::types::NumberDataType;
use common_expression::FieldIndex;
use common_expression::RemoteExpr;
@@ -241,14 +243,25 @@ impl FuseTable {
);
}
let remain_reader = Arc::new(remain_reader);
- let (filter_expr, filter) = if let Some(remote_expr) = filter {
+ let (filter_expr, filters) = if let Some(remote_expr) = filter {
+ let inverted_expr = check_function(
+ None,
+ "not",
+ &[],
+ &[remote_expr.as_expr(&BUILTIN_FUNCTIONS)],
+ &BUILTIN_FUNCTIONS,
+ )?;
+
(
Arc::new(Some(
remote_expr
.as_expr(&BUILTIN_FUNCTIONS)
.project_column_ref(|name| schema.index_of(name).unwrap()),
)),
- Some(remote_expr),
+ Some(Filters {
+ filter: remote_expr,
+ inverted_filter: inverted_expr.as_remote_expr(),
+ }),
)
)
} else {
(Arc::new(None), None)
@@ -257,8 +270,7 @@ impl FuseTable {
let (parts, part_info) = self
.do_mutation_block_pruning(
ctx.clone(),
- filter,
- None,
+ filters,
projection,
base_snapshot,
false,
diff --git a/src/query/storages/fuse/src/pruning/fuse_pruner.rs b/src/query/storages/fuse/src/pruning/fuse_pruner.rs
index aacaca3b1689..a6e0a8a4bf4b 100644
--- a/src/query/storages/fuse/src/pruning/fuse_pruner.rs
+++ b/src/query/storages/fuse/src/pruning/fuse_pruner.rs
@@ -81,15 +81,18 @@ impl PruningContext {
) -> Result> {
let func_ctx = ctx.get_function_context()?;
- let filter_expr = push_down
- .as_ref()
- .and_then(|extra| extra.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS)));
+ let filter_expr = push_down.as_ref().and_then(|extra| {
+ extra
+ .filters
+ .as_ref()
+ .map(|f| f.filter.as_expr(&BUILTIN_FUNCTIONS))
+ });
// Limit pruner.
// if there are ordering/filter clause, ignore limit, even it has been pushed down
let limit = push_down
.as_ref()
- .filter(|p| p.order_by.is_empty() && p.filter.is_none())
+ .filter(|p| p.order_by.is_empty() && p.filters.is_none())
.and_then(|p| p.limit);
// prepare the limiter. in case that limit is none, an unlimited limiter will be returned
let limit_pruner = LimiterPrunerCreator::create(limit);
@@ -378,7 +381,7 @@ impl FusePruner {
let push_down = self.push_down.clone();
if push_down
.as_ref()
- .filter(|p| !p.order_by.is_empty() && p.limit.is_some() && p.filter.is_none())
+ .filter(|p| !p.order_by.is_empty() && p.limit.is_some() && p.filters.is_none())
.is_some()
{
let schema = self.table_schema.clone();
diff --git a/src/query/storages/hive/hive/src/hive_table.rs b/src/query/storages/hive/hive/src/hive_table.rs
index f033d7e3adcf..afbb29cc3352 100644
--- a/src/query/storages/hive/hive/src/hive_table.rs
+++ b/src/query/storages/hive/hive/src/hive_table.rs
@@ -114,9 +114,9 @@ impl HiveTable {
let filter_expression = push_downs.as_ref().and_then(|extra| {
extra
- .filter
+ .filters
.as_ref()
- .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS))
+ .map(|filter| filter.filter.as_expr(&BUILTIN_FUNCTIONS))
});
let range_filter = match filter_expression {
@@ -242,7 +242,7 @@ impl HiveTable {
fn is_simple_select_query(&self, plan: &DataSourcePlan) -> bool {
// couldn't get groupby order by info
if let Some(PushDownInfo {
- filter,
+ filters,
limit: Some(lm),
..
}) = &plan.push_downs
@@ -253,10 +253,10 @@ impl HiveTable {
// filter out the partition column related expressions
let partition_keys = self.get_partition_key_sets();
- let columns = filter
+ let columns = filters
.as_ref()
.map(|f| {
- let expr = f.as_expr(&BUILTIN_FUNCTIONS);
+ let expr = f.filter.as_expr(&BUILTIN_FUNCTIONS);
expr.column_refs().keys().cloned().collect::>()
})
.unwrap_or_default();
@@ -460,9 +460,9 @@ impl HiveTable {
if let Some(partition_keys) = &self.table_options.partition_keys {
if !partition_keys.is_empty() {
let filter_expression = push_downs.as_ref().and_then(|p| {
- p.filter
+ p.filters
.as_ref()
- .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS))
+ .map(|filter| filter.filter.as_expr(&BUILTIN_FUNCTIONS))
});
return self
diff --git a/src/query/storages/iceberg/src/table.rs b/src/query/storages/iceberg/src/table.rs
index 8468b397d12a..57d9c1a1cd75 100644
--- a/src/query/storages/iceberg/src/table.rs
+++ b/src/query/storages/iceberg/src/table.rs
@@ -209,9 +209,12 @@ impl IcebergTable {
ErrorCode::ReadTableDataError(format!("Cannot get current data files: {e:?}"))
})?;
- let filter = push_downs
- .as_ref()
- .and_then(|extra| extra.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS)));
+ let filter = push_downs.as_ref().and_then(|extra| {
+ extra
+ .filters
+ .as_ref()
+ .map(|f| f.filter.as_expr(&BUILTIN_FUNCTIONS))
+ });
let schema = self.schema();
diff --git a/src/query/storages/parquet/Cargo.toml b/src/query/storages/parquet/Cargo.toml
index 9c46e8d4c2d6..803026960075 100644
--- a/src/query/storages/parquet/Cargo.toml
+++ b/src/query/storages/parquet/Cargo.toml
@@ -44,5 +44,4 @@ typetag = "0.2.3"
[dev-dependencies]
common-sql = { path = "../../sql" }
-databend-query = { path = "../../service" }
tempfile = "3.4.0"
diff --git a/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs b/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs
index eaaf5f755f5a..50431a37d017 100644
--- a/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs
+++ b/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs
@@ -78,9 +78,19 @@ impl Parquet2Table {
project_parquet_schema(&self.arrow_schema, &self.schema_descr, &projection)?;
let schema = Arc::new(arrow_to_table_schema(projected_arrow_schema));
- let filter = push_down
- .as_ref()
- .and_then(|extra| extra.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS)));
+ let filter = push_down.as_ref().and_then(|extra| {
+ extra
+ .filters
+ .as_ref()
+ .map(|f| f.filter.as_expr(&BUILTIN_FUNCTIONS))
+ });
+
+ let inverted_filter = push_down.as_ref().and_then(|extra| {
+ extra
+ .filters
+ .as_ref()
+ .map(|f| f.inverted_filter.as_expr(&BUILTIN_FUNCTIONS))
+ });
let top_k = top_k.map(|top_k| {
let offset = projected_column_nodes
@@ -94,11 +104,13 @@ impl Parquet2Table {
let func_ctx = ctx.get_function_context()?;
let row_group_pruner = if self.read_options.prune_row_groups() {
- Some(RangePrunerCreator::try_create(
+ let p1 = RangePrunerCreator::try_create(func_ctx.clone(), &schema, filter.as_ref())?;
+ let p2 = RangePrunerCreator::try_create(
func_ctx.clone(),
&schema,
- filter.as_ref(),
- )?)
+ inverted_filter.as_ref(),
+ )?;
+ Some((p1, p2))
} else {
None
};
diff --git a/src/query/storages/parquet/src/parquet2/pruning.rs b/src/query/storages/parquet/src/parquet2/pruning.rs
index cbf7565df367..4c3fa5365ca3 100644
--- a/src/query/storages/parquet/src/parquet2/pruning.rs
+++ b/src/query/storages/parquet/src/parquet2/pruning.rs
@@ -62,8 +62,11 @@ pub struct PartitionPruner {
pub schema: TableSchemaRef,
pub schema_descr: SchemaDescriptor,
pub schema_from: String,
- /// Pruner to prune row groups.
- pub row_group_pruner: Option>,
+ /// Pruner to prune row groups. (filter & inverted filter)
+ pub row_group_pruner: Option<(
+ Arc,
+ Arc,
+ )>,
/// Pruners to prune pages.
pub page_pruners: Option,
/// The projected column indices.
@@ -120,7 +123,7 @@ impl PartitionPruner {
let row_group_stats = if no_stats {
None
} else if self.row_group_pruner.is_some() && !self.skip_pruning {
- let pruner = self.row_group_pruner.as_ref().unwrap();
+ let (pruner, _) = self.row_group_pruner.as_ref().unwrap();
// If collecting stats fails or `should_keep` is true, we still read the row group.
// Otherwise, the row group will be pruned.
if let Ok(row_group_stats) =
diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs b/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs
index 03bbfe6ea97c..51e72d9602e4 100644
--- a/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs
+++ b/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs
@@ -33,6 +33,8 @@ use common_expression::TableSchema;
use common_expression::TableSchemaRef;
use common_expression::TopKSorter;
use common_functions::BUILTIN_FUNCTIONS;
+use common_storage::metrics::common::metrics_inc_omit_filter_rowgroups;
+use common_storage::metrics::common::metrics_inc_omit_filter_rows;
use futures::StreamExt;
use opendal::Operator;
use opendal::Reader;
@@ -242,30 +244,41 @@ impl ParquetRSReader {
.with_projection(self.projection.clone())
.with_batch_size(self.batch_size);
- // Prune row groups.
- let file_meta = builder.metadata();
+ let mut full_match = false;
+
+ let file_meta = builder.metadata().clone();
+ // Prune row groups.
if let Some(pruner) = &self.pruner {
- let selected_row_groups = pruner.prune_row_groups(file_meta, None)?;
- let row_selection = pruner.prune_pages(file_meta, &selected_row_groups)?;
+ let (selected_row_groups, omits) = pruner.prune_row_groups(&file_meta, None)?;
+ full_match = omits.iter().all(|x| *x);
+ builder = builder.with_row_groups(selected_row_groups.clone());
- builder = builder.with_row_groups(selected_row_groups);
- if let Some(row_selection) = row_selection {
- builder = builder.with_row_selection(row_selection);
+ if !full_match {
+ let row_selection = pruner.prune_pages(&file_meta, &selected_row_groups)?;
+
+ if let Some(row_selection) = row_selection {
+ builder = builder.with_row_selection(row_selection);
+ }
+ } else {
+ metrics_inc_omit_filter_rowgroups(file_meta.num_row_groups() as u64);
+ metrics_inc_omit_filter_rows(file_meta.file_metadata().num_rows() as u64);
}
}
- if let Some(predicate) = self.predicate.as_ref() {
- let projection = predicate.projection().clone();
- let predicate = predicate.clone();
- let predicate_fn = move |batch| {
- predicate
- .evaluate(&batch)
- .map_err(|e| ArrowError::from_external_error(Box::new(e)))
- };
- builder = builder.with_row_filter(RowFilter::new(vec![Box::new(
- ArrowPredicateFn::new(projection, predicate_fn),
- )]));
+ if !full_match {
+ if let Some(predicate) = self.predicate.as_ref() {
+ let projection = predicate.projection().clone();
+ let predicate = predicate.clone();
+ let predicate_fn = move |batch| {
+ predicate
+ .evaluate(&batch)
+ .map_err(|e| ArrowError::from_external_error(Box::new(e)))
+ };
+ builder = builder.with_row_filter(RowFilter::new(vec![Box::new(
+ ArrowPredicateFn::new(projection, predicate_fn),
+ )]));
+ }
}
Ok(builder.build()?)
@@ -319,29 +332,40 @@ impl ParquetRSReader {
.with_batch_size(self.batch_size);
// Prune row groups.
- let file_meta = builder.metadata();
+ let file_meta = builder.metadata().clone();
+ let mut full_match = false;
if let Some(pruner) = &self.pruner {
- let selected_row_groups = pruner.prune_row_groups(file_meta, None)?;
- let row_selection = pruner.prune_pages(file_meta, &selected_row_groups)?;
+ let (selected_row_groups, omits) = pruner.prune_row_groups(&file_meta, None)?;
+
+ full_match = omits.iter().all(|x| *x);
+ builder = builder.with_row_groups(selected_row_groups.clone());
- builder = builder.with_row_groups(selected_row_groups);
- if let Some(row_selection) = row_selection {
- builder = builder.with_row_selection(row_selection);
+ if !full_match {
+ let row_selection = pruner.prune_pages(&file_meta, &selected_row_groups)?;
+
+ if let Some(row_selection) = row_selection {
+ builder = builder.with_row_selection(row_selection);
+ }
+ } else {
+ metrics_inc_omit_filter_rowgroups(file_meta.num_row_groups() as u64);
+ metrics_inc_omit_filter_rows(file_meta.file_metadata().num_rows() as u64);
}
}
- if let Some(predicate) = self.predicate.as_ref() {
- let projection = predicate.projection().clone();
- let predicate = predicate.clone();
- let predicate_fn = move |batch| {
- predicate
- .evaluate(&batch)
- .map_err(|e| ArrowError::from_external_error(Box::new(e)))
- };
- builder = builder.with_row_filter(RowFilter::new(vec![Box::new(
- ArrowPredicateFn::new(projection, predicate_fn),
- )]));
+ if !full_match {
+ if let Some(predicate) = self.predicate.as_ref() {
+ let projection = predicate.projection().clone();
+ let predicate = predicate.clone();
+ let predicate_fn = move |batch| {
+ predicate
+ .evaluate(&batch)
+ .map_err(|e| ArrowError::from_external_error(Box::new(e)))
+ };
+ builder = builder.with_row_filter(RowFilter::new(vec![Box::new(
+ ArrowPredicateFn::new(projection, predicate_fn),
+ )]));
+ }
}
let reader = builder.build()?;
@@ -385,13 +409,23 @@ impl ParquetRSReader {
});
// TODO(parquet): cache deserilaized columns to avoid deserialize multiple times.
let mut row_group = InMemoryRowGroup::new(&part.meta, page_locations.as_deref());
+
let mut selection = part
.selectors
.as_ref()
.map(|x| x.iter().map(RowSelector::from).collect::>())
.map(RowSelection::from);
- if let Some(predicate) = &self.predicate {
+ let mut predicate = self.predicate.as_ref();
+ if part.omit_filter {
+ predicate = None;
+ selection = None;
+
+ metrics_inc_omit_filter_rowgroups(1);
+ metrics_inc_omit_filter_rows(row_group.row_count() as u64);
+ }
+
+ if let Some(predicate) = predicate {
// Fetch columns used for eval predicate (prewhere).
row_group
.fetch(
diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs b/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs
index e95d0c849345..0ac6d50e2288 100644
--- a/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs
+++ b/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs
@@ -109,6 +109,10 @@ impl<'a> InMemoryRowGroup<'a> {
}
}
+ pub fn row_count(&self) -> usize {
+ self.row_count
+ }
+
/// Fetches the necessary column data into memory
///
/// If call `fetch` multiple times, it will only fetch the data that has not been fetched.
diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs
index 54c33d98d549..546b019564cb 100644
--- a/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs
+++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs
@@ -356,11 +356,16 @@ fn prune_and_generate_partitions(
..
} = meta.as_ref();
part_stats.partitions_total += meta.num_row_groups();
- let rgs = pruner.prune_row_groups(meta, row_group_level_stats.as_deref())?;
- let mut row_selections = pruner.prune_pages(meta, &rgs)?;
+ let (rgs, omits) = pruner.prune_row_groups(meta, row_group_level_stats.as_deref())?;
+ let mut row_selections = if omits.iter().all(|x| *x) {
+ None
+ } else {
+ pruner.prune_pages(meta, &rgs)?
+ };
+
let mut rows_read = 0; // Rows read in current file.
- for rg in rgs {
+ for (rg, omit) in rgs.into_iter().zip(omits.into_iter()) {
let rg_meta = meta.row_group(rg);
let num_rows = rg_meta.num_rows() as usize;
// Split rows belonging to current row group.
@@ -412,6 +417,7 @@ fn prune_and_generate_partitions(
compressed_size,
uncompressed_size,
sort_min_max,
+ omit_filter: omit,
});
}
diff --git a/src/query/storages/parquet/src/parquet_rs/partition.rs b/src/query/storages/parquet/src/parquet_rs/partition.rs
index 1e152583b4be..517398585d83 100644
--- a/src/query/storages/parquet/src/parquet_rs/partition.rs
+++ b/src/query/storages/parquet/src/parquet_rs/partition.rs
@@ -100,6 +100,7 @@ pub struct ParquetRSRowGroupPart {
pub uncompressed_size: u64,
pub compressed_size: u64,
pub sort_min_max: Option<(Scalar, Scalar)>,
+ pub omit_filter: bool,
}
impl Eq for ParquetRSRowGroupPart {}
diff --git a/src/query/storages/parquet/src/parquet_rs/pruning.rs b/src/query/storages/parquet/src/parquet_rs/pruning.rs
index 6210085c302e..ffcda9935aeb 100644
--- a/src/query/storages/parquet/src/parquet_rs/pruning.rs
+++ b/src/query/storages/parquet/src/parquet_rs/pruning.rs
@@ -38,7 +38,10 @@ use crate::parquet_rs::statistics::convert_index_to_column_statistics;
/// We can use this pruner to compute row groups and pages to skip.
pub struct ParquetRSPruner {
leaf_fields: Arc>,
- range_pruner: Option>,
+ range_pruner: Option<(
+ Arc,
+ Arc,
+ )>,
prune_row_groups: bool,
prune_pages: bool,
@@ -55,16 +58,19 @@ impl ParquetRSPruner {
options: ParquetReadOptions,
) -> Result {
// Build `RangePruner` by `filter`.
- let filter = push_down
- .as_ref()
- .and_then(|p| p.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS)));
+ let filter = push_down.as_ref().and_then(|p| p.filters.as_ref());
let mut predicate_columns = vec![];
let range_pruner =
if filter.is_some() && (options.prune_row_groups() || options.prune_pages()) {
- predicate_columns = filter
+ let filter_expr = filter.as_ref().unwrap().filter.as_expr(&BUILTIN_FUNCTIONS);
+ let inverted_filter_expr = filter
.as_ref()
.unwrap()
+ .inverted_filter
+ .as_expr(&BUILTIN_FUNCTIONS);
+
+ predicate_columns = filter_expr
.column_refs()
.into_keys()
.map(|name| {
@@ -75,8 +81,11 @@ impl ParquetRSPruner {
})
.collect::>();
predicate_columns.sort();
- let pruner = RangePrunerCreator::try_create(func_ctx, &schema, filter.as_ref())?;
- Some(pruner)
+ let pruner =
+ RangePrunerCreator::try_create(func_ctx.clone(), &schema, Some(&filter_expr))?;
+ let inverted_pruner =
+ RangePrunerCreator::try_create(func_ctx, &schema, Some(&inverted_filter_expr))?;
+ Some((pruner, inverted_pruner))
} else {
None
};
@@ -92,28 +101,36 @@ impl ParquetRSPruner {
/// Prune row groups of a parquet file.
///
- /// Return the selected row groups' indices in the meta.
+ /// Return the selected row groups' indices in the meta and omit filter flags.
///
/// If `stats` is not [None], we use this statistics to prune but not collect again.
pub fn prune_row_groups(
&self,
meta: &ParquetMetaData,
stats: Option<&[StatisticsOfColumns]>,
- ) -> Result> {
+ ) -> Result<(Vec, Vec)> {
+ let default_selection = (0..meta.num_row_groups()).collect();
+ let default_omits = vec![false; meta.num_row_groups()];
if !self.prune_row_groups {
- return Ok((0..meta.num_row_groups()).collect());
+ return Ok((default_selection, default_omits));
}
+
match &self.range_pruner {
- None => Ok((0..meta.num_row_groups()).collect()),
- Some(pruner) => {
+ None => Ok((default_selection, default_omits)),
+
+ Some((pruner, inverted_pruner)) => {
let mut selection = Vec::with_capacity(meta.num_row_groups());
+ let mut omits = Vec::with_capacity(meta.num_row_groups());
if let Some(row_group_stats) = stats {
for (i, row_group) in row_group_stats.iter().enumerate() {
if pruner.should_keep(row_group, None) {
selection.push(i);
+
+ let omit = !inverted_pruner.should_keep(row_group, None);
+ omits.push(omit);
}
}
- Ok(selection)
+ Ok((selection, omits))
} else if let Some(row_group_stats) = collect_row_group_stats(
meta.row_groups(),
&self.leaf_fields,
@@ -122,11 +139,14 @@ impl ParquetRSPruner {
for (i, row_group) in row_group_stats.iter().enumerate() {
if pruner.should_keep(row_group, None) {
selection.push(i);
+
+ let omit = !inverted_pruner.should_keep(row_group, None);
+ omits.push(omit);
}
}
- Ok(selection)
+ Ok((selection, omits))
} else {
- Ok((0..meta.num_row_groups()).collect())
+ Ok((default_selection, default_omits))
}
}
}
@@ -145,7 +165,7 @@ impl ParquetRSPruner {
}
match &self.range_pruner {
None => Ok(None),
- Some(pruner) => {
+ Some((pruner, _)) => {
// Only if the file has page level statistics, we can use them to prune.
if meta.column_index().is_none() || meta.offset_index().is_none() {
return Ok(None);
diff --git a/src/query/storages/system/src/columns_table.rs b/src/query/storages/system/src/columns_table.rs
index 1dd15dcde79a..6e3e9f7a4ca3 100644
--- a/src/query/storages/system/src/columns_table.rs
+++ b/src/query/storages/system/src/columns_table.rs
@@ -155,7 +155,7 @@ impl ColumnsTable {
let mut databases = Vec::new();
if let Some(push_downs) = push_downs {
- if let Some(filter) = push_downs.filter {
+ if let Some(filter) = push_downs.filters.as_ref().map(|f| &f.filter) {
let expr = filter.as_expr(&BUILTIN_FUNCTIONS);
find_eq_filter(&expr, &mut |col_name, scalar| {
if col_name == "database" {
diff --git a/src/query/storages/system/src/tables_table.rs b/src/query/storages/system/src/tables_table.rs
index cf1dc720389b..82282abfe815 100644
--- a/src/query/storages/system/src/tables_table.rs
+++ b/src/query/storages/system/src/tables_table.rs
@@ -124,7 +124,7 @@ where TablesTable: HistoryAware
let mut dbs = Vec::new();
if let Some(push_downs) = &push_downs {
let mut db_name = Vec::new();
- if let Some(filter) = &push_downs.filter {
+ if let Some(filter) = push_downs.filters.as_ref().map(|f| &f.filter) {
let expr = filter.as_expr(&BUILTIN_FUNCTIONS);
find_eq_filter(&expr, &mut |col_name, scalar| {
if col_name == "database" {
diff --git a/src/tests/sqlsmith/src/runner.rs b/src/tests/sqlsmith/src/runner.rs
index 5640fb89e63d..af09186a5b9c 100644
--- a/src/tests/sqlsmith/src/runner.rs
+++ b/src/tests/sqlsmith/src/runner.rs
@@ -32,7 +32,7 @@ use rand::SeedableRng;
use crate::sql_gen::SqlGenerator;
use crate::sql_gen::Table;
-const KNOWN_ERRORS: [&str; 27] = [
+const KNOWN_ERRORS: [&str; 30] = [
// Errors caused by illegal parameters
"Overflow on date YMD",
"timestamp is out of range",
@@ -62,6 +62,9 @@ const KNOWN_ERRORS: [&str; 27] = [
"The arguments of AggregateRetention should be an expression which returns a Boolean result",
"AggregateWindowFunnelFunction does not support type",
"nth_value should count from 1",
+ "start must be less than or equal to end when step is positive vice versa",
+ "Expected Number, Date or Timestamp type, but got",
+ "Unsupported data type for generate_series",
];
pub struct Runner {
diff --git a/src/tests/sqlsmith/src/sql_gen/expr.rs b/src/tests/sqlsmith/src/sql_gen/expr.rs
index 77bc5f9a880f..9290cd72ba6f 100644
--- a/src/tests/sqlsmith/src/sql_gen/expr.rs
+++ b/src/tests/sqlsmith/src/sql_gen/expr.rs
@@ -45,6 +45,14 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}
}
+ pub(crate) fn gen_simple_expr(&mut self, ty: &DataType) -> Expr {
+ if self.rng.gen_bool(0.6) {
+ self.gen_column(ty)
+ } else {
+ self.gen_scalar_value(ty)
+ }
+ }
+
fn gen_column(&mut self, ty: &DataType) -> Expr {
for bound_column in &self.bound_columns {
if bound_column.data_type == *ty {
@@ -389,7 +397,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}
7 => {
let not = self.rng.gen_bool(0.5);
- let subquery = self.gen_subquery();
+ let (subquery, _) = self.gen_subquery(false);
Expr::Exists {
span: None,
not,
@@ -404,7 +412,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
3 => Some(SubqueryModifier::Some),
_ => unreachable!(),
};
- let subquery = self.gen_subquery();
+ let (subquery, _) = self.gen_subquery(true);
Expr::Subquery {
span: None,
modifier,
@@ -415,7 +423,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
let expr_ty = self.gen_simple_data_type();
let expr = self.gen_expr(&expr_ty);
let not = self.rng.gen_bool(0.5);
- let subquery = self.gen_subquery();
+ let (subquery, _) = self.gen_subquery(true);
Expr::InSubquery {
span: None,
expr: Box::new(expr),
diff --git a/src/tests/sqlsmith/src/sql_gen/func.rs b/src/tests/sqlsmith/src/sql_gen/func.rs
index 82388e03dbe0..76b8f6fcaf61 100644
--- a/src/tests/sqlsmith/src/sql_gen/func.rs
+++ b/src/tests/sqlsmith/src/sql_gen/func.rs
@@ -21,6 +21,7 @@ use common_ast::ast::Window;
use common_ast::ast::WindowFrame;
use common_ast::ast::WindowFrameBound;
use common_ast::ast::WindowFrameUnits;
+use common_ast::ast::WindowRef;
use common_ast::ast::WindowSpec;
use common_expression::types::DataType;
use common_expression::types::DecimalDataType::Decimal128;
@@ -283,7 +284,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
(name, vec![], args_type)
}
DataType::Decimal(_) => {
- let decimal = vec!["to_float64", "to_folat32", "to_decimal", "try_to_decimal"];
+ let decimal = vec!["to_float64", "to_float32", "to_decimal", "try_to_decimal"];
let name = decimal[self.rng.gen_range(0..=3)].to_string();
if name == "to_decimal" || name == "try_to_decimal" {
let args_type = vec![self.gen_data_type(); 1];
@@ -615,6 +616,27 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}
fn gen_window(&mut self) -> Option