Skip to content

Commit

Permalink
refactor: ScalarVisitor refactor push_down_prewhere (#15215)
Browse files Browse the repository at this point in the history
* refactor: ScalarVisitor refactor push_down_prewhere

* add udf lambda call prewhere push down test

* REGENERATE GOLDENFILES statement-error.txt
  • Loading branch information
TCeason authored Apr 15, 2024
1 parent e8f88e7 commit a359fe3
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 38 deletions.
2 changes: 1 addition & 1 deletion src/query/ast/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1621,7 +1621,7 @@ pub fn type_name(i: Input) -> IResult<TypeName> {
.iter()
.any(|field_name| !field_name.chars().all(|c| c.is_ascii_alphanumeric()))
{
return Err(nom::Err::Error(ErrorKind::Other(
return Err(nom::Err::Failure(ErrorKind::Other(
"Invalid tuple field name, only support alphanumeric characters",
)));
}
Expand Down
9 changes: 5 additions & 4 deletions src/query/ast/tests/it/testdata/statement-error.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,14 @@ error:
create table a (b tuple("c-1" int, "c-2" uint64));
---------- Output ---------
error:
--> SQL:1:48
--> SQL:1:19
|
1 | create table a (b tuple("c-1" int, "c-2" uint64));
| ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `NULLABLE`, `(`, `NULL`, `NOT`, or `,`
| | | |
| | | while parsing TUPLE(<type>, ...)
| ------ - ^^^^^
| | | |
| | | Invalid tuple field name, only support alphanumeric characters
| | | while parsing type name
| | | while parsing TUPLE(<name> <type>, ...)
| | while parsing `<column name> <type> [DEFAULT <expr>] [AS (<expr>) VIRTUAL] [AS (<expr>) STORED] [COMMENT '<comment>']`
| while parsing `CREATE [OR REPLACE] TABLE [IF NOT EXISTS] [<database>.]<table> [<source>] [<table_options>]`

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@ use crate::optimizer::rule::Rule;
use crate::optimizer::ColumnSet;
use crate::optimizer::RuleID;
use crate::optimizer::SExpr;
use crate::plans::BoundColumnRef;
use crate::plans::Filter;
use crate::plans::Prewhere;
use crate::plans::RelOp;
use crate::plans::ScalarExpr;
use crate::plans::Scan;
use crate::plans::SubqueryExpr;
use crate::plans::Visitor;
use crate::IndexType;
use crate::MetadataRef;
use crate::Visibility;
Expand Down Expand Up @@ -58,44 +61,47 @@ impl RulePushDownPrewhere {
table_index: IndexType,
schema: &TableSchemaRef,
expr: &ScalarExpr,
columns: &mut ColumnSet,
) -> Result<()> {
match expr {
ScalarExpr::BoundColumnRef(column) => {
) -> Result<ColumnSet> {
struct ColumnVisitor {
table_index: IndexType,
schema: TableSchemaRef,
columns: ColumnSet,
}
impl<'a> Visitor<'a> for ColumnVisitor {
fn visit_bound_column_ref(&mut self, column: &'a BoundColumnRef) -> Result<()> {
if let Some(index) = &column.column.table_index {
if table_index == *index
if self.table_index == *index
&& (column.column.visibility == Visibility::InVisible
|| schema.index_of(column.column.column_name.as_str()).is_ok())
|| self
.schema
.index_of(column.column.column_name.as_str())
.is_ok())
{
columns.insert(column.column.index);
self.columns.insert(column.column.index);
return Ok(());
}
}
return Err(ErrorCode::Unimplemented("Column is not in the table"));
}
ScalarExpr::FunctionCall(func) => {
for arg in func.arguments.iter() {
Self::collect_columns_impl(table_index, schema, arg, columns)?;
}
}
ScalarExpr::CastExpr(cast) => {
Self::collect_columns_impl(table_index, schema, cast.argument.as_ref(), columns)?;
Err(ErrorCode::Unimplemented("Column is not in the table"))
}
ScalarExpr::ConstantExpr(_) => {}
ScalarExpr::UDFCall(udf) => {
for arg in udf.arguments.iter() {
Self::collect_columns_impl(table_index, schema, arg, columns)?;
}
}
_ => {
// SubqueryExpr and AggregateFunction will not appear in Filter-Scan
return Err(ErrorCode::Unimplemented(format!(

fn visit_subquery(&mut self, subquery: &'a SubqueryExpr) -> Result<()> {
Err(ErrorCode::Unimplemented(format!(
"Prewhere don't support expr {:?}",
expr
)));
subquery
)))
}
}
Ok(())

let mut column_visitor = ColumnVisitor {
table_index,
schema: schema.clone(),
columns: ColumnSet::new(),
};
// WindowFunc, SubqueryExpr and AggregateFunction will not appear in Scan
// WindowFunc and AggFunc already check in binder:
// Where clause can't contain aggregate or window functions
column_visitor.visit(expr)?;
Ok(column_visitor.columns)
}

// analyze if the expression can be moved to prewhere
Expand All @@ -104,11 +110,7 @@ impl RulePushDownPrewhere {
schema: &TableSchemaRef,
expr: &ScalarExpr,
) -> Option<ColumnSet> {
let mut columns = ColumnSet::new();
// columns in subqueries are not considered
Self::collect_columns_impl(table_index, schema, expr, &mut columns).ok()?;

Some(columns)
Self::collect_columns_impl(table_index, schema, expr).ok()
}

pub fn prewhere_optimize(&self, s_expr: &SExpr) -> Result<SExpr> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ drop table if exists t_where_optimizer
statement ok
create table if not exists t_where_optimizer (a int, b int)

statement ok
DROP FUNCTION IF EXISTS isnotempty;

statement ok
CREATE FUNCTION IF NOT EXISTS isnotempty AS(p) -> not(is_null(p));

query T
explain select a from t_where_optimizer where a = 1
----
Expand All @@ -17,6 +23,20 @@ TableScan
├── push downs: [filters: [is_true(t_where_optimizer.a (#0) = 1)], limit: NONE]
└── estimated rows: 0.00

query T
explain select a from t_where_optimizer where isnotempty(a)
----
TableScan
├── table: default.default.t_where_optimizer
├── output columns: [a (#0)]
├── read rows: 0
├── read size: 0
├── partitions total: 0
├── partitions scanned: 0
├── push downs: [filters: [NOT NOT is_not_null(t_where_optimizer.a (#0))], limit: NONE]
└── estimated rows: 0.00


query T
explain select * from t_where_optimizer where a = b
----
Expand Down Expand Up @@ -103,3 +123,6 @@ TableScan

statement ok
drop table t_where_optimizer

statement ok
DROP FUNCTION IF EXISTS isnotempty;

0 comments on commit a359fe3

Please sign in to comment.