Skip to content

Commit

Permalink
feat(binder): convert to constant lookup when case when expression is…
Browse files Browse the repository at this point in the history
… in simple form (#15083)
  • Loading branch information
xzhseh authored Mar 1, 2024
1 parent 533af26 commit 7f6ab49
Show file tree
Hide file tree
Showing 3 changed files with 220 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -249,4 +249,47 @@
end;
expected_outputs:
- logical_plan
- batch_plan
- batch_plan
- id: case_when_optimizable_pattern_basic
before:
- create_table
sql: |
select
case
when c1 = 1 then 'one'
when c1 = 2 then 'two'
when c1 = 3 then 'three'
when c1 = 4 then 'four'
when c1 = 5 then 'five'
when c1 = 6 then 'six'
when c1 = 7 then 'seven'
when c1 = 8 then 'eight'
when c1 = 9 then 'nine'
when c1 = 10 then 'ten'
when c1 = 11 then 'eleven'
when c1 = 12 then 'twelve'
when c1 = 13 then 'thirteen'
when c1 = 14 then 'fourteen'
when c1 = 15 then 'fifteen'
when c1 = 16 then 'sixteen'
when c1 = 17 then 'seventeen'
when c1 = 18 then 'eighteen'
when c1 = 19 then 'nineteen'
when c1 = 20 then 'twenty'
when c1 = 21 then 'twenty-one'
when c1 = 22 then 'twenty-two'
when c1 = 23 then 'twenty-three'
when c1 = 24 then 'twenty-four'
when c1 = 25 then 'twenty-five'
when c1 = 26 then 'twenty-six'
when c1 = 27 then 'twenty-seven'
when c1 = 28 then 'twenty-eight'
when c1 = 29 then 'twenty-nine'
when c1 = 30 then 'thirty'
when c1 = 31 then 'thirty-one'
else 'other'
end
from t1;
expected_outputs:
- logical_plan
- batch_plan
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,50 @@
LogicalProject { exprs: [1919810:Int32] }
└─LogicalValues { rows: [[]], schema: Schema { fields: [] } }
batch_plan: 'BatchValues { rows: [[1919810:Int32]] }'
- id: case_when_optimizable_pattern_basic
before:
- create_table
sql: |
select
case
when c1 = 1 then 'one'
when c1 = 2 then 'two'
when c1 = 3 then 'three'
when c1 = 4 then 'four'
when c1 = 5 then 'five'
when c1 = 6 then 'six'
when c1 = 7 then 'seven'
when c1 = 8 then 'eight'
when c1 = 9 then 'nine'
when c1 = 10 then 'ten'
when c1 = 11 then 'eleven'
when c1 = 12 then 'twelve'
when c1 = 13 then 'thirteen'
when c1 = 14 then 'fourteen'
when c1 = 15 then 'fifteen'
when c1 = 16 then 'sixteen'
when c1 = 17 then 'seventeen'
when c1 = 18 then 'eighteen'
when c1 = 19 then 'nineteen'
when c1 = 20 then 'twenty'
when c1 = 21 then 'twenty-one'
when c1 = 22 then 'twenty-two'
when c1 = 23 then 'twenty-three'
when c1 = 24 then 'twenty-four'
when c1 = 25 then 'twenty-five'
when c1 = 26 then 'twenty-six'
when c1 = 27 then 'twenty-seven'
when c1 = 28 then 'twenty-eight'
when c1 = 29 then 'twenty-nine'
when c1 = 30 then 'thirty'
when c1 = 31 then 'thirty-one'
else 'other'
end
from t1;
logical_plan: |-
LogicalProject { exprs: [ConstantLookup(t1.c1, 1:Int32, 'one':Varchar, 2:Int32, 'two':Varchar, 3:Int32, 'three':Varchar, 4:Int32, 'four':Varchar, 5:Int32, 'five':Varchar, 6:Int32, 'six':Varchar, 7:Int32, 'seven':Varchar, 8:Int32, 'eight':Varchar, 9:Int32, 'nine':Varchar, 10:Int32, 'ten':Varchar, 11:Int32, 'eleven':Varchar, 12:Int32, 'twelve':Varchar, 13:Int32, 'thirteen':Varchar, 14:Int32, 'fourteen':Varchar, 15:Int32, 'fifteen':Varchar, 16:Int32, 'sixteen':Varchar, 17:Int32, 'seventeen':Varchar, 18:Int32, 'eighteen':Varchar, 19:Int32, 'nineteen':Varchar, 20:Int32, 'twenty':Varchar, 21:Int32, 'twenty-one':Varchar, 22:Int32, 'twenty-two':Varchar, 23:Int32, 'twenty-three':Varchar, 24:Int32, 'twenty-four':Varchar, 25:Int32, 'twenty-five':Varchar, 26:Int32, 'twenty-six':Varchar, 27:Int32, 'twenty-seven':Varchar, 28:Int32, 'twenty-eight':Varchar, 29:Int32, 'twenty-nine':Varchar, 30:Int32, 'thirty':Varchar, 31:Int32, 'thirty-one':Varchar, 'other':Varchar) as $expr1] }
└─LogicalScan { table: t1, columns: [t1.c1, t1.c2, t1.c3, t1._row_id] }
batch_plan: |-
BatchExchange { order: [], dist: Single }
└─BatchProject { exprs: [ConstantLookup(t1.c1, 1:Int32, 'one':Varchar, 2:Int32, 'two':Varchar, 3:Int32, 'three':Varchar, 4:Int32, 'four':Varchar, 5:Int32, 'five':Varchar, 6:Int32, 'six':Varchar, 7:Int32, 'seven':Varchar, 8:Int32, 'eight':Varchar, 9:Int32, 'nine':Varchar, 10:Int32, 'ten':Varchar, 11:Int32, 'eleven':Varchar, 12:Int32, 'twelve':Varchar, 13:Int32, 'thirteen':Varchar, 14:Int32, 'fourteen':Varchar, 15:Int32, 'fifteen':Varchar, 16:Int32, 'sixteen':Varchar, 17:Int32, 'seventeen':Varchar, 18:Int32, 'eighteen':Varchar, 19:Int32, 'nineteen':Varchar, 20:Int32, 'twenty':Varchar, 21:Int32, 'twenty-one':Varchar, 22:Int32, 'twenty-two':Varchar, 23:Int32, 'twenty-three':Varchar, 24:Int32, 'twenty-four':Varchar, 25:Int32, 'twenty-five':Varchar, 26:Int32, 'twenty-six':Varchar, 27:Int32, 'twenty-seven':Varchar, 28:Int32, 'twenty-eight':Varchar, 29:Int32, 'twenty-nine':Varchar, 30:Int32, 'thirty':Varchar, 31:Int32, 'thirty-one':Varchar, 'other':Varchar) as $expr1] }
└─BatchScan { table: t1, columns: [t1.c1], distribution: SomeShard }
133 changes: 129 additions & 4 deletions src/frontend/src/binder/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,127 @@ impl Binder {
true
}

/// Remembers the column identifier seen in the first arm, or checks that a
/// later arm refers to the same one. Used in `check_convert_simple_form`.
///
/// Returns `false` when `test_expr` is not an `Expr::Identifier`, when the
/// remembered expression is not an identifier, or when the two identifiers'
/// `real_value()` differ. When nothing has been remembered yet, `test_expr`
/// is stored into `col_expr` and `true` is returned.
fn compare_or_set(col_expr: &mut Option<Expr>, test_expr: Expr) -> bool {
    let Expr::Identifier(candidate) = test_expr else {
        return false;
    };
    match col_expr {
        // First arm: nothing recorded so far, so this identifier becomes the reference.
        None => {
            *col_expr = Some(Expr::Identifier(candidate));
            true
        }
        // Subsequent arms: must name the very same identifier.
        Some(Expr::Identifier(known)) => known.real_value() == candidate.real_value(),
        Some(_) => false,
    }
}

/// Checks the shape required of every `when` condition by
/// `check_convert_simple_form`: the operator must be `BinaryOperator::Eq` and
/// the operands must be either `<identifier> <Eq> <constant>` or
/// `<constant> <Eq> <identifier>`, where a constant is an `Expr::Value`.
fn check_invariant(left: Expr, op: BinaryOperator, right: Expr) -> bool {
    op == BinaryOperator::Eq
        && matches!(
            (&left, &right),
            (Expr::Identifier(_), Expr::Value(_)) | (Expr::Value(_), Expr::Identifier(_))
        )
}

/// Handles the condition of a single `when` arm for
/// `check_convert_simple_form` (invoked once per arm).
///
/// `ident_expr` is matched against (or recorded into) `column_expr` via
/// `compare_or_set`; `constant_expr` is then bound with `bind_expr_inner` and
/// the bound expression is appended to `inputs`.
///
/// Returns `false`, leaving `inputs` untouched, if the identifier check
/// fails or the constant cannot be bound.
fn try_extract_simple_form(
    &mut self,
    ident_expr: Expr,
    constant_expr: Expr,
    column_expr: &mut Option<Expr>,
    inputs: &mut Vec<ExprImpl>,
) -> bool {
    if !Self::compare_or_set(column_expr, ident_expr) {
        return false;
    }
    match self.bind_expr_inner(constant_expr) {
        Ok(bound_constant) => {
            inputs.push(bound_constant);
            true
        }
        Err(_) => false,
    }
}

/// Checks whether a `case when` expression without an operand has the simple form
/// `select case when <expr_1 = constant> (...with same pattern...) else <constant> end;`
/// If so, this expression could also be converted to constant lookup.
///
/// Every condition must be `<identifier> = <constant>` or
/// `<constant> = <identifier>` (see `check_invariant`), and every arm must
/// refer to the same identifier (see `compare_or_set`).
///
/// On success, returns `true` and `constant_lookup_inputs` receives, in order:
/// the bound identifier inserted at index 0, then for each arm the bound
/// constant followed by that arm's result, and finally `fallback` if present.
///
/// On failure, returns `false` and `constant_lookup_inputs` is left
/// unmodified: the per-arm inputs are staged in a local buffer and only
/// committed once every arm and the operand have been validated and bound.
fn check_convert_simple_form(
    &mut self,
    conditions: Vec<Expr>,
    results_expr: Vec<ExprImpl>,
    fallback: Option<ExprImpl>,
    constant_lookup_inputs: &mut Vec<ExprImpl>,
) -> bool {
    let mut column_expr = None;
    // Each arm contributes two entries: the bound constant and the result.
    let mut arm_inputs = Vec::with_capacity(conditions.len() * 2);

    for (condition, result) in zip_eq_fast(conditions, results_expr) {
        let Expr::BinaryOp { left, op, right } = condition else {
            return false;
        };
        // `check_invariant` takes its arguments by value, so it needs copies;
        // clone the inner expressions directly rather than the boxes.
        if !Self::check_invariant((*left).clone(), op.clone(), (*right).clone()) {
            return false;
        }
        // Inspect the variant in place (no clone) to decide which side is
        // the identifier and which is the constant.
        let (ident_expr, constant_expr) = if matches!(*left, Expr::Identifier(_)) {
            (*left, *right)
        } else {
            (*right, *left)
        };
        if !self.try_extract_simple_form(
            ident_expr,
            constant_expr,
            &mut column_expr,
            &mut arm_inputs,
        ) {
            return false;
        }
        arm_inputs.push(result);
    }

    // The operand is only known after the arms were scanned; `None` means
    // there were no arms at all.
    let Some(operand) = column_expr else {
        return false;
    };
    let Ok(bound_operand) = self.bind_expr_inner(operand) else {
        return false;
    };

    // Everything validated: commit. Insert operand first, then the arms.
    constant_lookup_inputs.insert(0, bound_operand);
    constant_lookup_inputs.extend(arm_inputs);

    // fallback insertion
    if let Some(expr) = fallback {
        constant_lookup_inputs.push(expr);
    }

    true
}

/// The helper function to check if the current case-when
/// expression in `bind_case` could be optimized
/// into `ConstantLookupExpression`
Expand All @@ -547,9 +668,6 @@ impl Binder {
return false;
}

// TODO(Zihao): we could possibly optimize some simple cases when
// `operand` is None in the future, the current choice is not conducting the optimization.
// e.g., select case when c1 = 1 then (...) when (same pattern) then (... ) [else (...)] end from t1;
if let Some(operand) = operand {
let Ok(operand) = self.bind_expr_inner(*operand) else {
return false;
Expand All @@ -562,7 +680,14 @@ impl Binder {
}
constant_lookup_inputs.push(operand);
} else {
return false;
// Try converting to simple form
// see the example as illustrated in `check_convert_simple_form`
return self.check_convert_simple_form(
conditions,
results_expr,
fallback,
constant_lookup_inputs,
);
}

for (condition, result) in zip_eq_fast(conditions, results_expr) {
Expand Down

0 comments on commit 7f6ab49

Please sign in to comment.