From 303d3031b92b928651e12ec8f83c0f8b5044d925 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 12 Dec 2023 10:25:27 -0800 Subject: [PATCH] fix: volatile expressions should not be target of common subexpt elimination --- datafusion/expr/src/expr.rs | 17 +++++++++++++++++ .../optimizer/src/common_subexpr_eliminate.rs | 10 +++++++--- .../sqllogictest/test_files/functions.slt | 5 +++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 958f4f4a3456..3080acbfe88a 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1692,6 +1692,23 @@ fn create_names(exprs: &[Expr]) -> Result { .join(", ")) } +/// Whether the given expression is volatile, i.e. whether it can return different results +/// when evaluated multiple times with the same input. +pub fn is_volatile(expr: &Expr) -> bool { + match expr { + Expr::ScalarFunction(func) => match func.func_def { + ScalarFunctionDefinition::BuiltIn(func) => match func { + BuiltinScalarFunction::Random => true, + _ => false, + }, + // TODO: Add volatile flag to UDFs + ScalarFunctionDefinition::UDF(_) => false, + ScalarFunctionDefinition::Name(_) => false, + }, + _ => false, + } +} + #[cfg(test)] mod test { use crate::expr::Cast; diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 1d21407a6985..b42b095a8c88 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -29,7 +29,7 @@ use datafusion_common::tree_node::{ use datafusion_common::{ internal_err, Column, DFField, DFSchema, DFSchemaRef, DataFusionError, Result, }; -use datafusion_expr::expr::Alias; +use datafusion_expr::expr::{is_volatile, Alias}; use datafusion_expr::logical_plan::{ Aggregate, Filter, LogicalPlan, Projection, Sort, Window, }; @@ -113,6 +113,8 @@ impl CommonSubexprEliminate { let Projection { expr, input, .. } = projection; let input_schema = Arc::clone(input.schema()); let mut expr_set = ExprSet::new(); + + // Visit expr list and build expr identifier to occuring count map (`expr_set`). let arrays = to_arrays(expr, input_schema, &mut expr_set, ExprMask::Normal)?; let (mut new_expr, new_input) = @@ -527,11 +529,13 @@ impl ExprMask { | Expr::Wildcard { .. } ); + let is_volatile = is_volatile(expr); + let is_aggr = matches!(expr, Expr::AggregateFunction(..)); match self { - Self::Normal => is_normal_minus_aggregates || is_aggr, - Self::NormalAndAggregates => is_normal_minus_aggregates, + Self::Normal => is_volatile || is_normal_minus_aggregates || is_aggr, + Self::NormalAndAggregates => is_volatile || is_normal_minus_aggregates, } } } diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 4f55ea316bb9..ad570b3735ae 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -995,3 +995,8 @@ query ? SELECT find_in_set(NULL, NULL) ---- NULL + +query B +SELECT r1 == r2 FROM (SELECT random() r1, random() r2) +---- +false