From 1b5cece522ea5854d2f30355916ef2255a4d6ada Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 11 Apr 2024 14:38:31 -0700 Subject: [PATCH] fix: Average expression in Comet Final should handle all null inputs from partial Spark aggregation --- core/src/execution/datafusion/expressions/avg.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/core/src/execution/datafusion/expressions/avg.rs b/core/src/execution/datafusion/expressions/avg.rs index e35ff6120e..847acae650 100644 --- a/core/src/execution/datafusion/expressions/avg.rs +++ b/core/src/execution/datafusion/expressions/avg.rs @@ -176,9 +176,15 @@ impl Accumulator for AvgAccumulator { } fn evaluate(&mut self) -> Result { - Ok(ScalarValue::Float64( - self.sum.map(|f| f / self.count as f64), - )) + if self.count == 0 { + // If all input are nulls, count will be 0 and we will get null after the division. + // This is consistent with Spark Average implementation. + return Ok(ScalarValue::Float64(None)); + } else { + Ok(ScalarValue::Float64( + self.sum.map(|f| f / self.count as f64), + )) + } } fn size(&self) -> usize {