From 7ba569357cadade64b49b6bdf5f0946f81f95301 Mon Sep 17 00:00:00 2001 From: Son <14060682+sonhmai@users.noreply.github.com> Date: Mon, 27 May 2024 03:13:19 +0700 Subject: [PATCH] fix: substring with negative indices should produce correct result (#470) --- core/src/execution/datafusion/planner.rs | 3 ++- .../test/scala/org/apache/comet/CometExpressionSuite.scala | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/core/src/execution/datafusion/planner.rs b/core/src/execution/datafusion/planner.rs index 01d892381..7a37e3aae 100644 --- a/core/src/execution/datafusion/planner.rs +++ b/core/src/execution/datafusion/planner.rs @@ -395,7 +395,8 @@ impl PhysicalPlanner { let child = self.create_expr(expr.child.as_ref().unwrap(), input_schema)?; // Spark Substring's start is 1-based when start > 0 let start = expr.start - i32::from(expr.start > 0); - let len = expr.len; + // substring negative len is treated as 0 in Spark + let len = std::cmp::max(expr.len, 0); Ok(Arc::new(SubstringExec::new( child, diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 992615084..1afdd78ec 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -156,6 +156,12 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { test("string type and substring") { withParquetTable((0 until 5).map(i => (i.toString, (i + 100).toString)), "tbl") { checkSparkAnswerAndOperator("SELECT _1, substring(_2, 2, 2) FROM tbl") + checkSparkAnswerAndOperator("SELECT _1, substring(_2, 2, -2) FROM tbl") + checkSparkAnswerAndOperator("SELECT _1, substring(_2, -2, 2) FROM tbl") + checkSparkAnswerAndOperator("SELECT _1, substring(_2, -2, -2) FROM tbl") + checkSparkAnswerAndOperator("SELECT _1, substring(_2, -2, 10) FROM tbl") + checkSparkAnswerAndOperator("SELECT _1, substring(_2, 0, 0) FROM tbl") + checkSparkAnswerAndOperator("SELECT _1, substring(_2, 1, 0) FROM tbl") } }