Skip to content

Commit

Permalink
fix: substring with negative indices should produce correct result (#470)
Browse files Browse the repository at this point in the history
  • Loading branch information
sonhmai authored May 26, 2024
1 parent 5bfe46d commit 7ba5693
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
3 changes: 2 additions & 1 deletion core/src/execution/datafusion/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,8 @@ impl PhysicalPlanner {
let child = self.create_expr(expr.child.as_ref().unwrap(), input_schema)?;
// Spark Substring's start is 1-based when start > 0
let start = expr.start - i32::from(expr.start > 0);
let len = expr.len;
// substring negative len is treated as 0 in Spark
let len = std::cmp::max(expr.len, 0);

Ok(Arc::new(SubstringExec::new(
child,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
// Runs substring() queries with positive, negative, and zero start/length
// arguments over a small Parquet-backed table.
// NOTE(review): checkSparkAnswerAndOperator presumably compares the result with
// vanilla Spark and checks that the native operator was used — confirm in CometTestBase.
test("string type and substring") {
// Five rows: _1 is "0".."4", _2 is "100".."104".
withParquetTable((0 until 5).map(i => (i.toString, (i + 100).toString)), "tbl") {
// Positive start and positive length.
checkSparkAnswerAndOperator("SELECT _1, substring(_2, 2, 2) FROM tbl")
// Negative length; the planner comment in this commit says Spark treats it as 0.
checkSparkAnswerAndOperator("SELECT _1, substring(_2, 2, -2) FROM tbl")
// Negative start (presumably counted from the end of the string — verify against Spark docs).
checkSparkAnswerAndOperator("SELECT _1, substring(_2, -2, 2) FROM tbl")
// Negative start combined with negative length.
checkSparkAnswerAndOperator("SELECT _1, substring(_2, -2, -2) FROM tbl")
// Negative start with a length larger than any value in _2 (each is 3 characters).
checkSparkAnswerAndOperator("SELECT _1, substring(_2, -2, 10) FROM tbl")
// Zero length with start 0 and with start 1.
checkSparkAnswerAndOperator("SELECT _1, substring(_2, 0, 0) FROM tbl")
checkSparkAnswerAndOperator("SELECT _1, substring(_2, 1, 0) FROM tbl")
}
}

Expand Down

0 comments on commit 7ba5693

Please sign in to comment.