-
Notifications
You must be signed in to change notification settings - Fork 169
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Implement Spark-compatible CAST between integer types #340
Changes from 9 commits
54a5f59
8b29641
a089552
80e3ddf
1ab192a
e71b417
a3ef6a3
046985e
6f2306a
d5c42f3
cefd814
7b9698d
eaca87d
11640f9
2da677c
4ee1963
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,8 @@ import org.apache.spark.sql.functions.col | |
import org.apache.spark.sql.internal.SQLConf | ||
import org.apache.spark.sql.types.{DataType, DataTypes} | ||
|
||
import org.apache.comet.Constants.EMPTY_STRING | ||
|
||
class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { | ||
import testImplicits._ | ||
|
||
|
@@ -642,6 +644,30 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { | |
castTest(generateTimestamps(), DataTypes.DateType) | ||
} | ||
|
||
test("cast short to byte") { | ||
castTest(generateShorts, DataTypes.ByteType) | ||
} | ||
|
||
test("cast int to byte") { | ||
castTest(generateInts, DataTypes.ByteType) | ||
} | ||
|
||
test("cast int to short") { | ||
castTest(generateInts, DataTypes.ShortType) | ||
} | ||
|
||
test("cast long to byte") { | ||
castTest(generateLongs, DataTypes.ByteType) | ||
} | ||
|
||
test("cast long to short") { | ||
castTest(generateLongs, DataTypes.ShortType) | ||
} | ||
|
||
test("cast long to int") { | ||
castTest(generateLongs, DataTypes.IntegerType) | ||
} | ||
|
||
private def generateFloats(): DataFrame = { | ||
val r = new Random(0) | ||
val values = Seq( | ||
|
@@ -807,11 +833,22 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { | |
} else { | ||
// Spark 3.2 and 3.3 have a different error message format so we can't do a direct | ||
// comparison between Spark and Comet. | ||
// In the case of CAST_INVALID_INPUT | ||
// Spark message is in format `invalid input syntax for type TYPE: VALUE` | ||
// Comet message is in format `The value 'VALUE' of the type FROM_TYPE cannot be cast to TO_TYPE` | ||
// We just check that the comet message contains the same invalid value as the Spark message | ||
val sparkInvalidValue = sparkMessage.substring(sparkMessage.indexOf(':') + 2) | ||
assert(cometMessage.contains(sparkInvalidValue)) | ||
// In the case of CAST_OVERFLOW | ||
// Spark message is in format `Casting VALUE to TO_TYPE causes overflow` | ||
// Comet message is in format `The value 'VALUE' of the type FROM_TYPE cannot be cast to TO_TYPE | ||
// due to an overflow` | ||
// We check if the comet message contains 'overflow'. | ||
val sparkInvalidValue = if (sparkMessage.indexOf(':') == -1) { | ||
EMPTY_STRING | ||
} else { | ||
sparkMessage.substring(sparkMessage.indexOf(':') + 2) | ||
} | ||
assert( | ||
cometMessage.contains(sparkInvalidValue) || cometMessage.contains("overflow")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you are right, my bad 😅 . so incase sparkMessage doesnt have ':' should I assert on just commetMessage.contains("overflow")
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, something like that. I haven't reviewed the overflow messages to see if they contain There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doesnt look like overflow error message has ':' in it, i ran spark.sql("select cast(9223372036854775807 as int)").show() in my local on various spark versions. 3.4 - [CAST_OVERFLOW] The value 9223372036854775807L of the type "BIGINT" cannot be cast to "INT" due to an overflow. Use |
||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These methods already exist in main but have different naming, so I think you need to upmerge/rebase against main.
Example: