diff --git a/core/src/parquet/read/values.rs b/core/src/parquet/read/values.rs index 9d9bbb3c9e..5447949d26 100644 --- a/core/src/parquet/read/values.rs +++ b/core/src/parquet/read/values.rs @@ -770,9 +770,9 @@ impl PlainDecoding for Int96TimestampMicrosType { // TODO: optimize this further as checking value one by one is not very efficient unsafe { - let micros = (day.read_unaligned() - JULIAN_DAY_OF_EPOCH) as i64 - * MICROS_PER_DAY - + nanos.read_unaligned() / 1000; + let micros = ((day.read_unaligned() - JULIAN_DAY_OF_EPOCH) as i64) + .wrapping_mul(MICROS_PER_DAY) + .wrapping_add(nanos.read_unaligned() / 1000); if unlikely(micros < JULIAN_GREGORIAN_SWITCH_OFF_TS) { panic!( @@ -797,8 +797,9 @@ impl PlainDecoding for Int96TimestampMicrosType { let nanos = &v[..INT96_DST_BYTE_WIDTH] as *const [u8] as *const u8 as *const i64; let day = &v[INT96_DST_BYTE_WIDTH..] as *const [u8] as *const u8 as *const i32; - let micros = (day.read_unaligned() - JULIAN_DAY_OF_EPOCH) as i64 * MICROS_PER_DAY - + nanos.read_unaligned() / 1000; + let micros = ((day.read_unaligned() - JULIAN_DAY_OF_EPOCH) as i64) + .wrapping_mul(MICROS_PER_DAY) + .wrapping_add(nanos.read_unaligned() / 1000); bit::memcpy_value( &micros, diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index 25343f933b..4f49c564b2 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -780,7 +780,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { test("cast TimestampType to LongType") { assume(CometSparkSessionExtensions.isSpark33Plus) - castTest(generateTimestamps(), DataTypes.LongType) + castTest(generateTimestampsExtended(), DataTypes.LongType) } ignore("cast TimestampType to FloatType") { @@ -884,6 +884,14 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { withNulls(values).toDF("b").withColumn("a", 
col("b").cast(DataTypes.DateType)).drop("b") } + // Extended values are Timestamps that are outside the range of dates supported by chrono::DateTime and + // therefore not supported by operations using it. + private def generateTimestampsExtended(): DataFrame = { + val values = Seq("290000-12-31T01:00:00+02:00") + generateTimestamps().unionByName( + values.toDF("str").select(col("str").cast(DataTypes.TimestampType).as("a"))) + } + private def generateTimestamps(): DataFrame = { val values = Seq(