From 1a386bc56158d7addf954e170b3821978b7a11fb Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Fri, 7 Jun 2024 23:41:20 -0700 Subject: [PATCH] build: Enable spark-4.0 Spark tests --- .../src/main/java/org/apache/comet/parquet/TypeUtil.java | 9 ++++++++- .../scala/org/apache/comet/serde/QueryPlanSerde.scala | 2 -- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/common/src/main/java/org/apache/comet/parquet/TypeUtil.java b/common/src/main/java/org/apache/comet/parquet/TypeUtil.java index b8b7ff525..5bd5896d1 100644 --- a/common/src/main/java/org/apache/comet/parquet/TypeUtil.java +++ b/common/src/main/java/org/apache/comet/parquet/TypeUtil.java @@ -27,6 +27,7 @@ import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; import org.apache.parquet.schema.Types; +import org.apache.spark.package$; import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException; import org.apache.spark.sql.types.*; @@ -169,6 +170,7 @@ && isUnsignedIntTypeMatched(logicalTypeAnnotation, 64)) { break; case INT96: if (sparkType == TimestampNTZType$.MODULE$) { + if (isSpark40Plus()) return; convertErrorForTimestampNTZ(typeName.name()); } else if (sparkType == DataTypes.TimestampType) { return; @@ -218,7 +220,8 @@ private static void validateTimestampType( // Throw an exception if the Parquet type is TimestampLTZ and the Catalyst type is TimestampNTZ. // This is to avoid mistakes in reading the timestamp values. if (((TimestampLogicalTypeAnnotation) logicalTypeAnnotation).isAdjustedToUTC() - && sparkType == TimestampNTZType$.MODULE$) { + && sparkType == TimestampNTZType$.MODULE$ + && !isSpark40Plus()) { convertErrorForTimestampNTZ("int64 time(" + logicalTypeAnnotation + ")"); } } @@ -278,4 +281,8 @@ private static boolean isUnsignedIntTypeMatched( && !((IntLogicalTypeAnnotation) logicalTypeAnnotation).isSigned() && ((IntLogicalTypeAnnotation) logicalTypeAnnotation).getBitWidth() == bitWidth; } + + private static boolean isSpark40Plus() { + return package$.MODULE$.SPARK_VERSION().compareTo("4.0") >= 0; + } } diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 24c7e1452..079db0306 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -63,7 +63,6 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim _: DoubleType | _: StringType | _: BinaryType | _: TimestampType | _: DecimalType | _: DateType | _: BooleanType | _: NullType => true - // `TimestampNTZType` is private in Spark 3.2. case dt if isTimestampNTZType(dt) => true case dt => emitWarning(s"unsupported Spark data type: $dt") @@ -2246,7 +2245,6 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim case _: ByteType | _: ShortType | _: IntegerType | _: LongType | _: FloatType | _: DoubleType | _: StringType | _: DateType | _: DecimalType | _: BooleanType => true - // `TimestampNTZType` is private in Spark 3.2/3.3. case dt if isTimestampNTZType(dt) => true case _ => false }