apache · viirya · Mar 5, 2024 · Mar 5, 2024 · Mar 5, 2024 · viirya
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -349,6 +349,29 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde {
       expr: Expression,
       input: Seq[Attribute],
       binding: Boolean = true): Option[Expr] = {
+    // Wraps `childExpr` in a Cast protobuf expression whose target type is `dt`.
+    // Yields None when either the child expression or the serialized data type is
+    // missing. The time zone is set to "UTC" when `timeZoneId` is empty.
+    def castToProto(
+        timeZoneId: Option[String],
+        dt: DataType,
+        childExpr: Option[Expr]): Option[Expr] = {
+      // The target type is serialized first, whether or not a child expression exists.
+      val targetType = serializeDataType(dt)
+      (childExpr, targetType) match {
+        case (Some(child), Some(protoType)) =>
+          // Default to "UTC" when the caller supplies no time zone id.
+          val cast = ExprOuterClass.Cast
+            .newBuilder()
+            .setChild(child)
+            .setDatatype(protoType)
+            .setTimezone(timeZoneId.getOrElse("UTC"))
+          Some(ExprOuterClass.Expr.newBuilder().setCast(cast).build())
+        case _ =>
+          // Child expression or serialized data type is absent: nothing to build.
+          None
+      }
+    }
 
     def exprToProtoInternal(expr: Expression, inputs: Seq[Attribute]): Option[Expr] = {
       SQLConf.get
@@ -363,24 +386,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde {
 
         case Cast(child, dt, timeZoneId, _) =>
           val childExpr = exprToProtoInternal(child, inputs)
-          val dataType = serializeDataType(dt)
-
-          if (childExpr.isDefined && dataType.isDefined) {
-            val castBuilder = ExprOuterClass.Cast.newBuilder()
-            castBuilder.setChild(childExpr.get)
-            castBuilder.setDatatype(dataType.get)
-
-            val timeZone = timeZoneId.getOrElse("UTC")
-            castBuilder.setTimezone(timeZone)
-
-            Some(
-              ExprOuterClass.Expr
-                .newBuilder()
-                .setCast(castBuilder)
-                .build())
-          } else {
-            None
-          }
+          castToProto(timeZoneId, dt, childExpr)
 
         case add @ Add(left, right, _) if supportedDataType(left.dataType) =>
           val leftExpr = exprToProtoInternal(left, inputs)
@@ -1494,7 +1500,10 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde {
 
         case a @ Coalesce(_) =>
           val exprChildren = a.children.map(exprToProtoInternal(_, inputs))
-          scalarExprToProto("coalesce", exprChildren: _*)
+          val childExpr = scalarExprToProto("coalesce", exprChildren: _*)
+          // TODO: Remove this cast once a new DataFusion release includes the fix from
+          // https://github.com/apache/arrow-datafusion/pull/9459
+          castToProto(None, a.dataType, childExpr)
 
         // With Spark 3.4, CharVarcharCodegenUtils.readSidePadding gets called to pad spaces for
         // char types. Use rpad to achieve the behavior.

diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -34,6 +34,19 @@ import org.apache.comet.CometSparkSessionExtensions.{isSpark32, isSpark33Plus, i
 class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   import testImplicits._
 
+  test("coalesce should return correct datatype") {
+    // The same query is checked with dictionaryEnabled set to true and then to false.
+    val query = "SELECT coalesce(cast(_18 as date), cast(_19 as date), _20) FROM tbl"
+    for (useDictionary <- Seq(true, false)) {
+      withTempDir { tmpDir =>
+        val parquetPath = new Path(tmpDir.toURI.toString, "test.parquet")
+        makeParquetFileAllTypes(parquetPath, dictionaryEnabled = useDictionary, 10000)
+        // `tbl` in the query is the table name handed to withParquetTable below.
+        withParquetTable(parquetPath.toString, "tbl") { checkSparkAnswerAndOperator(query) }
+      }
+    }
+  }
+
   test("bitwise shift with different left/right types") {
     Seq(false, true).foreach { dictionary =>
       withSQLConf("parquet.enable.dictionary" -> dictionary.toString) {