
fix: Fix corrupted AggregateMode when transforming plan parameters #118

Merged · 5 commits · Feb 28, 2024
Changes from 4 commits
29 changes: 25 additions & 4 deletions spark/src/main/scala/org/apache/spark/sql/comet/operators.scala
@@ -149,6 +149,9 @@ abstract class CometNativeExec extends CometExec {
/**
* The serialized native query plan, optional. This is only defined when the current node is the
* "boundary" node between native and Spark.
*
* Note that derived classes of `CometNativeExec` must have `serializedPlanOpt` as the last product
Contributor:

I'm not sure about this change. It seems like a tight constraint, and currently there's no way to enforce it.

It's possible for developers to add a new CometNativeExec without serializedPlanOpt, or to put it in a different order.

Member:

Alternatively, we could use reflection to look for the serializedPlanOpt field in the product and only apply the map function to it. With this, we wouldn't have to enforce the ordering.

Member Author (@viirya), Feb 27, 2024:

Although you can find the serializedPlanOpt field among all declared fields of the product, you don't know its position in the constructor (the order of getDeclaredFields is not guaranteed). We need these arguments in the exact order so we can reconstruct the product by calling makeCopy.

Member:

Can't we just iterate over the product and check each field one by one? If it is not serializedPlanOpt, we simply copy it to the output array; if it is, we apply the function.

Member Author:

Hmm, None is a bit of trouble, since there is no inner value to inspect via reflection (it simply doesn't have one).

Member Author:

Yeah, it seems getDeclaredFields doesn't guarantee the order of the returned fields. It is a bit dangerous to assume its order is the same as the product elements.
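
A minimal, stand-alone sketch of this ordering concern, assuming a made-up case class `Node` (not a Comet operator): the JDK documents that `getDeclaredFields` returns fields in no particular order, so a reflected field index cannot be assumed to match the product/constructor position that `makeCopy` needs.

```scala
// Illustration only: Node is hypothetical, not an actual Comet operator.
case class Node(a: Int, serializedPlanOpt: Option[Array[Byte]], c: String)

object FieldOrderCheck extends App {
  // getDeclaredFields makes no ordering guarantee, so any agreement with
  // the product order below is incidental rather than something to rely on.
  val reflectedOrder = classOf[Node].getDeclaredFields.map(_.getName).toSeq
  val productOrder = Seq("a", "serializedPlanOpt", "c")
  println(s"reflected order: $reflectedOrder")
  println(s"product order:   $productOrder")
}
```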

Member:

I see, thanks for looking into this! I think the current approach is OK too, as long as we are aware that serializedPlanOpt should be declared as the last field.

Member:

Not sure if this is useful: Scala 2.13 added productElementNames, but we still need to handle 2.12.
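
A hedged sketch of how productElementNames could be used on Scala 2.13 only (the `Example` class is hypothetical): it pairs element names with `productIterator`, so the `serializedPlanOpt` element can be found by name rather than by position.

```scala
// Scala 2.13+ only; Example is a made-up case class, not an actual Comet operator.
case class Example(child: String, serializedPlanOpt: Option[Array[Byte]])

object ProductNamesDemo extends App {
  val node = Example("scan", None)
  val newArgs: Array[AnyRef] = node.productElementNames
    .zip(node.productIterator)
    .map {
      case ("serializedPlanOpt", _) => Some(Array[Byte](1, 2, 3)) // transform only this element
      case (_, value)               => value.asInstanceOf[AnyRef] // copy everything else as-is
    }
    .toArray
  println(newArgs.mkString(", "))
}
```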

Member Author:

I couldn't make the reflection approach work. Instead of spending more time on it, I used the wrapper case class approach.

Member:

OK np. Thanks for trying it.

* parameter.
*/
def serializedPlanOpt: Option[Array[Byte]]
Contributor:

I think a possibly better approach would be to define a new case class to hold the serialized plan, which could then be pattern matched, such as:

case class SerializedPlan(val bytes: Array[Byte])

or

case class SerializedPlan(val bytesOpt: Option[Array[Byte]])

Contributor:

@viirya what about this approach?

When the bytes are held in a case class, it can be pattern matched.

Member Author:

I think this approach should work. I'm just trying a possible reflection approach first; if that doesn't work, I will add a wrapper case class like that.
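
A brief sketch of the wrapper idea suggested above, with illustrative names (not necessarily the final API): because `SerializedPlan` is a concrete runtime class, the pattern match no longer depends on the erased type argument of `Option`, and other Option-typed fields pass through untouched.

```scala
// Illustrative wrapper; field and object names are assumptions, not the merged Comet API.
case class SerializedPlan(bytesOpt: Option[Array[Byte]])

object WrapperDemo extends App {
  def transform(planBytes: Array[Byte])(arg: Any): AnyRef = arg match {
    case SerializedPlan(None) => SerializedPlan(Some(planBytes)) // fill in the serialized plan
    case other: AnyRef        => other                           // e.g. Option[AggregateMode] is untouched
    case null                 => null
  }

  println(transform(Array[Byte](42))(SerializedPlan(None))) // bytes filled in
  println(transform(Array[Byte](42))(Some("Partial")))      // unrelated Option passes through
}
```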


@@ -276,13 +279,31 @@ abstract class CometNativeExec extends CometExec {
}
}

/**
* Copies product elements to the output array except the last one. The last element is
* transformed using the provided function. This is used to transform the `serializedPlanOpt`
* parameter in case classes of Comet native operators, where `serializedPlanOpt` is always
* the last product element. That is because we cannot match `Option[Array[Byte]]` due to type
* erasure.
*/
private def mapProduct(f: Any => AnyRef): Array[AnyRef] = {
val arr = Array.ofDim[AnyRef](productArity)
var i = 0
while (i < arr.length - 1) {
arr(i) = productElement(i).asInstanceOf[AnyRef]
i += 1
}
arr(arr.length - 1) = f(productElement(arr.length - 1))
arr
}

/**
* Converts this native Comet operator and its children into a native block which can be
* executed as a whole (i.e., in a single JNI call) from the native side.
*/
def convertBlock(): CometNativeExec = {
def transform(arg: Any): AnyRef = arg match {
case serializedPlan: Option[Array[Byte]] if serializedPlan.isEmpty =>
case serializedPlan: Option[_] if serializedPlan.isEmpty =>
val out = new ByteArrayOutputStream()
nativeOp.writeTo(out)
out.close()
@@ -291,7 +312,7 @@
case null => null
}

val newArgs = mapProductIterator(transform)
val newArgs = mapProduct(transform)
makeCopy(newArgs).asInstanceOf[CometNativeExec]
}

@@ -300,13 +321,13 @@
*/
def cleanBlock(): CometNativeExec = {
def transform(arg: Any): AnyRef = arg match {
case serializedPlan: Option[Array[Byte]] if serializedPlan.isDefined =>
case serializedPlan: Option[_] if serializedPlan.isDefined =>
None
case other: AnyRef => other
case null => null
}

val newArgs = mapProductIterator(transform)
val newArgs = mapProduct(transform)
makeCopy(newArgs).asInstanceOf[CometNativeExec]
}

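The doc comment on mapProduct cites type erasure as the reason `Option[Array[Byte]]` cannot be matched directly. A minimal sketch of that pitfall, with a string merely standing in for an AggregateMode value:

```scala
// After erasure, a type pattern like `case x: Option[Array[Byte]]` matches ANY Option at
// runtime, so an Option[AggregateMode] product element would also be rewritten and corrupted.
object ErasureDemo extends App {
  val modeOpt: Any = Some("Partial") // stand-in for Option[AggregateMode]

  val looksLikePlanBytes = modeOpt match {
    case _: Option[Array[Byte]] => true // compiles with an "unchecked" warning, still matches
    case _                      => false
  }

  println(looksLikePlanBytes) // prints true: the element type is not checked at runtime
}
```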
16 changes: 15 additions & 1 deletion spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
@@ -31,13 +31,14 @@ import org.apache.spark.sql.{AnalysisException, Column, CometTestBase, DataFrame
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStatistics, CatalogTable}
import org.apache.spark.sql.catalyst.expressions.Hex
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateMode
import org.apache.spark.sql.comet.{CometBroadcastExchangeExec, CometFilterExec, CometHashAggregateExec, CometProjectExec, CometScanExec, CometTakeOrderedAndProjectExec}
import org.apache.spark.sql.comet.execution.shuffle.{CometColumnarShuffle, CometShuffleExchangeExec}
import org.apache.spark.sql.execution.{CollectLimitExec, ProjectExec, UnionExec}
import org.apache.spark.sql.execution.exchange.BroadcastExchangeExec
import org.apache.spark.sql.execution.joins.{BroadcastNestedLoopJoinExec, CartesianProductExec, SortMergeJoinExec}
import org.apache.spark.sql.execution.window.WindowExec
import org.apache.spark.sql.functions.{date_add, expr}
import org.apache.spark.sql.functions.{date_add, expr, sum}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE
import org.apache.spark.unsafe.types.UTF8String
@@ -56,6 +57,19 @@ class CometExecSuite extends CometTestBase {
}
}

test("Fix corrupted AggregateMode when transforming plan parameters") {
withParquetTable((0 until 5).map(i => (i, i + 1)), "table") {
val df = sql("SELECT * FROM table").groupBy($"_1").agg(sum("_2"))
val agg = stripAQEPlan(df.queryExecution.executedPlan).collectFirst {
case s: CometHashAggregateExec => s
}.get

assert(agg.mode.isDefined && agg.mode.get.isInstanceOf[AggregateMode])
val newAgg = agg.cleanBlock().asInstanceOf[CometHashAggregateExec]
assert(newAgg.mode.isDefined && newAgg.mode.get.isInstanceOf[AggregateMode])
}
}

test("CometBroadcastExchangeExec") {
withSQLConf(CometConf.COMET_EXEC_BROADCAST_ENABLED.key -> "true") {
withParquetTable((0 until 5).map(i => (i, i + 1)), "tbl_a") {