diff --git a/common/pom.xml b/common/pom.xml index cc1f44481..b59d7b187 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -181,6 +181,7 @@ under the License. src/main/${shims.majorVerSrc} src/main/${shims.minorVerSrc} + src/main/${shims.pre35Src} diff --git a/common/src/main/spark-3.3/org/apache/comet/shims/ShimBatchReader.scala b/common/src/main/spark-3.3/org/apache/comet/shims/ShimBatchReader.scala new file mode 100644 index 000000000..1b1742a3e --- /dev/null +++ b/common/src/main/spark-3.3/org/apache/comet/shims/ShimBatchReader.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.comet.shims
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+
+object ShimBatchReader {
+  /** Spark 3.3 shim: builds a [[PartitionedFile]] for `file` with a direct constructor call. */
+  def newPartitionedFile(partitionValues: InternalRow, file: String): PartitionedFile =
+    PartitionedFile(
+      partitionValues,
+      file,
+      -1L, // -1 means we read the entire file
+      -1L,
+      Array.empty[String],
+      0L,
+      0L)
+}
diff --git a/common/src/main/spark-3.4/org/apache/comet/shims/ShimBatchReader.scala b/common/src/main/spark-3.4/org/apache/comet/shims/ShimBatchReader.scala
new file mode 100644
index 000000000..352b91145
--- /dev/null
+++ b/common/src/main/spark-3.4/org/apache/comet/shims/ShimBatchReader.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.shims
+
+import org.apache.spark.paths.SparkPath
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+
+object ShimBatchReader {
+  /** Spark 3.4 shim: builds a [[PartitionedFile]] for `file` with a direct constructor call. */
+  def newPartitionedFile(partitionValues: InternalRow, file: String): PartitionedFile =
+    PartitionedFile(
+      partitionValues,
+      SparkPath.fromPathString(file), // the plain path string is wrapped as a SparkPath
+      -1L, // -1 means we read the entire file
+      -1L,
+      Array.empty[String],
+      0L,
+      0L)
+}
diff --git a/common/src/main/spark-3.5/org/apache/comet/shims/ShimBatchReader.scala b/common/src/main/spark-3.5/org/apache/comet/shims/ShimBatchReader.scala
new file mode 100644
index 000000000..46bbcb21a
--- /dev/null
+++ b/common/src/main/spark-3.5/org/apache/comet/shims/ShimBatchReader.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.shims
+
+import org.apache.spark.paths.SparkPath
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+
+object ShimBatchReader {
+  /** Spark 3.5 shim: builds a [[PartitionedFile]] for `file` with a direct constructor call. */
+  def newPartitionedFile(partitionValues: InternalRow, file: String): PartitionedFile =
+    PartitionedFile(
+      partitionValues,
+      SparkPath.fromPathString(file), // the plain path string is wrapped as a SparkPath
+      -1L, // -1 means we read the entire file
+      -1L,
+      Array.empty[String],
+      0L,
+      0L,
+      Map.empty)
+}
diff --git a/common/src/main/spark-3.x/org/apache/comet/shims/ShimBatchReader.scala b/common/src/main/spark-3.x/org/apache/comet/shims/ShimBatchReader.scala
deleted file mode 100644
index ebb60d823..000000000
--- a/common/src/main/spark-3.x/org/apache/comet/shims/ShimBatchReader.scala
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */ - -package org.apache.comet.shims - -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.datasources.PartitionedFile - -object ShimBatchReader { - - // TODO: create specific shim per Spark version rather than use reflection - def newPartitionedFile(partitionValues: InternalRow, file: String): PartitionedFile = - classOf[PartitionedFile].getDeclaredConstructors - .map(c => - c.getParameterCount match { - case 5 => - c.newInstance( - partitionValues, - file, - Long.box(-1), // -1 means we read the entire file - Long.box(-1), - Array.empty[String]) - case 7 => - c.newInstance( - partitionValues, - c.getParameterTypes()(1) - .getConstructor(classOf[String]) - .newInstance(file) - .asInstanceOf[AnyRef], - Long.box(-1), // -1 means we read the entire file - Long.box(-1), - Array.empty[String], - Long.box(0), - Long.box(0)) - case 8 => - // Apache Spark 3.5.1 - c.newInstance( - partitionValues, - c.getParameterTypes()(1) - .getConstructor(classOf[String]) - .newInstance(file) - .asInstanceOf[AnyRef], - Long.box(-1), // -1 means we read the entire file - Long.box(-1), - Array.empty[String], - Long.box(0), - Long.box(0), - Map.empty) - }) - .head - .asInstanceOf[PartitionedFile] -} diff --git a/docs/source/user-guide/overview.md b/docs/source/user-guide/overview.md index 59daeca6c..58ff549c1 100644 --- a/docs/source/user-guide/overview.md +++ b/docs/source/user-guide/overview.md @@ -40,7 +40,6 @@ The following diagram illustrates the architecture of Comet: ## Current Status -<<<<<<< HEAD Comet currently supports the following versions of Apache Spark: - 3.2.x @@ -55,9 +54,6 @@ use only and should not be used in production yet. Note that Comet may not fully work with proprietary forks of Apache Spark such as the Spark versions offered by Cloud Service Providers. -======= -The project is currently integrated into Apache Spark 3.3, and 3.4. ->>>>>>> apache/main ## Feature Parity with Apache Spark