diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml
index 71eb02a9e..1e347250f 100644
--- a/.github/workflows/pr_build.yml
+++ b/.github/workflows/pr_build.yml
@@ -47,13 +47,14 @@ jobs:
java_version: [8, 11, 17]
test-target: [rust, java]
spark-version: ['3.4']
+ scala-version: ['2.12', '2.13']
is_push_event:
- ${{ github.event_name == 'push' }}
exclude: # exclude java 11 for pull_request event
- java_version: 11
is_push_event: false
fail-fast: false
- name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }}
+ name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}-scala-${{matrix.scala-version}}/${{ matrix.test-target }}
runs-on: ${{ matrix.os }}
container:
image: amd64/rust
@@ -71,7 +72,7 @@ jobs:
name: Java test steps
uses: ./.github/actions/java-test
with:
- maven_opts: -Pspark-${{ matrix.spark-version }}
+ maven_opts: -Pspark-${{ matrix.spark-version }},scala-${{ matrix.scala-version }}
# upload test reports only for java 17
upload-test-reports: ${{ matrix.java_version == '17' }}
@@ -82,13 +83,16 @@ jobs:
java_version: [8, 11, 17]
test-target: [java]
spark-version: ['3.2', '3.3']
+ scala-version: ['2.12', '2.13']
exclude:
- java_version: 17
spark-version: '3.2'
- java_version: 11
spark-version: '3.2'
+ - spark-version: '3.2'
+ scala-version: '2.13'
fail-fast: false
- name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }}
+ name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}-scala-${{matrix.scala-version}}/${{ matrix.test-target }}
runs-on: ${{ matrix.os }}
container:
image: amd64/rust
@@ -102,7 +106,7 @@ jobs:
- name: Java test steps
uses: ./.github/actions/java-test
with:
- maven_opts: -Pspark-${{ matrix.spark-version }}
+ maven_opts: -Pspark-${{ matrix.spark-version }},scala-${{ matrix.scala-version }}
macos-test:
strategy:
@@ -111,9 +115,10 @@ jobs:
java_version: [8, 11, 17]
test-target: [rust, java]
spark-version: ['3.4']
+ scala-version: ['2.12', '2.13']
fail-fast: false
if: github.event_name == 'push'
- name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }}
+ name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}-scala-${{matrix.scala-version}}/${{ matrix.test-target }}
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
@@ -129,7 +134,7 @@ jobs:
name: Java test steps
uses: ./.github/actions/java-test
with:
- maven_opts: -Pspark-${{ matrix.spark-version }}
+ maven_opts: -Pspark-${{ matrix.spark-version }},scala-${{ matrix.scala-version }}
macos-aarch64-test:
strategy:
@@ -137,13 +142,14 @@ jobs:
java_version: [8, 11, 17]
test-target: [rust, java]
spark-version: ['3.4']
+ scala-version: ['2.12', '2.13']
is_push_event:
- ${{ github.event_name == 'push' }}
exclude: # exclude java 11 for pull_request event
- java_version: 11
is_push_event: false
fail-fast: false
- name: macos-14(Silicon)/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }}
+ name: macos-14(Silicon)/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}-scala-${{matrix.scala-version}}/${{ matrix.test-target }}
runs-on: macos-14
steps:
- uses: actions/checkout@v4
@@ -161,7 +167,7 @@ jobs:
name: Java test steps
uses: ./.github/actions/java-test
with:
- maven_opts: -Pspark-${{ matrix.spark-version }}
+ maven_opts: -Pspark-${{ matrix.spark-version }},scala-${{ matrix.scala-version }}
macos-aarch64-test-with-old-spark:
strategy:
@@ -169,13 +175,16 @@ jobs:
java_version: [8, 17]
test-target: [java]
spark-version: ['3.2', '3.3']
+ scala-version: ['2.12', '2.13']
exclude:
- java_version: 17
spark-version: '3.2'
- java_version: 8
spark-version: '3.3'
+ - spark-version: '3.2'
+ scala-version: '2.13'
fail-fast: false
- name: macos-14(Silicon)/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }}
+ name: macos-14(Silicon)/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}-scala-${{matrix.scala-version}}/${{ matrix.test-target }}
runs-on: macos-14
steps:
- uses: actions/checkout@v4
@@ -190,5 +199,5 @@ jobs:
name: Java test steps
uses: ./.github/actions/java-test
with:
- maven_opts: -Pspark-${{ matrix.spark-version }}
+ maven_opts: -Pspark-${{ matrix.spark-version }},scala-${{ matrix.scala-version }}
diff --git a/common/pom.xml b/common/pom.xml
index 540101d71..cc1f44481 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -179,7 +179,8 @@ under the License.
-
+
+
diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
index f2a793b31..ba8301ded 100644
--- a/common/src/main/scala/org/apache/comet/CometConf.scala
+++ b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -140,14 +140,14 @@ object CometConf {
.booleanConf
.createWithDefault(false)
- val COMET_COLUMNAR_SHUFFLE_ENABLED: ConfigEntry[Boolean] = conf(
- "spark.comet.columnar.shuffle.enabled")
- .doc(
- "Force Comet to only use columnar shuffle for CometScan and Spark regular operators. " +
- "If this is enabled, Comet native shuffle will not be enabled but only Arrow shuffle. " +
- "By default, this config is false.")
- .booleanConf
- .createWithDefault(false)
+ val COMET_COLUMNAR_SHUFFLE_ENABLED: ConfigEntry[Boolean] =
+ conf("spark.comet.columnar.shuffle.enabled")
+ .doc(
+ "Whether to enable Arrow-based columnar shuffle for Comet and Spark regular operators. " +
+ "If this is enabled, Comet prefers columnar shuffle than native shuffle. " +
+ "By default, this config is true.")
+ .booleanConf
+ .createWithDefault(true)
val COMET_SHUFFLE_ENFORCE_MODE_ENABLED: ConfigEntry[Boolean] =
conf("spark.comet.shuffle.enforceMode.enabled")
diff --git a/common/src/main/scala/org/apache/comet/vector/NativeUtil.scala b/common/src/main/scala/org/apache/comet/vector/NativeUtil.scala
index eb731f9d0..595c0a427 100644
--- a/common/src/main/scala/org/apache/comet/vector/NativeUtil.scala
+++ b/common/src/main/scala/org/apache/comet/vector/NativeUtil.scala
@@ -66,7 +66,7 @@ class NativeUtil {
val arrowArray = ArrowArray.allocateNew(allocator)
Data.exportVector(
allocator,
- getFieldVector(valueVector),
+ getFieldVector(valueVector, "export"),
provider,
arrowArray,
arrowSchema)
diff --git a/common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala b/common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala
index 7d920e1be..2300e109a 100644
--- a/common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala
+++ b/common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala
@@ -242,7 +242,7 @@ object Utils {
}
}
- getFieldVector(valueVector)
+ getFieldVector(valueVector, "serialize")
case c =>
throw new SparkException(
@@ -253,14 +253,15 @@ object Utils {
(fieldVectors, provider)
}
- def getFieldVector(valueVector: ValueVector): FieldVector = {
+ def getFieldVector(valueVector: ValueVector, reason: String): FieldVector = {
valueVector match {
case v @ (_: BitVector | _: TinyIntVector | _: SmallIntVector | _: IntVector |
_: BigIntVector | _: Float4Vector | _: Float8Vector | _: VarCharVector |
_: DecimalVector | _: DateDayVector | _: TimeStampMicroTZVector | _: VarBinaryVector |
_: FixedSizeBinaryVector | _: TimeStampMicroVector) =>
v.asInstanceOf[FieldVector]
- case _ => throw new SparkException(s"Unsupported Arrow Vector: ${valueVector.getClass}")
+ case _ =>
+ throw new SparkException(s"Unsupported Arrow Vector for $reason: ${valueVector.getClass}")
}
}
}
diff --git a/core/Cargo.lock b/core/Cargo.lock
index 3fb7b5f62..52f105591 100644
--- a/core/Cargo.lock
+++ b/core/Cargo.lock
@@ -57,9 +57,9 @@ dependencies = [
[[package]]
name = "allocator-api2"
-version = "0.2.16"
+version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
+checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
[[package]]
name = "android-tzdata"
@@ -90,9 +90,9 @@ checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc"
[[package]]
name = "anyhow"
-version = "1.0.81"
+version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247"
+checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
[[package]]
name = "arc-swap"
@@ -327,13 +327,13 @@ checksum = "0c24e9d990669fbd16806bff449e4ac644fd9b1fca014760087732fe4102f131"
[[package]]
name = "async-trait"
-version = "0.1.79"
+version = "0.1.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681"
+checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
]
[[package]]
@@ -438,9 +438,9 @@ dependencies = [
[[package]]
name = "bumpalo"
-version = "3.15.4"
+version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa"
+checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "bytemuck"
@@ -468,9 +468,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
-version = "1.0.90"
+version = "1.0.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
+checksum = "17f6e324229dc011159fcc089755d1e2e216a90d43a7dea6853ca740b84f35e7"
dependencies = [
"jobserver",
"libc",
@@ -490,14 +490,14 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
-version = "0.4.37"
+version = "0.4.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a0d04d43504c61aa6c7531f1871dd0d418d91130162063b789da00fd7057a5e"
+checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401"
dependencies = [
"android-tzdata",
"iana-time-zone",
"num-traits",
- "windows-targets 0.52.4",
+ "windows-targets 0.52.5",
]
[[package]]
@@ -576,9 +576,9 @@ checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
[[package]]
name = "combine"
-version = "4.6.6"
+version = "4.6.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
+checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd"
dependencies = [
"bytes",
"memchr",
@@ -625,7 +625,7 @@ dependencies = [
"parquet-format",
"paste",
"pprof",
- "prost 0.12.3",
+ "prost 0.12.4",
"prost-build",
"rand",
"regex",
@@ -643,12 +643,12 @@ dependencies = [
[[package]]
name = "comfy-table"
-version = "7.1.0"
+version = "7.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686"
+checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7"
dependencies = [
- "strum 0.25.0",
- "strum_macros 0.25.3",
+ "strum",
+ "strum_macros",
"unicode-width",
]
@@ -923,8 +923,8 @@ dependencies = [
"datafusion-common",
"paste",
"sqlparser",
- "strum 0.26.2",
- "strum_macros 0.26.2",
+ "strum",
+ "strum_macros",
]
[[package]]
@@ -1044,7 +1044,7 @@ dependencies = [
"datafusion-expr",
"log",
"sqlparser",
- "strum 0.26.2",
+ "strum",
]
[[package]]
@@ -1092,9 +1092,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "either"
-version = "1.10.0"
+version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
+checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2"
[[package]]
name = "equivalent"
@@ -1227,7 +1227,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
]
[[package]]
@@ -1272,9 +1272,9 @@ dependencies = [
[[package]]
name = "getrandom"
-version = "0.2.12"
+version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
+checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c"
dependencies = [
"cfg-if",
"libc",
@@ -1526,9 +1526,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
[[package]]
name = "jobserver"
-version = "0.1.28"
+version = "0.1.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
+checksum = "685a7d121ee3f65ae4fddd72b25a04bb36b6af81bc0828f7d5434c0fe60fa3a2"
dependencies = [
"libc",
]
@@ -1794,9 +1794,9 @@ dependencies = [
[[package]]
name = "num"
-version = "0.4.1"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af"
+checksum = "3135b08af27d103b0a51f2ae0f8632117b7b185ccf931445affa8df530576a41"
dependencies = [
"num-bigint",
"num-complex",
@@ -2142,9 +2142,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "proc-macro2"
-version = "1.0.79"
+version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
+checksum = "a56dea16b0a29e94408b9aa5e2940a4eedbd128a1ba20e8f7ae60fd3d465af0e"
dependencies = [
"unicode-ident",
]
@@ -2161,12 +2161,12 @@ dependencies = [
[[package]]
name = "prost"
-version = "0.12.3"
+version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a"
+checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922"
dependencies = [
"bytes",
- "prost-derive 0.12.3",
+ "prost-derive 0.12.4",
]
[[package]]
@@ -2204,15 +2204,15 @@ dependencies = [
[[package]]
name = "prost-derive"
-version = "0.12.3"
+version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e"
+checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48"
dependencies = [
"anyhow",
- "itertools 0.11.0",
+ "itertools 0.12.1",
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
]
[[package]]
@@ -2236,9 +2236,9 @@ dependencies = [
[[package]]
name = "quote"
-version = "1.0.35"
+version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
+checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
@@ -2370,9 +2370,9 @@ dependencies = [
[[package]]
name = "rustversion"
-version = "1.0.14"
+version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
+checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47"
[[package]]
name = "ryu"
@@ -2434,14 +2434,14 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
]
[[package]]
name = "serde_json"
-version = "1.0.115"
+version = "1.0.116"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd"
+checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813"
dependencies = [
"itoa",
"ryu",
@@ -2545,7 +2545,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
]
[[package]]
@@ -2566,32 +2566,13 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
-[[package]]
-name = "strum"
-version = "0.25.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
-
[[package]]
name = "strum"
version = "0.26.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29"
dependencies = [
- "strum_macros 0.26.2",
-]
-
-[[package]]
-name = "strum_macros"
-version = "0.25.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
-dependencies = [
- "heck 0.4.1",
- "proc-macro2",
- "quote",
- "rustversion",
- "syn 2.0.57",
+ "strum_macros",
]
[[package]]
@@ -2604,7 +2585,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
- "syn 2.0.57",
+ "syn 2.0.59",
]
[[package]]
@@ -2649,9 +2630,9 @@ dependencies = [
[[package]]
name = "syn"
-version = "2.0.57"
+version = "2.0.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35"
+checksum = "4a6531ffc7b071655e4ce2e04bd464c4830bb585a61cabb96cf808f05172615a"
dependencies = [
"proc-macro2",
"quote",
@@ -2687,7 +2668,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
]
[[package]]
@@ -2790,7 +2771,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
]
[[package]]
@@ -2823,7 +2804,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
]
[[package]]
@@ -2971,7 +2952,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
"wasm-bindgen-shared",
]
@@ -2993,7 +2974,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -3063,7 +3044,7 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
dependencies = [
- "windows-targets 0.52.4",
+ "windows-targets 0.52.5",
]
[[package]]
@@ -3081,7 +3062,7 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
- "windows-targets 0.52.4",
+ "windows-targets 0.52.5",
]
[[package]]
@@ -3116,17 +3097,18 @@ dependencies = [
[[package]]
name = "windows-targets"
-version = "0.52.4"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
+checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
dependencies = [
- "windows_aarch64_gnullvm 0.52.4",
- "windows_aarch64_msvc 0.52.4",
- "windows_i686_gnu 0.52.4",
- "windows_i686_msvc 0.52.4",
- "windows_x86_64_gnu 0.52.4",
- "windows_x86_64_gnullvm 0.52.4",
- "windows_x86_64_msvc 0.52.4",
+ "windows_aarch64_gnullvm 0.52.5",
+ "windows_aarch64_msvc 0.52.5",
+ "windows_i686_gnu 0.52.5",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc 0.52.5",
+ "windows_x86_64_gnu 0.52.5",
+ "windows_x86_64_gnullvm 0.52.5",
+ "windows_x86_64_msvc 0.52.5",
]
[[package]]
@@ -3143,9 +3125,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
-version = "0.52.4"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
+checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
[[package]]
name = "windows_aarch64_msvc"
@@ -3161,9 +3143,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
-version = "0.52.4"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
+checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
[[package]]
name = "windows_i686_gnu"
@@ -3179,9 +3161,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
-version = "0.52.4"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
+checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
[[package]]
name = "windows_i686_msvc"
@@ -3197,9 +3185,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
-version = "0.52.4"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
+checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
[[package]]
name = "windows_x86_64_gnu"
@@ -3215,9 +3203,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
-version = "0.52.4"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
+checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
[[package]]
name = "windows_x86_64_gnullvm"
@@ -3233,9 +3221,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
-version = "0.52.4"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
+checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
[[package]]
name = "windows_x86_64_msvc"
@@ -3251,9 +3239,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
-version = "0.52.4"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
+checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
[[package]]
name = "zerocopy"
@@ -3272,7 +3260,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.57",
+ "syn 2.0.59",
]
[[package]]
diff --git a/core/Cargo.toml b/core/Cargo.toml
index cbca7f629..ac565680a 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -119,5 +119,9 @@ name = "row_columnar"
harness = false
[[bench]]
-name = "cast"
+name = "cast_from_string"
+harness = false
+
+[[bench]]
+name = "cast_numeric"
harness = false
diff --git a/core/benches/cast.rs b/core/benches/cast_from_string.rs
similarity index 93%
rename from core/benches/cast.rs
rename to core/benches/cast_from_string.rs
index 281fe82e2..5bfaebf34 100644
--- a/core/benches/cast.rs
+++ b/core/benches/cast_from_string.rs
@@ -23,19 +23,7 @@ use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
use std::sync::Arc;
fn criterion_benchmark(c: &mut Criterion) {
- let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
- let mut b = StringBuilder::new();
- for i in 0..1000 {
- if i % 10 == 0 {
- b.append_null();
- } else if i % 2 == 0 {
- b.append_value(format!("{}", rand::random::()));
- } else {
- b.append_value(format!("{}", rand::random::()));
- }
- }
- let array = b.finish();
- let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+ let batch = create_utf8_batch();
let expr = Arc::new(Column::new("a", 0));
let timezone = "".to_string();
let cast_string_to_i8 = Cast::new(
@@ -58,7 +46,7 @@ fn criterion_benchmark(c: &mut Criterion) {
);
let cast_string_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
- let mut group = c.benchmark_group("cast");
+ let mut group = c.benchmark_group("cast_string_to_int");
group.bench_function("cast_string_to_i8", |b| {
b.iter(|| cast_string_to_i8.evaluate(&batch).unwrap());
});
@@ -73,6 +61,24 @@ fn criterion_benchmark(c: &mut Criterion) {
});
}
+// Create UTF8 batch with strings representing ints, floats, nulls
+fn create_utf8_batch() -> RecordBatch {
+ let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
+ let mut b = StringBuilder::new();
+ for i in 0..1000 {
+ if i % 10 == 0 {
+ b.append_null();
+ } else if i % 2 == 0 {
+ b.append_value(format!("{}", rand::random::()));
+ } else {
+ b.append_value(format!("{}", rand::random::()));
+ }
+ }
+ let array = b.finish();
+ let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+ batch
+}
+
fn config() -> Criterion {
Criterion::default()
}
diff --git a/core/benches/cast_numeric.rs b/core/benches/cast_numeric.rs
new file mode 100644
index 000000000..398be6946
--- /dev/null
+++ b/core/benches/cast_numeric.rs
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::{builder::Int32Builder, RecordBatch};
+use arrow_schema::{DataType, Field, Schema};
+use comet::execution::datafusion::expressions::cast::{Cast, EvalMode};
+use criterion::{criterion_group, criterion_main, Criterion};
+use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
+use std::sync::Arc;
+
+fn criterion_benchmark(c: &mut Criterion) {
+ let batch = create_int32_batch();
+ let expr = Arc::new(Column::new("a", 0));
+ let timezone = "".to_string();
+ let cast_i32_to_i8 = Cast::new(
+ expr.clone(),
+ DataType::Int8,
+ EvalMode::Legacy,
+ timezone.clone(),
+ );
+ let cast_i32_to_i16 = Cast::new(
+ expr.clone(),
+ DataType::Int16,
+ EvalMode::Legacy,
+ timezone.clone(),
+ );
+ let cast_i32_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
+
+ let mut group = c.benchmark_group("cast_int_to_int");
+ group.bench_function("cast_i32_to_i8", |b| {
+ b.iter(|| cast_i32_to_i8.evaluate(&batch).unwrap());
+ });
+ group.bench_function("cast_i32_to_i16", |b| {
+ b.iter(|| cast_i32_to_i16.evaluate(&batch).unwrap());
+ });
+ group.bench_function("cast_i32_to_i64", |b| {
+ b.iter(|| cast_i32_to_i64.evaluate(&batch).unwrap());
+ });
+}
+
+fn create_int32_batch() -> RecordBatch {
+ let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
+ let mut b = Int32Builder::new();
+ for i in 0..1000 {
+ if i % 10 == 0 {
+ b.append_null();
+ } else {
+ b.append_value(rand::random::());
+ }
+ }
+ let array = b.finish();
+ let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+ batch
+}
+
+fn config() -> Criterion {
+ Criterion::default()
+}
+
+criterion_group! {
+ name = benches;
+ config = config();
+ targets = criterion_benchmark
+}
+criterion_main!(benches);
diff --git a/core/src/errors.rs b/core/src/errors.rs
index a06c613ad..04a1629d5 100644
--- a/core/src/errors.rs
+++ b/core/src/errors.rs
@@ -72,6 +72,13 @@ pub enum CometError {
to_type: String,
},
+ #[error("[NUMERIC_VALUE_OUT_OF_RANGE] {value} cannot be represented as Decimal({precision}, {scale}). If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error, and return NULL instead.")]
+ NumericValueOutOfRange {
+ value: String,
+ precision: u8,
+ scale: i8,
+ },
+
#[error("[CAST_OVERFLOW] The value {value} of the type \"{from_type}\" cannot be cast to \"{to_type}\" \
due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary \
set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
@@ -208,6 +215,10 @@ impl jni::errors::ToException for CometError {
class: "org/apache/spark/SparkException".to_string(),
msg: self.to_string(),
},
+ CometError::NumericValueOutOfRange { .. } => Exception {
+ class: "org/apache/spark/SparkException".to_string(),
+ msg: self.to_string(),
+ },
CometError::NumberIntFormat { source: s } => Exception {
class: "java/lang/NumberFormatException".to_string(),
msg: s.to_string(),
diff --git a/core/src/execution/datafusion/expressions/cast.rs b/core/src/execution/datafusion/expressions/cast.rs
index a6e3adaca..35ab23a76 100644
--- a/core/src/execution/datafusion/expressions/cast.rs
+++ b/core/src/execution/datafusion/expressions/cast.rs
@@ -25,21 +25,24 @@ use std::{
use crate::errors::{CometError, CometResult};
use arrow::{
compute::{cast_with_options, CastOptions},
- datatypes::TimestampMicrosecondType,
+ datatypes::{
+ ArrowPrimitiveType, Decimal128Type, DecimalType, Float32Type, Float64Type,
+ TimestampMicrosecondType,
+ },
record_batch::RecordBatch,
util::display::FormatOptions,
};
use arrow_array::{
types::{Int16Type, Int32Type, Int64Type, Int8Type},
- Array, ArrayRef, BooleanArray, Float32Array, Float64Array, GenericStringArray, OffsetSizeTrait,
- PrimitiveArray,
+ Array, ArrayRef, BooleanArray, Decimal128Array, Float32Array, Float64Array, GenericStringArray,
+ Int16Array, Int32Array, Int64Array, Int8Array, OffsetSizeTrait, PrimitiveArray,
};
use arrow_schema::{DataType, Schema};
use chrono::{TimeZone, Timelike};
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::{internal_err, Result as DataFusionResult, ScalarValue};
use datafusion_physical_expr::PhysicalExpr;
-use num::{traits::CheckedNeg, CheckedSub, Integer, Num};
+use num::{cast::AsPrimitive, traits::CheckedNeg, CheckedSub, Integer, Num, ToPrimitive};
use regex::Regex;
use crate::execution::datafusion::expressions::utils::{
@@ -214,11 +217,11 @@ macro_rules! cast_int_to_int_macro {
Some(value) => {
let res = <$to_native_type>::try_from(value);
if res.is_err() {
- Err(CometError::CastOverFlow {
- value: value.to_string() + spark_int_literal_suffix,
- from_type: $spark_from_data_type_name.to_string(),
- to_type: $spark_to_data_type_name.to_string(),
- })
+ Err(cast_overflow(
+ &(value.to_string() + spark_int_literal_suffix),
+ $spark_from_data_type_name,
+ $spark_to_data_type_name,
+ ))
} else {
Ok::