diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml
index 6fa716492..0787d9eed 100644
--- a/.github/actions/setup-builder/action.yaml
+++ b/.github/actions/setup-builder/action.yaml
@@ -21,7 +21,7 @@ inputs:
rust-version:
description: 'version of rust to install (e.g. nightly)'
required: true
- default: 'nightly'
+ default: 'stable'
jdk-version:
description: 'jdk version to install (e.g., 17)'
required: true
diff --git a/.github/actions/setup-macos-builder/action.yaml b/.github/actions/setup-macos-builder/action.yaml
index cc1b63170..7c1c8b522 100644
--- a/.github/actions/setup-macos-builder/action.yaml
+++ b/.github/actions/setup-macos-builder/action.yaml
@@ -21,7 +21,7 @@ inputs:
rust-version:
description: 'version of rust to install (e.g. nightly)'
required: true
- default: 'nightly'
+ default: 'stable'
jdk-version:
description: 'jdk version to install (e.g., 17)'
required: true
diff --git a/.github/workflows/benchmark-tpch.yml b/.github/workflows/benchmark-tpch.yml
index fbf5cfdb5..f4c547a8f 100644
--- a/.github/workflows/benchmark-tpch.yml
+++ b/.github/workflows/benchmark-tpch.yml
@@ -37,7 +37,7 @@ on:
workflow_dispatch:
env:
- RUST_VERSION: nightly
+ RUST_VERSION: stable
jobs:
prepare:
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index e9767f767..023b6a685 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -37,7 +37,7 @@ on:
workflow_dispatch:
env:
- RUST_VERSION: nightly
+ RUST_VERSION: stable
jobs:
prepare:
diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml
index 410f1e1fe..2bf023357 100644
--- a/.github/workflows/pr_build.yml
+++ b/.github/workflows/pr_build.yml
@@ -37,7 +37,7 @@ on:
workflow_dispatch:
env:
- RUST_VERSION: nightly
+ RUST_VERSION: stable
jobs:
linux-test:
@@ -97,22 +97,11 @@ jobs:
with:
rust-version: ${{env.RUST_VERSION}}
jdk-version: ${{ matrix.java_version }}
- - name: Clone Spark
- uses: actions/checkout@v4
- with:
- repository: "apache/spark"
- path: "apache-spark"
- - name: Install Spark
- shell: bash
- working-directory: ./apache-spark
- run: build/mvn install -Phive -Phadoop-cloud -DskipTests
- name: Java test steps
uses: ./.github/actions/java-test
with:
- # TODO: remove -DskipTests after fixing tests
- maven_opts: "-Pspark-${{ matrix.spark-version }} -DskipTests"
- # TODO: upload test reports after enabling tests
- upload-test-reports: false
+ maven_opts: -Pspark-${{ matrix.spark-version }}
+ upload-test-reports: true
linux-test-with-old-spark:
strategy:
@@ -225,22 +214,11 @@ jobs:
with:
rust-version: ${{env.RUST_VERSION}}
jdk-version: ${{ matrix.java_version }}
- - name: Clone Spark
- uses: actions/checkout@v4
- with:
- repository: "apache/spark"
- path: "apache-spark"
- - name: Install Spark
- shell: bash
- working-directory: ./apache-spark
- run: build/mvn install -Phive -Phadoop-cloud -DskipTests
- name: Java test steps
uses: ./.github/actions/java-test
with:
- # TODO: remove -DskipTests after fixing tests
- maven_opts: "-Pspark-${{ matrix.spark-version }} -DskipTests"
- # TODO: upload test reports after enabling tests
- upload-test-reports: false
+ maven_opts: -Pspark-${{ matrix.spark-version }}
+ upload-test-reports: true
macos-aarch64-test-with-spark4_0:
strategy:
@@ -265,22 +243,11 @@ jobs:
jdk-version: ${{ matrix.java_version }}
jdk-architecture: aarch64
protoc-architecture: aarch_64
- - name: Clone Spark
- uses: actions/checkout@v4
- with:
- repository: "apache/spark"
- path: "apache-spark"
- - name: Install Spark
- shell: bash
- working-directory: ./apache-spark
- run: build/mvn install -Phive -Phadoop-cloud -DskipTests
- name: Java test steps
uses: ./.github/actions/java-test
with:
- # TODO: remove -DskipTests after fixing tests
- maven_opts: "-Pspark-${{ matrix.spark-version }} -DskipTests"
- # TODO: upload test reports after enabling tests
- upload-test-reports: false
+ maven_opts: -Pspark-${{ matrix.spark-version }}
+ upload-test-reports: true
macos-aarch64-test-with-old-spark:
strategy:
diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml
index 997136ded..352e0ecbe 100644
--- a/.github/workflows/spark_sql_test.yml
+++ b/.github/workflows/spark_sql_test.yml
@@ -37,7 +37,7 @@ on:
workflow_dispatch:
env:
- RUST_VERSION: nightly
+ RUST_VERSION: stable
jobs:
spark-sql-catalyst:
diff --git a/.github/workflows/spark_sql_test_ansi.yml b/.github/workflows/spark_sql_test_ansi.yml
index 5c5d28589..337e59efe 100644
--- a/.github/workflows/spark_sql_test_ansi.yml
+++ b/.github/workflows/spark_sql_test_ansi.yml
@@ -37,7 +37,7 @@ on:
workflow_dispatch:
env:
- RUST_VERSION: nightly
+ RUST_VERSION: stable
jobs:
spark-sql-catalyst:
diff --git a/Makefile b/Makefile
index f20687cda..573a7f955 100644
--- a/Makefile
+++ b/Makefile
@@ -44,10 +44,10 @@ format:
core-amd64:
rustup target add x86_64-apple-darwin
- cd core && RUSTFLAGS="-Ctarget-cpu=skylake -Ctarget-feature=-prefer-256-bit" CC=o64-clang CXX=o64-clang++ cargo build --target x86_64-apple-darwin --features nightly --release
+ cd core && RUSTFLAGS="-Ctarget-cpu=skylake -Ctarget-feature=-prefer-256-bit" CC=o64-clang CXX=o64-clang++ cargo build --target x86_64-apple-darwin --release
mkdir -p common/target/classes/org/apache/comet/darwin/x86_64
cp core/target/x86_64-apple-darwin/release/libcomet.dylib common/target/classes/org/apache/comet/darwin/x86_64
- cd core && RUSTFLAGS="-Ctarget-cpu=haswell -Ctarget-feature=-prefer-256-bit" cargo build --features nightly --release
+ cd core && RUSTFLAGS="-Ctarget-cpu=haswell -Ctarget-feature=-prefer-256-bit" cargo build --release
mkdir -p common/target/classes/org/apache/comet/linux/amd64
cp core/target/release/libcomet.so common/target/classes/org/apache/comet/linux/amd64
jar -cf common/target/comet-native-x86_64.jar \
@@ -57,10 +57,10 @@ core-amd64:
core-arm64:
rustup target add aarch64-apple-darwin
- cd core && RUSTFLAGS="-Ctarget-cpu=apple-m1" CC=arm64-apple-darwin21.4-clang CXX=arm64-apple-darwin21.4-clang++ CARGO_FEATURE_NEON=1 cargo build --target aarch64-apple-darwin --features nightly --release
+ cd core && RUSTFLAGS="-Ctarget-cpu=apple-m1" CC=arm64-apple-darwin21.4-clang CXX=arm64-apple-darwin21.4-clang++ CARGO_FEATURE_NEON=1 cargo build --target aarch64-apple-darwin --release
mkdir -p common/target/classes/org/apache/comet/darwin/aarch64
cp core/target/aarch64-apple-darwin/release/libcomet.dylib common/target/classes/org/apache/comet/darwin/aarch64
- cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --features nightly --release
+ cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release
mkdir -p common/target/classes/org/apache/comet/linux/aarch64
cp core/target/release/libcomet.so common/target/classes/org/apache/comet/linux/aarch64
jar -cf common/target/comet-native-aarch64.jar \
@@ -70,13 +70,16 @@ core-arm64:
release-linux: clean
rustup target add aarch64-apple-darwin x86_64-apple-darwin
- cd core && RUSTFLAGS="-Ctarget-cpu=apple-m1" CC=arm64-apple-darwin21.4-clang CXX=arm64-apple-darwin21.4-clang++ CARGO_FEATURE_NEON=1 cargo build --target aarch64-apple-darwin --features nightly --release
- cd core && RUSTFLAGS="-Ctarget-cpu=skylake -Ctarget-feature=-prefer-256-bit" CC=o64-clang CXX=o64-clang++ cargo build --target x86_64-apple-darwin --features nightly --release
- cd core && RUSTFLAGS="-Ctarget-cpu=native -Ctarget-feature=-prefer-256-bit" cargo build --features nightly --release
+ cd core && RUSTFLAGS="-Ctarget-cpu=apple-m1" CC=arm64-apple-darwin21.4-clang CXX=arm64-apple-darwin21.4-clang++ CARGO_FEATURE_NEON=1 cargo build --target aarch64-apple-darwin --release
+ cd core && RUSTFLAGS="-Ctarget-cpu=skylake -Ctarget-feature=-prefer-256-bit" CC=o64-clang CXX=o64-clang++ cargo build --target x86_64-apple-darwin --release
+ cd core && RUSTFLAGS="-Ctarget-cpu=native -Ctarget-feature=-prefer-256-bit" cargo build --release
./mvnw install -Prelease -DskipTests $(PROFILES)
release:
- cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --features nightly --release
+ cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release
./mvnw install -Prelease -DskipTests $(PROFILES)
+release-nogit:
+ cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --features nightly --release
+ ./mvnw install -Prelease -DskipTests $(PROFILES) -Dmaven.gitcommitid.skip=true
benchmark-%: clean release
cd spark && COMET_CONF_DIR=$(shell pwd)/conf MAVEN_OPTS='-Xmx20g' ../mvnw exec:java -Dexec.mainClass="$*" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="$(filter-out $@,$(MAKECMDGOALS))" $(PROFILES)
.DEFAULT:
diff --git a/README.md b/README.md
index fb17535aa..b0b72fdb6 100644
--- a/README.md
+++ b/README.md
@@ -19,58 +19,86 @@ under the License.
# Apache DataFusion Comet
-Apache DataFusion Comet is an Apache Spark plugin that uses [Apache DataFusion](https://datafusion.apache.org/)
-as native runtime to achieve improvement in terms of query efficiency and query runtime.
+Apache DataFusion Comet is a high-performance accelerator for Apache Spark, built on top of the powerful
+[Apache DataFusion](https://datafusion.apache.org) query engine. Comet is designed to significantly enhance the
+performance of Apache Spark workloads while leveraging commodity hardware and seamlessly integrating with the
+Spark ecosystem without requiring any code changes.
-Comet runs Spark SQL queries using the native DataFusion runtime, which is
-typically faster and more resource efficient than JVM based runtimes.
+# Benefits of Using Comet
-
+## Run Spark Queries at DataFusion Speeds
-Comet aims to support:
+Comet delivers a performance speedup for many queries, enabling faster data processing and shorter time-to-insights.
-- a native Parquet implementation, including both reader and writer
-- full implementation of Spark operators, including
- Filter/Project/Aggregation/Join/Exchange etc.
-- full implementation of Spark built-in expressions
-- a UDF framework for users to migrate their existing UDF to native
+The following chart shows the time it takes to run the 22 TPC-H queries against 100 GB of data in Parquet format
+using a single executor with 8 cores. See the [Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html)
+for details of the environment used for these benchmarks.
-## Architecture
+When using Comet, the overall run time is reduced from 649 seconds to 440 seconds, a 1.5x speedup.
-The following diagram illustrates the architecture of Comet:
+Running the same queries with DataFusion standalone (without Spark) using the same number of cores results in a 3.9x
+speedup compared to Spark.
-
+Comet is not yet achieving full DataFusion speeds in all cases, but with future work we aim to provide a 2x-4x speedup
+for many use cases.
-## Current Status
+![](docs/source/_static/images/tpch_allqueries.png)
-The project is currently integrated into Apache Spark 3.2, 3.3, and 3.4.
+Here is a breakdown showing relative performance of Spark, Comet, and DataFusion for each TPC-H query.
-## Feature Parity with Apache Spark
+![](docs/source/_static/images/tpch_queries_compare.png)
-The project strives to keep feature parity with Apache Spark, that is,
-users should expect the same behavior (w.r.t features, configurations,
-query results, etc) with Comet turned on or turned off in their Spark
-jobs. In addition, Comet extension should automatically detect unsupported
-features and fallback to Spark engine.
+The following chart shows how much Comet currently accelerates each query from the benchmark. Performance optimization
+is an ongoing task, and we welcome contributions from the community to help achieve even greater speedups in the future.
-To achieve this, besides unit tests within Comet itself, we also re-use
-Spark SQL tests and make sure they all pass with Comet extension
-enabled.
+![](docs/source/_static/images/tpch_queries_speedup.png)
-## Supported Platforms
+These benchmarks can be reproduced in any environment using the documentation in the
+[Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html). We encourage
+you to run your own benchmarks.
-Linux, Apple OSX (Intel and M1)
+## Use Commodity Hardware
-## Requirements
+Comet leverages commodity hardware, eliminating the need for costly hardware upgrades or
+specialized hardware accelerators, such as GPUs or FGPA. By maximizing the utilization of commodity hardware, Comet
+ensures cost-effectiveness and scalability for your Spark deployments.
-- Apache Spark 3.2, 3.3, or 3.4
-- JDK 8, 11 and 17 (JDK 11 recommended because Spark 3.2 doesn't support 17)
-- GLIBC 2.17 (Centos 7) and up
+## Spark Compatibility
-## Getting started
+Comet aims for 100% compatibility with all supported versions of Apache Spark, allowing you to integrate Comet into
+your existing Spark deployments and workflows seamlessly. With no code changes required, you can immediately harness
+the benefits of Comet's acceleration capabilities without disrupting your Spark applications.
-See the [DataFusion Comet User Guide](https://datafusion.apache.org/comet/user-guide/installation.html) for installation instructions.
+## Tight Integration with Apache DataFusion
+
+Comet tightly integrates with the core Apache DataFusion project, leveraging its powerful execution engine. With
+seamless interoperability between Comet and DataFusion, you can achieve optimal performance and efficiency in your
+Spark workloads.
+
+## Active Community
+
+Comet boasts a vibrant and active community of developers, contributors, and users dedicated to advancing the
+capabilities of Apache DataFusion and accelerating the performance of Apache Spark.
+
+## Getting Started
+
+To get started with Apache DataFusion Comet, follow the
+[installation instructions](https://datafusion.apache.org/comet/user-guide/installation.html). Join the
+[DataFusion Slack and Discord channels](https://datafusion.apache.org/contributor-guide/communication.html) to connect
+with other users, ask questions, and share your experiences with Comet.
## Contributing
-See the [DataFusion Comet Contribution Guide](https://datafusion.apache.org/comet/contributor-guide/contributing.html)
-for information on how to get started contributing to the project.
+
+We welcome contributions from the community to help improve and enhance Apache DataFusion Comet. Whether it's fixing
+bugs, adding new features, writing documentation, or optimizing performance, your contributions are invaluable in
+shaping the future of Comet. Check out our
+[contributor guide](https://datafusion.apache.org/comet/contributor-guide/contributing.html) to get started.
+
+## License
+
+Apache DataFusion Comet is licensed under the Apache License 2.0. See the [LICENSE.txt](LICENSE.txt) file for details.
+
+## Acknowledgments
+
+We would like to express our gratitude to the Apache DataFusion community for their support and contributions to
+Comet. Together, we're building a faster, more efficient future for big data processing with Apache Spark.
diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
index 5aee02f11..42fb5fb4c 100644
--- a/common/src/main/scala/org/apache/comet/CometConf.scala
+++ b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -29,6 +29,8 @@ import org.apache.spark.network.util.JavaUtils
import org.apache.spark.sql.comet.util.Utils
import org.apache.spark.sql.internal.SQLConf
+import org.apache.comet.shims.ShimCometConf
+
/**
* Configurations for a Comet application. Mostly inspired by [[SQLConf]] in Spark.
*
@@ -41,7 +43,7 @@ import org.apache.spark.sql.internal.SQLConf
* which retrieves the config value from the thread-local [[SQLConf]] object. Alternatively, you
* can also explicitly pass a [[SQLConf]] object to the `get` method.
*/
-object CometConf {
+object CometConf extends ShimCometConf {
/** List of all configs that is used for generating documentation */
val allConfs = new ListBuffer[ConfigEntry[_]]
@@ -361,7 +363,7 @@ object CometConf {
"column to a long column, a float column to a double column, etc. This is automatically" +
"enabled when reading from Iceberg tables.")
.booleanConf
- .createWithDefault(false)
+ .createWithDefault(COMET_SCHEMA_EVOLUTION_ENABLED_DEFAULT)
val COMET_ROW_TO_COLUMNAR_ENABLED: ConfigEntry[Boolean] =
conf("spark.comet.rowToColumnar.enabled")
@@ -382,12 +384,13 @@ object CometConf {
.createWithDefault(Seq("Range,InMemoryTableScan"))
val COMET_ANSI_MODE_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.ansi.enabled")
+ .internal()
.doc(
"Comet does not respect ANSI mode in most cases and by default will not accelerate " +
"queries when ansi mode is enabled. Enable this setting to test Comet's experimental " +
"support for ANSI mode. This should not be used in production.")
.booleanConf
- .createWithDefault(false)
+ .createWithDefault(COMET_ANSI_MODE_ENABLED_DEFAULT)
val COMET_CAST_ALLOW_INCOMPATIBLE: ConfigEntry[Boolean] =
conf("spark.comet.cast.allowIncompatible")
diff --git a/common/src/main/spark-3.x/org/apache/comet/shims/ShimCometConf.scala b/common/src/main/spark-3.x/org/apache/comet/shims/ShimCometConf.scala
new file mode 100644
index 000000000..dc84a7525
--- /dev/null
+++ b/common/src/main/spark-3.x/org/apache/comet/shims/ShimCometConf.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.shims
+
+trait ShimCometConf {
+ protected val COMET_SCHEMA_EVOLUTION_ENABLED_DEFAULT = false
+ protected val COMET_ANSI_MODE_ENABLED_DEFAULT = false
+}
diff --git a/common/src/main/spark-4.0/org/apache/comet/shims/ShimCometConf.scala b/common/src/main/spark-4.0/org/apache/comet/shims/ShimCometConf.scala
new file mode 100644
index 000000000..13da6bc10
--- /dev/null
+++ b/common/src/main/spark-4.0/org/apache/comet/shims/ShimCometConf.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.shims
+
+trait ShimCometConf {
+ protected val COMET_SCHEMA_EVOLUTION_ENABLED_DEFAULT = true
+ protected val COMET_ANSI_MODE_ENABLED_DEFAULT = true
+}
diff --git a/core/Cargo.lock b/core/Cargo.lock
index 3b931adce..e206dff17 100644
--- a/core/Cargo.lock
+++ b/core/Cargo.lock
@@ -4,9 +4,9 @@ version = 3
[[package]]
name = "addr2line"
-version = "0.21.0"
+version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
+checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678"
dependencies = [
"gimli",
]
@@ -84,15 +84,15 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstyle"
-version = "1.0.6"
+version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc"
+checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b"
[[package]]
name = "anyhow"
-version = "1.0.82"
+version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
+checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
[[package]]
name = "arc-swap"
@@ -333,7 +333,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
]
[[package]]
@@ -347,15 +347,15 @@ dependencies = [
[[package]]
name = "autocfg"
-version = "1.2.0"
+version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80"
+checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
[[package]]
name = "backtrace"
-version = "0.3.71"
+version = "0.3.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d"
+checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11"
dependencies = [
"addr2line",
"cc",
@@ -368,9 +368,9 @@ dependencies = [
[[package]]
name = "base64"
-version = "0.22.0"
+version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bitflags"
@@ -444,9 +444,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "bytemuck"
-version = "1.15.0"
+version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15"
+checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5"
[[package]]
name = "byteorder"
@@ -468,12 +468,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
-version = "1.0.94"
+version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17f6e324229dc011159fcc089755d1e2e216a90d43a7dea6853ca740b84f35e7"
+checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f"
dependencies = [
"jobserver",
"libc",
+ "once_cell",
]
[[package]]
@@ -625,7 +626,7 @@ dependencies = [
"parquet-format",
"paste",
"pprof",
- "prost 0.12.4",
+ "prost 0.12.6",
"prost-build",
"rand",
"regex",
@@ -637,6 +638,7 @@ dependencies = [
"thrift 0.17.0",
"tokio",
"tokio-stream",
+ "twox-hash",
"unicode-segmentation",
"zstd",
]
@@ -704,9 +706,9 @@ dependencies = [
[[package]]
name = "crc32fast"
-version = "1.4.0"
+version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
+checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if",
]
@@ -768,9 +770,9 @@ dependencies = [
[[package]]
name = "crossbeam-utils"
-version = "0.8.19"
+version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
+checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
[[package]]
name = "crunchy"
@@ -1092,9 +1094,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "either"
-version = "1.11.0"
+version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2"
+checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
[[package]]
name = "equivalent"
@@ -1104,9 +1106,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "errno"
-version = "0.3.8"
+version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
+checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
dependencies = [
"libc",
"windows-sys 0.52.0",
@@ -1114,9 +1116,9 @@ dependencies = [
[[package]]
name = "fastrand"
-version = "2.0.2"
+version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984"
+checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
[[package]]
name = "findshlibs"
@@ -1148,9 +1150,9 @@ dependencies = [
[[package]]
name = "flate2"
-version = "1.0.28"
+version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
+checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae"
dependencies = [
"crc32fast",
"miniz_oxide",
@@ -1227,7 +1229,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
]
[[package]]
@@ -1272,9 +1274,9 @@ dependencies = [
[[package]]
name = "getrandom"
-version = "0.2.14"
+version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c"
+checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
@@ -1283,9 +1285,9 @@ dependencies = [
[[package]]
name = "gimli"
-version = "0.28.1"
+version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
+checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
[[package]]
name = "glob"
@@ -1311,9 +1313,9 @@ dependencies = [
[[package]]
name = "hashbrown"
-version = "0.14.3"
+version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
@@ -1334,6 +1336,12 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
[[package]]
name = "hermit-abi"
version = "0.3.9"
@@ -1424,9 +1432,9 @@ dependencies = [
[[package]]
name = "instant"
-version = "0.1.12"
+version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
+checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if",
"js-sys",
@@ -1492,9 +1500,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "java-locator"
-version = "0.1.5"
+version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90003f2fd9c52f212c21d8520f1128da0080bad6fff16b68fe6e7f2f0c3780c2"
+checksum = "d2abecabd9961c5e01405a6426687fcf1bd94a269927137e4c3cc1a7419b93fd"
dependencies = [
"glob",
"lazy_static",
@@ -1526,9 +1534,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
[[package]]
name = "jobserver"
-version = "0.1.30"
+version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "685a7d121ee3f65ae4fddd72b25a04bb36b6af81bc0828f7d5434c0fe60fa3a2"
+checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e"
dependencies = [
"libc",
]
@@ -1614,9 +1622,9 @@ dependencies = [
[[package]]
name = "libc"
-version = "0.2.153"
+version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
+checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "libloading"
@@ -1636,9 +1644,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "libmimalloc-sys"
-version = "0.1.35"
+version = "0.1.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3979b5c37ece694f1f5e51e7ecc871fdb0f517ed04ee45f88d15d6d553cb9664"
+checksum = "0e7bb23d733dfcc8af652a78b7bf232f0e967710d044732185e561e47c0336b6"
dependencies = [
"cc",
"libc",
@@ -1646,15 +1654,15 @@ dependencies = [
[[package]]
name = "linux-raw-sys"
-version = "0.4.13"
+version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
+checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "lock_api"
-version = "0.4.11"
+version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
+checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
dependencies = [
"autocfg",
"scopeguard",
@@ -1759,18 +1767,18 @@ dependencies = [
[[package]]
name = "mimalloc"
-version = "0.1.39"
+version = "0.1.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa01922b5ea280a911e323e4d2fd24b7fe5cc4042e0d2cda3c40775cdc4bdc9c"
+checksum = "e9186d86b79b52f4a77af65604b51225e8db1d6ee7e3f41aec1e40829c71a176"
dependencies = [
"libmimalloc-sys",
]
[[package]]
name = "miniz_oxide"
-version = "0.7.2"
+version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
+checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae"
dependencies = [
"adler",
]
@@ -1794,9 +1802,9 @@ dependencies = [
[[package]]
name = "num"
-version = "0.4.2"
+version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3135b08af27d103b0a51f2ae0f8632117b7b185ccf931445affa8df530576a41"
+checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
@@ -1808,20 +1816,19 @@ dependencies = [
[[package]]
name = "num-bigint"
-version = "0.4.4"
+version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0"
+checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7"
dependencies = [
- "autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-complex"
-version = "0.4.5"
+version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6"
+checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
dependencies = [
"num-traits",
]
@@ -1847,9 +1854,9 @@ dependencies = [
[[package]]
name = "num-iter"
-version = "0.1.44"
+version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9"
+checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
dependencies = [
"autocfg",
"num-integer",
@@ -1858,11 +1865,10 @@ dependencies = [
[[package]]
name = "num-rational"
-version = "0.4.1"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
+checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
- "autocfg",
"num-bigint",
"num-integer",
"num-traits",
@@ -1870,9 +1876,9 @@ dependencies = [
[[package]]
name = "num-traits"
-version = "0.2.18"
+version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
"libm",
@@ -1890,9 +1896,9 @@ dependencies = [
[[package]]
name = "object"
-version = "0.32.2"
+version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
+checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e"
dependencies = [
"memchr",
]
@@ -1950,9 +1956,9 @@ dependencies = [
[[package]]
name = "parking_lot"
-version = "0.12.1"
+version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
+checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core",
@@ -1960,15 +1966,15 @@ dependencies = [
[[package]]
name = "parking_lot_core"
-version = "0.9.9"
+version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e"
+checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
- "windows-targets 0.48.5",
+ "windows-targets 0.52.5",
]
[[package]]
@@ -2000,18 +2006,18 @@ dependencies = [
[[package]]
name = "parse-zoneinfo"
-version = "0.3.0"
+version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41"
+checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24"
dependencies = [
"regex",
]
[[package]]
name = "paste"
-version = "1.0.14"
+version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "percent-encoding"
@@ -2021,9 +2027,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "petgraph"
-version = "0.6.4"
+version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9"
+checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
dependencies = [
"fixedbitset",
"indexmap",
@@ -2087,9 +2093,9 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
[[package]]
name = "plotters"
-version = "0.3.5"
+version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45"
+checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3"
dependencies = [
"num-traits",
"plotters-backend",
@@ -2100,15 +2106,15 @@ dependencies = [
[[package]]
name = "plotters-backend"
-version = "0.3.5"
+version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609"
+checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7"
[[package]]
name = "plotters-svg"
-version = "0.3.5"
+version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab"
+checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705"
dependencies = [
"plotters-backend",
]
@@ -2142,9 +2148,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "proc-macro2"
-version = "1.0.80"
+version = "1.0.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a56dea16b0a29e94408b9aa5e2940a4eedbd128a1ba20e8f7ae60fd3d465af0e"
+checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23"
dependencies = [
"unicode-ident",
]
@@ -2161,12 +2167,12 @@ dependencies = [
[[package]]
name = "prost"
-version = "0.12.4"
+version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922"
+checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29"
dependencies = [
"bytes",
- "prost-derive 0.12.4",
+ "prost-derive 0.12.6",
]
[[package]]
@@ -2204,15 +2210,15 @@ dependencies = [
[[package]]
name = "prost-derive"
-version = "0.12.4"
+version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48"
+checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1"
dependencies = [
"anyhow",
"itertools 0.12.1",
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
]
[[package]]
@@ -2295,11 +2301,11 @@ dependencies = [
[[package]]
name = "redox_syscall"
-version = "0.4.1"
+version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
+checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e"
dependencies = [
- "bitflags 1.3.2",
+ "bitflags 2.5.0",
]
[[package]]
@@ -2342,9 +2348,9 @@ dependencies = [
[[package]]
name = "rustc-demangle"
-version = "0.1.23"
+version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
+checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "rustc_version"
@@ -2357,9 +2363,9 @@ dependencies = [
[[package]]
name = "rustix"
-version = "0.38.32"
+version = "0.38.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89"
+checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
dependencies = [
"bitflags 2.5.0",
"errno",
@@ -2370,15 +2376,15 @@ dependencies = [
[[package]]
name = "rustversion"
-version = "1.0.15"
+version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47"
+checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
[[package]]
name = "ryu"
-version = "1.0.17"
+version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1"
+checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "same-file"
@@ -2397,9 +2403,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "semver"
-version = "1.0.22"
+version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca"
+checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
[[package]]
name = "seq-macro"
@@ -2409,9 +2415,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]]
name = "serde"
-version = "1.0.197"
+version = "1.0.203"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
+checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
dependencies = [
"serde_derive",
]
@@ -2428,20 +2434,20 @@ dependencies = [
[[package]]
name = "serde_derive"
-version = "1.0.197"
+version = "1.0.203"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
+checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
]
[[package]]
name = "serde_json"
-version = "1.0.116"
+version = "1.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813"
+checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3"
dependencies = [
"itoa",
"ryu",
@@ -2545,7 +2551,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
]
[[package]]
@@ -2577,15 +2583,15 @@ dependencies = [
[[package]]
name = "strum_macros"
-version = "0.26.2"
+version = "0.26.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946"
+checksum = "f7993a8e3a9e88a00351486baae9522c91b123a088f76469e5bd5cc17198ea87"
dependencies = [
- "heck 0.4.1",
+ "heck 0.5.0",
"proc-macro2",
"quote",
"rustversion",
- "syn 2.0.59",
+ "syn 2.0.66",
]
[[package]]
@@ -2630,9 +2636,9 @@ dependencies = [
[[package]]
name = "syn"
-version = "2.0.59"
+version = "2.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a6531ffc7b071655e4ce2e04bd464c4830bb585a61cabb96cf808f05172615a"
+checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5"
dependencies = [
"proc-macro2",
"quote",
@@ -2653,22 +2659,22 @@ dependencies = [
[[package]]
name = "thiserror"
-version = "1.0.58"
+version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
+checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
-version = "1.0.58"
+version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
+checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
]
[[package]]
@@ -2752,9 +2758,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
-version = "1.37.0"
+version = "1.38.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787"
+checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a"
dependencies = [
"backtrace",
"bytes",
@@ -2765,13 +2771,13 @@ dependencies = [
[[package]]
name = "tokio-macros"
-version = "2.2.0"
+version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
+checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
]
[[package]]
@@ -2804,7 +2810,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
]
[[package]]
@@ -2823,6 +2829,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
dependencies = [
"cfg-if",
+ "rand",
"static_assertions",
]
@@ -2870,9 +2877,9 @@ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
[[package]]
name = "unicode-width"
-version = "0.1.11"
+version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
+checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6"
[[package]]
name = "unsafe-any-ors"
@@ -2952,7 +2959,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
"wasm-bindgen-shared",
]
@@ -2974,7 +2981,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -3025,11 +3032,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
-version = "0.1.6"
+version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
+checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
dependencies = [
- "winapi",
+ "windows-sys 0.52.0",
]
[[package]]
@@ -3080,21 +3087,6 @@ dependencies = [
"windows_x86_64_msvc 0.42.2",
]
-[[package]]
-name = "windows-targets"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
-dependencies = [
- "windows_aarch64_gnullvm 0.48.5",
- "windows_aarch64_msvc 0.48.5",
- "windows_i686_gnu 0.48.5",
- "windows_i686_msvc 0.48.5",
- "windows_x86_64_gnu 0.48.5",
- "windows_x86_64_gnullvm 0.48.5",
- "windows_x86_64_msvc 0.48.5",
-]
-
[[package]]
name = "windows-targets"
version = "0.52.5"
@@ -3117,12 +3109,6 @@ version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
-
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.5"
@@ -3135,12 +3121,6 @@ version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
-
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.5"
@@ -3153,12 +3133,6 @@ version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
-[[package]]
-name = "windows_i686_gnu"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
-
[[package]]
name = "windows_i686_gnu"
version = "0.52.5"
@@ -3177,12 +3151,6 @@ version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
-[[package]]
-name = "windows_i686_msvc"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
-
[[package]]
name = "windows_i686_msvc"
version = "0.52.5"
@@ -3195,12 +3163,6 @@ version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
-
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.5"
@@ -3213,12 +3175,6 @@ version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
-
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.5"
@@ -3231,12 +3187,6 @@ version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
-
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.5"
@@ -3245,22 +3195,22 @@ checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
[[package]]
name = "zerocopy"
-version = "0.7.32"
+version = "0.7.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
+checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
-version = "0.7.32"
+version = "0.7.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
+checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.59",
+ "syn 2.0.66",
]
[[package]]
diff --git a/core/Cargo.toml b/core/Cargo.toml
index c0f34d786..c78a82848 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -75,6 +75,7 @@ once_cell = "1.18.0"
regex = "1.9.6"
crc32fast = "1.3.2"
simd-adler32 = "0.3.7"
+twox-hash = "1.6.3"
[build-dependencies]
prost-build = "0.9.0"
@@ -88,7 +89,6 @@ assertables = "7"
[features]
default = []
-nightly = []
[profile.release]
debug = true
diff --git a/core/rustfmt.toml b/core/rustfmt.toml
index 39a3fe635..3463af618 100644
--- a/core/rustfmt.toml
+++ b/core/rustfmt.toml
@@ -17,11 +17,3 @@
edition = "2021"
max_width = 100
-
-# The following requires nightly feature:
-# rustup install nightly
-# rustup component add rustfmt --toolchain nightly
-# cargo +nightly fmt
-wrap_comments = true
-comment_width = 100
-imports_granularity = "Crate" # group imports by crate
diff --git a/core/src/common/mod.rs b/core/src/common/mod.rs
index 8d5030c02..1b7dfad28 100644
--- a/core/src/common/mod.rs
+++ b/core/src/common/mod.rs
@@ -35,10 +35,5 @@ trait ValueSetter {
fn append_value(&mut self, v: &T::Native);
}
-mod vector;
-
mod buffer;
pub use buffer::*;
-
-mod mutable_vector;
-pub use mutable_vector::*;
diff --git a/core/src/common/mutable_vector.rs b/core/src/common/mutable_vector.rs
deleted file mode 100644
index ba29fc01a..000000000
--- a/core/src/common/mutable_vector.rs
+++ /dev/null
@@ -1,409 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow::{
- array::ArrayData, buffer::Buffer as ArrowBuffer, datatypes::DataType as ArrowDataType,
-};
-
-use crate::{
- common::{bit, CometBuffer, ValueGetter, ValueSetter},
- BinaryType, StringType, StringView, TypeTrait,
-};
-
-const DEFAULT_ARRAY_LEN: usize = 4;
-
-/// A mutable vector that can be re-used across batches.
-#[derive(Debug)]
-pub struct MutableVector {
- /// The Arrow type for the elements of this vector.
- pub(crate) arrow_type: ArrowDataType,
-
- /// The number of total elements in this vector.
- pub(crate) num_values: usize,
-
- /// The number of null elements in this vector, must <= `num_values`.
- pub(crate) num_nulls: usize,
-
- /// The capacity of the vector
- pub(crate) capacity: usize,
-
- /// How many bits are required to store a single value
- pub(crate) bit_width: usize,
-
- /// The validity buffer of this Arrow vector. A bit set at position `i` indicates the `i`th
- /// element is not null. Otherwise, an unset bit at position `i` indicates the `i`th element is
- /// null.
- pub(crate) validity_buffer: CometBuffer,
-
- /// The value buffer of this Arrow vector. This could store either offsets if the vector
- /// is of list or struct type, or actual values themselves otherwise.
- pub(crate) value_buffer: CometBuffer,
-
- /// Child vectors for non-primitive types (e.g., list, struct).
- pub(crate) children: Vec,
-
- /// Dictionary (i.e., values) associated with this vector. Only set if using dictionary
- /// encoding.
- pub(crate) dictionary: Option>,
-
- /// Whether all the values in the vector are not null.
- ///
- /// This is useful so we can skip setting non-null for each individual value, in the
- /// `validity_buffer`.
- all_non_null: bool,
-}
-
-impl MutableVector {
- pub fn new(capacity: usize, arrow_type: &ArrowDataType) -> Self {
- let bit_width = Self::bit_width(arrow_type);
- Self::new_with_bit_width(capacity, arrow_type.clone(), bit_width)
- }
-
- pub fn new_with_bit_width(
- capacity: usize,
- arrow_type: ArrowDataType,
- bit_width: usize,
- ) -> Self {
- let validity_len = bit::ceil(capacity, 8);
- let validity_buffer = CometBuffer::new(validity_len);
-
- let mut value_capacity = capacity;
- if Self::is_binary_type(&arrow_type) {
- // Arrow offset array needs to have one extra slot
- value_capacity += 1;
- }
- // Make sure the capacity is positive
- let len = bit::ceil(value_capacity * bit_width, 8);
- let mut value_buffer = CometBuffer::new(len);
-
- let mut children = Vec::new();
-
- match arrow_type {
- ArrowDataType::Binary | ArrowDataType::Utf8 => {
- children.push(MutableVector::new_with_bit_width(
- capacity,
- ArrowDataType::Int8,
- DEFAULT_ARRAY_LEN * 8,
- ));
- }
- _ => {}
- }
-
- if Self::is_binary_type(&arrow_type) {
- // Setup the first offset which is always 0.
- let zero: u32 = 0;
- bit::memcpy_value(&zero, 4, &mut value_buffer);
- }
-
- Self {
- arrow_type,
- num_values: 0,
- num_nulls: 0,
- capacity,
- bit_width,
- validity_buffer,
- value_buffer,
- children,
- dictionary: None,
- all_non_null: false,
- }
- }
-
- /// Appends a non-null value `v` to the end of this vector.
- #[inline]
- pub fn append_value(&mut self, v: &T::Native) {
- >::append_value(self, v);
- }
-
- /// Gets the non-null value at `idx` of this vector.
- #[inline]
- pub fn value(&self, idx: usize) -> T::Native {
- >::value(self, idx)
- }
-
- /// Whether the given value at `idx` of this vector is null.
- #[inline]
- pub fn is_null(&self, idx: usize) -> bool {
- unsafe { !bit::get_bit_raw(self.validity_buffer.as_ptr(), idx) }
- }
-
- /// Resets this vector to the initial state.
- #[inline]
- pub fn reset(&mut self) {
- self.num_values = 0;
- self.num_nulls = 0;
- self.all_non_null = false;
- self.validity_buffer.reset();
- if Self::is_binary_type(&self.arrow_type) {
- // Reset the first offset to 0
- let zero: u32 = 0;
- bit::memcpy_value(&zero, 4, &mut self.value_buffer);
- // Also reset the child value vector
- let child = &mut self.children[0];
- child.reset();
- } else if Self::should_reset_value_buffer(&self.arrow_type) {
- self.value_buffer.reset();
- }
- }
-
- /// Appends a new null value to the end of this vector.
- #[inline]
- pub fn put_null(&mut self) {
- self.put_nulls(1)
- }
-
- /// Appends `n` null values to the end of this vector.
- #[inline]
- pub fn put_nulls(&mut self, n: usize) {
- // We need to update offset buffer for binary.
- if Self::is_binary_type(&self.arrow_type) {
- let mut offset = self.num_values * 4;
- let prev_offset_value = bit::read_num_bytes_u32(4, &self.value_buffer[offset..]);
- offset += 4;
- (0..n).for_each(|_| {
- bit::memcpy_value(&prev_offset_value, 4, &mut self.value_buffer[offset..]);
- offset += 4;
- });
- }
-
- self.num_nulls += n;
- self.num_values += n;
- }
-
- /// Returns the number of total values (including both null and non-null) of this vector.
- #[inline]
- pub fn num_values(&self) -> usize {
- self.num_values
- }
-
- /// Returns the number of null values of this vector.
- #[inline]
- pub fn num_nulls(&self) -> usize {
- self.num_nulls
- }
-
- #[inline]
- pub fn set_not_null(&mut self, i: usize) {
- unsafe {
- bit::set_bit_raw(self.validity_buffer.as_mut_ptr(), i);
- }
- }
-
- /// Sets all values in this vector to be non-null.
- #[inline]
- pub fn set_all_non_null(&mut self) {
- self.all_non_null = true;
- }
-
- /// Sets the content of validity buffer to be `buffer`.
- pub fn set_validity_buffer(&mut self, buffer: &ArrowBuffer) {
- self.validity_buffer = buffer.into();
- }
-
- /// Sets the content of value buffer to be `buffer`.
- pub fn set_value_buffer(&mut self, buffer: &ArrowBuffer) {
- self.value_buffer = buffer.into();
- }
-
- /// Sets the dictionary of this to be `dict`.
- pub fn set_dictionary(&mut self, dict: MutableVector) {
- self.dictionary = Some(Box::new(dict))
- }
-
- /// Clones this into an Arrow [`ArrayData`](arrow::array::ArrayData). Note that the caller of
- /// this method MUST make sure the returned `ArrayData` won't live longer than this vector
- /// itself. Otherwise, dangling pointer may happen.
- ///
- /// # Safety
- ///
- /// This method is highly unsafe since it calls `to_immutable` which leaks raw pointer to the
- /// memory region that are tracked by `ArrowMutableBuffer`. Please see comments on
- /// `to_immutable` buffer to understand the motivation.
- pub fn get_array_data(&mut self) -> ArrayData {
- unsafe {
- let data_type = if let Some(d) = &self.dictionary {
- ArrowDataType::Dictionary(
- Box::new(ArrowDataType::Int32),
- Box::new(d.arrow_type.clone()),
- )
- } else {
- self.arrow_type.clone()
- };
- let mut builder = ArrayData::builder(data_type)
- .len(self.num_values)
- .add_buffer(self.value_buffer.to_arrow());
-
- builder = if self.all_non_null {
- builder.null_count(0)
- } else {
- builder
- .null_bit_buffer(Some(self.validity_buffer.to_arrow()))
- .null_count(self.num_nulls)
- };
-
- if Self::is_binary_type(&self.arrow_type) && self.dictionary.is_none() {
- let child = &mut self.children[0];
- builder = builder.add_buffer(child.value_buffer.to_arrow());
- }
-
- if let Some(d) = &mut self.dictionary {
- builder = builder.add_child_data(d.get_array_data());
- }
-
- builder.build_unchecked()
- }
- }
-
- /// Returns the number of bits it takes to store one element of `arrow_type` in the value buffer
- /// of this vector.
- pub fn bit_width(arrow_type: &ArrowDataType) -> usize {
- match arrow_type {
- ArrowDataType::Boolean => 1,
- ArrowDataType::Int8 => 8,
- ArrowDataType::Int16 => 16,
- ArrowDataType::Int32 | ArrowDataType::Float32 | ArrowDataType::Date32 => 32,
- ArrowDataType::Int64 | ArrowDataType::Float64 | ArrowDataType::Timestamp(_, _) => 64,
- ArrowDataType::FixedSizeBinary(type_length) => *type_length as usize * 8,
- ArrowDataType::Decimal128(..) => 128, // Arrow stores decimal with 16 bytes
- ArrowDataType::Binary | ArrowDataType::Utf8 => 32, // Only count offset size
- dt => panic!("Unsupported Arrow data type: {:?}", dt),
- }
- }
-
- #[inline]
- fn is_binary_type(dt: &ArrowDataType) -> bool {
- matches!(dt, ArrowDataType::Binary | ArrowDataType::Utf8)
- }
-
- #[inline]
- fn should_reset_value_buffer(dt: &ArrowDataType) -> bool {
- // - Boolean type expects have a zeroed value buffer
- // - Decimal may pad buffer with 0xff so we need to clear them before a new batch
- matches!(dt, ArrowDataType::Boolean | ArrowDataType::Decimal128(_, _))
- }
-
- /// Creates an immutable reference from a mutable Arrow buffer `buf`.
- ///
- /// # Safety
- ///
- /// This function is highly unsafe. Please see documentation of the [`to_arrow`] method for
- /// details.
- #[inline]
- unsafe fn to_immutable(buf: &CometBuffer) -> ArrowBuffer {
- buf.to_arrow()
- }
-}
-
-impl ValueGetter for MutableVector {
- default fn value(&self, idx: usize) -> T::Native {
- unsafe {
- let ptr = self.value_buffer.as_ptr() as *const T::Native;
- *ptr.add(idx)
- }
- }
-}
-
-impl ValueGetter for MutableVector {
- fn value(&self, _: usize) -> StringView {
- unimplemented!("'value' on StringType is currently unsupported");
- }
-}
-
-impl ValueGetter for MutableVector {
- fn value(&self, _: usize) -> StringView {
- unimplemented!("'value' on BinaryType is currently unsupported");
- }
-}
-
-impl ValueSetter for MutableVector {
- default fn append_value(&mut self, v: &T::Native) {
- unsafe {
- let ptr = self.value_buffer.as_mut_ptr() as *mut T::Native;
- *ptr.add(self.num_values) = *v;
- }
- self.num_values += 1;
- }
-}
-
-impl ValueSetter for MutableVector {
- fn append_value(&mut self, _: &StringView) {
- unimplemented!("'append_value' on StringType is currently unsupported");
- }
-}
-
-impl ValueSetter for MutableVector {
- fn append_value(&mut self, _: &StringView) {
- unimplemented!("'append_value' on BinaryType is currently unsupported");
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use crate::IntegerType;
-
- #[test]
- fn set_and_get() {
- let mut mv = MutableVector::new(1024, &ArrowDataType::Int32);
-
- for n in 0..100 {
- mv.append_value::(&(n * n));
- }
- assert_eq!(mv.num_values(), 100);
- assert_eq!(mv.num_nulls(), 0);
-
- for n in 0..100 {
- assert_eq!(mv.value::(n) as usize, n * n);
- }
-
- mv.reset();
-
- for n in 0..200 {
- if n % 2 == 0 {
- mv.put_null();
- } else {
- mv.append_value::(&(n * 2));
- }
- }
-
- assert_eq!(mv.num_values(), 200);
- assert_eq!(mv.num_nulls(), 100);
-
- for n in 0..200 {
- if n % 2 == 0 {
- assert!(mv.is_null(n));
- } else {
- assert_eq!(mv.value::(n) as usize, n * 2);
- }
- }
- }
-
- #[test]
- #[should_panic]
- fn set_string_unsupported() {
- let mut mv = MutableVector::new(1024, &ArrowDataType::Utf8);
- let sv = StringView::default();
- mv.append_value::(&sv);
- }
-
- #[test]
- #[should_panic]
- fn get_string_unsupported() {
- let mv = MutableVector::new(1024, &ArrowDataType::Utf8);
- mv.value::(0);
- }
-}
diff --git a/core/src/common/vector.rs b/core/src/common/vector.rs
deleted file mode 100644
index 1afb1e78f..000000000
--- a/core/src/common/vector.rs
+++ /dev/null
@@ -1,523 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::{
- common::{bit, ValueGetter},
- BoolType, DataType, TypeTrait, BITS_PER_BYTE, STRING_VIEW_LEN, STRING_VIEW_PREFIX_LEN,
-};
-use arrow::{
- array::{Array, ArrayRef},
- buffer::{Buffer, MutableBuffer},
- datatypes::DataType as ArrowDataType,
-};
-use arrow_data::ArrayData;
-
-/// A vector that holds elements of plain types (i.e., no nested type such as list, map, struct).
-pub struct PlainVector {
- /// The data type for elements in this vector
- data_type: DataType,
- /// Total number of values in this vector
- num_values: usize,
- /// Total number of nulls in this vector. Must <= `num_values`.
- num_nulls: usize,
- /// The value buffer
- value_buffer: ValueBuffer,
- /// Number of bytes for each element in the vector. For variable length types such as string
- /// and binary, this will be the size of [`StringView`] which is always 16 bytes.
- value_size: usize,
- /// Offsets into buffers
- offset: usize,
- /// The validity buffer. If empty, all values in this vector are not null.
- validity_buffer: Option,
- /// Whether this vector is dictionary encoded
- is_dictionary: bool,
- /// Indices (or dictionary keys) when `is_dictionary` is true. Otherwise, this is always
- /// an empty vector.
- indices: IndexBuffer,
-}
-
-impl ValueGetter for PlainVector {
- default fn value(&self, idx: usize) -> T::Native {
- let offset = self.offset(idx);
- unsafe {
- let ptr = self.value_buffer.as_ptr() as *const T::Native;
- *ptr.add(offset)
- }
- }
-}
-
-impl ValueGetter for PlainVector {
- fn value(&self, idx: usize) -> bool {
- let offset = self.offset(idx);
- unsafe { bit::get_bit_raw(self.value_buffer.as_ptr(), offset) }
- }
-}
-
-impl PlainVector {
- /// Returns the data type of this vector.
- pub fn data_type(&self) -> &DataType {
- &self.data_type
- }
-
- /// Returns the total number of elements in this vector.
- pub fn num_values(&self) -> usize {
- self.num_values
- }
-
- /// Returns the total number of nulls in this vector.
- pub fn num_nulls(&self) -> usize {
- self.num_nulls
- }
-
- /// Whether there is any null in this vector.
- pub fn has_null(&self) -> bool {
- self.num_nulls > 0
- }
-
- /// Whether the element at `idx` is null.
- pub fn is_null(&self, idx: usize) -> bool {
- if let Some(validity_buffer) = &self.validity_buffer {
- unsafe {
- return !bit::get_bit_raw(validity_buffer.as_ptr(), self.offset + idx);
- }
- }
-
- false
- }
-
- #[inline(always)]
- pub fn value(&self, idx: usize) -> T::Native {
- >::value(self, idx)
- }
-
- #[inline(always)]
- fn offset(&self, idx: usize) -> usize {
- let idx = self.offset + idx;
- if self.is_dictionary {
- self.indices.get(idx)
- } else {
- idx
- }
- }
-}
-
-impl From for PlainVector {
- fn from(data: ArrayData) -> Self {
- assert!(!data.buffers().is_empty(), "expected at least one buffer");
- let arrow_dt = data.data_type();
- let dt: DataType = arrow_dt.into();
- let is_dictionary = matches!(arrow_dt, ArrowDataType::Dictionary(_, _));
-
- let mut value_buffers = data.buffers();
- let mut indices = IndexBuffer::empty();
- let validity_buffer = data.nulls().map(|nb| nb.buffer().clone());
-
- if is_dictionary {
- // in case of dictionary data, the dictionary values are stored in child data, while
- // dictionary keys are stored in `value_buffer`.
- assert_eq!(
- data.child_data().len(),
- 1,
- "child data should contain a single array"
- );
- let child_data = &data.child_data()[0];
- indices = IndexBuffer::new(value_buffers[0].clone(), data.len() + data.offset());
- value_buffers = child_data.buffers();
- }
-
- let value_size = dt.kind().type_size() / BITS_PER_BYTE;
- let value_buffer = ValueBuffer::new(&dt, value_buffers.to_vec(), data.len());
-
- Self {
- data_type: dt,
- num_values: data.len(),
- num_nulls: data.null_count(),
- value_buffer,
- value_size,
- offset: data.offset(),
- validity_buffer,
- is_dictionary,
- indices,
- }
- }
-}
-
-impl From for PlainVector {
- fn from(value: ArrayRef) -> Self {
- Self::from(value.into_data())
- }
-}
-
-struct ValueBuffer {
- ptr: *const u8,
- /// Keep the `ptr` alive
- original_buffers: Vec,
-}
-
-impl ValueBuffer {
- pub fn new(dt: &DataType, buffers: Vec, len: usize) -> Self {
- if matches!(dt, DataType::String | DataType::Binary) {
- assert_eq!(
- 2,
- buffers.len(),
- "expected two buffers (offset, value) for string/binary"
- );
-
- let mut string_view_buf = MutableBuffer::from_len_zeroed(len * 16);
- let buf_mut = string_view_buf.as_mut_ptr();
-
- let offsets = buffers[0].as_ptr() as *const i32;
- let values = buffers[1].as_ptr();
-
- let mut dst_offset = 0;
- let mut start = 0;
- unsafe {
- for i in 0..len {
- // StringView format:
- // - length (4 bytes)
- // - first 4 bytes of the string/binary (4 bytes)
- // - pointer to the string/binary (8 bytes)
- let end = *offsets.add(i + 1);
- let len = end - start;
- let value = values.add(start as usize);
- *(buf_mut.add(dst_offset) as *mut i32) = len;
- if len >= STRING_VIEW_PREFIX_LEN as i32 {
- // only store prefix if the string has at least 4 bytes, otherwise, we'll
- // zero pad the bytes.
- std::ptr::copy_nonoverlapping(
- value,
- buf_mut.add(dst_offset + STRING_VIEW_PREFIX_LEN),
- STRING_VIEW_PREFIX_LEN,
- );
- }
- *(buf_mut.add(dst_offset + STRING_VIEW_PREFIX_LEN + 4) as *mut usize) =
- value as usize;
- start = end;
- dst_offset += STRING_VIEW_LEN;
- }
- }
-
- let string_buffer: Buffer = string_view_buf.into();
- let ptr = string_buffer.as_ptr();
-
- Self {
- ptr,
- original_buffers: vec![string_buffer, buffers[1].clone()],
- }
- } else {
- let ptr = buffers[0].as_ptr();
- Self {
- ptr,
- original_buffers: buffers,
- }
- }
- }
-
- /// Returns the raw pointer for the data in this value buffer.
- /// NOTE: caller of this should NOT store the raw pointer to avoid dangling pointers.
- pub fn as_ptr(&self) -> *const u8 {
- self.ptr
- }
-}
-
-struct IndexBuffer {
- ptr: *const u8,
- /// Keep the `ptr` alive.
- buf: Option,
- /// Total number of elements in the index buffer
- len: usize,
-}
-
-impl IndexBuffer {
- pub fn new(buf: Buffer, len: usize) -> Self {
- let ptr = buf.as_ptr();
- Self {
- buf: Some(buf),
- ptr,
- len,
- }
- }
-
- pub fn empty() -> Self {
- Self {
- buf: None,
- ptr: std::ptr::null(),
- len: 0,
- }
- }
-
- #[inline]
- pub fn get(&self, i: usize) -> usize {
- debug_assert!(i < self.len);
- unsafe {
- let ptr = self.ptr as *const i32;
- *ptr.add(i) as usize
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use crate::{
- BoolType, ByteType, DataType, DateType, DecimalType, DoubleType, FloatType, IntegerType,
- NativeEqual, ShortType, StringType, TimestampType, TypeTrait, STRING_VIEW_PREFIX_LEN,
- };
-
- use crate::common::vector::PlainVector;
- use arrow::{
- array::{
- Array, BooleanArray, Date32Array, Decimal128Array, Float32Array, Float64Array,
- Int16Array, Int32Array, Int8Array, StringArray,
- },
- buffer::Buffer,
- datatypes::{DataType as ArrowDataType, ToByteSlice},
- };
- use arrow_array::TimestampMicrosecondArray;
- use arrow_data::ArrayData;
-
- #[test]
- fn primitive_no_null() {
- let arr = Int32Array::from(vec![0, 1, 2, 3, 4]);
- let vector = PlainVector::from(arr.into_data());
-
- assert_eq!(5, vector.num_values());
- assert_eq!(0, vector.num_nulls());
- assert_eq!(4, vector.value_size);
- assert!(vector.validity_buffer.is_none());
-
- for i in 0..5 {
- assert!(!vector.is_null(i));
- assert_eq!(i as i32, vector.value::(i))
- }
- }
-
- fn check_answer(expected: &[Option], actual: &PlainVector) {
- assert_eq!(expected.len(), actual.num_values());
- let nulls = expected
- .iter()
- .filter(|v| v.is_none())
- .collect::>>();
- assert_eq!(nulls.len(), actual.num_nulls());
-
- for i in 0..expected.len() {
- if let Some(v) = expected[i] {
- assert!(!actual.is_null(i));
- assert!(v.is_equal(&actual.value::(i)));
- } else {
- assert!(actual.is_null(i));
- }
- }
- }
-
- #[test]
- fn primitive_with_nulls() {
- let data = vec![Some(0), None, Some(2), None, Some(4)];
- let arr = TimestampMicrosecondArray::from(data.clone());
- let vector = PlainVector::from(arr.into_data());
-
- check_answer::(&data, &vector);
- }
-
- #[test]
- fn primitive_with_offsets_nulls() {
- let arr = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4), None, Some(7)]);
- let data = arr.into_data();
- let vector = PlainVector::from(data.slice(2, 3));
-
- assert_eq!(3, vector.num_values());
- assert_eq!(1, vector.num_nulls());
-
- for i in 0..2 {
- if i % 2 == 0 {
- assert!(!vector.is_null(i));
- assert_eq!((i + 2) as i32, vector.value::(i));
- } else {
- assert!(vector.is_null(i));
- }
- }
- }
-
- #[test]
- fn primitive_dictionary() {
- let value_data = ArrayData::builder(ArrowDataType::Int8)
- .len(8)
- .add_buffer(Buffer::from(
- &[10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
- ))
- .build()
- .unwrap();
-
- // Construct a buffer for value offsets, for the nested array:
- let keys = Buffer::from(&[2_i32, 3, 4].to_byte_slice());
-
- // Construct a dictionary array from the above two
- let key_type = ArrowDataType::Int32;
- let value_type = ArrowDataType::Int8;
- let dict_data_type = ArrowDataType::Dictionary(Box::new(key_type), Box::new(value_type));
- let dict_data = ArrayData::builder(dict_data_type)
- .len(3)
- .add_buffer(keys)
- .add_child_data(value_data)
- .build()
- .unwrap();
-
- let vector = PlainVector::from(dict_data);
-
- assert_eq!(DataType::Byte, *vector.data_type());
- assert_eq!(3, vector.num_values());
- assert_eq!(0, vector.num_nulls());
- assert!(!vector.has_null());
- assert_eq!(12, vector.value::(0));
- assert_eq!(13, vector.value::(1));
- assert_eq!(14, vector.value::(2));
- }
-
- #[test]
- fn bools() {
- let data = vec![Some(true), None, Some(false), None, Some(true)];
- let arr = BooleanArray::from(data.clone());
- let vector = PlainVector::from(arr.into_data());
-
- check_answer::(&data, &vector);
- }
-
- #[test]
- fn bytes() {
- let data = vec![Some(4_i8), None, None, Some(5_i8), Some(7_i8)];
- let arr = Int8Array::from(data.clone());
- let vector = PlainVector::from(arr.into_data());
-
- check_answer::(&data, &vector);
- }
-
- #[test]
- fn shorts() {
- let data = vec![Some(4_i16), None, None, Some(-40_i16), Some(-3_i16)];
- let arr = Int16Array::from(data.clone());
- let vector = PlainVector::from(arr.into_data());
-
- check_answer::(&data, &vector);
- }
-
- #[test]
- fn floats() {
- let data = vec![
- Some(4.0_f32),
- Some(-0.0_f32),
- Some(-3.0_f32),
- Some(0.0_f32),
- Some(std::f32::consts::PI),
- ];
- let arr = Float32Array::from(data.clone());
- let vector = PlainVector::from(arr.into_data());
-
- check_answer::(&data, &vector);
- }
-
- #[test]
- fn doubles() {
- let data = vec![
- None,
- Some(std::f64::consts::PI),
- Some(4.0_f64),
- Some(f64::NAN),
- ];
- let arr = Float64Array::from(data.clone());
- let vector = PlainVector::from(arr.into_data());
-
- check_answer::(&data, &vector);
- }
-
- #[test]
- fn decimals() {
- let data = vec![Some(1_i128), None, None, Some(i128::MAX)];
- let arr = Decimal128Array::from(data.clone());
- let vector = PlainVector::from(arr.into_data());
-
- check_answer::(&data, &vector);
- }
-
- #[test]
- fn timestamps() {
- // 1: 00:00:00.001
- // 37800005: 10:30:00.005
- // 86399210: 23:59:59.210
- let data = vec![Some(1), None, Some(37_800_005), Some(86_399_210)];
- let arr = TimestampMicrosecondArray::from(data.clone());
- let vector = PlainVector::from(arr.into_data());
-
- check_answer::(&data, &vector);
- }
-
- #[test]
- fn dates() {
- let data = vec![Some(100), None, Some(200), None];
- let arr = Date32Array::from(data.clone());
- let vector = PlainVector::from(arr.into_data());
-
- check_answer::(&data, &vector);
- }
-
- #[test]
- fn string_no_nulls() {
- let values: Vec<&str> = vec!["hello", "", "comet"];
- let arr = StringArray::from(values.clone());
-
- let vector = PlainVector::from(arr.into_data());
- assert_eq!(3, vector.num_values());
- assert_eq!(0, vector.num_nulls());
-
- for i in 0..values.len() {
- let expected = values[i];
- let actual = vector.value::(i);
- assert_eq!(expected.len(), actual.len as usize);
- if expected.len() >= STRING_VIEW_PREFIX_LEN {
- assert_eq!(
- &expected[..STRING_VIEW_PREFIX_LEN],
- String::from_utf8_lossy(&actual.prefix)
- );
- }
- assert_eq!(expected, actual.as_utf8_str());
- }
- }
-
- #[test]
- fn string_with_nulls() {
- let data = [Some("hello"), None, Some("comet")];
- let arr = StringArray::from(data.to_vec().clone());
-
- let vector = PlainVector::from(arr.into_data());
- assert_eq!(3, vector.num_values());
- assert_eq!(1, vector.num_nulls());
-
- for i in 0..data.len() {
- if data[i].is_none() {
- assert!(vector.is_null(i));
- } else {
- let expected = data[i].unwrap();
- let actual = vector.value::(i);
- if expected.len() >= STRING_VIEW_PREFIX_LEN {
- assert_eq!(
- &expected[..STRING_VIEW_PREFIX_LEN],
- String::from_utf8_lossy(&actual.prefix)
- );
- }
- assert_eq!(expected, actual.as_utf8_str());
- }
- }
- }
-}
diff --git a/core/src/errors.rs b/core/src/errors.rs
index 04a1629d5..af4fd2697 100644
--- a/core/src/errors.rs
+++ b/core/src/errors.rs
@@ -88,6 +88,9 @@ pub enum CometError {
to_type: String,
},
+ #[error("[ARITHMETIC_OVERFLOW] {from_type} overflow. If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
+ ArithmeticOverflow { from_type: String },
+
#[error(transparent)]
Arrow {
#[from]
diff --git a/core/src/execution/datafusion/expressions/mod.rs b/core/src/execution/datafusion/expressions/mod.rs
index 9db4b65b3..05230b4c2 100644
--- a/core/src/execution/datafusion/expressions/mod.rs
+++ b/core/src/execution/datafusion/expressions/mod.rs
@@ -29,11 +29,13 @@ pub mod avg_decimal;
pub mod bloom_filter_might_contain;
pub mod correlation;
pub mod covariance;
+pub mod negative;
pub mod stats;
pub mod stddev;
pub mod strings;
pub mod subquery;
pub mod sum_decimal;
pub mod temporal;
+pub mod unbound;
mod utils;
pub mod variance;
diff --git a/core/src/execution/datafusion/expressions/negative.rs b/core/src/execution/datafusion/expressions/negative.rs
new file mode 100644
index 000000000..e7aa2ac64
--- /dev/null
+++ b/core/src/execution/datafusion/expressions/negative.rs
@@ -0,0 +1,270 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::errors::CometError;
+use arrow::{compute::kernels::numeric::neg_wrapping, datatypes::IntervalDayTimeType};
+use arrow_array::RecordBatch;
+use arrow_schema::{DataType, Schema};
+use datafusion::{
+ logical_expr::{interval_arithmetic::Interval, ColumnarValue},
+ physical_expr::PhysicalExpr,
+};
+use datafusion_common::{Result, ScalarValue};
+use datafusion_physical_expr::{
+ aggregate::utils::down_cast_any_ref, sort_properties::SortProperties,
+};
+use std::{
+ any::Any,
+ hash::{Hash, Hasher},
+ sync::Arc,
+};
+
+pub fn create_negate_expr(
+ expr: Arc,
+ fail_on_error: bool,
+) -> Result, CometError> {
+ Ok(Arc::new(NegativeExpr::new(expr, fail_on_error)))
+}
+
+/// Negative expression
+#[derive(Debug, Hash)]
+pub struct NegativeExpr {
+ /// Input expression
+ arg: Arc,
+ fail_on_error: bool,
+}
+
+fn arithmetic_overflow_error(from_type: &str) -> CometError {
+ CometError::ArithmeticOverflow {
+ from_type: from_type.to_string(),
+ }
+}
+
+macro_rules! check_overflow {
+ ($array:expr, $array_type:ty, $min_val:expr, $type_name:expr) => {{
+ let typed_array = $array
+ .as_any()
+ .downcast_ref::<$array_type>()
+ .expect(concat!(stringify!($array_type), " expected"));
+ for i in 0..typed_array.len() {
+ if typed_array.value(i) == $min_val {
+ if $type_name == "byte" || $type_name == "short" {
+ let value = typed_array.value(i).to_string() + " caused";
+ return Err(arithmetic_overflow_error(value.as_str()).into());
+ }
+ return Err(arithmetic_overflow_error($type_name).into());
+ }
+ }
+ }};
+}
+
+impl NegativeExpr {
+ /// Create new not expression
+ pub fn new(arg: Arc, fail_on_error: bool) -> Self {
+ Self { arg, fail_on_error }
+ }
+
+ /// Get the input expression
+ pub fn arg(&self) -> &Arc {
+ &self.arg
+ }
+}
+
+impl std::fmt::Display for NegativeExpr {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ write!(f, "(- {})", self.arg)
+ }
+}
+
+impl PhysicalExpr for NegativeExpr {
+ /// Return a reference to Any that can be used for downcasting
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn data_type(&self, input_schema: &Schema) -> Result {
+ self.arg.data_type(input_schema)
+ }
+
+ fn nullable(&self, input_schema: &Schema) -> Result {
+ self.arg.nullable(input_schema)
+ }
+
+ fn evaluate(&self, batch: &RecordBatch) -> Result {
+ let arg = self.arg.evaluate(batch)?;
+
+ // overflow checks only apply in ANSI mode
+ // datatypes supported are byte, short, integer, long, float, interval
+ match arg {
+ ColumnarValue::Array(array) => {
+ if self.fail_on_error {
+ match array.data_type() {
+ DataType::Int8 => {
+ check_overflow!(array, arrow::array::Int8Array, i8::MIN, "byte")
+ }
+ DataType::Int16 => {
+ check_overflow!(array, arrow::array::Int16Array, i16::MIN, "short")
+ }
+ DataType::Int32 => {
+ check_overflow!(array, arrow::array::Int32Array, i32::MIN, "integer")
+ }
+ DataType::Int64 => {
+ check_overflow!(array, arrow::array::Int64Array, i64::MIN, "long")
+ }
+ DataType::Interval(value) => match value {
+ arrow::datatypes::IntervalUnit::YearMonth => check_overflow!(
+ array,
+ arrow::array::IntervalYearMonthArray,
+ i32::MIN,
+ "interval"
+ ),
+ arrow::datatypes::IntervalUnit::DayTime => check_overflow!(
+ array,
+ arrow::array::IntervalDayTimeArray,
+ i64::MIN,
+ "interval"
+ ),
+ arrow::datatypes::IntervalUnit::MonthDayNano => {
+ // Overflow checks are not supported
+ }
+ },
+ _ => {
+ // Overflow checks are not supported for other datatypes
+ }
+ }
+ }
+ let result = neg_wrapping(array.as_ref())?;
+ Ok(ColumnarValue::Array(result))
+ }
+ ColumnarValue::Scalar(scalar) => {
+ if self.fail_on_error {
+ match scalar {
+ ScalarValue::Int8(value) => {
+ if value == Some(i8::MIN) {
+ return Err(arithmetic_overflow_error(" caused").into());
+ }
+ }
+ ScalarValue::Int16(value) => {
+ if value == Some(i16::MIN) {
+ return Err(arithmetic_overflow_error(" caused").into());
+ }
+ }
+ ScalarValue::Int32(value) => {
+ if value == Some(i32::MIN) {
+ return Err(arithmetic_overflow_error("integer").into());
+ }
+ }
+ ScalarValue::Int64(value) => {
+ if value == Some(i64::MIN) {
+ return Err(arithmetic_overflow_error("long").into());
+ }
+ }
+ ScalarValue::IntervalDayTime(value) => {
+ let (days, ms) =
+ IntervalDayTimeType::to_parts(value.unwrap_or_default());
+ if days == i32::MIN || ms == i32::MIN {
+ return Err(arithmetic_overflow_error("interval").into());
+ }
+ }
+ ScalarValue::IntervalYearMonth(value) => {
+ if value == Some(i32::MIN) {
+ return Err(arithmetic_overflow_error("interval").into());
+ }
+ }
+ _ => {
+ // Overflow checks are not supported for other datatypes
+ }
+ }
+ }
+ Ok(ColumnarValue::Scalar((scalar.arithmetic_negate())?))
+ }
+ }
+ }
+
+ fn children(&self) -> Vec> {
+ vec![self.arg.clone()]
+ }
+
+ fn with_new_children(
+ self: Arc,
+ children: Vec>,
+ ) -> Result> {
+ Ok(Arc::new(NegativeExpr::new(
+ children[0].clone(),
+ self.fail_on_error,
+ )))
+ }
+
+ fn dyn_hash(&self, state: &mut dyn Hasher) {
+ let mut s = state;
+ self.hash(&mut s);
+ }
+
+ /// Given the child interval of a NegativeExpr, it calculates the NegativeExpr's interval.
+ /// It replaces the upper and lower bounds after multiplying them with -1.
+ /// Ex: `(a, b]` => `[-b, -a)`
+ fn evaluate_bounds(&self, children: &[&Interval]) -> Result {
+ Interval::try_new(
+ children[0].upper().arithmetic_negate()?,
+ children[0].lower().arithmetic_negate()?,
+ )
+ }
+
+ /// Returns a new [`Interval`] of a NegativeExpr that has the existing `interval` given that
+ /// given the input interval is known to be `children`.
+ fn propagate_constraints(
+ &self,
+ interval: &Interval,
+ children: &[&Interval],
+ ) -> Result