From 65652297114a1d393434ecbe655fc07d0c281ba9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 4 Jun 2024 07:35:24 -0600 Subject: [PATCH 1/2] chore: Switch to stable Rust (#505) * initial commit * change github workflow * try to use stable rustfmt * update CI * lint * CI * clippy * clippy * ignore some tests * remove more nightly references * add some assertions to make code safe * revert a change * use Rust 1.77 * specify rust version 1.77.2 * fix CI --------- Co-authored-by: Chao Sun --- .github/actions/setup-builder/action.yaml | 2 +- .../actions/setup-macos-builder/action.yaml | 2 +- .github/workflows/benchmark-tpch.yml | 2 +- .github/workflows/benchmark.yml | 2 +- .github/workflows/pr_build.yml | 2 +- .github/workflows/spark_sql_test.yml | 2 +- .github/workflows/spark_sql_test_ansi.yml | 2 +- Makefile | 16 +- core/Cargo.lock | 354 +++++------- core/Cargo.toml | 1 - core/rustfmt.toml | 8 - core/src/common/mod.rs | 5 - core/src/common/mutable_vector.rs | 409 -------------- core/src/common/vector.rs | 523 ------------------ core/src/execution/datafusion/planner.rs | 15 +- core/src/execution/datafusion/spark_hash.rs | 2 +- core/src/execution/shuffle/row.rs | 6 +- core/src/lib.rs | 6 - rust-toolchain | 2 +- .../apache/comet/CometExpressionSuite.scala | 4 +- 20 files changed, 180 insertions(+), 1185 deletions(-) delete mode 100644 core/src/common/mutable_vector.rs delete mode 100644 core/src/common/vector.rs diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml index 6fa716492..0787d9eed 100644 --- a/.github/actions/setup-builder/action.yaml +++ b/.github/actions/setup-builder/action.yaml @@ -21,7 +21,7 @@ inputs: rust-version: description: 'version of rust to install (e.g. nightly)' required: true - default: 'nightly' + default: 'stable' jdk-version: description: 'jdk version to install (e.g., 17)' required: true diff --git a/.github/actions/setup-macos-builder/action.yaml b/.github/actions/setup-macos-builder/action.yaml index cc1b63170..7c1c8b522 100644 --- a/.github/actions/setup-macos-builder/action.yaml +++ b/.github/actions/setup-macos-builder/action.yaml @@ -21,7 +21,7 @@ inputs: rust-version: description: 'version of rust to install (e.g. nightly)' required: true - default: 'nightly' + default: 'stable' jdk-version: description: 'jdk version to install (e.g., 17)' required: true diff --git a/.github/workflows/benchmark-tpch.yml b/.github/workflows/benchmark-tpch.yml index fbf5cfdb5..f4c547a8f 100644 --- a/.github/workflows/benchmark-tpch.yml +++ b/.github/workflows/benchmark-tpch.yml @@ -37,7 +37,7 @@ on: workflow_dispatch: env: - RUST_VERSION: nightly + RUST_VERSION: stable jobs: prepare: diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index e9767f767..023b6a685 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -37,7 +37,7 @@ on: workflow_dispatch: env: - RUST_VERSION: nightly + RUST_VERSION: stable jobs: prepare: diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml index 7de198a47..503978fc5 100644 --- a/.github/workflows/pr_build.yml +++ b/.github/workflows/pr_build.yml @@ -37,7 +37,7 @@ on: workflow_dispatch: env: - RUST_VERSION: nightly + RUST_VERSION: stable jobs: linux-test: diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 997136ded..352e0ecbe 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -37,7 +37,7 @@ on: workflow_dispatch: env: - RUST_VERSION: nightly + RUST_VERSION: stable jobs: spark-sql-catalyst: diff --git a/.github/workflows/spark_sql_test_ansi.yml b/.github/workflows/spark_sql_test_ansi.yml index 5c5d28589..337e59efe 100644 --- a/.github/workflows/spark_sql_test_ansi.yml +++ b/.github/workflows/spark_sql_test_ansi.yml @@ -37,7 +37,7 @@ on: workflow_dispatch: env: - RUST_VERSION: nightly + RUST_VERSION: stable jobs: spark-sql-catalyst: diff --git a/Makefile b/Makefile index f20687cda..b9b9707ba 100644 --- a/Makefile +++ b/Makefile @@ -44,10 +44,10 @@ format: core-amd64: rustup target add x86_64-apple-darwin - cd core && RUSTFLAGS="-Ctarget-cpu=skylake -Ctarget-feature=-prefer-256-bit" CC=o64-clang CXX=o64-clang++ cargo build --target x86_64-apple-darwin --features nightly --release + cd core && RUSTFLAGS="-Ctarget-cpu=skylake -Ctarget-feature=-prefer-256-bit" CC=o64-clang CXX=o64-clang++ cargo build --target x86_64-apple-darwin --release mkdir -p common/target/classes/org/apache/comet/darwin/x86_64 cp core/target/x86_64-apple-darwin/release/libcomet.dylib common/target/classes/org/apache/comet/darwin/x86_64 - cd core && RUSTFLAGS="-Ctarget-cpu=haswell -Ctarget-feature=-prefer-256-bit" cargo build --features nightly --release + cd core && RUSTFLAGS="-Ctarget-cpu=haswell -Ctarget-feature=-prefer-256-bit" cargo build --release mkdir -p common/target/classes/org/apache/comet/linux/amd64 cp core/target/release/libcomet.so common/target/classes/org/apache/comet/linux/amd64 jar -cf common/target/comet-native-x86_64.jar \ @@ -57,10 +57,10 @@ core-amd64: core-arm64: rustup target add aarch64-apple-darwin - cd core && RUSTFLAGS="-Ctarget-cpu=apple-m1" CC=arm64-apple-darwin21.4-clang CXX=arm64-apple-darwin21.4-clang++ CARGO_FEATURE_NEON=1 cargo build --target aarch64-apple-darwin --features nightly --release + cd core && RUSTFLAGS="-Ctarget-cpu=apple-m1" CC=arm64-apple-darwin21.4-clang CXX=arm64-apple-darwin21.4-clang++ CARGO_FEATURE_NEON=1 cargo build --target aarch64-apple-darwin --release mkdir -p common/target/classes/org/apache/comet/darwin/aarch64 cp core/target/aarch64-apple-darwin/release/libcomet.dylib common/target/classes/org/apache/comet/darwin/aarch64 - cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --features nightly --release + cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release mkdir -p common/target/classes/org/apache/comet/linux/aarch64 cp core/target/release/libcomet.so common/target/classes/org/apache/comet/linux/aarch64 jar -cf common/target/comet-native-aarch64.jar \ @@ -70,12 +70,12 @@ core-arm64: release-linux: clean rustup target add aarch64-apple-darwin x86_64-apple-darwin - cd core && RUSTFLAGS="-Ctarget-cpu=apple-m1" CC=arm64-apple-darwin21.4-clang CXX=arm64-apple-darwin21.4-clang++ CARGO_FEATURE_NEON=1 cargo build --target aarch64-apple-darwin --features nightly --release - cd core && RUSTFLAGS="-Ctarget-cpu=skylake -Ctarget-feature=-prefer-256-bit" CC=o64-clang CXX=o64-clang++ cargo build --target x86_64-apple-darwin --features nightly --release - cd core && RUSTFLAGS="-Ctarget-cpu=native -Ctarget-feature=-prefer-256-bit" cargo build --features nightly --release + cd core && RUSTFLAGS="-Ctarget-cpu=apple-m1" CC=arm64-apple-darwin21.4-clang CXX=arm64-apple-darwin21.4-clang++ CARGO_FEATURE_NEON=1 cargo build --target aarch64-apple-darwin --release + cd core && RUSTFLAGS="-Ctarget-cpu=skylake -Ctarget-feature=-prefer-256-bit" CC=o64-clang CXX=o64-clang++ cargo build --target x86_64-apple-darwin --release + cd core && RUSTFLAGS="-Ctarget-cpu=native -Ctarget-feature=-prefer-256-bit" cargo build --release ./mvnw install -Prelease -DskipTests $(PROFILES) release: - cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --features nightly --release + cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release ./mvnw install -Prelease -DskipTests $(PROFILES) benchmark-%: clean release cd spark && COMET_CONF_DIR=$(shell pwd)/conf MAVEN_OPTS='-Xmx20g' ../mvnw exec:java -Dexec.mainClass="$*" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="$(filter-out $@,$(MAKECMDGOALS))" $(PROFILES) diff --git a/core/Cargo.lock b/core/Cargo.lock index 105bcaf7c..6c8d54ab7 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "addr2line" -version = "0.21.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" dependencies = [ "gimli", ] @@ -84,15 +84,15 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" [[package]] name = "anyhow" -version = "1.0.82" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" [[package]] name = "arc-swap" @@ -333,7 +333,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", ] [[package]] @@ -347,15 +347,15 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.71" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" dependencies = [ "addr2line", "cc", @@ -368,9 +368,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.22.0" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bitflags" @@ -444,9 +444,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" +checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" [[package]] name = "byteorder" @@ -468,12 +468,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.94" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f6e324229dc011159fcc089755d1e2e216a90d43a7dea6853ca740b84f35e7" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" dependencies = [ "jobserver", "libc", + "once_cell", ] [[package]] @@ -625,7 +626,7 @@ dependencies = [ "parquet-format", "paste", "pprof", - "prost 0.12.4", + "prost 0.12.6", "prost-build", "rand", "regex", @@ -705,9 +706,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if", ] @@ -769,9 +770,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.19" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crunchy" @@ -1093,9 +1094,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "equivalent" @@ -1105,9 +1106,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", "windows-sys 0.52.0", @@ -1115,9 +1116,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.2" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "findshlibs" @@ -1149,9 +1150,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.28" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" dependencies = [ "crc32fast", "miniz_oxide", @@ -1228,7 +1229,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", ] [[package]] @@ -1273,9 +1274,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", @@ -1284,9 +1285,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" [[package]] name = "glob" @@ -1312,9 +1313,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", "allocator-api2", @@ -1335,6 +1336,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.3.9" @@ -1425,9 +1432,9 @@ dependencies = [ [[package]] name = "instant" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ "cfg-if", "js-sys", @@ -1493,9 +1500,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "java-locator" -version = "0.1.5" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90003f2fd9c52f212c21d8520f1128da0080bad6fff16b68fe6e7f2f0c3780c2" +checksum = "d2abecabd9961c5e01405a6426687fcf1bd94a269927137e4c3cc1a7419b93fd" dependencies = [ "glob", "lazy_static", @@ -1527,9 +1534,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "jobserver" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "685a7d121ee3f65ae4fddd72b25a04bb36b6af81bc0828f7d5434c0fe60fa3a2" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" dependencies = [ "libc", ] @@ -1615,9 +1622,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.153" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "libloading" @@ -1637,9 +1644,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libmimalloc-sys" -version = "0.1.35" +version = "0.1.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3979b5c37ece694f1f5e51e7ecc871fdb0f517ed04ee45f88d15d6d553cb9664" +checksum = "0e7bb23d733dfcc8af652a78b7bf232f0e967710d044732185e561e47c0336b6" dependencies = [ "cc", "libc", @@ -1647,15 +1654,15 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "lock_api" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ "autocfg", "scopeguard", @@ -1760,18 +1767,18 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.39" +version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa01922b5ea280a911e323e4d2fd24b7fe5cc4042e0d2cda3c40775cdc4bdc9c" +checksum = "e9186d86b79b52f4a77af65604b51225e8db1d6ee7e3f41aec1e40829c71a176" dependencies = [ "libmimalloc-sys", ] [[package]] name = "miniz_oxide" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" dependencies = [ "adler", ] @@ -1795,9 +1802,9 @@ dependencies = [ [[package]] name = "num" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3135b08af27d103b0a51f2ae0f8632117b7b185ccf931445affa8df530576a41" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" dependencies = [ "num-bigint", "num-complex", @@ -1809,20 +1816,19 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" dependencies = [ - "autocfg", "num-integer", "num-traits", ] [[package]] name = "num-complex" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" dependencies = [ "num-traits", ] @@ -1848,9 +1854,9 @@ dependencies = [ [[package]] name = "num-iter" -version = "0.1.44" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" dependencies = [ "autocfg", "num-integer", @@ -1859,11 +1865,10 @@ dependencies = [ [[package]] name = "num-rational" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" dependencies = [ - "autocfg", "num-bigint", "num-integer", "num-traits", @@ -1871,9 +1876,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", "libm", @@ -1891,9 +1896,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" dependencies = [ "memchr", ] @@ -1951,9 +1956,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", "parking_lot_core", @@ -1961,15 +1966,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.48.5", + "windows-targets 0.52.5", ] [[package]] @@ -2001,18 +2006,18 @@ dependencies = [ [[package]] name = "parse-zoneinfo" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" dependencies = [ "regex", ] [[package]] name = "paste" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "percent-encoding" @@ -2022,9 +2027,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "petgraph" -version = "0.6.4" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", "indexmap", @@ -2088,9 +2093,9 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "plotters" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" dependencies = [ "num-traits", "plotters-backend", @@ -2101,15 +2106,15 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" +checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" [[package]] name = "plotters-svg" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" dependencies = [ "plotters-backend", ] @@ -2143,9 +2148,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.80" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56dea16b0a29e94408b9aa5e2940a4eedbd128a1ba20e8f7ae60fd3d465af0e" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" dependencies = [ "unicode-ident", ] @@ -2162,12 +2167,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.12.4" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" dependencies = [ "bytes", - "prost-derive 0.12.4", + "prost-derive 0.12.6", ] [[package]] @@ -2205,15 +2210,15 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.12.4" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" dependencies = [ "anyhow", "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", ] [[package]] @@ -2296,11 +2301,11 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.4.1" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.5.0", ] [[package]] @@ -2343,9 +2348,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc_version" @@ -2358,9 +2363,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.32" +version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ "bitflags 2.5.0", "errno", @@ -2371,15 +2376,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" [[package]] name = "ryu" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "same-file" @@ -2398,9 +2403,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "seq-macro" @@ -2410,9 +2415,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.197" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" dependencies = [ "serde_derive", ] @@ -2429,20 +2434,20 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", ] [[package]] name = "serde_json" -version = "1.0.116" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" dependencies = [ "itoa", "ryu", @@ -2546,7 +2551,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", ] [[package]] @@ -2578,15 +2583,15 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.26.2" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" +checksum = "f7993a8e3a9e88a00351486baae9522c91b123a088f76469e5bd5cc17198ea87" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "rustversion", - "syn 2.0.59", + "syn 2.0.66", ] [[package]] @@ -2631,9 +2636,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.59" +version = "2.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a6531ffc7b071655e4ce2e04bd464c4830bb585a61cabb96cf808f05172615a" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" dependencies = [ "proc-macro2", "quote", @@ -2654,22 +2659,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", ] [[package]] @@ -2753,9 +2758,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.37.0" +version = "1.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" dependencies = [ "backtrace", "bytes", @@ -2766,13 +2771,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", ] [[package]] @@ -2805,7 +2810,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", ] [[package]] @@ -2872,9 +2877,9 @@ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] name = "unicode-width" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" [[package]] name = "unsafe-any-ors" @@ -2954,7 +2959,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", "wasm-bindgen-shared", ] @@ -2976,7 +2981,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3027,11 +3032,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "winapi", + "windows-sys 0.52.0", ] [[package]] @@ -3082,21 +3087,6 @@ dependencies = [ "windows_x86_64_msvc 0.42.2", ] -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - [[package]] name = "windows-targets" version = "0.52.5" @@ -3119,12 +3109,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.5" @@ -3137,12 +3121,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_aarch64_msvc" version = "0.52.5" @@ -3155,12 +3133,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_gnu" version = "0.52.5" @@ -3179,12 +3151,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_i686_msvc" version = "0.52.5" @@ -3197,12 +3163,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnu" version = "0.52.5" @@ -3215,12 +3175,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.5" @@ -3233,12 +3187,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "windows_x86_64_msvc" version = "0.52.5" @@ -3247,22 +3195,22 @@ checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "zerocopy" -version = "0.7.32" +version = "0.7.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.32" +version = "0.7.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.59", + "syn 2.0.66", ] [[package]] diff --git a/core/Cargo.toml b/core/Cargo.toml index 6a179a6ee..4584dffce 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -89,7 +89,6 @@ assertables = "7" [features] default = [] -nightly = [] [profile.release] debug = true diff --git a/core/rustfmt.toml b/core/rustfmt.toml index 39a3fe635..3463af618 100644 --- a/core/rustfmt.toml +++ b/core/rustfmt.toml @@ -17,11 +17,3 @@ edition = "2021" max_width = 100 - -# The following requires nightly feature: -# rustup install nightly -# rustup component add rustfmt --toolchain nightly -# cargo +nightly fmt -wrap_comments = true -comment_width = 100 -imports_granularity = "Crate" # group imports by crate diff --git a/core/src/common/mod.rs b/core/src/common/mod.rs index 8d5030c02..1b7dfad28 100644 --- a/core/src/common/mod.rs +++ b/core/src/common/mod.rs @@ -35,10 +35,5 @@ trait ValueSetter { fn append_value(&mut self, v: &T::Native); } -mod vector; - mod buffer; pub use buffer::*; - -mod mutable_vector; -pub use mutable_vector::*; diff --git a/core/src/common/mutable_vector.rs b/core/src/common/mutable_vector.rs deleted file mode 100644 index ba29fc01a..000000000 --- a/core/src/common/mutable_vector.rs +++ /dev/null @@ -1,409 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use arrow::{ - array::ArrayData, buffer::Buffer as ArrowBuffer, datatypes::DataType as ArrowDataType, -}; - -use crate::{ - common::{bit, CometBuffer, ValueGetter, ValueSetter}, - BinaryType, StringType, StringView, TypeTrait, -}; - -const DEFAULT_ARRAY_LEN: usize = 4; - -/// A mutable vector that can be re-used across batches. -#[derive(Debug)] -pub struct MutableVector { - /// The Arrow type for the elements of this vector. - pub(crate) arrow_type: ArrowDataType, - - /// The number of total elements in this vector. - pub(crate) num_values: usize, - - /// The number of null elements in this vector, must <= `num_values`. - pub(crate) num_nulls: usize, - - /// The capacity of the vector - pub(crate) capacity: usize, - - /// How many bits are required to store a single value - pub(crate) bit_width: usize, - - /// The validity buffer of this Arrow vector. A bit set at position `i` indicates the `i`th - /// element is not null. Otherwise, an unset bit at position `i` indicates the `i`th element is - /// null. - pub(crate) validity_buffer: CometBuffer, - - /// The value buffer of this Arrow vector. This could store either offsets if the vector - /// is of list or struct type, or actual values themselves otherwise. - pub(crate) value_buffer: CometBuffer, - - /// Child vectors for non-primitive types (e.g., list, struct). - pub(crate) children: Vec, - - /// Dictionary (i.e., values) associated with this vector. Only set if using dictionary - /// encoding. - pub(crate) dictionary: Option>, - - /// Whether all the values in the vector are not null. - /// - /// This is useful so we can skip setting non-null for each individual value, in the - /// `validity_buffer`. - all_non_null: bool, -} - -impl MutableVector { - pub fn new(capacity: usize, arrow_type: &ArrowDataType) -> Self { - let bit_width = Self::bit_width(arrow_type); - Self::new_with_bit_width(capacity, arrow_type.clone(), bit_width) - } - - pub fn new_with_bit_width( - capacity: usize, - arrow_type: ArrowDataType, - bit_width: usize, - ) -> Self { - let validity_len = bit::ceil(capacity, 8); - let validity_buffer = CometBuffer::new(validity_len); - - let mut value_capacity = capacity; - if Self::is_binary_type(&arrow_type) { - // Arrow offset array needs to have one extra slot - value_capacity += 1; - } - // Make sure the capacity is positive - let len = bit::ceil(value_capacity * bit_width, 8); - let mut value_buffer = CometBuffer::new(len); - - let mut children = Vec::new(); - - match arrow_type { - ArrowDataType::Binary | ArrowDataType::Utf8 => { - children.push(MutableVector::new_with_bit_width( - capacity, - ArrowDataType::Int8, - DEFAULT_ARRAY_LEN * 8, - )); - } - _ => {} - } - - if Self::is_binary_type(&arrow_type) { - // Setup the first offset which is always 0. - let zero: u32 = 0; - bit::memcpy_value(&zero, 4, &mut value_buffer); - } - - Self { - arrow_type, - num_values: 0, - num_nulls: 0, - capacity, - bit_width, - validity_buffer, - value_buffer, - children, - dictionary: None, - all_non_null: false, - } - } - - /// Appends a non-null value `v` to the end of this vector. - #[inline] - pub fn append_value(&mut self, v: &T::Native) { - >::append_value(self, v); - } - - /// Gets the non-null value at `idx` of this vector. - #[inline] - pub fn value(&self, idx: usize) -> T::Native { - >::value(self, idx) - } - - /// Whether the given value at `idx` of this vector is null. - #[inline] - pub fn is_null(&self, idx: usize) -> bool { - unsafe { !bit::get_bit_raw(self.validity_buffer.as_ptr(), idx) } - } - - /// Resets this vector to the initial state. - #[inline] - pub fn reset(&mut self) { - self.num_values = 0; - self.num_nulls = 0; - self.all_non_null = false; - self.validity_buffer.reset(); - if Self::is_binary_type(&self.arrow_type) { - // Reset the first offset to 0 - let zero: u32 = 0; - bit::memcpy_value(&zero, 4, &mut self.value_buffer); - // Also reset the child value vector - let child = &mut self.children[0]; - child.reset(); - } else if Self::should_reset_value_buffer(&self.arrow_type) { - self.value_buffer.reset(); - } - } - - /// Appends a new null value to the end of this vector. - #[inline] - pub fn put_null(&mut self) { - self.put_nulls(1) - } - - /// Appends `n` null values to the end of this vector. - #[inline] - pub fn put_nulls(&mut self, n: usize) { - // We need to update offset buffer for binary. - if Self::is_binary_type(&self.arrow_type) { - let mut offset = self.num_values * 4; - let prev_offset_value = bit::read_num_bytes_u32(4, &self.value_buffer[offset..]); - offset += 4; - (0..n).for_each(|_| { - bit::memcpy_value(&prev_offset_value, 4, &mut self.value_buffer[offset..]); - offset += 4; - }); - } - - self.num_nulls += n; - self.num_values += n; - } - - /// Returns the number of total values (including both null and non-null) of this vector. - #[inline] - pub fn num_values(&self) -> usize { - self.num_values - } - - /// Returns the number of null values of this vector. - #[inline] - pub fn num_nulls(&self) -> usize { - self.num_nulls - } - - #[inline] - pub fn set_not_null(&mut self, i: usize) { - unsafe { - bit::set_bit_raw(self.validity_buffer.as_mut_ptr(), i); - } - } - - /// Sets all values in this vector to be non-null. - #[inline] - pub fn set_all_non_null(&mut self) { - self.all_non_null = true; - } - - /// Sets the content of validity buffer to be `buffer`. - pub fn set_validity_buffer(&mut self, buffer: &ArrowBuffer) { - self.validity_buffer = buffer.into(); - } - - /// Sets the content of value buffer to be `buffer`. - pub fn set_value_buffer(&mut self, buffer: &ArrowBuffer) { - self.value_buffer = buffer.into(); - } - - /// Sets the dictionary of this to be `dict`. - pub fn set_dictionary(&mut self, dict: MutableVector) { - self.dictionary = Some(Box::new(dict)) - } - - /// Clones this into an Arrow [`ArrayData`](arrow::array::ArrayData). Note that the caller of - /// this method MUST make sure the returned `ArrayData` won't live longer than this vector - /// itself. Otherwise, dangling pointer may happen. - /// - /// # Safety - /// - /// This method is highly unsafe since it calls `to_immutable` which leaks raw pointer to the - /// memory region that are tracked by `ArrowMutableBuffer`. Please see comments on - /// `to_immutable` buffer to understand the motivation. - pub fn get_array_data(&mut self) -> ArrayData { - unsafe { - let data_type = if let Some(d) = &self.dictionary { - ArrowDataType::Dictionary( - Box::new(ArrowDataType::Int32), - Box::new(d.arrow_type.clone()), - ) - } else { - self.arrow_type.clone() - }; - let mut builder = ArrayData::builder(data_type) - .len(self.num_values) - .add_buffer(self.value_buffer.to_arrow()); - - builder = if self.all_non_null { - builder.null_count(0) - } else { - builder - .null_bit_buffer(Some(self.validity_buffer.to_arrow())) - .null_count(self.num_nulls) - }; - - if Self::is_binary_type(&self.arrow_type) && self.dictionary.is_none() { - let child = &mut self.children[0]; - builder = builder.add_buffer(child.value_buffer.to_arrow()); - } - - if let Some(d) = &mut self.dictionary { - builder = builder.add_child_data(d.get_array_data()); - } - - builder.build_unchecked() - } - } - - /// Returns the number of bits it takes to store one element of `arrow_type` in the value buffer - /// of this vector. - pub fn bit_width(arrow_type: &ArrowDataType) -> usize { - match arrow_type { - ArrowDataType::Boolean => 1, - ArrowDataType::Int8 => 8, - ArrowDataType::Int16 => 16, - ArrowDataType::Int32 | ArrowDataType::Float32 | ArrowDataType::Date32 => 32, - ArrowDataType::Int64 | ArrowDataType::Float64 | ArrowDataType::Timestamp(_, _) => 64, - ArrowDataType::FixedSizeBinary(type_length) => *type_length as usize * 8, - ArrowDataType::Decimal128(..) => 128, // Arrow stores decimal with 16 bytes - ArrowDataType::Binary | ArrowDataType::Utf8 => 32, // Only count offset size - dt => panic!("Unsupported Arrow data type: {:?}", dt), - } - } - - #[inline] - fn is_binary_type(dt: &ArrowDataType) -> bool { - matches!(dt, ArrowDataType::Binary | ArrowDataType::Utf8) - } - - #[inline] - fn should_reset_value_buffer(dt: &ArrowDataType) -> bool { - // - Boolean type expects have a zeroed value buffer - // - Decimal may pad buffer with 0xff so we need to clear them before a new batch - matches!(dt, ArrowDataType::Boolean | ArrowDataType::Decimal128(_, _)) - } - - /// Creates an immutable reference from a mutable Arrow buffer `buf`. - /// - /// # Safety - /// - /// This function is highly unsafe. Please see documentation of the [`to_arrow`] method for - /// details. - #[inline] - unsafe fn to_immutable(buf: &CometBuffer) -> ArrowBuffer { - buf.to_arrow() - } -} - -impl ValueGetter for MutableVector { - default fn value(&self, idx: usize) -> T::Native { - unsafe { - let ptr = self.value_buffer.as_ptr() as *const T::Native; - *ptr.add(idx) - } - } -} - -impl ValueGetter for MutableVector { - fn value(&self, _: usize) -> StringView { - unimplemented!("'value' on StringType is currently unsupported"); - } -} - -impl ValueGetter for MutableVector { - fn value(&self, _: usize) -> StringView { - unimplemented!("'value' on BinaryType is currently unsupported"); - } -} - -impl ValueSetter for MutableVector { - default fn append_value(&mut self, v: &T::Native) { - unsafe { - let ptr = self.value_buffer.as_mut_ptr() as *mut T::Native; - *ptr.add(self.num_values) = *v; - } - self.num_values += 1; - } -} - -impl ValueSetter for MutableVector { - fn append_value(&mut self, _: &StringView) { - unimplemented!("'append_value' on StringType is currently unsupported"); - } -} - -impl ValueSetter for MutableVector { - fn append_value(&mut self, _: &StringView) { - unimplemented!("'append_value' on BinaryType is currently unsupported"); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::IntegerType; - - #[test] - fn set_and_get() { - let mut mv = MutableVector::new(1024, &ArrowDataType::Int32); - - for n in 0..100 { - mv.append_value::(&(n * n)); - } - assert_eq!(mv.num_values(), 100); - assert_eq!(mv.num_nulls(), 0); - - for n in 0..100 { - assert_eq!(mv.value::(n) as usize, n * n); - } - - mv.reset(); - - for n in 0..200 { - if n % 2 == 0 { - mv.put_null(); - } else { - mv.append_value::(&(n * 2)); - } - } - - assert_eq!(mv.num_values(), 200); - assert_eq!(mv.num_nulls(), 100); - - for n in 0..200 { - if n % 2 == 0 { - assert!(mv.is_null(n)); - } else { - assert_eq!(mv.value::(n) as usize, n * 2); - } - } - } - - #[test] - #[should_panic] - fn set_string_unsupported() { - let mut mv = MutableVector::new(1024, &ArrowDataType::Utf8); - let sv = StringView::default(); - mv.append_value::(&sv); - } - - #[test] - #[should_panic] - fn get_string_unsupported() { - let mv = MutableVector::new(1024, &ArrowDataType::Utf8); - mv.value::(0); - } -} diff --git a/core/src/common/vector.rs b/core/src/common/vector.rs deleted file mode 100644 index 1afb1e78f..000000000 --- a/core/src/common/vector.rs +++ /dev/null @@ -1,523 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::{ - common::{bit, ValueGetter}, - BoolType, DataType, TypeTrait, BITS_PER_BYTE, STRING_VIEW_LEN, STRING_VIEW_PREFIX_LEN, -}; -use arrow::{ - array::{Array, ArrayRef}, - buffer::{Buffer, MutableBuffer}, - datatypes::DataType as ArrowDataType, -}; -use arrow_data::ArrayData; - -/// A vector that holds elements of plain types (i.e., no nested type such as list, map, struct). -pub struct PlainVector { - /// The data type for elements in this vector - data_type: DataType, - /// Total number of values in this vector - num_values: usize, - /// Total number of nulls in this vector. Must <= `num_values`. - num_nulls: usize, - /// The value buffer - value_buffer: ValueBuffer, - /// Number of bytes for each element in the vector. For variable length types such as string - /// and binary, this will be the size of [`StringView`] which is always 16 bytes. - value_size: usize, - /// Offsets into buffers - offset: usize, - /// The validity buffer. If empty, all values in this vector are not null. - validity_buffer: Option, - /// Whether this vector is dictionary encoded - is_dictionary: bool, - /// Indices (or dictionary keys) when `is_dictionary` is true. Otherwise, this is always - /// an empty vector. - indices: IndexBuffer, -} - -impl ValueGetter for PlainVector { - default fn value(&self, idx: usize) -> T::Native { - let offset = self.offset(idx); - unsafe { - let ptr = self.value_buffer.as_ptr() as *const T::Native; - *ptr.add(offset) - } - } -} - -impl ValueGetter for PlainVector { - fn value(&self, idx: usize) -> bool { - let offset = self.offset(idx); - unsafe { bit::get_bit_raw(self.value_buffer.as_ptr(), offset) } - } -} - -impl PlainVector { - /// Returns the data type of this vector. - pub fn data_type(&self) -> &DataType { - &self.data_type - } - - /// Returns the total number of elements in this vector. - pub fn num_values(&self) -> usize { - self.num_values - } - - /// Returns the total number of nulls in this vector. - pub fn num_nulls(&self) -> usize { - self.num_nulls - } - - /// Whether there is any null in this vector. - pub fn has_null(&self) -> bool { - self.num_nulls > 0 - } - - /// Whether the element at `idx` is null. - pub fn is_null(&self, idx: usize) -> bool { - if let Some(validity_buffer) = &self.validity_buffer { - unsafe { - return !bit::get_bit_raw(validity_buffer.as_ptr(), self.offset + idx); - } - } - - false - } - - #[inline(always)] - pub fn value(&self, idx: usize) -> T::Native { - >::value(self, idx) - } - - #[inline(always)] - fn offset(&self, idx: usize) -> usize { - let idx = self.offset + idx; - if self.is_dictionary { - self.indices.get(idx) - } else { - idx - } - } -} - -impl From for PlainVector { - fn from(data: ArrayData) -> Self { - assert!(!data.buffers().is_empty(), "expected at least one buffer"); - let arrow_dt = data.data_type(); - let dt: DataType = arrow_dt.into(); - let is_dictionary = matches!(arrow_dt, ArrowDataType::Dictionary(_, _)); - - let mut value_buffers = data.buffers(); - let mut indices = IndexBuffer::empty(); - let validity_buffer = data.nulls().map(|nb| nb.buffer().clone()); - - if is_dictionary { - // in case of dictionary data, the dictionary values are stored in child data, while - // dictionary keys are stored in `value_buffer`. - assert_eq!( - data.child_data().len(), - 1, - "child data should contain a single array" - ); - let child_data = &data.child_data()[0]; - indices = IndexBuffer::new(value_buffers[0].clone(), data.len() + data.offset()); - value_buffers = child_data.buffers(); - } - - let value_size = dt.kind().type_size() / BITS_PER_BYTE; - let value_buffer = ValueBuffer::new(&dt, value_buffers.to_vec(), data.len()); - - Self { - data_type: dt, - num_values: data.len(), - num_nulls: data.null_count(), - value_buffer, - value_size, - offset: data.offset(), - validity_buffer, - is_dictionary, - indices, - } - } -} - -impl From for PlainVector { - fn from(value: ArrayRef) -> Self { - Self::from(value.into_data()) - } -} - -struct ValueBuffer { - ptr: *const u8, - /// Keep the `ptr` alive - original_buffers: Vec, -} - -impl ValueBuffer { - pub fn new(dt: &DataType, buffers: Vec, len: usize) -> Self { - if matches!(dt, DataType::String | DataType::Binary) { - assert_eq!( - 2, - buffers.len(), - "expected two buffers (offset, value) for string/binary" - ); - - let mut string_view_buf = MutableBuffer::from_len_zeroed(len * 16); - let buf_mut = string_view_buf.as_mut_ptr(); - - let offsets = buffers[0].as_ptr() as *const i32; - let values = buffers[1].as_ptr(); - - let mut dst_offset = 0; - let mut start = 0; - unsafe { - for i in 0..len { - // StringView format: - // - length (4 bytes) - // - first 4 bytes of the string/binary (4 bytes) - // - pointer to the string/binary (8 bytes) - let end = *offsets.add(i + 1); - let len = end - start; - let value = values.add(start as usize); - *(buf_mut.add(dst_offset) as *mut i32) = len; - if len >= STRING_VIEW_PREFIX_LEN as i32 { - // only store prefix if the string has at least 4 bytes, otherwise, we'll - // zero pad the bytes. - std::ptr::copy_nonoverlapping( - value, - buf_mut.add(dst_offset + STRING_VIEW_PREFIX_LEN), - STRING_VIEW_PREFIX_LEN, - ); - } - *(buf_mut.add(dst_offset + STRING_VIEW_PREFIX_LEN + 4) as *mut usize) = - value as usize; - start = end; - dst_offset += STRING_VIEW_LEN; - } - } - - let string_buffer: Buffer = string_view_buf.into(); - let ptr = string_buffer.as_ptr(); - - Self { - ptr, - original_buffers: vec![string_buffer, buffers[1].clone()], - } - } else { - let ptr = buffers[0].as_ptr(); - Self { - ptr, - original_buffers: buffers, - } - } - } - - /// Returns the raw pointer for the data in this value buffer. - /// NOTE: caller of this should NOT store the raw pointer to avoid dangling pointers. - pub fn as_ptr(&self) -> *const u8 { - self.ptr - } -} - -struct IndexBuffer { - ptr: *const u8, - /// Keep the `ptr` alive. - buf: Option, - /// Total number of elements in the index buffer - len: usize, -} - -impl IndexBuffer { - pub fn new(buf: Buffer, len: usize) -> Self { - let ptr = buf.as_ptr(); - Self { - buf: Some(buf), - ptr, - len, - } - } - - pub fn empty() -> Self { - Self { - buf: None, - ptr: std::ptr::null(), - len: 0, - } - } - - #[inline] - pub fn get(&self, i: usize) -> usize { - debug_assert!(i < self.len); - unsafe { - let ptr = self.ptr as *const i32; - *ptr.add(i) as usize - } - } -} - -#[cfg(test)] -mod tests { - use crate::{ - BoolType, ByteType, DataType, DateType, DecimalType, DoubleType, FloatType, IntegerType, - NativeEqual, ShortType, StringType, TimestampType, TypeTrait, STRING_VIEW_PREFIX_LEN, - }; - - use crate::common::vector::PlainVector; - use arrow::{ - array::{ - Array, BooleanArray, Date32Array, Decimal128Array, Float32Array, Float64Array, - Int16Array, Int32Array, Int8Array, StringArray, - }, - buffer::Buffer, - datatypes::{DataType as ArrowDataType, ToByteSlice}, - }; - use arrow_array::TimestampMicrosecondArray; - use arrow_data::ArrayData; - - #[test] - fn primitive_no_null() { - let arr = Int32Array::from(vec![0, 1, 2, 3, 4]); - let vector = PlainVector::from(arr.into_data()); - - assert_eq!(5, vector.num_values()); - assert_eq!(0, vector.num_nulls()); - assert_eq!(4, vector.value_size); - assert!(vector.validity_buffer.is_none()); - - for i in 0..5 { - assert!(!vector.is_null(i)); - assert_eq!(i as i32, vector.value::(i)) - } - } - - fn check_answer(expected: &[Option], actual: &PlainVector) { - assert_eq!(expected.len(), actual.num_values()); - let nulls = expected - .iter() - .filter(|v| v.is_none()) - .collect::>>(); - assert_eq!(nulls.len(), actual.num_nulls()); - - for i in 0..expected.len() { - if let Some(v) = expected[i] { - assert!(!actual.is_null(i)); - assert!(v.is_equal(&actual.value::(i))); - } else { - assert!(actual.is_null(i)); - } - } - } - - #[test] - fn primitive_with_nulls() { - let data = vec![Some(0), None, Some(2), None, Some(4)]; - let arr = TimestampMicrosecondArray::from(data.clone()); - let vector = PlainVector::from(arr.into_data()); - - check_answer::(&data, &vector); - } - - #[test] - fn primitive_with_offsets_nulls() { - let arr = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4), None, Some(7)]); - let data = arr.into_data(); - let vector = PlainVector::from(data.slice(2, 3)); - - assert_eq!(3, vector.num_values()); - assert_eq!(1, vector.num_nulls()); - - for i in 0..2 { - if i % 2 == 0 { - assert!(!vector.is_null(i)); - assert_eq!((i + 2) as i32, vector.value::(i)); - } else { - assert!(vector.is_null(i)); - } - } - } - - #[test] - fn primitive_dictionary() { - let value_data = ArrayData::builder(ArrowDataType::Int8) - .len(8) - .add_buffer(Buffer::from( - &[10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(), - )) - .build() - .unwrap(); - - // Construct a buffer for value offsets, for the nested array: - let keys = Buffer::from(&[2_i32, 3, 4].to_byte_slice()); - - // Construct a dictionary array from the above two - let key_type = ArrowDataType::Int32; - let value_type = ArrowDataType::Int8; - let dict_data_type = ArrowDataType::Dictionary(Box::new(key_type), Box::new(value_type)); - let dict_data = ArrayData::builder(dict_data_type) - .len(3) - .add_buffer(keys) - .add_child_data(value_data) - .build() - .unwrap(); - - let vector = PlainVector::from(dict_data); - - assert_eq!(DataType::Byte, *vector.data_type()); - assert_eq!(3, vector.num_values()); - assert_eq!(0, vector.num_nulls()); - assert!(!vector.has_null()); - assert_eq!(12, vector.value::(0)); - assert_eq!(13, vector.value::(1)); - assert_eq!(14, vector.value::(2)); - } - - #[test] - fn bools() { - let data = vec![Some(true), None, Some(false), None, Some(true)]; - let arr = BooleanArray::from(data.clone()); - let vector = PlainVector::from(arr.into_data()); - - check_answer::(&data, &vector); - } - - #[test] - fn bytes() { - let data = vec![Some(4_i8), None, None, Some(5_i8), Some(7_i8)]; - let arr = Int8Array::from(data.clone()); - let vector = PlainVector::from(arr.into_data()); - - check_answer::(&data, &vector); - } - - #[test] - fn shorts() { - let data = vec![Some(4_i16), None, None, Some(-40_i16), Some(-3_i16)]; - let arr = Int16Array::from(data.clone()); - let vector = PlainVector::from(arr.into_data()); - - check_answer::(&data, &vector); - } - - #[test] - fn floats() { - let data = vec![ - Some(4.0_f32), - Some(-0.0_f32), - Some(-3.0_f32), - Some(0.0_f32), - Some(std::f32::consts::PI), - ]; - let arr = Float32Array::from(data.clone()); - let vector = PlainVector::from(arr.into_data()); - - check_answer::(&data, &vector); - } - - #[test] - fn doubles() { - let data = vec![ - None, - Some(std::f64::consts::PI), - Some(4.0_f64), - Some(f64::NAN), - ]; - let arr = Float64Array::from(data.clone()); - let vector = PlainVector::from(arr.into_data()); - - check_answer::(&data, &vector); - } - - #[test] - fn decimals() { - let data = vec![Some(1_i128), None, None, Some(i128::MAX)]; - let arr = Decimal128Array::from(data.clone()); - let vector = PlainVector::from(arr.into_data()); - - check_answer::(&data, &vector); - } - - #[test] - fn timestamps() { - // 1: 00:00:00.001 - // 37800005: 10:30:00.005 - // 86399210: 23:59:59.210 - let data = vec![Some(1), None, Some(37_800_005), Some(86_399_210)]; - let arr = TimestampMicrosecondArray::from(data.clone()); - let vector = PlainVector::from(arr.into_data()); - - check_answer::(&data, &vector); - } - - #[test] - fn dates() { - let data = vec![Some(100), None, Some(200), None]; - let arr = Date32Array::from(data.clone()); - let vector = PlainVector::from(arr.into_data()); - - check_answer::(&data, &vector); - } - - #[test] - fn string_no_nulls() { - let values: Vec<&str> = vec!["hello", "", "comet"]; - let arr = StringArray::from(values.clone()); - - let vector = PlainVector::from(arr.into_data()); - assert_eq!(3, vector.num_values()); - assert_eq!(0, vector.num_nulls()); - - for i in 0..values.len() { - let expected = values[i]; - let actual = vector.value::(i); - assert_eq!(expected.len(), actual.len as usize); - if expected.len() >= STRING_VIEW_PREFIX_LEN { - assert_eq!( - &expected[..STRING_VIEW_PREFIX_LEN], - String::from_utf8_lossy(&actual.prefix) - ); - } - assert_eq!(expected, actual.as_utf8_str()); - } - } - - #[test] - fn string_with_nulls() { - let data = [Some("hello"), None, Some("comet")]; - let arr = StringArray::from(data.to_vec().clone()); - - let vector = PlainVector::from(arr.into_data()); - assert_eq!(3, vector.num_values()); - assert_eq!(1, vector.num_nulls()); - - for i in 0..data.len() { - if data[i].is_none() { - assert!(vector.is_null(i)); - } else { - let expected = data[i].unwrap(); - let actual = vector.value::(i); - if expected.len() >= STRING_VIEW_PREFIX_LEN { - assert_eq!( - &expected[..STRING_VIEW_PREFIX_LEN], - String::from_utf8_lossy(&actual.prefix) - ); - } - assert_eq!(expected, actual.as_utf8_str()); - } - } - } -} diff --git a/core/src/execution/datafusion/planner.rs b/core/src/execution/datafusion/planner.rs index 3a8548f77..a5bcf5654 100644 --- a/core/src/execution/datafusion/planner.rs +++ b/core/src/execution/datafusion/planner.rs @@ -836,14 +836,13 @@ impl PhysicalPlanner { } // Consumes the first input source for the scan - let input_source = if self.exec_context_id == TEST_EXEC_CONTEXT_ID - && inputs.is_empty() - { - // For unit test, we will set input batch to scan directly by `set_input_batch`. - None - } else { - Some(inputs.remove(0)) - }; + let input_source = + if self.exec_context_id == TEST_EXEC_CONTEXT_ID && inputs.is_empty() { + // For unit test, we will set input batch to scan directly by `set_input_batch`. + None + } else { + Some(inputs.remove(0)) + }; // The `ScanExec` operator will take actual arrays from Spark during execution let scan = ScanExec::new(self.exec_context_id, input_source, fields)?; diff --git a/core/src/execution/datafusion/spark_hash.rs b/core/src/execution/datafusion/spark_hash.rs index 15d3a5021..4d91a87df 100644 --- a/core/src/execution/datafusion/spark_hash.rs +++ b/core/src/execution/datafusion/spark_hash.rs @@ -84,7 +84,7 @@ pub(crate) fn spark_compatible_murmur3_hash>(data: T, seed: u32) // safety: // avoid boundary checking in performance critical codes. - // all operations are garenteed to be safe + // all operations are guaranteed to be safe unsafe { let mut h1 = hash_bytes_by_int( std::slice::from_raw_parts(data.get_unchecked(0), len_aligned), diff --git a/core/src/execution/shuffle/row.rs b/core/src/execution/shuffle/row.rs index 419ef9b4b..2d1312c16 100644 --- a/core/src/execution/shuffle/row.rs +++ b/core/src/execution/shuffle/row.rs @@ -207,7 +207,7 @@ impl Default for SparkUnsafeRow { } impl SparkUnsafeRow { - fn new(schema: &Vec) -> Self { + fn new(schema: &[DataType]) -> Self { Self { row_addr: -1, row_size: -1, @@ -1046,7 +1046,7 @@ pub(crate) fn append_columns( row_sizes_ptr: *mut jint, row_start: usize, row_end: usize, - schema: &Vec, + schema: &[DataType], column_idx: usize, builder: &mut Box, prefer_dictionary_ratio: f64, @@ -3283,7 +3283,7 @@ pub fn process_sorted_row_partition( batch_size: usize, row_addresses_ptr: *mut jlong, row_sizes_ptr: *mut jint, - schema: &Vec, + schema: &[DataType], output_path: String, prefer_dictionary_ratio: f64, checksum_enabled: bool, diff --git a/core/src/lib.rs b/core/src/lib.rs index f209859a7..b608461ea 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -20,12 +20,6 @@ #![allow(dead_code)] #![allow(clippy::upper_case_acronyms)] #![allow(clippy::derive_partial_eq_without_eq)] // For prost generated struct -#![cfg_attr(feature = "nightly", feature(core_intrinsics))] -#![feature(specialization)] - -// Branch prediction hint. This is currently only available on nightly. -#[cfg(feature = "nightly")] -use core::intrinsics::{likely, unlikely}; use jni::{ objects::{JClass, JString}, diff --git a/rust-toolchain b/rust-toolchain index 55d7013e4..369f9966f 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2023-09-05 +1.77.2 diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index e69054dbd..7516a0785 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -1606,7 +1606,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { .mode("overwrite") .parquet(shortArrayPath) val shortArrayQuery = "select a, -a from t" - runArrayTest(shortArrayQuery, " caused", shortArrayPath) + runArrayTest(shortArrayQuery, "", shortArrayPath) // byte values test val byteArrayPath = new Path(dir.toURI.toString, "byte_array_test.parquet").toString @@ -1616,7 +1616,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { .mode("overwrite") .parquet(byteArrayPath) val byteArrayQuery = "select a, -a from t" - runArrayTest(byteArrayQuery, " caused", byteArrayPath) + runArrayTest(byteArrayQuery, "", byteArrayPath) // interval values test withTable("t_interval") { From a668a8657a16496781075a014c6009d038c3fa1b Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 4 Jun 2024 11:09:32 -0600 Subject: [PATCH 2/2] feat: Add specific fuzz tests for cast and try_cast and fix NPE found during fuzz testing (#514) * Varius improvements to fuzz testing tool * Fix NPE in QueryPlanSerde handling of trim expression * format --- fuzz-testing/README.md | 5 +++-- .../scala/org/apache/comet/fuzz/DataGen.scala | 3 ++- .../org/apache/comet/fuzz/QueryGen.scala | 20 +++++++++++++++++-- .../org/apache/comet/fuzz/QueryRunner.scala | 10 ++++++++-- .../scala/org/apache/comet/fuzz/Utils.scala | 4 ++-- .../apache/comet/serde/QueryPlanSerde.scala | 4 ++-- 6 files changed, 35 insertions(+), 11 deletions(-) diff --git a/fuzz-testing/README.md b/fuzz-testing/README.md index 56af359f2..076ff6aea 100644 --- a/fuzz-testing/README.md +++ b/fuzz-testing/README.md @@ -30,8 +30,8 @@ Comet Fuzz is inspired by the [SparkFuzz](https://ir.cwi.nl/pub/30222) paper fro Planned areas of improvement: +- ANSI mode - Support for all data types, expressions, and operators supported by Comet -- Explicit casts - Unary and binary arithmetic expressions - IF and CASE WHEN expressions - Complex (nested) expressions @@ -91,7 +91,8 @@ $SPARK_HOME/bin/spark-submit \ --conf spark.comet.exec.shuffle.enabled=true \ --conf spark.comet.exec.shuffle.mode=auto \ --jars $COMET_JAR \ - --driver-class-path $COMET_JAR \ + --conf spark.driver.extraClassPath=$COMET_JAR \ + --conf spark.executor.extraClassPath=$COMET_JAR \ --class org.apache.comet.fuzz.Main \ target/comet-fuzz-spark3.4_2.12-0.1.0-SNAPSHOT-jar-with-dependencies.jar \ run --num-files=2 --filename=queries.sql diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/DataGen.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/DataGen.scala index 47a6bd879..9f9f772b7 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/DataGen.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/DataGen.scala @@ -50,7 +50,8 @@ object DataGen { // generate schema using random data types val fields = Range(0, numColumns) - .map(i => StructField(s"c$i", Utils.randomWeightedChoice(Meta.dataTypes), nullable = true)) + .map(i => + StructField(s"c$i", Utils.randomWeightedChoice(Meta.dataTypes, r), nullable = true)) val schema = StructType(fields) // generate columnar data diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala index 1daa26200..7584e76ce 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala @@ -42,10 +42,11 @@ object QueryGen { val uniqueQueries = mutable.HashSet[String]() for (_ <- 0 until numQueries) { - val sql = r.nextInt().abs % 3 match { + val sql = r.nextInt().abs % 4 match { case 0 => generateJoin(r, spark, numFiles) case 1 => generateAggregate(r, spark, numFiles) case 2 => generateScalar(r, spark, numFiles) + case 3 => generateCast(r, spark, numFiles) } if (!uniqueQueries.contains(sql)) { uniqueQueries += sql @@ -91,6 +92,21 @@ object QueryGen { s"ORDER BY ${args.mkString(", ")};" } + private def generateCast(r: Random, spark: SparkSession, numFiles: Int): String = { + val tableName = s"test${r.nextInt(numFiles)}" + val table = spark.table(tableName) + + val toType = Utils.randomWeightedChoice(Meta.dataTypes, r).sql + val arg = Utils.randomChoice(table.columns, r) + + // We test both `cast` and `try_cast` to cover LEGACY and TRY eval modes. It is not + // recommended to run Comet Fuzz with ANSI enabled currently. + // Example SELECT c0, cast(c0 as float), try_cast(c0 as float) FROM test0 + s"SELECT $arg, cast($arg as $toType), try_cast($arg as $toType) " + + s"FROM $tableName " + + s"ORDER BY $arg;" + } + private def generateJoin(r: Random, spark: SparkSession, numFiles: Int): String = { val leftTableName = s"test${r.nextInt(numFiles)}" val rightTableName = s"test${r.nextInt(numFiles)}" @@ -101,7 +117,7 @@ object QueryGen { val rightCol = Utils.randomChoice(rightTable.columns, r) val joinTypes = Seq(("INNER", 0.4), ("LEFT", 0.3), ("RIGHT", 0.3)) - val joinType = Utils.randomWeightedChoice(joinTypes) + val joinType = Utils.randomWeightedChoice(joinTypes, r) val leftColProjection = leftTable.columns.map(c => s"l.$c").mkString(", ") val rightColProjection = rightTable.columns.map(c => s"r.$c").mkString(", ") diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala index 49f9fc3bd..b2ceae9d0 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala @@ -19,7 +19,7 @@ package org.apache.comet.fuzz -import java.io.{BufferedWriter, FileWriter} +import java.io.{BufferedWriter, FileWriter, PrintWriter} import scala.io.Source @@ -109,7 +109,12 @@ object QueryRunner { case e: Exception => // the query worked in Spark but failed in Comet, so this is likely a bug in Comet showSQL(w, sql) - w.write(s"[ERROR] Query failed in Comet: ${e.getMessage}\n") + w.write(s"[ERROR] Query failed in Comet: ${e.getMessage}:\n") + w.write("```\n") + val p = new PrintWriter(w) + e.printStackTrace(p) + p.close() + w.write("```\n") } // flush after every query so that results are saved in the event of the driver crashing @@ -134,6 +139,7 @@ object QueryRunner { private def formatRow(row: Row): String = { row.toSeq .map { + case null => "NULL" case v: Array[Byte] => v.mkString case other => other.toString } diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Utils.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Utils.scala index 19f9695a9..4d51c60e5 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Utils.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Utils.scala @@ -27,9 +27,9 @@ object Utils { list(r.nextInt(list.length)) } - def randomWeightedChoice[T](valuesWithWeights: Seq[(T, Double)]): T = { + def randomWeightedChoice[T](valuesWithWeights: Seq[(T, Double)], r: Random): T = { val totalWeight = valuesWithWeights.map(_._2).sum - val randomValue = Random.nextDouble() * totalWeight + val randomValue = r.nextDouble() * totalWeight var cumulativeWeight = 0.0 for ((value, weight) <- valuesWithWeights) { diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 439ec4ebb..8d81b57c4 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -2169,10 +2169,10 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim val trimCast = Cast(trimStr.get, StringType) val trimExpr = exprToProtoInternal(trimCast, inputs) val optExpr = scalarExprToProto(trimType, srcExpr, trimExpr) - optExprWithInfo(optExpr, expr, null, srcCast, trimCast) + optExprWithInfo(optExpr, expr, srcCast, trimCast) } else { val optExpr = scalarExprToProto(trimType, srcExpr) - optExprWithInfo(optExpr, expr, null, srcCast) + optExprWithInfo(optExpr, expr, srcCast) } }