From b24bee5fd2f8db4f6a7b1e5a2c5d08226ef233d0 Mon Sep 17 00:00:00 2001 From: o_voievodin Date: Tue, 20 Feb 2024 15:51:17 -0800 Subject: [PATCH 1/3] Upgrade DF and arrow-rs --- core/Cargo.lock | 104 ++---------------- .../src/parquet/util/test_common/page_util.rs | 6 +- 2 files changed, 11 insertions(+), 99 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 456d96966..faec69611 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -1105,7 +1105,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -1354,7 +1354,7 @@ version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -1454,7 +1454,7 @@ checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" dependencies = [ "hermit-abi", "rustix", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -1490,32 +1490,18 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" -[[package]] -name = "java-locator" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90003f2fd9c52f212c21d8520f1128da0080bad6fff16b68fe6e7f2f0c3780c2" -dependencies = [ - "glob", - "lazy_static", -] - [[package]] name = "jni" -version = "0.21.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" +checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec" dependencies = [ "cesu8", - "cfg-if", "combine", - "java-locator", "jni-sys", - "libloading", "log", "thiserror", "walkdir", - "windows-sys 0.45.0", ] [[package]] @@ -1618,16 +1604,6 @@ version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" -[[package]] -name = "libloading" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" -dependencies = [ - "cfg-if", - "winapi", -] - [[package]] name = "libm" version = "0.2.8" @@ -2371,7 +2347,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -2673,7 +2649,7 @@ dependencies = [ "fastrand", "redox_syscall", "rustix", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -3066,15 +3042,6 @@ dependencies = [ "windows-targets 0.52.0", ] -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -3084,21 +3051,6 @@ dependencies = [ "windows-targets 0.52.0", ] -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-targets" version = "0.48.5" @@ -3129,12 +3081,6 @@ dependencies = [ "windows_x86_64_msvc 0.52.0", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -3147,12 +3093,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -3165,12 +3105,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -3183,12 +3117,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -3201,12 +3129,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -3219,12 +3141,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -3237,12 +3153,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/core/src/parquet/util/test_common/page_util.rs b/core/src/parquet/util/test_common/page_util.rs index efd3f38e3..0c3a790ee 100644 --- a/core/src/parquet/util/test_common/page_util.rs +++ b/core/src/parquet/util/test_common/page_util.rs @@ -22,7 +22,7 @@ use rand::distributions::uniform::SampleUniform; use parquet::{ basic::Encoding, column::page::{Page, PageIterator, PageMetadata, PageReader}, - data_type::DataType, + data_type::{AsBytes, DataType}, encodings::{ encoding::{get_encoder, DictEncoder, Encoder}, levels::{max_buffer_size, LevelEncoder}, @@ -31,6 +31,8 @@ use parquet::{ schema::types::{ColumnDescPtr, SchemaDescPtr}, }; +use crate::parquet::util::memory::ByteBufferPtr; + use super::random_numbers_range; use bytes::Bytes; use zstd::zstd_safe::WriteBuf; @@ -290,7 +292,7 @@ pub fn make_pages( let indices = dict_encoder .write_indices() .expect("write_indices() should be OK"); - pb.add_indices(indices); + pb.add_indices(ByteBufferPtr::new(indices.as_bytes().to_vec())); } Encoding::PLAIN => { pb.add_values::(encoding, &values[value_range]); From 17b147a30d49eaf813585db8c42bf9dd5887b835 Mon Sep 17 00:00:00 2001 From: o_voievodin Date: Wed, 21 Feb 2024 08:17:12 -0800 Subject: [PATCH 2/3] fix merge --- core/Cargo.lock | 104 ++++++++++++++++-- .../src/parquet/util/test_common/page_util.rs | 6 +- 2 files changed, 99 insertions(+), 11 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index faec69611..456d96966 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -1105,7 +1105,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1354,7 +1354,7 @@ version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1454,7 +1454,7 @@ checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" dependencies = [ "hermit-abi", "rustix", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1490,18 +1490,32 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +[[package]] +name = "java-locator" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90003f2fd9c52f212c21d8520f1128da0080bad6fff16b68fe6e7f2f0c3780c2" +dependencies = [ + "glob", + "lazy_static", +] + [[package]] name = "jni" -version = "0.19.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" dependencies = [ "cesu8", + "cfg-if", "combine", + "java-locator", "jni-sys", + "libloading", "log", "thiserror", "walkdir", + "windows-sys 0.45.0", ] [[package]] @@ -1604,6 +1618,16 @@ version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "libm" version = "0.2.8" @@ -2347,7 +2371,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -2649,7 +2673,7 @@ dependencies = [ "fastrand", "redox_syscall", "rustix", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -3042,6 +3066,15 @@ dependencies = [ "windows-targets 0.52.0", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -3051,6 +3084,21 @@ dependencies = [ "windows-targets 0.52.0", ] +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -3081,6 +3129,12 @@ dependencies = [ "windows_x86_64_msvc 0.52.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -3093,6 +3147,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -3105,6 +3165,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -3117,6 +3183,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -3129,6 +3201,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -3141,6 +3219,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -3153,6 +3237,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/core/src/parquet/util/test_common/page_util.rs b/core/src/parquet/util/test_common/page_util.rs index 0c3a790ee..efd3f38e3 100644 --- a/core/src/parquet/util/test_common/page_util.rs +++ b/core/src/parquet/util/test_common/page_util.rs @@ -22,7 +22,7 @@ use rand::distributions::uniform::SampleUniform; use parquet::{ basic::Encoding, column::page::{Page, PageIterator, PageMetadata, PageReader}, - data_type::{AsBytes, DataType}, + data_type::DataType, encodings::{ encoding::{get_encoder, DictEncoder, Encoder}, levels::{max_buffer_size, LevelEncoder}, @@ -31,8 +31,6 @@ use parquet::{ schema::types::{ColumnDescPtr, SchemaDescPtr}, }; -use crate::parquet::util::memory::ByteBufferPtr; - use super::random_numbers_range; use bytes::Bytes; use zstd::zstd_safe::WriteBuf; @@ -292,7 +290,7 @@ pub fn make_pages( let indices = dict_encoder .write_indices() .expect("write_indices() should be OK"); - pb.add_indices(ByteBufferPtr::new(indices.as_bytes().to_vec())); + pb.add_indices(indices); } Encoding::PLAIN => { pb.add_values::(encoding, &values[value_range]); From 74ffa245497a151d560b3e8f4288356458708629 Mon Sep 17 00:00:00 2001 From: comphead Date: Tue, 12 Mar 2024 13:01:53 -0700 Subject: [PATCH 3/3] feat: Fix names --- .../main/java/org/apache/comet/parquet/ReadOptions.java | 8 ++++---- .../scala/org/apache/comet/exec/CometAggregateSuite.scala | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/common/src/main/java/org/apache/comet/parquet/ReadOptions.java b/common/src/main/java/org/apache/comet/parquet/ReadOptions.java index a25fc61a7..023f71dc7 100644 --- a/common/src/main/java/org/apache/comet/parquet/ReadOptions.java +++ b/common/src/main/java/org/apache/comet/parquet/ReadOptions.java @@ -53,10 +53,10 @@ public class ReadOptions { // to reduce the skew. This will result in a slightly larger number of connections // opened to the file system but may give improved performance. // The option is off by default. - public static final String BOSON_IO_ADJUST_READRANGE_SKEW = - "boson.parquet.read.io.adjust.readRange.skew"; + public static final String COMET_IO_ADJUST_READRANGE_SKEW = + "comet.parquet.read.io.adjust.readRange.skew"; - private static final boolean BOSON_IO_ADJUST_READRANGE_SKEW_DEFAULT = false; + private static final boolean COMET_IO_ADJUST_READRANGE_SKEW_DEFAULT = false; // Max number of concurrent tasks we expect. Used to autoconfigure S3 client connections public static final int S3A_MAX_EXPECTED_PARALLELISM = 32; @@ -180,7 +180,7 @@ public Builder(Configuration conf) { this.ioMergeRangesDelta = conf.getInt(COMET_IO_MERGE_RANGES_DELTA, COMET_IO_MERGE_RANGES_DELTA_DEFAULT); this.adjustReadRangeSkew = - conf.getBoolean(BOSON_IO_ADJUST_READRANGE_SKEW, BOSON_IO_ADJUST_READRANGE_SKEW_DEFAULT); + conf.getBoolean(COMET_IO_ADJUST_READRANGE_SKEW, COMET_IO_ADJUST_READRANGE_SKEW_DEFAULT); // override some S3 defaults setS3Config(); } diff --git a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala index 8a68a925e..1dac14d02 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala @@ -923,9 +923,9 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper { test("test bool_and/bool_or") { withSQLConf(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true") { - Seq(true, false).foreach { bosonColumnShuffleEnabled => + Seq(true, false).foreach { cometColumnShuffleEnabled => withSQLConf( - CometConf.COMET_COLUMNAR_SHUFFLE_ENABLED.key -> bosonColumnShuffleEnabled.toString) { + CometConf.COMET_COLUMNAR_SHUFFLE_ENABLED.key -> cometColumnShuffleEnabled.toString) { Seq(true, false).foreach { dictionary => withSQLConf("parquet.enable.dictionary" -> dictionary.toString) { val table = "test"