From 59f535cf8c2d3d0110ce08e3e36a559f154411df Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 8 Apr 2024 11:15:16 -0700 Subject: [PATCH] build: Use specified branch of arrow-rs with workaround to invalid offset buffers from Java Arrow (#239) * feat: Use specified branch of arrow-rs with workaround to invalid offset buffers from Java Arrow * Use FunctionRegistry * Fix * Update * Restore config * Restore plan stability --- core/Cargo.lock | 699 +++++++++--------- core/Cargo.toml | 19 +- .../execution/datafusion/expressions/avg.rs | 2 +- .../datafusion/expressions/avg_decimal.rs | 2 +- .../expressions/bloom_filter_might_contain.rs | 2 +- .../datafusion/expressions/scalar_funcs.rs | 163 +++- .../datafusion/expressions/subquery.rs | 2 +- .../datafusion/expressions/temporal.rs | 8 +- .../execution/datafusion/operators/expand.rs | 26 +- core/src/execution/datafusion/planner.rs | 149 ++-- .../execution/datafusion/shuffle_writer.rs | 26 +- core/src/execution/jni_api.rs | 3 +- core/src/execution/operators/copy.rs | 26 +- core/src/execution/operators/scan.rs | 84 ++- core/src/parquet/util/jni.rs | 1 + 15 files changed, 688 insertions(+), 524 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 456d96966..e209e4a8d 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -19,9 +19,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.8.7" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "const-random", @@ -33,9 +33,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -84,21 +84,21 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anyhow" -version = "1.0.79" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" +checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" [[package]] name = "arc-swap" -version = "1.6.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" [[package]] name = "arrayref" @@ -114,9 +114,8 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-arith", "arrow-array", @@ -135,9 +134,8 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", @@ -150,9 +148,8 @@ dependencies = [ [[package]] name = "arrow-array" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "ahash", "arrow-buffer", @@ -161,15 +158,14 @@ dependencies = [ "chrono", "chrono-tz", "half 2.1.0", - "hashbrown 0.14.3", + "hashbrown", "num", ] [[package]] name = "arrow-buffer" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "bytes", "half 2.1.0", @@ -178,28 +174,28 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "atoi", "base64", "chrono", "comfy-table", "half 2.1.0", "lexical-core", "num", + "ryu", ] [[package]] name = "arrow-csv" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,9 +212,8 @@ dependencies = [ [[package]] name = "arrow-data" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-buffer", "arrow-schema", @@ -228,9 +223,8 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", @@ -243,9 +237,8 @@ dependencies = [ [[package]] name = "arrow-json" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", @@ -254,7 +247,7 @@ dependencies = [ "arrow-schema", "chrono", "half 2.1.0", - "indexmap 2.1.0", + "indexmap", "lexical-core", "num", "serde", @@ -263,9 +256,8 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", @@ -278,9 +270,8 @@ dependencies = [ [[package]] name = "arrow-row" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "ahash", "arrow-array", @@ -288,23 +279,21 @@ dependencies = [ "arrow-data", "arrow-schema", "half 2.1.0", - "hashbrown 0.14.3", + "hashbrown", ] [[package]] name = "arrow-schema" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.5.0", ] [[package]] name = "arrow-select" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "ahash", "arrow-array", @@ -316,15 +305,15 @@ dependencies = [ [[package]] name = "arrow-string" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "memchr", "num", "regex", "regex-syntax", @@ -338,26 +327,35 @@ checksum = "0c24e9d990669fbd16806bff449e4ac644fd9b1fca014760087732fe4102f131" [[package]] name = "async-trait" -version = "0.1.77" +version = "0.1.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" +checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", ] [[package]] name = "autocfg" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" dependencies = [ "addr2line", "cc", @@ -370,9 +368,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.5" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" [[package]] name = "bitflags" @@ -382,9 +380,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "blake2" @@ -397,9 +395,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" +checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52" dependencies = [ "arrayref", "arrayvec", @@ -419,9 +417,9 @@ dependencies = [ [[package]] name = "brotli" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -440,15 +438,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" [[package]] name = "byteorder" @@ -458,9 +456,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cast" @@ -470,9 +468,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.83" +version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" dependencies = [ "jobserver", "libc", @@ -492,23 +490,21 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.34" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" +checksum = "8a0d04d43504c61aa6c7531f1871dd0d418d91130162063b789da00fd7057a5e" dependencies = [ "android-tzdata", "iana-time-zone", - "js-sys", "num-traits", - "wasm-bindgen", - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] name = "chrono-tz" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91d7b79e99bfaa0d47da0687c43aa3b7381938a62ad3a6498599039321f660b7" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" dependencies = [ "chrono", "chrono-tz-build", @@ -539,9 +535,9 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" @@ -550,23 +546,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" dependencies = [ "ciborium-io", - "half 1.8.2", + "half 1.8.3", ] [[package]] name = "clap" -version = "4.4.13" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52bdc885e4cacc7f7c9eedc1ef6da641603180c783c41a15c264944deeaab642" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.4.12" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb7fb5e4e979aec3be7791562fcba452f94ad85e954da024396433e0e25a79e9" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" dependencies = [ "anstyle", "clap_lex", @@ -574,9 +570,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "combine" @@ -609,11 +605,12 @@ dependencies = [ "criterion", "datafusion", "datafusion-common", + "datafusion-functions", "datafusion-physical-expr", "flate2", "futures", "half 2.1.0", - "hashbrown 0.14.3", + "hashbrown", "itertools 0.11.0", "jni", "lazy_static", @@ -657,9 +654,9 @@ dependencies = [ [[package]] name = "const-random" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" dependencies = [ "const-random-macro", ] @@ -707,9 +704,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] @@ -752,34 +749,28 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.17" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e3681d554572a651dda4186cd47240627c3d0114d45a95f6ad27f2f22e7548d" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.18" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3a430a770ebd84726f584a90ee7f020d28db52c6d02138900f22341f866d39c" -dependencies = [ - "cfg-if", -] +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "crunchy" @@ -825,7 +816,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.14.3", + "hashbrown", "lock_api", "once_cell", "parking_lot_core", @@ -834,8 +825,7 @@ dependencies = [ [[package]] name = "datafusion" version = "36.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2b360b692bf6c6d6e6b6dbaf41a3be0020daeceac0f406aed54c75331e50dbb" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=111a940#111a940b297aa83839e4e2273f0e1a38e108b370" dependencies = [ "ahash", "arrow", @@ -847,6 +837,7 @@ dependencies = [ "chrono", "dashmap", "datafusion-common", + "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", "datafusion-functions", @@ -857,9 +848,9 @@ dependencies = [ "futures", "glob", "half 2.1.0", - "hashbrown 0.14.3", - "indexmap 2.1.0", - "itertools 0.12.0", + "hashbrown", + "indexmap", + "itertools 0.12.1", "log", "num_cpus", "object_store", @@ -876,8 +867,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "36.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37f343ccc298f440e25aa38ff82678291a7acc24061c7370ba6c0ff5cc811412" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=111a940#111a940b297aa83839e4e2273f0e1a38e108b370" dependencies = [ "ahash", "arrow", @@ -886,17 +876,25 @@ dependencies = [ "arrow-schema", "chrono", "half 2.1.0", + "instant", "libc", "num_cpus", "object_store", "sqlparser", ] +[[package]] +name = "datafusion-common-runtime" +version = "36.0.0" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=111a940#111a940b297aa83839e4e2273f0e1a38e108b370" +dependencies = [ + "tokio", +] + [[package]] name = "datafusion-execution" version = "36.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9c93043081487e335399a21ebf8295626367a647ac5cb87d41d18afad7d0f7" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=111a940#111a940b297aa83839e4e2273f0e1a38e108b370" dependencies = [ "arrow", "chrono", @@ -904,7 +902,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.3", + "hashbrown", "log", "object_store", "parking_lot", @@ -916,39 +914,47 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "36.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e204d89909e678846b6a95f156aafc1ee5b36cb6c9e37ec2e1449b078a38c818" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=111a940#111a940b297aa83839e4e2273f0e1a38e108b370" dependencies = [ "ahash", "arrow", "arrow-array", + "chrono", "datafusion-common", "paste", "sqlparser", - "strum 0.26.1", - "strum_macros 0.26.1", + "strum 0.26.2", + "strum_macros 0.26.2", ] [[package]] name = "datafusion-functions" version = "36.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98f1c73f7801b2b8ba2297b3ad78ffcf6c1fc6b8171f502987eb9ad5cb244ee7" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=111a940#111a940b297aa83839e4e2273f0e1a38e108b370" dependencies = [ "arrow", "base64", + "blake2", + "blake3", + "chrono", "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-physical-expr", "hex", + "itertools 0.12.1", "log", + "md-5", + "regex", + "sha2", + "unicode-segmentation", + "uuid", ] [[package]] name = "datafusion-optimizer" version = "36.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ae27e07bf1f04d327be5c2a293470879801ab5535204dc3b16b062fda195496" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=111a940#111a940b297aa83839e4e2273f0e1a38e108b370" dependencies = [ "arrow", "async-trait", @@ -956,8 +962,8 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.3", - "itertools 0.12.0", + "hashbrown", + "itertools 0.12.1", "log", "regex-syntax", ] @@ -965,8 +971,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "36.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dde620cd9ef76a3bca9c754fb68854bd2349c49f55baf97e08001f9e967f6d6b" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=111a940#111a940b297aa83839e4e2273f0e1a38e108b370" dependencies = [ "ahash", "arrow", @@ -983,10 +988,10 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "half 2.1.0", - "hashbrown 0.14.3", + "hashbrown", "hex", - "indexmap 2.1.0", - "itertools 0.12.0", + "indexmap", + "itertools 0.12.1", "log", "md-5", "paste", @@ -995,14 +1000,12 @@ dependencies = [ "regex", "sha2", "unicode-segmentation", - "uuid", ] [[package]] name = "datafusion-physical-plan" version = "36.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a4c75fba9ea99d64b2246cbd2fcae2e6fc973e6616b1015237a616036506dd4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=111a940#111a940b297aa83839e4e2273f0e1a38e108b370" dependencies = [ "ahash", "arrow", @@ -1012,35 +1015,36 @@ dependencies = [ "async-trait", "chrono", "datafusion-common", + "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", "futures", "half 2.1.0", - "hashbrown 0.14.3", - "indexmap 2.1.0", - "itertools 0.12.0", + "hashbrown", + "indexmap", + "itertools 0.12.1", "log", "once_cell", "parking_lot", "pin-project-lite", "rand", "tokio", - "uuid", ] [[package]] name = "datafusion-sql" version = "36.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21474a95c3a62d113599d21b439fa15091b538bac06bd20be0bb2e7d22903c09" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=111a940#111a940b297aa83839e4e2273f0e1a38e108b370" dependencies = [ "arrow", + "arrow-array", "arrow-schema", "datafusion-common", "datafusion-expr", "log", "sqlparser", + "strum 0.26.2", ] [[package]] @@ -1088,9 +1092,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "equivalent" @@ -1110,9 +1114,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" [[package]] name = "findshlibs" @@ -1223,7 +1227,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", ] [[package]] @@ -1268,9 +1272,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", @@ -1291,9 +1295,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "half" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" [[package]] name = "half" @@ -1305,12 +1309,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.3" @@ -1338,9 +1336,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.3" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hex" @@ -1365,9 +1363,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "iana-time-zone" -version = "0.1.59" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1398,22 +1396,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indexmap" -version = "2.1.0" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", - "hashbrown 0.14.3", + "hashbrown", ] [[package]] @@ -1423,7 +1411,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "321f0f839cd44a4686e9504b0a62b4d69a50b62072144c71c68f5873c167b8d9" dependencies = [ "ahash", - "indexmap 2.1.0", + "indexmap", "is-terminal", "itoa", "log", @@ -1434,6 +1422,18 @@ dependencies = [ "str_stack", ] +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "integer-encoding" version = "1.1.7" @@ -1448,12 +1448,12 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "is-terminal" -version = "0.4.10" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ "hermit-abi", - "rustix", + "libc", "windows-sys 0.52.0", ] @@ -1477,18 +1477,18 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "java-locator" @@ -1526,18 +1526,18 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "jobserver" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.66" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ "wasm-bindgen", ] @@ -1614,9 +1614,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.151" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libloading" @@ -1644,17 +1644,11 @@ dependencies = [ "libc", ] -[[package]] -name = "linked-hash-map" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" - [[package]] name = "linux-raw-sys" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" @@ -1668,9 +1662,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.20" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" dependencies = [ "serde", ] @@ -1683,9 +1677,9 @@ checksum = "a94d21414c1f4a51209ad204c1776a3d0765002c76c6abcb602a6f09f1e881c7" [[package]] name = "log4rs" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d36ca1786d9e79b8193a68d480a0907b612f109537115c6ff655a3a1967533fd" +checksum = "0816135ae15bd0391cf284eab37e6e3ee0a6ee63d2ceeb659862bd8d0a984ca6" dependencies = [ "anyhow", "arc-swap", @@ -1696,7 +1690,9 @@ dependencies = [ "libc", "log", "log-mdc", + "once_cell", "parking_lot", + "rand", "serde", "serde-value", "serde_json", @@ -1729,9 +1725,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "912b45c753ff5f7f5208307e8ace7d2a2e30d024e26d3509f3dce546c044ce15" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" dependencies = [ "twox-hash", ] @@ -1748,15 +1744,15 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "memmap2" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" dependencies = [ "libc", ] @@ -1772,9 +1768,9 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -1823,9 +1819,9 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" dependencies = [ "num-traits", ] @@ -1842,19 +1838,18 @@ dependencies = [ [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", "num-traits", ] [[package]] name = "num-iter" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" dependencies = [ "autocfg", "num-integer", @@ -1875,9 +1870,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", "libm", @@ -1904,16 +1899,16 @@ dependencies = [ [[package]] name = "object_store" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d139f545f64630e2e3688fd9f81c470888ab01edeb72d13b4e86c566f1130000" +checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" dependencies = [ "async-trait", "bytes", "chrono", "futures", "humantime", - "itertools 0.12.0", + "itertools 0.12.1", "parking_lot", "percent-encoding", "snafu", @@ -1978,15 +1973,14 @@ dependencies = [ [[package]] name = "parquet" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "547b92ebf0c1177e3892f44c8f79757ee62e678d564a9834189725f2c5b7a750" +version = "51.0.0" +source = "git+https://github.com/viirya/arrow-rs.git?rev=3f1ae0c#3f1ae0c836b0769c88220d2180ef008b7a59158c" dependencies = [ "ahash", "bytes", "chrono", "half 2.1.0", - "hashbrown 0.14.3", + "hashbrown", "num", "num-bigint", "paste", @@ -2032,7 +2026,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.1.0", + "indexmap", ] [[package]] @@ -2075,9 +2069,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -2087,9 +2081,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "plotters" @@ -2148,9 +2142,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.75" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] @@ -2218,7 +2212,7 @@ dependencies = [ "itertools 0.11.0", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", ] [[package]] @@ -2281,9 +2275,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.8.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ -2291,9 +2285,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -2310,9 +2304,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", @@ -2322,9 +2316,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", @@ -2333,9 +2327,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "rgb" @@ -2363,11 +2357,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.28" +version = "0.38.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.5.0", "errno", "libc", "linux-raw-sys", @@ -2382,9 +2376,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" @@ -2403,9 +2397,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" +checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" [[package]] name = "seq-macro" @@ -2415,9 +2409,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.194" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b114498256798c94a0689e1a15fec6005dee8ac1f41de56404b67afc2a4b773" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] @@ -2434,20 +2428,20 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.194" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3385e45322e8f9931410f01b3031ec534c3947d0e94c18049af4d9f9907d4e0" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", ] [[package]] name = "serde_json" -version = "1.0.111" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ "itoa", "ryu", @@ -2456,14 +2450,15 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.8.26" +version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 1.9.3", + "indexmap", + "itoa", "ryu", "serde", - "yaml-rust", + "unsafe-libyaml", ] [[package]] @@ -2500,9 +2495,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.2" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "snafu" @@ -2534,9 +2529,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sqlparser" -version = "0.43.1" +version = "0.44.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4" +checksum = "aaf9c7ff146298ffda83a200f8d5084f08dcee1edfc135fcc1d646a45d50ffd6" dependencies = [ "log", "sqlparser_derive", @@ -2550,7 +2545,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", ] [[package]] @@ -2579,11 +2574,11 @@ checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" [[package]] name = "strum" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" dependencies = [ - "strum_macros 0.26.1", + "strum_macros 0.26.2", ] [[package]] @@ -2596,20 +2591,20 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.48", + "syn 2.0.57", ] [[package]] name = "strum_macros" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" dependencies = [ "heck 0.4.1", "proc-macro2", "quote", "rustversion", - "syn 2.0.48", + "syn 2.0.57", ] [[package]] @@ -2654,9 +2649,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.48" +version = "2.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35" dependencies = [ "proc-macro2", "quote", @@ -2665,35 +2660,34 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.9.0" +version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", "rustix", "windows-sys 0.52.0", ] [[package]] name = "thiserror" -version = "1.0.56" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.56" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", ] [[package]] @@ -2777,9 +2771,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.36.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", @@ -2796,14 +2790,14 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", ] [[package]] name = "tokio-stream" -version = "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" dependencies = [ "futures-core", "pin-project-lite", @@ -2829,7 +2823,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", ] [[package]] @@ -2868,9 +2862,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "unicode-bidi" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" @@ -2880,18 +2874,18 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" dependencies = [ "tinyvec", ] [[package]] name = "unicode-segmentation" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] name = "unicode-width" @@ -2908,6 +2902,12 @@ dependencies = [ "destructure_traitobject", ] +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "url" version = "2.5.0" @@ -2921,9 +2921,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.6.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" dependencies = [ "getrandom", ] @@ -2936,9 +2936,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -2952,9 +2952,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.89" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2962,24 +2962,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.89" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.89" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2987,28 +2987,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.89" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.89" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "web-sys" -version = "0.3.66" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" dependencies = [ "js-sys", "wasm-bindgen", @@ -3063,7 +3063,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -3081,7 +3081,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -3116,17 +3116,17 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] @@ -3143,9 +3143,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" @@ -3161,9 +3161,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" @@ -3179,9 +3179,9 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" @@ -3197,9 +3197,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" @@ -3215,9 +3215,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" @@ -3233,9 +3233,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" @@ -3251,18 +3251,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" - -[[package]] -name = "yaml-rust" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" -dependencies = [ - "linked-hash-map", -] +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "zerocopy" @@ -3281,7 +3272,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.57", ] [[package]] @@ -3305,9 +3296,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.9+zstd.1.5.5" +version = "2.0.10+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" dependencies = [ "cc", "pkg-config", diff --git a/core/Cargo.toml b/core/Cargo.toml index 4dc5afe6f..880d18d19 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -29,12 +29,12 @@ include = [ [dependencies] parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate -arrow = { version = "~50.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } -arrow-array = { version = "~50.0.0" } -arrow-data = { version = "~50.0.0" } -arrow-schema = { version = "~50.0.0" } -arrow-string = { version = "~50.0.0" } -parquet = { version = "~50.0.0", default-features = false, features = ["experimental"] } +arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } +arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } +arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } +arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c" } +parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "3f1ae0c", default-features = false, features = ["experimental"] } half = { version = "~2.1", default-features = false } futures = "0.3.28" mimalloc = { version = "*", default-features = false, optional = true } @@ -66,9 +66,10 @@ itertools = "0.11.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } paste = "1.0.14" -datafusion-common = { version = "36.0.0" } -datafusion = { default-features = false, version = "36.0.0", features = ["unicode_expressions"] } -datafusion-physical-expr = { version = "36.0.0", default-features = false , features = ["unicode_expressions"] } +datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "111a940" } +datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "111a940", features = ["unicode_expressions"] } +datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "111a940" } +datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "111a940", default-features = false, features = ["unicode_expressions"] } unicode-segmentation = "^1.10.1" once_cell = "1.18.0" regex = "1.9.6" diff --git a/core/src/execution/datafusion/expressions/avg.rs b/core/src/execution/datafusion/expressions/avg.rs index 1e04ab0e9..e35ff6120 100644 --- a/core/src/execution/datafusion/expressions/avg.rs +++ b/core/src/execution/datafusion/expressions/avg.rs @@ -27,7 +27,7 @@ use arrow_schema::{DataType, Field}; use datafusion::logical_expr::{ type_coercion::aggregates::avg_return_type, Accumulator, EmitTo, GroupsAccumulator, }; -use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_physical_expr::{expressions::format_state_name, AggregateExpr, PhysicalExpr}; use std::{any::Any, sync::Arc}; diff --git a/core/src/execution/datafusion/expressions/avg_decimal.rs b/core/src/execution/datafusion/expressions/avg_decimal.rs index d99ed041c..870e6d1a6 100644 --- a/core/src/execution/datafusion/expressions/avg_decimal.rs +++ b/core/src/execution/datafusion/expressions/avg_decimal.rs @@ -25,7 +25,7 @@ use arrow_array::{ }; use arrow_schema::{DataType, Field}; use datafusion::logical_expr::{Accumulator, EmitTo, GroupsAccumulator}; -use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_physical_expr::{expressions::format_state_name, AggregateExpr, PhysicalExpr}; use std::{any::Any, sync::Arc}; diff --git a/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs b/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs index dd90cd8e9..6a4d07b89 100644 --- a/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs +++ b/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs @@ -22,7 +22,7 @@ use arrow::record_batch::RecordBatch; use arrow_array::cast::as_primitive_array; use arrow_schema::{DataType, Schema}; use datafusion::physical_plan::ColumnarValue; -use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_physical_expr::{aggregate::utils::down_cast_any_ref, PhysicalExpr}; use std::{ any::Any, diff --git a/core/src/execution/datafusion/expressions/scalar_funcs.rs b/core/src/execution/datafusion/expressions/scalar_funcs.rs index 4fca7237b..e6f8de16b 100644 --- a/core/src/execution/datafusion/expressions/scalar_funcs.rs +++ b/core/src/execution/datafusion/expressions/scalar_funcs.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::{cmp::min, str::FromStr, sync::Arc}; +use std::{any::Any, cmp::min, fmt::Debug, str::FromStr, sync::Arc}; use arrow::{ array::{ @@ -27,16 +27,18 @@ use arrow::{ use arrow_array::{Array, ArrowNativeTypeOp, Decimal128Array}; use arrow_schema::DataType; use datafusion::{ - logical_expr::{BuiltinScalarFunction, ScalarFunctionImplementation}, + execution::FunctionRegistry, + logical_expr::{ + BuiltinScalarFunction, ScalarFunctionDefinition, ScalarFunctionImplementation, + ScalarUDFImpl, Signature, Volatility, + }, physical_plan::ColumnarValue, }; use datafusion_common::{ cast::as_generic_string_array, exec_err, internal_err, DataFusionError, Result as DataFusionResult, ScalarValue, }; -use datafusion_physical_expr::{ - execution_props::ExecutionProps, functions::create_physical_fun, math_expressions, -}; +use datafusion_physical_expr::{math_expressions, udf::ScalarUDF}; use num::{ integer::{div_ceil, div_floor}, BigInt, Signed, ToPrimitive, @@ -46,20 +48,94 @@ use unicode_segmentation::UnicodeSegmentation; /// Create a physical scalar function. pub fn create_comet_physical_fun( fun_name: &str, - execution_props: &ExecutionProps, data_type: DataType, -) -> Result { + registry: &dyn FunctionRegistry, +) -> Result { match fun_name { - "ceil" => Ok(Arc::new(move |x| spark_ceil(x, &data_type))), - "floor" => Ok(Arc::new(move |x| spark_floor(x, &data_type))), - "rpad" => Ok(Arc::new(spark_rpad)), - "round" => Ok(Arc::new(move |x| spark_round(x, &data_type))), - "unscaled_value" => Ok(Arc::new(spark_unscaled_value)), - "make_decimal" => Ok(Arc::new(move |x| spark_make_decimal(x, &data_type))), - "decimal_div" => Ok(Arc::new(move |x| spark_decimal_div(x, &data_type))), + "ceil" => { + let scalar_func = CometScalarFunction::new( + "ceil".to_string(), + Signature::variadic_any(Volatility::Immutable), + data_type.clone(), + Arc::new(move |args| spark_ceil(args, &data_type)), + ); + Ok(ScalarFunctionDefinition::UDF(Arc::new( + ScalarUDF::new_from_impl(scalar_func), + ))) + } + "floor" => { + let scalar_func = CometScalarFunction::new( + "floor".to_string(), + Signature::variadic_any(Volatility::Immutable), + data_type.clone(), + Arc::new(move |args| spark_floor(args, &data_type)), + ); + Ok(ScalarFunctionDefinition::UDF(Arc::new( + ScalarUDF::new_from_impl(scalar_func), + ))) + } + "rpad" => { + let scalar_func = CometScalarFunction::new( + "rpad".to_string(), + Signature::variadic_any(Volatility::Immutable), + data_type.clone(), + Arc::new(spark_rpad), + ); + Ok(ScalarFunctionDefinition::UDF(Arc::new( + ScalarUDF::new_from_impl(scalar_func), + ))) + } + "round" => { + let scalar_func = CometScalarFunction::new( + "round".to_string(), + Signature::variadic_any(Volatility::Immutable), + data_type.clone(), + Arc::new(move |args| spark_round(args, &data_type)), + ); + Ok(ScalarFunctionDefinition::UDF(Arc::new( + ScalarUDF::new_from_impl(scalar_func), + ))) + } + "unscaled_value" => { + let scalar_func = CometScalarFunction::new( + "unscaled_value".to_string(), + Signature::variadic_any(Volatility::Immutable), + data_type.clone(), + Arc::new(spark_unscaled_value), + ); + Ok(ScalarFunctionDefinition::UDF(Arc::new( + ScalarUDF::new_from_impl(scalar_func), + ))) + } + "make_decimal" => { + let scalar_func = CometScalarFunction::new( + "make_decimal".to_string(), + Signature::variadic_any(Volatility::Immutable), + data_type.clone(), + Arc::new(move |args| spark_make_decimal(args, &data_type)), + ); + Ok(ScalarFunctionDefinition::UDF(Arc::new( + ScalarUDF::new_from_impl(scalar_func), + ))) + } + "decimal_div" => { + let scalar_func = CometScalarFunction::new( + "decimal_div".to_string(), + Signature::variadic_any(Volatility::Immutable), + data_type.clone(), + Arc::new(move |args| spark_decimal_div(args, &data_type)), + ); + Ok(ScalarFunctionDefinition::UDF(Arc::new( + ScalarUDF::new_from_impl(scalar_func), + ))) + } _ => { - let fun = &BuiltinScalarFunction::from_str(fun_name)?; - create_physical_fun(fun, execution_props) + let fun = BuiltinScalarFunction::from_str(fun_name); + if fun.is_err() { + Ok(ScalarFunctionDefinition::UDF(registry.udf(fun_name)?)) + } else { + Ok(ScalarFunctionDefinition::BuiltIn(fun?)) + } } } } @@ -89,6 +165,61 @@ macro_rules! downcast_compute_op { }}; } +struct CometScalarFunction { + name: String, + signature: Signature, + data_type: DataType, + func: ScalarFunctionImplementation, +} + +impl Debug for CometScalarFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CometScalarFunction") + .field("name", &self.name) + .field("signature", &self.signature) + .field("data_type", &self.data_type) + .finish() + } +} + +impl CometScalarFunction { + fn new( + name: String, + signature: Signature, + data_type: DataType, + func: ScalarFunctionImplementation, + ) -> Self { + Self { + name, + signature, + data_type, + func, + } + } +} + +impl ScalarUDFImpl for CometScalarFunction { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + self.name.as_str() + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _: &[DataType]) -> DataFusionResult { + Ok(self.data_type.clone()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> DataFusionResult { + (self.func)(args) + } +} + /// `ceil` function that simulates Spark `ceil` expression pub fn spark_ceil( args: &[ColumnarValue], diff --git a/core/src/execution/datafusion/expressions/subquery.rs b/core/src/execution/datafusion/expressions/subquery.rs index 7cae12963..bf37cb895 100644 --- a/core/src/execution/datafusion/expressions/subquery.rs +++ b/core/src/execution/datafusion/expressions/subquery.rs @@ -18,7 +18,7 @@ use arrow_array::RecordBatch; use arrow_schema::{DataType, Schema, TimeUnit}; use datafusion::logical_expr::ColumnarValue; -use datafusion_common::{internal_err, DataFusionError, ScalarValue}; +use datafusion_common::{internal_err, ScalarValue}; use datafusion_physical_expr::PhysicalExpr; use jni::{ objects::JByteArray, diff --git a/core/src/execution/datafusion/expressions/temporal.rs b/core/src/execution/datafusion/expressions/temporal.rs index 5bdb533d0..4ae3c2605 100644 --- a/core/src/execution/datafusion/expressions/temporal.rs +++ b/core/src/execution/datafusion/expressions/temporal.rs @@ -23,7 +23,7 @@ use std::{ }; use arrow::{ - compute::{hour_dyn, minute_dyn, second_dyn}, + compute::{date_part, DatePart}, record_batch::RecordBatch, }; use arrow_schema::{DataType, Schema, TimeUnit::Microsecond}; @@ -101,7 +101,7 @@ impl PhysicalExpr for HourExec { Some(self.timezone.clone().into()), )), ); - let result = hour_dyn(&array)?; + let result = date_part(&array, DatePart::Hour)?; Ok(ColumnarValue::Array(result)) } @@ -195,7 +195,7 @@ impl PhysicalExpr for MinuteExec { Some(self.timezone.clone().into()), )), ); - let result = minute_dyn(&array)?; + let result = date_part(&array, DatePart::Minute)?; Ok(ColumnarValue::Array(result)) } @@ -289,7 +289,7 @@ impl PhysicalExpr for SecondExec { Some(self.timezone.clone().into()), )), ); - let result = second_dyn(&array)?; + let result = date_part(&array, DatePart::Second)?; Ok(ColumnarValue::Array(result)) } diff --git a/core/src/execution/datafusion/operators/expand.rs b/core/src/execution/datafusion/operators/expand.rs index e3f681b77..5cf444b3b 100644 --- a/core/src/execution/datafusion/operators/expand.rs +++ b/core/src/execution/datafusion/operators/expand.rs @@ -20,12 +20,12 @@ use arrow_schema::SchemaRef; use datafusion::{ execution::TaskContext, physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, + RecordBatchStream, SendableRecordBatchStream, }, }; use datafusion_common::DataFusionError; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; +use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr}; use futures::{Stream, StreamExt}; use std::{ any::Any, @@ -41,6 +41,7 @@ pub struct CometExpandExec { projections: Vec>>, child: Arc, schema: SchemaRef, + cache: PlanProperties, } impl CometExpandExec { @@ -50,10 +51,17 @@ impl CometExpandExec { child: Arc, schema: SchemaRef, ) -> Self { + let cache = PlanProperties::new( + EquivalenceProperties::new(schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); + Self { projections, child, schema, + cache, } } } @@ -88,14 +96,6 @@ impl ExecutionPlan for CometExpandExec { self.schema.clone() } - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(1) - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None - } - fn children(&self) -> Vec> { vec![self.child.clone()] } @@ -122,6 +122,10 @@ impl ExecutionPlan for CometExpandExec { ExpandStream::new(self.projections.clone(), child_stream, self.schema.clone()); Ok(Box::pin(expand_stream)) } + + fn properties(&self) -> &PlanProperties { + &self.cache + } } pub struct ExpandStream { diff --git a/core/src/execution/datafusion/planner.rs b/core/src/execution/datafusion/planner.rs index c8869c5f3..ab83872c3 100644 --- a/core/src/execution/datafusion/planner.rs +++ b/core/src/execution/datafusion/planner.rs @@ -23,7 +23,11 @@ use arrow_schema::{DataType, Field, Schema, TimeUnit}; use datafusion::{ arrow::{compute::SortOptions, datatypes::SchemaRef}, common::DataFusionError, - logical_expr::{BuiltinScalarFunction, Operator as DataFusionOperator}, + execution::FunctionRegistry, + functions::math, + logical_expr::{ + BuiltinScalarFunction, Operator as DataFusionOperator, ScalarFunctionDefinition, + }, physical_expr::{ execution_props::ExecutionProps, expressions::{ @@ -31,7 +35,6 @@ use datafusion::{ FirstValue, InListExpr, IsNotNullExpr, IsNullExpr, LastValue, Literal as DataFusionLiteral, Max, Min, NegativeExpr, NotExpr, Sum, UnKnownColumn, }, - functions::create_physical_expr, AggregateExpr, PhysicalExpr, PhysicalSortExpr, ScalarFunctionExpr, }, physical_plan::{ @@ -43,9 +46,10 @@ use datafusion::{ sorts::sort::SortExec, ExecutionPlan, Partitioning, }, + prelude::SessionContext, }; use datafusion_common::{ - tree_node::{TreeNode, TreeNodeRewriter, VisitRecursion}, + tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter}, JoinType as DFJoinType, ScalarValue, }; use itertools::Itertools; @@ -107,20 +111,28 @@ pub struct PhysicalPlanner { // The execution context id of this planner. exec_context_id: i64, execution_props: ExecutionProps, + session_ctx: Arc, } impl Default for PhysicalPlanner { fn default() -> Self { - Self::new() + let session_ctx = Arc::new(SessionContext::new()); + let execution_props = ExecutionProps::new(); + Self { + exec_context_id: TEST_EXEC_CONTEXT_ID, + execution_props, + session_ctx, + } } } impl PhysicalPlanner { - pub fn new() -> Self { + pub fn new(session_ctx: Arc) -> Self { let execution_props = ExecutionProps::new(); Self { exec_context_id: TEST_EXEC_CONTEXT_ID, execution_props, + session_ctx, } } @@ -128,6 +140,7 @@ impl PhysicalPlanner { Self { exec_context_id, execution_props: self.execution_props, + session_ctx: self.session_ctx.clone(), } } @@ -464,14 +477,13 @@ impl PhysicalPlanner { } ExprStruct::Abs(expr) => { let child = self.create_expr(expr.child.as_ref().unwrap(), input_schema.clone())?; + let return_type = child.data_type(&input_schema)?; let args = vec![child]; - let expr = create_physical_expr( - &BuiltinScalarFunction::Abs, - &args, - &input_schema, - &self.execution_props, - )?; - Ok(expr) + let scalar_def = ScalarFunctionDefinition::UDF(math::abs()); + + let expr = + ScalarFunctionExpr::new("abs", scalar_def, args, return_type, None, false); + Ok(Arc::new(expr)) } ExprStruct::CaseWhen(case_when) => { let when_then_pairs = case_when @@ -637,8 +649,8 @@ impl PhysicalPlanner { let data_type = return_type.map(to_arrow_datatype).unwrap(); let fun_expr = create_comet_physical_fun( "decimal_div", - &self.execution_props, data_type.clone(), + &self.session_ctx.state(), )?; Ok(Arc::new(ScalarFunctionExpr::new( "decimal_div", @@ -933,6 +945,7 @@ impl PhysicalPlanner { join_params.join_on, join_params.join_filter, &join_params.join_type, + None, PartitionMode::Partitioned, // null doesn't equal to null in Spark join key. If the join key is // `EqualNullSafe`, Spark will rewrite it during planning. @@ -1215,12 +1228,19 @@ impl PhysicalPlanner { // scalar function // Note this assumes the `fun_name` is a defined function in DF. Otherwise, it'll // throw error. - let fun = &BuiltinScalarFunction::from_str(fun_name)?; - fun.return_type(&input_expr_types)? + let fun = BuiltinScalarFunction::from_str(fun_name); + if fun.is_err() { + self.session_ctx + .udf(fun_name)? + .inner() + .return_type(&input_expr_types)? + } else { + fun?.return_type(&input_expr_types)? + } } }; let fun_expr = - create_comet_physical_fun(fun_name, &self.execution_props, data_type.clone())?; + create_comet_physical_fun(fun_name, data_type.clone(), &self.session_ctx.state())?; let scalar_expr: Arc = Arc::new(ScalarFunctionExpr::new( fun_name, @@ -1287,7 +1307,7 @@ fn expr_to_columns( right_field_indices.push(column.index() - left_field_len); } } - VisitRecursion::Continue + TreeNodeRecursion::Continue }) })?; @@ -1323,50 +1343,51 @@ impl JoinFilterRewriter<'_> { } impl TreeNodeRewriter for JoinFilterRewriter<'_> { - type N = Arc; - - fn mutate(&mut self, node: Self::N) -> datafusion_common::Result { - let new_expr: Arc = - if let Some(column) = node.as_any().downcast_ref::() { - if column.index() < self.left_field_len { - // left side - let new_index = self - .left_field_indices - .iter() - .position(|&x| x == column.index()) - .ok_or_else(|| { - DataFusionError::Internal(format!( - "Column index {} not found in left field indices", - column.index() - )) - })?; - Arc::new(Column::new(column.name(), new_index)) - } else if column.index() < self.left_field_len + self.right_field_len { - // right side - let new_index = self - .right_field_indices - .iter() - .position(|&x| x + self.left_field_len == column.index()) - .ok_or_else(|| { - DataFusionError::Internal(format!( - "Column index {} not found in right field indices", - column.index() - )) - })?; - Arc::new(Column::new( - column.name(), - new_index + self.left_field_indices.len(), - )) - } else { - return Err(DataFusionError::Internal(format!( - "Column index {} out of range", - column.index() - ))); - } + type Node = Arc; + + fn f_down(&mut self, node: Self::Node) -> datafusion_common::Result> { + if let Some(column) = node.as_any().downcast_ref::() { + if column.index() < self.left_field_len { + // left side + let new_index = self + .left_field_indices + .iter() + .position(|&x| x == column.index()) + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Column index {} not found in left field indices", + column.index() + )) + })?; + Ok(Transformed::yes(Arc::new(Column::new( + column.name(), + new_index, + )))) + } else if column.index() < self.left_field_len + self.right_field_len { + // right side + let new_index = self + .right_field_indices + .iter() + .position(|&x| x + self.left_field_len == column.index()) + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Column index {} not found in right field indices", + column.index() + )) + })?; + Ok(Transformed::yes(Arc::new(Column::new( + column.name(), + new_index + self.left_field_indices.len(), + )))) } else { - node.clone() - }; - Ok(new_expr) + return Err(DataFusionError::Internal(format!( + "Column index {} out of range", + column.index() + ))); + } + } else { + Ok(Transformed::no(node)) + } } } @@ -1387,7 +1408,7 @@ fn rewrite_physical_expr( right_field_indices, ); - Ok(expr.rewrite(&mut rewriter)?) + Ok(expr.rewrite(&mut rewriter).data()?) } #[cfg(test)] @@ -1424,7 +1445,7 @@ mod tests { }; let op = create_filter(op_scan, 3); - let planner = PhysicalPlanner::new(); + let planner = PhysicalPlanner::default(); let row_count = 100; // Create a dictionary array with 100 values, and use it as input to the execution. @@ -1504,7 +1525,7 @@ mod tests { }; let op = create_filter_literal(op_scan, STRING_TYPE_ID, lit); - let planner = PhysicalPlanner::new(); + let planner = PhysicalPlanner::default(); let row_count = 100; @@ -1582,7 +1603,7 @@ mod tests { }; let op = create_filter(op_scan, 0); - let planner = PhysicalPlanner::new(); + let planner = PhysicalPlanner::default(); let (mut scans, datafusion_plan) = planner.create_plan(&op, &mut vec![]).unwrap(); diff --git a/core/src/execution/datafusion/shuffle_writer.rs b/core/src/execution/datafusion/shuffle_writer.rs index f836e3a40..3b92abbde 100644 --- a/core/src/execution/datafusion/shuffle_writer.rs +++ b/core/src/execution/datafusion/shuffle_writer.rs @@ -47,13 +47,13 @@ use datafusion::{ runtime_env::RuntimeEnv, }, physical_plan::{ - expressions::PhysicalSortExpr, metrics::{BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}, stream::RecordBatchStreamAdapter, - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, + RecordBatchStream, SendableRecordBatchStream, Statistics, }, }; +use datafusion_physical_expr::EquivalenceProperties; use futures::{lock::Mutex, Stream, StreamExt, TryFutureExt, TryStreamExt}; use itertools::Itertools; use simd_adler32::Adler32; @@ -79,6 +79,7 @@ pub struct ShuffleWriterExec { output_index_file: String, /// Metrics metrics: ExecutionPlanMetricsSet, + cache: PlanProperties, } impl DisplayAs for ShuffleWriterExec { @@ -103,14 +104,6 @@ impl ExecutionPlan for ShuffleWriterExec { self.input.schema() } - fn output_partitioning(&self) -> Partitioning { - self.partitioning.clone() - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None - } - fn children(&self) -> Vec> { vec![self.input.clone()] } @@ -163,6 +156,10 @@ impl ExecutionPlan for ShuffleWriterExec { fn statistics(&self) -> Result { self.input.statistics() } + + fn properties(&self) -> &PlanProperties { + &self.cache + } } impl ShuffleWriterExec { @@ -173,12 +170,19 @@ impl ShuffleWriterExec { output_data_file: String, output_index_file: String, ) -> Result { + let cache = PlanProperties::new( + EquivalenceProperties::new(input.schema().clone()), + partitioning.clone(), + ExecutionMode::Bounded, + ); + Ok(ShuffleWriterExec { input, partitioning, metrics: ExecutionPlanMetricsSet::new(), output_data_file, output_index_file, + cache, }) } } diff --git a/core/src/execution/jni_api.rs b/core/src/execution/jni_api.rs index 20f98a3a4..8249097a1 100644 --- a/core/src/execution/jni_api.rs +++ b/core/src/execution/jni_api.rs @@ -321,7 +321,8 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( // Because we don't know if input arrays are dictionary-encoded when we create // query plan, we need to defer stream initialization to first time execution. if exec_context.root_op.is_none() { - let planner = PhysicalPlanner::new().with_exec_id(exec_context_id); + let planner = PhysicalPlanner::new(exec_context.session_ctx.clone()) + .with_exec_id(exec_context_id); let (scans, root_op) = planner.create_plan( &exec_context.spark_plan, &mut exec_context.input_sources.clone(), diff --git a/core/src/execution/operators/copy.rs b/core/src/execution/operators/copy.rs index 699ccf7ae..292271f9e 100644 --- a/core/src/execution/operators/copy.rs +++ b/core/src/execution/operators/copy.rs @@ -41,6 +41,7 @@ use super::copy_or_cast_array; pub struct CopyExec { input: Arc, schema: SchemaRef, + cache: PlanProperties, } impl CopyExec { @@ -59,7 +60,17 @@ impl CopyExec { let schema = Arc::new(Schema::new(fields)); - Self { input, schema } + let cache = PlanProperties::new( + EquivalenceProperties::new(schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); + + Self { + input, + schema, + cache, + } } } @@ -82,14 +93,6 @@ impl ExecutionPlan for CopyExec { self.schema.clone() } - fn output_partitioning(&self) -> Partitioning { - self.input.output_partitioning() - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.input.output_ordering() - } - fn children(&self) -> Vec> { vec![self.input.clone()] } @@ -103,6 +106,7 @@ impl ExecutionPlan for CopyExec { Ok(Arc::new(CopyExec { input: new_input, schema: self.schema.clone(), + cache: self.cache.clone(), })) } @@ -118,6 +122,10 @@ impl ExecutionPlan for CopyExec { fn statistics(&self) -> DataFusionResult { self.input.statistics() } + + fn properties(&self) -> &PlanProperties { + &self.cache + } } struct CopyStream { diff --git a/core/src/execution/operators/scan.rs b/core/src/execution/operators/scan.rs index e31230c58..99c7c8391 100644 --- a/core/src/execution/operators/scan.rs +++ b/core/src/execution/operators/scan.rs @@ -61,6 +61,7 @@ pub struct ScanExec { /// The input batch of input data. Used to determine the schema of the input data. /// It is also used in unit test to mock the input data from JVM. pub batch: Arc>>, + cache: PlanProperties, } impl ScanExec { @@ -76,11 +77,20 @@ impl ScanExec { InputBatch::EOF }; + let schema = scan_schema(&first_batch, &data_types); + + let cache = PlanProperties::new( + EquivalenceProperties::new(schema), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); + Ok(Self { exec_context_id, input_source, data_types, batch: Arc::new(Mutex::new(Some(first_batch))), + cache, }) } @@ -197,6 +207,34 @@ impl ScanExec { } } +fn scan_schema(input_batch: &InputBatch, data_types: &[DataType]) -> SchemaRef { + let fields = match input_batch { + // Note that if `columns` is empty, we'll get an empty schema + InputBatch::Batch(columns, _) => { + columns + .iter() + .enumerate() + .map(|(idx, c)| { + let datatype = ScanExec::unpack_dictionary_type(c.data_type()); + // We don't use the field name. Put a placeholder. + if matches!(datatype, DataType::Dictionary(_, _)) { + Field::new_dict(format!("col_{}", idx), datatype, true, idx as i64, false) + } else { + Field::new(format!("col_{}", idx), datatype, true) + } + }) + .collect::>() + } + _ => data_types + .iter() + .enumerate() + .map(|(idx, dt)| Field::new(format!("col_{}", idx), dt.clone(), true)) + .collect(), + }; + + Arc::new(Schema::new(fields)) +} + impl ExecutionPlan for ScanExec { fn as_any(&self) -> &dyn Any { self @@ -207,47 +245,7 @@ impl ExecutionPlan for ScanExec { // Spark plan to DataFusion plan. At the moment, `batch` is not EOF. let binding = self.batch.try_lock().unwrap(); let input_batch = binding.as_ref().unwrap(); - - let fields = match input_batch { - // Note that if `columns` is empty, we'll get an empty schema - InputBatch::Batch(columns, _) => { - columns - .iter() - .enumerate() - .map(|(idx, c)| { - let datatype = Self::unpack_dictionary_type(c.data_type()); - // We don't use the field name. Put a placeholder. - if matches!(datatype, DataType::Dictionary(_, _)) { - Field::new_dict( - format!("col_{}", idx), - datatype, - true, - idx as i64, - false, - ) - } else { - Field::new(format!("col_{}", idx), datatype, true) - } - }) - .collect::>() - } - _ => self - .data_types - .iter() - .enumerate() - .map(|(idx, dt)| Field::new(format!("col_{}", idx), dt.clone(), true)) - .collect(), - }; - - Arc::new(Schema::new(fields)) - } - - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(1) - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None + scan_schema(input_batch, &self.data_types) } fn children(&self) -> Vec> { @@ -268,6 +266,10 @@ impl ExecutionPlan for ScanExec { ) -> datafusion::common::Result { Ok(Box::pin(ScanStream::new(self.clone(), self.schema()))) } + + fn properties(&self) -> &PlanProperties { + &self.cache + } } impl DisplayAs for ScanExec { diff --git a/core/src/parquet/util/jni.rs b/core/src/parquet/util/jni.rs index 225abfc03..62787213f 100644 --- a/core/src/parquet/util/jni.rs +++ b/core/src/parquet/util/jni.rs @@ -82,6 +82,7 @@ pub fn convert_encoding(ordinal: jint) -> Encoding { match ordinal { 0 => Encoding::PLAIN, 1 => Encoding::RLE, + #[allow(deprecated)] 3 => Encoding::BIT_PACKED, 4 => Encoding::PLAIN_DICTIONARY, 5 => Encoding::DELTA_BINARY_PACKED,