From f4e599fa888ed02de4b4c3b234f7e721995d4ca2 Mon Sep 17 00:00:00 2001 From: Andy Lok Date: Fri, 8 Nov 2024 10:37:04 +0800 Subject: [PATCH] feat: implement StringColumn using StringViewArray (#16610) * feat: implement StringColumn using StringViewArray * fix * convert binaryview between arrow1 and arrow2 * fix * fix * fix * fix * fix * fix some issue * fix view slice bug * fix view slice bug * fix * support native read write * fix * fix * fix tests * add with_data_type * add with_data_type * fix gen_random_uuid commit row * move record batch to block * remove unused dep * fix lint * fix commit row * fix commit row * fix size * fix size * add NewBinaryColumnBuilder and NewStringColumnBulder * fix incorrect serialize_size * fix incorrect serialize_size * lint * lint * fix tests * use binary state * use binary state * update tests * update tests * update tests * fix native view encoding * fix * [ci skip] updata kernel concat for view types * [ci skip] improve kernels for view types * [ci skip] only string type use string view type * [ci skip] only string type use string view type * fix tests * [ci skip] fix tests * [ci skip] fix * fix * use NewStringColumnBuilder * rename NewString -> String * fmt * [ci skip] update tests * optimize take * add bench * fix tests * update * improve compare * implement compare using string view prefix * fix * fix * fix * fix-length * disable spill * [ci skip] add put_and_commit * [ci skip] update * update test * lint * [ci skip] add maybe gc * fix endiness * fix endiness * fix * update string compare * update --------- Co-authored-by: sundy-li <543950155@qq.com> --- Cargo.lock | 35 +- Cargo.toml | 6 +- .../arrow/src/arrow/array/binview/ffi.rs | 1 - .../arrow/src/arrow/array/binview/from.rs | 76 ++ .../arrow/src/arrow/array/binview/mod.rs | 93 +- .../arrow/src/arrow/array/binview/mutable.rs | 31 +- .../arrow/src/arrow/array/binview/view.rs | 96 ++ src/common/arrow/src/arrow/array/mod.rs | 6 +- src/common/arrow/src/arrow/datatypes/mod.rs | 7 +- .../arrow/src/native/compression/basic.rs | 40 + .../src/native/compression/binary/mod.rs | 31 +- src/common/arrow/src/native/read/array/mod.rs | 4 + .../arrow/src/native/read/array/view.rs | 274 ++++++ .../arrow/src/native/read/batch_read.rs | 13 + .../arrow/src/native/read/deserialize.rs | 12 + src/common/arrow/src/native/read/reader.rs | 2 + src/common/arrow/src/native/stat.rs | 3 +- src/common/arrow/src/native/write/mod.rs | 1 + .../arrow/src/native/write/serialize.rs | 28 + src/common/arrow/src/native/write/view.rs | 56 ++ .../arrow/tests/it/arrow/array/binview/mod.rs | 22 + src/common/arrow/tests/it/native/io.rs | 25 + src/meta/api/src/txn_backoff.rs | 2 +- src/query/catalog/src/plan/internal_column.rs | 22 +- src/query/catalog/src/table.rs | 4 + src/query/ee/Cargo.toml | 1 - .../background_service_handler.rs | 4 +- .../src/background_service/compaction_job.rs | 135 ++- .../ee_features/background_service/Cargo.toml | 2 +- .../src/background_service.rs | 6 +- src/query/expression/Cargo.toml | 6 + src/query/expression/benches/bench.rs | 157 ++++ .../expression/src/aggregate/payload_flush.rs | 25 +- .../expression/src/aggregate/payload_row.rs | 110 ++- src/query/expression/src/block.rs | 9 + src/query/expression/src/converts/arrow/to.rs | 1 + .../expression/src/converts/arrow2/from.rs | 295 +++--- .../expression/src/converts/arrow2/to.rs | 33 +- .../expression/src/converts/meta/bincode.rs | 46 +- src/query/expression/src/evaluator.rs | 8 +- .../expression/src/filter/filter_executor.rs | 5 +- .../select_value/select_column_scalar.rs | 96 +- src/query/expression/src/kernels/concat.rs | 167 +--- src/query/expression/src/kernels/filter.rs | 164 +--- src/query/expression/src/kernels/group_by.rs | 2 +- .../src/kernels/group_by_hash/method.rs | 4 +- .../group_by_hash/method_dict_serializer.rs | 21 +- .../group_by_hash/method_serializer.rs | 12 +- .../group_by_hash/method_single_string.rs | 28 +- .../src/kernels/group_by_hash/utils.rs | 23 +- src/query/expression/src/kernels/mod.rs | 1 + src/query/expression/src/kernels/scatter.rs | 34 +- src/query/expression/src/kernels/sort.rs | 2 +- .../expression/src/kernels/sort_compare.rs | 13 +- src/query/expression/src/kernels/take.rs | 150 ++-- .../expression/src/kernels/take_chunks.rs | 125 +-- .../expression/src/kernels/take_compact.rs | 111 +-- .../expression/src/kernels/take_ranges.rs | 71 +- src/query/expression/src/kernels/utils.rs | 190 ---- src/query/expression/src/row/row_converter.rs | 5 +- src/query/expression/src/types/array.rs | 1 + src/query/expression/src/types/bitmap.rs | 2 +- src/query/expression/src/types/geography.rs | 32 +- src/query/expression/src/types/geometry.rs | 2 +- src/query/expression/src/types/string.rs | 483 ++++------ src/query/expression/src/types/variant.rs | 2 +- src/query/expression/src/utils/display.rs | 6 +- src/query/expression/src/values.rs | 30 +- src/query/expression/tests/it/common.rs | 2 +- src/query/expression/tests/it/kernel.rs | 56 +- src/query/expression/tests/it/row.rs | 38 +- .../tests/it/testdata/kernel-pass.txt | 36 +- .../formats/src/field_decoder/fast_values.rs | 2 +- src/query/formats/src/field_decoder/nested.rs | 2 +- .../src/field_decoder/separated_text.rs | 3 +- src/query/functions/Cargo.toml | 2 +- .../aggregates/aggregate_distinct_state.rs | 5 +- .../src/aggregates/aggregate_histogram.rs | 5 +- .../src/aggregates/aggregate_min_max_any.rs | 124 ++- .../src/aggregates/aggregate_scalar_state.rs | 33 + .../src/aggregates/aggregate_string_agg.rs | 5 +- src/query/functions/src/scalars/arithmetic.rs | 29 +- src/query/functions/src/scalars/binary.rs | 72 +- src/query/functions/src/scalars/bitmap.rs | 5 +- src/query/functions/src/scalars/comparison.rs | 93 +- src/query/functions/src/scalars/datetime.rs | 13 +- .../functions/src/scalars/decimal/cast.rs | 2 +- src/query/functions/src/scalars/geo_h3.rs | 5 +- src/query/functions/src/scalars/hash.rs | 83 +- src/query/functions/src/scalars/other.rs | 24 +- src/query/functions/src/scalars/string.rs | 333 ++----- .../src/scalars/string_multi_args.rs | 13 +- src/query/functions/src/scalars/variant.rs | 5 +- src/query/functions/src/scalars/vector.rs | 11 +- src/query/functions/src/srfs/variant.rs | 13 +- .../tests/it/aggregates/testdata/agg.txt | 114 +-- .../it/aggregates/testdata/agg_group_by.txt | 66 +- .../functions/tests/it/scalars/comparison.rs | 1 + .../tests/it/scalars/testdata/arithmetic.txt | 96 +- .../tests/it/scalars/testdata/array.txt | 36 +- .../tests/it/scalars/testdata/binary.txt | 72 +- .../tests/it/scalars/testdata/cast.txt | 276 +++--- .../tests/it/scalars/testdata/comparison.txt | 305 ++++--- .../tests/it/scalars/testdata/geo_h3.txt | 24 +- .../tests/it/scalars/testdata/geometry.txt | 76 +- .../tests/it/scalars/testdata/hash.txt | 100 +-- .../tests/it/scalars/testdata/map.txt | 322 +++---- .../tests/it/scalars/testdata/regexp.txt | 446 ++++----- .../tests/it/scalars/testdata/string.txt | 848 +++++++++--------- .../tests/it/scalars/testdata/tuple.txt | 36 +- .../tests/it/scalars/testdata/variant.txt | 810 ++++++++--------- .../processors/transforms/sort/rows/common.rs | 6 +- .../interpreter_table_modify_column.rs | 6 +- .../src/pipelines/builders/builder_join.rs | 5 - .../group_by/aggregator_groups_builder.rs | 73 +- .../hash_join/hash_join_build_state.rs | 36 +- .../hash_join/hash_join_probe_state.rs | 4 - .../merge_into_hash_join_optimization.rs | 5 +- .../hash_join/probe_join/inner_join.rs | 7 +- .../hash_join/probe_join/left_anti_join.rs | 14 +- .../hash_join/probe_join/left_join.rs | 13 +- .../hash_join/probe_join/left_mark_join.rs | 7 +- .../hash_join/probe_join/left_semi_join.rs | 9 +- .../hash_join/probe_join/right_join.rs | 7 +- .../hash_join/probe_join/right_mark_join.rs | 7 +- .../probe_join/right_semi_anti_join.rs | 7 +- .../transforms/hash_join/probe_state.rs | 12 +- .../processors/transforms/hash_join/row.rs | 2 - .../hash_join/transform_hash_join_probe.rs | 2 - .../suggested_background_compaction_tasks.rs | 148 ++- .../others/suggested_background_tasks.rs | 12 +- .../service/src/table_functions/others/udf.rs | 3 +- src/query/service/src/test_kits/fixture.rs | 12 +- src/query/settings/src/settings_default.rs | 14 +- .../common/index/benches/build_from_block.rs | 2 +- .../storages/common/index/src/bloom_index.rs | 2 +- src/query/storages/fuse/src/fuse_table.rs | 4 + .../fuse/src/io/read/block/block_reader.rs | 1 + .../block/block_reader_native_deserialize.rs | 1 + .../operations/read/runtime_filter_prunner.rs | 4 +- .../fuse/src/statistics/cluster_statistics.rs | 2 +- .../table_functions/clustering_statistics.rs | 5 +- .../fuse/src/table_functions/fuse_block.rs | 5 +- .../fuse/src/table_functions/fuse_column.rs | 15 +- .../fuse/src/table_functions/fuse_encoding.rs | 11 +- src/query/storages/parquet/Cargo.toml | 1 - .../row_based/formats/csv/block_builder.rs | 16 +- .../system/src/malloc_stats_totals_table.rs | 2 +- tests/sqllogictests/src/main.rs | 5 +- .../base/05_ddl/05_0003_ddl_alter_table.test | 4 +- .../09_0027_func_fuse_encoding.test | 8 +- .../20_0013_query_result_cache.test | 4 +- ...elete.sql.test => distributed_delete.test} | 12 +- .../mode/cluster/memo/aggregate_property.test | 4 +- .../mode/cluster/memo/join_property.test | 10 +- .../mode/cluster/memo/mix_property.test | 2 +- .../suites/mode/cluster/shuffle_join.test | 40 +- .../mode/standalone/explain/explain.test | 66 +- .../suites/mode/standalone/limit.test | 9 +- .../suites/mode/standalone/pr15804.test | 11 +- .../functions/02_0005_function_compare.test | 10 + .../suites/query/join/runtime_filter.test | 4 +- .../formats/parquet/options/null_if.test | 2 +- .../20+_others/20_0014_sort_spill.result | 20 +- .../20+_others/20_0014_sort_spill.sql | 21 +- .../00_stage/00_0001_copy_into_stage.result | 2 +- .../05_05_01_parquet_load_unload.result | 4 +- .../07_0000_insert_with_stage.result | 2 +- .../07_0001_replace_with_stage.result | 2 +- ..._0002_insert_with_stage_deduplicate.result | 2 +- ..._0003_insert_with_stage_file_format.result | 6 +- .../07_0003_insert_with_stage_file_format.sh | 2 +- .../08_00_parquet/08_00_00_basic.result | 4 +- 173 files changed, 4535 insertions(+), 4355 deletions(-) create mode 100644 src/common/arrow/src/native/read/array/view.rs create mode 100644 src/common/arrow/src/native/write/view.rs create mode 100644 src/query/expression/benches/bench.rs rename tests/sqllogictests/suites/mode/cluster/{distributed_delete.sql.test => distributed_delete.test} (86%) mode change 100755 => 100644 tests/suites/1_stateful/00_stage/00_0001_copy_into_stage.result mode change 100755 => 100644 tests/suites/1_stateful/05_formats/05_05_parquet/05_05_01_parquet_load_unload.result mode change 100755 => 100644 tests/suites/1_stateful/08_select_stage/08_00_parquet/08_00_00_basic.result diff --git a/Cargo.lock b/Cargo.lock index f8a8945c1322..f4c02090d96f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -575,8 +575,7 @@ dependencies = [ [[package]] name = "arrow-udf-js" version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6151bb7f26cde846e14adb17e08282153f7a9250dd78bbab3fa462b66d7b623" +source = "git+https://github.com/arrow-udf/arrow-udf?rev=80b09d6#80b09d67ee0c7b796bf7a492a71842ac64622406" dependencies = [ "anyhow", "arrow-array", @@ -589,8 +588,7 @@ dependencies = [ [[package]] name = "arrow-udf-python" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0b80da061a53aac237e711fddb01709002ba2e006f9fd4c72a430d4938dd921" +source = "git+https://github.com/arrow-udf/arrow-udf?rev=80b09d6#80b09d67ee0c7b796bf7a492a71842ac64622406" dependencies = [ "anyhow", "arrow-array", @@ -604,8 +602,7 @@ dependencies = [ [[package]] name = "arrow-udf-wasm" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe31144804e093dd60b4e7a749b64b9454040c05a34ccbeb641fc60fcf5ee92d" +source = "git+https://github.com/arrow-udf/arrow-udf?rev=80b09d6#80b09d67ee0c7b796bf7a492a71842ac64622406" dependencies = [ "anyhow", "arrow-array", @@ -2973,9 +2970,9 @@ dependencies = [ [[package]] name = "databend-client" -version = "0.22.2" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd8770a1c49fa21e62a768a0de442cc3f77998a357303d56ddd3485cb7c58d3a" +checksum = "0819048a792e2eac58b455bbbcf6077c81e2780b3cc4f565a6e72e92dde56bd1" dependencies = [ "async-trait", "log", @@ -3317,6 +3314,7 @@ dependencies = [ "chrono", "chrono-tz 0.8.6", "comfy-table", + "criterion", "dashmap 6.1.0", "databend-common-arrow", "databend-common-ast", @@ -3327,6 +3325,7 @@ dependencies = [ "databend-common-hashtable", "databend-common-io", "educe 0.4.23", + "either", "enum-as-inner 0.5.1", "ethnum", "futures", @@ -4422,7 +4421,6 @@ dependencies = [ "databend-common-metrics", "databend-common-pipeline-core", "databend-common-settings", - "databend-common-sql", "databend-common-storage", "databend-storages-common-cache", "databend-storages-common-pruner", @@ -4673,9 +4671,9 @@ dependencies = [ [[package]] name = "databend-driver" -version = "0.22.2" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "393daaf83f32c5685887103b04bd9762d43298f7820b9f888144877fcf1d89e8" +checksum = "cbea10312fa203003b572b701b6121cecd8eaf260e7ec7687796b552044541c6" dependencies = [ "arrow", "async-compression 0.4.12", @@ -4698,9 +4696,9 @@ dependencies = [ [[package]] name = "databend-driver-core" -version = "0.22.2" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd1abd8ab1c4200648510204cdfcb538af1eca2c9433e12276dee26a1806144" +checksum = "e1b8f3bf7bb87d0f2fce8a1992eea4a0b442c01165a0d68aab2727a386c945ab" dependencies = [ "arrow", "chrono", @@ -4721,9 +4719,9 @@ dependencies = [ [[package]] name = "databend-driver-macros" -version = "0.22.2" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5038d47b403ce2351b109fb82627959a27a16a53ce16397bf1ca39307e052b5c" +checksum = "40bed5c66f36e79baea7c95d2d0dc8433671606ca7152012562c7145e3b909a1" dependencies = [ "quote", "syn 2.0.58", @@ -4757,11 +4755,11 @@ dependencies = [ name = "databend-enterprise-background-service" version = "0.1.0" dependencies = [ - "arrow-array", "async-backtrace", "async-trait", "databend-common-base", "databend-common-exception", + "databend-common-expression", "databend-common-meta-app", "serde", ] @@ -4811,7 +4809,6 @@ dependencies = [ name = "databend-enterprise-query" version = "0.1.0" dependencies = [ - "arrow-array", "async-backtrace", "async-trait", "aws-config", @@ -8419,9 +8416,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.10" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" dependencies = [ "bytes", "futures-channel", diff --git a/Cargo.toml b/Cargo.toml index 4c93442f3d31..b6ce2ec48e71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -219,9 +219,9 @@ arrow-ipc = { version = "53" } arrow-ord = { version = "53" } arrow-schema = { version = "53", features = ["serde"] } arrow-select = { version = "53" } -arrow-udf-js = "0.5.0" -arrow-udf-python = "0.4.0" -arrow-udf-wasm = "0.4.0" +arrow-udf-js = { git = "https://github.com/arrow-udf/arrow-udf", rev = "80b09d6" } +arrow-udf-python = { git = "https://github.com/arrow-udf/arrow-udf", rev = "80b09d6" } +arrow-udf-wasm = { git = "https://github.com/arrow-udf/arrow-udf", rev = "80b09d6" } async-backtrace = "0.2" async-channel = "1.7.1" async-compression = { git = "https://github.com/datafuse-extras/async-compression", rev = "dc81082", features = [ diff --git a/src/common/arrow/src/arrow/array/binview/ffi.rs b/src/common/arrow/src/arrow/array/binview/ffi.rs index b28b349c5244..8d98daa62a95 100644 --- a/src/common/arrow/src/arrow/array/binview/ffi.rs +++ b/src/common/arrow/src/arrow/array/binview/ffi.rs @@ -64,7 +64,6 @@ unsafe impl ToFfi for BinaryViewArrayGeneric { validity, views: self.views.clone(), buffers: self.buffers.clone(), - raw_buffers: self.raw_buffers.clone(), phantom: Default::default(), total_bytes_len: AtomicU64::new(self.total_bytes_len.load(Ordering::Relaxed)), total_buffer_len: self.total_buffer_len, diff --git a/src/common/arrow/src/arrow/array/binview/from.rs b/src/common/arrow/src/arrow/array/binview/from.rs index 7559b19d8f54..40304c7c0279 100644 --- a/src/common/arrow/src/arrow/array/binview/from.rs +++ b/src/common/arrow/src/arrow/array/binview/from.rs @@ -12,9 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. +use arrow_data::ArrayData; +use arrow_data::ArrayDataBuilder; +use arrow_schema::DataType; + +use crate::arrow::array::Arrow2Arrow; +use crate::arrow::array::BinaryViewArray; use crate::arrow::array::BinaryViewArrayGeneric; use crate::arrow::array::MutableBinaryViewArray; +use crate::arrow::array::Utf8ViewArray; use crate::arrow::array::ViewType; +use crate::arrow::bitmap::Bitmap; impl> FromIterator> for BinaryViewArrayGeneric { #[inline] @@ -22,3 +30,71 @@ impl> FromIterator> for BinaryViewAr MutableBinaryViewArray::::from_iter(iter).into() } } + +impl Arrow2Arrow for BinaryViewArray { + fn to_data(&self) -> ArrayData { + let builder = ArrayDataBuilder::new(DataType::BinaryView) + .len(self.len()) + .add_buffer(self.views.clone().into()) + .add_buffers( + self.buffers + .iter() + .map(|x| x.clone().into()) + .collect::>(), + ) + .nulls(self.validity.clone().map(Into::into)); + unsafe { builder.build_unchecked() } + } + + fn from_data(data: &ArrayData) -> Self { + let views = crate::arrow::buffer::Buffer::from(data.buffers()[0].clone()); + let buffers = data.buffers()[1..] + .iter() + .map(|x| crate::arrow::buffer::Buffer::from(x.clone())) + .collect(); + let validity = data.nulls().map(|x| Bitmap::from_null_buffer(x.clone())); + unsafe { + Self::new_unchecked_unknown_md( + crate::arrow::datatypes::DataType::BinaryView, + views, + buffers, + validity, + None, + ) + } + } +} + +impl Arrow2Arrow for Utf8ViewArray { + fn to_data(&self) -> ArrayData { + let builder = ArrayDataBuilder::new(DataType::Utf8View) + .len(self.len()) + .add_buffer(self.views.clone().into()) + .add_buffers( + self.buffers + .iter() + .map(|x| x.clone().into()) + .collect::>(), + ) + .nulls(self.validity.clone().map(Into::into)); + unsafe { builder.build_unchecked() } + } + + fn from_data(data: &ArrayData) -> Self { + let views = crate::arrow::buffer::Buffer::from(data.buffers()[0].clone()); + let buffers = data.buffers()[1..] + .iter() + .map(|x| crate::arrow::buffer::Buffer::from(x.clone())) + .collect(); + let validity = data.nulls().map(|x| Bitmap::from_null_buffer(x.clone())); + unsafe { + Self::new_unchecked_unknown_md( + crate::arrow::datatypes::DataType::Utf8View, + views, + buffers, + validity, + None, + ) + } + } +} diff --git a/src/common/arrow/src/arrow/array/binview/mod.rs b/src/common/arrow/src/arrow/array/binview/mod.rs index 4d4637a95a8c..35850b10e0ab 100644 --- a/src/common/arrow/src/arrow/array/binview/mod.rs +++ b/src/common/arrow/src/arrow/array/binview/mod.rs @@ -41,9 +41,7 @@ pub use mutable::MutableBinaryViewArray; use private::Sealed; pub use view::View; -use crate::arrow::array::binview::view::validate_binary_view; use crate::arrow::array::binview::view::validate_utf8_only; -use crate::arrow::array::binview::view::validate_utf8_view; use crate::arrow::array::iterator::NonNullValuesIter; use crate::arrow::array::Array; use crate::arrow::bitmap::utils::BitmapIter; @@ -128,8 +126,6 @@ pub struct BinaryViewArrayGeneric { data_type: DataType, views: Buffer, buffers: Arc<[Buffer]>, - // Raw buffer access. (pointer, len). - raw_buffers: Arc<[(*const u8, usize)]>, validity: Option, phantom: PhantomData, /// Total bytes length if we would concat them all @@ -150,7 +146,6 @@ impl Clone for BinaryViewArrayGeneric { data_type: self.data_type.clone(), views: self.views.clone(), buffers: self.buffers.clone(), - raw_buffers: self.raw_buffers.clone(), validity: self.validity.clone(), phantom: Default::default(), total_bytes_len: AtomicU64::new(self.total_bytes_len.load(Ordering::Relaxed)), @@ -163,13 +158,6 @@ unsafe impl Send for BinaryViewArrayGeneric {} unsafe impl Sync for BinaryViewArrayGeneric {} -fn buffers_into_raw(buffers: &[Buffer]) -> Arc<[(*const T, usize)]> { - buffers - .iter() - .map(|buf| (buf.data_ptr(), buf.len())) - .collect() -} - impl BinaryViewArrayGeneric { pub fn new_unchecked( data_type: DataType, @@ -179,7 +167,6 @@ impl BinaryViewArrayGeneric { total_bytes_len: usize, total_buffer_len: usize, ) -> Self { - let raw_buffers = buffers_into_raw(&buffers); // # Safety // The caller must ensure // - the data is valid utf8 (if required) @@ -188,7 +175,6 @@ impl BinaryViewArrayGeneric { data_type, views, buffers, - raw_buffers, validity, phantom: Default::default(), total_bytes_len: AtomicU64::new(total_bytes_len as u64), @@ -242,10 +228,20 @@ impl BinaryViewArrayGeneric { "BinaryViewArray can only be initialized with DataType::BinaryView or DataType::Utf8View", )); } - if T::IS_UTF8 { - validate_utf8_view(views.as_ref(), buffers.as_ref())?; - } else { - validate_binary_view(views.as_ref(), buffers.as_ref())?; + + #[cfg(debug_assertions)] + { + if T::IS_UTF8 { + crate::arrow::array::binview::view::validate_utf8_view( + views.as_ref(), + buffers.as_ref(), + )?; + } else { + crate::arrow::array::binview::view::validate_binary_view( + views.as_ref(), + buffers.as_ref(), + )?; + } } if let Some(validity) = &validity { @@ -303,29 +299,8 @@ impl BinaryViewArrayGeneric { /// Assumes that the `i < self.len`. #[inline] pub unsafe fn value_unchecked(&self, i: usize) -> &T { - let v = *self.views.get_unchecked(i); - let len = v.length; - - // view layout: - // length: 4 bytes - // prefix: 4 bytes - // buffer_index: 4 bytes - // offset: 4 bytes - - // inlined layout: - // length: 4 bytes - // data: 12 bytes - - let bytes = if len <= 12 { - let ptr = self.views.data_ptr() as *const u8; - std::slice::from_raw_parts(ptr.add(i * 16 + 4), len as usize) - } else { - let (data_ptr, data_len) = *self.raw_buffers.get_unchecked(v.buffer_idx as usize); - let data = std::slice::from_raw_parts(data_ptr, data_len); - let offset = v.offset as usize; - data.get_unchecked(offset..offset + len as usize) - }; - T::from_bytes_unchecked(bytes) + let v = self.views.get_unchecked(i); + T::from_bytes_unchecked(v.get_slice_unchecked(&self.buffers)) } /// Returns an iterator of `Option<&T>` over every element of this array. @@ -381,6 +356,21 @@ impl BinaryViewArrayGeneric { } } + fn total_unshared_buffer_len(&self) -> usize { + // Given this function is only called in `maybe_gc()`, + // it may not be worthy to add an extra field for this. + self.buffers + .iter() + .map(|buf| { + if buf.shared_count_strong() > 1 { + 0 + } else { + buf.len() + } + }) + .sum() + } + /// Get the length of bytes that are stored in the variadic buffers. pub fn total_buffer_len(&self) -> usize { self.total_buffer_len @@ -402,10 +392,10 @@ impl BinaryViewArrayGeneric { return self; } let mut mutable = MutableBinaryViewArray::with_capacity(self.len()); - let buffers = self.raw_buffers.as_ref(); + let buffers = self.buffers.as_ref(); for view in self.views.as_ref() { - unsafe { mutable.push_view(*view, buffers) } + unsafe { mutable.push_view_unchecked(*view, buffers) } } mutable.freeze().with_validity(self.validity) } @@ -421,6 +411,13 @@ impl BinaryViewArrayGeneric { return self; } + // if Arc::strong_count(&self.buffers) != 1 { + // // There are multiple holders of this `buffers`. + // // If we allow gc in this case, + // // it may end up copying the same content multiple times. + // return self; + // } + // Subtract the maximum amount of inlined strings to get a lower bound // on the number of buffer bytes needed (assuming no dedup). let total_bytes_len = self.total_bytes_len(); @@ -531,11 +528,18 @@ impl BinaryViewArrayGeneric { pub fn default_data_type() -> &'static DataType { T::data_type() } + + pub fn with_data_type(mut self, data_type: DataType) -> Self { + self.data_type = data_type; + self + } } pub type BinaryViewArray = BinaryViewArrayGeneric<[u8]>; pub type Utf8ViewArray = BinaryViewArrayGeneric; +pub type MutableUtf8ViewArray = MutableBinaryViewArray; + impl BinaryViewArray { /// Validate the underlying bytes on UTF-8. pub fn validate_utf8(&self) -> Result<()> { @@ -593,7 +597,7 @@ impl Array for BinaryViewArrayGeneric { } fn data_type(&self) -> &DataType { - T::data_type() + &self.data_type } fn validity(&self) -> Option<&Bitmap> { @@ -616,6 +620,7 @@ impl Array for BinaryViewArrayGeneric { .map(|bitmap| bitmap.sliced_unchecked(offset, length)) .filter(|bitmap| bitmap.unset_bits() > 0); self.views.slice_unchecked(offset, length); + self.total_bytes_len.store(UNKNOWN_LEN, Ordering::Relaxed) } diff --git a/src/common/arrow/src/arrow/array/binview/mutable.rs b/src/common/arrow/src/arrow/array/binview/mutable.rs index 64ef7880bcf8..abf2530b6a38 100644 --- a/src/common/arrow/src/arrow/array/binview/mutable.rs +++ b/src/common/arrow/src/arrow/array/binview/mutable.rs @@ -41,9 +41,9 @@ pub struct MutableBinaryViewArray { pub(super) validity: Option, pub(super) phantom: std::marker::PhantomData, /// Total bytes length if we would concatenate them all. - pub(super) total_bytes_len: usize, + pub total_bytes_len: usize, /// Total bytes in the buffer (excluding remaining capacity) - pub(super) total_buffer_len: usize, + pub total_buffer_len: usize, } impl Clone for MutableBinaryViewArray { @@ -153,16 +153,15 @@ impl MutableBinaryViewArray { /// - caller must allocate enough capacity /// - caller must ensure the view and buffers match. #[inline] - pub unsafe fn push_view(&mut self, v: View, buffers: &[(*const u8, usize)]) { + pub(crate) unsafe fn push_view_unchecked(&mut self, v: View, buffers: &[Buffer]) { let len = v.length; self.total_bytes_len += len as usize; if len <= 12 { debug_assert!(self.views.capacity() > self.views.len()); - self.views.push(v); + self.views.push(v) } else { self.total_buffer_len += len as usize; - let (data_ptr, data_len) = *buffers.get_unchecked(v.buffer_idx as usize); - let data = std::slice::from_raw_parts(data_ptr, data_len); + let data = buffers.get_unchecked(v.buffer_idx as usize); let offset = v.offset as usize; let bytes = data.get_unchecked(offset..offset + len as usize); let t = T::from_bytes_unchecked(bytes); @@ -191,7 +190,11 @@ impl MutableBinaryViewArray { // buffer index + offset -> real binary data self.total_buffer_len += bytes.len(); let required_cap = self.in_progress_buffer.len() + bytes.len(); - if self.in_progress_buffer.capacity() < required_cap { + + let does_not_fit_in_buffer = self.in_progress_buffer.capacity() < required_cap; + let offset_will_not_fit = self.in_progress_buffer.len() > u32::MAX as usize; + + if does_not_fit_in_buffer || offset_will_not_fit { let new_capacity = (self.in_progress_buffer.capacity() * 2) .clamp(DEFAULT_BLOCK_SIZE, 16 * 1024 * 1024) .max(bytes.len()); @@ -406,6 +409,20 @@ impl MutableBinaryViewArray<[u8]> { } } +impl MutableBinaryViewArray { + pub fn pop(&mut self) -> Option { + if self.is_empty() { + return None; + } + + let value = unsafe { self.value_unchecked(self.len() - 1).to_string() }; + + self.views.pop(); + + Some(value) + } +} + impl> Extend> for MutableBinaryViewArray { #[inline] fn extend>>(&mut self, iter: I) { diff --git a/src/common/arrow/src/arrow/array/binview/view.rs b/src/common/arrow/src/arrow/array/binview/view.rs index b49993fb77ff..1707b6a58a9c 100644 --- a/src/common/arrow/src/arrow/array/binview/view.rs +++ b/src/common/arrow/src/arrow/array/binview/view.rs @@ -36,13 +36,109 @@ pub struct View { pub buffer_idx: u32, /// The offset into the buffer. pub offset: u32, + pub _align: [u128; 0], } impl View { + pub const MAX_INLINE_SIZE: u32 = 12; + #[inline(always)] pub fn as_u128(self) -> u128 { unsafe { std::mem::transmute(self) } } + + /// Create a new inline view without verifying the length + /// + /// # Safety + /// + /// It needs to hold that `bytes.len() <= View::MAX_INLINE_SIZE`. + #[inline] + pub unsafe fn new_inline_unchecked(bytes: &[u8]) -> Self { + debug_assert!(bytes.len() <= u32::MAX as usize); + debug_assert!(bytes.len() as u32 <= Self::MAX_INLINE_SIZE); + + let mut view = Self { + length: bytes.len() as u32, + ..Default::default() + }; + + let view_ptr = &mut view as *mut _ as *mut u8; + + // SAFETY: + // - bytes length <= 12, + // - size_of:: == 16 + // - View is laid out as [length, prefix, buffer_idx, offset] (using repr(C)) + // - By grabbing the view_ptr and adding 4, we have provenance over prefix, buffer_idx and + // offset. (i.e. the same could not be achieved with &mut self.prefix as *mut _ as *mut u8) + unsafe { + let inline_data_ptr = view_ptr.add(4); + core::ptr::copy_nonoverlapping(bytes.as_ptr(), inline_data_ptr, bytes.len()); + } + view + } + + /// Create a new inline view + /// + /// # Panics + /// + /// Panics if the `bytes.len() > View::MAX_INLINE_SIZE`. + #[inline] + pub fn new_inline(bytes: &[u8]) -> Self { + assert!(bytes.len() as u32 <= Self::MAX_INLINE_SIZE); + unsafe { Self::new_inline_unchecked(bytes) } + } + + /// Create a new inline view + /// + /// # Safety + /// + /// It needs to hold that `bytes.len() > View::MAX_INLINE_SIZE`. + #[inline] + pub unsafe fn new_noninline_unchecked(bytes: &[u8], buffer_idx: u32, offset: u32) -> Self { + debug_assert!(bytes.len() <= u32::MAX as usize); + debug_assert!(bytes.len() as u32 > View::MAX_INLINE_SIZE); + + // SAFETY: The invariant of this function guarantees that this is safe. + let prefix = unsafe { u32::from_le_bytes(bytes[0..4].try_into().unwrap_unchecked()) }; + Self { + length: bytes.len() as u32, + prefix, + buffer_idx, + offset, + ..Default::default() + } + } + + #[inline] + pub fn new_from_bytes(bytes: &[u8], buffer_idx: u32, offset: u32) -> Self { + debug_assert!(bytes.len() <= u32::MAX as usize); + + // SAFETY: We verify the invariant with the outer if statement + unsafe { + if bytes.len() as u32 <= Self::MAX_INLINE_SIZE { + Self::new_inline_unchecked(bytes) + } else { + Self::new_noninline_unchecked(bytes, buffer_idx, offset) + } + } + } + + /// Constructs a byteslice from this view. + /// + /// # Safety + /// Assumes that this view is valid for the given buffers. + pub unsafe fn get_slice_unchecked<'a>(&'a self, buffers: &'a [Buffer]) -> &'a [u8] { + unsafe { + if self.length <= Self::MAX_INLINE_SIZE { + let ptr = self as *const View as *const u8; + std::slice::from_raw_parts(ptr.add(4), self.length as usize) + } else { + let data = buffers.get_unchecked(self.buffer_idx as usize); + let offset = self.offset as usize; + data.get_unchecked(offset..offset + self.length as usize) + } + } + } } impl Display for View { diff --git a/src/common/arrow/src/arrow/array/mod.rs b/src/common/arrow/src/arrow/array/mod.rs index 3f893ebaf15f..349a3345f6db 100644 --- a/src/common/arrow/src/arrow/array/mod.rs +++ b/src/common/arrow/src/arrow/array/mod.rs @@ -496,7 +496,8 @@ pub fn to_data(array: &dyn Array) -> arrow_data::ArrayData { }) } Map => to_data_dyn!(array, MapArray), - BinaryView | Utf8View => unimplemented!(), + BinaryView => to_data_dyn!(array, BinaryViewArray), + Utf8View => to_data_dyn!(array, Utf8ViewArray), } } @@ -527,7 +528,8 @@ pub fn from_data(data: &arrow_data::ArrayData) -> Box { }) } Map => Box::new(MapArray::from_data(data)), - BinaryView | Utf8View => unimplemented!(), + BinaryView => Box::new(BinaryViewArray::from_data(data)), + Utf8View => Box::new(Utf8ViewArray::from_data(data)), } } diff --git a/src/common/arrow/src/arrow/datatypes/mod.rs b/src/common/arrow/src/arrow/datatypes/mod.rs index c49ca12d9104..2a0075a68573 100644 --- a/src/common/arrow/src/arrow/datatypes/mod.rs +++ b/src/common/arrow/src/arrow/datatypes/mod.rs @@ -237,9 +237,8 @@ impl From for arrow_schema::DataType { DataType::Decimal(precision, scale) => Self::Decimal128(precision as _, scale as _), DataType::Decimal256(precision, scale) => Self::Decimal256(precision as _, scale as _), DataType::Extension(_, d, _) => (*d).into(), - DataType::BinaryView | DataType::Utf8View => { - panic!("view datatypes are not supported by arrow-rs") - } + DataType::BinaryView => Self::BinaryView, + DataType::Utf8View => Self::Utf8View, } } } @@ -302,6 +301,8 @@ impl From for DataType { } DataType::Decimal128(precision, scale) => Self::Decimal(precision as _, scale as _), DataType::Decimal256(precision, scale) => Self::Decimal256(precision as _, scale as _), + DataType::BinaryView => Self::BinaryView, + DataType::Utf8View => Self::Utf8View, v => panic!("{:?} encoding not supported by arrow2", v), } } diff --git a/src/common/arrow/src/native/compression/basic.rs b/src/common/arrow/src/native/compression/basic.rs index 77e6af20cdc4..56e76da03c22 100644 --- a/src/common/arrow/src/native/compression/basic.rs +++ b/src/common/arrow/src/native/compression/basic.rs @@ -148,3 +148,43 @@ pub fn compress_snappy(input_buf: &[u8], output_buf: &mut Vec) -> Result( + &self, + reader: &mut R, + uncompressed_size: usize, + compressed_size: usize, + values: &mut Vec, + scratch: &mut Vec, + ) -> Result<()> { + // values + values.reserve(uncompressed_size); + let mut use_inner = false; + reader.fill_buf()?; + let input = if reader.buffer_bytes().len() >= compressed_size { + use_inner = true; + reader.buffer_bytes() + } else { + scratch.resize(compressed_size, 0); + reader.read_exact(scratch.as_mut_slice())?; + scratch.as_slice() + }; + + let out_slice = unsafe { + core::slice::from_raw_parts_mut( + values.as_mut_ptr().add(values.len()), + uncompressed_size, + ) + }; + self.decompress(&input[..compressed_size], out_slice)?; + unsafe { values.set_len(values.len() + uncompressed_size) }; + + if use_inner { + reader.consume(compressed_size); + } + Ok(()) + } +} diff --git a/src/common/arrow/src/native/compression/binary/mod.rs b/src/common/arrow/src/native/compression/binary/mod.rs index 42b7d427639c..0cf9875288ff 100644 --- a/src/common/arrow/src/native/compression/binary/mod.rs +++ b/src/common/arrow/src/native/compression/binary/mod.rs @@ -159,30 +159,13 @@ pub fn decompress_binary( // values let (_, compressed_size, uncompressed_size) = read_compress_header(reader, scratch)?; - use_inner = false; - reader.fill_buf()?; - let input = if reader.buffer_bytes().len() >= compressed_size { - use_inner = true; - reader.buffer_bytes() - } else { - scratch.resize(compressed_size, 0); - reader.read_exact(scratch.as_mut_slice())?; - scratch.as_slice() - }; - - values.reserve(uncompressed_size); - let out_slice = unsafe { - core::slice::from_raw_parts_mut( - values.as_mut_ptr().add(values.len()), - uncompressed_size, - ) - }; - c.decompress(&input[..compressed_size], out_slice)?; - unsafe { values.set_len(values.len() + uncompressed_size) }; - - if use_inner { - reader.consume(compressed_size); - } + c.decompress_common_binary( + reader, + uncompressed_size, + compressed_size, + values, + scratch, + )?; } BinaryCompressor::Extend(c) => { c.decompress(input, length, offsets, values)?; diff --git a/src/common/arrow/src/native/read/array/mod.rs b/src/common/arrow/src/native/read/array/mod.rs index 887f6426edee..a1443034ea1a 100644 --- a/src/common/arrow/src/native/read/array/mod.rs +++ b/src/common/arrow/src/native/read/array/mod.rs @@ -21,6 +21,10 @@ mod boolean; pub use boolean::*; mod binary; pub use binary::*; + +mod view; +pub use view::*; + mod null; pub use null::*; mod struct_; diff --git a/src/common/arrow/src/native/read/array/view.rs b/src/common/arrow/src/native/read/array/view.rs new file mode 100644 index 000000000000..2d577ebdcf3b --- /dev/null +++ b/src/common/arrow/src/native/read/array/view.rs @@ -0,0 +1,274 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::io::Cursor; + +use byteorder::LittleEndian; +use byteorder::ReadBytesExt; +use parquet2::metadata::ColumnDescriptor; + +use crate::arrow::array::Array; +use crate::arrow::array::BinaryViewArray; +use crate::arrow::array::View; +use crate::arrow::bitmap::Bitmap; +use crate::arrow::bitmap::MutableBitmap; +use crate::arrow::buffer::Buffer; +use crate::arrow::compute::concatenate::concatenate; +use crate::arrow::datatypes::DataType; +use crate::arrow::error::Result; +use crate::arrow::io::parquet::read::InitNested; +use crate::arrow::io::parquet::read::NestedState; +use crate::native::read::read_basic::*; +use crate::native::read::BufReader; +use crate::native::read::NativeReadBuf; +use crate::native::read::PageIterator; +use crate::native::CommonCompression; +use crate::native::PageMeta; + +#[derive(Debug)] +pub struct ViewArrayIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + iter: I, + is_nullable: bool, + data_type: DataType, + scratch: Vec, +} + +impl ViewArrayIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + pub fn new(iter: I, is_nullable: bool, data_type: DataType) -> Self { + Self { + iter, + is_nullable, + data_type, + scratch: vec![], + } + } +} + +impl ViewArrayIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + fn deserialize(&mut self, num_values: u64, buffer: Vec) -> Result> { + let length = num_values as usize; + let mut reader = BufReader::with_capacity(buffer.len(), Cursor::new(buffer)); + let validity = if self.is_nullable { + let mut validity_builder = MutableBitmap::with_capacity(length); + read_validity(&mut reader, length, &mut validity_builder)?; + Some(std::mem::take(&mut validity_builder).into()) + } else { + None + }; + + read_view_array(&mut reader, length, self.data_type.clone(), validity) + } +} + +impl Iterator for ViewArrayIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + type Item = Result>; + + fn nth(&mut self, n: usize) -> Option { + match self.iter.nth(n) { + Some(Ok((num_values, buffer))) => Some(self.deserialize(num_values, buffer)), + Some(Err(err)) => Some(Result::Err(err)), + None => None, + } + } + + fn next(&mut self) -> Option { + match self.iter.next() { + Some(Ok((num_values, buffer))) => Some(self.deserialize(num_values, buffer)), + Some(Err(err)) => Some(Result::Err(err)), + None => None, + } + } +} + +#[derive(Debug)] +pub struct ViewArrayNestedIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + iter: I, + data_type: DataType, + leaf: ColumnDescriptor, + init: Vec, + scratch: Vec, +} + +impl ViewArrayNestedIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + pub fn new( + iter: I, + data_type: DataType, + leaf: ColumnDescriptor, + init: Vec, + ) -> Self { + Self { + iter, + data_type, + leaf, + init, + scratch: vec![], + } + } +} + +impl ViewArrayNestedIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + fn deserialize( + &mut self, + num_values: u64, + buffer: Vec, + ) -> Result<(NestedState, Box)> { + let mut reader = BufReader::with_capacity(buffer.len(), Cursor::new(buffer)); + let (mut nested, validity) = read_validity_nested( + &mut reader, + num_values as usize, + &self.leaf, + self.init.clone(), + )?; + let length = nested.nested.pop().unwrap().len(); + let array = read_view_array(&mut reader, length, self.data_type.clone(), validity)?; + Ok((nested, array)) + } +} + +impl Iterator for ViewArrayNestedIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + type Item = Result<(NestedState, Box)>; + + fn nth(&mut self, n: usize) -> Option { + match self.iter.nth(n) { + Some(Ok((num_values, buffer))) => Some(self.deserialize(num_values, buffer)), + Some(Err(err)) => Some(Result::Err(err)), + None => None, + } + } + + fn next(&mut self) -> Option { + match self.iter.next() { + Some(Ok((num_values, buffer))) => Some(self.deserialize(num_values, buffer)), + Some(Err(err)) => Some(Result::Err(err)), + None => None, + } + } +} + +pub fn read_view( + reader: &mut R, + is_nullable: bool, + data_type: DataType, + page_metas: Vec, +) -> Result> { + let num_values = page_metas.iter().map(|p| p.num_values as usize).sum(); + let mut validity_builder = if is_nullable { + Some(MutableBitmap::with_capacity(num_values)) + } else { + None + }; + let mut arrays = vec![]; + for page_meta in page_metas { + let length = page_meta.num_values as usize; + if let Some(ref mut validity_builder) = validity_builder { + read_validity(reader, length, validity_builder)?; + } + let array = read_view_array(reader, length, data_type.clone(), None)?; + arrays.push(array); + } + + let validity = + validity_builder.map(|mut validity_builder| std::mem::take(&mut validity_builder).into()); + let arrays = arrays.iter().map(|x| x.as_ref()).collect::>(); + let array = concatenate(&arrays)?; + let array = array.with_validity(validity); + Ok(array) +} + +pub fn read_nested_view_array( + reader: &mut R, + data_type: DataType, + leaf: ColumnDescriptor, + init: Vec, + page_metas: Vec, +) -> Result)>> { + let mut results = Vec::with_capacity(page_metas.len()); + + for page_meta in page_metas { + let num_values = page_meta.num_values as usize; + let (mut nested, validity) = read_validity_nested(reader, num_values, &leaf, init.clone())?; + let length = nested.nested.pop().unwrap().len(); + + let array = read_view_array(reader, length, data_type.clone(), validity)?; + results.push((nested, array)); + } + Ok(results) +} + +fn read_view_array( + reader: &mut R, + length: usize, + data_type: DataType, + validity: Option, +) -> Result> { + let mut scratch = vec![0; 9]; + let (_c, _compressed_size, _uncompressed_size) = read_compress_header(reader, &mut scratch)?; + + let mut buffer = vec![View::default(); length]; + let temp_data = + unsafe { std::slice::from_raw_parts_mut(buffer.as_mut_ptr() as *mut u8, length * 16) }; + reader.read_exact(temp_data)?; + let views = Buffer::from(buffer); + + let buffer_len = reader.read_u32::()?; + let mut buffers = Vec::with_capacity(buffer_len as _); + + for _ in 0..buffer_len { + scratch.clear(); + let (compression, compressed_size, uncompressed_size) = + read_compress_header(reader, &mut scratch)?; + let c = CommonCompression::try_from(&compression)?; + let mut buffer = vec![]; + c.decompress_common_binary( + reader, + uncompressed_size, + compressed_size, + &mut buffer, + &mut scratch, + )?; + buffers.push(Buffer::from(buffer)); + } + + let array = unsafe { + BinaryViewArray::new_unchecked_unknown_md( + data_type.clone(), + views, + buffers.into(), + validity, + None, + ) + }; + + if matches!(data_type, DataType::Utf8View) { + Ok(Box::new(array.to_utf8view()?)) + } else { + Ok(Box::new(array)) + } +} diff --git a/src/common/arrow/src/native/read/batch_read.rs b/src/common/arrow/src/native/read/batch_read.rs index 97cceaea614f..7c9ed0c1d7a2 100644 --- a/src/common/arrow/src/native/read/batch_read.rs +++ b/src/common/arrow/src/native/read/batch_read.rs @@ -60,6 +60,7 @@ pub fn read_simple( ) }), Binary | Utf8 => read_binary::(reader, is_nullable, data_type, page_metas), + BinaryView | Utf8View => read_view::<_>(reader, is_nullable, data_type, page_metas), LargeBinary | LargeUtf8 => { read_binary::(reader, is_nullable, data_type, page_metas) } @@ -121,6 +122,18 @@ pub fn read_nested( page_metas.pop().unwrap(), )? } + + BinaryView | Utf8View => { + init.push(InitNested::Primitive(field.is_nullable)); + read_nested_view_array::<_>( + &mut readers.pop().unwrap(), + field.data_type().clone(), + leaves.pop().unwrap(), + init, + page_metas.pop().unwrap(), + )? + } + LargeBinary | LargeUtf8 => { init.push(InitNested::Primitive(field.is_nullable)); read_nested_binary::( diff --git a/src/common/arrow/src/native/read/deserialize.rs b/src/common/arrow/src/native/read/deserialize.rs index 72e6e06104cc..8ed459e8d201 100644 --- a/src/common/arrow/src/native/read/deserialize.rs +++ b/src/common/arrow/src/native/read/deserialize.rs @@ -126,6 +126,9 @@ where } ), Binary | Utf8 => DynIter::new(BinaryIter::<_, i32>::new(reader, is_nullable, data_type)), + BinaryView | Utf8View => { + DynIter::new(ViewArrayIter::<_>::new(reader, is_nullable, data_type)) + } LargeBinary | LargeUtf8 => { DynIter::new(BinaryIter::<_, i64>::new(reader, is_nullable, data_type)) } @@ -185,6 +188,15 @@ where init, )) } + BinaryView | Utf8View => { + init.push(InitNested::Primitive(field.is_nullable)); + DynIter::new(ViewArrayNestedIter::<_>::new( + readers.pop().unwrap(), + field.data_type().clone(), + leaves.pop().unwrap(), + init, + )) + } LargeBinary | LargeUtf8 => { init.push(InitNested::Primitive(field.is_nullable)); DynIter::new(BinaryNestedIter::<_, i64>::new( diff --git a/src/common/arrow/src/native/read/reader.rs b/src/common/arrow/src/native/read/reader.rs index c9652c36b323..a9912b274e22 100644 --- a/src/common/arrow/src/native/read/reader.rs +++ b/src/common/arrow/src/native/read/reader.rs @@ -42,6 +42,8 @@ pub fn is_primitive(data_type: &DataType) -> bool { | PhysicalType::Utf8 | PhysicalType::LargeUtf8 | PhysicalType::Binary + | PhysicalType::Utf8View + | PhysicalType::BinaryView | PhysicalType::LargeBinary | PhysicalType::FixedSizeBinary | PhysicalType::Dictionary(_) diff --git a/src/common/arrow/src/native/stat.rs b/src/common/arrow/src/native/stat.rs index b11baed77c74..62e1aee7c6fe 100644 --- a/src/common/arrow/src/native/stat.rs +++ b/src/common/arrow/src/native/stat.rs @@ -152,7 +152,7 @@ fn size_of_primitive(p: PrimitiveType) -> usize { PrimitiveType::Int16 => 2, PrimitiveType::Int32 => 4, PrimitiveType::Int64 => 8, - PrimitiveType::Int128 => 16, + PrimitiveType::Int128 | PrimitiveType::UInt128 => 16, PrimitiveType::Int256 => 32, PrimitiveType::UInt8 => 1, PrimitiveType::UInt16 => 2, @@ -163,7 +163,6 @@ fn size_of_primitive(p: PrimitiveType) -> usize { PrimitiveType::Float64 => 8, PrimitiveType::DaysMs => unimplemented!(), PrimitiveType::MonthDayNano => unimplemented!(), - PrimitiveType::UInt128 => unimplemented!(), } } diff --git a/src/common/arrow/src/native/write/mod.rs b/src/common/arrow/src/native/write/mod.rs index 157d463c40d9..63a95c2dce22 100644 --- a/src/common/arrow/src/native/write/mod.rs +++ b/src/common/arrow/src/native/write/mod.rs @@ -17,6 +17,7 @@ pub(crate) mod boolean; pub(crate) mod common; pub(crate) mod primitive; mod serialize; +pub(crate) mod view; pub(crate) mod writer; pub use common::WriteOptions; diff --git a/src/common/arrow/src/native/write/serialize.rs b/src/common/arrow/src/native/write/serialize.rs index 303fcf1ecd91..c285065b9dc2 100644 --- a/src/common/arrow/src/native/write/serialize.rs +++ b/src/common/arrow/src/native/write/serialize.rs @@ -31,6 +31,7 @@ use crate::arrow::io::parquet::write::write_rep_and_def; use crate::arrow::io::parquet::write::Nested; use crate::arrow::io::parquet::write::Version; use crate::native::write::binary::write_binary; +use crate::native::write::view::write_view; use crate::with_match_primitive_type; /// Writes an [`Array`] to the file @@ -120,6 +121,24 @@ pub fn write_simple( ); write_binary::(w, &binary_array, write_options, scratch)?; } + BinaryView => { + let array: &BinaryViewArray = array.as_any().downcast_ref().unwrap(); + if is_optional { + write_validity::(w, is_optional, array.validity(), array.len(), scratch)?; + } + + write_view::(w, array, write_options, scratch)?; + } + Utf8View => { + let array: &Utf8ViewArray = array.as_any().downcast_ref().unwrap(); + let array = array.clone().to_binview(); + + if is_optional { + write_validity::(w, is_optional, array.validity(), array.len(), scratch)?; + } + + write_view::(w, &array, write_options, scratch)?; + } Struct => unreachable!(), List => unreachable!(), FixedSizeList => unreachable!(), @@ -186,6 +205,15 @@ pub fn write_nested( write_binary::(w, &binary_array, write_options, scratch)?; } + BinaryView => { + let array: &BinaryViewArray = array.as_any().downcast_ref().unwrap(); + write_view::(w, array, write_options, scratch)?; + } + Utf8View => { + let array: &Utf8ViewArray = array.as_any().downcast_ref().unwrap(); + let array = array.clone().to_binview(); + write_view::(w, &array, write_options, scratch)?; + } Struct => unreachable!(), List => unreachable!(), FixedSizeList => unreachable!(), diff --git a/src/common/arrow/src/native/write/view.rs b/src/common/arrow/src/native/write/view.rs new file mode 100644 index 000000000000..fc5b416b04c1 --- /dev/null +++ b/src/common/arrow/src/native/write/view.rs @@ -0,0 +1,56 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::io::Write; + +use super::WriteOptions; +use crate::arrow::array::BinaryViewArray; +use crate::arrow::array::View; +use crate::arrow::error::Result; + +pub(crate) fn write_view( + w: &mut W, + array: &BinaryViewArray, + write_options: WriteOptions, + buf: &mut Vec, +) -> Result<()> { + // TODO: adaptive gc and dict by stats + let array = array.clone().gc(); + let c = write_options.default_compression; + let codec = c.to_compression(); + + let total_size = array.len() * std::mem::size_of::() + + array.data_buffers().iter().map(|x| x.len()).sum::(); + w.write_all(&[codec as u8])?; + w.write_all(&(total_size as u32).to_le_bytes())?; + w.write_all(&(total_size as u32).to_le_bytes())?; + + let input_buf: &[u8] = bytemuck::cast_slice(array.views().as_slice()); + w.write_all(input_buf)?; + w.write_all(&(array.data_buffers().len() as u32).to_le_bytes())?; + + for buffer in array.data_buffers().iter() { + buf.clear(); + let pos = buf.len(); + w.write_all(&[codec as u8])?; + buf.extend_from_slice(&[0u8; 8]); + + let compressed_size = c.compress(buffer.as_slice(), buf)?; + buf[pos..pos + 4].copy_from_slice(&(compressed_size as u32).to_le_bytes()); + buf[pos + 4..pos + 8].copy_from_slice(&(buffer.len() as u32).to_le_bytes()); + w.write_all(buf.as_slice())?; + buf.clear(); + } + Ok(()) +} diff --git a/src/common/arrow/tests/it/arrow/array/binview/mod.rs b/src/common/arrow/tests/it/arrow/array/binview/mod.rs index 32f10baecf39..ff1183375a02 100644 --- a/src/common/arrow/tests/it/arrow/array/binview/mod.rs +++ b/src/common/arrow/tests/it/arrow/array/binview/mod.rs @@ -187,3 +187,25 @@ fn iter_nth() { assert_eq!(array.iter().nth(1), Some(Some(" ".as_bytes()))); assert_eq!(array.iter().nth(10), None); } + +#[test] +fn test_slice() { + let data = vec![ + Some("hello"), + Some("world"), + Some("databend"), + None, + Some("y"), + Some("z"), + Some("abc"), + ]; + + let array: Utf8ViewArray = data.into_iter().collect(); + + let a3 = array.sliced(2, 3); + assert_eq!(a3.into_iter().collect::>(), vec![ + Some("databend"), + None, + Some("y"), + ]); +} diff --git a/src/common/arrow/tests/it/native/io.rs b/src/common/arrow/tests/it/native/io.rs index 3b978312effd..2dd0816d73ff 100644 --- a/src/common/arrow/tests/it/native/io.rs +++ b/src/common/arrow/tests/it/native/io.rs @@ -17,6 +17,7 @@ use std::io::BufReader; use databend_common_arrow::arrow::array::Array; use databend_common_arrow::arrow::array::BinaryArray; +use databend_common_arrow::arrow::array::BinaryViewArray; use databend_common_arrow::arrow::array::BooleanArray; use databend_common_arrow::arrow::array::Float32Array; use databend_common_arrow::arrow::array::Float64Array; @@ -33,6 +34,7 @@ use databend_common_arrow::arrow::array::UInt32Array; use databend_common_arrow::arrow::array::UInt64Array; use databend_common_arrow::arrow::array::UInt8Array; use databend_common_arrow::arrow::array::Utf8Array; +use databend_common_arrow::arrow::array::Utf8ViewArray; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_arrow::arrow::bitmap::MutableBitmap; use databend_common_arrow::arrow::chunk::Chunk; @@ -81,6 +83,12 @@ pub fn new_test_chunk() -> Chunk> { Box::new(BinaryArray::::from_iter_values( ["abcdefg", "mn", "11", "", "3456", "xyz"].iter(), )) as _, + Box::new(Utf8ViewArray::from_slice_values( + ["abcdefg", "mn", "11", "", "3456", "xyz"].iter(), + )) as _, + Box::new(BinaryViewArray::from_slice_values( + ["abcdefg", "mn", "11", "", "3456", "xyz"].iter(), + )) as _, ]) } @@ -97,6 +105,7 @@ fn test_random_nonull() { Box::new(create_random_index(size, 0.0, size)) as _, Box::new(create_random_double(size, 0.0, size)) as _, Box::new(create_random_string(size, 0.0, size)) as _, + Box::new(create_random_view(size, 0.0, size)) as _, ]); test_write_read(chunk); } @@ -112,6 +121,7 @@ fn test_random() { Box::new(create_random_index(size, 0.4, size)) as _, Box::new(create_random_double(size, 0.5, size)) as _, Box::new(create_random_string(size, 0.4, size)) as _, + Box::new(create_random_view(size, 0.4, size)) as _, ]); test_write_read(chunk); } @@ -127,6 +137,7 @@ fn test_dict() { Box::new(create_random_index(size, 0.4, 8)) as _, Box::new(create_random_double(size, 0.5, 8)) as _, Box::new(create_random_string(size, 0.4, 8)) as _, + Box::new(create_random_view(size, 0.4, size)) as _, ]); test_write_read(chunk); } @@ -411,6 +422,20 @@ fn create_random_string(size: usize, null_density: f32, uniq: usize) -> BinaryAr .collect::>() } +fn create_random_view(size: usize, null_density: f32, uniq: usize) -> Utf8ViewArray { + let mut rng = StdRng::seed_from_u64(42); + (0..size) + .map(|_| { + if rng.gen::() > null_density { + let value = rng.gen_range::(0i32..uniq as i32); + Some(format!("{value}")) + } else { + None + } + }) + .collect::() +} + fn create_random_offsets(size: usize, null_density: f32) -> (Vec, Option) { let mut offsets = Vec::with_capacity(size + 1); offsets.push(0i32); diff --git a/src/meta/api/src/txn_backoff.rs b/src/meta/api/src/txn_backoff.rs index f33ebb54f7b1..e741d3a9a519 100644 --- a/src/meta/api/src/txn_backoff.rs +++ b/src/meta/api/src/txn_backoff.rs @@ -154,7 +154,7 @@ mod tests { let elapsed = now.elapsed().as_secs_f64(); println!("elapsed: {elapsed}"); assert!( - (0.041..0.090).contains(&elapsed), + (0.041..0.120).contains(&elapsed), "{} is expected to be 2 + 5 + 10 + 14 + 20 milliseconds", elapsed ); diff --git a/src/query/catalog/src/plan/internal_column.rs b/src/query/catalog/src/plan/internal_column.rs index 2872e0dc4df9..10438eb7f40c 100644 --- a/src/query/catalog/src/plan/internal_column.rs +++ b/src/query/catalog/src/plan/internal_column.rs @@ -226,34 +226,24 @@ impl InternalColumn { ) } InternalColumnType::BlockName => { - let mut builder = StringColumnBuilder::with_capacity(1, meta.block_location.len()); - builder.put_str(&meta.block_location); - builder.commit_row(); + let mut builder = StringColumnBuilder::with_capacity(1); + builder.put_and_commit(&meta.block_location); BlockEntry::new( DataType::String, Value::Scalar(Scalar::String(builder.build_scalar())), ) } InternalColumnType::SegmentName => { - let mut builder = - StringColumnBuilder::with_capacity(1, meta.segment_location.len()); - builder.put_str(&meta.segment_location); - builder.commit_row(); + let mut builder = StringColumnBuilder::with_capacity(1); + builder.put_and_commit(&meta.segment_location); BlockEntry::new( DataType::String, Value::Scalar(Scalar::String(builder.build_scalar())), ) } InternalColumnType::SnapshotName => { - let mut builder = StringColumnBuilder::with_capacity( - 1, - meta.snapshot_location - .clone() - .unwrap_or("".to_string()) - .len(), - ); - builder.put_str(&meta.snapshot_location.clone().unwrap_or("".to_string())); - builder.commit_row(); + let mut builder = StringColumnBuilder::with_capacity(1); + builder.put_and_commit(meta.snapshot_location.clone().unwrap_or("".to_string())); BlockEntry::new( DataType::String, Value::Scalar(Scalar::String(builder.build_scalar())), diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index 1f8dd4fe8045..916d170f1563 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -151,6 +151,10 @@ pub trait Table: Sync + Send { false } + fn storage_format_as_parquet(&self) -> bool { + false + } + #[async_backtrace::framed] async fn alter_table_cluster_keys( &self, diff --git a/src/query/ee/Cargo.toml b/src/query/ee/Cargo.toml index 01d7c480c790..29824d20420f 100644 --- a/src/query/ee/Cargo.toml +++ b/src/query/ee/Cargo.toml @@ -12,7 +12,6 @@ doctest = false test = true [dependencies] -arrow-array = { workspace = true } async-backtrace = { workspace = true } async-trait = { workspace = true } chrono = { workspace = true } diff --git a/src/query/ee/src/background_service/background_service_handler.rs b/src/query/ee/src/background_service/background_service_handler.rs index 06d9220c1e31..daaec48e0a1e 100644 --- a/src/query/ee/src/background_service/background_service_handler.rs +++ b/src/query/ee/src/background_service/background_service_handler.rs @@ -14,7 +14,6 @@ use std::sync::Arc; -use arrow_array::RecordBatch; use databend_common_base::base::tokio::sync::mpsc::Sender; use databend_common_base::base::tokio::sync::Mutex; use databend_common_base::base::uuid::Uuid; @@ -22,6 +21,7 @@ use databend_common_base::base::GlobalInstance; use databend_common_config::InnerConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::DataBlock; use databend_common_license::license::Feature; use databend_common_license::license_manager::LicenseManagerSwitch; use databend_common_meta_api::BackgroundApi; @@ -61,7 +61,7 @@ pub struct RealBackgroundService { #[async_trait::async_trait] impl BackgroundServiceHandler for RealBackgroundService { #[async_backtrace::framed] - async fn execute_sql(&self, sql: String) -> Result> { + async fn execute_sql(&self, sql: String) -> Result> { let session = create_session(&self.conf).await?; let ctx = session.create_query_context().await?; SuggestedBackgroundTasksSource::do_execute_sql(ctx, sql).await diff --git a/src/query/ee/src/background_service/compaction_job.rs b/src/query/ee/src/background_service/compaction_job.rs index 4940d38f9c2a..3fc35b1c5e99 100644 --- a/src/query/ee/src/background_service/compaction_job.rs +++ b/src/query/ee/src/background_service/compaction_job.rs @@ -16,10 +16,6 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; -use arrow_array::BooleanArray; -use arrow_array::LargeStringArray; -use arrow_array::RecordBatch; -use arrow_array::UInt64Array; use chrono::Utc; use databend_common_base::base::tokio::sync::mpsc::Sender; use databend_common_base::base::tokio::sync::Mutex; @@ -27,6 +23,10 @@ use databend_common_base::base::tokio::time::Instant; use databend_common_base::base::uuid::Uuid; use databend_common_config::InnerConfig; use databend_common_exception::Result; +use databend_common_expression::types::StringType; +use databend_common_expression::types::UInt64Type; +use databend_common_expression::types::ValueType; +use databend_common_expression::DataBlock; use databend_common_meta_api::BackgroundApi; use databend_common_meta_app::background::BackgroundJobIdent; use databend_common_meta_app::background::BackgroundJobInfo; @@ -172,31 +172,28 @@ impl CompactionJob { for records in Self::do_get_target_tables_from_config(&self.conf, ctx.clone()).await? { debug!(records :? =(&records); "target_tables"); - let db_names = records - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let db_ids = records - .column(1) - .as_any() - .downcast_ref::() - .unwrap(); - let tb_names = records - .column(2) - .as_any() - .downcast_ref::() - .unwrap(); - let tb_ids = records - .column(3) - .as_any() - .downcast_ref::() - .unwrap(); + let records = records.consume_convert_to_full(); + + let db_names = + StringType::try_downcast_column(records.columns()[0].value.as_column().unwrap()) + .unwrap(); + let db_ids = + UInt64Type::try_downcast_column(records.columns()[1].value.as_column().unwrap()) + .unwrap(); + let tb_names = + StringType::try_downcast_column(records.columns()[2].value.as_column().unwrap()) + .unwrap(); + + let tb_ids = + UInt64Type::try_downcast_column(records.columns()[3].value.as_column().unwrap()) + .unwrap(); + for i in 0..records.num_rows() { - let db_name = db_names.value(i).to_owned(); - let db_id = db_ids.value(i); - let tb_name = tb_names.value(i).to_owned(); - let tb_id = tb_ids.value(i); + let db_name: String = db_names.index(i).unwrap().to_string(); + let db_id = db_ids[i]; + let tb_name = tb_names.index(i).unwrap().to_string(); + let tb_id = tb_ids[i]; + match self .compact_table( session.clone(), @@ -460,7 +457,7 @@ impl CompactionJob { pub async fn do_get_target_tables_from_config( config: &InnerConfig, ctx: Arc, - ) -> Result> { + ) -> Result> { if !config.background.compaction.has_target_tables() { let res = SuggestedBackgroundTasksSource::do_get_all_suggested_compaction_tables(ctx).await; @@ -529,7 +526,7 @@ impl CompactionJob { pub async fn do_get_target_tables( configs: &InnerConfig, ctx: Arc, - ) -> Result> { + ) -> Result> { let all_target_tables = configs.background.compaction.target_tables.as_ref(); let all_target_tables = Self::parse_all_target_tables(all_target_tables); let future_res = all_target_tables @@ -574,57 +571,55 @@ impl CompactionJob { return Ok((false, false, TableStatistics::default())); } let res = res.unwrap(); - let need_segment_compact = res - .column(0) - .as_any() - .downcast_ref::() - .unwrap() - .value(0); - let need_block_compact = res - .column(1) - .as_any() - .downcast_ref::() - .unwrap() - .value(0); + let res = res.consume_convert_to_full(); + + let need_segment_compact = *res.value_at(0, 0).unwrap().as_boolean().unwrap(); + let need_block_compact = *res.value_at(1, 0).unwrap().as_boolean().unwrap(); let table_statistics = TableStatistics { - number_of_rows: res - .column(2) - .as_any() - .downcast_ref::() + number_of_rows: *res + .value_at(2, 0) + .unwrap() + .as_number() .unwrap() - .value(0), - data_bytes: res - .column(3) - .as_any() - .downcast_ref::() + .as_u_int64() + .unwrap(), + data_bytes: *res + .value_at(3, 0) .unwrap() - .value(0), - compressed_data_bytes: res - .column(4) - .as_any() - .downcast_ref::() + .as_number() .unwrap() - .value(0), - index_data_bytes: res - .column(5) - .as_any() - .downcast_ref::() + .as_u_int64() + .unwrap(), + compressed_data_bytes: *res + .value_at(4, 0) .unwrap() - .value(0), + .as_number() + .unwrap() + .as_u_int64() + .unwrap(), + index_data_bytes: *res + .value_at(5, 0) + .unwrap() + .as_number() + .unwrap() + .as_u_int64() + .unwrap(), number_of_segments: Some( - res.column(6) - .as_any() - .downcast_ref::() + *res.value_at(6, 0) .unwrap() - .value(0), + .as_number() + .unwrap() + .as_u_int64() + .unwrap(), ), number_of_blocks: Some( - res.column(7) - .as_any() - .downcast_ref::() + *res.value_at(7, 0) + .unwrap() + .as_number() .unwrap() - .value(0), + .as_u_int64() + .unwrap(), ), }; diff --git a/src/query/ee_features/background_service/Cargo.toml b/src/query/ee_features/background_service/Cargo.toml index 4d5dff29ab21..027bca904e52 100644 --- a/src/query/ee_features/background_service/Cargo.toml +++ b/src/query/ee_features/background_service/Cargo.toml @@ -12,11 +12,11 @@ doctest = false test = true [dependencies] -arrow-array = { workspace = true } async-backtrace = { workspace = true } async-trait = { workspace = true } databend-common-base = { workspace = true } databend-common-exception = { workspace = true } +databend-common-expression = { workspace = true } databend-common-meta-app = { workspace = true } serde = { workspace = true } diff --git a/src/query/ee_features/background_service/src/background_service.rs b/src/query/ee_features/background_service/src/background_service.rs index a91c4dab8bb4..e55e9ac687ac 100644 --- a/src/query/ee_features/background_service/src/background_service.rs +++ b/src/query/ee_features/background_service/src/background_service.rs @@ -14,15 +14,15 @@ use std::sync::Arc; -use arrow_array::RecordBatch; use databend_common_base::base::GlobalInstance; use databend_common_exception::Result; +use databend_common_expression::DataBlock; use databend_common_meta_app::principal::UserIdentity; use databend_common_meta_app::tenant::Tenant; #[async_trait::async_trait] pub trait BackgroundServiceHandler: Sync + Send { - async fn execute_sql(&self, sql: String) -> Result>; + async fn execute_sql(&self, sql: String) -> Result>; async fn execute_scheduled_job( &self, @@ -43,7 +43,7 @@ impl BackgroundServiceHandlerWrapper { } #[async_backtrace::framed] - pub async fn execute_sql(&self, sql: String) -> Result> { + pub async fn execute_sql(&self, sql: String) -> Result> { self.handler.execute_sql(sql).await } diff --git a/src/query/expression/Cargo.toml b/src/query/expression/Cargo.toml index 80e347176c2b..e44cabed9b12 100644 --- a/src/query/expression/Cargo.toml +++ b/src/query/expression/Cargo.toml @@ -32,6 +32,7 @@ databend-common-grpc = { workspace = true } databend-common-hashtable = { workspace = true } databend-common-io = { workspace = true } educe = { workspace = true } +either = { workspace = true } enum-as-inner = { workspace = true } ethnum = { workspace = true, features = ["serde", "macros", "borsh"] } futures = { workspace = true } @@ -64,11 +65,16 @@ unicode-segmentation = { workspace = true } [dev-dependencies] arrow-ord = { workspace = true } +criterion = { workspace = true } goldenfile = { workspace = true } pretty_assertions = { workspace = true } rand = { workspace = true } rmp-serde = { workspace = true } +[[bench]] +name = "bench" +harness = false + [lints] workspace = true diff --git a/src/query/expression/benches/bench.rs b/src/query/expression/benches/bench.rs new file mode 100644 index 000000000000..b0e18083fc80 --- /dev/null +++ b/src/query/expression/benches/bench.rs @@ -0,0 +1,157 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[macro_use] +extern crate criterion; + +use criterion::Criterion; +use databend_common_expression::arrow::deserialize_column; +use databend_common_expression::arrow::serialize_column; +use databend_common_expression::types::BinaryType; +use databend_common_expression::types::StringType; +use databend_common_expression::Column; +use databend_common_expression::DataBlock; +use databend_common_expression::FromData; +use rand::rngs::StdRng; +use rand::Rng; +use rand::SeedableRng; + +fn bench(c: &mut Criterion) { + let mut group = c.benchmark_group("bench_kernels"); + + let mut rng = StdRng::seed_from_u64(0); + // concats + { + for length in [12, 20, 500] { + let (s, b) = generate_random_string_data(&mut rng, length); + let bin_col = (0..5).map(|_| BinaryType::from_data(b.clone())); + let str_col = (0..5).map(|_| StringType::from_data(s.clone())); + + group.bench_function(format!("concat_string_offset/{length}"), |b| { + b.iter(|| Column::concat_columns(bin_col.clone()).unwrap()) + }); + + group.bench_function(format!("concat_string_view/{length}"), |b| { + b.iter(|| Column::concat_columns(str_col.clone()).unwrap()) + }); + } + } + + // take compact + { + for length in [12, 20, 500] { + let (s, b) = generate_random_string_data(&mut rng, length); + let block_bin = DataBlock::new_from_columns(vec![BinaryType::from_data(b.clone())]); + let block_view = DataBlock::new_from_columns(vec![StringType::from_data(s.clone())]); + + let indices: Vec<(u32, u32)> = (0..s.len()) + .filter(|x| x % 10 == 0) + .map(|x| (x as u32, 1000)) + .collect(); + let num_rows = indices.len() * 1000; + + group.bench_function(format!("take_compact_string_offset/{length}"), |b| { + b.iter(|| { + block_bin + .take_compacted_indices(&indices, num_rows) + .unwrap() + }) + }); + + group.bench_function(format!("take_compact_string_view/{length}"), |b| { + b.iter(|| { + block_view + .take_compacted_indices(&indices, num_rows) + .unwrap() + }) + }); + } + } + + // IPC + // bench_kernels/serialize_string_offset/12 + // time: [183.25 µs 183.49 µs 183.93 µs] + // Found 7 outliers among 100 measurements (7.00%) + // 3 (3.00%) high mild + // 4 (4.00%) high severe + // bench_kernels/serialize_string_view/12 + // time: [415.25 µs 415.36 µs 415.47 µs] + // Found 6 outliers among 100 measurements (6.00%) + // 3 (3.00%) high mild + // 3 (3.00%) high severe + // bench_kernels/serialize_string_offset/20 + // time: [195.09 µs 195.15 µs 195.23 µs] + // Found 6 outliers among 100 measurements (6.00%) + // 6 (6.00%) high mild + // bench_kernels/serialize_string_view/20 + // time: [464.96 µs 465.08 µs 465.21 µs] + // Found 4 outliers among 100 measurements (4.00%) + // 4 (4.00%) high mild + // bench_kernels/serialize_string_offset/500 + // time: [3.3092 ms 3.3139 ms 3.3194 ms] + // Found 2 outliers among 100 measurements (2.00%) + // 1 (1.00%) high mild + // 1 (1.00%) high severe + // bench_kernels/serialize_string_view/500 + // time: [3.9254 ms 3.9303 ms 3.9366 ms] + // Found 9 outliers among 100 measurements (9.00%) + // 4 (4.00%) high mild + // 5 (5.00%) high severe + + { + for length in [12, 20, 500] { + let (s, b) = generate_random_string_data(&mut rng, length); + let b_c = BinaryType::from_data(b.clone()); + let s_c = StringType::from_data(s.clone()); + + group.bench_function(format!("serialize_string_offset/{length}"), |b| { + b.iter(|| { + let bs = serialize_column(&b_c); + deserialize_column(&bs).unwrap(); + }) + }); + + group.bench_function(format!("serialize_string_view/{length}"), |b| { + b.iter(|| { + let bs = serialize_column(&s_c); + deserialize_column(&bs).unwrap(); + }) + }); + } + } +} + +criterion_group!(benches, bench); +criterion_main!(benches); + +fn generate_random_string_data(rng: &mut StdRng, length: usize) -> (Vec, Vec>) { + let iter_str: Vec<_> = (0..10000) + .map(|_| { + let random_string: String = (0..length) + .map(|_| { + // Generate a random character (ASCII printable characters) + rng.gen_range(32..=126) as u8 as char + }) + .collect(); + random_string + }) + .collect(); + + let iter_binary: Vec<_> = iter_str + .iter() + .map(|x| x.clone().as_bytes().to_vec()) + .collect(); + + (iter_str, iter_binary) +} diff --git a/src/query/expression/src/aggregate/payload_flush.rs b/src/query/expression/src/aggregate/payload_flush.rs index 1b09b3e43937..632ec78f6999 100644 --- a/src/query/expression/src/aggregate/payload_flush.rs +++ b/src/query/expression/src/aggregate/payload_flush.rs @@ -26,6 +26,7 @@ use crate::types::decimal::Decimal; use crate::types::decimal::DecimalType; use crate::types::nullable::NullableColumn; use crate::types::string::StringColumn; +use crate::types::string::StringColumnBuilder; use crate::types::ArgType; use crate::types::BooleanType; use crate::types::DataType; @@ -36,7 +37,6 @@ use crate::types::NumberType; use crate::types::TimestampType; use crate::types::ValueType; use crate::with_number_mapped_type; -use crate::AggregateFunctionRef; use crate::Column; use crate::ColumnBuilder; use crate::DataBlock; @@ -133,7 +133,7 @@ impl Payload { let mut state_builders: Vec = self .aggrs .iter() - .map(|agg| state_serializer(agg, row_count)) + .map(|_| BinaryColumnBuilder::with_capacity(row_count, row_count * 4)) .collect(); for place in state.state_places.as_slice()[0..row_count].iter() { @@ -322,7 +322,21 @@ impl Payload { col_offset: usize, state: &mut PayloadFlushState, ) -> StringColumn { - unsafe { StringColumn::from_binary_unchecked(self.flush_binary_column(col_offset, state)) } + let len = state.probe_state.row_count; + let mut binary_builder = StringColumnBuilder::with_capacity(len); + + unsafe { + for idx in 0..len { + let str_len = read::(state.addresses[idx].add(col_offset) as _) as usize; + let data_address = read::(state.addresses[idx].add(col_offset + 4) as _) + as usize as *const u8; + + let scalar = std::slice::from_raw_parts(data_address, str_len); + + binary_builder.put_and_commit(std::str::from_utf8(scalar).unwrap()); + } + } + binary_builder.build() } fn flush_generic_column( @@ -349,8 +363,3 @@ impl Payload { builder.build() } } - -fn state_serializer(func: &AggregateFunctionRef, row: usize) -> BinaryColumnBuilder { - let size = func.serialize_size_per_row().unwrap_or(4); - BinaryColumnBuilder::with_capacity(row, row * size) -} diff --git a/src/query/expression/src/aggregate/payload_row.rs b/src/query/expression/src/aggregate/payload_row.rs index 2de484893858..3a171d2d8b1f 100644 --- a/src/query/expression/src/aggregate/payload_row.rs +++ b/src/query/expression/src/aggregate/payload_row.rs @@ -31,6 +31,8 @@ use crate::types::DataType; use crate::types::DateType; use crate::types::NumberColumn; use crate::types::NumberType; +use crate::types::StringColumn; +use crate::types::StringType; use crate::types::TimestampType; use crate::types::ValueType; use crate::with_decimal_mapped_type; @@ -302,6 +304,18 @@ pub unsafe fn row_match_column( no_match, no_match_count, ), + Column::String(v) => row_match_string_column( + v, + validity, + address, + select_vector, + temp_vector, + count, + validity_offset, + col_offset, + no_match, + no_match_count, + ), Column::Bitmap(v) | Column::Binary(v) | Column::Variant(v) | Column::Geometry(v) => { row_match_binary_column( v, @@ -316,21 +330,6 @@ pub unsafe fn row_match_column( no_match_count, ) } - Column::String(v) => { - let v = &BinaryColumn::from(v.clone()); - row_match_binary_column( - v, - validity, - address, - select_vector, - temp_vector, - count, - validity_offset, - col_offset, - no_match, - no_match_count, - ) - } Column::Nullable(_) => unreachable!("nullable is unwrapped"), other => row_match_generic_column( other, @@ -426,6 +425,87 @@ unsafe fn row_match_binary_column( *count = match_count; } +unsafe fn row_match_string_column( + col: &StringColumn, + validity: Option<&Bitmap>, + address: &[*const u8], + select_vector: &mut SelectVector, + temp_vector: &mut SelectVector, + count: &mut usize, + validity_offset: usize, + col_offset: usize, + no_match: &mut SelectVector, + no_match_count: &mut usize, +) { + let mut match_count = 0; + let mut equal: bool; + + if let Some(validity) = validity { + let is_all_set = validity.unset_bits() == 0; + for idx in select_vector[..*count].iter() { + let idx = *idx; + let validity_address = address[idx].add(validity_offset); + let is_set2 = read::(validity_address as _) != 0; + let is_set = is_all_set || validity.get_bit_unchecked(idx); + + if is_set && is_set2 { + let len_address = address[idx].add(col_offset); + let address = address[idx].add(col_offset + 4); + let len = read::(len_address as _) as usize; + + let value = StringType::index_column_unchecked(col, idx); + if len != value.len() { + equal = false; + } else { + let data_address = read::(address as _) as usize as *const u8; + let scalar = std::slice::from_raw_parts(data_address, len); + equal = databend_common_hashtable::fast_memcmp(scalar, value.as_bytes()); + } + } else { + equal = is_set == is_set2; + } + + if equal { + temp_vector[match_count] = idx; + match_count += 1; + } else { + no_match[*no_match_count] = idx; + *no_match_count += 1; + } + } + } else { + for idx in select_vector[..*count].iter() { + let idx = *idx; + let len_address = address[idx].add(col_offset); + let address = address[idx].add(col_offset + 4); + + let len = read::(len_address as _) as usize; + + let value = StringType::index_column_unchecked(col, idx); + if len != value.len() { + equal = false; + } else { + let data_address = read::(address as _) as usize as *const u8; + let scalar = std::slice::from_raw_parts(data_address, len); + + equal = databend_common_hashtable::fast_memcmp(scalar, value.as_bytes()); + } + + if equal { + temp_vector[match_count] = idx; + match_count += 1; + } else { + no_match[*no_match_count] = idx; + *no_match_count += 1; + } + } + } + + select_vector.clone_from_slice(temp_vector); + + *count = match_count; +} + unsafe fn row_match_column_type( col: &Column, validity: Option<&Bitmap>, diff --git a/src/query/expression/src/block.rs b/src/query/expression/src/block.rs index 64b497636421..47715b137db7 100644 --- a/src/query/expression/src/block.rs +++ b/src/query/expression/src/block.rs @@ -32,6 +32,7 @@ use crate::DataField; use crate::DataSchemaRef; use crate::Domain; use crate::Scalar; +use crate::ScalarRef; use crate::TableSchemaRef; use crate::Value; @@ -608,6 +609,14 @@ impl DataBlock { .collect(); DataSchema::new(fields) } + + // This is inefficient, don't use it in hot path + pub fn value_at(&self, col: usize, row: usize) -> Option> { + if col >= self.columns.len() { + return None; + } + self.columns[col].value.index(row) + } } impl TryFrom for ArrowChunk { diff --git a/src/query/expression/src/converts/arrow/to.rs b/src/query/expression/src/converts/arrow/to.rs index 3860fc64d503..34f929edbd2a 100644 --- a/src/query/expression/src/converts/arrow/to.rs +++ b/src/query/expression/src/converts/arrow/to.rs @@ -113,6 +113,7 @@ impl DataBlock { .zip(arrow_schema.fields()) { let column = entry.value.to_owned().into_column().unwrap(); + let column = column.maybe_gc(); let array = column.into_arrow_rs(); // Adjust struct array names diff --git a/src/query/expression/src/converts/arrow2/from.rs b/src/query/expression/src/converts/arrow2/from.rs index d354cb9c13af..87822a5045b2 100644 --- a/src/query/expression/src/converts/arrow2/from.rs +++ b/src/query/expression/src/converts/arrow2/from.rs @@ -12,12 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. +use databend_common_arrow::arrow::array::BinaryArray; +use databend_common_arrow::arrow::array::FixedSizeBinaryArray; +use databend_common_arrow::arrow::array::Utf8Array; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_arrow::arrow::buffer::Buffer; use databend_common_arrow::arrow::datatypes::DataType as ArrowDataType; use databend_common_arrow::arrow::datatypes::Field as ArrowField; use databend_common_arrow::arrow::datatypes::Schema as ArrowSchema; use databend_common_arrow::arrow::datatypes::TimeUnit; +use databend_common_arrow::arrow::types::Offset; use databend_common_exception::ErrorCode; use databend_common_exception::Result; @@ -28,10 +32,12 @@ use super::ARROW_EXT_TYPE_GEOMETRY; use super::ARROW_EXT_TYPE_VARIANT; use crate::types::array::ArrayColumn; use crate::types::binary::BinaryColumn; +use crate::types::binary::BinaryColumnBuilder; use crate::types::decimal::DecimalColumn; use crate::types::geography::GeographyColumn; use crate::types::nullable::NullableColumn; use crate::types::string::StringColumn; +use crate::types::string::StringColumnBuilder; use crate::types::DataType; use crate::types::DecimalDataType; use crate::types::DecimalSize; @@ -119,7 +125,8 @@ fn arrow_type_to_table_type(ty: &ArrowDataType, is_nullable: bool) -> Result TableDataType::Binary, - ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => TableDataType::String, + ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => + TableDataType::String, ArrowDataType::Timestamp(_, _) => TableDataType::Timestamp, ArrowDataType::Date32 | ArrowDataType::Date64 => TableDataType::Date, @@ -342,14 +349,13 @@ impl Column { ); let offsets = arrow_col .offsets() - .buffer() .iter() .map(|x| *x as u64) .collect::>(); - Column::Binary(BinaryColumn::new( - arrow_col.values().clone(), - offsets.into(), - )) + Column::Binary(BinaryColumn { + data: arrow_col.values().clone(), + offsets: offsets.into(), + }) } (DataType::Binary, ArrowDataType::LargeBinary) => { let arrow_col = arrow_col @@ -361,22 +367,19 @@ impl Column { let offsets = arrow_col.offsets().clone().into_inner(); let offsets = unsafe { std::mem::transmute::, Buffer>(offsets) }; - Column::Binary(BinaryColumn::new(arrow_col.values().clone(), offsets)) + Column::Binary(BinaryColumn { + data: arrow_col.values().clone(), + offsets, + }) } - (DataType::Binary, ArrowDataType::FixedSizeBinary(size)) => { + (DataType::Binary, ArrowDataType::FixedSizeBinary(_)) => { let arrow_col = arrow_col .as_any() .downcast_ref::() .expect( "fail to read `Binary` from arrow: array should be `FixedSizeBinaryArray`", ); - let offsets = (0..arrow_col.len() as u64 + 1) - .map(|x| x * (*size) as u64) - .collect::>(); - Column::Binary(BinaryColumn::new( - arrow_col.values().clone(), - offsets.into(), - )) + Column::Binary(fixed_size_binary_array_to_binary_column(arrow_col)) } (DataType::Binary, ArrowDataType::Utf8) => { let arrow_col = arrow_col @@ -387,14 +390,13 @@ impl Column { ); let offsets = arrow_col .offsets() - .buffer() .iter() .map(|x| *x as u64) .collect::>(); - Column::Binary(BinaryColumn::new( - arrow_col.values().clone(), - offsets.into(), - )) + Column::Binary(BinaryColumn { + data: arrow_col.values().clone(), + offsets: offsets.into(), + }) } (DataType::Binary, ArrowDataType::LargeUtf8) => { let arrow_col = arrow_col @@ -403,10 +405,16 @@ impl Column { .expect( "fail to read `Binary` from arrow: array should be `Utf8Array`", ); - let offsets = arrow_col.offsets().clone().into_inner(); - let offsets = - unsafe { std::mem::transmute::, Buffer>(offsets) }; - Column::Binary(BinaryColumn::new(arrow_col.values().clone(), offsets)) + + let offsets = unsafe { + std::mem::transmute::, Buffer>( + arrow_col.offsets().clone().into_inner(), + ) + }; + Column::Binary(BinaryColumn { + data: arrow_col.values().clone(), + offsets, + }) } (DataType::String, ArrowDataType::Binary) => { let arrow_col = arrow_col @@ -415,14 +423,8 @@ impl Column { .expect( "fail to read `String` from arrow: array should be `BinaryArray`", ); - let offsets = arrow_col - .offsets() - .buffer() - .iter() - .map(|x| *x as u64) - .collect::>(); - let column = StringColumn::new(arrow_col.values().clone(), offsets.into()); - Column::String(column) + let col = binary_array_to_string_column(arrow_col); + Column::String(col) } (DataType::String, ArrowDataType::LargeBinary) => { let arrow_col = arrow_col @@ -431,24 +433,18 @@ impl Column { .expect( "fail to read `String` from arrow: array should be `BinaryArray`", ); - let offsets = arrow_col.offsets().clone().into_inner(); - let offsets = - unsafe { std::mem::transmute::, Buffer>(offsets) }; - let column = StringColumn::new(arrow_col.values().clone(), offsets); - Column::String(column) + let col = binary_array_to_string_column(arrow_col); + Column::String(col) } - (DataType::String, ArrowDataType::FixedSizeBinary(size)) => { + (DataType::String, ArrowDataType::FixedSizeBinary(_)) => { let arrow_col = arrow_col .as_any() .downcast_ref::() .expect( "fail to read `String` from arrow: array should be `FixedSizeBinaryArray`", ); - let offsets = (0..arrow_col.len() as u64 + 1) - .map(|x| x * (*size) as u64) - .collect::>(); - let column = StringColumn::new(arrow_col.values().clone(), offsets.into()); - Column::String(column) + let col = fixed_size_binary_array_to_string_column(arrow_col); + Column::String(col) } (DataType::String, ArrowDataType::Utf8) => { let arrow_col = arrow_col @@ -457,18 +453,8 @@ impl Column { .expect( "fail to read `String` from arrow: array should be `Utf8Array`", ); - let offsets = arrow_col - .offsets() - .buffer() - .iter() - .map(|x| *x as u64) - .collect::>(); - unsafe { - Column::String(StringColumn::new_unchecked( - arrow_col.values().clone(), - offsets.into(), - )) - } + let col = utf8_array_to_string_column(arrow_col); + Column::String(col) } (DataType::String, ArrowDataType::LargeUtf8) => { let arrow_col = arrow_col @@ -477,15 +463,17 @@ impl Column { .expect( "fail to read `String` from arrow: array should be `Utf8Array`", ); - let offsets = arrow_col.offsets().clone().into_inner(); - let offsets = - unsafe { std::mem::transmute::, Buffer>(offsets) }; - unsafe { - Column::String(StringColumn::new_unchecked( - arrow_col.values().clone(), - offsets, - )) - } + let col = utf8_array_to_string_column(arrow_col); + Column::String(col) + } + (DataType::String, ArrowDataType::Utf8View) => { + let arrow_col = arrow_col + .as_any() + .downcast_ref::() + .expect( + "fail to read `String` from arrow: array should be `Utf8ViewArray`", + ); + Column::String(StringColumn::new(arrow_col.clone())) } (DataType::Timestamp, ArrowDataType::Timestamp(uint, _)) => { let values = arrow_col @@ -642,27 +630,6 @@ impl Column { .collect::>>()?; Column::Tuple(field_cols) } - ( - DataType::Bitmap, - ArrowDataType::Extension(name, box ArrowDataType::Binary, None), - ) if name == ARROW_EXT_TYPE_BITMAP => { - let arrow_col = arrow_col - .as_any() - .downcast_ref::>() - .expect( - "fail to read `Bitmap` from arrow: array should be `BinaryArray`", - ); - let offsets = arrow_col - .offsets() - .buffer() - .iter() - .map(|x| *x as u64) - .collect::>(); - Column::Bitmap(BinaryColumn::new( - arrow_col.values().clone(), - offsets.into(), - )) - } ( DataType::Bitmap, ArrowDataType::Extension(name, box ArrowDataType::LargeBinary, None), @@ -673,28 +640,15 @@ impl Column { .expect( "fail to read `Bitmap` from arrow: array should be `BinaryArray`", ); - let offsets = arrow_col.offsets().clone().into_inner(); - let offsets = - unsafe { std::mem::transmute::, Buffer>(offsets) }; - Column::Bitmap(BinaryColumn::new(arrow_col.values().clone(), offsets)) - } - (DataType::Bitmap, ArrowDataType::Binary) => { - let arrow_col = arrow_col - .as_any() - .downcast_ref::>() - .expect( - "fail to read `Bitmap` from arrow: array should be `BinaryArray`", - ); - let offsets = arrow_col - .offsets() - .buffer() - .iter() - .map(|x| *x as u64) - .collect::>(); - Column::Bitmap(BinaryColumn::new( - arrow_col.values().clone(), - offsets.into(), - )) + let offsets = unsafe { + std::mem::transmute::, Buffer>( + arrow_col.offsets().clone().into_inner(), + ) + }; + Column::Bitmap(BinaryColumn { + data: arrow_col.values().clone(), + offsets, + }) } (DataType::Bitmap, ArrowDataType::LargeBinary) => { let arrow_col = arrow_col @@ -703,31 +657,15 @@ impl Column { .expect( "fail to read `Bitmap` from arrow: array should be `BinaryArray`", ); - let offsets = arrow_col.offsets().clone().into_inner(); - let offsets = - unsafe { std::mem::transmute::, Buffer>(offsets) }; - Column::Bitmap(BinaryColumn::new(arrow_col.values().clone(), offsets)) - } - ( - DataType::Geometry, - ArrowDataType::Extension(name, box ArrowDataType::Binary, None), - ) if name == ARROW_EXT_TYPE_GEOMETRY => { - let arrow_col = arrow_col - .as_any() - .downcast_ref::>() - .expect( - "fail to read `Geometry` from arrow: array should be `BinaryArray`", - ); - let offsets = arrow_col - .offsets() - .buffer() - .iter() - .map(|x| *x as u64) - .collect::>(); - Column::Geometry(BinaryColumn::new( - arrow_col.values().clone(), - offsets.into(), - )) + let offsets = unsafe { + std::mem::transmute::, Buffer>( + arrow_col.offsets().clone().into_inner(), + ) + }; + Column::Bitmap(BinaryColumn { + data: arrow_col.values().clone(), + offsets, + }) } ( DataType::Geometry, @@ -739,28 +677,15 @@ impl Column { .expect( "fail to read `Geometry` from arrow: array should be `BinaryArray`", ); - let offsets = arrow_col.offsets().clone().into_inner(); - let offsets = - unsafe { std::mem::transmute::, Buffer>(offsets) }; - Column::Geometry(BinaryColumn::new(arrow_col.values().clone(), offsets)) - } - (DataType::Geometry, ArrowDataType::Binary) => { - let arrow_col = arrow_col - .as_any() - .downcast_ref::>() - .expect( - "fail to read `Geometry` from arrow: array should be `BinaryArray`", - ); - let offsets = arrow_col - .offsets() - .buffer() - .iter() - .map(|x| *x as u64) - .collect::>(); - Column::Geometry(BinaryColumn::new( - arrow_col.values().clone(), - offsets.into(), - )) + let offsets = unsafe { + std::mem::transmute::, Buffer>( + arrow_col.offsets().clone().into_inner(), + ) + }; + Column::Geometry(BinaryColumn { + data: arrow_col.values().clone(), + offsets, + }) } (DataType::Geometry, ArrowDataType::LargeBinary) => { let arrow_col = arrow_col @@ -769,10 +694,16 @@ impl Column { .expect( "fail to read `Geometry` from arrow: array should be `BinaryArray`", ); - let offsets = arrow_col.offsets().clone().into_inner(); - let offsets = - unsafe { std::mem::transmute::, Buffer>(offsets) }; - Column::Geometry(BinaryColumn::new(arrow_col.values().clone(), offsets)) + + let offsets = unsafe { + std::mem::transmute::, Buffer>( + arrow_col.offsets().clone().into_inner(), + ) + }; + Column::Geometry(BinaryColumn { + data: arrow_col.values().clone(), + offsets, + }) } (DataType::Geography, ArrowDataType::LargeBinary) => { let arrow_col = arrow_col @@ -781,13 +712,16 @@ impl Column { .expect( "fail to read `Geography` from arrow: array should be `BinaryArray`", ); - let offsets = arrow_col.offsets().clone().into_inner(); - let offsets = - unsafe { std::mem::transmute::, Buffer>(offsets) }; - Column::Geography(GeographyColumn(BinaryColumn::new( - arrow_col.values().clone(), + + let offsets = unsafe { + std::mem::transmute::, Buffer>( + arrow_col.offsets().clone().into_inner(), + ) + }; + Column::Geography(GeographyColumn(BinaryColumn { + data: arrow_col.values().clone(), offsets, - ))) + })) } (data_type, ArrowDataType::Extension(_, arrow_type, _)) => { from_arrow_with_arrow_type(arrow_col, arrow_type, data_type)? @@ -812,3 +746,36 @@ impl Column { from_arrow_with_arrow_type(arrow_col, arrow_col.data_type(), data_type) } } + +fn binary_array_to_string_column(array: &BinaryArray) -> StringColumn { + let mut builder = StringColumnBuilder::with_capacity(array.len()); + for value in array.values_iter() { + builder.put_and_commit(std::str::from_utf8(value).unwrap()); + } + builder.build() +} + +fn utf8_array_to_string_column(array: &Utf8Array) -> StringColumn { + let mut builder = StringColumnBuilder::with_capacity(array.len()); + for value in array.values_iter() { + builder.put_and_commit(value); + } + builder.build() +} + +fn fixed_size_binary_array_to_string_column(array: &FixedSizeBinaryArray) -> StringColumn { + let mut builder = StringColumnBuilder::with_capacity(array.len()); + for value in array.values_iter() { + builder.put_and_commit(std::str::from_utf8(value).unwrap()); + } + builder.build() +} + +fn fixed_size_binary_array_to_binary_column(array: &FixedSizeBinaryArray) -> BinaryColumn { + let mut builder = BinaryColumnBuilder::with_capacity(array.len(), array.len() * array.size()); + for value in array.values_iter() { + builder.put_slice(value); + builder.commit_row(); + } + builder.build() +} diff --git a/src/query/expression/src/converts/arrow2/to.rs b/src/query/expression/src/converts/arrow2/to.rs index f248a1a38c0d..89d51b70126a 100644 --- a/src/query/expression/src/converts/arrow2/to.rs +++ b/src/query/expression/src/converts/arrow2/to.rs @@ -93,7 +93,7 @@ fn table_type_to_arrow_type(ty: &TableDataType) -> ArrowDataType { ), TableDataType::Boolean => ArrowDataType::Boolean, TableDataType::Binary => ArrowDataType::LargeBinary, - TableDataType::String => ArrowDataType::LargeUtf8, + TableDataType::String => ArrowDataType::Utf8View, TableDataType::Number(ty) => with_number_type!(|TYPE| match ty { NumberDataType::TYPE => ArrowDataType::TYPE, }), @@ -315,32 +315,7 @@ impl Column { ) .unwrap(), ), - Column::Binary(col) => { - let offsets: Buffer = - col.offsets().iter().map(|offset| *offset as i64).collect(); - Box::new( - databend_common_arrow::arrow::array::BinaryArray::::try_new( - arrow_type, - unsafe { OffsetsBuffer::new_unchecked(offsets) }, - col.data().clone(), - None, - ) - .unwrap(), - ) - } - Column::String(col) => { - let offsets: Buffer = - col.offsets().iter().map(|offset| *offset as i64).collect(); - Box::new( - databend_common_arrow::arrow::array::Utf8Array::::try_new( - arrow_type, - unsafe { OffsetsBuffer::new_unchecked(offsets) }, - col.data().clone(), - None, - ) - .unwrap(), - ) - } + Column::String(col) => Box::new(col.clone().into_inner()), Column::Timestamp(col) => Box::new( databend_common_arrow::arrow::array::PrimitiveArray::::try_new( arrow_type, @@ -409,7 +384,9 @@ impl Column { ) .unwrap(), ), - Column::Bitmap(col) + + Column::Binary(col) + | Column::Bitmap(col) | Column::Variant(col) | Column::Geometry(col) | Column::Geography(GeographyColumn(col)) => { diff --git a/src/query/expression/src/converts/meta/bincode.rs b/src/query/expression/src/converts/meta/bincode.rs index aa8828bacaae..e839a10d99b0 100644 --- a/src/query/expression/src/converts/meta/bincode.rs +++ b/src/query/expression/src/converts/meta/bincode.rs @@ -60,15 +60,21 @@ pub enum LegacyColumn { Number(NumberColumn), Decimal(DecimalColumn), Boolean(Bitmap), - String(BinaryColumn), + String(LegacyBinaryColumn), Timestamp(Buffer), Date(Buffer), Array(Box), Map(Box), - Bitmap(BinaryColumn), + Bitmap(LegacyBinaryColumn), Nullable(Box), Tuple(Vec), - Variant(BinaryColumn), + Variant(LegacyBinaryColumn), +} + +#[derive(Clone)] +pub struct LegacyBinaryColumn { + pub(crate) data: Buffer, + pub(crate) offsets: Buffer, } #[derive(Clone)] @@ -104,6 +110,24 @@ impl From for Scalar { } } +impl From for BinaryColumn { + fn from(value: LegacyBinaryColumn) -> Self { + BinaryColumn { + data: value.data, + offsets: value.offsets, + } + } +} + +impl From for LegacyBinaryColumn { + fn from(value: BinaryColumn) -> Self { + LegacyBinaryColumn { + data: value.data, + offsets: value.offsets, + } + } +} + impl From for Column { fn from(value: LegacyColumn) -> Self { match value { @@ -113,7 +137,9 @@ impl From for Column { LegacyColumn::Number(num_col) => Column::Number(num_col), LegacyColumn::Decimal(dec_col) => Column::Decimal(dec_col), LegacyColumn::Boolean(bmp) => Column::Boolean(bmp), - LegacyColumn::String(str_col) => Column::String(str_col.try_into().unwrap()), + LegacyColumn::String(str_col) => { + Column::String(StringColumn::try_from_binary(BinaryColumn::from(str_col)).unwrap()) + } LegacyColumn::Timestamp(buf) => Column::Timestamp(buf), LegacyColumn::Date(buf) => Column::Date(buf), LegacyColumn::Array(arr_col) => Column::Array(Box::new(ArrayColumn:: { @@ -124,7 +150,7 @@ impl From for Column { values: map_col.values.into(), offsets: map_col.offsets, })), - LegacyColumn::Bitmap(str_col) => Column::Bitmap(str_col), + LegacyColumn::Bitmap(str_col) => Column::Bitmap(BinaryColumn::from(str_col)), LegacyColumn::Nullable(nullable_col) => { Column::Nullable(Box::new(NullableColumn:: { column: nullable_col.column.into(), @@ -134,7 +160,7 @@ impl From for Column { LegacyColumn::Tuple(tuple) => { Column::Tuple(tuple.into_iter().map(|c| c.into()).collect()) } - LegacyColumn::Variant(variant) => Column::Variant(variant), + LegacyColumn::Variant(variant) => Column::Variant(BinaryColumn::from(variant)), } } } @@ -171,7 +197,9 @@ impl From for LegacyColumn { Column::Decimal(dec_col) => LegacyColumn::Decimal(dec_col), Column::Boolean(bmp) => LegacyColumn::Boolean(bmp), Column::Binary(_) | Column::Geometry(_) | Column::Geography(_) => unreachable!(), - Column::String(str_col) => LegacyColumn::String(str_col.into()), + Column::String(str_col) => { + LegacyColumn::String(LegacyBinaryColumn::from(BinaryColumn::from(str_col))) + } Column::Timestamp(buf) => LegacyColumn::Timestamp(buf), Column::Date(buf) => LegacyColumn::Date(buf), Column::Array(arr_col) => LegacyColumn::Array(Box::new(LegacyArrayColumn { @@ -182,7 +210,7 @@ impl From for LegacyColumn { values: map_col.values.into(), offsets: map_col.offsets, })), - Column::Bitmap(str_col) => LegacyColumn::Bitmap(str_col), + Column::Bitmap(str_col) => LegacyColumn::Bitmap(LegacyBinaryColumn::from(str_col)), Column::Nullable(nullable_col) => { LegacyColumn::Nullable(Box::new(LegacyNullableColumn { column: nullable_col.column.into(), @@ -192,7 +220,7 @@ impl From for LegacyColumn { Column::Tuple(tuple) => { LegacyColumn::Tuple(tuple.into_iter().map(|c| c.into()).collect()) } - Column::Variant(variant) => LegacyColumn::Variant(variant), + Column::Variant(variant) => LegacyColumn::Variant(LegacyBinaryColumn::from(variant)), } } } diff --git a/src/query/expression/src/evaluator.rs b/src/query/expression/src/evaluator.rs index 59bd71379e57..85ec550fc8f5 100644 --- a/src/query/expression/src/evaluator.rs +++ b/src/query/expression/src/evaluator.rs @@ -613,10 +613,9 @@ impl<'a> Evaluator<'a> { }; let validity = None; - let mut key_builder = StringColumnBuilder::with_capacity(obj.len(), 0); + let mut key_builder = StringColumnBuilder::with_capacity(obj.len()); for k in obj.keys() { - key_builder.put_str(k.as_str()); - key_builder.commit_row(); + key_builder.put_and_commit(k.as_str()); } let key_column = Column::String(key_builder.build()); @@ -659,8 +658,7 @@ impl<'a> Evaluator<'a> { }; for (k, v) in obj.iter() { - key_builder.put_str(k.as_str()); - key_builder.commit_row(); + key_builder.put_and_commit(k.as_str()); v.write_to_vec(&mut value_builder.builder.data); value_builder.builder.commit_row(); } diff --git a/src/query/expression/src/filter/filter_executor.rs b/src/query/expression/src/filter/filter_executor.rs index 007f6d319e9c..321d9c68731a 100644 --- a/src/query/expression/src/filter/filter_executor.rs +++ b/src/query/expression/src/filter/filter_executor.rs @@ -26,6 +26,7 @@ use crate::Expr; use crate::FunctionContext; use crate::FunctionRegistry; use crate::SelectExprBuilder; +use crate::SELECTIVITY_THRESHOLD; // FilterExecutor is used to filter `DataBlock` by `SelectExpr`. pub struct FilterExecutor { @@ -114,7 +115,7 @@ impl FilterExecutor { // (3) Otherwise, use `take` to generate a new `DataBlock`. if result_count == origin_count { Ok(data_block) - } else if result_count as f64 > data_block.num_rows() as f64 * 0.8 + } else if result_count as f64 > data_block.num_rows() as f64 * SELECTIVITY_THRESHOLD && data_block.num_columns() > 1 { let range_count = self.build_selection_range(result_count); @@ -125,7 +126,7 @@ impl FilterExecutor { if self.keep_order && self.has_or { self.true_selection[0..result_count].sort(); } - data_block.take(&self.true_selection[0..result_count], &mut None) + data_block.take_with_optimize_size(&self.true_selection[0..result_count]) } } diff --git a/src/query/expression/src/filter/select_value/select_column_scalar.rs b/src/query/expression/src/filter/select_value/select_column_scalar.rs index 3d36884cde64..8c2bad57e108 100644 --- a/src/query/expression/src/filter/select_value/select_column_scalar.rs +++ b/src/query/expression/src/filter/select_value/select_column_scalar.rs @@ -239,48 +239,19 @@ impl<'a> Selector<'a> { Some(validity) => { // search the whole string buffer if let LikePattern::SurroundByPercent(searcher) = like_pattern { - let needle = searcher.needle(); - let needle_byte_len = needle.len(); - let data = column.data().as_slice(); - let offsets = column.offsets().as_slice(); - let mut idx = 0; - let mut pos = (*offsets.first().unwrap()) as usize; - let end = (*offsets.last().unwrap()) as usize; - - while pos < end && idx < count { - if let Some(p) = searcher.search(&data[pos..end]) { - while offsets[idx + 1] as usize <= pos + p { - let ret = NOT && validity.get_bit_unchecked(idx); - update_index( - ret, - idx as u32, - true_selection, - false_selection, - ); - idx += 1; - } - - // check if the substring is in bound - let ret = - pos + p + needle_byte_len <= offsets[idx + 1] as usize; - - let ret = if NOT { - validity.get_bit_unchecked(idx) && !ret - } else { - validity.get_bit_unchecked(idx) && ret - }; - update_index(ret, idx as u32, true_selection, false_selection); - - pos = offsets[idx + 1] as usize; - idx += 1; + for idx in 0u32..count as u32 { + let ret = if NOT { + validity.get_bit_unchecked(idx as usize) + && searcher + .search(column.index_unchecked_bytes(idx as usize)) + .is_none() } else { - break; - } - } - while idx < count { - let ret = NOT && validity.get_bit_unchecked(idx); - update_index(ret, idx as u32, true_selection, false_selection); - idx += 1; + validity.get_bit_unchecked(idx as usize) + && searcher + .search(column.index_unchecked_bytes(idx as usize)) + .is_some() + }; + update_index(ret, idx, true_selection, false_selection); } } else { for idx in 0u32..count as u32 { @@ -300,40 +271,17 @@ impl<'a> Selector<'a> { None => { // search the whole string buffer if let LikePattern::SurroundByPercent(searcher) = like_pattern { - let needle = searcher.needle(); - let needle_byte_len = needle.len(); - let data = column.data().as_slice(); - let offsets = column.offsets().as_slice(); - let mut idx = 0; - let mut pos = (*offsets.first().unwrap()) as usize; - let end = (*offsets.last().unwrap()) as usize; - - while pos < end && idx < count { - if let Some(p) = searcher.search(&data[pos..end]) { - while offsets[idx + 1] as usize <= pos + p { - update_index( - NOT, - idx as u32, - true_selection, - false_selection, - ); - idx += 1; - } - // check if the substring is in bound - let ret = - pos + p + needle_byte_len <= offsets[idx + 1] as usize; - let ret = if NOT { !ret } else { ret }; - update_index(ret, idx as u32, true_selection, false_selection); - - pos = offsets[idx + 1] as usize; - idx += 1; + for idx in 0u32..count as u32 { + let ret = if NOT { + searcher + .search(column.index_unchecked_bytes(idx as usize)) + .is_none() } else { - break; - } - } - while idx < count { - update_index(NOT, idx as u32, true_selection, false_selection); - idx += 1; + searcher + .search(column.index_unchecked_bytes(idx as usize)) + .is_some() + }; + update_index(ret, idx, true_selection, false_selection); } } else { for idx in 0u32..count as u32 { diff --git a/src/query/expression/src/kernels/concat.rs b/src/query/expression/src/kernels/concat.rs index d327fff0eb2c..782031de7c10 100644 --- a/src/query/expression/src/kernels/concat.rs +++ b/src/query/expression/src/kernels/concat.rs @@ -13,8 +13,10 @@ // limitations under the License. use std::iter::TrustedLen; -use std::sync::Arc; +use databend_common_arrow::arrow::array::growable::make_growable; +use databend_common_arrow::arrow::array::Array; +use databend_common_arrow::arrow::array::BooleanArray; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_arrow::arrow::buffer::Buffer; use databend_common_exception::ErrorCode; @@ -22,35 +24,22 @@ use databend_common_exception::Result; use ethnum::i256; use itertools::Itertools; -use crate::copy_continuous_bits; -use crate::kernels::take::BIT_MASK; -use crate::kernels::utils::copy_advance_aligned; -use crate::kernels::utils::set_vec_len_by_ptr; -use crate::store_advance_aligned; use crate::types::array::ArrayColumnBuilder; -use crate::types::binary::BinaryColumn; use crate::types::decimal::Decimal; use crate::types::decimal::DecimalColumn; -use crate::types::geography::GeographyColumn; -use crate::types::geometry::GeometryType; use crate::types::map::KvColumnBuilder; use crate::types::nullable::NullableColumn; use crate::types::number::NumberColumn; -use crate::types::string::StringColumn; use crate::types::AnyType; use crate::types::ArrayType; -use crate::types::BinaryType; -use crate::types::BitmapType; use crate::types::BooleanType; +use crate::types::DataType; use crate::types::DateType; use crate::types::DecimalType; -use crate::types::GeographyType; use crate::types::MapType; use crate::types::NumberType; -use crate::types::StringType; use crate::types::TimestampType; use crate::types::ValueType; -use crate::types::VariantType; use crate::with_decimal_mapped_type; use crate::with_number_mapped_type; use crate::BlockEntry; @@ -164,14 +153,6 @@ impl Column { columns.map(|col| col.into_boolean().unwrap()), capacity, )), - Column::Binary(_) => BinaryType::upcast_column(Self::concat_binary_types( - columns.map(|col| col.into_binary().unwrap()), - capacity, - )), - Column::String(_) => StringType::upcast_column(Self::concat_string_types( - columns.map(|col| col.into_string().unwrap()), - capacity, - )), Column::Timestamp(_) => { let buffer = Self::concat_primitive_types( columns.map(|col| TimestampType::try_downcast_column(&col).unwrap()), @@ -210,10 +191,6 @@ impl Column { let builder = ArrayColumnBuilder { builder, offsets }; Self::concat_value_types::>(builder, columns) } - Column::Bitmap(_) => BitmapType::upcast_column(Self::concat_binary_types( - columns.map(|col| col.into_bitmap().unwrap()), - capacity, - )), Column::Nullable(_) => { let column: Vec = columns .clone() @@ -239,19 +216,13 @@ impl Column { .collect::>()?; Column::Tuple(fields) } - Column::Variant(_) => VariantType::upcast_column(Self::concat_binary_types( - columns.map(|col| col.into_variant().unwrap()), - capacity, - )), - Column::Geometry(_) => GeometryType::upcast_column(Self::concat_binary_types( - columns.map(|col| col.into_geometry().unwrap()), - capacity, - )), - Column::Geography(_) => { - GeographyType::upcast_column(GeographyColumn(Self::concat_binary_types( - columns.map(|col| col.into_geography().unwrap().0), - capacity, - ))) + Column::Variant(_) + | Column::Geometry(_) + | Column::Geography(_) + | Column::Binary(_) + | Column::String(_) + | Column::Bitmap(_) => { + Self::concat_use_grows(columns, first_column.data_type(), capacity) } }; Ok(column) @@ -271,108 +242,38 @@ impl Column { builder.into() } - pub fn concat_binary_types( - cols: impl Iterator + Clone, + pub fn concat_use_grows( + cols: impl Iterator, + data_type: DataType, num_rows: usize, - ) -> BinaryColumn { - // [`BinaryColumn`] consists of [`data`] and [`offset`], we build [`data`] and [`offset`] respectively, - // and then call `BinaryColumn::new(data.into(), offsets.into())` to create [`BinaryColumn`]. - let mut offsets: Vec = Vec::with_capacity(num_rows + 1); - let mut data_size = 0; - - // Build [`offset`] and calculate `data_size` required by [`data`]. - offsets.push(0); - for col in cols.clone() { - let mut start = col.offsets()[0]; - for end in col.offsets()[1..].iter() { - data_size += end - start; - start = *end; - offsets.push(data_size); - } - } - - // Build [`data`]. - let mut data: Vec = Vec::with_capacity(data_size as usize); - let mut data_ptr = data.as_mut_ptr(); + ) -> Column { + let arrays: Vec> = + cols.map(|c| c.as_arrow()).collect(); - unsafe { - for col in cols { - let offsets = col.offsets(); - let col_data = &(col.data().as_slice()) - [offsets[0] as usize..offsets[offsets.len() - 1] as usize]; - copy_advance_aligned(col_data.as_ptr(), &mut data_ptr, col_data.len()); - } - set_vec_len_by_ptr(&mut data, data_ptr); - } + let arrays = arrays.iter().map(|c| c.as_ref()).collect::>(); + let mut grow = make_growable(&arrays, false, num_rows); - BinaryColumn::new(data.into(), offsets.into()) - } - - pub fn concat_string_types( - cols: impl Iterator + Clone, - num_rows: usize, - ) -> StringColumn { - unsafe { - StringColumn::from_binary_unchecked(Self::concat_binary_types( - cols.map(Into::into), - num_rows, - )) + for (idx, array) in arrays.iter().enumerate() { + grow.extend(idx, 0, array.len()); } + let array = grow.as_box(); + Column::from_arrow(array.as_ref(), &data_type).unwrap() } pub fn concat_boolean_types(bitmaps: impl Iterator, num_rows: usize) -> Bitmap { - let capacity = num_rows.saturating_add(7) / 8; - let mut builder: Vec = Vec::with_capacity(capacity); - let mut builder_ptr = builder.as_mut_ptr(); - let mut builder_idx = 0; - let mut unset_bits = 0; - let mut buf = 0; - - unsafe { - for bitmap in bitmaps { - let (bitmap_slice, bitmap_offset, _) = bitmap.as_slice(); - let mut idx = 0; - let len = bitmap.len(); - if builder_idx % 8 != 0 { - while idx < len { - if bitmap.get_bit_unchecked(idx) { - buf |= BIT_MASK[builder_idx % 8]; - } else { - unset_bits += 1; - } - builder_idx += 1; - idx += 1; - if builder_idx % 8 == 0 { - store_advance_aligned(buf, &mut builder_ptr); - buf = 0; - break; - } - } - } - let remaining = len - idx; - if remaining > 0 { - let (cur_buf, cur_unset_bits) = copy_continuous_bits( - &mut builder_ptr, - bitmap_slice, - builder_idx, - idx + bitmap_offset, - remaining, - ); - builder_idx += remaining; - unset_bits += cur_unset_bits; - buf = cur_buf; - } - } - - if builder_idx % 8 != 0 { - store_advance_aligned(buf, &mut builder_ptr); - } + use databend_common_arrow::arrow::datatypes::DataType as ArrowType; + let arrays: Vec = bitmaps + .map(|c| BooleanArray::new(ArrowType::Boolean, c, None)) + .collect(); + let arrays = arrays.iter().map(|c| c as &dyn Array).collect::>(); + let mut grow = make_growable(&arrays, false, num_rows); - set_vec_len_by_ptr(&mut builder, builder_ptr); - Bitmap::from_inner(Arc::new(builder.into()), 0, num_rows, unset_bits) - .ok() - .unwrap() + for (idx, array) in arrays.iter().enumerate() { + grow.extend(idx, 0, array.len()); } + let array = grow.as_box(); + let array = array.as_any().downcast_ref::().unwrap(); + array.values().clone() } fn concat_value_types( diff --git a/src/query/expression/src/kernels/filter.rs b/src/query/expression/src/kernels/filter.rs index 894ceb412cfb..11bfffe08812 100644 --- a/src/query/expression/src/kernels/filter.rs +++ b/src/query/expression/src/kernels/filter.rs @@ -12,18 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -use databend_common_arrow::arrow::bitmap::utils::BitChunkIterExact; -use databend_common_arrow::arrow::bitmap::utils::BitChunksExact; +use binary::BinaryColumnBuilder; +use databend_common_arrow::arrow::array::Array; +use databend_common_arrow::arrow::array::Utf8ViewArray; use databend_common_arrow::arrow::bitmap::utils::SlicesIterator; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_arrow::arrow::bitmap::MutableBitmap; use databend_common_arrow::arrow::bitmap::TrueIdxIter; use databend_common_arrow::arrow::buffer::Buffer; use databend_common_exception::Result; +use string::StringColumnBuilder; -use crate::kernels::utils::copy_advance_aligned; -use crate::kernels::utils::set_vec_len_by_ptr; -use crate::kernels::utils::store_advance_aligned; use crate::types::binary::BinaryColumn; use crate::types::nullable::NullableColumn; use crate::types::string::StringColumn; @@ -118,7 +117,7 @@ pub enum IterationStrategy { } /// based on -const SELECTIVITY_THRESHOLD: f64 = 0.8; +pub const SELECTIVITY_THRESHOLD: f64 = 0.8; impl IterationStrategy { fn default_strategy(length: usize, true_count: usize) -> Self { @@ -172,7 +171,7 @@ impl<'a> ValueVisitor for FilterVisitor<'a> { match value { Value::Scalar(c) => self.visit_scalar(c), Value::Column(c) => { - assert!(c.len() == self.original_rows); + assert_eq!(c.len(), self.original_rows); match self.strategy { IterationStrategy::None => self.result = Some(Value::Column(c.slice(0..0))), IterationStrategy::All => self.result = Some(Value::Column(c)), @@ -226,6 +225,7 @@ impl<'a> ValueVisitor for FilterVisitor<'a> { }); } } + self.result = Some(Value::Column(T::upcast_column(T::build_column(builder)))); Ok(()) } @@ -304,10 +304,9 @@ impl<'a> ValueVisitor for FilterVisitor<'a> { } fn visit_string(&mut self, column: StringColumn) -> Result<()> { - let column: BinaryColumn = column.into(); - self.result = Some(Value::Column(StringType::upcast_column(unsafe { - StringColumn::from_binary_unchecked(self.filter_binary_types(&column)) - }))); + self.result = Some(Value::Column(StringType::upcast_column( + self.filter_string_types(&column), + ))); Ok(()) } @@ -337,126 +336,45 @@ impl<'a> FilterVisitor<'a> { } } - // TODO: optimize this after BinaryView is introduced by @andy - fn filter_binary_types(&mut self, values: &BinaryColumn) -> BinaryColumn { - // Each element of `items` is (string pointer(u64), string length). - let mut items: Vec<(u64, usize)> = Vec::with_capacity(self.filter_rows); - // [`BinaryColumn`] consists of [`data`] and [`offset`], we build [`data`] and [`offset`] respectively, - // and then call `BinaryColumn::new(data.into(), offsets.into())` to create [`BinaryColumn`]. - let values_offset = values.offsets().as_slice(); - let values_data_ptr = values.data().as_slice().as_ptr(); - let mut offsets: Vec = Vec::with_capacity(self.filter_rows + 1); - let mut offsets_ptr = offsets.as_mut_ptr(); - let mut items_ptr = items.as_mut_ptr(); - let mut data_size = 0; - - // Build [`offset`] and calculate `data_size` required by [`data`]. - unsafe { - store_advance_aligned::(0, &mut offsets_ptr); - let mut idx = 0; - let (mut slice, offset, mut length) = self.filter.as_slice(); - if offset > 0 { - let mut mask = slice[0]; - while mask != 0 { - let n = mask.trailing_zeros() as usize; - // If `offset` > 0, the valid bits of this byte start at `offset`, we also - // need to ensure that we cannot iterate more than `length` bits. - if n >= offset && n < offset + length { - let start = *values_offset.get_unchecked(n - offset) as usize; - let len = *values_offset.get_unchecked(n - offset + 1) as usize - start; - data_size += len as u64; - store_advance_aligned(data_size, &mut offsets_ptr); - store_advance_aligned( - (values_data_ptr.add(start) as u64, len), - &mut items_ptr, - ); - } - mask = mask & (mask - 1); - } - let bits_to_align = 8 - offset; - length = if length >= bits_to_align { - length - bits_to_align - } else { - 0 - }; - slice = &slice[1..]; - idx += bits_to_align; - } + fn filter_string_types(&mut self, values: &StringColumn) -> StringColumn { + match self.strategy { + IterationStrategy::IndexIterator => { + let mut builder = StringColumnBuilder::with_capacity(self.filter_rows); - const CHUNK_SIZE: usize = 64; - let mut mask_chunks = BitChunksExact::::new(slice, length); - let mut continuous_selected = 0; - for mut mask in mask_chunks.by_ref() { - if mask == u64::MAX { - continuous_selected += CHUNK_SIZE; - } else { - if continuous_selected > 0 { - let start = *values_offset.get_unchecked(idx) as usize; - let len = *values_offset.get_unchecked(idx + continuous_selected) as usize - - start; - store_advance_aligned( - (values_data_ptr.add(start) as u64, len), - &mut items_ptr, - ); - for i in 0..continuous_selected { - data_size += *values_offset.get_unchecked(idx + i + 1) - - *values_offset.get_unchecked(idx + i); - store_advance_aligned(data_size, &mut offsets_ptr); - } - idx += continuous_selected; - continuous_selected = 0; - } - while mask != 0 { - let n = mask.trailing_zeros() as usize; - let start = *values_offset.get_unchecked(idx + n) as usize; - let len = *values_offset.get_unchecked(idx + n + 1) as usize - start; - data_size += len as u64; - store_advance_aligned( - (values_data_ptr.add(start) as u64, len), - &mut items_ptr, - ); - store_advance_aligned(data_size, &mut offsets_ptr); - mask = mask & (mask - 1); + let iter = TrueIdxIter::new(self.original_rows, Some(self.filter)); + for i in iter { + unsafe { + builder.put_and_commit(values.index_unchecked(i)); } - idx += CHUNK_SIZE; - } - } - if continuous_selected > 0 { - let start = *values_offset.get_unchecked(idx) as usize; - let len = *values_offset.get_unchecked(idx + continuous_selected) as usize - start; - store_advance_aligned((values_data_ptr.add(start) as u64, len), &mut items_ptr); - for i in 0..continuous_selected { - data_size += *values_offset.get_unchecked(idx + i + 1) - - *values_offset.get_unchecked(idx + i); - store_advance_aligned(data_size, &mut offsets_ptr); } - idx += continuous_selected; + builder.build() } - - for (i, is_selected) in mask_chunks.remainder_iter().enumerate() { - if is_selected { - let start = *values_offset.get_unchecked(idx + i) as usize; - let len = *values_offset.get_unchecked(idx + i + 1) as usize - start; - data_size += len as u64; - store_advance_aligned((values_data_ptr.add(start) as u64, len), &mut items_ptr); - store_advance_aligned(data_size, &mut offsets_ptr); - } + _ => { + // reuse the buffers + let new_views = self.filter_primitive_types(values.data.views().clone()); + let new_col = unsafe { + Utf8ViewArray::new_unchecked_unknown_md( + values.data.data_type().clone(), + new_views, + values.data.data_buffers().clone(), + None, + Some(values.data.total_buffer_len()), + ) + }; + StringColumn::new(new_col) } - set_vec_len_by_ptr(&mut items, items_ptr); - set_vec_len_by_ptr(&mut offsets, offsets_ptr); } + } - // Build [`data`]. - let mut data: Vec = Vec::with_capacity(data_size as usize); - let mut data_ptr = data.as_mut_ptr(); - - unsafe { - for (str_ptr, len) in items.iter() { - copy_advance_aligned(*str_ptr as *const u8, &mut data_ptr, *len); + fn filter_binary_types(&mut self, values: &BinaryColumn) -> BinaryColumn { + let mut builder = BinaryColumnBuilder::with_capacity(self.filter_rows, 0); + let iter = TrueIdxIter::new(self.original_rows, Some(self.filter)); + for i in iter { + unsafe { + builder.put_slice(values.index_unchecked(i)); + builder.commit_row(); } - set_vec_len_by_ptr(&mut data, data_ptr); } - - BinaryColumn::new(data.into(), offsets.into()) + builder.build() } } diff --git a/src/query/expression/src/kernels/group_by.rs b/src/query/expression/src/kernels/group_by.rs index ebebf4dd86e0..ebef511ae4f4 100644 --- a/src/query/expression/src/kernels/group_by.rs +++ b/src/query/expression/src/kernels/group_by.rs @@ -52,7 +52,7 @@ impl DataBlock { if hash_key_types.len() == 1 && matches!( hash_key_types[0], - DataType::Binary | DataType::String | DataType::Variant | DataType::Bitmap + DataType::Binary | DataType::Variant | DataType::Bitmap ) { return Ok(HashMethodKind::SingleBinary( diff --git a/src/query/expression/src/kernels/group_by_hash/method.rs b/src/query/expression/src/kernels/group_by_hash/method.rs index 0485c07ab17a..b8215a0d213f 100644 --- a/src/query/expression/src/kernels/group_by_hash/method.rs +++ b/src/query/expression/src/kernels/group_by_hash/method.rs @@ -20,6 +20,7 @@ use databend_common_arrow::arrow::buffer::Buffer; use databend_common_exception::Result; use databend_common_hashtable::DictionaryKeys; use databend_common_hashtable::FastHash; +use either::Either; use ethnum::i256; use ethnum::u256; @@ -28,6 +29,7 @@ use crate::types::decimal::Decimal; use crate::types::DataType; use crate::types::DecimalDataType; use crate::types::NumberDataType; +use crate::types::StringColumn; use crate::Column; use crate::HashMethodDictionarySerializer; use crate::HashMethodKeysU128; @@ -46,7 +48,7 @@ pub enum KeysState { U128(Buffer), U256(Buffer), Dictionary { - columns: Vec, + columns: Vec>, keys_point: Vec>, dictionaries: Vec, }, diff --git a/src/query/expression/src/kernels/group_by_hash/method_dict_serializer.rs b/src/query/expression/src/kernels/group_by_hash/method_dict_serializer.rs index a2ce1d4c57e9..cf0cdd91967b 100644 --- a/src/query/expression/src/kernels/group_by_hash/method_dict_serializer.rs +++ b/src/query/expression/src/kernels/group_by_hash/method_dict_serializer.rs @@ -17,6 +17,7 @@ use std::ptr::NonNull; use databend_common_exception::Result; use databend_common_hashtable::DictionaryKeys; use databend_common_hashtable::FastHash; +use either::Either; use super::utils::serialize_group_columns; use crate::Column; @@ -46,11 +47,11 @@ impl HashMethod for HashMethodDictionarySerializer { match group_column { Column::Binary(v) | Column::Variant(v) | Column::Bitmap(v) => { debug_assert_eq!(v.len(), num_rows); - dictionary_columns.push(v.clone()); + dictionary_columns.push(Either::Right(v.clone())); } Column::String(v) => { debug_assert_eq!(v.len(), num_rows); - dictionary_columns.push(v.clone().into()); + dictionary_columns.push(Either::Left(v.clone())); } _ => serialize_columns.push(group_column.clone()), } @@ -62,11 +63,9 @@ impl HashMethod for HashMethodDictionarySerializer { for column in serialize_columns.iter() { serialize_size += column.serialize_size(); } - dictionary_columns.push(serialize_group_columns( - (&serialize_columns).into(), - num_rows, - serialize_size, - )); + let state = + serialize_group_columns((&serialize_columns).into(), num_rows, serialize_size); + dictionary_columns.push(Either::Right(state)); } let mut keys = Vec::with_capacity(num_rows * dictionary_columns.len()); @@ -76,9 +75,11 @@ impl HashMethod for HashMethodDictionarySerializer { let start = points.len(); for dictionary_column in &dictionary_columns { - points.push(NonNull::from(unsafe { - dictionary_column.index_unchecked(row) - })); + let data = match dictionary_column { + Either::Left(l) => unsafe { l.index_unchecked(row).as_bytes() }, + Either::Right(r) => unsafe { r.index_unchecked(row) }, + }; + points.push(NonNull::from(data)); } keys.push(DictionaryKeys::create(&points[start..])) diff --git a/src/query/expression/src/kernels/group_by_hash/method_serializer.rs b/src/query/expression/src/kernels/group_by_hash/method_serializer.rs index 4cb277bf416c..b11f028e07c6 100644 --- a/src/query/expression/src/kernels/group_by_hash/method_serializer.rs +++ b/src/query/expression/src/kernels/group_by_hash/method_serializer.rs @@ -17,7 +17,6 @@ use databend_common_hashtable::hash_join_fast_string_hash; use super::utils::serialize_group_columns; use crate::types::binary::BinaryIterator; -use crate::BinaryKeyAccessor; use crate::Column; use crate::HashMethod; use crate::InputColumns; @@ -51,7 +50,7 @@ impl HashMethod for HashMethodSerializer { fn build_keys_iter<'a>(&self, key_state: &'a KeysState) -> Result> { match key_state { - KeysState::Column(Column::Binary(col)) => Ok(col.iter()), + KeysState::Column(Column::Binary(state)) => Ok(state.iter()), _ => unreachable!(), } } @@ -61,18 +60,15 @@ impl HashMethod for HashMethodSerializer { keys_state: KeysState, ) -> Result>> { match keys_state { - KeysState::Column(Column::Binary(col)) => { - let (data, offsets) = col.into_buffer(); - Ok(Box::new(BinaryKeyAccessor::new(data, offsets))) - } + KeysState::Column(Column::Binary(state)) => Ok(Box::new(state)), _ => unreachable!(), } } fn build_keys_hashes(&self, keys_state: &KeysState, hashes: &mut Vec) { match keys_state { - KeysState::Column(Column::Binary(col)) => { - hashes.extend(col.iter().map(hash_join_fast_string_hash)); + KeysState::Column(Column::Binary(state)) => { + hashes.extend(state.iter().map(hash_join_fast_string_hash)); } _ => unreachable!(), } diff --git a/src/query/expression/src/kernels/group_by_hash/method_single_string.rs b/src/query/expression/src/kernels/group_by_hash/method_single_string.rs index 3c77a7bd58af..f2791534e55a 100644 --- a/src/query/expression/src/kernels/group_by_hash/method_single_string.rs +++ b/src/query/expression/src/kernels/group_by_hash/method_single_string.rs @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -use databend_common_arrow::arrow::buffer::Buffer; use databend_common_exception::Result; use databend_common_hashtable::hash_join_fast_string_hash; use crate::types::binary::BinaryIterator; +use crate::types::BinaryColumn; use crate::Column; use crate::HashMethod; use crate::InputColumns; @@ -44,7 +44,6 @@ impl HashMethod for HashMethodSingleBinary { KeysState::Column(Column::Binary(col)) | KeysState::Column(Column::Variant(col)) | KeysState::Column(Column::Bitmap(col)) => Ok(col.iter()), - KeysState::Column(Column::String(col)) => Ok(col.iter_binary()), _ => unreachable!(), } } @@ -56,14 +55,7 @@ impl HashMethod for HashMethodSingleBinary { match keys_state { KeysState::Column(Column::Binary(col)) | KeysState::Column(Column::Variant(col)) - | KeysState::Column(Column::Bitmap(col)) => { - let (data, offsets) = col.into_buffer(); - Ok(Box::new(BinaryKeyAccessor::new(data, offsets))) - } - KeysState::Column(Column::String(col)) => { - let (data, offsets) = col.into_buffer(); - Ok(Box::new(BinaryKeyAccessor::new(data, offsets))) - } + | KeysState::Column(Column::Bitmap(col)) => Ok(Box::new(col)), _ => unreachable!(), } } @@ -75,26 +67,12 @@ impl HashMethod for HashMethodSingleBinary { | KeysState::Column(Column::Bitmap(col)) => { hashes.extend(col.iter().map(hash_join_fast_string_hash)); } - KeysState::Column(Column::String(col)) => { - hashes.extend(col.iter_binary().map(hash_join_fast_string_hash)); - } _ => unreachable!(), } } } -pub struct BinaryKeyAccessor { - data: Buffer, - offsets: Buffer, -} - -impl BinaryKeyAccessor { - pub fn new(data: Buffer, offsets: Buffer) -> Self { - Self { data, offsets } - } -} - -impl KeyAccessor for BinaryKeyAccessor { +impl KeyAccessor for BinaryColumn { type Key = [u8]; /// # Safety diff --git a/src/query/expression/src/kernels/group_by_hash/utils.rs b/src/query/expression/src/kernels/group_by_hash/utils.rs index badd8056cf17..8b844452a798 100644 --- a/src/query/expression/src/kernels/group_by_hash/utils.rs +++ b/src/query/expression/src/kernels/group_by_hash/utils.rs @@ -15,8 +15,9 @@ use databend_common_base::vec_ext::VecExt; use databend_common_base::vec_ext::VecU8Ext; -use crate::types::binary::BinaryColumn; +use crate::types::binary::BinaryColumnBuilder; use crate::types::decimal::DecimalColumn; +use crate::types::BinaryColumn; use crate::types::NumberColumn; use crate::with_decimal_mapped_type; use crate::with_number_mapped_type; @@ -29,21 +30,17 @@ pub fn serialize_group_columns( num_rows: usize, serialize_size: usize, ) -> BinaryColumn { - // [`BinaryColumn`] consists of [`data`] and [`offset`], we build [`data`] and [`offset`] respectively, - // and then call `BinaryColumn::new(data.into(), offsets.into())` to create [`BinaryColumn`]. - let mut data: Vec = Vec::with_capacity(serialize_size); - let mut offsets: Vec = Vec::with_capacity(num_rows + 1); - unsafe { - offsets.push_unchecked(0); - for i in 0..num_rows { - for col in columns.iter() { - serialize_column_binary(col, i, &mut data); + let mut builder = BinaryColumnBuilder::with_capacity(num_rows, serialize_size); + + for i in 0..num_rows { + for col in columns.iter() { + unsafe { + serialize_column_binary(col, i, &mut builder.data); } - offsets.push_unchecked(data.len() as u64); } + builder.commit_row(); } - - BinaryColumn::new(data.into(), offsets.into()) + builder.build() } /// This function must be consistent with the `push_binary` function of `src/query/expression/src/values.rs`. diff --git a/src/query/expression/src/kernels/mod.rs b/src/query/expression/src/kernels/mod.rs index 738598406d23..01b1f2f02171 100644 --- a/src/query/expression/src/kernels/mod.rs +++ b/src/query/expression/src/kernels/mod.rs @@ -28,6 +28,7 @@ mod utils; pub use filter::FilterVisitor; pub use filter::IterationStrategy; +pub use filter::SELECTIVITY_THRESHOLD; pub use group_by_hash::*; pub use sort::*; pub use sort_compare::*; diff --git a/src/query/expression/src/kernels/scatter.rs b/src/query/expression/src/kernels/scatter.rs index 11d4ffbea53f..1d690ac929a8 100644 --- a/src/query/expression/src/kernels/scatter.rs +++ b/src/query/expression/src/kernels/scatter.rs @@ -12,11 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use databend_common_base::vec_ext::VecExt; use databend_common_exception::Result; -use itertools::Itertools; -use crate::kernels::utils::set_vec_len_by_ptr; -use crate::kernels::utils::store_advance_aligned; use crate::DataBlock; impl DataBlock { @@ -33,24 +31,9 @@ impl DataBlock { let scatter_indices = Self::divide_indices_by_scatter_size(indices, scatter_size); - let has_string_column = self - .columns() - .iter() - .any(|col| col.data_type.is_string_column()); - let mut string_items_buf = if has_string_column { - let max_num_rows = scatter_indices - .iter() - .map(|indices| indices.len()) - .max() - .unwrap(); - Some(vec![(0, 0); max_num_rows]) - } else { - None - }; - let mut results = Vec::with_capacity(scatter_size); for indices in scatter_indices.iter().take(scatter_size) { - let block = self.take(indices, &mut string_items_buf)?; + let block = self.take_with_optimize_size(indices)?; results.push(block); } @@ -68,18 +51,9 @@ impl DataBlock { for num_rows in scatter_num_rows.iter().take(scatter_size) { scatter_indices.push(Vec::with_capacity(*num_rows)); } - let mut scatter_indices_ptrs = scatter_indices - .iter_mut() - .map(|indices| indices.as_mut_ptr()) - .collect_vec(); + for (i, index) in indices.iter().enumerate() { - store_advance_aligned( - i as u32, - scatter_indices_ptrs.get_unchecked_mut(index.to_usize()), - ); - } - for i in 0..scatter_size { - set_vec_len_by_ptr(&mut scatter_indices[i], scatter_indices_ptrs[i]); + scatter_indices[index.to_usize()].push_unchecked(i as u32); } } scatter_indices diff --git a/src/query/expression/src/kernels/sort.rs b/src/query/expression/src/kernels/sort.rs index 4a9699c0e859..91bc7ebff299 100644 --- a/src/query/expression/src/kernels/sort.rs +++ b/src/query/expression/src/kernels/sort.rs @@ -105,7 +105,7 @@ impl DataBlock { } let permutations = sort_compare.take_permutation(); - DataBlock::take(block, &permutations, &mut None) + DataBlock::take(block, &permutations) } } diff --git a/src/query/expression/src/kernels/sort_compare.rs b/src/query/expression/src/kernels/sort_compare.rs index 45d91f0098d0..9f5078c621c3 100644 --- a/src/query/expression/src/kernels/sort_compare.rs +++ b/src/query/expression/src/kernels/sort_compare.rs @@ -23,6 +23,7 @@ use memchr::memchr; use crate::types::AnyType; use crate::types::NullableColumn; use crate::types::Number; +use crate::types::StringColumn; use crate::types::ValueType; use crate::visitor::ValueVisitor; use crate::LimitType; @@ -276,11 +277,21 @@ impl ValueVisitor for SortCompare { self.visit_number(buffer) } + fn visit_string(&mut self, column: StringColumn) -> Result<()> { + assert!(column.len() == self.rows); + self.generic_sort( + &column, + |col, idx| (col, idx as usize), + |(col1, idx1), (col2, idx2)| StringColumn::compare(col1, idx1, col2, idx2), + ); + Ok(()) + } + fn visit_typed_column(&mut self, col: T::Column) -> Result<()> { assert!(T::column_len(&col) == self.rows); self.generic_sort( &col, - |c, idx| -> T::ScalarRef<'_> { unsafe { T::index_column_unchecked(c, idx as _) } }, + |c, idx| unsafe { T::index_column_unchecked(c, idx as _) }, |a, b| T::compare(a, b), ); Ok(()) diff --git a/src/query/expression/src/kernels/take.rs b/src/query/expression/src/kernels/take.rs index 84d120b0390c..f4957a1c19f3 100644 --- a/src/query/expression/src/kernels/take.rs +++ b/src/query/expression/src/kernels/take.rs @@ -14,39 +14,52 @@ use std::sync::Arc; +use binary::BinaryColumnBuilder; +use databend_common_arrow::arrow::array::Array; +use databend_common_arrow::arrow::array::Utf8ViewArray; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_arrow::arrow::buffer::Buffer; +use databend_common_base::slice_ext::GetSaferUnchecked; use databend_common_exception::Result; +use string::StringColumnBuilder; -use crate::kernels::utils::copy_advance_aligned; -use crate::kernels::utils::set_vec_len_by_ptr; use crate::types::binary::BinaryColumn; use crate::types::nullable::NullableColumn; use crate::types::string::StringColumn; use crate::types::*; use crate::visitor::ValueVisitor; use crate::BlockEntry; +use crate::Column; use crate::ColumnBuilder; use crate::DataBlock; use crate::Value; +use crate::SELECTIVITY_THRESHOLD; pub const BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128]; impl DataBlock { - pub fn take( - &self, - indices: &[I], - string_items_buf: &mut Option>, - ) -> Result - where - I: databend_common_arrow::arrow::types::Index, - { + pub fn take(&self, indices: &[I]) -> Result + where I: databend_common_arrow::arrow::types::Index { if indices.is_empty() { return Ok(self.slice(0..0)); } - let mut taker = TakeVisitor::new(indices, string_items_buf); + let taker = TakeVisitor::new(indices); + self.take_inner(taker) + } + + pub fn take_with_optimize_size(&self, indices: &[I]) -> Result + where I: databend_common_arrow::arrow::types::Index { + if indices.is_empty() { + return Ok(self.slice(0..0)); + } + + let taker = TakeVisitor::new(indices).with_optimize_size_enable(true); + self.take_inner(taker) + } + fn take_inner(&self, mut taker: TakeVisitor) -> Result + where I: databend_common_arrow::arrow::types::Index { let after_columns = self .columns() .iter() @@ -62,7 +75,7 @@ impl DataBlock { Ok(DataBlock::new_with_meta( after_columns, - indices.len(), + taker.indices.len(), self.get_meta().cloned(), )) } @@ -72,20 +85,30 @@ struct TakeVisitor<'a, I> where I: databend_common_arrow::arrow::types::Index { indices: &'a [I], - string_items_buf: &'a mut Option>, result: Option>, + optimize_size_enable: bool, } impl<'a, I> TakeVisitor<'a, I> where I: databend_common_arrow::arrow::types::Index { - fn new(indices: &'a [I], string_items_buf: &'a mut Option>) -> Self { + fn new(indices: &'a [I]) -> Self { Self { indices, - string_items_buf, result: None, + optimize_size_enable: false, } } + + fn with_optimize_size_enable(mut self, optimize_size_enable: bool) -> Self { + self.optimize_size_enable = optimize_size_enable; + self + } + + fn should_optimize_size(&self, num_rows: usize) -> bool { + self.optimize_size_enable + || num_rows as f64 * SELECTIVITY_THRESHOLD > self.indices.len() as f64 + } } impl<'a, I> ValueVisitor for TakeVisitor<'a, I> @@ -168,9 +191,9 @@ where I: databend_common_arrow::arrow::types::Index // If this [`Bitmap`] is all true or all false and `num_rows <= bitmap.len()``, // we can just slice it. if num_rows <= col.len() && (col.unset_bits() == 0 || col.unset_bits() == col.len()) { - let mut bitmap = col.clone(); - bitmap.slice(0, num_rows); - self.result = Some(Value::Column(BooleanType::upcast_column(bitmap))); + self.result = Some(Value::Column(BooleanType::upcast_column( + col.sliced(0, num_rows), + ))); return Ok(()); } @@ -213,10 +236,9 @@ where I: databend_common_arrow::arrow::types::Index } fn visit_string(&mut self, column: StringColumn) -> Result<()> { - let column: BinaryColumn = column.into(); - self.result = Some(Value::Column(StringType::upcast_column(unsafe { - StringColumn::from_binary_unchecked(self.take_binary_types(&column)) - }))); + self.result = Some(Value::Column(StringType::upcast_column( + self.take_string_types(&column), + ))); Ok(()) } @@ -236,57 +258,61 @@ where I: databend_common_arrow::arrow::types::Index let result: Vec = self .indices .iter() - .map(|index| unsafe { *col.get_unchecked(index.to_usize()) }) + .map(|index| unsafe { *col.get_unchecked_release(index.to_usize()) }) .collect(); result.into() } fn take_binary_types(&mut self, col: &BinaryColumn) -> BinaryColumn { let num_rows = self.indices.len(); - - // Each element of `items` is (string pointer(u64), string length), if `string_items_buf` - // can be reused, we will not re-allocate memory. - let mut items: Option> = match &self.string_items_buf { - Some(string_items_buf) if string_items_buf.capacity() >= num_rows => None, - _ => Some(Vec::with_capacity(num_rows)), - }; - let items = match items.is_some() { - true => items.as_mut().unwrap(), - false => self.string_items_buf.as_mut().unwrap(), - }; - - // [`BinaryColumn`] consists of [`data`] and [`offset`], we build [`data`] and [`offset`] respectively, - // and then call `BinaryColumn::new(data.into(), offsets.into())` to create [`BinaryColumn`]. - let col_offset = col.offsets().as_slice(); - let col_data_ptr = col.data().as_slice().as_ptr(); - let mut offsets: Vec = Vec::with_capacity(num_rows + 1); - let mut data_size = 0; - - // Build [`offset`] and calculate `data_size` required by [`data`]. - unsafe { - items.set_len(num_rows); - offsets.set_len(num_rows + 1); - *offsets.get_unchecked_mut(0) = 0; - for (i, index) in self.indices.iter().enumerate() { - let start = *col_offset.get_unchecked(index.to_usize()) as usize; - let len = *col_offset.get_unchecked(index.to_usize() + 1) as usize - start; - data_size += len as u64; - *items.get_unchecked_mut(i) = (col_data_ptr.add(start) as u64, len); - *offsets.get_unchecked_mut(i + 1) = data_size; + let mut builder = BinaryColumnBuilder::with_capacity(num_rows, 0); + for index in self.indices.iter() { + unsafe { + builder.put_slice(col.index_unchecked(index.to_usize())); + builder.commit_row(); } } + builder.build() + } - // Build [`data`]. - let mut data: Vec = Vec::with_capacity(data_size as usize); - let mut data_ptr = data.as_mut_ptr(); - - unsafe { - for (str_ptr, len) in items.iter() { - copy_advance_aligned(*str_ptr as *const u8, &mut data_ptr, *len); + fn take_string_types(&mut self, col: &StringColumn) -> StringColumn { + if self.should_optimize_size(col.len()) { + let mut builder = StringColumnBuilder::with_capacity(self.indices.len()); + for index in self.indices.iter() { + unsafe { + builder.put_and_commit(col.index_unchecked(index.to_usize())); + } } - set_vec_len_by_ptr(&mut data, data_ptr); + builder.build() + } else { + let new_views = self.take_primitive_types(col.data.views().clone()); + let new_col = unsafe { + Utf8ViewArray::new_unchecked_unknown_md( + col.data.data_type().clone(), + new_views, + col.data.data_buffers().clone(), + None, + Some(col.data.total_buffer_len()), + ) + }; + StringColumn::new(new_col) } + } +} - BinaryColumn::new(data.into(), offsets.into()) +impl Column { + pub fn maybe_gc(self) -> Self { + match self { + Column::String(c) => { + let data = c.data.maybe_gc(); + let c = StringColumn::new(data); + Column::String(c) + } + Column::Nullable(n) => { + let c = n.column.maybe_gc(); + NullableColumn::new_column(c, n.validity) + } + other => other, + } } } diff --git a/src/query/expression/src/kernels/take_chunks.rs b/src/query/expression/src/kernels/take_chunks.rs index 34b0d2598fb9..d532a67fabb4 100644 --- a/src/query/expression/src/kernels/take_chunks.rs +++ b/src/query/expression/src/kernels/take_chunks.rs @@ -14,15 +14,15 @@ use std::sync::Arc; +use binary::BinaryColumnBuilder; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_arrow::arrow::buffer::Buffer; use databend_common_arrow::arrow::compute::merge_sort::MergeSlice; use databend_common_hashtable::RowPtr; use itertools::Itertools; +use string::StringColumnBuilder; use crate::kernels::take::BIT_MASK; -use crate::kernels::utils::copy_advance_aligned; -use crate::kernels::utils::set_vec_len_by_ptr; use crate::types::array::ArrayColumnBuilder; use crate::types::binary::BinaryColumn; use crate::types::bitmap::BitmapType; @@ -111,7 +111,6 @@ impl DataBlock { build_columns_data_type: &[DataType], indices: &[RowPtr], result_size: usize, - binary_items_buf: &mut Option>, ) -> Self { let num_columns = build_columns.len(); let result_columns = (0..num_columns) @@ -122,7 +121,6 @@ impl DataBlock { data_type.clone(), indices, result_size, - binary_items_buf, ); BlockEntry::new(data_type.clone(), Value::Column(column)) }) @@ -631,7 +629,6 @@ impl Column { data_type: DataType, indices: &[RowPtr], result_size: usize, - binary_items_buf: &mut Option>, ) -> Column { match &columns { ColumnVec::Null { .. } => Column::Null { len: result_size }, @@ -655,12 +652,12 @@ impl Column { ColumnVec::Boolean(columns) => { Column::Boolean(Self::take_block_vec_boolean_types(columns, indices)) } - ColumnVec::Binary(columns) => BinaryType::upcast_column( - Self::take_block_vec_binary_types(columns, indices, binary_items_buf.as_mut()), - ), - ColumnVec::String(columns) => StringType::upcast_column( - Self::take_block_vec_string_types(columns, indices, binary_items_buf.as_mut()), - ), + ColumnVec::Binary(columns) => { + BinaryType::upcast_column(Self::take_block_vec_binary_types(columns, indices)) + } + ColumnVec::String(columns) => { + StringType::upcast_column(Self::take_block_vec_string_types(columns, indices)) + } ColumnVec::Timestamp(columns) => { let builder = Self::take_block_vec_primitive_types(columns, indices); let ts = >::upcast_column(>::column_from_vec( @@ -713,9 +710,9 @@ impl Column { columns, builder, indices, ) } - ColumnVec::Bitmap(columns) => BitmapType::upcast_column( - Self::take_block_vec_binary_types(columns, indices, binary_items_buf.as_mut()), - ), + ColumnVec::Bitmap(columns) => { + BitmapType::upcast_column(Self::take_block_vec_binary_types(columns, indices)) + } ColumnVec::Nullable(columns) => { let inner_data_type = data_type.as_nullable().unwrap(); let inner_column = Self::take_column_vec_indices( @@ -723,7 +720,6 @@ impl Column { *inner_data_type.clone(), indices, result_size, - binary_items_buf, ); let inner_bitmap = Self::take_column_vec_indices( @@ -731,7 +727,6 @@ impl Column { DataType::Boolean, indices, result_size, - binary_items_buf, ); NullableColumn::new_column( @@ -750,25 +745,22 @@ impl Column { ty.clone(), indices, result_size, - binary_items_buf, ) }) .collect(); Column::Tuple(fields) } - ColumnVec::Variant(columns) => VariantType::upcast_column( - Self::take_block_vec_binary_types(columns, indices, binary_items_buf.as_mut()), - ), - ColumnVec::Geometry(columns) => GeometryType::upcast_column( - Self::take_block_vec_binary_types(columns, indices, binary_items_buf.as_mut()), - ), + ColumnVec::Variant(columns) => { + VariantType::upcast_column(Self::take_block_vec_binary_types(columns, indices)) + } + ColumnVec::Geometry(columns) => { + GeometryType::upcast_column(Self::take_block_vec_binary_types(columns, indices)) + } ColumnVec::Geography(columns) => { let columns = columns.iter().map(|x| x.0.clone()).collect::>(); GeographyType::upcast_column(GeographyColumn(Self::take_block_vec_binary_types( - &columns, - indices, - binary_items_buf.as_mut(), + &columns, indices, ))) } } @@ -784,73 +776,30 @@ impl Column { builder } - pub fn take_block_vec_binary_types( - col: &[BinaryColumn], - indices: &[RowPtr], - binary_items_buf: Option<&mut Vec<(u64, usize)>>, - ) -> BinaryColumn { - let num_rows = indices.len(); - - // Each element of `items` is (string pointer(u64), string length), if `binary_items_buf` - // can be reused, we will not re-allocate memory. - let mut items: Option> = match &binary_items_buf { - Some(binary_items_buf) if binary_items_buf.capacity() >= num_rows => None, - _ => Some(Vec::with_capacity(num_rows)), - }; - let items = match items.is_some() { - true => items.as_mut().unwrap(), - false => binary_items_buf.unwrap(), - }; - - // [`BinaryColumn`] consists of [`data`] and [`offset`], we build [`data`] and [`offset`] respectively, - // and then call `BinaryColumn::new(data.into(), offsets.into())` to create [`BinaryColumn`]. - let mut offsets: Vec = Vec::with_capacity(num_rows + 1); - let mut data_size = 0; - - // Build [`offset`] and calculate `data_size` required by [`data`]. - unsafe { - items.set_len(num_rows); - offsets.set_len(num_rows + 1); - *offsets.get_unchecked_mut(0) = 0; - for (i, row_ptr) in indices.iter().enumerate() { - let item = - col[row_ptr.chunk_index as usize].index_unchecked(row_ptr.row_index as usize); - data_size += item.len() as u64; - *items.get_unchecked_mut(i) = (item.as_ptr() as u64, item.len()); - *offsets.get_unchecked_mut(i + 1) = data_size; - } - } - - // Build [`data`]. - let mut data: Vec = Vec::with_capacity(data_size as usize); - let mut data_ptr = data.as_mut_ptr(); - - unsafe { - for (str_ptr, len) in items.iter() { - copy_advance_aligned(*str_ptr as *const u8, &mut data_ptr, *len); + // TODO: reuse the buffer by `SELECTIVITY_THRESHOLD` + pub fn take_block_vec_binary_types(col: &[BinaryColumn], indices: &[RowPtr]) -> BinaryColumn { + let mut builder = BinaryColumnBuilder::with_capacity(indices.len(), 0); + for row_ptr in indices { + unsafe { + builder.put_slice( + col[row_ptr.chunk_index as usize].index_unchecked(row_ptr.row_index as usize), + ); + builder.commit_row(); } - set_vec_len_by_ptr(&mut data, data_ptr); } - - BinaryColumn::new(data.into(), offsets.into()) + builder.build() } - pub fn take_block_vec_string_types( - cols: &[StringColumn], - indices: &[RowPtr], - binary_items_buf: Option<&mut Vec<(u64, usize)>>, - ) -> StringColumn { - let binary_cols = cols - .iter() - .map(|col| col.clone().into()) - .collect::>(); - unsafe { - StringColumn::from_binary_unchecked(Self::take_block_vec_binary_types( - &binary_cols, - indices, - binary_items_buf, - )) + pub fn take_block_vec_string_types(col: &[StringColumn], indices: &[RowPtr]) -> StringColumn { + let mut builder = StringColumnBuilder::with_capacity(indices.len()); + for row_ptr in indices { + unsafe { + builder.put_and_commit( + col[row_ptr.chunk_index as usize].index_unchecked(row_ptr.row_index as usize), + ); + } } + builder.build() } pub fn take_block_vec_boolean_types(col: &[Bitmap], indices: &[RowPtr]) -> Bitmap { diff --git a/src/query/expression/src/kernels/take_compact.rs b/src/query/expression/src/kernels/take_compact.rs index a2f97b894956..2cf400264b6d 100644 --- a/src/query/expression/src/kernels/take_compact.rs +++ b/src/query/expression/src/kernels/take_compact.rs @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +use binary::BinaryColumnBuilder; +use databend_common_arrow::arrow::array::Array; +use databend_common_arrow::arrow::array::Utf8ViewArray; use databend_common_arrow::arrow::buffer::Buffer; +use databend_common_base::vec_ext::VecExt; use databend_common_exception::Result; -use crate::kernels::utils::copy_advance_aligned; -use crate::kernels::utils::set_vec_len_by_ptr; -use crate::kernels::utils::store_advance_aligned; use crate::types::binary::BinaryColumn; use crate::types::nullable::NullableColumn; use crate::types::string::StringColumn; @@ -161,11 +162,10 @@ impl<'a> ValueVisitor for TakeCompactVisitor<'a> { Ok(()) } - fn visit_string(&mut self, column: StringColumn) -> Result<()> { - let column: BinaryColumn = column.into(); - self.result = Some(Value::Column(StringType::upcast_column(unsafe { - StringColumn::from_binary_unchecked(self.take_binary_types(&column)) - }))); + fn visit_string(&mut self, col: StringColumn) -> Result<()> { + self.result = Some(Value::Column(StringType::upcast_column( + self.take_string_types(&col), + ))); Ok(()) } @@ -179,15 +179,14 @@ impl<'a> ValueVisitor for TakeCompactVisitor<'a> { impl<'a> TakeCompactVisitor<'a> { fn take_primitive_types(&mut self, buffer: Buffer) -> Buffer { - let col_ptr = buffer.as_slice().as_ptr(); + let buffer = buffer.as_slice(); let mut builder: Vec = Vec::with_capacity(self.num_rows); - let mut ptr = builder.as_mut_ptr(); let mut remain; unsafe { for (index, cnt) in self.indices.iter() { if *cnt == 1 { - copy_advance_aligned(col_ptr.add(*index as usize), &mut ptr, 1); + builder.push_unchecked(buffer[*index as usize]); continue; } @@ -195,11 +194,12 @@ impl<'a> TakeCompactVisitor<'a> { // [___________] => [x__________] => [xx_________] => [xxxx_______] => [xxxxxxxx___] // Since cnt > 0, then 31 - cnt.leading_zeros() >= 0. let max_segment = 1 << (31 - cnt.leading_zeros()); - let base_ptr = ptr; - copy_advance_aligned(col_ptr.add(*index as usize), &mut ptr, 1); + let base_pos = builder.len(); + builder.push_unchecked(buffer[*index as usize]); + let mut cur_segment = 1; while cur_segment < max_segment { - copy_advance_aligned(base_ptr, &mut ptr, cur_segment); + builder.extend_from_within(base_pos..base_pos + cur_segment); cur_segment <<= 1; } @@ -208,78 +208,39 @@ impl<'a> TakeCompactVisitor<'a> { // ^^^^ ---> ^^^^ remain = *cnt as usize - max_segment; if remain > 0 { - copy_advance_aligned(base_ptr, &mut ptr, remain); + builder.extend_from_within(base_pos..base_pos + remain) } } - set_vec_len_by_ptr(&mut builder, ptr); } builder.into() } fn take_binary_types(&mut self, col: &BinaryColumn) -> BinaryColumn { - // Each element of `items` is (string(&[u8]), repeat times). - let mut items = Vec::with_capacity(self.indices.len()); - let mut items_ptr = items.as_mut_ptr(); - - // [`BinaryColumn`] consists of [`data`] and [`offset`], we build [`data`] and [`offset`] respectively, - // and then call `BinaryColumn::new(data.into(), offsets.into())` to create [`BinaryColumn`]. - let mut offsets = Vec::with_capacity(self.num_rows + 1); - let mut offsets_ptr = offsets.as_mut_ptr(); - let mut data_size = 0; - - // Build [`offset`] and calculate `data_size` required by [`data`]. - unsafe { - store_advance_aligned::(0, &mut offsets_ptr); - for (index, cnt) in self.indices.iter() { - let item = col.index_unchecked(*index as usize); - store_advance_aligned((item, *cnt), &mut items_ptr); - for _ in 0..*cnt { - data_size += item.len() as u64; - store_advance_aligned(data_size, &mut offsets_ptr); - } - } - set_vec_len_by_ptr(&mut offsets, offsets_ptr); - set_vec_len_by_ptr(&mut items, items_ptr); - } - - // Build [`data`]. - let mut data: Vec = Vec::with_capacity(data_size as usize); - let mut data_ptr = data.as_mut_ptr(); - let mut remain; - - unsafe { - for (item, cnt) in items { - let len = item.len(); - if cnt == 1 { - copy_advance_aligned(item.as_ptr(), &mut data_ptr, len); - continue; - } - - // Using the doubling method to copy the max segment memory. - // [___________] => [x__________] => [xx_________] => [xxxx_______] => [xxxxxxxx___] - // Since cnt > 0, then 31 - cnt.leading_zeros() >= 0. - let max_bit_num = 1 << (31 - cnt.leading_zeros()); - let max_segment = max_bit_num * len; - let base_data_ptr = data_ptr; - copy_advance_aligned(item.as_ptr(), &mut data_ptr, len); - let mut cur_segment = len; - while cur_segment < max_segment { - copy_advance_aligned(base_data_ptr, &mut data_ptr, cur_segment); - cur_segment <<= 1; - } - - // Copy the remaining memory directly. - // [xxxxxxxxxx____] => [xxxxxxxxxxxxxx] - // ^^^^ ---> ^^^^ - remain = cnt as usize - max_bit_num; - if remain > 0 { - copy_advance_aligned(base_data_ptr, &mut data_ptr, remain * len); + let num_rows = self.num_rows; + let mut builder = BinaryColumnBuilder::with_capacity(num_rows, 0); + for (index, cnt) in self.indices.iter() { + for _ in 0..*cnt { + unsafe { + builder.put_slice(col.index_unchecked(*index as usize)); + builder.commit_row(); } } - set_vec_len_by_ptr(&mut data, data_ptr); } + builder.build() + } - BinaryColumn::new(data.into(), offsets.into()) + fn take_string_types(&mut self, col: &StringColumn) -> StringColumn { + let new_views = self.take_primitive_types(col.data.views().clone()); + let new_col = unsafe { + Utf8ViewArray::new_unchecked_unknown_md( + col.data.data_type().clone(), + new_views, + col.data.data_buffers().clone(), + None, + Some(col.data.total_buffer_len()), + ) + }; + StringColumn::new(new_col) } } diff --git a/src/query/expression/src/kernels/take_ranges.rs b/src/query/expression/src/kernels/take_ranges.rs index 3c9159abb44b..872f3f5829ef 100644 --- a/src/query/expression/src/kernels/take_ranges.rs +++ b/src/query/expression/src/kernels/take_ranges.rs @@ -14,9 +14,13 @@ use core::ops::Range; +use binary::BinaryColumnBuilder; +use databend_common_arrow::arrow::array::Array; +use databend_common_arrow::arrow::array::Utf8ViewArray; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_arrow::arrow::bitmap::MutableBitmap; use databend_common_arrow::arrow::buffer::Buffer; +use databend_common_base::vec_ext::VecExt; use databend_common_exception::Result; use crate::types::binary::BinaryColumn; @@ -31,6 +35,7 @@ use crate::Value; impl DataBlock { // Generate a new `DataBlock` by the specified indices ranges. + // ranges already cover most data pub fn take_ranges(self, ranges: &[Range], num_rows: usize) -> Result { debug_assert_eq!( ranges @@ -154,8 +159,19 @@ impl<'a> ValueVisitor for TakeRangeVisitor<'a> { } fn visit_boolean(&mut self, bitmap: Bitmap) -> Result<()> { - let mut builder = MutableBitmap::with_capacity(self.num_rows); + // Fast path: avoid iterating column to generate a new bitmap. + // If this [`Bitmap`] is all true or all false and `num_rows <= bitmap.len()``, + // we can just slice it. + if self.num_rows <= bitmap.len() + && (bitmap.unset_bits() == 0 || bitmap.unset_bits() == bitmap.len()) + { + self.result = Some(Value::Column(BooleanType::upcast_column( + bitmap.sliced(0, self.num_rows), + ))); + return Ok(()); + } + let mut builder = MutableBitmap::with_capacity(self.num_rows); let src = bitmap.values(); let offset = bitmap.offset(); self.ranges.iter().for_each(|range| { @@ -176,10 +192,9 @@ impl<'a> ValueVisitor for TakeRangeVisitor<'a> { } fn visit_string(&mut self, column: StringColumn) -> Result<()> { - let column: BinaryColumn = column.into(); - self.result = Some(Value::Column(StringType::upcast_column(unsafe { - StringColumn::from_binary_unchecked(self.take_binary_types(&column)) - }))); + self.result = Some(Value::Column(StringType::upcast_column( + self.take_string_types(&column), + ))); Ok(()) } @@ -196,37 +211,37 @@ impl<'a> TakeRangeVisitor<'a> { let mut builder: Vec = Vec::with_capacity(self.num_rows); let values = buffer.as_slice(); for range in self.ranges { - builder.extend(&values[range.start as usize..range.end as usize]); + unsafe { + builder + .extend_from_slice_unchecked(&values[range.start as usize..range.end as usize]) + }; } builder.into() } fn take_binary_types(&mut self, values: &BinaryColumn) -> BinaryColumn { - let mut offsets: Vec = Vec::with_capacity(self.num_rows + 1); - let mut data_size = 0; - - let value_data = values.data().as_slice(); - let values_offset = values.offsets().as_slice(); - // Build [`offset`] and calculate `data_size` required by [`data`]. - offsets.push(0); + let mut builder = BinaryColumnBuilder::with_capacity(self.num_rows, 0); for range in self.ranges { - let mut offset_start = values_offset[range.start as usize]; - for offset_end in values_offset[range.start as usize + 1..range.end as usize + 1].iter() - { - data_size += offset_end - offset_start; - offset_start = *offset_end; - offsets.push(data_size); + for index in range.start as usize..range.end as usize { + let value = unsafe { values.index_unchecked(index) }; + builder.put_slice(value); + builder.commit_row(); } } + builder.build() + } - // Build [`data`]. - let mut data: Vec = Vec::with_capacity(data_size as usize); - for range in self.ranges { - let col_data = &value_data[values_offset[range.start as usize] as usize - ..values_offset[range.end as usize] as usize]; - data.extend_from_slice(col_data); - } - - BinaryColumn::new(data.into(), offsets.into()) + fn take_string_types(&mut self, col: &StringColumn) -> StringColumn { + let new_views = self.take_primitive_types(col.data.views().clone()); + let new_col = unsafe { + Utf8ViewArray::new_unchecked_unknown_md( + col.data.data_type().clone(), + new_views, + col.data.data_buffers().clone(), + None, + Some(col.data.total_buffer_len()), + ) + }; + StringColumn::new(new_col) } } diff --git a/src/query/expression/src/kernels/utils.rs b/src/query/expression/src/kernels/utils.rs index 0e37bcb4e470..b5f8b0d98b34 100644 --- a/src/query/expression/src/kernels/utils.rs +++ b/src/query/expression/src/kernels/utils.rs @@ -12,100 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use super::take::BIT_MASK; - -/// # Safety -/// -/// * `ptr` must be [valid] for writes of `size_of::()` bytes. -/// * The region of memory beginning at `val` with a size of `size_of::()` -/// bytes must *not* overlap with the region of memory beginning at `ptr` -/// with the same size. -#[inline] -pub unsafe fn store_advance(val: &T, ptr: &mut *mut u8) { - unsafe { - std::ptr::copy_nonoverlapping(val as *const T as *const u8, *ptr, std::mem::size_of::()); - *ptr = ptr.add(std::mem::size_of::()) - } -} - -/// # Safety -/// -/// * `ptr` must be [valid] for writes. -/// * `ptr` must be properly aligned. -#[inline] -pub unsafe fn store_advance_aligned(val: T, ptr: &mut *mut T) { - unsafe { - std::ptr::write(*ptr, val); - *ptr = ptr.add(1) - } -} - -/// # Safety -/// -/// * `src` must be [valid] for reads of `count * size_of::()` bytes. -/// * `ptr` must be [valid] for writes of `count * size_of::()` bytes. -/// * Both `src` and `dst` must be properly aligned. -/// * The region of memory beginning at `val` with a size of `count * size_of::()` -/// bytes must *not* overlap with the region of memory beginning at `ptr` with the -/// same size. -#[inline] -pub unsafe fn copy_advance_aligned(src: *const T, ptr: &mut *mut T, count: usize) { - unsafe { - std::ptr::copy_nonoverlapping(src, *ptr, count); - *ptr = ptr.add(count); - } -} - -/// # Safety -/// * `src` + `src_idx`(in bits) must be [valid] for reads of `len` bits. -/// * `ptr` must be [valid] for writes of `len` bits. -pub unsafe fn copy_continuous_bits( - ptr: &mut *mut u8, - src: &[u8], - mut dst_idx: usize, - mut src_idx: usize, - len: usize, -) -> (u8, usize) { - let mut unset_bits = 0; - let chunks = BitChunks::new(src, src_idx, len); - chunks.iter().for_each(|chunk| { - unset_bits += chunk.count_zeros(); - copy_advance_aligned(&chunk as *const _ as *const u8, ptr, 8); - }); - - let mut remainder = chunks.remainder_len(); - dst_idx += len - remainder; - src_idx += len - remainder; - - let mut buf = 0; - while remainder > 0 { - if (*src.as_ptr().add(src_idx >> 3) & BIT_MASK[src_idx & 7]) != 0 { - buf |= BIT_MASK[dst_idx % 8]; - } else { - unset_bits += 1; - } - src_idx += 1; - dst_idx += 1; - remainder -= 1; - if dst_idx % 8 == 0 { - store_advance_aligned(buf, ptr); - buf = 0; - } - } - (buf, unset_bits as usize) -} - -/// # Safety -/// -/// * `(ptr as usize - vec.as_ptr() as usize) / std::mem::size_of::()` must be -/// less than or equal to the capacity of Vec. -#[inline] -pub unsafe fn set_vec_len_by_ptr(vec: &mut Vec, ptr: *const T) { - unsafe { - vec.set_len(ptr.offset_from(vec.as_ptr()) as usize); - } -} - /// # Safety /// # As: core::ptr::copy_nonoverlapping #[inline] @@ -119,99 +25,3 @@ pub unsafe fn store(val: &T, ptr: *mut u8) { pub unsafe fn read(ptr: *const u8) -> T { core::ptr::read_unaligned::(ptr as _) } - -/// Iterates over an arbitrarily aligned byte buffer -/// -/// Yields an iterator of u64, and a remainder. The first byte in the buffer -/// will be the least significant byte in output u64 -#[derive(Debug)] -pub struct BitChunks<'a> { - buffer: &'a [u8], - /// offset inside a byte, guaranteed to be between 0 and 7 (inclusive) - bit_offset: usize, - /// number of complete u64 chunks - chunk_len: usize, - /// number of remaining bits, guaranteed to be between 0 and 63 (inclusive) - remainder_len: usize, -} - -impl<'a> BitChunks<'a> { - pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self { - assert!((offset + len + 7) / 8 <= buffer.len() * 8); - - let byte_offset = offset / 8; - let bit_offset = offset % 8; - - // number of complete u64 chunks - let chunk_len = len / 64; - // number of remaining bits - let remainder_len = len % 64; - - BitChunks::<'a> { - buffer: &buffer[byte_offset..], - bit_offset, - chunk_len, - remainder_len, - } - } -} - -#[derive(Debug)] -pub struct BitChunkIterator { - buffer: *const u64, - bit_offset: usize, - chunk_len: usize, - index: usize, -} - -impl<'a> BitChunks<'a> { - /// Returns the number of remaining bits, guaranteed to be between 0 and 63 (inclusive) - #[inline] - pub const fn remainder_len(&self) -> usize { - self.remainder_len - } - - /// Returns an iterator over chunks of 64 bits represented as an u64 - #[inline] - pub const fn iter(&self) -> BitChunkIterator { - BitChunkIterator { - buffer: self.buffer.as_ptr() as *const u64, - bit_offset: self.bit_offset, - chunk_len: self.chunk_len, - index: 0, - } - } -} - -impl Iterator for BitChunkIterator { - type Item = u64; - - #[inline] - fn next(&mut self) -> Option { - let index = self.index; - if index >= self.chunk_len { - return None; - } - - // bit-packed buffers are stored starting with the least-significant byte first - // so when reading as u64 on a big-endian machine, the bytes need to be swapped - let current = unsafe { std::ptr::read_unaligned(self.buffer.add(index)).to_le() }; - - let bit_offset = self.bit_offset; - - let combined = if bit_offset == 0 { - current - } else { - // the constructor ensures that bit_offset is in 0..8 - // that means we need to read at most one additional byte to fill in the high bits - let next = - unsafe { std::ptr::read_unaligned(self.buffer.add(index + 1) as *const u8) as u64 }; - - (current >> bit_offset) | (next << (64 - bit_offset)) - }; - - self.index = index + 1; - - Some(combined) - } -} diff --git a/src/query/expression/src/row/row_converter.rs b/src/query/expression/src/row/row_converter.rs index 3c4d967ae5c0..f259df073192 100644 --- a/src/query/expression/src/row/row_converter.rs +++ b/src/query/expression/src/row/row_converter.rs @@ -84,10 +84,7 @@ impl RowConverter { encode_column(&mut builder, column, field.asc, field.nulls_first); } - let rows = builder.build(); - debug_assert_eq!(*rows.offsets().last().unwrap(), rows.data().len() as u64); - debug_assert!(rows.offsets().windows(2).all(|w| w[0] <= w[1])); - rows + builder.build() } fn new_empty_rows(&self, cols: &[Column], num_rows: usize) -> BinaryColumnBuilder { diff --git a/src/query/expression/src/types/array.rs b/src/query/expression/src/types/array.rs index 83d25cdb4adf..058954d323b0 100755 --- a/src/query/expression/src/types/array.rs +++ b/src/query/expression/src/types/array.rs @@ -232,6 +232,7 @@ impl ArrayColumn { } pub fn slice(&self, range: Range) -> Self { + // We need keep the last offsets in slice let offsets = self .offsets .clone() diff --git a/src/query/expression/src/types/bitmap.rs b/src/query/expression/src/types/bitmap.rs index 1823941ba2b7..ab411346980a 100644 --- a/src/query/expression/src/types/bitmap.rs +++ b/src/query/expression/src/types/bitmap.rs @@ -161,7 +161,7 @@ impl ValueType for BitmapType { } fn column_memory_size(col: &Self::Column) -> usize { - col.data().len() + col.offsets().len() * 8 + col.memory_size() } #[inline(always)] diff --git a/src/query/expression/src/types/geography.rs b/src/query/expression/src/types/geography.rs index 8c0d95e92c4e..b41aad37d9df 100644 --- a/src/query/expression/src/types/geography.rs +++ b/src/query/expression/src/types/geography.rs @@ -19,6 +19,7 @@ use std::ops::Range; use borsh::BorshDeserialize; use borsh::BorshSerialize; +use databend_common_arrow::arrow::trusted_len::TrustedLen; use databend_common_exception::Result; use databend_common_io::geography::*; use databend_common_io::wkb::make_point; @@ -29,8 +30,7 @@ use geozero::ToWkt; use serde::Deserialize; use serde::Serialize; -use super::binary::BinaryLike; -use super::binary::BinaryLikeIterator; +use super::binary::BinaryIterator; use crate::property::Domain; use crate::types::binary::BinaryColumn; use crate::types::binary::BinaryColumnBuilder; @@ -83,12 +83,6 @@ impl<'a> GeographyRef<'a> { } } -impl<'a> BinaryLike<'a> for GeographyRef<'a> { - fn from(value: &'a [u8]) -> Self { - GeographyRef(value) - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct GeographyType; @@ -281,10 +275,8 @@ impl GeographyColumn { } pub fn iter(&self) -> GeographyIterator<'_> { - BinaryLikeIterator { - data: &self.0.data, - offsets: self.0.offsets.windows(2), - _t: std::marker::PhantomData, + GeographyIterator { + inner: self.0.iter(), } } @@ -293,4 +285,18 @@ impl GeographyColumn { } } -pub type GeographyIterator<'a> = BinaryLikeIterator<'a, GeographyRef<'a>>; +pub struct GeographyIterator<'a> { + inner: BinaryIterator<'a>, +} + +impl<'a> Iterator for GeographyIterator<'a> { + type Item = GeographyRef<'a>; + + fn next(&mut self) -> Option { + self.inner.next().map(GeographyRef) + } +} + +unsafe impl<'a> TrustedLen for GeographyIterator<'a> {} + +unsafe impl<'a> std::iter::TrustedLen for GeographyIterator<'a> {} diff --git a/src/query/expression/src/types/geometry.rs b/src/query/expression/src/types/geometry.rs index 67f1afc95ec9..d1dfe01911cc 100644 --- a/src/query/expression/src/types/geometry.rs +++ b/src/query/expression/src/types/geometry.rs @@ -165,7 +165,7 @@ impl ValueType for GeometryType { } fn column_memory_size(col: &Self::Column) -> usize { - col.data().len() + col.offsets().len() * 8 + col.memory_size() } #[inline(always)] diff --git a/src/query/expression/src/types/string.rs b/src/query/expression/src/types/string.rs index c4fe32534c96..bc06218b358f 100644 --- a/src/query/expression/src/types/string.rs +++ b/src/query/expression/src/types/string.rs @@ -13,26 +13,23 @@ // limitations under the License. use std::cmp::Ordering; -use std::iter::once; use std::ops::Range; -use databend_common_arrow::arrow::buffer::Buffer; +use databend_common_arrow::arrow::array::MutableBinaryViewArray; +use databend_common_arrow::arrow::array::Utf8ViewArray; use databend_common_arrow::arrow::trusted_len::TrustedLen; +use databend_common_base::slice_ext::GetSaferUnchecked; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use serde::Deserialize; -use serde::Serialize; use super::binary::BinaryColumn; use super::binary::BinaryColumnBuilder; -use super::binary::BinaryIterator; use crate::property::Domain; use crate::types::ArgType; use crate::types::DataType; use crate::types::DecimalSize; use crate::types::GenericMap; use crate::types::ValueType; -use crate::utils::arrow::buffer_into_mut; use crate::values::Column; use crate::values::Scalar; use crate::ColumnBuilder; @@ -137,8 +134,7 @@ impl ValueType for StringType { } fn push_item(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>) { - builder.put_str(item); - builder.commit_row(); + builder.put_and_commit(item); } fn push_item_repeat(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>, n: usize) { @@ -146,7 +142,7 @@ impl ValueType for StringType { } fn push_default(builder: &mut Self::ColumnBuilder) { - builder.commit_row(); + builder.put_and_commit(""); } fn append_column(builder: &mut Self::ColumnBuilder, other_builder: &Self::Column) { @@ -166,7 +162,7 @@ impl ValueType for StringType { } fn column_memory_size(col: &Self::Column) -> usize { - col.data().len() + col.offsets().len() * 8 + col.memory_size() } #[inline(always)] @@ -218,85 +214,38 @@ impl ArgType for StringType { } fn create_builder(capacity: usize, _: &GenericMap) -> Self::ColumnBuilder { - StringColumnBuilder::with_capacity(capacity, 0) + StringColumnBuilder::with_capacity(capacity) } } -#[derive(Clone, PartialEq)] +#[derive(Clone)] pub struct StringColumn { - data: Buffer, - offsets: Buffer, + pub(crate) data: Utf8ViewArray, } impl StringColumn { - pub fn new(data: Buffer, offsets: Buffer) -> Self { - let col = BinaryColumn::new(data, offsets); - - col.check_utf8().unwrap(); - - unsafe { Self::from_binary_unchecked(col) } - } - - /// # Safety - /// This function is unsound iff: - /// * the offsets are not monotonically increasing - /// * The `data` between two consecutive `offsets` are not valid utf8 - pub unsafe fn new_unchecked(data: Buffer, offsets: Buffer) -> Self { - let col = BinaryColumn::new(data, offsets); - - #[cfg(debug_assertions)] - col.check_utf8().unwrap(); - - unsafe { Self::from_binary_unchecked(col) } - } - - /// # Safety - /// This function is unsound iff: - /// * the offsets are not monotonically increasing - /// * The `data` between two consecutive `offsets` are not valid utf8 - pub unsafe fn from_binary_unchecked(col: BinaryColumn) -> Self { - #[cfg(debug_assertions)] - col.check_utf8().unwrap(); - - StringColumn { - data: col.data, - offsets: col.offsets, - } + pub fn new(data: Utf8ViewArray) -> Self { + Self { data } } pub fn len(&self) -> usize { - self.offsets.len() - 1 + self.data.len() } pub fn current_buffer_len(&self) -> usize { - (*self.offsets().last().unwrap() - *self.offsets().first().unwrap()) as _ - } - - pub fn data(&self) -> &Buffer { - &self.data - } - - pub fn offsets(&self) -> &Buffer { - &self.offsets + self.data.total_bytes_len() } pub fn memory_size(&self) -> usize { - let offsets = self.offsets.as_slice(); - let len = offsets.len(); - len * 8 + (offsets[len - 1] - offsets[0]) as usize + self.data.total_buffer_len() + self.len() * 12 } pub fn index(&self, index: usize) -> Option<&str> { - if index + 1 >= self.offsets.len() { + if index >= self.len() { return None; } - let bytes = &self.data[(self.offsets[index] as usize)..(self.offsets[index + 1] as usize)]; - - #[cfg(debug_assertions)] - bytes.check_utf8().unwrap(); - - unsafe { Some(std::str::from_utf8_unchecked(bytes)) } + Some(unsafe { self.index_unchecked(index) }) } /// # Safety @@ -304,16 +253,9 @@ impl StringColumn { /// Calling this method with an out-of-bounds index is *[undefined behavior]* #[inline] pub unsafe fn index_unchecked(&self, index: usize) -> &str { - debug_assert!(index + 1 < self.offsets.len()); - - let start = *self.offsets.get_unchecked(index) as usize; - let end = *self.offsets.get_unchecked(index + 1) as usize; - let bytes = &self.data.get_unchecked(start..end); - - #[cfg(debug_assertions)] - bytes.check_utf8().unwrap(); + debug_assert!(index < self.data.len()); - std::str::from_utf8_unchecked(bytes) + self.data.value_unchecked(index) } /// # Safety @@ -321,71 +263,113 @@ impl StringColumn { /// Calling this method with an out-of-bounds index is *[undefined behavior]* #[inline] pub unsafe fn index_unchecked_bytes(&self, index: usize) -> &[u8] { - debug_assert!(index + 1 < self.offsets.len()); + debug_assert!(index < self.data.len()); - let start = *self.offsets.get_unchecked(index) as usize; - let end = *self.offsets.get_unchecked(index + 1) as usize; - self.data.get_unchecked(start..end) + self.data.value_unchecked(index).as_bytes() } pub fn slice(&self, range: Range) -> Self { - let offsets = self - .offsets + let data = self + .data .clone() - .sliced(range.start, range.end - range.start + 1); - StringColumn { - data: self.data.clone(), - offsets, - } + .sliced(range.start, range.end - range.start); + Self { data } } pub fn iter(&self) -> StringIterator { StringIterator { - data: &self.data, - offsets: self.offsets.windows(2), + col: self, + index: 0, } } - pub fn iter_binary(&self) -> BinaryIterator { - BinaryIterator { - data: &self.data, - offsets: self.offsets.windows(2), - _t: std::marker::PhantomData, - } + pub fn into_inner(self) -> Utf8ViewArray { + self.data } - pub fn into_buffer(self) -> (Buffer, Buffer) { - (self.data, self.offsets) + pub fn try_from_binary(col: BinaryColumn) -> Result { + let builder = StringColumnBuilder::try_from_bin_column(col)?; + Ok(builder.build()) } - pub fn check_valid(&self) -> Result<()> { - let offsets = self.offsets.as_slice(); - let len = offsets.len(); - if len < 1 { - return Err(ErrorCode::Internal(format!( - "StringColumn offsets length must be equal or greater than 1, but got {}", - len - ))); - } + pub fn compare(col_i: &Self, i: usize, col_j: &Self, j: usize) -> Ordering { + let view_i = unsafe { col_i.data.views().as_slice().get_unchecked_release(i) }; + let view_j = unsafe { col_j.data.views().as_slice().get_unchecked_release(j) }; - for i in 1..len { - if offsets[i] < offsets[i - 1] { - return Err(ErrorCode::Internal(format!( - "StringColumn offsets value must be equal or greater than previous value, but got {}", - offsets[i] - ))); + if view_i.prefix == view_j.prefix { + unsafe { + let value_i = col_i.data.value_unchecked(i); + let value_j = col_j.data.value_unchecked(j); + value_i.cmp(value_j) } + } else { + view_i + .prefix + .to_le_bytes() + .cmp(&view_j.prefix.to_le_bytes()) + } + } + + pub fn compare_str(col: &Self, i: usize, value: &str) -> Ordering { + let view = unsafe { col.data.views().as_slice().get_unchecked_release(i) }; + let prefix = load_prefix(value.as_bytes()); + + if view.prefix == prefix { + let value_i = unsafe { col.data.value_unchecked(i) }; + value_i.cmp(value) + } else { + view.prefix.to_le_bytes().as_slice().cmp(value.as_bytes()) } - Ok(()) } } -impl From for BinaryColumn { - fn from(col: StringColumn) -> BinaryColumn { - BinaryColumn { - data: col.data, - offsets: col.offsets, +// Loads (up to) the first 4 bytes of s as little-endian, padded with zeros. +#[inline] +fn load_prefix(s: &[u8]) -> u32 { + let start = &s[..s.len().min(4)]; + let mut tmp = [0u8; 4]; + tmp[..start.len()].copy_from_slice(start); + u32::from_le_bytes(tmp) +} + +impl PartialEq for StringColumn { + fn eq(&self, other: &Self) -> bool { + self.cmp(other) == Ordering::Equal + } +} + +impl Eq for StringColumn {} + +impl PartialOrd for StringColumn { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for StringColumn { + fn cmp(&self, other: &Self) -> Ordering { + for i in 0..self.len().max(other.len()) { + match (self.data.views().get(i), other.data.views().get(i)) { + (Some(left), Some(right)) => { + match left.prefix.to_le_bytes().cmp(&right.prefix.to_le_bytes()) { + Ordering::Equal => unsafe { + let left = self.data.value_unchecked(i); + let right = other.data.value_unchecked(i); + match left.cmp(right) { + Ordering::Equal => continue, + non_eq => return non_eq, + } + }, + non_eq => return non_eq, + } + } + (Some(_), None) => return Ordering::Greater, + (None, Some(_)) => return Ordering::Less, + (None, None) => return Ordering::Equal, + } } + + Ordering::Equal } } @@ -393,36 +377,36 @@ impl TryFrom for StringColumn { type Error = ErrorCode; fn try_from(col: BinaryColumn) -> Result { - col.check_utf8()?; - Ok(StringColumn { - data: col.data, - offsets: col.offsets, - }) + StringColumn::try_from_binary(col) + } +} + +impl From for BinaryColumn { + fn from(col: StringColumn) -> BinaryColumn { + BinaryColumnBuilder::from_iter(col.iter().map(|x| x.as_bytes())).build() } } pub struct StringIterator<'a> { - data: &'a [u8], - offsets: std::slice::Windows<'a, u64>, + col: &'a StringColumn, + index: usize, } impl<'a> Iterator for StringIterator<'a> { type Item = &'a str; fn next(&mut self) -> Option { - let bytes = self - .offsets - .next() - .map(|range| &self.data[(range[0] as usize)..(range[1] as usize)])?; - - #[cfg(debug_assertions)] - bytes.check_utf8().unwrap(); - - unsafe { Some(std::str::from_utf8_unchecked(bytes)) } + if self.index >= self.col.len() { + return None; + } + let value = self.col.index(self.index)?; + self.index += 1; + Some(value) } fn size_hint(&self) -> (usize, Option) { - self.offsets.size_hint() + let remaining = self.col.len() - self.index; + (remaining, Some(remaining)) } } @@ -430,252 +414,158 @@ unsafe impl<'a> TrustedLen for StringIterator<'a> {} unsafe impl<'a> std::iter::TrustedLen for StringIterator<'a> {} -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +type MutableUtf8ViewArray = MutableBinaryViewArray; + +#[derive(Debug, Clone)] pub struct StringColumnBuilder { - // if the StringColumnBuilder is created with `data_capacity`, need_estimated is false - pub need_estimated: bool, - pub data: Vec, - pub offsets: Vec, + pub data: MutableUtf8ViewArray, + pub row_buffer: Vec, } impl StringColumnBuilder { - pub fn with_capacity(len: usize, data_capacity: usize) -> Self { - let mut offsets = Vec::with_capacity(len + 1); - offsets.push(0); + pub fn with_capacity(len: usize) -> Self { + let data = MutableUtf8ViewArray::with_capacity(len); StringColumnBuilder { - need_estimated: data_capacity == 0 && len > 0, - data: Vec::with_capacity(data_capacity), - offsets, + data, + row_buffer: Vec::new(), } } pub fn from_column(col: StringColumn) -> Self { + let data = col.data.make_mut(); StringColumnBuilder { - need_estimated: col.data.is_empty(), - data: buffer_into_mut(col.data), - offsets: col.offsets.to_vec(), + data, + row_buffer: Vec::new(), } } - pub fn from_data(data: Vec, offsets: Vec) -> Self { - let builder = BinaryColumnBuilder::from_data(data, offsets); - builder.check_utf8().unwrap(); - unsafe { StringColumnBuilder::from_binary_unchecked(builder) } - } - - /// # Safety - /// This function is unsound iff: - /// * the offsets are not monotonically increasing - /// * The `data` between two consecutive `offsets` are not valid utf8 - pub unsafe fn from_binary_unchecked(col: BinaryColumnBuilder) -> Self { - #[cfg(debug_assertions)] - col.check_utf8().unwrap(); - - StringColumnBuilder { - need_estimated: col.need_estimated, - data: col.data, - offsets: col.offsets, + pub fn try_from_bin_column(col: BinaryColumn) -> Result { + let mut data = MutableUtf8ViewArray::with_capacity(col.len()); + col.data.as_slice().check_utf8()?; + for v in col.iter() { + data.push_value(unsafe { std::str::from_utf8_unchecked(v) }); } + + Ok(StringColumnBuilder { + data, + row_buffer: Vec::new(), + }) } pub fn repeat(scalar: &str, n: usize) -> Self { - let len = scalar.len(); - let data = scalar.as_bytes().repeat(n); - let offsets = once(0) - .chain((0..n).map(|i| (len * (i + 1)) as u64)) - .collect(); + let mut data = MutableUtf8ViewArray::with_capacity(n); + data.extend_constant(n, Some(scalar)); StringColumnBuilder { data, - offsets, - need_estimated: false, + row_buffer: Vec::new(), } } pub fn repeat_default(n: usize) -> Self { + let mut data = MutableUtf8ViewArray::with_capacity(n); + data.extend_constant(n, Some("")); StringColumnBuilder { - data: vec![], - offsets: vec![0; n + 1], - need_estimated: false, + data, + row_buffer: Vec::new(), } } pub fn len(&self) -> usize { - self.offsets.len() - 1 + self.data.len() } pub fn memory_size(&self) -> usize { - self.offsets.len() * 8 + self.data.len() + self.data.total_buffer_len } pub fn put_char(&mut self, item: char) { - self.data - .extend_from_slice(item.encode_utf8(&mut [0; 4]).as_bytes()); + match item.len_utf8() { + 1 => self.row_buffer.push(item as u8), + _ => self + .row_buffer + .extend_from_slice(item.encode_utf8(&mut [0; 4]).as_bytes()), + } } #[inline] - #[deprecated] - pub fn put_slice(&mut self, item: &[u8]) { - #[cfg(debug_assertions)] - item.check_utf8().unwrap(); + pub fn put_str(&mut self, item: &str) { + self.row_buffer.extend_from_slice(item.as_bytes()); + } - self.data.extend_from_slice(item); + #[inline] + pub fn put_and_commit>(&mut self, item: V) { + self.data.push_value_ignore_validity(item); } #[inline] - pub fn put_str(&mut self, item: &str) { - self.data.extend_from_slice(item.as_bytes()); + pub fn put_slice(&mut self, item: &[u8]) { + self.row_buffer.extend_from_slice(item); } pub fn put_char_iter(&mut self, iter: impl Iterator) { for c in iter { - let mut buf = [0; 4]; - let result = c.encode_utf8(&mut buf); - self.data.extend_from_slice(result.as_bytes()); + self.put_char(c); } } #[inline] pub fn commit_row(&mut self) { - self.offsets.push(self.data.len() as u64); - - if self.need_estimated - && self.offsets.len() - 1 == 64 - && self.offsets.len() < self.offsets.capacity() - { - let bytes_per_row = self.data.len() / 64 + 1; - let bytes_estimate = bytes_per_row * self.offsets.capacity(); - - const MAX_HINT_SIZE: usize = 1_000_000_000; - // if we are more than 10% over the capacity, we reserve more - if bytes_estimate < MAX_HINT_SIZE - && bytes_estimate as f64 > self.data.capacity() as f64 * 1.10f64 - { - self.data.reserve(bytes_estimate - self.data.capacity()); - } - } + debug_assert!(std::str::from_utf8(&self.row_buffer).is_ok()); + let str = unsafe { std::str::from_utf8_unchecked(&self.row_buffer) }; + self.data.push_value(str); + self.row_buffer.clear(); } pub fn append_column(&mut self, other: &StringColumn) { - // the first offset of other column may not be zero - let other_start = *other.offsets.first().unwrap(); - let other_last = *other.offsets.last().unwrap(); - let start = self.offsets.last().cloned().unwrap(); - self.data - .extend_from_slice(&other.data[(other_start as usize)..(other_last as usize)]); - self.offsets.extend( - other - .offsets - .iter() - .skip(1) - .map(|offset| start + offset - other_start), - ); + self.data.extend_values(other.iter()); } pub fn build(self) -> StringColumn { - unsafe { StringColumn::new_unchecked(self.data.into(), self.offsets.into()) } + StringColumn { + data: self.data.into(), + } } pub fn build_scalar(self) -> String { - assert_eq!(self.offsets.len(), 2); - - let bytes = self.data[(self.offsets[0] as usize)..(self.offsets[1] as usize)].to_vec(); + assert_eq!(self.len(), 1); - #[cfg(debug_assertions)] - bytes.check_utf8().unwrap(); - - unsafe { String::from_utf8_unchecked(bytes) } - } - - #[inline] - pub fn may_resize(&self, add_size: usize) -> bool { - self.data.len() + add_size > self.data.capacity() + self.data.values()[0].to_string() } /// # Safety /// /// Calling this method with an out-of-bounds index is *[undefined behavior]* pub unsafe fn index_unchecked(&self, row: usize) -> &str { - debug_assert!(row + 1 < self.offsets.len()); - - let start = *self.offsets.get_unchecked(row) as usize; - let end = *self.offsets.get_unchecked(row + 1) as usize; - let bytes = self.data.get_unchecked(start..end); - - #[cfg(debug_assertions)] - bytes.check_utf8().unwrap(); - - std::str::from_utf8_unchecked(bytes) + self.data.value_unchecked(row) } pub fn push_repeat(&mut self, item: &str, n: usize) { - self.data.reserve(item.len() * n); - if self.need_estimated && self.offsets.len() - 1 < 64 { - for _ in 0..n { - self.data.extend_from_slice(item.as_bytes()); - self.commit_row(); - } - } else { - let start = self.data.len(); - let len = item.len(); - for _ in 0..n { - self.data.extend_from_slice(item.as_bytes()); - } - self.offsets - .extend((1..=n).map(|i| (start + len * i) as u64)); - } + self.data.extend_constant(n, Some(item)); } pub fn pop(&mut self) -> Option { - if self.len() > 0 { - let index = self.len() - 1; - let start = unsafe { *self.offsets.get_unchecked(index) as usize }; - self.offsets.pop(); - let val = self.data.split_off(start); - - #[cfg(debug_assertions)] - val.check_utf8().unwrap(); - - Some(unsafe { String::from_utf8_unchecked(val) }) - } else { - None - } + self.data.pop() } } impl<'a> FromIterator<&'a str> for StringColumnBuilder { fn from_iter>(iter: T) -> Self { let iter = iter.into_iter(); - let mut builder = StringColumnBuilder::with_capacity(iter.size_hint().0, 0); + let mut builder = StringColumnBuilder::with_capacity(iter.size_hint().0); for item in iter { - builder.put_str(item); - builder.commit_row(); + builder.put_and_commit(item); } builder } } -impl From for BinaryColumnBuilder { - fn from(builder: StringColumnBuilder) -> BinaryColumnBuilder { - BinaryColumnBuilder { - need_estimated: builder.need_estimated, - data: builder.data, - offsets: builder.offsets, - } +impl PartialEq for StringColumnBuilder { + fn eq(&self, other: &Self) -> bool { + self.data.values_iter().eq(other.data.values_iter()) } } -impl TryFrom for StringColumnBuilder { - type Error = ErrorCode; - - fn try_from(builder: BinaryColumnBuilder) -> Result { - builder.check_utf8()?; - Ok(StringColumnBuilder { - need_estimated: builder.need_estimated, - data: builder.data, - offsets: builder.offsets, - }) - } -} +impl Eq for StringColumnBuilder {} #[derive(Debug, Clone, PartialEq, Eq)] pub struct StringDomain { @@ -712,7 +602,10 @@ impl CheckUTF8 for Vec { impl CheckUTF8 for BinaryColumn { fn check_utf8(&self) -> Result<()> { - check_utf8_column(&self.offsets, &self.data) + for bytes in self.iter() { + bytes.check_utf8()?; + } + Ok(()) } } diff --git a/src/query/expression/src/types/variant.rs b/src/query/expression/src/types/variant.rs index 6d7ab89a3c8d..262d2f36aa72 100644 --- a/src/query/expression/src/types/variant.rs +++ b/src/query/expression/src/types/variant.rs @@ -176,7 +176,7 @@ impl ValueType for VariantType { } fn column_memory_size(col: &Self::Column) -> usize { - col.data().len() + col.offsets().len() * 8 + col.memory_size() } #[inline(always)] diff --git a/src/query/expression/src/utils/display.rs b/src/query/expression/src/utils/display.rs index c0023fc7b8a9..32287b84c8e0 100755 --- a/src/query/expression/src/utils/display.rs +++ b/src/query/expression/src/utils/display.rs @@ -429,11 +429,7 @@ impl Debug for BinaryColumn { impl Debug for StringColumn { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { f.debug_struct("StringColumn") - .field( - "data", - &format_args!("0x{}", &hex::encode(self.data().as_slice())), - ) - .field("offsets", &self.offsets()) + .field("data", &format_args!("{:?}", self.data)) .finish() } } diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index 0196bab4a955..6c63fd6e5392 100755 --- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -20,6 +20,7 @@ use std::ops::Range; use base64::engine::general_purpose; use base64::prelude::*; +use binary::BinaryColumnBuilder; use borsh::BorshDeserialize; use borsh::BorshSerialize; use databend_common_arrow::arrow::bitmap::Bitmap; @@ -43,12 +44,12 @@ use serde::Deserialize; use serde::Deserializer; use serde::Serialize; use serde::Serializer; +use string::StringColumnBuilder; use crate::property::Domain; use crate::types::array::ArrayColumn; use crate::types::array::ArrayColumnBuilder; use crate::types::binary::BinaryColumn; -use crate::types::binary::BinaryColumnBuilder; use crate::types::bitmap::BitmapType; use crate::types::boolean::BooleanDomain; use crate::types::date::DATE_MAX; @@ -76,7 +77,6 @@ use crate::types::number::SimpleDomain; use crate::types::number::F32; use crate::types::number::F64; use crate::types::string::StringColumn; -use crate::types::string::StringColumnBuilder; use crate::types::string::StringDomain; use crate::types::timestamp::clamp_timestamp; use crate::types::timestamp::TIMESTAMP_MAX; @@ -1172,7 +1172,6 @@ impl Column { pub fn check_valid(&self) -> Result<()> { match self { Column::Binary(x) => x.check_valid(), - Column::String(x) => x.check_valid(), Column::Variant(x) => x.check_valid(), Column::Geometry(x) => x.check_valid(), Column::Geography(x) => x.check_valid(), @@ -1442,11 +1441,12 @@ impl Column { Column::Decimal(DecimalColumn::Decimal256(col, _)) => col.len() * 32, Column::Geography(col) => GeographyType::column_memory_size(col), Column::Boolean(c) => c.len(), + // 8 * len + size of bytes Column::Binary(col) | Column::Bitmap(col) | Column::Variant(col) | Column::Geometry(col) => col.memory_size(), - Column::String(col) => col.memory_size(), + Column::String(col) => col.len() * 8 + col.current_buffer_len(), Column::Array(col) | Column::Map(col) => col.values.serialize_size() + col.len() * 8, Column::Nullable(c) => c.column.serialize_size() + c.len(), Column::Tuple(fields) => fields.iter().map(|f| f.serialize_size()).sum(), @@ -1666,7 +1666,7 @@ impl ColumnBuilder { } ColumnBuilder::Boolean(c) => c.as_slice().len(), ColumnBuilder::Binary(col) => col.data.len() + col.offsets.len() * 8, - ColumnBuilder::String(col) => col.data.len() + col.offsets.len() * 8, + ColumnBuilder::String(col) => col.memory_size(), ColumnBuilder::Timestamp(col) => col.len() * 8, ColumnBuilder::Date(col) => col.len() * 4, ColumnBuilder::Array(col) => col.builder.memory_size() + col.offsets.len() * 8, @@ -1742,10 +1742,7 @@ impl ColumnBuilder { let data_capacity = if enable_datasize_hint { 0 } else { capacity }; ColumnBuilder::Binary(BinaryColumnBuilder::with_capacity(capacity, data_capacity)) } - DataType::String => { - let data_capacity = if enable_datasize_hint { 0 } else { capacity }; - ColumnBuilder::String(StringColumnBuilder::with_capacity(capacity, data_capacity)) - } + DataType::String => ColumnBuilder::String(StringColumnBuilder::with_capacity(capacity)), DataType::Timestamp => ColumnBuilder::Timestamp(Vec::with_capacity(capacity)), DataType::Date => ColumnBuilder::Date(Vec::with_capacity(capacity)), DataType::Nullable(ty) => ColumnBuilder::Nullable(Box::new(NullableColumnBuilder { @@ -1829,8 +1826,8 @@ impl ColumnBuilder { // binary based DataType::Binary => ColumnBuilder::Binary(BinaryColumnBuilder::repeat_default(len)), - DataType::Bitmap => ColumnBuilder::Bitmap(BinaryColumnBuilder::repeat_default(len)), DataType::String => ColumnBuilder::String(StringColumnBuilder::repeat_default(len)), + DataType::Bitmap => ColumnBuilder::Bitmap(BinaryColumnBuilder::repeat_default(len)), DataType::Variant => ColumnBuilder::Variant(BinaryColumnBuilder::repeat_default(len)), DataType::Geometry => ColumnBuilder::Geometry(BinaryColumnBuilder::repeat_default(len)), DataType::Geography => { @@ -2046,13 +2043,13 @@ impl ColumnBuilder { } ColumnBuilder::String(builder) => { let offset = reader.read_scalar::()? as usize; - builder.data.resize(offset + builder.data.len(), 0); - let last = *builder.offsets.last().unwrap() as usize; - reader.read_exact(&mut builder.data[last..last + offset])?; - builder.commit_row(); + builder.row_buffer.resize(offset, 0); + reader.read_exact(&mut builder.row_buffer)?; #[cfg(debug_assertions)] - string::CheckUTF8::check_utf8(&(&builder.data[last..last + offset])).unwrap(); + string::CheckUTF8::check_utf8(&builder.row_buffer).unwrap(); + + builder.commit_row(); } ColumnBuilder::Timestamp(builder) => { let mut value: i64 = reader.read_scalar()?; @@ -2150,8 +2147,7 @@ impl ColumnBuilder { string::CheckUTF8::check_utf8(&bytes).unwrap(); let s = unsafe { std::str::from_utf8_unchecked(bytes) }; - builder.put_str(s); - builder.commit_row(); + builder.put_and_commit(s); } } ColumnBuilder::Timestamp(builder) => { diff --git a/src/query/expression/tests/it/common.rs b/src/query/expression/tests/it/common.rs index c2dd01b0f952..38a3bdaf16ad 100644 --- a/src/query/expression/tests/it/common.rs +++ b/src/query/expression/tests/it/common.rs @@ -127,7 +127,7 @@ pub fn run_scatter(file: &mut impl Write, block: &DataBlock, indices: &[u32], sc } pub fn run_take(file: &mut impl Write, indices: &[u32], block: &DataBlock) { - let result = DataBlock::take(block, indices, &mut None); + let result = DataBlock::take(block, indices); match result { Ok(result_block) => { diff --git a/src/query/expression/tests/it/kernel.rs b/src/query/expression/tests/it/kernel.rs index 4d0d8a2af572..d374f553fb6e 100644 --- a/src/query/expression/tests/it/kernel.rs +++ b/src/query/expression/tests/it/kernel.rs @@ -14,14 +14,18 @@ use core::ops::Range; +use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_expression::block_debug::assert_block_value_eq; use databend_common_expression::types::number::*; +use databend_common_expression::types::AnyType; use databend_common_expression::types::DataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::types::StringType; +use databend_common_expression::types::ValueType; use databend_common_expression::visitor::ValueVisitor; use databend_common_expression::BlockEntry; use databend_common_expression::Column; +use databend_common_expression::ColumnBuilder; use databend_common_expression::DataBlock; use databend_common_expression::FilterVisitor; use databend_common_expression::FromData; @@ -281,10 +285,14 @@ pub fn test_take_and_filter_and_concat() -> databend_common_exception::Result<() FilterVisitor::new(&filter).with_strategy(IterationStrategy::SlicesIterator); let mut f2 = FilterVisitor::new(&filter).with_strategy(IterationStrategy::IndexIterator); + for col in random_block.columns() { f1.visit_value(col.value.clone())?; f2.visit_value(col.value.clone())?; - assert_eq!(f1.take_result(), f2.take_result()); + + let l = f1.take_result(); + let r = f2.take_result(); + assert_eq!(l, r); } } @@ -326,15 +334,9 @@ pub fn test_take_and_filter_and_concat() -> databend_common_exception::Result<() .collect_vec(); let concated_blocks = DataBlock::concat(&blocks)?; - let block_1 = concated_blocks.take(&take_indices, &mut None)?; + let block_1 = concated_blocks.take(&take_indices)?; let block_2 = concated_blocks.take_compacted_indices(&take_compact_indices, count)?; - let block_3 = DataBlock::take_column_vec( - &column_vec, - &data_types, - &take_chunks_indices, - count, - &mut None, - ); + let block_3 = DataBlock::take_column_vec(&column_vec, &data_types, &take_chunks_indices, count); let block_4 = DataBlock::concat(&filtered_blocks)?; let block_5 = concated_blocks.take_ranges( &build_range_selection(&take_indices, take_indices.len()), @@ -433,7 +435,7 @@ pub fn test_take_compact() -> databend_common_exception::Result<()> { take_indices.extend(std::iter::repeat(batch_index as u32).take(batch_size)); take_compact_indices.push((batch_index as u32, batch_size as u32)); } - let block_1 = block.take(&take_indices, &mut None)?; + let block_1 = block.take(&take_indices)?; let block_2 = block.take_compacted_indices(&take_compact_indices, count)?; assert_eq!(block_1.num_columns(), block_2.num_columns()); @@ -501,8 +503,8 @@ pub fn test_filters() -> databend_common_exception::Result<()> { .map(|(i, _)| i as u32) .collect::>(); - let t_b = bb.take(&indices, &mut None)?; - let t_c = cc.take(&indices, &mut None)?; + let t_b = bb.take(&indices)?; + let t_c = cc.take(&indices)?; let f_b = bb.filter_with_bitmap(&f)?; let f_c = cc.filter_with_bitmap(&f)?; @@ -590,7 +592,7 @@ pub fn test_scatter() -> databend_common_exception::Result<()> { } } - let block_1 = random_block.take(&take_indices, &mut None)?; + let block_1 = random_block.take(&take_indices)?; let block_2 = DataBlock::concat(&scattered_blocks)?; assert_eq!(block_1.num_columns(), block_2.num_columns()); @@ -606,3 +608,31 @@ pub fn test_scatter() -> databend_common_exception::Result<()> { Ok(()) } + +#[test] +fn test_builder() { + let ty = DataType::String; + let len = 30; + let col = Column::random(&ty, len, None); + + let bitmap = Bitmap::from_iter((0..len).map(|x| x % 4 != 0)); + + let mut builder1 = ColumnBuilder::with_capacity(&col.data_type(), col.len()); + let mut builder2 = ColumnBuilder::with_capacity(&col.data_type(), col.len()); + + for i in 0..len { + if bitmap.get_bit(i) { + builder1.push(col.index(i).unwrap()); + } + } + + for (start, len) in databend_common_arrow::arrow::bitmap::utils::SlicesIterator::new(&bitmap) { + let sub_col = col.slice(start..start + len); + AnyType::append_column(&mut builder2, &sub_col); + } + + let r1 = builder1.build(); + let r2 = builder2.build(); + + assert_eq!(r1, r2) +} diff --git a/src/query/expression/tests/it/row.rs b/src/query/expression/tests/it/row.rs index f7ba9b75dd9c..07b9c2e196c9 100644 --- a/src/query/expression/tests/it/row.rs +++ b/src/query/expression/tests/it/row.rs @@ -18,7 +18,6 @@ use arrow_ord::sort::LexicographicalComparator; use arrow_ord::sort::SortColumn; use arrow_schema::SortOptions; use databend_common_arrow::arrow::bitmap::MutableBitmap; -use databend_common_arrow::arrow::offset::OffsetsBuffer; use databend_common_base::base::OrderedFloat; use databend_common_expression::converts::arrow2::set_validities; use databend_common_expression::types::binary::BinaryColumnBuilder; @@ -70,31 +69,6 @@ fn test_fixed_width() { let rows = converter.convert_columns(&cols, cols[0].len()); - assert_eq!( - rows.offsets().clone(), - vec![0, 8, 16, 24, 32, 40, 48, 56].into() - ); - assert_eq!( - rows.data().clone(), - vec![ - 1, 128, 1, // - 1, 191, 166, 102, 102, // - 1, 128, 2, // - 1, 192, 32, 0, 0, // - 0, 0, 0, // - 0, 0, 0, 0, 0, // - 1, 127, 251, // - 1, 192, 128, 0, 0, // - 1, 128, 2, // - 1, 189, 204, 204, 205, // - 1, 128, 2, // - 1, 63, 127, 255, 255, // - 1, 128, 0, // - 1, 127, 255, 255, 255 // - ] - .into() - ); - unsafe { assert!(rows.index_unchecked(3) < rows.index_unchecked(6)); assert!(rows.index_unchecked(0) < rows.index_unchecked(1)); @@ -549,17 +523,7 @@ fn fuzz_test() { // arrow_ord does not support LargeBinary converted from Databend String Column::Nullable(c) => match &c.column { Column::String(sc) => { - let offsets = - sc.offsets().iter().map(|offset| *offset as i64).collect(); - let array = Box::new( - databend_common_arrow::arrow::array::Utf8Array::::try_new( - databend_common_arrow::arrow::datatypes::DataType::LargeUtf8, - unsafe { OffsetsBuffer::new_unchecked(offsets) }, - sc.data().clone(), - None, - ) - .unwrap(), - ); + let array = Box::new(sc.clone().into_inner()); set_validities(array, &c.validity) } _ => col.as_arrow(), diff --git a/src/query/expression/tests/it/testdata/kernel-pass.txt b/src/query/expression/tests/it/testdata/kernel-pass.txt index 53da6e912d6b..cfe2e20d5bda 100644 --- a/src/query/expression/tests/it/testdata/kernel-pass.txt +++ b/src/query/expression/tests/it/testdata/kernel-pass.txt @@ -19,25 +19,25 @@ Result: Concat-Column 0: -+-----------+----------------+-----------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Type | Column Data | -+-----------+----------------+-----------------------------------------------------------------------------------------------------------------------------+ -| 0 | Int32 | Column(Int32([0, 1, 2, 3, -4])) | -| 1 | UInt8 NULL | Column(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] }) | -| 2 | NULL | Column(Null { len: 5 }) | -| 3 | Array(Nothing) | Column(EmptyArray { len: 5 }) | -| 4 | String NULL | Column(NullableColumn { column: StringColumn { data: 0x78797a6162, offsets: [0, 1, 2, 3, 4, 5] }, validity: [0b___00110] }) | -+-----------+----------------+-----------------------------------------------------------------------------------------------------------------------------+ ++-----------+----------------+----------------------------------------------------------------------------------------------------------------+ +| Column ID | Type | Column Data | ++-----------+----------------+----------------------------------------------------------------------------------------------------------------+ +| 0 | Int32 | Column(Int32([0, 1, 2, 3, -4])) | +| 1 | UInt8 NULL | Column(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] }) | +| 2 | NULL | Column(Null { len: 5 }) | +| 3 | Array(Nothing) | Column(EmptyArray { len: 5 }) | +| 4 | String NULL | Column(NullableColumn { column: StringColumn { data: Utf8ViewArray[x, y, z, a, b] }, validity: [0b___00110] }) | ++-----------+----------------+----------------------------------------------------------------------------------------------------------------+ Concat-Column 1: -+-----------+----------------+--------------------------------------------------------------------------------------------------------------+ -| Column ID | Type | Column Data | -+-----------+----------------+--------------------------------------------------------------------------------------------------------------+ -| 0 | Int32 | Column(Int32([5, 6])) | -| 1 | UInt8 NULL | Column(NullableColumn { column: UInt8([15, 16]), validity: [0b______10] }) | -| 2 | NULL | Column(Null { len: 2 }) | -| 3 | Array(Nothing) | Column(EmptyArray { len: 2 }) | -| 4 | String NULL | Column(NullableColumn { column: StringColumn { data: 0x7879, offsets: [0, 1, 2] }, validity: [0b______10] }) | -+-----------+----------------+--------------------------------------------------------------------------------------------------------------+ ++-----------+----------------+-------------------------------------------------------------------------------------------------------+ +| Column ID | Type | Column Data | ++-----------+----------------+-------------------------------------------------------------------------------------------------------+ +| 0 | Int32 | Column(Int32([5, 6])) | +| 1 | UInt8 NULL | Column(NullableColumn { column: UInt8([15, 16]), validity: [0b______10] }) | +| 2 | NULL | Column(Null { len: 2 }) | +| 3 | Array(Nothing) | Column(EmptyArray { len: 2 }) | +| 4 | String NULL | Column(NullableColumn { column: StringColumn { data: Utf8ViewArray[x, y] }, validity: [0b______10] }) | ++-----------+----------------+-------------------------------------------------------------------------------------------------------+ Result: +----------+----------+----------+----------+----------+ | Column 0 | Column 1 | Column 2 | Column 3 | Column 4 | diff --git a/src/query/formats/src/field_decoder/fast_values.rs b/src/query/formats/src/field_decoder/fast_values.rs index 56ce1f46c3fa..c3a5c16ebd55 100644 --- a/src/query/formats/src/field_decoder/fast_values.rs +++ b/src/query/formats/src/field_decoder/fast_values.rs @@ -270,7 +270,7 @@ impl FastFieldDecoderValues { reader: &mut Cursor, positions: &mut VecDeque, ) -> Result<()> { - self.read_string_inner(reader, &mut column.data, positions)?; + self.read_string_inner(reader, &mut column.row_buffer, positions)?; column.commit_row(); Ok(()) } diff --git a/src/query/formats/src/field_decoder/nested.rs b/src/query/formats/src/field_decoder/nested.rs index cf690d719f7c..1b7b5ba958f7 100644 --- a/src/query/formats/src/field_decoder/nested.rs +++ b/src/query/formats/src/field_decoder/nested.rs @@ -196,7 +196,7 @@ impl NestedValues { column: &mut StringColumnBuilder, reader: &mut Cursor, ) -> Result<()> { - reader.read_quoted_text(&mut column.data, b'\'')?; + reader.read_quoted_text(&mut column.row_buffer, b'\'')?; column.commit_row(); Ok(()) } diff --git a/src/query/formats/src/field_decoder/separated_text.rs b/src/query/formats/src/field_decoder/separated_text.rs index 0ca734b07d3a..31f0226f032b 100644 --- a/src/query/formats/src/field_decoder/separated_text.rs +++ b/src/query/formats/src/field_decoder/separated_text.rs @@ -124,8 +124,7 @@ impl SeparatedTextDecoder { Ok(()) } ColumnBuilder::String(c) => { - c.put_str(std::str::from_utf8(data)?); - c.commit_row(); + c.put_and_commit(std::str::from_utf8(data)?); Ok(()) } ColumnBuilder::Boolean(c) => self.read_bool(c, data), diff --git a/src/query/functions/Cargo.toml b/src/query/functions/Cargo.toml index 34a0a89f3a63..a0a3d1872c8c 100644 --- a/src/query/functions/Cargo.toml +++ b/src/query/functions/Cargo.toml @@ -18,7 +18,6 @@ bumpalo = { workspace = true } chrono = { workspace = true } chrono-tz = { workspace = true } crc32fast = { workspace = true } -criterion = { workspace = true } ctor = { workspace = true } databend-common-arrow = { workspace = true } databend-common-base = { workspace = true } @@ -67,6 +66,7 @@ twox-hash = { workspace = true } [dev-dependencies] comfy-table = { workspace = true } +criterion = { workspace = true } databend-common-ast = { workspace = true } goldenfile = { workspace = true } diff --git a/src/query/functions/src/aggregates/aggregate_distinct_state.rs b/src/query/functions/src/aggregates/aggregate_distinct_state.rs index 424043c2903b..4db1b13b5b2f 100644 --- a/src/query/functions/src/aggregates/aggregate_distinct_state.rs +++ b/src/query/functions/src/aggregates/aggregate_distinct_state.rs @@ -232,10 +232,9 @@ impl DistinctStateFunc for AggregateDistinctStringState { } fn build_columns(&mut self, _types: &[DataType]) -> Result> { - let mut builder = StringColumnBuilder::with_capacity(self.set.len(), self.set.len() * 2); + let mut builder = StringColumnBuilder::with_capacity(self.set.len()); for key in self.set.iter() { - builder.put_str(unsafe { std::str::from_utf8_unchecked(key.key()) }); - builder.commit_row(); + builder.put_and_commit(unsafe { std::str::from_utf8_unchecked(key.key()) }); } Ok(vec![Column::String(builder.build())]) } diff --git a/src/query/functions/src/aggregates/aggregate_histogram.rs b/src/query/functions/src/aggregates/aggregate_histogram.rs index d3f40ec178d9..d083d8461039 100644 --- a/src/query/functions/src/aggregates/aggregate_histogram.rs +++ b/src/query/functions/src/aggregates/aggregate_histogram.rs @@ -24,7 +24,6 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::decimal::*; use databend_common_expression::types::number::*; -use databend_common_expression::types::string::StringColumnBuilder; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use databend_common_expression::AggregateFunctionRef; @@ -32,6 +31,7 @@ use databend_common_expression::Scalar; use ethnum::i256; use serde::Deserialize; use serde::Serialize; +use string::StringColumnBuilder; use super::FunctionData; use crate::aggregates::aggregate_function_factory::AggregateFunctionDescription; @@ -157,8 +157,7 @@ where }) .collect::>>(), )?; - builder.put_str(&json_str); - builder.commit_row(); + builder.put_and_commit(json_str); Ok(()) } diff --git a/src/query/functions/src/aggregates/aggregate_min_max_any.rs b/src/query/functions/src/aggregates/aggregate_min_max_any.rs index 6ab35d792d80..bbe7325b93d2 100644 --- a/src/query/functions/src/aggregates/aggregate_min_max_any.rs +++ b/src/query/functions/src/aggregates/aggregate_min_max_any.rs @@ -42,7 +42,114 @@ use super::UnaryState; use crate::aggregates::assert_unary_arguments; use crate::aggregates::AggregateFunction; use crate::with_compare_mapped_type; -use crate::with_simple_no_number_mapped_type; +use crate::with_simple_no_number_no_string_mapped_type; + +#[derive(BorshSerialize, BorshDeserialize)] +pub struct MinMaxStringState +where C: ChangeIf +{ + pub value: Option, + #[borsh(skip)] + _c: PhantomData, +} + +impl Default for MinMaxStringState +where C: ChangeIf + Default +{ + fn default() -> Self { + Self { + value: None, + _c: PhantomData, + } + } +} + +impl UnaryState for MinMaxStringState +where C: ChangeIf + Default +{ + fn add( + &mut self, + other: ::ScalarRef<'_>, + _function_data: Option<&dyn FunctionData>, + ) -> Result<()> { + match &self.value { + Some(v) => { + if C::change_if(&StringType::to_scalar_ref(v), &other) { + self.value = Some(StringType::to_owned_scalar(other)); + } + } + None => { + self.value = Some(StringType::to_owned_scalar(other)); + } + } + Ok(()) + } + + fn add_batch( + &mut self, + other: StringColumn, + validity: Option<&Bitmap>, + function_data: Option<&dyn FunctionData>, + ) -> Result<()> { + let column_len = StringType::column_len(&other); + if column_len == 0 { + return Ok(()); + } + + let column_iter = 0..other.len(); + if let Some(validity) = validity { + if validity.unset_bits() == column_len { + return Ok(()); + } + let v = column_iter + .zip(validity) + .filter(|(_, valid)| *valid) + .map(|(idx, _)| idx) + .reduce(|l, r| { + if !C::change_if_ordering(StringColumn::compare(&other, l, &other, r)) { + l + } else { + r + } + }); + if let Some(v) = v { + let _ = self.add(other.index(v).unwrap(), function_data); + } + } else { + let v = column_iter.reduce(|l, r| { + if !C::change_if_ordering(StringColumn::compare(&other, l, &other, r)) { + l + } else { + r + } + }); + if let Some(v) = v { + let _ = self.add(other.index(v).unwrap(), function_data); + } + } + Ok(()) + } + + fn merge(&mut self, rhs: &Self) -> Result<()> { + if let Some(v) = &rhs.value { + self.add(v.as_str(), None)?; + } + Ok(()) + } + + fn merge_result( + &mut self, + builder: &mut ::ColumnBuilder, + _function_data: Option<&dyn FunctionData>, + ) -> Result<()> { + if let Some(v) = &self.value { + StringType::push_item(builder, v.as_str()); + } else { + StringType::push_default(builder); + } + Ok(()) + } +} #[derive(BorshSerialize, BorshDeserialize)] pub struct MinMaxAnyState @@ -161,7 +268,7 @@ pub fn try_create_aggregate_min_max_any_function( with_compare_mapped_type!(|CMP| match CMP_TYPE { CMP => { - with_simple_no_number_mapped_type!(|T| match data_type { + with_simple_no_number_no_string_mapped_type!(|T| match data_type { DataType::T => { let return_type = data_type.clone(); let func = AggregateUnaryFunction::, T, T>::try_create( @@ -174,6 +281,19 @@ pub fn try_create_aggregate_min_max_any_function( Ok(Arc::new(func)) } + DataType::String => { + let return_type = data_type.clone(); + let func = AggregateUnaryFunction::< + MinMaxStringState, + StringType, + StringType, + >::try_create( + display_name, return_type, params, data_type + ) + .with_need_drop(need_drop); + + Ok(Arc::new(func)) + } DataType::Number(num_type) => { with_number_mapped_type!(|NUM| match num_type { NumberDataType::NUM => { diff --git a/src/query/functions/src/aggregates/aggregate_scalar_state.rs b/src/query/functions/src/aggregates/aggregate_scalar_state.rs index 3a5c450761c8..26ccc51d1f91 100644 --- a/src/query/functions/src/aggregates/aggregate_scalar_state.rs +++ b/src/query/functions/src/aggregates/aggregate_scalar_state.rs @@ -44,6 +44,23 @@ macro_rules! with_simple_no_number_mapped_type { } } +#[macro_export] +macro_rules! with_simple_no_number_no_string_mapped_type { + (| $t:tt | $($tail:tt)*) => { + match_template::match_template! { + $t = [ + Boolean => BooleanType, + Timestamp => TimestampType, + Null => NullType, + EmptyArray => EmptyArrayType, + EmptyMap => EmptyMapType, + Date => DateType, + ], + $($tail)* + } + } +} + pub const TYPE_ANY: u8 = 0; pub const TYPE_MIN: u8 = 1; pub const TYPE_MAX: u8 = 2; @@ -64,6 +81,7 @@ macro_rules! with_compare_mapped_type { pub trait ChangeIf: Send + Sync + 'static { fn change_if(l: &T::ScalarRef<'_>, r: &T::ScalarRef<'_>) -> bool; + fn change_if_ordering(ordering: Ordering) -> bool; } #[derive(Default)] @@ -78,6 +96,11 @@ where fn change_if<'a>(l: &T::ScalarRef<'_>, r: &T::ScalarRef<'_>) -> bool { matches!(l.partial_cmp(r), Some(Ordering::Greater)) } + + #[inline] + fn change_if_ordering(ordering: Ordering) -> bool { + ordering == Ordering::Greater + } } #[derive(Default)] @@ -92,6 +115,11 @@ where fn change_if<'a>(l: &T::ScalarRef<'_>, r: &T::ScalarRef<'_>) -> bool { matches!(l.partial_cmp(r), Some(Ordering::Less)) } + + #[inline] + fn change_if_ordering(ordering: Ordering) -> bool { + ordering == Ordering::Less + } } #[derive(Default)] @@ -102,6 +130,11 @@ impl ChangeIf for CmpAny { fn change_if(_: &T::ScalarRef<'_>, _: &T::ScalarRef<'_>) -> bool { false } + + #[inline] + fn change_if_ordering(_: Ordering) -> bool { + false + } } pub trait ScalarStateFunc: diff --git a/src/query/functions/src/aggregates/aggregate_string_agg.rs b/src/query/functions/src/aggregates/aggregate_string_agg.rs index 60ff6fd4cf3f..3bb9259d11a9 100644 --- a/src/query/functions/src/aggregates/aggregate_string_agg.rs +++ b/src/query/functions/src/aggregates/aggregate_string_agg.rs @@ -147,9 +147,10 @@ impl AggregateFunction for AggregateStringAggFunction { let builder = StringType::try_downcast_builder(builder).unwrap(); if !state.values.is_empty() { let len = state.values.len() - self.delimiter.len(); - builder.put_str(&state.values[..len]); + builder.put_and_commit(&state.values[..len]); + } else { + builder.put_and_commit(""); } - builder.commit_row(); Ok(()) } diff --git a/src/query/functions/src/scalars/arithmetic.rs b/src/query/functions/src/scalars/arithmetic.rs index 7cf90d5efd12..081cba3653d4 100644 --- a/src/query/functions/src/scalars/arithmetic.rs +++ b/src/query/functions/src/scalars/arithmetic.rs @@ -22,6 +22,7 @@ use std::sync::Arc; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_expression::serialize::read_decimal_with_size; +use databend_common_expression::types::binary::BinaryColumnBuilder; use databend_common_expression::types::decimal::DecimalDomain; use databend_common_expression::types::decimal::DecimalType; use databend_common_expression::types::nullable::NullableColumn; @@ -37,6 +38,7 @@ use databend_common_expression::types::NullableType; use databend_common_expression::types::NumberClass; use databend_common_expression::types::NumberDataType; use databend_common_expression::types::SimpleDomain; +use databend_common_expression::types::StringColumn; use databend_common_expression::types::StringType; use databend_common_expression::types::ALL_FLOAT_TYPES; use databend_common_expression::types::ALL_INTEGER_TYPES; @@ -972,28 +974,25 @@ pub fn register_number_to_string(registry: &mut FunctionRegistry) { let options = NUM_TYPE::lexical_options(); const FORMAT: u128 = lexical_core::format::STANDARD; - let mut builder = - StringColumnBuilder::with_capacity(from.len(), from.len() + 1); - let values = &mut builder.data; - type Native = ::Native; - let mut offset: usize = 0; + + let mut builder = StringColumnBuilder::with_capacity(from.len()); + unsafe { for x in from.iter() { - values.reserve(offset + Native::FORMATTED_SIZE_DECIMAL); - values.set_len(offset + Native::FORMATTED_SIZE_DECIMAL); - let bytes = &mut values[offset..]; - + builder.row_buffer.resize( + ::Native::FORMATTED_SIZE_DECIMAL, + 0, + ); let len = lexical_core::write_with_options::<_, FORMAT>( Native::from(*x), - bytes, + &mut builder.row_buffer, &options, ) .len(); - offset += len; - builder.offsets.push(offset as u64); + builder.row_buffer.truncate(len); + builder.commit_row(); } - values.set_len(offset); } Value::Column(builder.build()) } @@ -1008,7 +1007,7 @@ pub fn register_number_to_string(registry: &mut FunctionRegistry) { let options = NUM_TYPE::lexical_options(); const FORMAT: u128 = lexical_core::format::STANDARD; let mut builder = - StringColumnBuilder::with_capacity(from.len(), from.len() + 1); + BinaryColumnBuilder::with_capacity(from.len(), from.len() + 1); let values = &mut builder.data; type Native = ::Native; @@ -1029,7 +1028,7 @@ pub fn register_number_to_string(registry: &mut FunctionRegistry) { } values.set_len(offset); } - let result = builder.build(); + let result = StringColumn::try_from(builder.build()).unwrap(); Value::Column(NullableColumn::new( result, Bitmap::new_constant(true, from.len()), diff --git a/src/query/functions/src/scalars/binary.rs b/src/query/functions/src/scalars/binary.rs index 6c76598768f6..1e865888b98b 100644 --- a/src/query/functions/src/scalars/binary.rs +++ b/src/query/functions/src/scalars/binary.rs @@ -30,7 +30,7 @@ use databend_common_expression::types::NumberDataType; use databend_common_expression::types::NumberType; use databend_common_expression::types::StringType; use databend_common_expression::types::UInt8Type; -use databend_common_expression::types::ValueType; +use databend_common_expression::vectorize_1_arg; use databend_common_expression::Column; use databend_common_expression::EvalContext; use databend_common_expression::Function; @@ -49,19 +49,7 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::, _, _>( "length", |_, _| FunctionDomain::Full, - |val, _| match val { - ValueRef::Scalar(s) => Value::Scalar(s.len() as u64), - ValueRef::Column(c) => { - let diffs = c - .offsets() - .iter() - .zip(c.offsets().iter().skip(1)) - .map(|(a, b)| b - a) - .collect::>(); - - Value::Column(diffs.into()) - } - }, + vectorize_1_arg::>(|val, _| val.len() as u64), ); registry.register_passthrough_nullable_1_arg::( @@ -101,12 +89,11 @@ pub fn register(registry: &mut FunctionRegistry) { "to_hex", |_, _| FunctionDomain::Full, vectorize_binary_to_string( - |col| col.data().len() * 2, + |col| col.current_buffer_len() * 2, |val, output, _| { - let old_len = output.data.len(); let extra_len = val.len() * 2; - output.data.resize(old_len + extra_len, 0); - hex::encode_to_slice(val, &mut output.data[old_len..]).unwrap(); + output.row_buffer.resize(extra_len, 0); + hex::encode_to_slice(val, &mut output.row_buffer).unwrap(); output.commit_row(); }, ), @@ -128,10 +115,10 @@ pub fn register(registry: &mut FunctionRegistry) { "to_base64", |_, _| FunctionDomain::Full, vectorize_binary_to_string( - |col| col.data().len() * 4 / 3 + col.len() * 4, + |col| col.current_buffer_len() * 4 / 3 + col.len() * 4, |val, output, _| { base64::write::EncoderWriter::new( - &mut output.data, + &mut output.row_buffer, &base64::engine::general_purpose::STANDARD, ) .write_all(val) @@ -203,7 +190,7 @@ pub fn register(registry: &mut FunctionRegistry) { fn eval_binary_to_string(val: ValueRef, ctx: &mut EvalContext) -> Value { vectorize_binary_to_string( - |col| col.data().len(), + |col| col.current_buffer_len(), |val, output, ctx| { if let Ok(val) = simdutf8::basic::from_utf8(val) { output.put_str(val); @@ -217,7 +204,7 @@ fn eval_binary_to_string(val: ValueRef, ctx: &mut EvalContext) -> Va fn eval_unhex(val: ValueRef, ctx: &mut EvalContext) -> Value { vectorize_string_to_binary( - |col| col.data().len() / 2, + |col| col.current_buffer_len() / 2, |val, output, ctx| { let old_len = output.data.len(); let extra_len = val.len() / 2; @@ -232,7 +219,7 @@ fn eval_unhex(val: ValueRef, ctx: &mut EvalContext) -> Value, ctx: &mut EvalContext) -> Value { vectorize_string_to_binary( - |col| col.data().len() * 4 / 3 + col.len() * 4, + |col| col.current_buffer_len() * 4 / 3 + col.len() * 4, |val, output, ctx| { if let Err(err) = base64::Engine::decode_vec( &base64::engine::general_purpose::STANDARD, @@ -248,18 +235,17 @@ fn eval_from_base64(val: ValueRef, ctx: &mut EvalContext) -> Value usize + Copy, + _estimate_bytes: impl Fn(&BinaryColumn) -> usize + Copy, func: impl Fn(&[u8], &mut StringColumnBuilder, &mut EvalContext) + Copy, ) -> impl Fn(ValueRef, &mut EvalContext) -> Value + Copy { move |arg1, ctx| match arg1 { ValueRef::Scalar(val) => { - let mut builder = StringColumnBuilder::with_capacity(1, 0); + let mut builder = StringColumnBuilder::with_capacity(1); func(val, &mut builder, ctx); Value::Scalar(builder.build_scalar()) } ValueRef::Column(col) => { - let data_capacity = estimate_bytes(&col); - let mut builder = StringColumnBuilder::with_capacity(col.len(), data_capacity); + let mut builder = StringColumnBuilder::with_capacity(col.len()); for val in col.iter() { func(val, &mut builder, ctx); } @@ -304,34 +290,18 @@ fn char_fn(args: &[ValueRef], _: &mut EvalContext) -> Value { }); let input_rows = len.unwrap_or(1); - let mut values: Vec = vec![0; input_rows * args.len()]; - let values_ptr = values.as_mut_ptr(); + let mut builder = BinaryColumnBuilder::with_capacity(input_rows, 0); - for (i, arg) in args.iter().enumerate() { - match arg { - ValueRef::Scalar(v) => { - for j in 0..input_rows { - unsafe { - *values_ptr.add(args.len() * j + i) = *v; - } - } - } - ValueRef::Column(c) => { - for (j, ch) in UInt8Type::iter_column(c).enumerate() { - unsafe { - *values_ptr.add(args.len() * j + i) = ch; - } - } - } + for _ in 0..input_rows { + for arg in &args { + let val = arg.index(0).unwrap(); + builder.put_u8(val); } + builder.commit_row(); } - let offsets = (0..(input_rows + 1) as u64 * args.len() as u64) - .step_by(args.len()) - .collect::>(); - let result = BinaryColumn::new(values.into(), offsets.into()); match len { - Some(_) => Value::Column(Column::Binary(result)), - _ => Value::Scalar(Scalar::Binary(result.index(0).unwrap().to_vec())), + Some(_) => Value::Column(Column::Binary(builder.build())), + _ => Value::Scalar(Scalar::Binary(builder.build_scalar())), } } diff --git a/src/query/functions/src/scalars/bitmap.rs b/src/query/functions/src/scalars/bitmap.rs index 59b7fbfbf8f3..0cb936957c80 100644 --- a/src/query/functions/src/scalars/bitmap.rs +++ b/src/query/functions/src/scalars/bitmap.rs @@ -182,14 +182,13 @@ pub fn register(registry: &mut FunctionRegistry) { Ok(rb) => { let raw = rb.into_iter().collect::>(); let s = join(raw.iter(), ","); - builder.put_str(&s); + builder.put_and_commit(s); } Err(e) => { ctx.set_error(builder.len(), e.to_string()); + builder.commit_row(); } } - - builder.commit_row(); }), ); diff --git a/src/query/functions/src/scalars/comparison.rs b/src/query/functions/src/scalars/comparison.rs index 71f667de8bd5..8346066d128e 100644 --- a/src/query/functions/src/scalars/comparison.rs +++ b/src/query/functions/src/scalars/comparison.rs @@ -30,6 +30,7 @@ use databend_common_expression::types::EmptyArrayType; use databend_common_expression::types::GenericType; use databend_common_expression::types::NumberClass; use databend_common_expression::types::NumberType; +use databend_common_expression::types::StringColumn; use databend_common_expression::types::StringType; use databend_common_expression::types::TimestampType; use databend_common_expression::types::ValueType; @@ -158,7 +159,66 @@ macro_rules! register_simple_domain_type_cmp { } fn register_string_cmp(registry: &mut FunctionRegistry) { - register_simple_domain_type_cmp!(registry, StringType); + registry.register_passthrough_nullable_2_arg::( + "eq", + |_, d1, d2| d1.domain_eq(d2), + vectorize_string_cmp(|cmp| cmp == Ordering::Equal), + ); + registry.register_passthrough_nullable_2_arg::( + "noteq", + |_, d1, d2| d1.domain_noteq(d2), + vectorize_string_cmp(|cmp| cmp != Ordering::Equal), + ); + registry.register_passthrough_nullable_2_arg::( + "gt", + |_, d1, d2| d1.domain_gt(d2), + vectorize_string_cmp(|cmp| cmp == Ordering::Greater), + ); + registry.register_passthrough_nullable_2_arg::( + "gte", + |_, d1, d2| d1.domain_gte(d2), + vectorize_string_cmp(|cmp| cmp != Ordering::Less), + ); + registry.register_passthrough_nullable_2_arg::( + "lt", + |_, d1, d2| d1.domain_lt(d2), + vectorize_string_cmp(|cmp| cmp == Ordering::Less), + ); + registry.register_passthrough_nullable_2_arg::( + "lte", + |_, d1, d2| d1.domain_lte(d2), + vectorize_string_cmp(|cmp| cmp != Ordering::Greater), + ); +} + +fn vectorize_string_cmp( + func: impl Fn(Ordering) -> bool + Copy, +) -> impl Fn(ValueRef, ValueRef, &mut EvalContext) -> Value + Copy +{ + move |arg1, arg2, _ctx| match (arg1, arg2) { + (ValueRef::Scalar(arg1), ValueRef::Scalar(arg2)) => Value::Scalar(func(arg1.cmp(arg2))), + (ValueRef::Column(arg1), ValueRef::Scalar(arg2)) => { + let mut builder = MutableBitmap::with_capacity(arg1.len()); + for i in 0..arg1.len() { + builder.push(func(StringColumn::compare_str(&arg1, i, arg2))); + } + Value::Column(builder.into()) + } + (ValueRef::Scalar(arg1), ValueRef::Column(arg2)) => { + let mut builder = MutableBitmap::with_capacity(arg1.len()); + for i in 0..arg2.len() { + builder.push(func(StringColumn::compare_str(&arg2, i, arg1).reverse())); + } + Value::Column(builder.into()) + } + (ValueRef::Column(arg1), ValueRef::Column(arg2)) => { + let mut builder = MutableBitmap::with_capacity(arg1.len()); + for i in 0..arg1.len() { + builder.push(func(StringColumn::compare(&arg1, i, &arg2, i))); + } + Value::Column(builder.into()) + } + } } fn register_date_cmp(registry: &mut FunctionRegistry) { @@ -531,35 +591,8 @@ fn vectorize_like( let mut builder = MutableBitmap::with_capacity(arg1.len()); let pattern_type = generate_like_pattern(arg2.as_bytes(), arg1.current_buffer_len()); if let LikePattern::SurroundByPercent(searcher) = pattern_type { - let needle_byte_len = searcher.needle().len(); - let data = arg1.data().as_slice(); - let offsets = arg1.offsets().as_slice(); - let mut idx = 0; - let mut pos = (*offsets.first().unwrap()) as usize; - let end = (*offsets.last().unwrap()) as usize; - - while pos < end { - if let Some(p) = searcher.search(&data[pos..end]) { - // data: {3x}googlex|{3x}googlex|{3x}googlex - // needle_size: 6 - // offsets: 0, 10, 20, 30 - // (pos, p): (0, 3) , (10, 3), (20, 3), () - while offsets[idx + 1] as usize <= pos + p { - builder.push(false); - idx += 1; - } - // check if the substring is in bound - builder.push(pos + p + needle_byte_len <= offsets[idx + 1] as usize); - pos = offsets[idx + 1] as usize; - idx += 1; - } else { - break; - } - } - - while idx < arg1.len() { - builder.push(false); - idx += 1; + for arg1 in arg1_iter { + builder.push(searcher.search(arg1.as_bytes()).is_some()); } } else { for arg1 in arg1_iter { diff --git a/src/query/functions/src/scalars/datetime.rs b/src/query/functions/src/scalars/datetime.rs index bf095ebdf225..b4ed0508f772 100644 --- a/src/query/functions/src/scalars/datetime.rs +++ b/src/query/functions/src/scalars/datetime.rs @@ -706,7 +706,12 @@ fn register_to_string(registry: &mut FunctionRegistry) { "to_string", |_, _| FunctionDomain::Full, vectorize_with_builder_1_arg::(|val, output, ctx| { - write!(output.data, "{}", date_to_string(val, ctx.func_ctx.tz.tz)).unwrap(); + write!( + output.row_buffer, + "{}", + date_to_string(val, ctx.func_ctx.tz.tz) + ) + .unwrap(); output.commit_row(); }), ); @@ -716,7 +721,7 @@ fn register_to_string(registry: &mut FunctionRegistry) { |_, _| FunctionDomain::Full, vectorize_with_builder_1_arg::(|val, output, ctx| { write!( - output.data, + output.row_buffer, "{}", timestamp_to_string(val, ctx.func_ctx.tz.tz) ) @@ -738,7 +743,7 @@ fn register_to_string(registry: &mut FunctionRegistry) { }, vectorize_with_builder_1_arg::>(|val, output, ctx| { write!( - output.builder.data, + output.builder.row_buffer, "{}", date_to_string(val, ctx.func_ctx.tz.tz) ) @@ -762,7 +767,7 @@ fn register_to_string(registry: &mut FunctionRegistry) { vectorize_with_builder_1_arg::>( |val, output, ctx| { write!( - output.builder.data, + output.builder.row_buffer, "{}", timestamp_to_string(val, ctx.func_ctx.tz.tz) ) diff --git a/src/query/functions/src/scalars/decimal/cast.rs b/src/query/functions/src/scalars/decimal/cast.rs index 3547e2611d55..540bbad5f84b 100644 --- a/src/query/functions/src/scalars/decimal/cast.rs +++ b/src/query/functions/src/scalars/decimal/cast.rs @@ -313,7 +313,7 @@ fn decimal_to_string( match arg { ValueRef::Column(col) => { - let mut builder = StringColumnBuilder::with_capacity(col.len(), col.len() * 10); + let mut builder = StringColumnBuilder::with_capacity(col.len()); for x in DecimalType::::iter_column(&col) { builder.put_str(&DECIMAL_TYPE::display(x, from_size.scale)); builder.commit_row(); diff --git a/src/query/functions/src/scalars/geo_h3.rs b/src/query/functions/src/scalars/geo_h3.rs index 045327555552..17fbf78ebd60 100644 --- a/src/query/functions/src/scalars/geo_h3.rs +++ b/src/query/functions/src/scalars/geo_h3.rs @@ -272,13 +272,12 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _| FunctionDomain::Full, vectorize_with_builder_1_arg::(|h3, builder, ctx| { match CellIndex::try_from(h3) { - Ok(index) => builder.put_str(&index.to_string()), + Ok(index) => builder.put_and_commit(index.to_string()), Err(err) => { ctx.set_error(builder.len(), err.to_string()); - builder.put_str(""); + builder.put_and_commit(""); } } - builder.commit_row(); }), ); diff --git a/src/query/functions/src/scalars/hash.rs b/src/query/functions/src/scalars/hash.rs index 4e00a7f4aeab..252e3b4eb5cb 100644 --- a/src/query/functions/src/scalars/hash.rs +++ b/src/query/functions/src/scalars/hash.rs @@ -49,8 +49,6 @@ use num_traits::AsPrimitive; use twox_hash::XxHash32; use twox_hash::XxHash64; -use crate::scalars::string::vectorize_string_to_string; - pub fn register(registry: &mut FunctionRegistry) { registry.register_aliases("siphash64", &["siphash"]); registry.register_aliases("sha", &["sha1"]); @@ -79,62 +77,48 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::( "md5", |_, _| FunctionDomain::MayThrow, - vectorize_string_to_string( - |col| col.data().len() * 32, - |val, output, ctx| { - // TODO md5 lib doesn't allow encode into buffer... - let old_len = output.data.len(); - output.data.resize(old_len + 32, 0); - if let Err(err) = hex::encode_to_slice( - Md5Hasher::digest(val).as_slice(), - &mut output.data[old_len..], - ) { - ctx.set_error(output.len(), err.to_string()); - } - output.commit_row(); - }, - ), + vectorize_with_builder_1_arg::(|val, output, ctx| { + // TODO md5 lib doesn't allow encode into buffer... + output.row_buffer.resize(32, 0); + if let Err(err) = + hex::encode_to_slice(Md5Hasher::digest(val).as_slice(), &mut output.row_buffer) + { + ctx.set_error(output.len(), err.to_string()); + } + output.commit_row(); + }), ); registry.register_passthrough_nullable_1_arg::( "sha", |_, _| FunctionDomain::MayThrow, - vectorize_string_to_string( - |col| col.data().len() * 40, - |val, output, ctx| { - let old_len = output.data.len(); - output.data.resize(old_len + 40, 0); - // TODO sha1 lib doesn't allow encode into buffer... - let mut m = ::sha1::Sha1::new(); - sha1::digest::Update::update(&mut m, val.as_bytes()); - - if let Err(err) = - hex::encode_to_slice(m.finalize().as_slice(), &mut output.data[old_len..]) - { - ctx.set_error(output.len(), err.to_string()); - } - output.commit_row(); - }, - ), + vectorize_with_builder_1_arg::(|val, output, ctx| { + output.row_buffer.resize(40, 0); + // TODO sha1 lib doesn't allow encode into buffer... + let mut m = ::sha1::Sha1::new(); + sha1::digest::Update::update(&mut m, val.as_bytes()); + + if let Err(err) = hex::encode_to_slice(m.finalize().as_slice(), &mut output.row_buffer) + { + ctx.set_error(output.len(), err.to_string()); + } + output.commit_row(); + }), ); registry.register_passthrough_nullable_1_arg::( "blake3", |_, _| FunctionDomain::MayThrow, - vectorize_string_to_string( - |col| col.data().len() * 64, - |val, output, ctx| { - let old_len = output.data.len(); - output.data.resize(old_len + 64, 0); - if let Err(err) = hex::encode_to_slice( - blake3::hash(val.as_bytes()).as_bytes(), - &mut output.data[old_len..], - ) { - ctx.set_error(output.len(), err.to_string()); - } - output.commit_row(); - }, - ), + vectorize_with_builder_1_arg::(|val, output, ctx| { + output.row_buffer.resize(64, 0); + if let Err(err) = hex::encode_to_slice( + blake3::hash(val.as_bytes()).as_bytes(), + &mut output.row_buffer, + ) { + ctx.set_error(output.len(), err.to_string()); + } + output.commit_row(); + }), ); registry.register_passthrough_nullable_2_arg::, StringType, _, _>( @@ -175,8 +159,7 @@ pub fn register(registry: &mut FunctionRegistry) { String::new() }, }; - output.put_str(&res); - output.commit_row(); + output.put_and_commit(res); }, ), ); diff --git a/src/query/functions/src/scalars/other.rs b/src/query/functions/src/scalars/other.rs index 2ed8f5eeb456..0dd6f34ec2e2 100644 --- a/src/query/functions/src/scalars/other.rs +++ b/src/query/functions/src/scalars/other.rs @@ -22,6 +22,7 @@ use databend_common_base::base::convert_number_size; use databend_common_base::base::uuid::Uuid; use databend_common_base::base::OrderedFloat; use databend_common_expression::error_to_null; +use databend_common_expression::types::binary::BinaryColumnBuilder; use databend_common_expression::types::boolean::BooleanDomain; use databend_common_expression::types::nullable::NullableColumn; use databend_common_expression::types::number::Float32Type; @@ -93,8 +94,7 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _| FunctionDomain::Full, vectorize_with_builder_1_arg::(move |val, output, _| { let new_val = convert_byte_size(val.into()); - output.put_str(&new_val); - output.commit_row(); + output.put_and_commit(new_val); }), ); @@ -103,8 +103,7 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _| FunctionDomain::Full, vectorize_with_builder_1_arg::(move |val, output, _| { let new_val = convert_number_size(val.into()); - output.put_str(&new_val); - output.commit_row(); + output.put_and_commit(new_val); }), ); @@ -232,16 +231,15 @@ pub fn register(registry: &mut FunctionRegistry) { "gen_random_uuid", |_| FunctionDomain::Full, |ctx| { - let mut values: Vec = Vec::with_capacity(ctx.num_rows * 36); - let mut offsets: Vec = Vec::with_capacity(ctx.num_rows); - offsets.push(0); + let mut builder = BinaryColumnBuilder::with_capacity(ctx.num_rows, 0); for _ in 0..ctx.num_rows { let value = Uuid::new_v4(); - offsets.push(offsets.last().unwrap() + 36u64); - write!(&mut values, "{:x}", value).unwrap(); + write!(&mut builder.data, "{}", value).unwrap(); + builder.commit_row(); } - let col = StringColumn::new(values.into(), offsets.into()); + + let col = StringColumn::try_from(builder.build()).unwrap(); Value::Column(col) }, ); @@ -294,8 +292,7 @@ fn register_inet_ntoa(registry: &mut FunctionRegistry) { match num_traits::cast::cast::(val) { Some(val) => { let addr_str = Ipv4Addr::from(val.to_be_bytes()).to_string(); - output.put_str(&addr_str); - output.commit_row(); + output.put_and_commit(addr_str); } None => { ctx.set_error( @@ -407,8 +404,7 @@ fn register_num_to_char(registry: &mut FunctionRegistry) { .and_then(|entry| entry.process_i64(value)) { Ok(s) => { - builder.put_str(&s); - builder.commit_row() + builder.put_and_commit(s); } Err(e) => { ctx.set_error(builder.len(), e.to_string()); diff --git a/src/query/functions/src/scalars/string.rs b/src/query/functions/src/scalars/string.rs index 25f10b82ccd2..273a1d9dc9ed 100644 --- a/src/query/functions/src/scalars/string.rs +++ b/src/query/functions/src/scalars/string.rs @@ -19,22 +19,19 @@ use databend_common_base::base::uuid::Uuid; use databend_common_expression::types::decimal::Decimal128Type; use databend_common_expression::types::number::SimpleDomain; use databend_common_expression::types::number::UInt64Type; -use databend_common_expression::types::string::StringColumn; use databend_common_expression::types::string::StringColumnBuilder; use databend_common_expression::types::string::StringDomain; use databend_common_expression::types::ArrayType; use databend_common_expression::types::NumberType; use databend_common_expression::types::StringType; use databend_common_expression::unify_string; +use databend_common_expression::vectorize_1_arg; use databend_common_expression::vectorize_with_builder_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; use databend_common_expression::vectorize_with_builder_3_arg; use databend_common_expression::vectorize_with_builder_4_arg; -use databend_common_expression::EvalContext; use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionRegistry; -use databend_common_expression::Value; -use databend_common_expression::ValueRef; use stringslice::StringSlice; pub const ALL_STRING_FUNC_NAMES: &[&str] = &[ @@ -102,41 +99,35 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::( "upper", |_, _| FunctionDomain::Full, - vectorize_string_to_string( - |col| col.data().len(), - |val, output, _| { - for ch in val.chars() { - if ch.is_ascii() { - output.put_char(ch.to_ascii_uppercase()); - } else { - for x in ch.to_uppercase() { - output.put_char(x); - } + vectorize_with_builder_1_arg::(|val, output, _| { + for ch in val.chars() { + if ch.is_ascii() { + output.put_char(ch.to_ascii_uppercase()); + } else { + for x in ch.to_uppercase() { + output.put_char(x); } } - output.commit_row(); - }, - ), + } + output.commit_row(); + }), ); registry.register_passthrough_nullable_1_arg::( "lower", |_, _| FunctionDomain::Full, - vectorize_string_to_string( - |col| col.data().len(), - |val, output, _| { - for ch in val.chars() { - if ch.is_ascii() { - output.put_char(ch.to_ascii_lowercase()); - } else { - for x in ch.to_lowercase() { - output.put_char(x); - } + vectorize_with_builder_1_arg::(|val, output, _| { + for ch in val.chars() { + if ch.is_ascii() { + output.put_char(ch.to_ascii_lowercase()); + } else { + for x in ch.to_lowercase() { + output.put_char(x); } } - output.commit_row(); - }, - ), + } + output.commit_row(); + }), ); registry.register_1_arg::, _, _>( @@ -148,19 +139,7 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::, _, _>( "octet_length", |_, _| FunctionDomain::Full, - |val, _| match val { - ValueRef::Scalar(s) => Value::Scalar(s.len() as u64), - ValueRef::Column(c) => { - let diffs = c - .offsets() - .iter() - .zip(c.offsets().iter().skip(1)) - .map(|(a, b)| b - a) - .collect::>(); - - Value::Column(diffs.into()) - } - }, + vectorize_1_arg::>(|val, _| val.len() as u64), ); registry.register_1_arg::, _, _>( @@ -382,39 +361,33 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::( "quote", |_, _| FunctionDomain::Full, - vectorize_string_to_string( - |col| col.data().len() * 2, - |val, output, _| { - for ch in val.chars() { - match ch { - '\0' => output.put_str("\\0"), - '\'' => output.put_str("\\\'"), - '\"' => output.put_str("\\\""), - '\u{8}' => output.put_str("\\b"), - '\n' => output.put_str("\\n"), - '\r' => output.put_str("\\r"), - '\t' => output.put_str("\\t"), - '\\' => output.put_str("\\\\"), - c => output.put_char(c), - } + vectorize_with_builder_1_arg::(|val, output, _| { + for ch in val.chars() { + match ch { + '\0' => output.put_str("\\0"), + '\'' => output.put_str("\\\'"), + '\"' => output.put_str("\\\""), + '\u{8}' => output.put_str("\\b"), + '\n' => output.put_str("\\n"), + '\r' => output.put_str("\\r"), + '\t' => output.put_str("\\t"), + '\\' => output.put_str("\\\\"), + c => output.put_char(c), } - output.commit_row(); - }, - ), + } + output.commit_row(); + }), ); registry.register_passthrough_nullable_1_arg::( "reverse", |_, _| FunctionDomain::Full, - vectorize_string_to_string( - |col| col.data().len(), - |val, output, _| { - for char in val.chars().rev() { - output.put_char(char); - } - output.commit_row(); - }, - ), + vectorize_with_builder_1_arg::(|val, output, _| { + for char in val.chars().rev() { + output.put_char(char); + } + output.commit_row(); + }), ); registry.register_1_arg::, _, _>( @@ -436,53 +409,38 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::( "ltrim", |_, _| FunctionDomain::Full, - vectorize_string_to_string( - |col| col.data().len(), - |val, output, _| { - output.put_str(val.trim_start()); - output.commit_row(); - }, - ), + vectorize_with_builder_1_arg::(|val, output, _| { + output.put_and_commit(val.trim_start()); + }), ); registry.register_passthrough_nullable_1_arg::( "rtrim", |_, _| FunctionDomain::Full, - vectorize_string_to_string( - |col| col.data().len(), - |val, output, _| { - output.put_str(val.trim_end()); - output.commit_row(); - }, - ), + vectorize_with_builder_1_arg::(|val, output, _| { + output.put_and_commit(val.trim_end()); + }), ); registry.register_passthrough_nullable_1_arg::( "trim", |_, _| FunctionDomain::Full, - vectorize_string_to_string( - |col| col.data().len(), - |val, output, _| { - output.put_str(val.trim()); - output.commit_row(); - }, - ), + vectorize_with_builder_1_arg::(|val, output, _| { + output.put_and_commit(val.trim()); + }), ); registry.register_passthrough_nullable_2_arg::( "trim_leading", |_, _, _| FunctionDomain::Full, - vectorize_string_to_string_2_arg( - |col, _| col.data().len(), - |val, trim_str, _, output| { + vectorize_with_builder_2_arg::( + |val, trim_str, output, _| { if trim_str.is_empty() { - output.put_str(val); - output.commit_row(); + output.put_and_commit(val); return; } - output.put_str(val.trim_start_matches(trim_str)); - output.commit_row(); + output.put_and_commit(val.trim_start_matches(trim_str)); }, ), ); @@ -490,17 +448,14 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_2_arg::( "trim_trailing", |_, _, _| FunctionDomain::Full, - vectorize_string_to_string_2_arg( - |col, _| col.data().len(), - |val, trim_str, _, output| { + vectorize_with_builder_2_arg::( + |val, trim_str, output, _| { if trim_str.is_empty() { - output.put_str(val); - output.commit_row(); + output.put_and_commit(val); return; } - output.put_str(val.trim_end_matches(trim_str)); - output.commit_row(); + output.put_and_commit(val.trim_end_matches(trim_str)); }, ), ); @@ -508,12 +463,10 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_2_arg::( "trim_both", |_, _, _| FunctionDomain::Full, - vectorize_string_to_string_2_arg( - |col, _| col.data().len(), - |val, trim_str, _, output| { + vectorize_with_builder_2_arg::( + |val, trim_str, output, _| { if trim_str.is_empty() { - output.put_str(val); - output.commit_row(); + output.put_and_commit(val); return; } @@ -526,8 +479,7 @@ pub fn register(registry: &mut FunctionRegistry) { res = &res[..res.len() - trim_str.len()]; } - output.put_str(res); - output.commit_row(); + output.put_and_commit(res); }, ), ); @@ -535,16 +487,12 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::( "to_hex", |_, _| FunctionDomain::Full, - vectorize_string_to_string( - |col| col.data().len() * 2, - |val, output, _| { - let old_len = output.data.len(); - let extra_len = val.len() * 2; - output.data.resize(old_len + extra_len, 0); - hex::encode_to_slice(val, &mut output.data[old_len..]).unwrap(); - output.commit_row(); - }, - ), + vectorize_with_builder_1_arg::(|val, output, _| { + let len = val.len() * 2; + output.row_buffer.resize(len, 0); + hex::encode_to_slice(val, &mut output.row_buffer).unwrap(); + output.commit_row(); + }), ); // TODO: generalize them to be alias of [CONV](https://dev.mysql.com/doc/refman/8.0/en/mathematical-functions.html#function_conv) @@ -553,7 +501,7 @@ pub fn register(registry: &mut FunctionRegistry) { "bin", |_, _| FunctionDomain::Full, vectorize_with_builder_1_arg::, StringType>(|val, output, _| { - write!(output.data, "{val:b}").unwrap(); + write!(output.row_buffer, "{val:b}").unwrap(); output.commit_row(); }), ); @@ -561,7 +509,7 @@ pub fn register(registry: &mut FunctionRegistry) { "oct", |_, _| FunctionDomain::Full, vectorize_with_builder_1_arg::, StringType>(|val, output, _| { - write!(output.data, "{val:o}").unwrap(); + write!(output.row_buffer, "{val:o}").unwrap(); output.commit_row(); }), ); @@ -569,7 +517,7 @@ pub fn register(registry: &mut FunctionRegistry) { "to_hex", |_, _| FunctionDomain::Full, vectorize_with_builder_1_arg::, StringType>(|val, output, _| { - write!(output.data, "{val:x}").unwrap(); + write!(output.row_buffer, "{val:x}").unwrap(); output.commit_row(); }), ); @@ -624,56 +572,26 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::( "soundex", |_, _| FunctionDomain::Full, - vectorize_string_to_string( - |col| usize::max(col.data().len(), 4 * col.len()), - soundex::soundex, - ), + vectorize_with_builder_1_arg::(soundex::soundex), ); const MAX_SPACE_LENGTH: u64 = 1000000; registry.register_passthrough_nullable_1_arg::, StringType, _, _>( "space", |_, _| FunctionDomain::MayThrow, - |times, ctx| match times { - ValueRef::Scalar(times) => { - if times > MAX_SPACE_LENGTH { - ctx.set_error( - 0, - format!("space length is too big, max is: {}", MAX_SPACE_LENGTH), - ); - Value::Scalar("".to_string()) - } else { - Value::Scalar(" ".repeat(times as usize)) - } - } - ValueRef::Column(col) => { - let mut total_space: u64 = 0; - let mut offsets: Vec = Vec::with_capacity(col.len() + 1); - offsets.push(0); - for (i, times) in col.iter().enumerate() { - if times > &MAX_SPACE_LENGTH { - ctx.set_error( - i, - format!("space length is too big, max is: {}", MAX_SPACE_LENGTH), - ); - break; - } - total_space += times; - offsets.push(total_space); - } - if ctx.errors.is_some() { - offsets.truncate(1); - total_space = 0; - } - let col = StringColumnBuilder { - data: " ".repeat(total_space as usize).into_bytes(), - offsets, - need_estimated: false, + vectorize_with_builder_1_arg::, StringType>(|times, output, ctx| { + if times > MAX_SPACE_LENGTH { + ctx.set_error( + output.len(), + format!("space length is too big, max is: {}", MAX_SPACE_LENGTH), + ); + } else { + for _ in 0..times { + output.put_char(' '); } - .build(); - Value::Column(col) } - }, + output.commit_row(); + }), ); registry.register_passthrough_nullable_2_arg::, StringType, _, _>( @@ -702,11 +620,10 @@ pub fn register(registry: &mut FunctionRegistry) { let n = n as usize; let s_len = s.chars().count(); if n < s_len { - output.put_str(s.slice(0..n)); + output.put_and_commit(s.slice(0..n)); } else { - output.put_str(s); + output.put_and_commit(s); } - output.commit_row(); }, ), ); @@ -719,11 +636,10 @@ pub fn register(registry: &mut FunctionRegistry) { let n = n as usize; let s_len = s.chars().count(); if n < s_len { - output.put_str(s.slice(s_len - n..)); + output.put_and_commit(s.slice(s_len - n..)); } else { - output.put_str(s); + output.put_and_commit(s); } - output.commit_row(); }, ), ); @@ -783,12 +699,10 @@ pub fn register(registry: &mut FunctionRegistry) { if s == sep { output.builder.commit_row(); } else if sep.is_empty() { - output.builder.put_str(s); - output.builder.commit_row(); + output.builder.put_and_commit(s); } else { for v in s.split(sep) { - output.builder.put_str(v); - output.builder.commit_row(); + output.builder.put_and_commit(v); } } output.commit_row(); @@ -916,68 +830,3 @@ fn substr(builder: &mut StringColumnBuilder, str: &str, pos: i64, len: u64) { builder.put_char_iter(str.chars().skip(start).take(len as usize)); builder.commit_row(); } - -/// String to String scalar function with estimated output column capacity. -pub fn vectorize_string_to_string( - estimate_bytes: impl Fn(&StringColumn) -> usize + Copy, - func: impl Fn(&str, &mut StringColumnBuilder, &mut EvalContext) + Copy, -) -> impl Fn(ValueRef, &mut EvalContext) -> Value + Copy { - move |arg1, ctx| match arg1 { - ValueRef::Scalar(val) => { - let mut builder = StringColumnBuilder::with_capacity(1, 0); - func(val, &mut builder, ctx); - Value::Scalar(builder.build_scalar()) - } - ValueRef::Column(col) => { - let data_capacity = estimate_bytes(&col); - let mut builder = StringColumnBuilder::with_capacity(col.len(), data_capacity); - for val in col.iter() { - func(val, &mut builder, ctx); - } - - Value::Column(builder.build()) - } - } -} - -/// (String, String) to String scalar function with estimated output column capacity. -fn vectorize_string_to_string_2_arg( - estimate_bytes: impl Fn(&StringColumn, &StringColumn) -> usize + Copy, - func: impl Fn(&str, &str, &mut EvalContext, &mut StringColumnBuilder) + Copy, -) -> impl Fn(ValueRef, ValueRef, &mut EvalContext) -> Value + Copy -{ - move |arg1, arg2, ctx| match (arg1, arg2) { - (ValueRef::Scalar(arg1), ValueRef::Scalar(arg2)) => { - let mut builder = StringColumnBuilder::with_capacity(1, 0); - func(arg1, arg2, ctx, &mut builder); - Value::Scalar(builder.build_scalar()) - } - (ValueRef::Scalar(arg1), ValueRef::Column(arg2)) => { - let data_capacity = - estimate_bytes(&StringColumnBuilder::repeat(arg1, 1).build(), &arg2); - let mut builder = StringColumnBuilder::with_capacity(arg2.len(), data_capacity); - for val in arg2.iter() { - func(arg1, val, ctx, &mut builder); - } - Value::Column(builder.build()) - } - (ValueRef::Column(arg1), ValueRef::Scalar(arg2)) => { - let data_capacity = - estimate_bytes(&arg1, &StringColumnBuilder::repeat(arg2, 1).build()); - let mut builder = StringColumnBuilder::with_capacity(arg1.len(), data_capacity); - for val in arg1.iter() { - func(val, arg2, ctx, &mut builder); - } - Value::Column(builder.build()) - } - (ValueRef::Column(arg1), ValueRef::Column(arg2)) => { - let data_capacity = estimate_bytes(&arg1, &arg2); - let mut builder = StringColumnBuilder::with_capacity(arg1.len(), data_capacity); - let iter = arg1.iter().zip(arg2.iter()); - for (val1, val2) in iter { - func(val1, val2, ctx, &mut builder); - } - Value::Column(builder.build()) - } - } -} diff --git a/src/query/functions/src/scalars/string_multi_args.rs b/src/query/functions/src/scalars/string_multi_args.rs index ebbae81fbee2..cb5547b9ae7f 100644 --- a/src/query/functions/src/scalars/string_multi_args.rs +++ b/src/query/functions/src/scalars/string_multi_args.rs @@ -19,7 +19,6 @@ use databend_common_expression::passthrough_nullable; use databend_common_expression::types::nullable::NullableColumn; use databend_common_expression::types::number::Int64Type; use databend_common_expression::types::number::NumberScalar; -use databend_common_expression::types::string::StringColumnBuilder; use databend_common_expression::types::string::StringDomain; use databend_common_expression::types::NumberColumn; use databend_common_expression::types::*; @@ -34,6 +33,7 @@ use databend_common_expression::FunctionSignature; use databend_common_expression::Scalar; use databend_common_expression::Value; use databend_common_expression::ValueRef; +use string::StringColumnBuilder; pub fn register(registry: &mut FunctionRegistry) { registry.register_function_factory("concat", |_, args_type| { @@ -114,7 +114,7 @@ pub fn register(registry: &mut FunctionRegistry) { .collect::>(); let size = len.unwrap_or(1); - let mut builder = StringColumnBuilder::with_capacity(size, 0); + let mut builder = StringColumnBuilder::with_capacity(size); match &args[0] { ValueRef::Scalar(sep) => { @@ -430,7 +430,7 @@ fn concat_fn(args: &[ValueRef], _: &mut EvalContext) -> Value .collect::>(); let size = len.unwrap_or(1); - let mut builder = StringColumnBuilder::with_capacity(size, 0); + let mut builder = StringColumnBuilder::with_capacity(size); for idx in 0..size { for arg in &args { builder.put_str(unsafe { arg.index_unchecked(idx) }); @@ -641,7 +641,7 @@ fn regexp_replace_fn(args: &[ValueRef], ctx: &mut EvalContext) -> Value }; let size = len.unwrap_or(1); - let mut builder = StringColumnBuilder::with_capacity(size, 0); + let mut builder = StringColumnBuilder::with_capacity(size); let cached_reg = match (&pat_arg, &mt_arg) { (ValueRef::Scalar(pat), Some(ValueRef::Scalar(mt))) => { @@ -691,8 +691,7 @@ fn regexp_replace_fn(args: &[ValueRef], ctx: &mut EvalContext) -> Value } if source.is_empty() || pat.is_empty() { - builder.put_str(source); - builder.commit_row(); + builder.put_and_commit(source); continue; } @@ -766,7 +765,7 @@ fn regexp_substr_fn(args: &[ValueRef], ctx: &mut EvalContext) -> Value< }; let size = len.unwrap_or(1); - let mut builder = StringColumnBuilder::with_capacity(size, 0); + let mut builder = StringColumnBuilder::with_capacity(size); let mut validity = MutableBitmap::with_capacity(size); for idx in 0..size { let source = unsafe { source_arg.index_unchecked(idx) }; diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index f118929c8014..79e156ed8cb3 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -1009,8 +1009,7 @@ pub fn register(registry: &mut FunctionRegistry) { } } let json_str = cast_to_string(val); - output.put_str(&json_str); - output.commit_row(); + output.put_and_commit(json_str); }), ); @@ -1953,7 +1952,7 @@ fn get_by_keypath_fn( let len = len_opt.unwrap_or(1); let mut builder = if string_res { - ColumnBuilder::String(StringColumnBuilder::with_capacity(len, len * 50)) + ColumnBuilder::String(StringColumnBuilder::with_capacity(len)) } else { ColumnBuilder::Variant(BinaryColumnBuilder::with_capacity(len, len * 50)) }; diff --git a/src/query/functions/src/scalars/vector.rs b/src/query/functions/src/scalars/vector.rs index b551f2539bf3..f64032e5ca9a 100644 --- a/src/query/functions/src/scalars/vector.rs +++ b/src/query/functions/src/scalars/vector.rs @@ -188,16 +188,14 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_with_builder_1_arg::(|data, output, ctx| { if let Some(validity) = &ctx.validity { if !validity.get_bit(output.len()) { - output.put_str(""); - output.commit_row(); + output.put_and_commit(""); return; } } if ctx.func_ctx.openai_api_key.is_empty() { ctx.set_error(output.len(), "openai_api_key is empty".to_string()); - output.put_str(""); - output.commit_row(); + output.put_and_commit(""); return; } let api_base = ctx.func_ctx.openai_api_chat_base_url.clone(); @@ -216,17 +214,16 @@ pub fn register(registry: &mut FunctionRegistry) { let result = openai.completion_text_request(data.to_string()); match result { Ok((resp, _)) => { - output.put_str(&resp); + output.put_and_commit(resp); } Err(e) => { ctx.set_error( output.len(), format!("openai completion request error:{:?}", e), ); - output.put_str(""); + output.put_and_commit(""); } } - output.commit_row(); }), ); } diff --git a/src/query/functions/src/srfs/variant.rs b/src/query/functions/src/srfs/variant.rs index eb4c8db7e3a0..f6d73b140577 100644 --- a/src/query/functions/src/srfs/variant.rs +++ b/src/query/functions/src/srfs/variant.rs @@ -678,13 +678,12 @@ fn unnest_variant_obj( Some(vals) if !vals.is_empty() => { let len = vals.len(); let mut val_builder = BinaryColumnBuilder::with_capacity(0, 0); - let mut key_builder = StringColumnBuilder::with_capacity(0, 0); + let mut key_builder = StringColumnBuilder::with_capacity(0); max_nums_per_row[row] = std::cmp::max(max_nums_per_row[row], len); for (key, val) in vals { - key_builder.put_str(&String::from_utf8_lossy(&key)); - key_builder.commit_row(); + key_builder.put_and_commit(String::from_utf8_lossy(&key)); val_builder.put_slice(&val); val_builder.commit_row(); } @@ -807,8 +806,7 @@ impl FlattenGenerator { key_builder.push_null(); } if let Some(path_builder) = path_builder { - path_builder.put_str(&inner_path); - path_builder.commit_row(); + path_builder.put_and_commit(&inner_path); } if let Some(index_builder) = index_builder { index_builder.push(i.try_into().unwrap()); @@ -867,8 +865,7 @@ impl FlattenGenerator { key_builder.push(name.as_ref()); } if let Some(path_builder) = path_builder { - path_builder.put_str(&inner_path); - path_builder.commit_row(); + path_builder.put_and_commit(&inner_path); } if let Some(index_builder) = index_builder { index_builder.push_null(); @@ -908,7 +905,7 @@ impl FlattenGenerator { None }; let mut path_builder = if params.is_empty() || params.contains(&3) { - Some(StringColumnBuilder::with_capacity(0, 0)) + Some(StringColumnBuilder::with_capacity(0)) } else { None }; diff --git a/src/query/functions/tests/it/aggregates/testdata/agg.txt b/src/query/functions/tests/it/aggregates/testdata/agg.txt index 85300eac60a7..777b5b540fa0 100644 --- a/src/query/functions/tests/it/aggregates/testdata/agg.txt +++ b/src/query/functions/tests/it/aggregates/testdata/agg.txt @@ -941,12 +941,12 @@ evaluation (internal): ast: array_agg('a') evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------+ -| a | Int64([4, 3, 2, 1]) | -| Output | ArrayColumn { values: StringColumn { data: 0x61616161, offsets: [0, 1, 2, 3, 4] }, offsets: [0, 4] } | -+--------+------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------+ +| a | Int64([4, 3, 2, 1]) | +| Output | ArrayColumn { values: StringColumn { data: Utf8ViewArray[a, a, a, a] }, offsets: [0, 4] } | ++--------+-------------------------------------------------------------------------------------------+ ast: array_agg(NULL) @@ -1031,42 +1031,42 @@ evaluation (internal): ast: string_agg(s) evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6162636465666f707178797a, offsets: [0, 3, 6, 9, 12] } | -| Output | NullableColumn { column: StringColumn { data: 0x6162636465666f707178797a, offsets: [0, 12] }, validity: [0b_______1] } | -+--------+------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[abc, def, opq, xyz] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[abcdefopqxyz] }, validity: [0b_______1] } | ++--------+-------------------------------------------------------------------------------------------------------+ ast: string_agg(s_null) evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------+ -| s_null | NullableColumn { column: StringColumn { data: 0x616364, offsets: [0, 1, 1, 2, 3] }, validity: [0b____1101] } | -| Output | NullableColumn { column: StringColumn { data: 0x616364, offsets: [0, 3] }, validity: [0b_______1] } | -+--------+--------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------+ +| s_null | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, , c, d] }, validity: [0b____1101] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[acd] }, validity: [0b_______1] } | ++--------+----------------------------------------------------------------------------------------------------+ ast: string_agg(s, '|') evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6162636465666f707178797a, offsets: [0, 3, 6, 9, 12] } | -| Output | NullableColumn { column: StringColumn { data: 0x6162637c6465667c6f70717c78797a, offsets: [0, 15] }, validity: [0b_______1] } | -+--------+------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[abc, def, opq, xyz] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc|def|opq|xyz] }, validity: [0b_______1] } | ++--------+----------------------------------------------------------------------------------------------------------+ ast: string_agg(s_null, '-') evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------+ -| s_null | NullableColumn { column: StringColumn { data: 0x616364, offsets: [0, 1, 1, 2, 3] }, validity: [0b____1101] } | -| Output | NullableColumn { column: StringColumn { data: 0x612d632d64, offsets: [0, 5] }, validity: [0b_______1] } | -+--------+--------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------+ +| s_null | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, , c, d] }, validity: [0b____1101] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[a-c-d] }, validity: [0b_______1] } | ++--------+----------------------------------------------------------------------------------------------------+ ast: bitmap_and_count(bm) @@ -1317,42 +1317,42 @@ evaluation (internal): ast: histogram(all_null) evaluation (internal): -+----------+-----------------------------------------------------------------------------------------------+ -| Column | Data | -+----------+-----------------------------------------------------------------------------------------------+ -| all_null | NullableColumn { column: UInt64([1, 2, 3, 4]), validity: [0b____0000] } | -| Output | NullableColumn { column: StringColumn { data: 0x, offsets: [0, 0] }, validity: [0b_______0] } | -+----------+-----------------------------------------------------------------------------------------------+ ++----------+-------------------------------------------------------------------------------------------+ +| Column | Data | ++----------+-------------------------------------------------------------------------------------------+ +| all_null | NullableColumn { column: UInt64([1, 2, 3, 4]), validity: [0b____0000] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[] }, validity: [0b_______0] } | ++----------+-------------------------------------------------------------------------------------------+ ast: histogram(x_null) evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| x_null | NullableColumn { column: UInt64([1, 2, 3, 4]), validity: [0b____0011] } | -| Output | NullableColumn { column: StringColumn { data: 0x5b7b226c6f776572223a2231222c227570706572223a2231222c226e6476223a312c22636f756e74223a312c227072655f73756d223a307d2c7b226c6f776572223a2232222c227570706572223a2232222c226e6476223a312c22636f756e74223a312c227072655f73756d223a317d5d, offsets: [0, 113] }, validity: [0b_______1] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| x_null | NullableColumn { column: UInt64([1, 2, 3, 4]), validity: [0b____0011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[[{"lower":"1","upper":"1","ndv":1,"count":1,"pre_sum":0},{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":1}]] }, validity: [0b_______1] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast: histogram(a) evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | Int64([4, 3, 2, 1]) | -| Output | NullableColumn { column: StringColumn { data: 0x5b7b226c6f776572223a2231222c227570706572223a2231222c226e6476223a312c22636f756e74223a312c227072655f73756d223a307d2c7b226c6f776572223a2232222c227570706572223a2232222c226e6476223a312c22636f756e74223a312c227072655f73756d223a317d2c7b226c6f776572223a2233222c227570706572223a2233222c226e6476223a312c22636f756e74223a312c227072655f73756d223a327d2c7b226c6f776572223a2234222c227570706572223a2234222c226e6476223a312c22636f756e74223a312c227072655f73756d223a337d5d, offsets: [0, 225] }, validity: [0b_______1] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | Int64([4, 3, 2, 1]) | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[[{"lower":"1","upper":"1","ndv":1,"count":1,"pre_sum":0},{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":1},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":2},{"lower":"4","upper":"4","ndv":1,"count":1,"pre_sum":3}]] }, validity: [0b_______1] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast: histogram(a, 1) evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | Int64([4, 3, 2, 1]) | -| Output | NullableColumn { column: StringColumn { data: 0x5b7b226c6f776572223a2231222c227570706572223a2234222c226e6476223a342c22636f756e74223a342c227072655f73756d223a307d5d, offsets: [0, 57] }, validity: [0b_______1] } | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | Int64([4, 3, 2, 1]) | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[[{"lower":"1","upper":"4","ndv":4,"count":4,"pre_sum":0}]] }, validity: [0b_______1] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------+ ast: json_array_agg(1) @@ -1463,7 +1463,7 @@ evaluation (internal): | Column | Data | +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ | a | Int64([4, 3, 2, 1]) | -| s | StringColumn { data: 0x6162636465666f707178797a, offsets: [0, 3, 6, 9, 12] } | +| s | StringColumn { data: Utf8ViewArray[abc, def, opq, xyz] } | | Output | BinaryColumn { data: 0x4000000410000003100000031000000310000003200000022000000220000002200000026162636465666f707178797a4004400340024001, offsets: [0, 56] } | +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -1474,7 +1474,7 @@ evaluation (internal): | Column | Data | +--------+-----------------------------------------------------------------------------------------------------------------------+ | b | UInt64([1, 2, 3, 4]) | -| s_null | NullableColumn { column: StringColumn { data: 0x616364, offsets: [0, 1, 1, 2, 3] }, validity: [0b____1101] } | +| s_null | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, , c, d] }, validity: [0b____1101] } | | Output | BinaryColumn { data: 0x40000003100000011000000110000001200000022000000220000002616364500150035004, offsets: [0, 37] } | +--------+-----------------------------------------------------------------------------------------------------------------------+ @@ -1486,7 +1486,7 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6162636465666f707178797a, offsets: [0, 3, 6, 9, 12] } | +| s | StringColumn { data: Utf8ViewArray[abc, def, opq, xyz] } | | dec | NullableColumn { column: Decimal128([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } | | Output | BinaryColumn { data: 0x4000000310000003100000031000000320000009200000092000000961626364656678797a603ff199999999999a60400199999999999a60400a666666666666, offsets: [0, 64] } | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/query/functions/tests/it/aggregates/testdata/agg_group_by.txt b/src/query/functions/tests/it/aggregates/testdata/agg_group_by.txt index 2a4854afe6c9..1b69c2840d76 100644 --- a/src/query/functions/tests/it/aggregates/testdata/agg_group_by.txt +++ b/src/query/functions/tests/it/aggregates/testdata/agg_group_by.txt @@ -919,12 +919,12 @@ evaluation (internal): ast: array_agg('a') evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------+ -| a | Int64([4, 3, 2, 1]) | -| Output | ArrayColumn { values: StringColumn { data: 0x61616161, offsets: [0, 1, 2, 3, 4] }, offsets: [0, 2, 4] } | -+--------+---------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------+ +| a | Int64([4, 3, 2, 1]) | +| Output | ArrayColumn { values: StringColumn { data: Utf8ViewArray[a, a, a, a] }, offsets: [0, 2, 4] } | ++--------+----------------------------------------------------------------------------------------------+ ast: array_agg(NULL) @@ -1009,42 +1009,42 @@ evaluation (internal): ast: string_agg(s) evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6162636465666f707178797a, offsets: [0, 3, 6, 9, 12] } | -| Output | NullableColumn { column: StringColumn { data: 0x6162636f707164656678797a, offsets: [0, 6, 12] }, validity: [0b______11] } | -+--------+---------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[abc, def, opq, xyz] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[abcopq, defxyz] }, validity: [0b______11] } | ++--------+---------------------------------------------------------------------------------------------------------+ ast: string_agg(s_null) evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------+ -| s_null | NullableColumn { column: StringColumn { data: 0x616364, offsets: [0, 1, 1, 2, 3] }, validity: [0b____1101] } | -| Output | NullableColumn { column: StringColumn { data: 0x616364, offsets: [0, 2, 3] }, validity: [0b______11] } | -+--------+--------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------+ +| s_null | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, , c, d] }, validity: [0b____1101] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[ac, d] }, validity: [0b______11] } | ++--------+----------------------------------------------------------------------------------------------------+ ast: string_agg(s, '|') evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6162636465666f707178797a, offsets: [0, 3, 6, 9, 12] } | -| Output | NullableColumn { column: StringColumn { data: 0x6162637c6f70716465667c78797a, offsets: [0, 7, 14] }, validity: [0b______11] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[abc, def, opq, xyz] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc|opq, def|xyz] }, validity: [0b______11] } | ++--------+-----------------------------------------------------------------------------------------------------------+ ast: string_agg(s_null, '-') evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------+ -| s_null | NullableColumn { column: StringColumn { data: 0x616364, offsets: [0, 1, 1, 2, 3] }, validity: [0b____1101] } | -| Output | NullableColumn { column: StringColumn { data: 0x612d6364, offsets: [0, 3, 4] }, validity: [0b______11] } | -+--------+--------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------+ +| s_null | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, , c, d] }, validity: [0b____1101] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[a-c, d] }, validity: [0b______11] } | ++--------+----------------------------------------------------------------------------------------------------+ ast: bitmap_and_count(bm) @@ -1401,7 +1401,7 @@ evaluation (internal): | Column | Data | +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ | a | Int64([4, 3, 2, 1]) | -| s | StringColumn { data: 0x6162636465666f707178797a, offsets: [0, 3, 6, 9, 12] } | +| s | StringColumn { data: Utf8ViewArray[abc, def, opq, xyz] } | | Output | BinaryColumn { data: 0x4000000410000003100000031000000310000003200000022000000220000002200000026162636465666f707178797a4004400340024001, offsets: [0, 56] } | +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -1412,7 +1412,7 @@ evaluation (internal): | Column | Data | +--------+-----------------------------------------------------------------------------------------------------------------------+ | b | UInt64([1, 2, 3, 4]) | -| s_null | NullableColumn { column: StringColumn { data: 0x616364, offsets: [0, 1, 1, 2, 3] }, validity: [0b____1101] } | +| s_null | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, , c, d] }, validity: [0b____1101] } | | Output | BinaryColumn { data: 0x40000003100000011000000110000001200000022000000220000002616364500150035004, offsets: [0, 37] } | +--------+-----------------------------------------------------------------------------------------------------------------------+ @@ -1424,7 +1424,7 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6162636465666f707178797a, offsets: [0, 3, 6, 9, 12] } | +| s | StringColumn { data: Utf8ViewArray[abc, def, opq, xyz] } | | dec | NullableColumn { column: Decimal128([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } | | Output | BinaryColumn { data: 0x4000000310000003100000031000000320000009200000092000000961626364656678797a603ff199999999999a60400199999999999a60400a666666666666, offsets: [0, 64] } | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/query/functions/tests/it/scalars/comparison.rs b/src/query/functions/tests/it/scalars/comparison.rs index bb0f85c3ff17..318673980cb0 100644 --- a/src/query/functions/tests/it/scalars/comparison.rs +++ b/src/query/functions/tests/it/scalars/comparison.rs @@ -365,6 +365,7 @@ fn test_like(file: &mut impl Write) { )]; run_ast(file, "lhs like 'a%'", &columns); run_ast(file, "lhs like 'b%'", &columns); + run_ast(file, "lhs like 'ab%'", &columns); run_ast(file, "lhs like 'c'", &columns); let columns = [ diff --git a/src/query/functions/tests/it/scalars/testdata/arithmetic.txt b/src/query/functions/tests/it/scalars/testdata/arithmetic.txt index dedd9bdb5499..281ddc8e843d 100644 --- a/src/query/functions/tests/it/scalars/testdata/arithmetic.txt +++ b/src/query/functions/tests/it/scalars/testdata/arithmetic.txt @@ -1617,12 +1617,12 @@ evaluation: | Row 2 | 3 | '3' | +--------+---------+--------+ evaluation (internal): -+--------+--------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------+ -| a | Int8([1, 2, 3]) | -| Output | StringColumn { data: 0x313233, offsets: [0, 1, 2, 3] } | -+--------+--------------------------------------------------------+ ++--------+-----------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------+ +| a | Int8([1, 2, 3]) | +| Output | StringColumn { data: Utf8ViewArray[1, 2, 3] } | ++--------+-----------------------------------------------+ ast : to_string(a2) @@ -1639,12 +1639,12 @@ evaluation: | Row 2 | NULL | NULL | +--------+------------------+-----------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------+ -| a2 | NullableColumn { column: UInt8([1, 2, 3]), validity: [0b_____011] } | -| Output | NullableColumn { column: StringColumn { data: 0x313233, offsets: [0, 1, 2, 3] }, validity: [0b_____011] } | -+--------+-----------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------+ +| a2 | NullableColumn { column: UInt8([1, 2, 3]), validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[1, 2, 3] }, validity: [0b_____011] } | ++--------+--------------------------------------------------------------------------------------------------+ ast : to_string(b) @@ -1661,12 +1661,12 @@ evaluation: | Row 2 | 6 | '6' | +--------+---------+--------+ evaluation (internal): -+--------+--------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------+ -| b | Int16([2, 4, 6]) | -| Output | StringColumn { data: 0x323436, offsets: [0, 1, 2, 3] } | -+--------+--------------------------------------------------------+ ++--------+-----------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------+ +| b | Int16([2, 4, 6]) | +| Output | StringColumn { data: Utf8ViewArray[2, 4, 6] } | ++--------+-----------------------------------------------+ ast : to_string(c) @@ -1683,12 +1683,12 @@ evaluation: | Row 2 | 30 | '30' | +--------+-----------+--------+ evaluation (internal): -+--------+--------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------+ -| c | UInt32([10, 20, 30]) | -| Output | StringColumn { data: 0x313032303330, offsets: [0, 2, 4, 6] } | -+--------+--------------------------------------------------------------+ ++--------+--------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------+ +| c | UInt32([10, 20, 30]) | +| Output | StringColumn { data: Utf8ViewArray[10, 20, 30] } | ++--------+--------------------------------------------------+ ast : to_string(d) @@ -1705,12 +1705,12 @@ evaluation: | Row 2 | 30 | '30' | +--------+------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------+ -| d | Float64([10, -20, 30]) | -| Output | StringColumn { data: 0x31302d32303330, offsets: [0, 2, 5, 7] } | -+--------+----------------------------------------------------------------+ ++--------+---------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------+ +| d | Float64([10, -20, 30]) | +| Output | StringColumn { data: Utf8ViewArray[10, -20, 30] } | ++--------+---------------------------------------------------+ ast : to_string(d2) @@ -1727,12 +1727,12 @@ evaluation: | Row 2 | 3 | '3' | +--------+------------------+-----------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------+ -| d2 | NullableColumn { column: UInt8([1, 0, 3]), validity: [0b_____101] } | -| Output | NullableColumn { column: StringColumn { data: 0x313033, offsets: [0, 1, 2, 3] }, validity: [0b_____101] } | -+--------+-----------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------+ +| d2 | NullableColumn { column: UInt8([1, 0, 3]), validity: [0b_____101] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[1, 0, 3] }, validity: [0b_____101] } | ++--------+--------------------------------------------------------------------------------------------------+ ast : to_string(e) @@ -1749,12 +1749,12 @@ evaluation: | Row 2 | 188.8 | '188.8' | +--------+----------------+---------+ evaluation (internal): -+--------+---------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------+ -| e | Decimal128([3.1, 33.5, 188.8]) | -| Output | StringColumn { data: 0x332e3133332e353138382e38, offsets: [0, 3, 7, 12] } | -+--------+---------------------------------------------------------------------------+ ++--------+--------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------+ +| e | Decimal128([3.1, 33.5, 188.8]) | +| Output | StringColumn { data: Utf8ViewArray[3.1, 33.5, 188.8] } | ++--------+--------------------------------------------------------+ ast : to_string(f) @@ -1771,12 +1771,12 @@ evaluation: | Row 2 | 12.34 | '12.34' | +--------+----------------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------+ -| f | Decimal256([0.50, 0.92, 12.34]) | -| Output | StringColumn { data: 0x302e3530302e393231322e3334, offsets: [0, 4, 8, 13] } | -+--------+-----------------------------------------------------------------------------+ ++--------+---------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------+ +| f | Decimal256([0.50, 0.92, 12.34]) | +| Output | StringColumn { data: Utf8ViewArray[0.50, 0.92, 12.34] } | ++--------+---------------------------------------------------------+ ast : a ^ 2 diff --git a/src/query/functions/tests/it/scalars/testdata/array.txt b/src/query/functions/tests/it/scalars/testdata/array.txt index 5fe69a9a48ca..024e45bd3476 100644 --- a/src/query/functions/tests/it/scalars/testdata/array.txt +++ b/src/query/functions/tests/it/scalars/testdata/array.txt @@ -473,12 +473,12 @@ evaluation: | Row 3 | '1234' | false | +--------+-------------+---------+ evaluation (internal): -+------------+-------------------------------------------------------------------+ -| Column | Data | -+------------+-------------------------------------------------------------------+ -| string_col | StringColumn { data: 0x31323531323334, offsets: [0, 1, 2, 3, 7] } | -| Output | Boolean([0b____0000]) | -+------------+-------------------------------------------------------------------+ ++------------+-----------------------------------------------------+ +| Column | Data | ++------------+-----------------------------------------------------+ +| string_col | StringColumn { data: Utf8ViewArray[1, 2, 5, 1234] } | +| Output | Boolean([0b____0000]) | ++------------+-----------------------------------------------------+ ast : contains(['1', '5'], string_col) @@ -497,12 +497,12 @@ evaluation: | Row 3 | '1234' | false | +--------+-------------+---------------+ evaluation (internal): -+------------+-------------------------------------------------------------------+ -| Column | Data | -+------------+-------------------------------------------------------------------+ -| string_col | StringColumn { data: 0x31323531323334, offsets: [0, 1, 2, 3, 7] } | -| Output | Boolean([0b____0101]) | -+------------+-------------------------------------------------------------------+ ++------------+-----------------------------------------------------+ +| Column | Data | ++------------+-----------------------------------------------------+ +| string_col | StringColumn { data: Utf8ViewArray[1, 2, 5, 1234] } | +| Output | Boolean([0b____0101]) | ++------------+-----------------------------------------------------+ ast : contains(['15000', '6000', '7000'], string_col) @@ -521,12 +521,12 @@ evaluation: | Row 3 | '1234' | false | +--------+-------------+---------------+ evaluation (internal): -+------------+-------------------------------------------------------------------+ -| Column | Data | -+------------+-------------------------------------------------------------------+ -| string_col | StringColumn { data: 0x31323531323334, offsets: [0, 1, 2, 3, 7] } | -| Output | Boolean([0b____0000]) | -+------------+-------------------------------------------------------------------+ ++------------+-----------------------------------------------------+ +| Column | Data | ++------------+-----------------------------------------------------+ +| string_col | StringColumn { data: Utf8ViewArray[1, 2, 5, 1234] } | +| Output | Boolean([0b____0000]) | ++------------+-----------------------------------------------------+ ast : contains([1,2,null], nullable_col) diff --git a/src/query/functions/tests/it/scalars/testdata/binary.txt b/src/query/functions/tests/it/scalars/testdata/binary.txt index 2977b76ab21d..84a4e2f3e3fc 100644 --- a/src/query/functions/tests/it/scalars/testdata/binary.txt +++ b/src/query/functions/tests/it/scalars/testdata/binary.txt @@ -78,12 +78,12 @@ evaluation: | Row 1 | '123' | 'MTIz' | +--------+-----------------+--------+ evaluation (internal): -+--------+---------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------+ -| a | StringColumn { data: 0x416263313233, offsets: [0, 3, 6] } | -| Output | StringColumn { data: 0x51574a6a4d54497a, offsets: [0, 4, 8] } | -+--------+---------------------------------------------------------------+ ++--------+--------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Abc, 123] } | +| Output | StringColumn { data: Utf8ViewArray[QWJj, MTIz] } | ++--------+--------------------------------------------------+ ast : to_hex(to_binary('abc')) @@ -109,12 +109,12 @@ evaluation: | Row 2 | 'databend' | '6461746162656e64' | +--------+-----------------+--------------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x6162636465666461746162656e64, offsets: [0, 3, 6, 14] } | -| Output | StringColumn { data: 0x36313632363336343635363636343631373436313632363536653634, offsets: [0, 6, 12, 28] } | -+--------+------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[abc, def, databend] } | +| Output | StringColumn { data: Utf8ViewArray[616263, 646566, 6461746162656e64] } | ++--------+------------------------------------------------------------------------+ ast : from_base64('QWJj')::String @@ -148,12 +148,12 @@ evaluation: | Row 1 | 'MTIz' | '123' | +--------+-------------------+---------+ evaluation (internal): -+--------+---------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------+ -| a | StringColumn { data: 0x51574a6a4d54497a, offsets: [0, 4, 8] } | -| Output | StringColumn { data: 0x416263313233, offsets: [0, 3, 6] } | -+--------+---------------------------------------------------------------+ ++--------+--------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[QWJj, MTIz] } | +| Output | StringColumn { data: Utf8ViewArray[Abc, 123] } | ++--------+--------------------------------------------------+ error: @@ -195,12 +195,12 @@ evaluation: | Row 2 | '6461746162656e64' | 'databend' | +--------+-----------------------+------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x36313632363336343635363636343631373436313632363536653634, offsets: [0, 6, 12, 28] } | -| Output | StringColumn { data: 0x6162636465666461746162656e64, offsets: [0, 3, 6, 14] } | -+--------+------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[616263, 646566, 6461746162656e64] } | +| Output | StringColumn { data: Utf8ViewArray[abc, def, databend] } | ++--------+------------------------------------------------------------------------+ ast : TRY_from_base64('QWJj')::String @@ -234,12 +234,12 @@ evaluation: | Row 1 | 'MTIz' | '123' | +--------+-------------------+---------+ evaluation (internal): -+--------+---------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------+ -| a | StringColumn { data: 0x51574a6a4d54497a, offsets: [0, 4, 8] } | -| Output | StringColumn { data: 0x416263313233, offsets: [0, 3, 6] } | -+--------+---------------------------------------------------------------+ ++--------+--------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[QWJj, MTIz] } | +| Output | StringColumn { data: Utf8ViewArray[Abc, 123] } | ++--------+--------------------------------------------------+ ast : TRY_from_base64('!@#') @@ -283,11 +283,11 @@ evaluation: | Row 2 | '6461746162656e64' | 'databend' | +--------+-----------------------+------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x36313632363336343635363636343631373436313632363536653634, offsets: [0, 6, 12, 28] } | -| Output | StringColumn { data: 0x6162636465666461746162656e64, offsets: [0, 3, 6, 14] } | -+--------+------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[616263, 646566, 6461746162656e64] } | +| Output | StringColumn { data: Utf8ViewArray[abc, def, databend] } | ++--------+------------------------------------------------------------------------+ diff --git a/src/query/functions/tests/it/scalars/testdata/cast.txt b/src/query/functions/tests/it/scalars/testdata/cast.txt index 345cb91d9f2d..4e17bf4bbf6a 100644 --- a/src/query/functions/tests/it/scalars/testdata/cast.txt +++ b/src/query/functions/tests/it/scalars/testdata/cast.txt @@ -820,12 +820,12 @@ evaluation: | Row 4 | '9223372036854775807' | 9223372036854775807 | +--------+--------------------------------+----------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| str | StringColumn { data: 0x2d393232333337323033363835343737353830382d31303139323233333732303336383534373735383037, offsets: [0, 20, 22, 23, 24, 43] } | -| Output | Int64([-9223372036854775808, -1, 0, 1, 9223372036854775807]) | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------+ +| str | StringColumn { data: Utf8ViewArray[-9223372036854775808, -1, 0, 1, 9223372036854775807] } | +| Output | Int64([-9223372036854775808, -1, 0, 1, 9223372036854775807]) | ++--------+-------------------------------------------------------------------------------------------+ error: @@ -852,12 +852,12 @@ evaluation: | Row 4 | 9223372036854775807 | '9223372036854775807' | +--------+----------------------------------------------+------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| num | Int64([-9223372036854775808, -1, 0, 1, 9223372036854775807]) | -| Output | StringColumn { data: 0x2d393232333337323033363835343737353830382d31303139323233333732303336383534373735383037, offsets: [0, 20, 22, 23, 24, 43] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------+ +| num | Int64([-9223372036854775808, -1, 0, 1, 9223372036854775807]) | +| Output | StringColumn { data: Utf8ViewArray[-9223372036854775808, -1, 0, 1, 9223372036854775807] } | ++--------+-------------------------------------------------------------------------------------------+ ast : CAST(num AS STRING) @@ -874,12 +874,12 @@ evaluation: | Row 2 | 18446744073709551615 | '18446744073709551615' | +--------+----------------------------+------------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------+ -| num | UInt64([0, 1, 18446744073709551615]) | -| Output | StringColumn { data: 0x30313138343436373434303733373039353531363135, offsets: [0, 1, 2, 22] } | -+--------+-----------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------+ +| num | UInt64([0, 1, 18446744073709551615]) | +| Output | StringColumn { data: Utf8ViewArray[0, 1, 18446744073709551615] } | ++--------+------------------------------------------------------------------+ error: @@ -963,12 +963,12 @@ evaluation: | Row 1 | true | 'true' | +--------+---------------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------+ -| bool | Boolean([0b______10]) | -| Output | StringColumn { data: 0x66616c736574727565, offsets: [0, 5, 9] } | -+--------+-----------------------------------------------------------------+ ++--------+---------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------+ +| bool | Boolean([0b______10]) | +| Output | StringColumn { data: Utf8ViewArray[false, true] } | ++--------+---------------------------------------------------+ ast : CAST('010.010' AS DECIMAL(5,3)) @@ -1482,12 +1482,12 @@ evaluation: | Row 4 | '2022-01-02T01' | '2022-01-02 01:00:00.000000' | +--------+-----------------------------------------------------+------------------------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x323032322d30312d3032323032322d30312d30325430333a32353a30322e3836383839342d30373a3030323032322d30312d30322030323a30303a3131323032322d30312d30325430313a31323a30302d30373a3030323032322d30312d3032543031, offsets: [0, 10, 42, 61, 86, 99] } | -| Output | [1641081600000000, 1641119102868894, 1641088811000000, 1641111120000000, 1641085200000000] | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[2022-01-02, 2022-01-02T03:25:02.868894-07:00, 2022-01-02 02:00:11, 2022-01-02T01:12:00-07:00, 2022-01-02T01] } | +| Output | [1641081600000000, 1641119102868894, 1641088811000000, 1641111120000000, 1641085200000000] | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ast : CAST(TO_TIMESTAMP(-315360000000000) AS VARCHAR) @@ -1580,12 +1580,12 @@ evaluation: | Row 6 | '1979-12-30 00:00:00.000000' | '1979-12-30 00:00:00.000000' | +--------+--------------------------------------+------------------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | [-315360000000000, -315360000000, -100, 0, 100, 315360000000, 315360000000000] | -| Output | StringColumn { data: 0x313936302d30312d30342030303a30303a30302e303030303030313936392d31322d32382030383a32343a30302e303030303030313936392d31322d33312032333a35393a35392e393939393030313937302d30312d30312030303a30303a30302e303030303030313937302d30312d30312030303a30303a30302e303030313030313937302d30312d30342031353a33363a30302e303030303030313937392d31322d33302030303a30303a30302e303030303030, offsets: [0, 26, 52, 78, 104, 130, 156, 182] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | [-315360000000000, -315360000000, -100, 0, 100, 315360000000, 315360000000000] | +| Output | StringColumn { data: Utf8ViewArray[1960-01-04 00:00:00.000000, 1969-12-28 08:24:00.000000, 1969-12-31 23:59:59.999900, 1970-01-01 00:00:00.000000, 1970-01-01 00:00:00.000100, 1970-01-04 15:36:00.000000, 1979-12-30 00:00:00.000000] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ error: @@ -1682,12 +1682,12 @@ evaluation: | Row 4 | '2022-01-02T01' | '2022-01-02' | +--------+-----------------------------------------------------+--------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x323032322d30312d3032323032322d30312d30325430333a32353a30322e3836383839342d30373a3030323032322d30312d30322030323a30303a3131323032322d30312d30325430313a31323a30302d30373a3030323032322d30312d3032543031, offsets: [0, 10, 42, 61, 86, 99] } | -| Output | [18994, 18994, 18994, 18994, 18994] | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[2022-01-02, 2022-01-02T03:25:02.868894-07:00, 2022-01-02 02:00:11, 2022-01-02T01:12:00-07:00, 2022-01-02T01] } | +| Output | [18994, 18994, 18994, 18994, 18994] | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ast : CAST(TO_DATE(-354285) AS VARCHAR) @@ -1760,12 +1760,12 @@ evaluation: | Row 4 | '9999-12-31' | '9999-12-31' | +--------+---------------------+--------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | [-354285, -100, 0, 100, 2932896] | -| Output | StringColumn { data: 0x313030302d30312d3031313936392d30392d3233313937302d30312d3031313937302d30342d3131393939392d31322d3331, offsets: [0, 10, 20, 30, 40, 50] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------+ +| a | [-354285, -100, 0, 100, 2932896] | +| Output | StringColumn { data: Utf8ViewArray[1000-01-01, 1969-09-23, 1970-01-01, 1970-04-11, 9999-12-31] } | ++--------+--------------------------------------------------------------------------------------------------+ error: @@ -1921,7 +1921,7 @@ evaluation (internal): +--------+------------------------------------------------------------------------------------------------+ | Column | Data | +--------+------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | +| a | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | | Output | BinaryColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | +--------+------------------------------------------------------------------------------------------------+ @@ -1951,7 +1951,7 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +| a | NullableColumn { column: StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] }, validity: [0b_____011] } | | Output | NullableColumn { column: BinaryColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -2014,12 +2014,12 @@ evaluation: | Row 2 | 'ß😀山' | 'ß😀山' | +--------+-------------------+-------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -| Output | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -+--------+------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | +| Output | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | ++--------+-------------------------------------------------------------+ error: @@ -2044,12 +2044,12 @@ evaluation: | Row 2 | NULL | NULL | +--------+----------------------------+-------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | -| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] }, validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] }, validity: [0b_____011] } | ++--------+----------------------------------------------------------------------------------------------------------------+ ast : TRY_CAST(0 AS UINT8) @@ -2539,7 +2539,7 @@ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | NullableColumn { column: StringColumn { data: 0x747275657b226b223a2276227d5b312c322c335d, offsets: [0, 4, 13, 20] }, validity: [0b_____101] } | +| a | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, {"k":"v"}, [1,2,3]] }, validity: [0b_____101] } | | Output | NullableColumn { column: BinaryColumn { data: 0x200000004000000080000003200000022000000220000002500150025003, offsets: [0, 8, 8, 30] }, validity: [0b_____101] } | +--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -3016,12 +3016,12 @@ evaluation: | Row 4 | '9223372036854775807' | 9223372036854775807 | +--------+--------------------------------+-------------------------------------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| str | StringColumn { data: 0x2d393232333337323033363835343737353830382d31303139323233333732303336383534373735383037, offsets: [0, 20, 22, 23, 24, 43] } | -| Output | NullableColumn { column: Int64([-9223372036854775808, -1, 0, 1, 9223372036854775807]), validity: [0b___11111] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------+ +| str | StringColumn { data: Utf8ViewArray[-9223372036854775808, -1, 0, 1, 9223372036854775807] } | +| Output | NullableColumn { column: Int64([-9223372036854775808, -1, 0, 1, 9223372036854775807]), validity: [0b___11111] } | ++--------+-----------------------------------------------------------------------------------------------------------------+ ast : TRY_CAST(str AS INT64) @@ -3039,12 +3039,12 @@ evaluation: | Row 3 | NULL | NULL | +--------+------------------------+-------------------------------------------------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------+ -| str | NullableColumn { column: StringColumn { data: 0x666f6f666f6f3030, offsets: [0, 3, 6, 7, 8] }, validity: [0b____0101] } | -| Output | NullableColumn { column: Int64([0, 0, 0, 0]), validity: [0b____0100] } | -+--------+------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------+ +| str | NullableColumn { column: StringColumn { data: Utf8ViewArray[foo, foo, 0, 0] }, validity: [0b____0101] } | +| Output | NullableColumn { column: Int64([0, 0, 0, 0]), validity: [0b____0100] } | ++--------+---------------------------------------------------------------------------------------------------------+ ast : TRY_CAST(num AS STRING) @@ -3063,12 +3063,12 @@ evaluation: | Row 4 | 9223372036854775807 | '9223372036854775807' | +--------+----------------------------------------------+------------------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| num | Int64([-9223372036854775808, -1, 0, 1, 9223372036854775807]) | -| Output | NullableColumn { column: StringColumn { data: 0x2d393232333337323033363835343737353830382d31303139323233333732303336383534373735383037, offsets: [0, 20, 22, 23, 24, 43] }, validity: [0b___11111] } | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------+ +| num | Int64([-9223372036854775808, -1, 0, 1, 9223372036854775807]) | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[-9223372036854775808, -1, 0, 1, 9223372036854775807] }, validity: [0b___11111] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------+ ast : TRY_CAST(num AS STRING) @@ -3085,12 +3085,12 @@ evaluation: | Row 2 | 18446744073709551615 | '18446744073709551615' | +--------+----------------------------+------------------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------+ -| num | UInt64([0, 1, 18446744073709551615]) | -| Output | NullableColumn { column: StringColumn { data: 0x30313138343436373434303733373039353531363135, offsets: [0, 1, 2, 22] }, validity: [0b_____111] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------+ +| num | UInt64([0, 1, 18446744073709551615]) | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[0, 1, 18446744073709551615] }, validity: [0b_____111] } | ++--------+---------------------------------------------------------------------------------------------------------------------+ ast : TRY_CAST('t' AS BOOLEAN) @@ -3178,12 +3178,12 @@ evaluation: | Row 1 | true | 'true' | +--------+---------------+-----------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------+ -| bool | Boolean([0b______10]) | -| Output | NullableColumn { column: StringColumn { data: 0x66616c736574727565, offsets: [0, 5, 9] }, validity: [0b______11] } | -+--------+--------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------+ +| bool | Boolean([0b______10]) | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[false, true] }, validity: [0b______11] } | ++--------+------------------------------------------------------------------------------------------------------+ ast : TRY_CAST('010.010' AS DECIMAL(5,3)) @@ -3702,12 +3702,12 @@ evaluation: | Row 4 | '2022-01-02T01' | '2022-01-02 01:00:00.000000' | +--------+-----------------------------------------------------+----------------------------------------------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x323032322d30312d3032323032322d30312d30325430333a32353a30322e3836383839342d30373a3030323032322d30312d30322030323a30303a3131323032322d30312d30325430313a31323a30302d30373a3030323032322d30312d3032543031, offsets: [0, 10, 42, 61, 86, 99] } | -| Output | NullableColumn { column: [1641081600000000, 1641119102868894, 1641088811000000, 1641111120000000, 1641085200000000], validity: [0b___11111] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[2022-01-02, 2022-01-02T03:25:02.868894-07:00, 2022-01-02 02:00:11, 2022-01-02T01:12:00-07:00, 2022-01-02T01] } | +| Output | NullableColumn { column: [1641081600000000, 1641119102868894, 1641088811000000, 1641111120000000, 1641085200000000], validity: [0b___11111] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ast : TRY_CAST(TO_TIMESTAMP(-315360000000000) AS VARCHAR) @@ -3800,12 +3800,12 @@ evaluation: | Row 6 | '1979-12-30 00:00:00.000000' | '1979-12-30 00:00:00.000000' | +--------+--------------------------------------+------------------------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | [-315360000000000, -315360000000, -100, 0, 100, 315360000000, 315360000000000] | -| Output | NullableColumn { column: StringColumn { data: 0x313936302d30312d30342030303a30303a30302e303030303030313936392d31322d32382030383a32343a30302e303030303030313936392d31322d33312032333a35393a35392e393939393030313937302d30312d30312030303a30303a30302e303030303030313937302d30312d30312030303a30303a30302e303030313030313937302d30312d30342031353a33363a30302e303030303030313937392d31322d33302030303a30303a30302e303030303030, offsets: [0, 26, 52, 78, 104, 130, 156, 182] }, validity: [0b_1111111] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | [-315360000000000, -315360000000, -100, 0, 100, 315360000000, 315360000000000] | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[1960-01-04 00:00:00.000000, 1969-12-28 08:24:00.000000, 1969-12-31 23:59:59.999900, 1970-01-01 00:00:00.000000, 1970-01-01 00:00:00.000100, 1970-01-04 15:36:00.000000, 1979-12-30 00:00:00.000000] }, validity: [0b_1111111] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : TRY_TO_DATE('2022') @@ -3905,12 +3905,12 @@ evaluation: | Row 4 | '2022-01-02T01' | '2022-01-02' | +--------+-----------------------------------------------------+------------------------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x323032322d30312d3032323032322d30312d30325430333a32353a30322e3836383839342d30373a3030323032322d30312d30322030323a30303a3131323032322d30312d30325430313a31323a30302d30373a3030323032322d30312d3032543031, offsets: [0, 10, 42, 61, 86, 99] } | -| Output | NullableColumn { column: [18994, 18994, 18994, 18994, 18994], validity: [0b___11111] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[2022-01-02, 2022-01-02T03:25:02.868894-07:00, 2022-01-02 02:00:11, 2022-01-02T01:12:00-07:00, 2022-01-02T01] } | +| Output | NullableColumn { column: [18994, 18994, 18994, 18994, 18994], validity: [0b___11111] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ast : TRY_CAST(TO_DATE(-354285) AS VARCHAR) @@ -3983,12 +3983,12 @@ evaluation: | Row 4 | '9999-12-31' | '9999-12-31' | +--------+---------------------+--------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | [-354285, -100, 0, 100, 2932896] | -| Output | NullableColumn { column: StringColumn { data: 0x313030302d30312d3031313936392d30392d3233313937302d30312d3031313937302d30342d3131393939392d31322d3331, offsets: [0, 10, 20, 30, 40, 50] }, validity: [0b___11111] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | [-354285, -100, 0, 100, 2932896] | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[1000-01-01, 1969-09-23, 1970-01-01, 1970-04-11, 9999-12-31] }, validity: [0b___11111] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ error: @@ -4151,7 +4151,7 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | +| a | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | | Output | NullableColumn { column: BinaryColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____111] } | +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -4173,7 +4173,7 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +| a | NullableColumn { column: StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] }, validity: [0b_____011] } | | Output | NullableColumn { column: BinaryColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -4195,7 +4195,7 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +| a | NullableColumn { column: StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] }, validity: [0b_____011] } | | Output | NullableColumn { column: BinaryColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | +--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -4259,12 +4259,12 @@ evaluation: | Row 2 | 'ß😀山' | 'ß😀山' | +--------+-------------------+-----------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____111] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] }, validity: [0b_____111] } | ++--------+----------------------------------------------------------------------------------------------------------------+ ast : TRY_CAST(TRY_CAST(a AS BINARY) AS STRING) @@ -4281,12 +4281,12 @@ evaluation: | Row 2 | NULL | NULL | +--------+----------------------------+-----------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | -| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] }, validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] }, validity: [0b_____011] } | ++--------+----------------------------------------------------------------------------------------------------------------+ ast : TRY_CAST(TRY_CAST(a AS BINARY NULL) AS STRING NULL) @@ -4303,11 +4303,11 @@ evaluation: | Row 2 | NULL | NULL | +--------+----------------------------+-----------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | -| Output | NullableColumn { column: StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] }, validity: [0b_____011] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] }, validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] }, validity: [0b_____011] } | ++--------+----------------------------------------------------------------------------------------------------------------+ diff --git a/src/query/functions/tests/it/scalars/testdata/comparison.txt b/src/query/functions/tests/it/scalars/testdata/comparison.txt index cc5cf9b5d7fc..7dd3ce500bd5 100644 --- a/src/query/functions/tests/it/scalars/testdata/comparison.txt +++ b/src/query/functions/tests/it/scalars/testdata/comparison.txt @@ -223,13 +223,13 @@ evaluation: | Row 6 | '[1,2,3,["a","b","c"]]' | '[1,2,3,["a","b","c"]]' | true | +--------+------------------------------------------+------------------------------------------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e353637387b226b223a2276222c2261223a2262227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 59, 80] } | -| rhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e353637387b226b223a2276222c2261223a2264227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 59, 80] } | -| Output | Boolean([0b_1011111]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, -32768, 1234.5678, {"k":"v","a":"b"}, [1,2,3,["a","b","c"]]] } | +| rhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, -32768, 1234.5678, {"k":"v","a":"d"}, [1,2,3,["a","b","c"]]] } | +| Output | Boolean([0b_1011111]) | ++--------+------------------------------------------------------------------------------------------------------------------------------------+ ast : lhs = rhs @@ -250,13 +250,13 @@ evaluation: | Row 6 | '[1,2,3,["a","b","c"]]' | '[1,2,3,["a","b","c"]]' | true | +--------+------------------------------------------+------------------------------------------+---------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e353637387b226b223a2276222c2261223a2262227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 59, 80] } | -| rhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e353637387b226b223a2276222c2261223a2264227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 59, 80] } | -| Output | Boolean([0b_1011111]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, -32768, 1234.5678, {"k":"v","a":"b"}, [1,2,3,["a","b","c"]]] } | +| rhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, -32768, 1234.5678, {"k":"v","a":"d"}, [1,2,3,["a","b","c"]]] } | +| Output | Boolean([0b_1011111]) | ++--------+------------------------------------------------------------------------------------------------------------------------------------+ ast : '1'!='2' @@ -405,13 +405,13 @@ evaluation: | Row 3 | '[1,2,3,["a","b","c"]]' | '[1,2,3,["a","b","c"]]' | false | +--------+----------------------------------+----------------------------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830375b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 48] } | -| rhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830375b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 48] } | -| Output | Boolean([0b____0000]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, [1,2,3,["a","b","c"]]] } | +| rhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, [1,2,3,["a","b","c"]]] } | +| Output | Boolean([0b____0000]) | ++--------+----------------------------------------------------------------------------------------------+ ast : lhs != rhs @@ -429,13 +429,13 @@ evaluation: | Row 3 | '[1,2,3,["a","b","c"]]' | '[1,2,3,["a","b","c"]]' | false | +--------+----------------------------------+----------------------------------+---------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830375b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 48] } | -| rhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830375b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 48] } | -| Output | Boolean([0b____0000]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, [1,2,3,["a","b","c"]]] } | +| rhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, [1,2,3,["a","b","c"]]] } | +| Output | Boolean([0b____0000]) | ++--------+----------------------------------------------------------------------------------------------+ ast : '1'<'2' @@ -573,13 +573,13 @@ evaluation: | Row 6 | '[1,2,3,["a","b","c"]]' | '[0,2,3,["a","b","c"]]' | true | +--------+-------------------------+-------------------------+---------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e35363738312e39313265325b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 49, 70] } | -| rhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830302d3333373638313233342e35363738312e39313265325b302c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 49, 70] } | -| Output | Boolean([0b_1111111]) | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, -32768, 1234.5678, 1.912e2, [1,2,3,["a","b","c"]]] } | +| rhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775800, -33768, 1234.5678, 1.912e2, [0,2,3,["a","b","c"]]] } | +| Output | Boolean([0b_1111111]) | ++--------+--------------------------------------------------------------------------------------------------------------------------+ ast : lhs < rhs @@ -600,13 +600,13 @@ evaluation: | Row 6 | '[1,2,3,["a","b","c"]]' | '[0,2,3,["a","b","c"]]' | false | +--------+-------------------------+-------------------------+---------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e35363738312e39313265325b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 49, 70] } | -| rhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830302d3333373638313233342e35363738312e39313265325b302c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 49, 70] } | -| Output | Boolean([0b_0001000]) | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, -32768, 1234.5678, 1.912e2, [1,2,3,["a","b","c"]]] } | +| rhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775800, -33768, 1234.5678, 1.912e2, [0,2,3,["a","b","c"]]] } | +| Output | Boolean([0b_0001000]) | ++--------+--------------------------------------------------------------------------------------------------------------------------+ ast : '5'<='2' @@ -749,13 +749,13 @@ evaluation: | Row 2 | '[1,2,3,["a","b","c"]]' | '[0,2,3,["a","b","c"]]' | false | +--------+------------------------------------------------+------------------------------------------------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x226461746162656e64227b226b223a2276222c2261223a2262227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 10, 27, 48] } | -| rhs | StringColumn { data: 0x226461746162656e64227b226b223a2261222c2261223a2264227d5b302c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 10, 27, 48] } | -| Output | Boolean([0b_____011]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray["databend", {"k":"v","a":"b"}, [1,2,3,["a","b","c"]]] } | +| rhs | StringColumn { data: Utf8ViewArray["databend", {"k":"a","a":"d"}, [0,2,3,["a","b","c"]]] } | +| Output | Boolean([0b_____011]) | ++--------+--------------------------------------------------------------------------------------------+ ast : lhs <= rhs @@ -772,13 +772,13 @@ evaluation: | Row 2 | '[1,2,3,["a","b","c"]]' | '[0,2,3,["a","b","c"]]' | false | +--------+------------------------------------------------+------------------------------------------------+---------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x226461746162656e64227b226b223a2276222c2261223a2262227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 10, 27, 48] } | -| rhs | StringColumn { data: 0x226461746162656e64227b226b223a2261222c2261223a2264227d5b302c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 10, 27, 48] } | -| Output | Boolean([0b_____001]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray["databend", {"k":"v","a":"b"}, [1,2,3,["a","b","c"]]] } | +| rhs | StringColumn { data: Utf8ViewArray["databend", {"k":"a","a":"d"}, [0,2,3,["a","b","c"]]] } | +| Output | Boolean([0b_____001]) | ++--------+--------------------------------------------------------------------------------------------+ ast : '3'>'2' @@ -923,13 +923,13 @@ evaluation: | Row 4 | '1234.5678' | '1234.5678' | false | +--------+-----------------------+-----------------------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e35363738, offsets: [0, 4, 8, 27, 33, 42] } | -| rhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830362d3332373638313233342e35363738, offsets: [0, 4, 8, 27, 33, 42] } | -| Output | Boolean([0b___00100]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, -32768, 1234.5678] } | +| rhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775806, -32768, 1234.5678] } | +| Output | Boolean([0b___00100]) | ++--------+------------------------------------------------------------------------------------------+ ast : lhs > rhs @@ -948,13 +948,13 @@ evaluation: | Row 4 | '1234.5678' | '1234.5678' | false | +--------+-----------------------+-----------------------+---------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e35363738, offsets: [0, 4, 8, 27, 33, 42] } | -| rhs | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830362d3332373638313233342e35363738, offsets: [0, 4, 8, 27, 33, 42] } | -| Output | Boolean([0b___00100]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, -32768, 1234.5678] } | +| rhs | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775806, -32768, 1234.5678] } | +| Output | Boolean([0b___00100]) | ++--------+------------------------------------------------------------------------------------------+ ast : col > 'efg' @@ -971,12 +971,12 @@ evaluation: | Row 1 | 'efg' | false | +--------+-----------------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------+ -| col | StringColumn { data: 0x626364656667, offsets: [0, 3, 6] } | -| Output | Boolean([0b______00]) | -+--------+-----------------------------------------------------------+ ++--------+------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------+ +| col | StringColumn { data: Utf8ViewArray[bcd, efg] } | +| Output | Boolean([0b______00]) | ++--------+------------------------------------------------+ ast : '2'>='1' @@ -1123,13 +1123,13 @@ evaluation: | Row 6 | '[1,2,3,["a","b","d"]]' | '[1,2,3,["a","b","c"]]' | true | +--------+-----------------------------------------------------------+-----------------------------------------------------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x393232333337323033363835343737353830372d3332373638313233342e35363738312e3931326532225c5c5c226162635c5c5c22227b226b223a2276222c2261223a2262227d5b312c322c332c5b2261222c2262222c2264225d5d, offsets: [0, 19, 25, 34, 41, 54, 71, 92] } | -| rhs | StringColumn { data: 0x393232333337323033363835343737353830362d3332373638313233342e35363738312e3931326532225c5c5c226162635c5c5c22227b226b223a2276222c2261223a2264227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 19, 25, 34, 41, 54, 71, 92] } | -| Output | Boolean([0b_1011111]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[9223372036854775807, -32768, 1234.5678, 1.912e2, "\\\"abc\\\"", {"k":"v","a":"b"}, [1,2,3,["a","b","d"]]] } | +| rhs | StringColumn { data: Utf8ViewArray[9223372036854775806, -32768, 1234.5678, 1.912e2, "\\\"abc\\\"", {"k":"v","a":"d"}, [1,2,3,["a","b","c"]]] } | +| Output | Boolean([0b_1011111]) | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------+ ast : lhs >= rhs @@ -1150,13 +1150,13 @@ evaluation: | Row 6 | '[1,2,3,["a","b","d"]]' | '[1,2,3,["a","b","c"]]' | true | +--------+-----------------------------------------------------------+-----------------------------------------------------------+---------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x393232333337323033363835343737353830372d3332373638313233342e35363738312e3931326532225c5c5c226162635c5c5c22227b226b223a2276222c2261223a2262227d5b312c322c332c5b2261222c2262222c2264225d5d, offsets: [0, 19, 25, 34, 41, 54, 71, 92] } | -| rhs | StringColumn { data: 0x393232333337323033363835343737353830362d3332373638313233342e35363738312e3931326532225c5c5c226162635c5c5c22227b226b223a2276222c2261223a2264227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 19, 25, 34, 41, 54, 71, 92] } | -| Output | Boolean([0b_1011111]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[9223372036854775807, -32768, 1234.5678, 1.912e2, "\\\"abc\\\"", {"k":"v","a":"b"}, [1,2,3,["a","b","d"]]] } | +| rhs | StringColumn { data: Utf8ViewArray[9223372036854775806, -32768, 1234.5678, 1.912e2, "\\\"abc\\\"", {"k":"v","a":"d"}, [1,2,3,["a","b","c"]]] } | +| Output | Boolean([0b_1011111]) | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------+ ast : '1' like '2' @@ -1223,12 +1223,12 @@ evaluation: | Row 3 | 'abf' | true | +--------+-----------------+---------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x616263616264616265616266, offsets: [0, 3, 6, 9, 12] } | -| Output | Boolean([0b____1111]) | -+--------+------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[abc, abd, abe, abf] } | +| Output | Boolean([0b____1111]) | ++--------+----------------------------------------------------------+ ast : lhs like 'b%' @@ -1247,12 +1247,35 @@ evaluation: | Row 3 | 'abf' | false | +--------+-----------------+---------+ evaluation (internal): -+--------+------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x616263616264616265616266, offsets: [0, 3, 6, 9, 12] } | -| Output | Boolean([0b____0000]) | -+--------+------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[abc, abd, abe, abf] } | +| Output | Boolean([0b____0000]) | ++--------+----------------------------------------------------------+ + + +ast : lhs like 'ab%' +raw expr : like(lhs::String, 'ab%') +checked expr : like(lhs, "ab%") +evaluation: ++--------+-----------------+---------------+ +| | lhs | Output | ++--------+-----------------+---------------+ +| Type | String | Boolean | +| Domain | {"abc"..="abf"} | {FALSE, TRUE} | +| Row 0 | 'abc' | true | +| Row 1 | 'abd' | true | +| Row 2 | 'abe' | true | +| Row 3 | 'abf' | true | ++--------+-----------------+---------------+ +evaluation (internal): ++--------+----------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[abc, abd, abe, abf] } | +| Output | Boolean([0b____1111]) | ++--------+----------------------------------------------------------+ ast : lhs like 'c' @@ -1271,12 +1294,12 @@ evaluation: | Row 3 | 'abf' | false | +--------+-----------------+---------+ evaluation (internal): -+--------+------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x616263616264616265616266, offsets: [0, 3, 6, 9, 12] } | -| Output | Boolean([0b____0000]) | -+--------+------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[abc, abd, abe, abf] } | +| Output | Boolean([0b____0000]) | ++--------+----------------------------------------------------------+ ast : lhs like rhs @@ -1294,13 +1317,13 @@ evaluation: | Row 3 | 'abf' | 'a' | false | +--------+-----------------+-----------------+---------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x616263616264616265616266, offsets: [0, 3, 6, 9, 12] } | -| rhs | StringColumn { data: 0x61255f625f61626561, offsets: [0, 2, 5, 8, 9] } | -| Output | Boolean([0b____0111]) | -+--------+------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[abc, abd, abe, abf] } | +| rhs | StringColumn { data: Utf8ViewArray[a%, _b_, abe, a] } | +| Output | Boolean([0b____0111]) | ++--------+----------------------------------------------------------+ ast : parse_json('"hello"') like 'h%' @@ -1344,12 +1367,12 @@ evaluation: | Row 2 | '["abe","abf"]' | false | +--------+------------------------------+---------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x22616263227b22616264223a31327d5b22616265222c22616266225d, offsets: [0, 5, 15, 28] } | -| Output | Boolean([0b_____001]) | -+--------+------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray["abc", {"abd":12}, ["abe","abf"]] } | +| Output | Boolean([0b_____001]) | ++--------+------------------------------------------------------------------------+ ast : parse_json(lhs) like '%ab%' @@ -1366,12 +1389,12 @@ evaluation: | Row 2 | '["abe","abf"]' | true | +--------+------------------------------+---------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x22616263227b22616264223a31327d5b22616265222c22616266225d, offsets: [0, 5, 15, 28] } | -| Output | Boolean([0b_____111]) | -+--------+------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray["abc", {"abd":12}, ["abe","abf"]] } | +| Output | Boolean([0b_____111]) | ++--------+------------------------------------------------------------------------+ ast : lhs regexp rhs @@ -1391,13 +1414,13 @@ evaluation: | Row 5 | '' | '' | true | +--------+--------------+--------------+---------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x616263616264616265616266616263, offsets: [0, 3, 6, 9, 12, 15, 15] } | -| rhs | StringColumn { data: 0x5e615e6261626561, offsets: [0, 2, 4, 7, 8, 8, 8] } | -| Output | Boolean([0b__101101]) | -+--------+--------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[abc, abd, abe, abf, abc, ] } | +| rhs | StringColumn { data: Utf8ViewArray[^a, ^b, abe, a, , ] } | +| Output | Boolean([0b__101101]) | ++--------+-----------------------------------------------------------------+ ast : lhs rlike rhs @@ -1417,12 +1440,12 @@ evaluation: | Row 5 | '' | '' | true | +--------+--------------+--------------+---------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------+ -| lhs | StringColumn { data: 0x616263616264616265616266616263, offsets: [0, 3, 6, 9, 12, 15, 15] } | -| rhs | StringColumn { data: 0x5e615e6261626561, offsets: [0, 2, 4, 7, 8, 8, 8] } | -| Output | Boolean([0b__101101]) | -+--------+--------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------+ +| lhs | StringColumn { data: Utf8ViewArray[abc, abd, abe, abf, abc, ] } | +| rhs | StringColumn { data: Utf8ViewArray[^a, ^b, abe, a, , ] } | +| Output | Boolean([0b__101101]) | ++--------+-----------------------------------------------------------------+ diff --git a/src/query/functions/tests/it/scalars/testdata/geo_h3.txt b/src/query/functions/tests/it/scalars/testdata/geo_h3.txt index 6a57e63c05ab..3ee681496703 100644 --- a/src/query/functions/tests/it/scalars/testdata/geo_h3.txt +++ b/src/query/functions/tests/it/scalars/testdata/geo_h3.txt @@ -763,12 +763,12 @@ evaluation: | Row 2 | 599686042433355775 | '85283473fffffff' | +--------+-------------------------------------------+-------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| h3 | UInt64([635318325446452991, 644325524701193897, 599686042433355775]) | -| Output | StringColumn { data: 0x386431316161366133383832366666386631316161366133383832366139383532383334373366666666666666, offsets: [0, 15, 30, 45] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------+ +| h3 | UInt64([635318325446452991, 644325524701193897, 599686042433355775]) | +| Output | StringColumn { data: Utf8ViewArray[8d11aa6a38826ff, 8f11aa6a38826a9, 85283473fffffff] } | ++--------+-----------------------------------------------------------------------------------------+ error: @@ -809,12 +809,12 @@ evaluation: | Row 2 | '85283473fffffff' | 599686042433355775 | +--------+-----------------------------------------+----------------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| h3_str | StringColumn { data: 0x386431316161366133383832366666386631316161366133383832366139383532383334373366666666666666, offsets: [0, 15, 30, 45] } | -| Output | UInt64([635318325446452991, 644325524701193897, 599686042433355775]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------+ +| h3_str | StringColumn { data: Utf8ViewArray[8d11aa6a38826ff, 8f11aa6a38826a9, 85283473fffffff] } | +| Output | UInt64([635318325446452991, 644325524701193897, 599686042433355775]) | ++--------+-----------------------------------------------------------------------------------------+ error: diff --git a/src/query/functions/tests/it/scalars/testdata/geometry.txt b/src/query/functions/tests/it/scalars/testdata/geometry.txt index 7d148752ef2c..44e7b265c3de 100644 --- a/src/query/functions/tests/it/scalars/testdata/geometry.txt +++ b/src/query/functions/tests/it/scalars/testdata/geometry.txt @@ -257,7 +257,7 @@ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x39713630793630726873753470727579647171766a30, offsets: [0, 10, 22] } | +| a | StringColumn { data: Utf8ViewArray[9q60y60rhs, u4pruydqqvj0] } | | Output | BinaryColumn { data: 0x0103000000010000000500000000000036632a5ec00000001470a6414000000036632a5ec00000004170a6414000000009632a5ec00000004170a6414000000009632a5ec00000001470a6414000000036632a5ec00000001470a6414001030000000100000005000000000000d99bd024400000000916d34c40000000d99bd024400000680a16d34c40000040e49bd024400000680a16d34c40000040e49bd024400000000916d34c40000000d99bd024400000000916d34c40, offsets: [0, 93, 186] } | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -406,7 +406,7 @@ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x4c494e45535452494e4728302e3020302e302c20312e3020302e302c20312e3020322e302c20302e3020322e302c20302e3020302e30294c494e45535452494e472831302e3120352e322c2031352e3220372e332c2032302e3220382e332c2031302e3920372e372c2031302e3120352e3229, offsets: [0, 55, 115] } | +| a | StringColumn { data: Utf8ViewArray[LINESTRING(0.0 0.0, 1.0 0.0, 1.0 2.0, 0.0 2.0, 0.0 0.0), LINESTRING(10.1 5.2, 15.2 7.3, 20.2 8.3, 10.9 7.7, 10.1 5.2)] } | | Output | BinaryColumn { data: 0x0103000000010000000500000000000000000000000000000000000000000000000000f03f0000000000000000000000000000f03f00000000000000400000000000000000000000000000004000000000000000000000000000000000010300000001000000050000003333333333332440cdcccccccccc14406666666666662e403333333333331d4033333333333334409a99999999992040cdcccccccccc2540cdcccccccccc1e403333333333332440cdcccccccccc1440, offsets: [0, 93, 186] } | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -704,13 +704,13 @@ evaluation: | Row 2 | 3 | 3 | 'POINT(3 3)' | +--------+---------+---------+--------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------+ -| a | Float64([1, 2, 3]) | -| b | Float64([1, 2, 3]) | -| Output | StringColumn { data: 0x504f494e542831203129504f494e542832203229504f494e542833203329, offsets: [0, 10, 20, 30] } | -+--------+-----------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------+ +| a | Float64([1, 2, 3]) | +| b | Float64([1, 2, 3]) | +| Output | StringColumn { data: Utf8ViewArray[POINT(1 1), POINT(2 2), POINT(3 3)] } | ++--------+--------------------------------------------------------------------------+ ast : try_to_geometry(NULL) @@ -935,13 +935,13 @@ evaluation: | Row 2 | '0101000020797f000066666666a9cb17411f85ebc19e325641' | 3857 | 'SRID=3857;POINT(389866.35 5819003.03)' | +--------+---------------------------------------------------------------------------------------------------------------+----------------+------------------------------------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x303130313030303032303739376630303030363636363636363661396362313734313166383565626331396533323536343130313031303030303230373937663030303036363636363636366139636231373431316638356562633139653332353634313031303130303030323037393766303030303636363636363636613963623137343131663835656263313965333235363431, offsets: [0, 50, 100, 150] } | -| b | Int32([32633, 4326, 3857]) | -| Output | BinaryColumn { data: 0x0101000020797f000066666666a9cb17411f85ebc19e3256410101000020e610000066666666a9cb17411f85ebc19e3256410101000020110f000066666666a9cb17411f85ebc19e325641, offsets: [0, 25, 50, 75] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[0101000020797f000066666666a9cb17411f85ebc19e325641, 0101000020797f000066666666a9cb17411f85ebc19e325641, 0101000020797f000066666666a9cb17411f85ebc19e325641] } | +| b | Int32([32633, 4326, 3857]) | +| Output | BinaryColumn { data: 0x0101000020797f000066666666a9cb17411f85ebc19e3256410101000020e610000066666666a9cb17411f85ebc19e3256410101000020110f000066666666a9cb17411f85ebc19e325641, offsets: [0, 25, 50, 75] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : st_geometryfromwkt('POINT(389866.35 5819003.03)') @@ -968,12 +968,12 @@ evaluation: | Row 2 | 'POINT(389866.35 5819003.03)' | 'POINT(389866.35 5819003.03)' | +--------+-----------------------------------------------------------------+-------------------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x504f494e54283338393836362e333520353831393030332e303329504f494e54283338393836362e333520353831393030332e303329504f494e54283338393836362e333520353831393030332e303329, offsets: [0, 27, 54, 81] } | -| Output | BinaryColumn { data: 0x010100000066666666a9cb17411f85ebc19e325641010100000066666666a9cb17411f85ebc19e325641010100000066666666a9cb17411f85ebc19e325641, offsets: [0, 21, 42, 63] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[POINT(389866.35 5819003.03), POINT(389866.35 5819003.03), POINT(389866.35 5819003.03)] } | +| Output | BinaryColumn { data: 0x010100000066666666a9cb17411f85ebc19e325641010100000066666666a9cb17411f85ebc19e325641010100000066666666a9cb17411f85ebc19e325641, offsets: [0, 21, 42, 63] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : st_geometryfromwkt('POINT(389866.35 5819003.03)', 32633) @@ -1000,13 +1000,13 @@ evaluation: | Row 2 | 'POINT(389866.35 5819003.03)' | 3857 | 'SRID=3857;POINT(389866.35 5819003.03)' | +--------+-----------------------------------------------------------------+----------------+------------------------------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x504f494e54283338393836362e333520353831393030332e303329504f494e54283338393836362e333520353831393030332e303329504f494e54283338393836362e333520353831393030332e303329, offsets: [0, 27, 54, 81] } | -| b | Int32([32633, 4326, 3857]) | -| Output | BinaryColumn { data: 0x0101000020797f000066666666a9cb17411f85ebc19e3256410101000020e610000066666666a9cb17411f85ebc19e3256410101000020110f000066666666a9cb17411f85ebc19e325641, offsets: [0, 25, 50, 75] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[POINT(389866.35 5819003.03), POINT(389866.35 5819003.03), POINT(389866.35 5819003.03)] } | +| b | Int32([32633, 4326, 3857]) | +| Output | BinaryColumn { data: 0x0101000020797f000066666666a9cb17411f85ebc19e3256410101000020e610000066666666a9cb17411f85ebc19e3256410101000020110f000066666666a9cb17411f85ebc19e325641, offsets: [0, 25, 50, 75] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : st_xmax(to_geometry('POINT(-180 0)')) @@ -1211,14 +1211,14 @@ evaluation: | Row 0 | 'POINT(389866.35 5819003.03)' | 32633 | 3857 | 'SRID=3857;POINT(1489140.093766 6892872.19868)' | +--------+-----------------------------------------------------------------+-----------------+---------------+-------------------------------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x504f494e54283338393836362e333520353831393030332e303329, offsets: [0, 27] } | -| b | Int32([32633]) | -| c | Int32([3857]) | -| Output | BinaryColumn { data: 0x0101000020110f00006f0c0118f4b83641522cb70c524b5a41, offsets: [0, 25] } | -+--------+---------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[POINT(389866.35 5819003.03)] } | +| b | Int32([32633]) | +| c | Int32([3857]) | +| Output | BinaryColumn { data: 0x0101000020110f00006f0c0118f4b83641522cb70c524b5a41, offsets: [0, 25] } | ++--------+-----------------------------------------------------------------------------------------------+ ast : st_transform(st_geomfromwkt('POINT(4.500212 52.161170)'), 4326, 28992) @@ -1246,7 +1246,7 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------+ | Column | Data | +--------+-----------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x504f494e5428342e3530303231322035322e31363131373029, offsets: [0, 25] } | +| a | StringColumn { data: Utf8ViewArray[POINT(4.500212 52.161170)] } | | b | Int32([4326]) | | c | Int32([28992]) | | Output | BinaryColumn { data: 0x0101000020407100005dfe43ba4a06f7402ffce0ac98521c41, offsets: [0, 25] } | diff --git a/src/query/functions/tests/it/scalars/testdata/hash.txt b/src/query/functions/tests/it/scalars/testdata/hash.txt index 14df61111d38..971f377992c4 100644 --- a/src/query/functions/tests/it/scalars/testdata/hash.txt +++ b/src/query/functions/tests/it/scalars/testdata/hash.txt @@ -30,12 +30,12 @@ evaluation: | Row 2 | 'ß😀山' | 'b814c09d48b62faafc315df44a35863e' | +--------+-------------------+------------------------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -| Output | StringColumn { data: 0x333535393362376365353032306561653363613638666435623666336530333131656630623464616235353838666533663633636438346636663164366162326238313463303964343862363266616166633331356466343461333538363365, offsets: [0, 32, 64, 96] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | +| Output | StringColumn { data: Utf8ViewArray[35593b7ce5020eae3ca68fd5b6f3e031, 1ef0b4dab5588fe3f63cd84f6f1d6ab2, b814c09d48b62faafc315df44a35863e] } | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------+ ast : sha('Abc') @@ -70,12 +70,12 @@ evaluation: | Row 2 | 'ß😀山' | 'e978809ba007678383c2db3decbaf02eb0bf72a8' | +--------+-------------------+--------------------------------------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -| Output | StringColumn { data: 0x393135383538616661323237386632353532376631393230333831303833343631363462343766326336366430353663396662326339653663643734623435353566363463306166346137626335393965393738383039626130303736373833383363326462336465636261663032656230626637326138, offsets: [0, 40, 80, 120] } | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | +| Output | StringColumn { data: Utf8ViewArray[915858afa2278f25527f192038108346164b47f2, c66d056c9fb2c9e6cd74b4555f64c0af4a7bc599, e978809ba007678383c2db3decbaf02eb0bf72a8] } | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : blake3('Abc') @@ -110,12 +110,12 @@ evaluation: | Row 2 | 'ß😀山' | '56475d2e89dba36b511ddaa8e4e8e995c094f59f5fbfa0af5929f3f399d9a810' | +--------+-------------------+--------------------------------------------------------------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -| Output | StringColumn { data: 0x316637616133393738393439633432373537393761633630353663303130386538353261356430376334393833386130303739393139343935376238323131313062343439343139383334653564323835643362393566383932623534393464623864373739386331313630326363646531663531633333613135393637376635363437356432653839646261333662353131646461613865346538653939356330393466353966356662666130616635393239663366333939643961383130, offsets: [0, 64, 128, 192] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | +| Output | StringColumn { data: Utf8ViewArray[1f7aa3978949c4275797ac6056c0108e852a5d07c49838a00799194957b82111, 0b449419834e5d285d3b95f892b5494db8d7798c11602ccde1f51c33a159677f, 56475d2e89dba36b511ddaa8e4e8e995c094f59f5fbfa0af5929f3f399d9a810] } | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : sha2('Abc',0) @@ -159,13 +159,13 @@ evaluation: | Row 2 | 'ß😀山' | 512 | '3bd4ca36a66c0675e695f3fc44af703cd6c110085adf105138ef56e6768a639f16a9c27b651a0c64f685b24be835e0a62485575477e06d530574865bf1670d30' | +--------+-------------------+-------------+------------------------------------------------------------------------------------------------------------------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -| b | UInt16([224, 384, 512]) | -| Output | StringColumn { data: 0x31316438363737306635303132393463366233393539343261333966363066653238366131356530363238326162636232323934636661303161356336366339313837313863363237653336306535363833336432663663363338666431613637303836373932363036636665666535303038393238396361333462353261373261383333653666323636316236343431373036383834363362643463613336613636633036373565363935663366633434616637303363643663313130303835616466313035313338656635366536373638613633396631366139633237623635316130633634663638356232346265383335653061363234383535373534373765303664353330353734383635626631363730643330, offsets: [0, 56, 152, 280] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | +| b | UInt16([224, 384, 512]) | +| Output | StringColumn { data: Utf8ViewArray[11d86770f501294c6b395942a39f60fe286a15e06282abcb2294cfa0, 1a5c66c918718c627e360e56833d2f6c638fd1a67086792606cfefe50089289ca34b52a72a833e6f2661b64417068846, 3bd4ca36a66c0675e695f3fc44af703cd6c110085adf105138ef56e6768a639f16a9c27b651a0c64f685b24be835e0a62485575477e06d530574865bf1670d30] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : city64withseed('Abc',0) @@ -272,13 +272,13 @@ evaluation: | Row 2 | 'ß😀山' | 12 | 14631005279260459058 | +--------+-------------------+-----------+----------------------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -| b | UInt16([10, 11, 12]) | -| Output | UInt64([10385767944629066306, 12123249488783690377, 14631005279260459058]) | -+--------+------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | +| b | UInt16([10, 11, 12]) | +| Output | UInt64([10385767944629066306, 12123249488783690377, 14631005279260459058]) | ++--------+----------------------------------------------------------------------------+ ast : siphash64('Abc') @@ -366,12 +366,12 @@ evaluation: | Row 1 | 'ß😀山' | 1354619631122873228 | +--------+-------------------------+----------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 10, 19] } | -| Output | UInt64([5782510256878119795, 1354619631122873228]) | -+--------+---------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Dobrý den, ß😀山] } | +| Output | UInt64([5782510256878119795, 1354619631122873228]) | ++--------+--------------------------------------------------------+ ast : xxhash64('Abc') @@ -459,12 +459,12 @@ evaluation: | Row 1 | 'ß😀山' | 656695431091154575 | +--------+-------------------------+----------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 10, 19] } | -| Output | UInt64([314761032262035578, 656695431091154575]) | -+--------+---------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Dobrý den, ß😀山] } | +| Output | UInt64([314761032262035578, 656695431091154575]) | ++--------+--------------------------------------------------------+ ast : xxhash32('Abc') @@ -552,11 +552,11 @@ evaluation: | Row 1 | 'ß😀山' | 1401072642 | +--------+-------------------------+------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 10, 19] } | -| Output | UInt32([19285785, 1401072642]) | -+--------+---------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Dobrý den, ß😀山] } | +| Output | UInt32([19285785, 1401072642]) | ++--------+--------------------------------------------------------+ diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index f8618c82aa66..63659a6561ea 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -69,17 +69,17 @@ evaluation: | Row 2 | 3 | 6 | 9 | 'c' | NULL | 'g' | {3:'c', 6:NULL, 9:'g'} | +--------+---------+---------+---------+-------------+---------------------+---------------------+------------------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | Int8([1, 2, 3]) | -| b_col | Int8([4, 5, 6]) | -| c_col | Int8([7, 8, 9]) | -| d_col | NullableColumn { column: StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x6465, offsets: [0, 1, 2, 2] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x6667, offsets: [0, 1, 1, 2] }, validity: [0b_____101] } | -| Output | ArrayColumn { values: Tuple([Int8([1, 4, 7, 2, 5, 8, 3, 6, 9]), NullableColumn { column: StringColumn { data: 0x61646662656367, offsets: [0, 1, 2, 3, 4, 5, 5, 6, 6, 7] }, validity: [0b01011111, 0b_______1] }]), offsets: [0, 3, 6, 9] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | Int8([1, 2, 3]) | +| b_col | Int8([4, 5, 6]) | +| c_col | Int8([7, 8, 9]) | +| d_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[d, e, ] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[f, , g] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([Int8([1, 4, 7, 2, 5, 8, 3, 6, 9]), NullableColumn { column: StringColumn { data: Utf8ViewArray[a, d, f, b, e, , c, , g] }, validity: [0b01011111, 0b_______1] }]), offsets: [0, 3, 6, 9] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : map(['k1', 'k2'], [a_col, b_col]) @@ -97,13 +97,13 @@ evaluation: | Row 2 | 3 | 6 | {'k1':3, 'k2':6} | +--------+---------+---------+-------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | Int8([1, 2, 3]) | -| b_col | Int8([4, 5, 6]) | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6b316b326b316b326b316b32, offsets: [0, 2, 4, 6, 8, 10, 12] }, Int8([1, 4, 2, 5, 3, 6])]), offsets: [0, 2, 4, 6] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | Int8([1, 2, 3]) | +| b_col | Int8([4, 5, 6]) | +| Output | ArrayColumn { values: Tuple([StringColumn { data: Utf8ViewArray[k1, k2, k1, k2, k1, k2] }, Int8([1, 4, 2, 5, 3, 6])]), offsets: [0, 2, 4, 6] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------+ ast : map([],[])[1] @@ -201,15 +201,15 @@ evaluation: | Row 1 | 2 | 4 | 'v2' | 'v4' | NULL | +--------+---------+---------+---------------+---------------+-------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------+ -| k1 | Int16([1, 2]) | -| k2 | Int16([3, 4]) | -| v1 | StringColumn { data: 0x76317632, offsets: [0, 2, 4] } | -| v2 | StringColumn { data: 0x76337634, offsets: [0, 2, 4] } | -| Output | NullableColumn { column: StringColumn { data: 0x7631, offsets: [0, 2, 2] }, validity: [0b______01] } | -+--------+------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------+ +| k1 | Int16([1, 2]) | +| k2 | Int16([3, 4]) | +| v1 | StringColumn { data: Utf8ViewArray[v1, v2] } | +| v2 | StringColumn { data: Utf8ViewArray[v3, v4] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, ] }, validity: [0b______01] } | ++--------+-----------------------------------------------------------------------------------------------+ ast : map_keys({}) @@ -262,17 +262,17 @@ evaluation: | Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | ['c', 'f', 'z'] | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-----------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | ArrayColumn { values: StringColumn { data: 0x61647862657963667a, offsets: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] }, offsets: [0, 3, 6, 9] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a, b, c] } | +| b_col | StringColumn { data: Utf8ViewArray[d, e, f] } | +| c_col | StringColumn { data: Utf8ViewArray[x, y, z] } | +| d_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v2, v3] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v4, v5, ] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v6, , v7] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: StringColumn { data: Utf8ViewArray[a, d, x, b, e, y, c, f, z] }, offsets: [0, 3, 6, 9] } | ++--------+----------------------------------------------------------------------------------------------------------------+ ast : map_values({}) @@ -334,17 +334,17 @@ evaluation: | Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | ['v3', NULL, 'v7'] | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | ArrayColumn { values: NullableColumn { column: StringColumn { data: 0x7631763476367632763576337637, offsets: [0, 2, 4, 6, 8, 10, 10, 12, 12, 14] }, validity: [0b01011111, 0b_______1] }, offsets: [0, 3, 6, 9] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a, b, c] } | +| b_col | StringColumn { data: Utf8ViewArray[d, e, f] } | +| c_col | StringColumn { data: Utf8ViewArray[x, y, z] } | +| d_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v2, v3] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v4, v5, ] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v6, , v7] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v4, v6, v2, v5, , v3, , v7] }, validity: [0b01011111, 0b_______1] }, offsets: [0, 3, 6, 9] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : map_size({}) @@ -388,17 +388,17 @@ evaluation: | Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | 3 | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | UInt64([3, 3, 3]) | -+--------+-----------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a, b, c] } | +| b_col | StringColumn { data: Utf8ViewArray[d, e, f] } | +| c_col | StringColumn { data: Utf8ViewArray[x, y, z] } | +| d_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v2, v3] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v4, v5, ] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v6, , v7] }, validity: [0b_____101] } | +| Output | UInt64([3, 3, 3]) | ++--------+-----------------------------------------------------------------------------------------------------+ ast : map_cat({}, {}) @@ -442,17 +442,17 @@ evaluation: | Row 2 | 'a_k3' | 'b_k3' | 'c_k3' | 'aaa3' | 'bbb3' | 'ccc3' | {'a_k3':'aaa3', 'b_k3':'bbb3', 'c_k3':'ccc3'} | +--------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-----------------------------------------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x615f6b31615f6b32615f6b33, offsets: [0, 4, 8, 12] } | -| b_col | StringColumn { data: 0x625f6b31625f6b32625f6b33, offsets: [0, 4, 8, 12] } | -| c_col | StringColumn { data: 0x635f6b31635f6b32635f6b33, offsets: [0, 4, 8, 12] } | -| d_col | StringColumn { data: 0x616161316161613261616133, offsets: [0, 4, 8, 12] } | -| e_col | StringColumn { data: 0x626262316262623262626233, offsets: [0, 4, 8, 12] } | -| f_col | StringColumn { data: 0x636363316363633263636333, offsets: [0, 4, 8, 12] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x615f6b31625f6b31635f6b31615f6b32625f6b32635f6b32615f6b33625f6b33635f6b33, offsets: [0, 4, 8, 12, 16, 20, 24, 28, 32, 36] }, StringColumn { data: 0x616161316262623163636331616161326262623263636332616161336262623363636333, offsets: [0, 4, 8, 12, 16, 20, 24, 28, 32, 36] }]), offsets: [0, 3, 6, 9] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a_k1, a_k2, a_k3] } | +| b_col | StringColumn { data: Utf8ViewArray[b_k1, b_k2, b_k3] } | +| c_col | StringColumn { data: Utf8ViewArray[c_k1, c_k2, c_k3] } | +| d_col | StringColumn { data: Utf8ViewArray[aaa1, aaa2, aaa3] } | +| e_col | StringColumn { data: Utf8ViewArray[bbb1, bbb2, bbb3] } | +| f_col | StringColumn { data: Utf8ViewArray[ccc1, ccc2, ccc3] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: Utf8ViewArray[a_k1, b_k1, c_k1, a_k2, b_k2, c_k2, a_k3, b_k3, c_k3] }, StringColumn { data: Utf8ViewArray[aaa1, bbb1, ccc1, aaa2, bbb2, ccc2, aaa3, bbb3, ccc3] }]), offsets: [0, 3, 6, 9] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : map_cat({'k1':'v1','k2':'v2'}, {'k1':'abc'}) @@ -478,17 +478,17 @@ evaluation: | Row 2 | 'c_k3' | 'b_k3' | 'c_k3' | 'aaa3' | 'bbb3' | 'ccc3' | {'c_k3':'ccc3', 'b_k3':'bbb3'} | +--------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-----------------------------------------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x615f6b31615f6b32635f6b33, offsets: [0, 4, 8, 12] } | -| b_col | StringColumn { data: 0x625f6b31635f6b32625f6b33, offsets: [0, 4, 8, 12] } | -| c_col | StringColumn { data: 0x635f6b31635f6b32635f6b33, offsets: [0, 4, 8, 12] } | -| d_col | StringColumn { data: 0x616161316161613261616133, offsets: [0, 4, 8, 12] } | -| e_col | StringColumn { data: 0x626262316262623262626233, offsets: [0, 4, 8, 12] } | -| f_col | StringColumn { data: 0x636363316363633263636333, offsets: [0, 4, 8, 12] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x615f6b31625f6b31635f6b31615f6b32635f6b32635f6b33625f6b33, offsets: [0, 4, 8, 12, 16, 20, 24, 28] }, StringColumn { data: 0x61616131626262316363633161616132636363326363633362626233, offsets: [0, 4, 8, 12, 16, 20, 24, 28] }]), offsets: [0, 3, 5, 7] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a_k1, a_k2, c_k3] } | +| b_col | StringColumn { data: Utf8ViewArray[b_k1, c_k2, b_k3] } | +| c_col | StringColumn { data: Utf8ViewArray[c_k1, c_k2, c_k3] } | +| d_col | StringColumn { data: Utf8ViewArray[aaa1, aaa2, aaa3] } | +| e_col | StringColumn { data: Utf8ViewArray[bbb1, bbb2, bbb3] } | +| f_col | StringColumn { data: Utf8ViewArray[ccc1, ccc2, ccc3] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: Utf8ViewArray[a_k1, b_k1, c_k1, a_k2, c_k2, c_k3, b_k3] }, StringColumn { data: Utf8ViewArray[aaa1, bbb1, ccc1, aaa2, ccc2, ccc3, bbb3] }]), offsets: [0, 3, 5, 7] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : map_cat({'k1': 'v1', 'k2': 'v2'}, {'k3': 'v3'}) @@ -602,15 +602,15 @@ evaluation: | Row 2 | 'a_k3' | 'b_k3' | 'aaa3' | 'bbb3' | {'a_k3':'aaa3'} | +--------+-------------------+-------------------+-------------------+-------------------+--------------------------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x615f6b31615f6b32615f6b33, offsets: [0, 4, 8, 12] } | -| b_col | StringColumn { data: 0x625f6b31625f6b32625f6b33, offsets: [0, 4, 8, 12] } | -| d_col | StringColumn { data: 0x616161316161613261616133, offsets: [0, 4, 8, 12] } | -| e_col | StringColumn { data: 0x626262316262623262626233, offsets: [0, 4, 8, 12] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x615f6b31625f6b31625f6b32615f6b33, offsets: [0, 4, 8, 12, 16] }, StringColumn { data: 0x61616131626262316262623261616133, offsets: [0, 4, 8, 12, 16] }]), offsets: [0, 2, 3, 4] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a_k1, a_k2, a_k3] } | +| b_col | StringColumn { data: Utf8ViewArray[b_k1, b_k2, b_k3] } | +| d_col | StringColumn { data: Utf8ViewArray[aaa1, aaa2, aaa3] } | +| e_col | StringColumn { data: Utf8ViewArray[bbb1, bbb2, bbb3] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: Utf8ViewArray[a_k1, b_k1, b_k2, a_k3] }, StringColumn { data: Utf8ViewArray[aaa1, bbb1, bbb2, aaa3] }]), offsets: [0, 2, 3, 4] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : map_delete({'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, string_key_col) @@ -627,12 +627,12 @@ evaluation: | Row 1 | 'k2' | {'k1':'v1', 'k3':'v3', 'k4':'v4'} | +--------+----------------+------------------------------------+ evaluation (internal): -+----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| string_key_col | StringColumn { data: 0x6b336b32, offsets: [0, 2, 4] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6b316b326b346b316b336b34, offsets: [0, 2, 4, 6, 8, 10, 12] }, StringColumn { data: 0x763176327634763176337634, offsets: [0, 2, 4, 6, 8, 10, 12] }]), offsets: [0, 3, 6] } | -+----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++----------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++----------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| string_key_col | StringColumn { data: Utf8ViewArray[k3, k2] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: Utf8ViewArray[k1, k2, k4, k1, k3, k4] }, StringColumn { data: Utf8ViewArray[v1, v2, v4, v1, v3, v4] }]), offsets: [0, 3, 6] } | ++----------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : map_delete({'k1': 'v1', 'k2': 'v2', 'k3': 'v3'}, 'k1', 'k2', 'k3') @@ -684,15 +684,15 @@ evaluation: | Row 2 | 'a_k3' | 559 | 'aaa3' | 662 | {'a_k3':559} | +--------+-------------------+-------------+-------------------+-------------+--------------------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x615f6b31615f6b32615f6b33, offsets: [0, 4, 8, 12] } | -| b_col | Int16([555, 557, 559]) | -| d_col | StringColumn { data: 0x616161316161613261616133, offsets: [0, 4, 8, 12] } | -| e_col | Int16([666, 664, 662]) | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x615f6b316161613161616132615f6b33, offsets: [0, 4, 8, 12, 16] }, Int16([555, 666, 664, 559])]), offsets: [0, 2, 3, 4] } | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a_k1, a_k2, a_k3] } | +| b_col | Int16([555, 557, 559]) | +| d_col | StringColumn { data: Utf8ViewArray[aaa1, aaa2, aaa3] } | +| e_col | Int16([666, 664, 662]) | +| Output | ArrayColumn { values: Tuple([StringColumn { data: Utf8ViewArray[a_k1, aaa1, aaa2, a_k3] }, Int16([555, 666, 664, 559])]), offsets: [0, 2, 3, 4] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ error: @@ -753,17 +753,17 @@ evaluation: | Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | false | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | Boolean([0b_____001]) | -+--------+-----------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a, b, c] } | +| b_col | StringColumn { data: Utf8ViewArray[d, e, f] } | +| c_col | StringColumn { data: Utf8ViewArray[x, y, z] } | +| d_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v2, v3] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v4, v5, ] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v6, , v7] }, validity: [0b_____101] } | +| Output | Boolean([0b_____001]) | ++--------+-----------------------------------------------------------------------------------------------------+ ast : map_contains_key(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'd') @@ -780,17 +780,17 @@ evaluation: | Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | false | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | Boolean([0b_____001]) | -+--------+-----------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a, b, c] } | +| b_col | StringColumn { data: Utf8ViewArray[d, e, f] } | +| c_col | StringColumn { data: Utf8ViewArray[x, y, z] } | +| d_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v2, v3] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v4, v5, ] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v6, , v7] }, validity: [0b_____101] } | +| Output | Boolean([0b_____001]) | ++--------+-----------------------------------------------------------------------------------------------------+ ast : map_pick({'a':1,'b':2,'c':3}, 'a', 'b') @@ -861,17 +861,17 @@ evaluation: | Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {} | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 1, 2, 2] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a, b, c] } | +| b_col | StringColumn { data: Utf8ViewArray[d, e, f] } | +| c_col | StringColumn { data: Utf8ViewArray[x, y, z] } | +| d_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v2, v3] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v4, v5, ] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v6, , v7] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: Utf8ViewArray[a, b] }, NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v2] }, validity: [0b______11] }]), offsets: [0, 1, 2, 2] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : map_insert({}, 'k1', 'v1') @@ -924,17 +924,17 @@ evaluation: | Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {'c':'v3', 'f':NULL, 'z':'v7', 'k1':'v10'} | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6164786b316265796b3163667a6b31, offsets: [0, 1, 2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 15] }, NullableColumn { column: StringColumn { data: 0x7631763476367631307632763576313076337637763130, offsets: [0, 2, 4, 6, 9, 11, 13, 13, 16, 18, 18, 20, 23] }, validity: [0b10111111, 0b____1101] }]), offsets: [0, 4, 8, 12] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a, b, c] } | +| b_col | StringColumn { data: Utf8ViewArray[d, e, f] } | +| c_col | StringColumn { data: Utf8ViewArray[x, y, z] } | +| d_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v2, v3] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v4, v5, ] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v6, , v7] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: Utf8ViewArray[a, d, x, k1, b, e, y, k1, c, f, z, k1] }, NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v4, v6, v10, v2, v5, , v10, v3, , v7, v10] }, validity: [0b10111111, 0b____1101] }]), offsets: [0, 4, 8, 12] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'v10', true) @@ -952,17 +952,17 @@ evaluation: | Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {'c':'v3', 'f':NULL, 'z':'v7', 'a':'v10'} | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6164786265796163667a61, offsets: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] }, NullableColumn { column: StringColumn { data: 0x763130763476367632763576313076337637763130, offsets: [0, 3, 5, 7, 9, 11, 11, 14, 16, 16, 18, 21] }, validity: [0b11011111, 0b_____110] }]), offsets: [0, 3, 7, 11] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: Utf8ViewArray[a, b, c] } | +| b_col | StringColumn { data: Utf8ViewArray[d, e, f] } | +| c_col | StringColumn { data: Utf8ViewArray[x, y, z] } | +| d_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v1, v2, v3] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v4, v5, ] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: Utf8ViewArray[v6, , v7] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: Utf8ViewArray[a, d, x, b, e, y, a, c, f, z, a] }, NullableColumn { column: StringColumn { data: Utf8ViewArray[v10, v4, v6, v2, v5, , v10, v3, , v7, v10] }, validity: [0b11011111, 0b_____110] }]), offsets: [0, 3, 7, 11] } | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ error: diff --git a/src/query/functions/tests/it/scalars/testdata/regexp.txt b/src/query/functions/tests/it/scalars/testdata/regexp.txt index beca019c9702..2481b476c7f5 100644 --- a/src/query/functions/tests/it/scalars/testdata/regexp.txt +++ b/src/query/functions/tests/it/scalars/testdata/regexp.txt @@ -56,13 +56,13 @@ evaluation: | Row 2 | '' | '' | 0 | +--------+---------------------------+--------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x646f672063617420646f676161206161612061616161206161206161612061616161, offsets: [0, 11, 34, 34] } | -| pat | StringColumn { data: 0x646f67617b327d, offsets: [0, 3, 7, 7] } | -| Output | UInt64([1, 1, 0]) | -+--------+-------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[dog cat dog, aa aaa aaaa aa aaa aaaa, ] } | +| pat | StringColumn { data: Utf8ViewArray[dog, a{2}, ] } | +| Output | UInt64([1, 1, 0]) | ++--------+------------------------------------------------------------------------------+ ast : regexp_instr(source, pat, pos) @@ -79,14 +79,14 @@ evaluation: | Row 2 | '' | '' | 1 | 0 | +--------+---------------------------+--------------+---------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x646f672063617420646f676161206161612061616161206161206161612061616161, offsets: [0, 11, 34, 34] } | -| pat | StringColumn { data: 0x646f67617b327d, offsets: [0, 3, 7, 7] } | -| pos | Int64([1, 2, 1]) | -| Output | UInt64([1, 4, 0]) | -+--------+-------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[dog cat dog, aa aaa aaaa aa aaa aaaa, ] } | +| pat | StringColumn { data: Utf8ViewArray[dog, a{2}, ] } | +| pos | Int64([1, 2, 1]) | +| Output | UInt64([1, 4, 0]) | ++--------+------------------------------------------------------------------------------+ ast : regexp_instr(source, pat, pos, occur) @@ -103,15 +103,15 @@ evaluation: | Row 2 | 'aa aa aa aaaa aaaa aaaa' | 'a{4}' | 9 | 2 | 15 | +--------+---------------------------------------------+------------------+---------+---------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x646f672063617420646f6761612061616120616161612061612061616120616161616161206161206161206161616120616161612061616161, offsets: [0, 11, 34, 57] } | -| pat | StringColumn { data: 0x646f67617b327d617b347d, offsets: [0, 3, 7, 11] } | -| pos | Int64([1, 1, 9]) | -| occur | Int64([2, 3, 2]) | -| Output | UInt64([9, 8, 15]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[dog cat dog, aa aaa aaaa aa aaa aaaa, aa aa aa aaaa aaaa aaaa] } | +| pat | StringColumn { data: Utf8ViewArray[dog, a{2}, a{4}] } | +| pos | Int64([1, 1, 9]) | +| occur | Int64([2, 3, 2]) | +| Output | UInt64([9, 8, 15]) | ++--------+-----------------------------------------------------------------------------------------------------+ ast : regexp_instr(source, pat, pos, occur, ro) @@ -129,16 +129,16 @@ evaluation: | Row 2 | 'aa aa aa aaaa aaaa aaaa' | 'a{4}' | 1 | 2 | 1 | 19 | +--------+---------------------------------------------+------------------+---------+---------+---------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x646f672063617420646f6761612061616120616161612061612061616120616161616161206161206161206161616120616161612061616161, offsets: [0, 11, 34, 57] } | -| pat | StringColumn { data: 0x646f67617b327d617b347d, offsets: [0, 3, 7, 11] } | -| pos | Int64([1, 2, 1]) | -| occur | Int64([2, 2, 2]) | -| ro | Int64([0, 1, 1]) | -| Output | UInt64([9, 10, 19]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[dog cat dog, aa aaa aaaa aa aaa aaaa, aa aa aa aaaa aaaa aaaa] } | +| pat | StringColumn { data: Utf8ViewArray[dog, a{2}, a{4}] } | +| pos | Int64([1, 2, 1]) | +| occur | Int64([2, 2, 2]) | +| ro | Int64([0, 1, 1]) | +| Output | UInt64([9, 10, 19]) | ++--------+-----------------------------------------------------------------------------------------------------+ ast : regexp_instr(source, pat, pos, occur, ro, mt) @@ -156,17 +156,17 @@ evaluation: | Row 2 | 'aa aa aa aaaa aaaa aaaa' | 'A{4}' | 1 | 2 | 1 | 'i' | 19 | +--------+---------------------------------------------+------------------+---------+---------+---------+-------------+---------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x646f672063617420646f6761612061616120616161612061612061616120616161616161206161206161206161616120616161612061616161, offsets: [0, 11, 34, 57] } | -| pat | StringColumn { data: 0x646f67417b327d417b347d, offsets: [0, 3, 7, 11] } | -| pos | Int64([1, 2, 1]) | -| occur | Int64([2, 2, 2]) | -| ro | Int64([0, 1, 1]) | -| mt | StringColumn { data: 0x696369, offsets: [0, 1, 2, 3] } | -| Output | UInt64([9, 0, 19]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[dog cat dog, aa aaa aaaa aa aaa aaaa, aa aa aa aaaa aaaa aaaa] } | +| pat | StringColumn { data: Utf8ViewArray[dog, A{2}, A{4}] } | +| pos | Int64([1, 2, 1]) | +| occur | Int64([2, 2, 2]) | +| ro | Int64([0, 1, 1]) | +| mt | StringColumn { data: Utf8ViewArray[i, c, i] } | +| Output | UInt64([9, 0, 19]) | ++--------+-----------------------------------------------------------------------------------------------------+ ast : regexp_instr(source, pat, pos, occur, ro) @@ -184,16 +184,16 @@ evaluation: | Row 3 | 'aa aa aa aaaa aaaa aaaa' | 'A{4}' | 1 | 1 | 1 | 14 | +--------+-------------------------------+-----------------------+---------+---------+---------+-------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | NullableColumn { column: StringColumn { data: 0x646f672063617420646f6761612061616120616161612061612061616120616161616161206161206161206161616120616161612061616161, offsets: [0, 11, 34, 34, 57] }, validity: [0b____1011] } | -| pat | NullableColumn { column: StringColumn { data: 0x646f67417b347d, offsets: [0, 3, 3, 3, 7] }, validity: [0b____1001] } | -| pos | Int64([1, 2, 1, 1]) | -| occur | Int64([2, 2, 2, 1]) | -| ro | Int64([0, 1, 1, 1]) | -| Output | NullableColumn { column: UInt64([9, 0, 0, 14]), validity: [0b____1001] } | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| source | NullableColumn { column: StringColumn { data: Utf8ViewArray[dog cat dog, aa aaa aaaa aa aaa aaaa, , aa aa aa aaaa aaaa aaaa] }, validity: [0b____1011] } | +| pat | NullableColumn { column: StringColumn { data: Utf8ViewArray[dog, , , A{4}] }, validity: [0b____1001] } | +| pos | Int64([1, 2, 1, 1]) | +| occur | Int64([2, 2, 2, 1]) | +| ro | Int64([0, 1, 1, 1]) | +| Output | NullableColumn { column: UInt64([9, 0, 0, 14]), validity: [0b____1001] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : regexp_instr(source, pat, pos, occur, ro, mt) @@ -211,17 +211,17 @@ evaluation: | Row 3 | 'aa aa aa aaaa aaaa aaaa' | 'A{4}' | 1 | 1 | 1 | 'i' | 14 | +--------+-------------------------------+-----------------------+---------+---------+---------+-------------+-------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | NullableColumn { column: StringColumn { data: 0x646f672063617420646f6761612061616120616161612061612061616120616161616161206161206161206161616120616161612061616161, offsets: [0, 11, 34, 34, 57] }, validity: [0b____1011] } | -| pat | NullableColumn { column: StringColumn { data: 0x646f67417b347d, offsets: [0, 3, 3, 3, 7] }, validity: [0b____1001] } | -| pos | Int64([1, 2, 1, 1]) | -| occur | Int64([2, 2, 2, 1]) | -| ro | Int64([0, 1, 1, 1]) | -| mt | StringColumn { data: 0x69636969, offsets: [0, 1, 2, 3, 4] } | -| Output | NullableColumn { column: UInt64([9, 0, 0, 14]), validity: [0b____1001] } | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| source | NullableColumn { column: StringColumn { data: Utf8ViewArray[dog cat dog, aa aaa aaaa aa aaa aaaa, , aa aa aa aaaa aaaa aaaa] }, validity: [0b____1011] } | +| pat | NullableColumn { column: StringColumn { data: Utf8ViewArray[dog, , , A{4}] }, validity: [0b____1001] } | +| pos | Int64([1, 2, 1, 1]) | +| occur | Int64([2, 2, 2, 1]) | +| ro | Int64([0, 1, 1, 1]) | +| mt | StringColumn { data: Utf8ViewArray[i, c, i, i] } | +| Output | NullableColumn { column: UInt64([9, 0, 0, 14]), validity: [0b____1001] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : regexp_instr(source, pat, pos, occur, ro) @@ -240,16 +240,16 @@ evaluation: | Row 3 | '周 周周 周周周 周周周周' | '周+' | 5 | 1 | 1 | 9 | +--------+---------------------------------------------------------+-----------------+---------+---------+---------+---------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0xe591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8e591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8e591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8e591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8, offsets: [0, 33, 66, 99, 132] } | -| pat | StringColumn { data: 0xe591a82be591a82be591a82be591a82b, offsets: [0, 4, 8, 12, 16] } | -| pos | Int64([1, 2, 3, 5]) | -| occur | Int64([2, 2, 3, 1]) | -| ro | Int64([0, 1, 1, 1]) | -| Output | UInt64([3, 9, 14, 9]) | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[周 周周 周周周 周周周周, 周 周周 周周周 周周周周, 周 周周 周周周 周周周周, 周 周周 周周周 周周周周] } | +| pat | StringColumn { data: Utf8ViewArray[周+, 周+, 周+, 周+] } | +| pos | Int64([1, 2, 3, 5]) | +| occur | Int64([2, 2, 3, 1]) | +| ro | Int64([0, 1, 1, 1]) | +| Output | UInt64([3, 9, 14, 9]) | ++--------+------------------------------------------------------------------------------------------------------------------------------------------+ error: @@ -349,13 +349,15 @@ evaluation: | Row 5 | '' | '' | true | +--------+----------------------+-------------------------+---------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x6162636162644162656e65772a0a2a6c696e65666f0a666f, offsets: [0, 3, 6, 9, 19, 24, 24] } | -| pat | StringColumn { data: 0x5e6141626162656e65775c2a2e5c2a6c696e655e666f24, offsets: [0, 2, 4, 7, 19, 23, 23] } | -| Output | Boolean([0b__100111]) | -+--------+--------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[abc, abd, Abe, new* | +| | *line, fo | +| | fo, ] } | +| pat | StringColumn { data: Utf8ViewArray[^a, Ab, abe, new\*.\*line, ^fo$, ] } | +| Output | Boolean([0b__100111]) | ++--------+-------------------------------------------------------------------------+ ast : regexp_like(source, pat, mt) @@ -377,14 +379,16 @@ evaluation: | Row 5 | '' | '' | 'c' | true | +--------+----------------------+-------------------------+------------+---------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x6162636162644162656e65772a0a2a6c696e65666f0a666f, offsets: [0, 3, 6, 9, 19, 24, 24] } | -| pat | StringColumn { data: 0x5e6141626162656e65775c2a2e5c2a6c696e655e666f24, offsets: [0, 2, 4, 7, 19, 23, 23] } | -| mt | StringColumn { data: 0x63696e6d63, offsets: [0, 0, 1, 2, 3, 4, 5] } | -| Output | Boolean([0b__111101]) | -+--------+--------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[abc, abd, Abe, new* | +| | *line, fo | +| | fo, ] } | +| pat | StringColumn { data: Utf8ViewArray[^a, Ab, abe, new\*.\*line, ^fo$, ] } | +| mt | StringColumn { data: Utf8ViewArray[, c, i, n, m, c] } | +| Output | Boolean([0b__111101]) | ++--------+-------------------------------------------------------------------------+ ast : regexp_like(source, pat, mt) @@ -402,14 +406,14 @@ evaluation: | Row 3 | 'abc' | 'abc' | NULL | NULL | +--------+-----------------------+-----------------------+---------------------+--------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------+ -| source | NullableColumn { column: StringColumn { data: 0x616263616263616263, offsets: [0, 3, 6, 6, 9] }, validity: [0b____1011] } | -| pat | NullableColumn { column: StringColumn { data: 0x616263616263, offsets: [0, 3, 3, 3, 6] }, validity: [0b____1001] } | -| mt | NullableColumn { column: StringColumn { data: 0x6969, offsets: [0, 0, 1, 2, 2] }, validity: [0b____0111] } | -| Output | NullableColumn { column: Boolean([0b____1101]), validity: [0b____0001] } | -+--------+--------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------+ +| source | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc, abc, , abc] }, validity: [0b____1011] } | +| pat | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc, , , abc] }, validity: [0b____1001] } | +| mt | NullableColumn { column: StringColumn { data: Utf8ViewArray[, i, i, ] }, validity: [0b____0111] } | +| Output | NullableColumn { column: Boolean([0b____1101]), validity: [0b____0001] } | ++--------+----------------------------------------------------------------------------------------------------------+ error: @@ -516,14 +520,14 @@ evaluation: | Row 3 | '' | 'b' | 'X' | '' | +--------+----------------+------------+-------------+---------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x612062206361206220636120622063, offsets: [0, 5, 10, 15, 15] } | -| pat | StringColumn { data: 0x627862, offsets: [0, 1, 2, 2, 3] } | -| repl | StringColumn { data: 0x58585858, offsets: [0, 1, 2, 3, 4] } | -| Output | StringColumn { data: 0x612058206361206220636120622063, offsets: [0, 5, 10, 15, 15] } | -+--------+--------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[a b c, a b c, a b c, ] } | +| pat | StringColumn { data: Utf8ViewArray[b, x, , b] } | +| repl | StringColumn { data: Utf8ViewArray[X, X, X, X] } | +| Output | StringColumn { data: Utf8ViewArray[a X c, a b c, a b c, ] } | ++--------+-------------------------------------------------------------+ ast : regexp_replace(source, pat, repl, pos) @@ -542,15 +546,15 @@ evaluation: | Row 3 | 'abc def ghi' | '[a-z]+' | 'X' | 12 | 'abc def ghi' | +--------+---------------------------------+-----------------------+-------------+----------+---------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x6162632064656620676869616263206465662067686961626320646566206768696162632064656620676869, offsets: [0, 11, 22, 33, 44] } | -| pat | StringColumn { data: 0x5b612d7a5d2b5b612d7a5d2b5b612d7a5d2b5b612d7a5d2b, offsets: [0, 6, 12, 18, 24] } | -| repl | StringColumn { data: 0x58585858, offsets: [0, 1, 2, 3, 4] } | -| pos | Int64([1, 4, 8, 12]) | -| Output | StringColumn { data: 0x5820582058616263205820586162632064656620586162632064656620676869, offsets: [0, 5, 12, 21, 32] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[abc def ghi, abc def ghi, abc def ghi, abc def ghi] } | +| pat | StringColumn { data: Utf8ViewArray[[a-z]+, [a-z]+, [a-z]+, [a-z]+] } | +| repl | StringColumn { data: Utf8ViewArray[X, X, X, X] } | +| pos | Int64([1, 4, 8, 12]) | +| Output | StringColumn { data: Utf8ViewArray[X X X, abc X X, abc def X, abc def ghi] } | ++--------+------------------------------------------------------------------------------------------+ ast : regexp_replace(source, pat, repl, pos, occur) @@ -569,16 +573,16 @@ evaluation: | Row 3 | 'abc def ghi' | '[a-z]+' | 'X' | 4 | 3 | 'abc def ghi' | +--------+---------------------------------+-----------------------+-------------+---------+---------+---------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x6162632064656620676869616263206465662067686961626320646566206768696162632064656620676869, offsets: [0, 11, 22, 33, 44] } | -| pat | StringColumn { data: 0x5b612d7a5d2b5b612d7a5d2b5b612d7a5d2b5b612d7a5d2b, offsets: [0, 6, 12, 18, 24] } | -| repl | StringColumn { data: 0x58585858, offsets: [0, 1, 2, 3, 4] } | -| pos | Int64([1, 1, 4, 4]) | -| occur | Int64([0, 1, 2, 3]) | -| Output | StringColumn { data: 0x58205820585820646566206768696162632064656620586162632064656620676869, offsets: [0, 5, 14, 23, 34] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[abc def ghi, abc def ghi, abc def ghi, abc def ghi] } | +| pat | StringColumn { data: Utf8ViewArray[[a-z]+, [a-z]+, [a-z]+, [a-z]+] } | +| repl | StringColumn { data: Utf8ViewArray[X, X, X, X] } | +| pos | Int64([1, 1, 4, 4]) | +| occur | Int64([0, 1, 2, 3]) | +| Output | StringColumn { data: Utf8ViewArray[X X X, X def ghi, abc def X, abc def ghi] } | ++--------+------------------------------------------------------------------------------------------+ ast : regexp_replace(source, pat, repl, pos, occur, mt) @@ -596,17 +600,17 @@ evaluation: | Row 2 | 'abc DEF ghi' | '[a-z]+' | 'X' | 4 | 1 | 'i' | 'abc X ghi' | +--------+---------------------------------+-----------------------+-------------+---------+---------+------------+-------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x616263206465662067686961626320444546206768696162632044454620676869, offsets: [0, 11, 22, 33] } | -| pat | StringColumn { data: 0x5b612d7a5d2b5b612d7a5d2b5b612d7a5d2b, offsets: [0, 6, 12, 18] } | -| repl | StringColumn { data: 0x585858, offsets: [0, 1, 2, 3] } | -| pos | Int64([1, 1, 4]) | -| occur | Int64([0, 2, 1]) | -| mt | StringColumn { data: 0x6369, offsets: [0, 0, 1, 2] } | -| Output | StringColumn { data: 0x5820582058616263204445462058616263205820676869, offsets: [0, 5, 14, 23] } | -+--------+-----------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[abc def ghi, abc DEF ghi, abc DEF ghi] } | +| pat | StringColumn { data: Utf8ViewArray[[a-z]+, [a-z]+, [a-z]+] } | +| repl | StringColumn { data: Utf8ViewArray[X, X, X] } | +| pos | Int64([1, 1, 4]) | +| occur | Int64([0, 2, 1]) | +| mt | StringColumn { data: Utf8ViewArray[, c, i] } | +| Output | StringColumn { data: Utf8ViewArray[X X X, abc DEF X, abc X ghi] } | ++--------+-----------------------------------------------------------------------------+ ast : regexp_replace(source, pat, repl, pos, occur) @@ -625,16 +629,16 @@ evaluation: | Row 3 | 'abc DEF ghi' | '[a-z]+' | 'X' | 4 | 1 | 'abc X ghi' | +--------+-------------------------------+--------------------------+-------------+---------+---------+-------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | NullableColumn { column: StringColumn { data: 0x616263206465662067686961626320444546206768696162632044454620676869, offsets: [0, 11, 22, 22, 33] }, validity: [0b____1011] } | -| pat | NullableColumn { column: StringColumn { data: 0x5b612d7a5d2b5b612d7a5d2b, offsets: [0, 6, 6, 6, 12] }, validity: [0b____1001] } | -| repl | StringColumn { data: 0x58585858, offsets: [0, 1, 2, 3, 4] } | -| pos | Int64([1, 1, 4, 4]) | -| occur | Int64([0, 2, 1, 1]) | -| Output | NullableColumn { column: StringColumn { data: 0x58205820586162632044454620676869616263205820676869, offsets: [0, 5, 16, 16, 25] }, validity: [0b____1001] } | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------+ +| source | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc def ghi, abc DEF ghi, , abc DEF ghi] }, validity: [0b____1011] } | +| pat | NullableColumn { column: StringColumn { data: Utf8ViewArray[[a-z]+, , , [a-z]+] }, validity: [0b____1001] } | +| repl | StringColumn { data: Utf8ViewArray[X, X, X, X] } | +| pos | Int64([1, 1, 4, 4]) | +| occur | Int64([0, 2, 1, 1]) | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[X X X, abc DEF ghi, , abc X ghi] }, validity: [0b____1001] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------+ ast : regexp_replace(source, pat, repl, pos, occur, mt) @@ -653,17 +657,17 @@ evaluation: | Row 3 | 'abc DEF ghi' | '[a-z]+' | 'X' | 4 | 1 | 'i' | 'abc X ghi' | +--------+-------------------------------+--------------------------+-------------+---------+---------+------------+-------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | NullableColumn { column: StringColumn { data: 0x616263206465662067686961626320444546206768696162632044454620676869, offsets: [0, 11, 22, 22, 33] }, validity: [0b____1011] } | -| pat | NullableColumn { column: StringColumn { data: 0x5b612d7a5d2b5b612d7a5d2b, offsets: [0, 6, 6, 6, 12] }, validity: [0b____1001] } | -| repl | StringColumn { data: 0x58585858, offsets: [0, 1, 2, 3, 4] } | -| pos | Int64([1, 1, 4, 4]) | -| occur | Int64([0, 2, 1, 1]) | -| mt | StringColumn { data: 0x636969, offsets: [0, 0, 1, 2, 3] } | -| Output | NullableColumn { column: StringColumn { data: 0x58205820586162632044454620676869616263205820676869, offsets: [0, 5, 16, 16, 25] }, validity: [0b____1001] } | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------+ +| source | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc def ghi, abc DEF ghi, , abc DEF ghi] }, validity: [0b____1011] } | +| pat | NullableColumn { column: StringColumn { data: Utf8ViewArray[[a-z]+, , , [a-z]+] }, validity: [0b____1001] } | +| repl | StringColumn { data: Utf8ViewArray[X, X, X, X] } | +| pos | Int64([1, 1, 4, 4]) | +| occur | Int64([0, 2, 1, 1]) | +| mt | StringColumn { data: Utf8ViewArray[, c, i, i] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[X X X, abc DEF ghi, , abc X ghi] }, validity: [0b____1001] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------+ ast : regexp_replace(source, pat, repl, pos, occur) @@ -682,16 +686,16 @@ evaluation: | Row 3 | '周 周周 周周周 周周周周' | '周+' | '唐' | 5 | 1 | '周 周周 唐 周周周周' | +--------+---------------------------------------------------------+-----------------+---------------+---------+---------+-------------------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0xe591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8e591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8e591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8e591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8, offsets: [0, 33, 66, 99, 132] } | -| pat | StringColumn { data: 0xe591a82be591a82be591a82be591a82b, offsets: [0, 4, 8, 12, 16] } | -| repl | StringColumn { data: 0xe59490e59490e59490e59490, offsets: [0, 3, 6, 9, 12] } | -| pos | Int64([1, 2, 3, 5]) | -| occur | Int64([0, 1, 3, 1]) | -| Output | StringColumn { data: 0xe5949020e5949020e5949020e59490e591a820e5949020e591a8e591a8e591a820e591a8e591a8e591a8e591a8e591a820e591a8e591a820e591a8e591a8e591a820e59490e591a820e591a8e591a820e5949020e591a8e591a8e591a8e591a8, offsets: [0, 15, 45, 69, 96] } | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[周 周周 周周周 周周周周, 周 周周 周周周 周周周周, 周 周周 周周周 周周周周, 周 周周 周周周 周周周周] } | +| pat | StringColumn { data: Utf8ViewArray[周+, 周+, 周+, 周+] } | +| repl | StringColumn { data: Utf8ViewArray[唐, 唐, 唐, 唐] } | +| pos | Int64([1, 2, 3, 5]) | +| occur | Int64([0, 1, 3, 1]) | +| Output | StringColumn { data: Utf8ViewArray[唐 唐 唐 唐, 周 唐 周周周 周周周周, 周 周周 周周周 唐, 周 周周 唐 周周周周] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------+ error: @@ -786,13 +790,13 @@ evaluation: | Row 2 | '' | '' | NULL | +--------+----------------------+--------------+-------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x61626320646566206768696162632064656620676869, offsets: [0, 11, 22, 22] } | -| pat | StringColumn { data: 0x5b612d7a5d2b787878, offsets: [0, 6, 9, 9] } | -| Output | NullableColumn { column: StringColumn { data: 0x616263, offsets: [0, 3, 3, 3] }, validity: [0b_____001] } | -+--------+-----------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[abc def ghi, abc def ghi, ] } | +| pat | StringColumn { data: Utf8ViewArray[[a-z]+, xxx, ] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc, , ] }, validity: [0b_____001] } | ++--------+--------------------------------------------------------------------------------------------------+ ast : regexp_substr(source, pat, pos) @@ -810,14 +814,14 @@ evaluation: | Row 2 | 'abc def ghi' | '[a-z]+' | 12 | NULL | +--------+---------------------------------+-----------------------+----------+-------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x616263206465662067686961626320646566206768696162632064656620676869, offsets: [0, 11, 22, 33] } | -| pat | StringColumn { data: 0x5b612d7a5d2b5b612d7a5d2b5b612d7a5d2b, offsets: [0, 6, 12, 18] } | -| pos | Int64([1, 4, 12]) | -| Output | NullableColumn { column: StringColumn { data: 0x616263646566, offsets: [0, 3, 6, 6] }, validity: [0b_____011] } | -+--------+-----------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[abc def ghi, abc def ghi, abc def ghi] } | +| pat | StringColumn { data: Utf8ViewArray[[a-z]+, [a-z]+, [a-z]+] } | +| pos | Int64([1, 4, 12]) | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc, def, ] }, validity: [0b_____011] } | ++--------+-----------------------------------------------------------------------------------------------------+ ast : regexp_substr(source, pat, pos, occur) @@ -835,15 +839,15 @@ evaluation: | Row 2 | 'abc def ghi' | '[a-z]+' | 12 | 3 | NULL | +--------+---------------------------------+-----------------------+----------+---------+-------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x616263206465662067686961626320646566206768696162632064656620676869, offsets: [0, 11, 22, 33] } | -| pat | StringColumn { data: 0x5b612d7a5d2b5b612d7a5d2b5b612d7a5d2b, offsets: [0, 6, 12, 18] } | -| pos | Int64([1, 4, 12]) | -| occur | Int64([3, 2, 3]) | -| Output | NullableColumn { column: StringColumn { data: 0x676869676869, offsets: [0, 3, 6, 6] }, validity: [0b_____011] } | -+--------+-----------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[abc def ghi, abc def ghi, abc def ghi] } | +| pat | StringColumn { data: Utf8ViewArray[[a-z]+, [a-z]+, [a-z]+] } | +| pos | Int64([1, 4, 12]) | +| occur | Int64([3, 2, 3]) | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[ghi, ghi, ] }, validity: [0b_____011] } | ++--------+-----------------------------------------------------------------------------------------------------+ ast : regexp_substr(source, pat, pos, occur, mt) @@ -861,16 +865,16 @@ evaluation: | Row 2 | 'abc DEF ghi' | '[a-z]+' | 12 | 3 | 'i' | NULL | +--------+---------------------------------+-----------------------+----------+---------+-------------+-------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0x414243206465662067686961626320646566204748496162632044454620676869, offsets: [0, 11, 22, 33] } | -| pat | StringColumn { data: 0x5b612d7a5d2b5b612d7a5d2b5b612d7a5d2b, offsets: [0, 6, 12, 18] } | -| pos | Int64([1, 4, 12]) | -| occur | Int64([3, 2, 3]) | -| mt | StringColumn { data: 0x636969, offsets: [0, 1, 2, 3] } | -| Output | NullableColumn { column: StringColumn { data: 0x474849, offsets: [0, 0, 3, 3] }, validity: [0b_____010] } | -+--------+-----------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[ABC def ghi, abc def GHI, abc DEF ghi] } | +| pat | StringColumn { data: Utf8ViewArray[[a-z]+, [a-z]+, [a-z]+] } | +| pos | Int64([1, 4, 12]) | +| occur | Int64([3, 2, 3]) | +| mt | StringColumn { data: Utf8ViewArray[c, i, i] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, GHI, ] }, validity: [0b_____010] } | ++--------+--------------------------------------------------------------------------------------------------+ ast : regexp_substr(source, pat, pos, occur, mt) @@ -888,16 +892,16 @@ evaluation: | Row 3 | 'abc DEF ghi' | '[a-z]+' | 4 | 1 | 'i' | 'DEF' | +--------+-------------------------------+--------------------------+---------+---------+------------+-------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | NullableColumn { column: StringColumn { data: 0x616263206465662067686961626320444546206768696162632044454620676869, offsets: [0, 11, 22, 22, 33] }, validity: [0b____1011] } | -| pat | NullableColumn { column: StringColumn { data: 0x5b612d7a5d2b5b612d7a5d2b, offsets: [0, 6, 6, 6, 12] }, validity: [0b____1001] } | -| pos | Int64([1, 1, 4, 4]) | -| occur | Int64([1, 2, 1, 1]) | -| mt | StringColumn { data: 0x636969, offsets: [0, 0, 1, 2, 3] } | -| Output | NullableColumn { column: StringColumn { data: 0x616263444546, offsets: [0, 3, 3, 3, 6] }, validity: [0b____1001] } | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------+ +| source | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc def ghi, abc DEF ghi, , abc DEF ghi] }, validity: [0b____1011] } | +| pat | NullableColumn { column: StringColumn { data: Utf8ViewArray[[a-z]+, , , [a-z]+] }, validity: [0b____1001] } | +| pos | Int64([1, 1, 4, 4]) | +| occur | Int64([1, 2, 1, 1]) | +| mt | StringColumn { data: Utf8ViewArray[, c, i, i] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc, , , DEF] }, validity: [0b____1001] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------+ ast : regexp_substr(source, pat, pos, occur) @@ -915,15 +919,15 @@ evaluation: | Row 2 | '周 周周 周周周 周周周周' | '周+' | 14 | 1 | NULL | +--------+---------------------------------------------------------+-----------------+----------+---------+-------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| source | StringColumn { data: 0xe591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8e591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8e591a820e591a8e591a820e591a8e591a8e591a820e591a8e591a8e591a8e591a8, offsets: [0, 33, 66, 99] } | -| pat | StringColumn { data: 0xe591a82be591a82be591a82b, offsets: [0, 4, 8, 12] } | -| pos | Int64([1, 2, 14]) | -| occur | Int64([1, 2, 1]) | -| Output | NullableColumn { column: StringColumn { data: 0xe591a8e591a8e591a8e591a8, offsets: [0, 3, 12, 12] }, validity: [0b_____011] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------+ +| source | StringColumn { data: Utf8ViewArray[周 周周 周周周 周周周周, 周 周周 周周周 周周周周, 周 周周 周周周 周周周周] } | +| pat | StringColumn { data: Utf8ViewArray[周+, 周+, 周+] } | +| pos | Int64([1, 2, 14]) | +| occur | Int64([1, 2, 1]) | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[周, 周周周, ] }, validity: [0b_____011] } | ++--------+-----------------------------------------------------------------------------------------------------------------+ error: diff --git a/src/query/functions/tests/it/scalars/testdata/string.txt b/src/query/functions/tests/it/scalars/testdata/string.txt index 20cd5cc4467b..63958666b0e4 100644 --- a/src/query/functions/tests/it/scalars/testdata/string.txt +++ b/src/query/functions/tests/it/scalars/testdata/string.txt @@ -48,12 +48,12 @@ evaluation: | Row 2 | 'ß😀山' | 'SS😀山' | +--------+-------------------+-------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263446f6272c3bd2064656ec39ff09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -| Output | StringColumn { data: 0x414243444f4252c39d2044454e5353f09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -+--------+------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Abc, Dobrý den, ß😀山] } | +| Output | StringColumn { data: Utf8ViewArray[ABC, DOBRÝ DEN, SS😀山] } | ++--------+--------------------------------------------------------------+ ast : lower('Abc') @@ -106,12 +106,12 @@ evaluation: | Row 2 | 'İ😀山' | 'i̇😀山' | +--------+-------------------+-------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x416263444f4252c39d2044454ec4b0f09f9880e5b1b1, offsets: [0, 3, 13, 22] } | -| Output | StringColumn { data: 0x616263646f6272c3bd2064656e69cc87f09f9880e5b1b1, offsets: [0, 3, 13, 23] } | -+--------+--------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[Abc, DOBRÝ DEN, İ😀山] } | +| Output | StringColumn { data: Utf8ViewArray[abc, dobrý den, i̇😀山] } | ++--------+-------------------------------------------------------------+ ast : bit_length('latin') @@ -182,12 +182,12 @@ evaluation: | Row 2 | 'кириллица and latin' | 224 | +--------+-----------------------------------+----------------------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x6c6174696ed0bad0b8d180d0b8d0bbd0bbd0b8d186d0b0d0bad0b8d180d0b8d0bbd0bbd0b8d186d0b020616e64206c6174696e, offsets: [0, 5, 23, 51] } | -| Output | UInt64([40, 144, 224]) | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[latin, кириллица, кириллица and latin] } | +| Output | UInt64([40, 144, 224]) | ++--------+-----------------------------------------------------------------------------+ ast : octet_length('latin') @@ -258,12 +258,12 @@ evaluation: | Row 2 | 'кириллица and latin' | 28 | +--------+-----------------------------------+----------------------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x6c6174696ed0bad0b8d180d0b8d0bbd0bbd0b8d186d0b0d0bad0b8d180d0b8d0bbd0bbd0b8d186d0b020616e64206c6174696e, offsets: [0, 5, 23, 51] } | -| Output | UInt64([5, 18, 28]) | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[latin, кириллица, кириллица and latin] } | +| Output | UInt64([5, 18, 28]) | ++--------+-----------------------------------------------------------------------------+ ast : char_length('latin') @@ -334,12 +334,12 @@ evaluation: | Row 2 | 'кириллица and latin' | 19 | +--------+-----------------------------------+----------------------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x6c6174696ed0bad0b8d180d0b8d0bbd0bbd0b8d186d0b0d0bad0b8d180d0b8d0bbd0bbd0b8d186d0b020616e64206c6174696e, offsets: [0, 5, 23, 51] } | -| Output | UInt64([5, 9, 19]) | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[latin, кириллица, кириллица and latin] } | +| Output | UInt64([5, 9, 19]) | ++--------+-----------------------------------------------------------------------------+ ast : quote('a\0b') @@ -465,12 +465,12 @@ evaluation: | Row 2 | 'a\nb' | 'a\\nb' | +--------+---------------------+----------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x615c3062615c2762615c6e62, offsets: [0, 4, 8, 12] } | -| Output | StringColumn { data: 0x615c5c3062615c5c5c2762615c5c6e62, offsets: [0, 5, 11, 16] } | -+--------+------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[a\0b, a\'b, a\nb] } | +| Output | StringColumn { data: Utf8ViewArray[a\\0b, a\\\'b, a\\nb] } | ++--------+------------------------------------------------------------+ ast : reverse('abc') @@ -550,12 +550,12 @@ evaluation: | Row 2 | '' | '' | +--------+--------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------+ -| a | StringColumn { data: 0x61626361, offsets: [0, 3, 4, 4] } | -| Output | StringColumn { data: 0x63626161, offsets: [0, 3, 4, 4] } | -+--------+----------------------------------------------------------+ ++--------+------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[abc, a, ] } | +| Output | StringColumn { data: Utf8ViewArray[cba, a, ] } | ++--------+------------------------------------------------+ ast : ascii('1') @@ -636,12 +636,12 @@ evaluation: | Row 3 | '你好' | 228 | +--------+-----------------+------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x313132332d31e4bda0e5a5bd, offsets: [0, 1, 4, 6, 12] } | -| Output | UInt8([49, 49, 45, 228]) | -+--------+------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[1, 123, -1, 你好] } | +| Output | UInt8([49, 49, 45, 228]) | ++--------+--------------------------------------------------------+ ast : ascii(b) @@ -657,12 +657,12 @@ evaluation: | Row 0 | '' | 0 | +--------+-----------+---------+ evaluation (internal): -+--------+--------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------+ -| b | StringColumn { data: 0x, offsets: [0, 0] } | -| Output | UInt8([0]) | -+--------+--------------------------------------------+ ++--------+----------------------------------------+ +| Column | Data | ++--------+----------------------------------------+ +| b | StringColumn { data: Utf8ViewArray[] } | +| Output | UInt8([0]) | ++--------+----------------------------------------+ ast : ltrim(' abc ') @@ -734,12 +734,12 @@ evaluation: | Row 3 | 'abc ' | 'abc ' | +--------+-----------------------+----------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616263202020616263202020616263202020616263202020, offsets: [0, 3, 9, 18, 24] } | -| Output | StringColumn { data: 0x616263616263616263202020616263202020, offsets: [0, 3, 6, 12, 18] } | -+--------+-------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[abc, abc, abc , abc ] } | +| Output | StringColumn { data: Utf8ViewArray[abc, abc, abc , abc ] } | ++--------+----------------------------------------------------------------------+ ast : rtrim(' abc ') @@ -811,12 +811,12 @@ evaluation: | Row 3 | 'abc ' | 'abc' | +--------+-----------------------+----------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616263202020616263202020616263202020616263202020, offsets: [0, 3, 9, 18, 24] } | -| Output | StringColumn { data: 0x616263202020616263202020616263616263, offsets: [0, 3, 9, 15, 18] } | -+--------+-------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[abc, abc, abc , abc ] } | +| Output | StringColumn { data: Utf8ViewArray[abc, abc, abc, abc] } | ++--------+----------------------------------------------------------------------+ ast : trim_leading('aaabbaaa', 'a') @@ -915,12 +915,12 @@ evaluation: | Row 3 | 'aabbaa' | 'bbaa' | +--------+-----------------------+----------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363616162626161, offsets: [0, 6, 12, 18, 24] } | -| Output | StringColumn { data: 0x6262616162626363626263636464636362626161, offsets: [0, 4, 10, 16, 20] } | -+--------+--------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc, aabbaa] } | +| Output | StringColumn { data: Utf8ViewArray[bbaa, bbccbb, ccddcc, bbaa] } | ++--------+----------------------------------------------------------------------+ ast : trim_leading(a, b) @@ -938,13 +938,13 @@ evaluation: | Row 3 | 'aabbaa' | '' | 'aabbaa' | +--------+-----------------------+------------+----------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363616162626161, offsets: [0, 6, 12, 18, 24] } | -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3, 3] } | -| Output | StringColumn { data: 0x626261616363626264646363616162626161, offsets: [0, 4, 8, 12, 18] } | -+--------+--------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc, aabbaa] } | +| b | StringColumn { data: Utf8ViewArray[a, b, c, ] } | +| Output | StringColumn { data: Utf8ViewArray[bbaa, ccbb, ddcc, aabbaa] } | ++--------+----------------------------------------------------------------------+ ast : trim_leading('aba', b) @@ -962,12 +962,12 @@ evaluation: | Row 3 | '' | 'aba' | +--------+------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------+ -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3, 3] } | -| Output | StringColumn { data: 0x6261616261616261616261, offsets: [0, 2, 5, 8, 11] } | -+--------+----------------------------------------------------------------------------+ ++--------+---------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------+ +| b | StringColumn { data: Utf8ViewArray[a, b, c, ] } | +| Output | StringColumn { data: Utf8ViewArray[ba, aba, aba, aba] } | ++--------+---------------------------------------------------------+ ast : trim_trailing('aaabbaaa', 'a') @@ -1066,12 +1066,12 @@ evaluation: | Row 3 | 'aabbaa' | 'aabbaa' | +--------+-----------------------+----------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363616162626161, offsets: [0, 6, 12, 18, 24] } | -| Output | StringColumn { data: 0x61616262616162626363636364646363616162626161, offsets: [0, 6, 10, 16, 22] } | -+--------+--------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc, aabbaa] } | +| Output | StringColumn { data: Utf8ViewArray[aabbaa, bbcc, ccddcc, aabbaa] } | ++--------+----------------------------------------------------------------------+ ast : trim_trailing(a, b) @@ -1089,13 +1089,13 @@ evaluation: | Row 3 | 'aabbaa' | '' | 'aabbaa' | +--------+-----------------------+------------+----------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363616162626161, offsets: [0, 6, 12, 18, 24] } | -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3, 3] } | -| Output | StringColumn { data: 0x616162626262636363636464616162626161, offsets: [0, 4, 8, 12, 18] } | -+--------+--------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc, aabbaa] } | +| b | StringColumn { data: Utf8ViewArray[a, b, c, ] } | +| Output | StringColumn { data: Utf8ViewArray[aabb, bbcc, ccdd, aabbaa] } | ++--------+----------------------------------------------------------------------+ ast : trim_trailing('aba', b) @@ -1113,12 +1113,12 @@ evaluation: | Row 3 | '' | 'aba' | +--------+------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------+ -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3, 3] } | -| Output | StringColumn { data: 0x6162616261616261616261, offsets: [0, 2, 5, 8, 11] } | -+--------+----------------------------------------------------------------------------+ ++--------+---------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------+ +| b | StringColumn { data: Utf8ViewArray[a, b, c, ] } | +| Output | StringColumn { data: Utf8ViewArray[ab, aba, aba, aba] } | ++--------+---------------------------------------------------------+ ast : trim_both('aaabbaaa', 'a') @@ -1226,12 +1226,12 @@ evaluation: | Row 3 | 'aabbaa' | 'bb' | +--------+-----------------------+----------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363616162626161, offsets: [0, 6, 12, 18, 24] } | -| Output | StringColumn { data: 0x62626262636362626363646463636262, offsets: [0, 2, 8, 14, 16] } | -+--------+--------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc, aabbaa] } | +| Output | StringColumn { data: Utf8ViewArray[bb, bbccbb, ccddcc, bb] } | ++--------+----------------------------------------------------------------------+ ast : trim_both(a, b) @@ -1249,13 +1249,13 @@ evaluation: | Row 3 | 'aabbaa' | '' | 'aabbaa' | +--------+-----------------------+------------+----------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363616162626161, offsets: [0, 6, 12, 18, 24] } | -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3, 3] } | -| Output | StringColumn { data: 0x626263636464616162626161, offsets: [0, 2, 4, 6, 12] } | -+--------+--------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc, aabbaa] } | +| b | StringColumn { data: Utf8ViewArray[a, b, c, ] } | +| Output | StringColumn { data: Utf8ViewArray[bb, cc, dd, aabbaa] } | ++--------+----------------------------------------------------------------------+ ast : trim_both('aba', b) @@ -1273,12 +1273,12 @@ evaluation: | Row 3 | '' | 'aba' | +--------+------------+--------+ evaluation (internal): -+--------+--------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------+ -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3, 3] } | -| Output | StringColumn { data: 0x62616261616261616261, offsets: [0, 1, 4, 7, 10] } | -+--------+--------------------------------------------------------------------------+ ++--------+--------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------+ +| b | StringColumn { data: Utf8ViewArray[a, b, c, ] } | +| Output | StringColumn { data: Utf8ViewArray[b, aba, aba, aba] } | ++--------+--------------------------------------------------------+ ast : trim(' abc ') @@ -1350,12 +1350,12 @@ evaluation: | Row 3 | 'abc ' | 'abc' | +--------+-----------------------+--------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616263202020616263202020616263202020616263202020, offsets: [0, 3, 9, 18, 24] } | -| Output | StringColumn { data: 0x616263616263616263616263, offsets: [0, 3, 6, 9, 12] } | -+--------+-------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[abc, abc, abc , abc ] } | +| Output | StringColumn { data: Utf8ViewArray[abc, abc, abc, abc] } | ++--------+----------------------------------------------------------------------+ ast : trim(both 'a' from 'aaabbaaa') @@ -1426,12 +1426,12 @@ evaluation: | Row 2 | 'ccddcc' | 'ccddcc' | +--------+-----------------------+----------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363, offsets: [0, 6, 12, 18] } | -| Output | StringColumn { data: 0x6262626263636262636364646363, offsets: [0, 2, 8, 14] } | -+--------+----------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc] } | +| Output | StringColumn { data: Utf8ViewArray[bb, bbccbb, ccddcc] } | ++--------+--------------------------------------------------------------+ ast : trim(both b from a) @@ -1448,13 +1448,13 @@ evaluation: | Row 2 | 'ccddcc' | 'c' | 'dd' | +--------+-----------------------+-------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363, offsets: [0, 6, 12, 18] } | -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| Output | StringColumn { data: 0x626263636464, offsets: [0, 2, 4, 6] } | -+--------+----------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc] } | +| b | StringColumn { data: Utf8ViewArray[a, b, c] } | +| Output | StringColumn { data: Utf8ViewArray[bb, cc, dd] } | ++--------+--------------------------------------------------------------+ ast : trim(both a from a) @@ -1471,12 +1471,12 @@ evaluation: | Row 2 | 'ccddcc' | '' | +--------+-----------------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363, offsets: [0, 6, 12, 18] } | -| Output | StringColumn { data: 0x, offsets: [0, 0, 0, 0] } | -+--------+----------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc] } | +| Output | StringColumn { data: Utf8ViewArray[, , ] } | ++--------+--------------------------------------------------------------+ ast : trim(both b from 'aba') @@ -1493,12 +1493,12 @@ evaluation: | Row 2 | 'c' | 'aba' | +--------+-------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------+ -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| Output | StringColumn { data: 0x62616261616261, offsets: [0, 1, 4, 7] } | -+--------+----------------------------------------------------------------+ ++--------+---------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------+ +| b | StringColumn { data: Utf8ViewArray[a, b, c] } | +| Output | StringColumn { data: Utf8ViewArray[b, aba, aba] } | ++--------+---------------------------------------------------+ ast : trim(leading 'a' from 'aaabbaaa') @@ -1569,12 +1569,12 @@ evaluation: | Row 2 | 'ccddcc' | 'ccddcc' | +--------+-----------------------+----------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363, offsets: [0, 6, 12, 18] } | -| Output | StringColumn { data: 0x62626161626263636262636364646363, offsets: [0, 4, 10, 16] } | -+--------+----------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc] } | +| Output | StringColumn { data: Utf8ViewArray[bbaa, bbccbb, ccddcc] } | ++--------+--------------------------------------------------------------+ ast : trim(leading b from a) @@ -1591,13 +1591,13 @@ evaluation: | Row 2 | 'ccddcc' | 'c' | 'ddcc' | +--------+-----------------------+-------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363, offsets: [0, 6, 12, 18] } | -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| Output | StringColumn { data: 0x626261616363626264646363, offsets: [0, 4, 8, 12] } | -+--------+----------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc] } | +| b | StringColumn { data: Utf8ViewArray[a, b, c] } | +| Output | StringColumn { data: Utf8ViewArray[bbaa, ccbb, ddcc] } | ++--------+--------------------------------------------------------------+ ast : trim(leading a from a) @@ -1614,12 +1614,12 @@ evaluation: | Row 2 | 'ccddcc' | '' | +--------+-----------------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363, offsets: [0, 6, 12, 18] } | -| Output | StringColumn { data: 0x, offsets: [0, 0, 0, 0] } | -+--------+----------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc] } | +| Output | StringColumn { data: Utf8ViewArray[, , ] } | ++--------+--------------------------------------------------------------+ ast : trim(leading b from 'aba') @@ -1636,12 +1636,12 @@ evaluation: | Row 2 | 'c' | 'aba' | +--------+-------------+--------+ evaluation (internal): -+--------+------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------+ -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| Output | StringColumn { data: 0x6261616261616261, offsets: [0, 2, 5, 8] } | -+--------+------------------------------------------------------------------+ ++--------+----------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------+ +| b | StringColumn { data: Utf8ViewArray[a, b, c] } | +| Output | StringColumn { data: Utf8ViewArray[ba, aba, aba] } | ++--------+----------------------------------------------------+ ast : trim(trailing 'a' from 'aaabbaaa') @@ -1712,12 +1712,12 @@ evaluation: | Row 2 | 'ccddcc' | 'ccddcc' | +--------+-----------------------+----------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363, offsets: [0, 6, 12, 18] } | -| Output | StringColumn { data: 0x61616262626263636262636364646363, offsets: [0, 4, 10, 16] } | -+--------+----------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc] } | +| Output | StringColumn { data: Utf8ViewArray[aabb, bbccbb, ccddcc] } | ++--------+--------------------------------------------------------------+ ast : trim(trailing b from a) @@ -1734,13 +1734,13 @@ evaluation: | Row 2 | 'ccddcc' | 'c' | 'ccdd' | +--------+-----------------------+-------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363, offsets: [0, 6, 12, 18] } | -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| Output | StringColumn { data: 0x616162626262636363636464, offsets: [0, 4, 8, 12] } | -+--------+----------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc] } | +| b | StringColumn { data: Utf8ViewArray[a, b, c] } | +| Output | StringColumn { data: Utf8ViewArray[aabb, bbcc, ccdd] } | ++--------+--------------------------------------------------------------+ ast : trim(trailing a from a) @@ -1757,12 +1757,12 @@ evaluation: | Row 2 | 'ccddcc' | '' | +--------+-----------------------+--------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616162626161626263636262636364646363, offsets: [0, 6, 12, 18] } | -| Output | StringColumn { data: 0x, offsets: [0, 0, 0, 0] } | -+--------+----------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[aabbaa, bbccbb, ccddcc] } | +| Output | StringColumn { data: Utf8ViewArray[, , ] } | ++--------+--------------------------------------------------------------+ ast : trim(trailing b from 'aba') @@ -1779,12 +1779,12 @@ evaluation: | Row 2 | 'c' | 'aba' | +--------+-------------+--------+ evaluation (internal): -+--------+------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------+ -| b | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| Output | StringColumn { data: 0x6162616261616261, offsets: [0, 2, 5, 8] } | -+--------+------------------------------------------------------------------+ ++--------+----------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------+ +| b | StringColumn { data: Utf8ViewArray[a, b, c] } | +| Output | StringColumn { data: Utf8ViewArray[ab, aba, aba] } | ++--------+----------------------------------------------------+ ast : concat('5', '3', '4') @@ -1829,12 +1829,12 @@ evaluation: | Row 3 | 'abc ' | 'abc 345' | +--------+-----------------------+----------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616263202020616263202020616263202020616263202020, offsets: [0, 3, 9, 18, 24] } | -| Output | StringColumn { data: 0x616263333435202020616263333435202020616263202020333435616263202020333435, offsets: [0, 6, 15, 27, 36] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[abc, abc, abc , abc ] } | +| Output | StringColumn { data: Utf8ViewArray[abc345, abc345, abc 345, abc 345] } | ++--------+----------------------------------------------------------------------------------+ ast : concat(a, '3') @@ -1853,12 +1853,12 @@ evaluation: | Row 3 | 'd' | 'd3' | +--------+----------------------+------------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------+ -| a | NullableColumn { column: StringColumn { data: 0x61626364, offsets: [0, 1, 2, 3, 4] }, validity: [0b____1011] } | -| Output | NullableColumn { column: StringColumn { data: 0x6133623363336433, offsets: [0, 2, 4, 6, 8] }, validity: [0b____1011] } | -+--------+------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c, d] }, validity: [0b____1011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[a3, b3, c3, d3] }, validity: [0b____1011] } | ++--------+---------------------------------------------------------------------------------------------------------+ ast : concat_ws('-', '3', null, '4', null, '5') @@ -1903,12 +1903,12 @@ evaluation: | Row 3 | '-' | '3-4-5' | +--------+-------------+---------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x2c2d2c2d, offsets: [0, 1, 2, 3, 4] } | -| Output | StringColumn { data: 0x332c342c35332d342d35332c342c35332d342d35, offsets: [0, 5, 10, 15, 20] } | -+--------+------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[,, -, ,, -] } | +| Output | StringColumn { data: Utf8ViewArray[3,4,5, 3-4-5, 3,4,5, 3-4-5] } | ++--------+------------------------------------------------------------------+ ast : concat_ws(a, '3') @@ -1927,12 +1927,12 @@ evaluation: | Row 3 | 'd' | '3' | +--------+----------------------+-----------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------+ -| a | NullableColumn { column: StringColumn { data: 0x61626364, offsets: [0, 1, 2, 3, 4] }, validity: [0b____1011] } | -| Output | NullableColumn { column: StringColumn { data: 0x333333, offsets: [0, 1, 2, 2, 3] }, validity: [0b____1011] } | -+--------+----------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c, d] }, validity: [0b____1011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[3, 3, , 3] }, validity: [0b____1011] } | ++--------+-----------------------------------------------------------------------------------------------------+ ast : concat_ws(a, '3', '4') @@ -1951,12 +1951,12 @@ evaluation: | Row 3 | 'd' | '3d4' | +--------+----------------------+-----------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------+ -| a | NullableColumn { column: StringColumn { data: 0x61626364, offsets: [0, 1, 2, 3, 4] }, validity: [0b____1011] } | -| Output | NullableColumn { column: StringColumn { data: 0x336134336234336434, offsets: [0, 3, 6, 6, 9] }, validity: [0b____1011] } | -+--------+--------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------+ +| a | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c, d] }, validity: [0b____1011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[3a4, 3b4, , 3d4] }, validity: [0b____1011] } | ++--------+----------------------------------------------------------------------------------------------------------+ error: @@ -1985,12 +1985,12 @@ evaluation: | Row 2 | 3 | '11' | +--------+----------+--------------------------------------------------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | Int8([-1, 2, 3]) | -| Output | StringColumn { data: 0x3131313131313131313131313131313131313131313131313131313131313131313131313131313131313131313131313131313131313131313131313131313131303131, offsets: [0, 64, 66, 68] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------+ +| a | Int8([-1, 2, 3]) | +| Output | StringColumn { data: Utf8ViewArray[1111111111111111111111111111111111111111111111111111111111111111, 10, 11] } | ++--------+----------------------------------------------------------------------------------------------------------------+ ast : bin(a2) @@ -2007,12 +2007,12 @@ evaluation: | Row 2 | NULL | NULL | +--------+------------------+-----------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------+ -| a2 | NullableColumn { column: UInt8([1, 2, 3]), validity: [0b_____011] } | -| Output | NullableColumn { column: StringColumn { data: 0x3131303131, offsets: [0, 1, 3, 5] }, validity: [0b_____011] } | -+--------+---------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------+ +| a2 | NullableColumn { column: UInt8([1, 2, 3]), validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[1, 10, 11] }, validity: [0b_____011] } | ++--------+----------------------------------------------------------------------------------------------------+ ast : bin(b) @@ -2029,12 +2029,12 @@ evaluation: | Row 2 | 6 | '110' | +--------+---------+--------+ evaluation (internal): -+--------+------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------+ -| b | Int16([2, 4, 6]) | -| Output | StringColumn { data: 0x3130313030313130, offsets: [0, 2, 5, 8] } | -+--------+------------------------------------------------------------------+ ++--------+----------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------+ +| b | Int16([2, 4, 6]) | +| Output | StringColumn { data: Utf8ViewArray[10, 100, 110] } | ++--------+----------------------------------------------------+ ast : bin(c) @@ -2051,12 +2051,12 @@ evaluation: | Row 2 | 30 | '11110' | +--------+-----------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------+ -| c | UInt32([10, 20, 30]) | -| Output | StringColumn { data: 0x3130313031303130303131313130, offsets: [0, 4, 9, 14] } | -+--------+-------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------+ +| c | UInt32([10, 20, 30]) | +| Output | StringColumn { data: Utf8ViewArray[1010, 10100, 11110] } | ++--------+----------------------------------------------------------+ error: @@ -2093,12 +2093,12 @@ evaluation: | Row 2 | 3 | '3' | +--------+----------+--------------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------+ -| a | Int8([-1, 2, 3]) | -| Output | StringColumn { data: 0x313737373737373737373737373737373737373737373233, offsets: [0, 22, 23, 24] } | -+--------+-----------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------+ +| a | Int8([-1, 2, 3]) | +| Output | StringColumn { data: Utf8ViewArray[1777777777777777777777, 2, 3] } | ++--------+--------------------------------------------------------------------+ ast : oct(a2) @@ -2115,12 +2115,12 @@ evaluation: | Row 2 | NULL | NULL | +--------+------------------+-----------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------+ -| a2 | NullableColumn { column: UInt8([1, 2, 3]), validity: [0b_____011] } | -| Output | NullableColumn { column: StringColumn { data: 0x313233, offsets: [0, 1, 2, 3] }, validity: [0b_____011] } | -+--------+-----------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------+ +| a2 | NullableColumn { column: UInt8([1, 2, 3]), validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[1, 2, 3] }, validity: [0b_____011] } | ++--------+--------------------------------------------------------------------------------------------------+ ast : oct(b) @@ -2137,12 +2137,12 @@ evaluation: | Row 2 | 6 | '6' | +--------+---------+--------+ evaluation (internal): -+--------+--------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------+ -| b | Int16([2, 4, 6]) | -| Output | StringColumn { data: 0x323436, offsets: [0, 1, 2, 3] } | -+--------+--------------------------------------------------------+ ++--------+-----------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------+ +| b | Int16([2, 4, 6]) | +| Output | StringColumn { data: Utf8ViewArray[2, 4, 6] } | ++--------+-----------------------------------------------+ ast : oct(c) @@ -2159,12 +2159,12 @@ evaluation: | Row 2 | 30 | '36' | +--------+-----------+--------+ evaluation (internal): -+--------+--------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------+ -| c | UInt32([10, 20, 30]) | -| Output | StringColumn { data: 0x313232343336, offsets: [0, 2, 4, 6] } | -+--------+--------------------------------------------------------------+ ++--------+--------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------+ +| c | UInt32([10, 20, 30]) | +| Output | StringColumn { data: Utf8ViewArray[12, 24, 36] } | ++--------+--------------------------------------------------+ error: @@ -2201,12 +2201,12 @@ evaluation: | Row 2 | 3 | '3' | +--------+----------+--------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------+ -| a | Int8([-1, 2, 3]) | -| Output | StringColumn { data: 0x666666666666666666666666666666663233, offsets: [0, 16, 17, 18] } | -+--------+-----------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------+ +| a | Int8([-1, 2, 3]) | +| Output | StringColumn { data: Utf8ViewArray[ffffffffffffffff, 2, 3] } | ++--------+--------------------------------------------------------------+ ast : hex(a2) @@ -2223,12 +2223,12 @@ evaluation: | Row 2 | NULL | NULL | +--------+------------------+-----------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------+ -| a2 | NullableColumn { column: UInt8([1, 2, 3]), validity: [0b_____011] } | -| Output | NullableColumn { column: StringColumn { data: 0x313233, offsets: [0, 1, 2, 3] }, validity: [0b_____011] } | -+--------+-----------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------+ +| a2 | NullableColumn { column: UInt8([1, 2, 3]), validity: [0b_____011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[1, 2, 3] }, validity: [0b_____011] } | ++--------+--------------------------------------------------------------------------------------------------+ ast : hex(b) @@ -2245,12 +2245,12 @@ evaluation: | Row 2 | 6 | '6' | +--------+---------+--------+ evaluation (internal): -+--------+--------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------+ -| b | Int16([2, 4, 6]) | -| Output | StringColumn { data: 0x323436, offsets: [0, 1, 2, 3] } | -+--------+--------------------------------------------------------+ ++--------+-----------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------+ +| b | Int16([2, 4, 6]) | +| Output | StringColumn { data: Utf8ViewArray[2, 4, 6] } | ++--------+-----------------------------------------------+ ast : hex(c) @@ -2267,12 +2267,12 @@ evaluation: | Row 2 | 30 | '1e' | +--------+-----------+--------+ evaluation (internal): -+--------+------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------+ -| c | UInt32([10, 20, 30]) | -| Output | StringColumn { data: 0x6131343165, offsets: [0, 1, 3, 5] } | -+--------+------------------------------------------------------------+ ++--------+-------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------+ +| c | UInt32([10, 20, 30]) | +| Output | StringColumn { data: Utf8ViewArray[a, 14, 1e] } | ++--------+-------------------------------------------------+ error: @@ -2303,12 +2303,12 @@ evaluation: | Row 2 | 'databend' | '6461746162656e64' | +--------+-----------------+--------------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------+ -| e | StringColumn { data: 0x6162636465666461746162656e64, offsets: [0, 3, 6, 14] } | -| Output | StringColumn { data: 0x36313632363336343635363636343631373436313632363536653634, offsets: [0, 6, 12, 28] } | -+--------+------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------+ +| e | StringColumn { data: Utf8ViewArray[abc, def, databend] } | +| Output | StringColumn { data: Utf8ViewArray[616263, 646566, 6461746162656e64] } | ++--------+------------------------------------------------------------------------+ ast : lpad('hi', 2, '?') @@ -2403,14 +2403,14 @@ evaluation: | Row 2 | 'cc' | 5 | 'bb' | 'bbbcc' | +--------+-----------------+---------+-------------+---------+ evaluation (internal): -+--------+------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------+ -| a | StringColumn { data: 0x6869746573746363, offsets: [0, 2, 6, 8] } | -| b | UInt8([0, 3, 5]) | -| c | StringColumn { data: 0x3f786262, offsets: [0, 1, 2, 4] } | -| Output | StringColumn { data: 0x7465736262626363, offsets: [0, 0, 3, 8] } | -+--------+------------------------------------------------------------------+ ++--------+----------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[hi, test, cc] } | +| b | UInt8([0, 3, 5]) | +| c | StringColumn { data: Utf8ViewArray[?, x, bb] } | +| Output | StringColumn { data: Utf8ViewArray[, tes, bbbcc] } | ++--------+----------------------------------------------------+ error: @@ -2513,14 +2513,14 @@ evaluation: | Row 2 | 'cc' | 5 | 'bb' | 'ccbbb' | +--------+-----------------+---------+-------------+---------+ evaluation (internal): -+--------+------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------+ -| a | StringColumn { data: 0x6869746573746363, offsets: [0, 2, 6, 8] } | -| b | UInt8([0, 3, 5]) | -| c | StringColumn { data: 0x3f786262, offsets: [0, 1, 2, 4] } | -| Output | StringColumn { data: 0x7465736363626262, offsets: [0, 0, 3, 8] } | -+--------+------------------------------------------------------------------+ ++--------+----------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[hi, test, cc] } | +| b | UInt8([0, 3, 5]) | +| c | StringColumn { data: Utf8ViewArray[?, x, bb] } | +| Output | StringColumn { data: Utf8ViewArray[, tes, ccbbb] } | ++--------+----------------------------------------------------+ error: @@ -2582,14 +2582,14 @@ evaluation: | Row 3 | 'q' | '' | 'q' | 'q' | +--------+-----------------+-------------+-------------+--------+ evaluation (internal): -+--------+-----------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------+ -| a | StringColumn { data: 0x686974657374636371, offsets: [0, 2, 6, 8, 9] } | -| b | StringColumn { data: 0x6974656363, offsets: [0, 1, 3, 5, 5] } | -| c | StringColumn { data: 0x3f78626271, offsets: [0, 1, 2, 4, 5] } | -| Output | StringColumn { data: 0x683f787374626271, offsets: [0, 2, 5, 7, 8] } | -+--------+-----------------------------------------------------------------------+ ++--------+-------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[hi, test, cc, q] } | +| b | StringColumn { data: Utf8ViewArray[i, te, cc, ] } | +| c | StringColumn { data: Utf8ViewArray[?, x, bb, q] } | +| Output | StringColumn { data: Utf8ViewArray[h?, xst, bb, q] } | ++--------+-------------------------------------------------------+ ast : translate('abcdefabcdef', 'dc', 'zy') @@ -2644,14 +2644,14 @@ evaluation: | Row 3 | 'abcdef' | 'dc' | 'dc' | 'abcdef' | +--------+-----------------------+-------------+-------------+----------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x616263646566616263646566616263646566616263646566, offsets: [0, 6, 12, 18, 24] } | -| b | StringColumn { data: 0x646364636463, offsets: [0, 2, 2, 4, 6] } | -| c | StringColumn { data: 0x7a797a796463, offsets: [0, 2, 4, 4, 6] } | -| Output | StringColumn { data: 0x6162797a656661626364656661626566616263646566, offsets: [0, 6, 12, 16, 22] } | -+--------+--------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[abcdef, abcdef, abcdef, abcdef] } | +| b | StringColumn { data: Utf8ViewArray[dc, , dc, dc] } | +| c | StringColumn { data: Utf8ViewArray[zy, zy, , dc] } | +| Output | StringColumn { data: Utf8ViewArray[abyzef, abcdef, abef, abcdef] } | ++--------+----------------------------------------------------------------------+ ast : strcmp('text', 'text2') @@ -2696,13 +2696,13 @@ evaluation: | Row 3 | 'cc' | 'ccb' | -1 | +--------+-----------------+------------------+--------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------+ -| a | StringColumn { data: 0x6968746573746363, offsets: [0, 1, 2, 6, 8] } | -| b | StringColumn { data: 0x6869686974657374636362, offsets: [0, 2, 4, 8, 11] } | -| Output | Int8([1, -1, 0, -1]) | -+--------+----------------------------------------------------------------------------+ ++--------+---------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[i, h, test, cc] } | +| b | StringColumn { data: Utf8ViewArray[hi, hi, test, ccb] } | +| Output | Int8([1, -1, 0, -1]) | ++--------+---------------------------------------------------------+ ast : locate('bar', 'foobarbar') @@ -2837,14 +2837,14 @@ evaluation: | Row 3 | 'q' | '56' | 1 | 0 | +--------+---------------+---------------+---------+----------------------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x6261726363636371, offsets: [0, 3, 5, 7, 8] } | -| b | StringColumn { data: 0x666f6f6261726261726264636361636378783536, offsets: [0, 9, 16, 18, 20] } | -| c | UInt8([1, 2, 0, 1]) | -| Output | UInt64([4, 3, 0, 0]) | -+--------+------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[bar, cc, cc, q] } | +| b | StringColumn { data: Utf8ViewArray[foobarbar, bdccacc, xx, 56] } | +| c | UInt8([1, 2, 0, 1]) | +| Output | UInt64([4, 3, 0, 0]) | ++--------+------------------------------------------------------------------+ ast : char(65,66,67) @@ -2875,7 +2875,7 @@ evaluation: | Type | UInt8 | UInt8 | UInt8 | Binary | | Domain | {66..=67} | {98..=99} | {68..=69} | Undefined | | Row 0 | 66 | 98 | 68 | 426244 | -| Row 1 | 67 | 99 | 69 | 436345 | +| Row 1 | 67 | 99 | 69 | 426244 | +--------+-----------+-----------+-----------+-----------+ evaluation (internal): +--------+-----------------------------------------------------------+ @@ -2884,7 +2884,7 @@ evaluation (internal): | a | UInt8([66, 67]) | | b | UInt8([98, 99]) | | c | UInt8([68, 69]) | -| Output | BinaryColumn { data: 0x426244436345, offsets: [0, 3, 6] } | +| Output | BinaryColumn { data: 0x426244426244, offsets: [0, 3, 6] } | +--------+-----------------------------------------------------------+ @@ -2907,7 +2907,7 @@ evaluation (internal): | b | UInt8([98, 99]) | | c | UInt8([68, 69]) | | a2 | NullableColumn { column: UInt8([66, 67]), validity: [0b______01] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x426244436345, offsets: [0, 3, 6] }, validity: [0b______01] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x426244426244, offsets: [0, 3, 6] }, validity: [0b______01] } | +--------+--------------------------------------------------------------------------------------------------------------+ @@ -2974,12 +2974,12 @@ evaluation: | Row 3 | 'TEACHER' | 'T260' | +--------+------------------------------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x23f09f9091f09f90916865f09f90916c6c6ff09f9091f09f90916865f09f90916c6c6ff09f90917465616368657254454143484552, offsets: [0, 22, 39, 46, 53] } | -| Output | StringColumn { data: 0xf09f9091343030f09f90913430305432363054323630, offsets: [0, 7, 14, 18, 22] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[#🐑🐑he🐑llo🐑, 🐑he🐑llo🐑, teacher, TEACHER] } | +| Output | StringColumn { data: Utf8ViewArray[🐑400, 🐑400, T260, T260] } | ++--------+-------------------------------------------------------------------------------------+ ast : ord(NULL) @@ -3077,12 +3077,12 @@ evaluation: | Row 2 | 'c' | 'ccc' | +--------+-------------+---------+ evaluation (internal): -+--------+--------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------+ -| a | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| Output | StringColumn { data: 0x616161626262636363, offsets: [0, 3, 6, 9] } | -+--------+--------------------------------------------------------------------+ ++--------+-----------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[a, b, c] } | +| Output | StringColumn { data: Utf8ViewArray[aaa, bbb, ccc] } | ++--------+-----------------------------------------------------+ error: @@ -3188,15 +3188,15 @@ evaluation: | Row 3 | 'q' | 1 | 1 | '56' | '56' | +--------+-----------------+---------+---------+---------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------+ -| a | StringColumn { data: 0x686974657374636371, offsets: [0, 2, 6, 8, 9] } | -| b | UInt8([1, 4, 1, 1]) | -| c | UInt8([3, 5, 1, 1]) | -| d | StringColumn { data: 0x78787a6331323536, offsets: [0, 2, 4, 6, 8] } | -| Output | StringColumn { data: 0x78787465737a633132633536, offsets: [0, 2, 7, 10, 12] } | -+--------+-------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------+ +| a | StringColumn { data: Utf8ViewArray[hi, test, cc, q] } | +| b | UInt8([1, 4, 1, 1]) | +| c | UInt8([3, 5, 1, 1]) | +| d | StringColumn { data: Utf8ViewArray[xx, zc, 12, 56] } | +| Output | StringColumn { data: Utf8ViewArray[xx, teszc, 12c, 56] } | ++--------+----------------------------------------------------------+ ast : insert(x, y, z, u) @@ -3214,15 +3214,15 @@ evaluation: | Row 3 | 'q' | 1 | 1 | '56' | '56' | +--------+--------------------------+------------------+------------------+------------------------+-----------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------+ -| x | NullableColumn { column: StringColumn { data: 0x686974657374636371, offsets: [0, 2, 6, 8, 9] }, validity: [0b____1110] } | -| y | NullableColumn { column: UInt8([1, 4, 1, 1]), validity: [0b____1011] } | -| z | NullableColumn { column: UInt8([3, 5, 1, 1]), validity: [0b____1101] } | -| u | NullableColumn { column: StringColumn { data: 0x78787a6331323536, offsets: [0, 2, 4, 6, 8] }, validity: [0b____1110] } | -| Output | NullableColumn { column: StringColumn { data: 0x78787465737a633132633536, offsets: [0, 2, 7, 10, 12] }, validity: [0b____1000] } | -+--------+----------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------+ +| x | NullableColumn { column: StringColumn { data: Utf8ViewArray[hi, test, cc, q] }, validity: [0b____1110] } | +| y | NullableColumn { column: UInt8([1, 4, 1, 1]), validity: [0b____1011] } | +| z | NullableColumn { column: UInt8([3, 5, 1, 1]), validity: [0b____1101] } | +| u | NullableColumn { column: StringColumn { data: Utf8ViewArray[xx, zc, 12, 56] }, validity: [0b____1110] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[xx, teszc, 12c, 56] }, validity: [0b____1000] } | ++--------+-------------------------------------------------------------------------------------------------------------+ ast : space(0) @@ -3272,12 +3272,12 @@ evaluation: | Row 9 | 9 | ' ' | +--------+---------+-------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | UInt8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) | -| Output | StringColumn { data: 0x202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020, offsets: [0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------+ +| a | UInt8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) | +| Output | StringColumn { data: Utf8ViewArray[, , , , , , , , , ] } | ++--------+-------------------------------------------------------------------------------------------------------+ ast : left('', 0) @@ -3320,12 +3320,12 @@ evaluation: | Row 10 | 10 | '123456789' | +--------+----------+--------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | UInt8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) | -| Output | StringColumn { data: 0x313132313233313233343132333435313233343536313233343536373132333435363738313233343536373839313233343536373839, offsets: [0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 54] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------+ +| a | UInt8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) | +| Output | StringColumn { data: Utf8ViewArray[, 1, 12, 123, 1234, 12345, 123456, 1234567, 12345678, 123456789, 123456789] } | ++--------+------------------------------------------------------------------------------------------------------------------+ ast : right('', 0) @@ -3368,12 +3368,12 @@ evaluation: | Row 10 | 10 | '123456789' | +--------+----------+-------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | UInt8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) | -| Output | StringColumn { data: 0x393839373839363738393536373839343536373839333435363738393233343536373839313233343536373839313233343536373839, offsets: [0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 54] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------+ +| a | UInt8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) | +| Output | StringColumn { data: Utf8ViewArray[, 9, 89, 789, 6789, 56789, 456789, 3456789, 23456789, 123456789, 123456789] } | ++--------+------------------------------------------------------------------------------------------------------------------+ ast : mid('1234567890', -3, 3) @@ -3513,13 +3513,13 @@ evaluation: | Row 44 | -4 | 4 | '' | +--------+----------+---------+--------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| pos | Int8([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -4, -4, -4, -4, -4]) | -| len | UInt8([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) | -| Output | StringColumn { data: 0x61616261626361626362626362636263636363636363636362626362636263616162616263616263, offsets: [0, 0, 0, 0, 0, 0, 0, 1, 3, 6, 9, 9, 10, 12, 14, 16, 16, 17, 18, 19, 20, 20, 20, 20, 20, 20, 20, 21, 22, 23, 24, 24, 25, 27, 29, 31, 31, 32, 34, 37, 40, 40, 40, 40, 40, 40] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| pos | Int8([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -4, -4, -4, -4, -4]) | +| len | UInt8([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) | +| Output | StringColumn { data: Utf8ViewArray[, , , , , , a, ab, abc, abc, , b, bc, bc, bc, , c, c, c, c, , , , , , , c, c, c, c, , b, bc, bc, bc, , a, ab, abc, abc, , , , , ] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : split('Sakila', 'il') @@ -3582,12 +3582,12 @@ evaluation: | Row 3 | 'aeeceedeef' | 'ee' | ['a', 'c', 'd', 'f'] | +--------+-------------------------------+------------------------+---------------------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| str | NullableColumn { column: StringColumn { data: 0x3132372e302e302e316161612d2d6262622d4242422d2d636363636361656563656564656566, offsets: [0, 9, 26, 28, 38] }, validity: [0b____1110] } | -| sep | NullableColumn { column: StringColumn { data: 0x2e2d2d63636565, offsets: [0, 1, 3, 5, 7] }, validity: [0b____1110] } | -| Output | NullableColumn { column: ArrayColumn { values: StringColumn { data: 0x3132373030316161616262622d42424263636361636466, offsets: [0, 3, 4, 5, 6, 9, 16, 19, 19, 20, 21, 22, 23] }, offsets: [0, 4, 7, 8, 12] }, validity: [0b____1110] } | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| str | NullableColumn { column: StringColumn { data: Utf8ViewArray[127.0.0.1, aaa--bbb-BBB--ccc, cc, aeeceedeef] }, validity: [0b____1110] } | +| sep | NullableColumn { column: StringColumn { data: Utf8ViewArray[., --, cc, ee] }, validity: [0b____1110] } | +| Output | NullableColumn { column: ArrayColumn { values: StringColumn { data: Utf8ViewArray[127, 0, 0, 1, aaa, bbb-BBB, ccc, , a, c, d, f] }, offsets: [0, 4, 7, 8, 12] }, validity: [0b____1110] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/query/functions/tests/it/scalars/testdata/tuple.txt b/src/query/functions/tests/it/scalars/testdata/tuple.txt index bda28d987ee2..7ab8f2c58ff8 100644 --- a/src/query/functions/tests/it/scalars/testdata/tuple.txt +++ b/src/query/functions/tests/it/scalars/testdata/tuple.txt @@ -49,12 +49,12 @@ evaluation: | Row 3 | 'd' | ('d', 'd') | +--------+----------------------+----------------------------------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x61626364, offsets: [0, 1, 2, 3, 4] }, validity: [0b____1011] } | -| Output | Tuple([NullableColumn { column: StringColumn { data: 0x61626364, offsets: [0, 1, 2, 3, 4] }, validity: [0b____1011] }, NullableColumn { column: StringColumn { data: 0x61626364, offsets: [0, 1, 2, 3, 4] }, validity: [0b____1011] }]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c, d] }, validity: [0b____1011] } | +| Output | Tuple([NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c, d] }, validity: [0b____1011] }, NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c, d] }, validity: [0b____1011] }]) | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ error: @@ -141,12 +141,12 @@ evaluation: | Row 3 | 'd' | 'd' | +--------+----------------------+----------------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x61626364, offsets: [0, 1, 2, 3, 4] }, validity: [0b____1011] } | -| Output | NullableColumn { column: StringColumn { data: 0x61626364, offsets: [0, 1, 2, 3, 4] }, validity: [0b____1011] } | -+--------+----------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c, d] }, validity: [0b____1011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c, d] }, validity: [0b____1011] } | ++--------+-----------------------------------------------------------------------------------------------------+ ast : col.1 @@ -164,11 +164,11 @@ evaluation: | Row 3 | NULL | NULL | +--------+---------------------------------+----------------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| col | NullableColumn { column: Tuple([NullableColumn { column: StringColumn { data: 0x61626364, offsets: [0, 1, 2, 3, 4] }, validity: [0b____0011] }]), validity: [0b____0101] } | -| Output | NullableColumn { column: StringColumn { data: 0x61626364, offsets: [0, 1, 2, 3, 4] }, validity: [0b____0001] } | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| col | NullableColumn { column: Tuple([NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c, d] }, validity: [0b____0011] }]), validity: [0b____0101] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[a, b, c, d] }, validity: [0b____0001] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/query/functions/tests/it/scalars/testdata/variant.txt b/src/query/functions/tests/it/scalars/testdata/variant.txt index e8545ded6ada..77fcf04b20fa 100644 --- a/src/query/functions/tests/it/scalars/testdata/variant.txt +++ b/src/query/functions/tests/it/scalars/testdata/variant.txt @@ -110,7 +110,7 @@ evaluation (internal): +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e35363738312e3931326532225c5c5c226162635c5c5c2222226461746162656e64227b226b223a2276222c2261223a2262227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 49, 62, 72, 89, 110] } | +| s | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, -32768, 1234.5678, 1.912e2, "\\\"abc\\\"", "databend", {"k":"v","a":"b"}, [1,2,3,["a","b","c"]]] } | | Output | BinaryColumn { data: 0x200000000000000020000000400000002000000020000009507fffffffffffffff200000002000000340800020000000200000096040934a456d5cfaad2000000020000009604067e6666666666620000000100000075c226162635c2220000000100000086461746162656e644000000210000001100000011000000110000001616b6276800000042000000220000002200000025000001350015002500380000003100000011000000110000001616263, offsets: [0, 8, 16, 33, 44, 61, 78, 93, 109, 133, 178] } | +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -133,7 +133,7 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x7472756566616c736531323334, offsets: [0, 4, 9, 9, 13] }, validity: [0b____1011] } | +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, false, , 1234] }, validity: [0b____1011] } | | Output | NullableColumn { column: BinaryColumn { data: 0x2000000040000000200000003000000020000000200000035004d2, offsets: [0, 8, 16, 16, 27] }, validity: [0b____1011] } | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -243,7 +243,7 @@ evaluation (internal): +--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e35363738312e3931326532225c5c5c226162635c5c5c2222226461746162656e64227b226b223a2276222c2261223a2262227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 49, 62, 72, 89, 110] } | +| s | StringColumn { data: Utf8ViewArray[null, true, 9223372036854775807, -32768, 1234.5678, 1.912e2, "\\\"abc\\\"", "databend", {"k":"v","a":"b"}, [1,2,3,["a","b","c"]]] } | | Output | NullableColumn { column: BinaryColumn { data: 0x200000000000000020000000400000002000000020000009507fffffffffffffff200000002000000340800020000000200000096040934a456d5cfaad2000000020000009604067e6666666666620000000100000075c226162635c2220000000100000086461746162656e644000000210000001100000011000000110000001616b6276800000042000000220000002200000025000001350015002500380000003100000011000000110000001616263, offsets: [0, 8, 16, 33, 44, 61, 78, 93, 109, 133, 178] }, validity: [0b11111111, 0b______11] } | +--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -266,7 +266,7 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x7472756574747431323334, offsets: [0, 4, 7, 7, 11] }, validity: [0b____1011] } | +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, ttt, , 1234] }, validity: [0b____1011] } | | Output | NullableColumn { column: BinaryColumn { data: 0x200000004000000020000000200000035004d2, offsets: [0, 8, 8, 8, 19] }, validity: [0b____1001] } | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ @@ -312,12 +312,12 @@ evaluation: | Row 2 | 'true' | NULL | +--------+------------------+-------------------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c61626374727565, offsets: [0, 4, 7, 11] } | -| Output | NullableColumn { column: StringColumn { data: 0x65787065637465642076616c75652c20706f732031, offsets: [0, 0, 21, 21] }, validity: [0b_____010] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, abc, true] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, expected value, pos 1, ] }, validity: [0b_____010] } | ++--------+--------------------------------------------------------------------------------------------------------------------+ ast : check_json(s) @@ -335,12 +335,12 @@ evaluation: | Row 3 | '1234' | NULL | +--------+-----------------------+-------------------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x7472756574747431323334, offsets: [0, 4, 7, 7, 11] }, validity: [0b____1011] } | -| Output | NullableColumn { column: StringColumn { data: 0x6578706563746564206964656e742c20706f732032, offsets: [0, 0, 21, 21, 21] }, validity: [0b____0010] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, ttt, , 1234] }, validity: [0b____1011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, expected ident, pos 2, , ] }, validity: [0b____0010] } | ++--------+----------------------------------------------------------------------------------------------------------------------+ ast : length(parse_json('1234')) @@ -384,12 +384,12 @@ evaluation: | Row 2 | '["a","b","c"]' | 3 | +--------+----------------------------------+-------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x747275655b312c322c332c345d5b2261222c2262222c2263225d, offsets: [0, 4, 13, 26] } | -| Output | NullableColumn { column: UInt32([0, 4, 3]), validity: [0b_____110] } | -+--------+--------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[true, [1,2,3,4], ["a","b","c"]] } | +| Output | NullableColumn { column: UInt32([0, 4, 3]), validity: [0b_____110] } | ++--------+----------------------------------------------------------------------+ ast : length(parse_json(s)) @@ -407,12 +407,12 @@ evaluation: | Row 3 | '["a","b","c"]' | 3 | +--------+------------------------+-------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275655b312c322c332c345d5b2261222c2262222c2263225d, offsets: [0, 4, 13, 13, 26] }, validity: [0b____1011] } | -| Output | NullableColumn { column: UInt32([0, 4, 0, 3]), validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, [1,2,3,4], , ["a","b","c"]] }, validity: [0b____1011] } | +| Output | NullableColumn { column: UInt32([0, 4, 0, 3]), validity: [0b____1010] } | ++--------+---------------------------------------------------------------------------------------------------------------------------+ ast : json_object_keys(parse_json('[1,2,3,4]')) @@ -450,7 +450,7 @@ evaluation (internal): +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x5b312c322c332c345d7b2261223a2262222c2263223a2264227d7b226b31223a227631222c226b32223a227632227d, offsets: [0, 9, 26, 47] } | +| s | StringColumn { data: Utf8ViewArray[[1,2,3,4], {"a":"b","c":"d"}, {"k1":"v1","k2":"v2"}] } | | Output | NullableColumn { column: BinaryColumn { data: 0x80000002100000011000000161638000000210000002100000026b316b32, offsets: [0, 0, 14, 30] }, validity: [0b_____110] } | +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -470,12 +470,12 @@ evaluation: | Row 3 | '{"k1":"v1","k2":"v2"}' | '["k1","k2"]' | +--------+-------------------------------------------------+---------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x5b312c322c332c345d7b2261223a2262222c2263223a2264227d7b226b31223a227631222c226b32223a227632227d, offsets: [0, 9, 26, 26, 47] }, validity: [0b____1011] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x80000002100000011000000161638000000210000002100000026b316b32, offsets: [0, 0, 14, 14, 30] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,2,3,4], {"a":"b","c":"d"}, , {"k1":"v1","k2":"v2"}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x80000002100000011000000161638000000210000002100000026b316b32, offsets: [0, 0, 14, 14, 30] }, validity: [0b____1010] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : parse_json('null')[1] @@ -558,7 +558,7 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x747275655b312c322c332c345d5b2261222c2262222c2263225d, offsets: [0, 4, 13, 26] } | +| s | StringColumn { data: Utf8ViewArray[true, [1,2,3,4], ["a","b","c"]] } | | i | UInt64([0, 0, 1]) | | Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 19] }, validity: [0b_____110] } | +--------+---------------------------------------------------------------------------------------------------------------------------------------------+ @@ -579,13 +579,13 @@ evaluation: | Row 3 | '["a","b","c"]' | 1 | '"b"' | +--------+------------------------+------------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275655b312c322c332c345d5b2261222c2262222c2263225d, offsets: [0, 4, 13, 13, 26] }, validity: [0b____1011] } | -| i | NullableColumn { column: UInt64([0, 2, 0, 1]), validity: [0b____1010] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025003200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, [1,2,3,4], , ["a","b","c"]] }, validity: [0b____1011] } | +| i | NullableColumn { column: UInt64([0, 2, 0, 1]), validity: [0b____1010] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025003200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ ast : parse_json(s)[k] @@ -605,8 +605,8 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x747275657b226b223a317d7b2261223a2262227d, offsets: [0, 4, 11, 20] } | -| k | StringColumn { data: 0x6b6b78, offsets: [0, 1, 2, 3] } | +| s | StringColumn { data: Utf8ViewArray[true, {"k":1}, {"a":"b"}] } | +| k | StringColumn { data: Utf8ViewArray[k, k, x] } | | Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001, offsets: [0, 0, 10, 10] }, validity: [0b_____010] } | +--------+---------------------------------------------------------------------------------------------------------------------------+ @@ -626,13 +626,13 @@ evaluation: | Row 3 | '{"a":"b"}' | 'a' | '"b"' | +--------+-----------------------------+------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275657b226b223a317d7b2261223a2262227d, offsets: [0, 4, 11, 11, 20] }, validity: [0b____1011] } | -| k | StringColumn { data: 0x6b61, offsets: [0, 0, 1, 1, 2] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, {"k":1}, , {"a":"b"}] }, validity: [0b____1011] } | +| k | StringColumn { data: Utf8ViewArray[, k, , a] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ ast : get_ignore_case(parse_json('{"Aa":1, "aA":2, "aa":3}'), 'AA') @@ -679,8 +679,8 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x747275657b226b223a317d7b2261223a2262227d, offsets: [0, 4, 11, 20] } | -| k | StringColumn { data: 0x6b4b41, offsets: [0, 1, 2, 3] } | +| s | StringColumn { data: Utf8ViewArray[true, {"k":1}, {"a":"b"}] } | +| k | StringColumn { data: Utf8ViewArray[k, K, A] } | | Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 19] }, validity: [0b_____110] } | +--------+---------------------------------------------------------------------------------------------------------------------------------------------+ @@ -700,13 +700,13 @@ evaluation: | Row 3 | '{"a":"b"}' | 'A' | '"b"' | +--------+-----------------------------+------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275657b226b223a317d7b2261223a2262227d, offsets: [0, 4, 11, 11, 20] }, validity: [0b____1011] } | -| k | StringColumn { data: 0x4b41, offsets: [0, 0, 1, 1, 2] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, {"k":1}, , {"a":"b"}] }, validity: [0b____1011] } | +| k | StringColumn { data: Utf8ViewArray[, K, , A] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ ast : get_path(parse_json('[[1,2],3]'), '[0]') @@ -798,8 +798,8 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x747275657b226b223a317d5b2261222c2262225d, offsets: [0, 4, 11, 20] } | -| k | StringColumn { data: 0x6b5b226b225d5b2261225d, offsets: [0, 1, 6, 11] } | +| s | StringColumn { data: Utf8ViewArray[true, {"k":1}, ["a","b"]] } | +| k | StringColumn { data: Utf8ViewArray[k, ["k"], ["a"]] } | | Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001, offsets: [0, 0, 10, 10] }, validity: [0b_____010] } | +--------+---------------------------------------------------------------------------------------------------------------------------+ @@ -819,13 +819,13 @@ evaluation: | Row 3 | '["a","b"]' | '[0]' | '"a"' | +--------+-----------------------------+--------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275657b226b223a317d5b2261222c2262225d, offsets: [0, 4, 11, 11, 20] }, validity: [0b____1011] } | -| k | StringColumn { data: 0x5b305d5b226b225d5b305d, offsets: [0, 3, 8, 8, 11] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000161, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, {"k":1}, , ["a","b"]] }, validity: [0b____1011] } | +| k | StringColumn { data: Utf8ViewArray[[0], ["k"], , [0]] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000161, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ ast : json_extract_path_text('[[1,2],3]', '[0]') @@ -914,13 +914,13 @@ evaluation: | Row 2 | '["a","b"]' | '["a"]' | NULL | +--------+---------------------------------+-------------------+-------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x747275657b226b223a317d5b2261222c2262225d, offsets: [0, 4, 11, 20] } | -| k | StringColumn { data: 0x6b5b226b225d5b2261225d, offsets: [0, 1, 6, 11] } | -| Output | NullableColumn { column: StringColumn { data: 0x31, offsets: [0, 0, 1, 1] }, validity: [0b_____010] } | -+--------+-------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[true, {"k":1}, ["a","b"]] } | +| k | StringColumn { data: Utf8ViewArray[k, ["k"], ["a"]] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, 1, ] }, validity: [0b_____010] } | ++--------+------------------------------------------------------------------------------------------------+ ast : json_extract_path_text(s, k) @@ -938,13 +938,13 @@ evaluation: | Row 3 | '["a","b"]' | '[0]' | 'a' | +--------+-----------------------------+--------------+-------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275657b226b223a317d5b2261222c2262225d, offsets: [0, 4, 11, 11, 20] }, validity: [0b____1011] } | -| k | StringColumn { data: 0x5b305d5b226b225d5b305d, offsets: [0, 3, 8, 8, 11] } | -| Output | NullableColumn { column: StringColumn { data: 0x3161, offsets: [0, 0, 1, 1, 2] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, {"k":1}, , ["a","b"]] }, validity: [0b____1011] } | +| k | StringColumn { data: Utf8ViewArray[[0], ["k"], , [0]] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, 1, , a] }, validity: [0b____1010] } | ++--------+---------------------------------------------------------------------------------------------------------------------+ ast : as_boolean(parse_json('true')) @@ -1073,12 +1073,12 @@ evaluation: | Row 6 | '{"a":"b"}' | NULL | +--------+------------------------------+--------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | NullableColumn { column: Boolean([0b_0000010]), validity: [0b_0000010] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | NullableColumn { column: Boolean([0b_0000010]), validity: [0b_0000010] } | ++--------+----------------------------------------------------------------------------------------+ ast : as_integer(parse_json(s)) @@ -1099,12 +1099,12 @@ evaluation: | Row 6 | '{"a":"b"}' | NULL | +--------+------------------------------+------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | NullableColumn { column: Int64([0, 0, 123, 0, 0, 0, 0]), validity: [0b_0000100] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | NullableColumn { column: Int64([0, 0, 123, 0, 0, 0, 0]), validity: [0b_0000100] } | ++--------+----------------------------------------------------------------------------------------+ ast : as_float(parse_json(s)) @@ -1125,12 +1125,12 @@ evaluation: | Row 6 | '{"a":"b"}' | NULL | +--------+------------------------------+--------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | NullableColumn { column: Float64([0, 0, 123, 12.34, 0, 0, 0]), validity: [0b_0001100] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | NullableColumn { column: Float64([0, 0, 123, 12.34, 0, 0, 0]), validity: [0b_0001100] } | ++--------+-----------------------------------------------------------------------------------------+ ast : as_string(parse_json(s)) @@ -1151,12 +1151,12 @@ evaluation: | Row 6 | '{"a":"b"}' | NULL | +--------+------------------------------+-------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | NullableColumn { column: StringColumn { data: 0x6162, offsets: [0, 0, 0, 0, 0, 2, 2, 2] }, validity: [0b_0010000] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, , , , ab, , ] }, validity: [0b_0010000] } | ++--------+---------------------------------------------------------------------------------------------------------+ ast : as_array(parse_json(s)) @@ -1180,7 +1180,7 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | | Output | NullableColumn { column: BinaryColumn { data: 0x80000003200000022000000220000002500150025003, offsets: [0, 0, 0, 0, 0, 0, 22, 22] }, validity: [0b_0100000] } | +--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -1206,7 +1206,7 @@ evaluation (internal): +--------+----------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+----------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | | Output | NullableColumn { column: BinaryColumn { data: 0x4000000110000001100000016162, offsets: [0, 0, 0, 0, 0, 0, 0, 14] }, validity: [0b_1000000] } | +--------+----------------------------------------------------------------------------------------------------------------------------------------------+ @@ -1355,12 +1355,12 @@ evaluation: | Row 6 | '{"a":"b"}' | false | +--------+------------------------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | Boolean([0b_0000001]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | Boolean([0b_0000001]) | ++--------+----------------------------------------------------------------------------------------+ ast : is_boolean(parse_json(s)) @@ -1381,12 +1381,12 @@ evaluation: | Row 6 | '{"a":"b"}' | false | +--------+------------------------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | Boolean([0b_0000010]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | Boolean([0b_0000010]) | ++--------+----------------------------------------------------------------------------------------+ ast : is_integer(parse_json(s)) @@ -1407,12 +1407,12 @@ evaluation: | Row 6 | '{"a":"b"}' | false | +--------+------------------------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | Boolean([0b_0000100]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | Boolean([0b_0000100]) | ++--------+----------------------------------------------------------------------------------------+ ast : is_float(parse_json(s)) @@ -1433,12 +1433,12 @@ evaluation: | Row 6 | '{"a":"b"}' | false | +--------+------------------------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | Boolean([0b_0001100]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | Boolean([0b_0001100]) | ++--------+----------------------------------------------------------------------------------------+ ast : is_string(parse_json(s)) @@ -1459,12 +1459,12 @@ evaluation: | Row 6 | '{"a":"b"}' | false | +--------+------------------------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | Boolean([0b_0010000]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | Boolean([0b_0010000]) | ++--------+----------------------------------------------------------------------------------------+ ast : is_array(parse_json(s)) @@ -1485,12 +1485,12 @@ evaluation: | Row 6 | '{"a":"b"}' | false | +--------+------------------------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | Boolean([0b_0100000]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | Boolean([0b_0100000]) | ++--------+----------------------------------------------------------------------------------------+ ast : is_object(parse_json(s)) @@ -1511,12 +1511,12 @@ evaluation: | Row 6 | '{"a":"b"}' | true | +--------+------------------------------+---------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c7472756531323331322e3334226162225b312c322c335d7b2261223a2262227d, offsets: [0, 4, 8, 11, 16, 20, 27, 36] } | -| Output | Boolean([0b_1000000]) | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[null, true, 123, 12.34, "ab", [1,2,3], {"a":"b"}] } | +| Output | Boolean([0b_1000000]) | ++--------+----------------------------------------------------------------------------------------+ ast : to_boolean(parse_json('true')) @@ -1678,12 +1678,12 @@ evaluation: | Row 2 | 'true' | true | +--------+------------------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x7472756574727565, offsets: [0, 4, 4, 8] }, validity: [0b_____101] } | -| Output | NullableColumn { column: Boolean([0b_____101]), validity: [0b_____101] } | -+--------+---------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, , true] }, validity: [0b_____101] } | +| Output | NullableColumn { column: Boolean([0b_____101]), validity: [0b_____101] } | ++--------+-------------------------------------------------------------------------------------------------------+ ast : to_int64(parse_json(s)) @@ -1700,12 +1700,12 @@ evaluation: | Row 2 | '-10' | -10 | +--------+---------------------+------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x312d3130, offsets: [0, 1, 1, 4] }, validity: [0b_____101] } | -| Output | NullableColumn { column: Int64([1, 0, -10]), validity: [0b_____101] } | -+--------+-------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[1, , -10] }, validity: [0b_____101] } | +| Output | NullableColumn { column: Int64([1, 0, -10]), validity: [0b_____101] } | ++--------+---------------------------------------------------------------------------------------------------+ ast : to_uint64(parse_json(s)) @@ -1722,12 +1722,12 @@ evaluation: | Row 2 | '20' | 20 | +--------+----------------------+-------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x313230, offsets: [0, 1, 1, 3] }, validity: [0b_____101] } | -| Output | NullableColumn { column: UInt64([1, 0, 20]), validity: [0b_____101] } | -+--------+-----------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[1, , 20] }, validity: [0b_____101] } | +| Output | NullableColumn { column: UInt64([1, 0, 20]), validity: [0b_____101] } | ++--------+--------------------------------------------------------------------------------------------------+ ast : to_float64(parse_json(s)) @@ -1744,12 +1744,12 @@ evaluation: | Row 2 | '100.2' | 100.2 | +--------+-------------------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x312e323130302e32, offsets: [0, 3, 3, 8] }, validity: [0b_____101] } | -| Output | NullableColumn { column: Float64([1.2, 0, 100.2]), validity: [0b_____101] } | -+--------+---------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[1.2, , 100.2] }, validity: [0b_____101] } | +| Output | NullableColumn { column: Float64([1.2, 0, 100.2]), validity: [0b_____101] } | ++--------+-------------------------------------------------------------------------------------------------------+ ast : to_date(parse_json(s)) @@ -1766,12 +1766,12 @@ evaluation: | Row 2 | '"2023-10-01"' | '2023-10-01' | +--------+----------------------------------+--------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x22323032302d30312d30312222323032332d31302d303122, offsets: [0, 12, 12, 24] }, validity: [0b_____101] } | -| Output | NullableColumn { column: [18262, 0, 19631], validity: [0b_____101] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray["2020-01-01", , "2023-10-01"] }, validity: [0b_____101] } | +| Output | NullableColumn { column: [18262, 0, 19631], validity: [0b_____101] } | ++--------+-----------------------------------------------------------------------------------------------------------------------+ ast : to_timestamp(parse_json(s)) @@ -1788,12 +1788,12 @@ evaluation: | Row 2 | '"2023-10-01 10:11:12"' | '2023-10-01 10:11:12.000000' | +--------+-------------------------------------------+------------------------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x22323032302d30312d30312030303a30303a30302222323032332d31302d30312031303a31313a313222, offsets: [0, 21, 21, 42] }, validity: [0b_____101] } | -| Output | NullableColumn { column: [1577836800000000, 0, 1696155072000000], validity: [0b_____101] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray["2020-01-01 00:00:00", , "2023-10-01 10:11:12"] }, validity: [0b_____101] } | +| Output | NullableColumn { column: [1577836800000000, 0, 1696155072000000], validity: [0b_____101] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------+ ast : to_string(parse_json(s)) @@ -1810,12 +1810,12 @@ evaluation: | Row 2 | '123' | '123' | +--------+-----------------------+-------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x2261626322313233, offsets: [0, 5, 5, 8] }, validity: [0b_____101] } | -| Output | NullableColumn { column: StringColumn { data: 0x616263313233, offsets: [0, 3, 3, 6] }, validity: [0b_____101] } | -+--------+---------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray["abc", , 123] }, validity: [0b_____101] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[abc, , 123] }, validity: [0b_____101] } | ++--------+-------------------------------------------------------------------------------------------------------+ ast : try_to_boolean(parse_json('true')) @@ -1990,12 +1990,12 @@ evaluation: | Row 7 | '"abc"' | NULL | +--------+-------------------------+--------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275653132332d31303031322e333422323032302d30312d30312222323032312d30312d30312032303a30303a3030222261626322, offsets: [0, 4, 7, 11, 16, 16, 28, 49, 54] }, validity: [0b11101111] } | -| Output | NullableColumn { column: Boolean([0b00000001]), validity: [0b00000001] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, 123, -100, 12.34, , "2020-01-01", "2021-01-01 20:00:00", "abc"] }, validity: [0b11101111] } | +| Output | NullableColumn { column: Boolean([0b00000001]), validity: [0b00000001] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : try_to_int64(parse_json(s)) @@ -2017,12 +2017,12 @@ evaluation: | Row 7 | '"abc"' | NULL | +--------+-------------------------+------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275653132332d31303031322e333422323032302d30312d30312222323032312d30312d30312032303a30303a3030222261626322, offsets: [0, 4, 7, 11, 16, 16, 28, 49, 54] }, validity: [0b11101111] } | -| Output | NullableColumn { column: Int64([1, 123, -100, 0, 0, 0, 0, 0]), validity: [0b00000111] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, 123, -100, 12.34, , "2020-01-01", "2021-01-01 20:00:00", "abc"] }, validity: [0b11101111] } | +| Output | NullableColumn { column: Int64([1, 123, -100, 0, 0, 0, 0, 0]), validity: [0b00000111] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : try_to_uint64(parse_json(s)) @@ -2044,12 +2044,12 @@ evaluation: | Row 7 | '"abc"' | NULL | +--------+-------------------------+-------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275653132332d31303031322e333422323032302d30312d30312222323032312d30312d30312032303a30303a3030222261626322, offsets: [0, 4, 7, 11, 16, 16, 28, 49, 54] }, validity: [0b11101111] } | -| Output | NullableColumn { column: UInt64([1, 123, 0, 0, 0, 0, 0, 0]), validity: [0b00000011] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, 123, -100, 12.34, , "2020-01-01", "2021-01-01 20:00:00", "abc"] }, validity: [0b11101111] } | +| Output | NullableColumn { column: UInt64([1, 123, 0, 0, 0, 0, 0, 0]), validity: [0b00000011] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : try_to_float64(parse_json(s)) @@ -2071,12 +2071,12 @@ evaluation: | Row 7 | '"abc"' | NULL | +--------+-------------------------+--------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275653132332d31303031322e333422323032302d30312d30312222323032312d30312d30312032303a30303a3030222261626322, offsets: [0, 4, 7, 11, 16, 16, 28, 49, 54] }, validity: [0b11101111] } | -| Output | NullableColumn { column: Float64([1, 123, -100, 12.34, 0, 0, 0, 0]), validity: [0b00001111] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, 123, -100, 12.34, , "2020-01-01", "2021-01-01 20:00:00", "abc"] }, validity: [0b11101111] } | +| Output | NullableColumn { column: Float64([1, 123, -100, 12.34, 0, 0, 0, 0]), validity: [0b00001111] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : try_to_date(parse_json(s)) @@ -2098,12 +2098,12 @@ evaluation: | Row 7 | '"abc"' | NULL | +--------+-------------------------+--------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275653132332d31303031322e333422323032302d30312d30312222323032312d30312d30312032303a30303a3030222261626322, offsets: [0, 4, 7, 11, 16, 16, 28, 49, 54] }, validity: [0b11101111] } | -| Output | NullableColumn { column: [0, 0, 0, 0, 0, 18262, 18628, 0], validity: [0b01100000] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, 123, -100, 12.34, , "2020-01-01", "2021-01-01 20:00:00", "abc"] }, validity: [0b11101111] } | +| Output | NullableColumn { column: [0, 0, 0, 0, 0, 18262, 18628, 0], validity: [0b01100000] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : try_to_timestamp(parse_json(s)) @@ -2125,12 +2125,12 @@ evaluation: | Row 7 | '"abc"' | NULL | +--------+-------------------------+------------------------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275653132332d31303031322e333422323032302d30312d30312222323032312d30312d30312032303a30303a3030222261626322, offsets: [0, 4, 7, 11, 16, 16, 28, 49, 54] }, validity: [0b11101111] } | -| Output | NullableColumn { column: [0, 0, 0, 0, 0, 1577836800000000, 1609531200000000, 0], validity: [0b01100000] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, 123, -100, 12.34, , "2020-01-01", "2021-01-01 20:00:00", "abc"] }, validity: [0b11101111] } | +| Output | NullableColumn { column: [0, 0, 0, 0, 0, 1577836800000000, 1609531200000000, 0], validity: [0b01100000] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : try_to_string(parse_json(s)) @@ -2152,12 +2152,12 @@ evaluation: | Row 7 | '"abc"' | 'abc' | +--------+-------------------------+-----------------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275653132332d31303031322e333422323032302d30312d30312222323032312d30312d30312032303a30303a3030222261626322, offsets: [0, 4, 7, 11, 16, 16, 28, 49, 54] }, validity: [0b11101111] } | -| Output | NullableColumn { column: StringColumn { data: 0x747275653132332d31303031322e3334323032302d30312d3031323032312d30312d30312032303a30303a3030616263, offsets: [0, 4, 7, 11, 16, 16, 26, 45, 48] }, validity: [0b11101111] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, 123, -100, 12.34, , "2020-01-01", "2021-01-01 20:00:00", "abc"] }, validity: [0b11101111] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, 123, -100, 12.34, , 2020-01-01, 2021-01-01 20:00:00, abc] }, validity: [0b11101111] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : json_object() @@ -2229,10 +2229,10 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| k1 | NullableColumn { column: StringColumn { data: 0x613162316431, offsets: [0, 2, 4, 4, 6] }, validity: [0b____1011] } | -| v1 | NullableColumn { column: StringColumn { data: 0x6a316b316c31, offsets: [0, 2, 4, 6, 6] }, validity: [0b____0111] } | -| k2 | NullableColumn { column: StringColumn { data: 0x613263326432, offsets: [0, 2, 2, 4, 6] }, validity: [0b____1101] } | -| v2 | NullableColumn { column: StringColumn { data: 0x6a326b326c326d32, offsets: [0, 2, 4, 6, 8] }, validity: [0b____1111] } | +| k1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[a1, b1, , d1] }, validity: [0b____1011] } | +| v1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[j1, k1, l1, ] }, validity: [0b____0111] } | +| k2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[a2, , c2, d2] }, validity: [0b____1101] } | +| v2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[j2, k2, l2, m2] }, validity: [0b____1111] } | | Output | BinaryColumn { data: 0x4000000210000002100000021000000210000002613161326a316a3240000001100000021000000262316b3140000001100000021000000263326c3240000001100000021000000264326d32, offsets: [0, 28, 44, 60, 76] } | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -2309,10 +2309,10 @@ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| k1 | NullableColumn { column: StringColumn { data: 0x613162316431, offsets: [0, 2, 4, 4, 6] }, validity: [0b____1011] } | -| v1 | NullableColumn { column: StringColumn { data: 0x6a316b316c31, offsets: [0, 2, 4, 6, 6] }, validity: [0b____0111] } | -| k2 | NullableColumn { column: StringColumn { data: 0x613263326432, offsets: [0, 2, 2, 4, 6] }, validity: [0b____1101] } | -| v2 | NullableColumn { column: StringColumn { data: 0x6a326b326c326d32, offsets: [0, 2, 4, 6, 8] }, validity: [0b____1111] } | +| k1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[a1, b1, , d1] }, validity: [0b____1011] } | +| v1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[j1, k1, l1, ] }, validity: [0b____0111] } | +| k2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[a2, , c2, d2] }, validity: [0b____1101] } | +| v2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[j2, k2, l2, m2] }, validity: [0b____1111] } | | Output | NullableColumn { column: BinaryColumn { data: 0x4000000210000002100000021000000210000002613161326a316a3240000001100000021000000262316b3140000001100000021000000263326c3240000001100000021000000264326d32, offsets: [0, 28, 44, 60, 76] }, validity: [0b____1111] } | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -2386,10 +2386,10 @@ evaluation (internal): +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| k1 | NullableColumn { column: StringColumn { data: 0x613162316431, offsets: [0, 2, 4, 4, 6] }, validity: [0b____1011] } | -| v1 | NullableColumn { column: StringColumn { data: 0x6a316b316c31, offsets: [0, 2, 4, 6, 6] }, validity: [0b____0111] } | -| k2 | NullableColumn { column: StringColumn { data: 0x613263326432, offsets: [0, 2, 2, 4, 6] }, validity: [0b____1101] } | -| v2 | NullableColumn { column: StringColumn { data: 0x6a326b326c326d32, offsets: [0, 2, 4, 6, 8] }, validity: [0b____1111] } | +| k1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[a1, b1, , d1] }, validity: [0b____1011] } | +| v1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[j1, k1, l1, ] }, validity: [0b____0111] } | +| k2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[a2, , c2, d2] }, validity: [0b____1101] } | +| v2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[j2, k2, l2, m2] }, validity: [0b____1111] } | | Output | BinaryColumn { data: 0x4000000210000002100000021000000210000002613161326a316a3240000001100000021000000262316b3140000001100000021000000263326c324000000210000002100000020000000010000002643164326d32, offsets: [0, 28, 44, 60, 86] } | +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -2466,10 +2466,10 @@ evaluation (internal): +--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| k1 | NullableColumn { column: StringColumn { data: 0x613162316431, offsets: [0, 2, 4, 4, 6] }, validity: [0b____1011] } | -| v1 | NullableColumn { column: StringColumn { data: 0x6a316b316c31, offsets: [0, 2, 4, 6, 6] }, validity: [0b____0111] } | -| k2 | NullableColumn { column: StringColumn { data: 0x613263326432, offsets: [0, 2, 2, 4, 6] }, validity: [0b____1101] } | -| v2 | NullableColumn { column: StringColumn { data: 0x6a326b326c326d32, offsets: [0, 2, 4, 6, 8] }, validity: [0b____1111] } | +| k1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[a1, b1, , d1] }, validity: [0b____1011] } | +| v1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[j1, k1, l1, ] }, validity: [0b____0111] } | +| k2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[a2, , c2, d2] }, validity: [0b____1101] } | +| v2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[j2, k2, l2, m2] }, validity: [0b____1111] } | | Output | NullableColumn { column: BinaryColumn { data: 0x4000000210000002100000021000000210000002613161326a316a3240000001100000021000000262316b3140000001100000021000000263326c324000000210000002100000020000000010000002643164326d32, offsets: [0, 28, 44, 60, 86] }, validity: [0b____1111] } | +--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -2546,8 +2546,8 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275655b7b226b223a317d2c7b226b223a327d5d5b312c322c332c345d, offsets: [0, 4, 21, 21, 30] }, validity: [0b____1011] } | -| p | StringColumn { data: 0x245b305d245b2a5d2e6b242e61245b302c325d, offsets: [0, 4, 10, 13, 19] } | +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, [{"k":1},{"k":2}], , [1,2,3,4]] }, validity: [0b____1011] } | +| p | StringColumn { data: Utf8ViewArray[$[0], $[*].k, $.a, $[0,2]] } | | Output | NullableColumn { column: BinaryColumn { data: 0x800000008000000220000002200000025001500280000002200000022000000250015003, offsets: [0, 4, 20, 20, 36] }, validity: [0b____1011] } | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -2621,13 +2621,13 @@ evaluation: | Row 3 | '[1,2,3,4]' | '$[0,2]' | '1' | +--------+------------------------+------------------+--------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275655b7b226b223a317d2c7b226b223a327d5d5b312c322c332c345d, offsets: [0, 4, 21, 21, 30] }, validity: [0b____1011] } | -| p | StringColumn { data: 0x245b305d245b2a5d2e6b242e61245b302c325d, offsets: [0, 4, 10, 13, 19] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x2000000020000002500120000000200000025001, offsets: [0, 0, 10, 10, 20] }, validity: [0b____1010] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, [{"k":1},{"k":2}], , [1,2,3,4]] }, validity: [0b____1011] } | +| p | StringColumn { data: Utf8ViewArray[$[0], $[*].k, $.a, $[0,2]] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x2000000020000002500120000000200000025001, offsets: [0, 0, 10, 10, 20] }, validity: [0b____1010] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ast : json_to_string(parse_json('true')) @@ -2894,9 +2894,9 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| v1 | NullableColumn { column: StringColumn { data: 0x613162316431, offsets: [0, 2, 4, 4, 6] }, validity: [0b____1011] } | -| v2 | NullableColumn { column: StringColumn { data: 0x6a316b316c31, offsets: [0, 2, 4, 6, 6] }, validity: [0b____0111] } | -| v3 | NullableColumn { column: StringColumn { data: 0x613263326432, offsets: [0, 2, 2, 4, 6] }, validity: [0b____1101] } | +| v1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[a1, b1, , d1] }, validity: [0b____1011] } | +| v2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[j1, k1, l1, ] }, validity: [0b____0111] } | +| v3 | NullableColumn { column: StringColumn { data: Utf8ViewArray[a2, , c2, d2] }, validity: [0b____1101] } | | Output | BinaryColumn { data: 0x8000000310000002100000021000000261316a3161328000000310000002100000020000000062316b31800000030000000010000002100000026c3163328000000310000002000000001000000264316432, offsets: [0, 22, 42, 62, 82] } | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -3062,7 +3062,7 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x747275655b312c322c332c345d5b2261222c2262222c2263225d, offsets: [0, 4, 13, 26] } | +| s | StringColumn { data: Utf8ViewArray[true, [1,2,3,4], ["a","b","c"]] } | | i | UInt64([0, 0, 1]) | | Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 19] }, validity: [0b_____110] } | +--------+---------------------------------------------------------------------------------------------------------------------------------------------+ @@ -3083,13 +3083,13 @@ evaluation: | Row 3 | '["a","b","c"]' | 1 | '"b"' | +--------+------------------------+------------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275655b312c322c332c345d5b2261222c2262222c2263225d, offsets: [0, 4, 13, 13, 26] }, validity: [0b____1011] } | -| i | NullableColumn { column: UInt64([0, 2, 0, 1]), validity: [0b____1010] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025003200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, [1,2,3,4], , ["a","b","c"]] }, validity: [0b____1011] } | +| i | NullableColumn { column: UInt64([0, 2, 0, 1]), validity: [0b____1010] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025003200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ ast : parse_json(s)->k @@ -3109,8 +3109,8 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+---------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x747275657b226b223a317d7b2261223a2262227d, offsets: [0, 4, 11, 20] } | -| k | StringColumn { data: 0x6b6b78, offsets: [0, 1, 2, 3] } | +| s | StringColumn { data: Utf8ViewArray[true, {"k":1}, {"a":"b"}] } | +| k | StringColumn { data: Utf8ViewArray[k, k, x] } | | Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001, offsets: [0, 0, 10, 10] }, validity: [0b_____010] } | +--------+---------------------------------------------------------------------------------------------------------------------------+ @@ -3130,13 +3130,13 @@ evaluation: | Row 3 | '{"a":"b"}' | 'a' | '"b"' | +--------+-----------------------------+------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275657b226b223a317d7b2261223a2262227d, offsets: [0, 4, 11, 11, 20] }, validity: [0b____1011] } | -| k | StringColumn { data: 0x6b61, offsets: [0, 0, 1, 1, 2] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, {"k":1}, , {"a":"b"}] }, validity: [0b____1011] } | +| k | StringColumn { data: Utf8ViewArray[, k, , a] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ ast : parse_json('null')->>1 @@ -3216,13 +3216,13 @@ evaluation: | Row 2 | '["a","b","c"]' | 1 | 'b' | +--------+----------------------------------+---------+-------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x747275655b312c322c332c345d5b2261222c2262222c2263225d, offsets: [0, 4, 13, 26] } | -| i | UInt64([0, 0, 1]) | -| Output | NullableColumn { column: StringColumn { data: 0x3162, offsets: [0, 0, 1, 2] }, validity: [0b_____110] } | -+--------+---------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[true, [1,2,3,4], ["a","b","c"]] } | +| i | UInt64([0, 0, 1]) | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, 1, b] }, validity: [0b_____110] } | ++--------+-------------------------------------------------------------------------------------------------+ ast : parse_json(s)->>i @@ -3240,13 +3240,13 @@ evaluation: | Row 3 | '["a","b","c"]' | 1 | 'b' | +--------+------------------------+------------------+-------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275655b312c322c332c345d5b2261222c2262222c2263225d, offsets: [0, 4, 13, 13, 26] }, validity: [0b____1011] } | -| i | NullableColumn { column: UInt64([0, 2, 0, 1]), validity: [0b____1010] } | -| Output | NullableColumn { column: StringColumn { data: 0x3362, offsets: [0, 0, 1, 1, 2] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, [1,2,3,4], , ["a","b","c"]] }, validity: [0b____1011] } | +| i | NullableColumn { column: UInt64([0, 2, 0, 1]), validity: [0b____1010] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, 3, , b] }, validity: [0b____1010] } | ++--------+---------------------------------------------------------------------------------------------------------------------------+ ast : parse_json(s)->>k @@ -3263,13 +3263,13 @@ evaluation: | Row 2 | '{"a":"b"}' | 'x' | NULL | +--------+------------------------+-------------+-------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x747275657b226b223a317d7b2261223a2262227d, offsets: [0, 4, 11, 20] } | -| k | StringColumn { data: 0x6b6b78, offsets: [0, 1, 2, 3] } | -| Output | NullableColumn { column: StringColumn { data: 0x31, offsets: [0, 0, 1, 1] }, validity: [0b_____010] } | -+--------+-------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------+ +| s | StringColumn { data: Utf8ViewArray[true, {"k":1}, {"a":"b"}] } | +| k | StringColumn { data: Utf8ViewArray[k, k, x] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, 1, ] }, validity: [0b_____010] } | ++--------+------------------------------------------------------------------------------------------------+ ast : parse_json(s)->>k @@ -3287,13 +3287,13 @@ evaluation: | Row 3 | '{"a":"b"}' | 'a' | 'b' | +--------+-----------------------------+------------+-------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275657b226b223a317d7b2261223a2262227d, offsets: [0, 4, 11, 11, 20] }, validity: [0b____1011] } | -| k | StringColumn { data: 0x6b61, offsets: [0, 0, 1, 1, 2] } | -| Output | NullableColumn { column: StringColumn { data: 0x3162, offsets: [0, 0, 1, 1, 2] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, {"k":1}, , {"a":"b"}] }, validity: [0b____1011] } | +| k | StringColumn { data: Utf8ViewArray[, k, , a] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, 1, , b] }, validity: [0b____1010] } | ++--------+---------------------------------------------------------------------------------------------------------------------+ error: @@ -3427,12 +3427,12 @@ evaluation: | Row 3 | '{"a":"b"}' | NULL | +--------+-----------------------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x5b312c322c335d7b226b223a317d7b2261223a2262227d, offsets: [0, 7, 14, 14, 23] }, validity: [0b____1011] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001, offsets: [0, 10, 10, 10, 10] }, validity: [0b____0001] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,2,3], {"k":1}, , {"a":"b"}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001, offsets: [0, 10, 10, 10, 10] }, validity: [0b____0001] } | ++--------+--------------------------------------------------------------------------------------------------------------------------------+ ast : parse_json(s) #> k @@ -3450,13 +3450,13 @@ evaluation: | Row 3 | '{"a":"b"}' | '{a}' | '"b"' | +--------+-----------------------------+-----------------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275657b226b223a317d7b2261223a2262227d, offsets: [0, 4, 11, 11, 20] }, validity: [0b____1011] } | -| k | NullableColumn { column: StringColumn { data: 0x7b317d7b6b7d7b617d, offsets: [0, 3, 6, 6, 9] }, validity: [0b____1011] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, {"k":1}, , {"a":"b"}] }, validity: [0b____1011] } | +| k | NullableColumn { column: StringColumn { data: Utf8ViewArray[{1}, {k}, , {a}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x20000000200000025001200000001000000162, offsets: [0, 0, 10, 10, 19] }, validity: [0b____1010] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------+ ast : NULL #>> '{0}' @@ -3573,12 +3573,12 @@ evaluation: | Row 3 | '{"a":"b"}' | NULL | +--------+-----------------------------+-------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x5b312c322c335d7b226b223a317d7b2261223a2262227d, offsets: [0, 7, 14, 14, 23] }, validity: [0b____1011] } | -| Output | NullableColumn { column: StringColumn { data: 0x31, offsets: [0, 1, 1, 1, 1] }, validity: [0b____0001] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,2,3], {"k":1}, , {"a":"b"}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[1, , , ] }, validity: [0b____0001] } | ++--------+------------------------------------------------------------------------------------------------------------------------+ ast : parse_json(s) #>> k @@ -3596,13 +3596,13 @@ evaluation: | Row 3 | '{"a":"b"}' | '{a}' | 'b' | +--------+-----------------------------+-----------------------+-------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275657b226b223a317d7b2261223a2262227d, offsets: [0, 4, 11, 11, 20] }, validity: [0b____1011] } | -| k | NullableColumn { column: StringColumn { data: 0x7b317d7b6b7d7b617d, offsets: [0, 3, 6, 6, 9] }, validity: [0b____1011] } | -| Output | NullableColumn { column: StringColumn { data: 0x3162, offsets: [0, 0, 1, 1, 2] }, validity: [0b____1010] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, {"k":1}, , {"a":"b"}] }, validity: [0b____1011] } | +| k | NullableColumn { column: StringColumn { data: Utf8ViewArray[{1}, {k}, , {a}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: StringColumn { data: Utf8ViewArray[, 1, , b] }, validity: [0b____1010] } | ++--------+---------------------------------------------------------------------------------------------------------------------+ ast : parse_json('["1","2","3"]') ? NULL @@ -3675,12 +3675,12 @@ evaluation: | Row 3 | '{"b":1}' | false | +--------+-----------------------------+--------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x5b312c322c335d7b2261223a317d7b2262223a317d, offsets: [0, 7, 14, 14, 21] }, validity: [0b____1011] } | -| Output | NullableColumn { column: Boolean([0b____0010]), validity: [0b____1011] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,2,3], {"a":1}, , {"b":1}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: Boolean([0b____0010]), validity: [0b____1011] } | ++--------+----------------------------------------------------------------------------------------------------------------------+ ast : parse_json('["1","2","3"]') ?| NULL @@ -3753,12 +3753,12 @@ evaluation: | Row 3 | '{"c":1}' | false | +--------+-----------------------------+--------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x5b2261222c2265222c2264225d7b2261223a312c2262223a327d7b2263223a317d, offsets: [0, 13, 26, 26, 33] }, validity: [0b____1011] } | -| Output | NullableColumn { column: Boolean([0b____0011]), validity: [0b____1011] } | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[["a","e","d"], {"a":1,"b":2}, , {"c":1}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: Boolean([0b____0011]), validity: [0b____1011] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------+ ast : parse_json('["1","2","3"]') ?& NULL @@ -3831,12 +3831,12 @@ evaluation: | Row 3 | '{"a":0,"c":1}' | false | +--------+-------------------------------------+--------------+ evaluation (internal): -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x5b2261222c2265222c2262225d7b2261223a312c2262223a327d7b2261223a302c2263223a317d, offsets: [0, 13, 26, 26, 39] }, validity: [0b____1011] } | -| Output | NullableColumn { column: Boolean([0b____0011]), validity: [0b____1011] } | -+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[["a","e","b"], {"a":1,"b":2}, , {"a":0,"c":1}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: Boolean([0b____0011]), validity: [0b____1011] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------------+ ast : NULL @> NULL @@ -4295,13 +4295,13 @@ evaluation: | Row 3 | '[1,2,3,4]' | '$[*] > 2' | true | +--------+------------------------+-----------------------------+--------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275655b7b226b223a317d2c7b226b223a327d5d5b312c322c332c345d, offsets: [0, 4, 21, 21, 30] }, validity: [0b____1011] } | -| p | StringColumn { data: 0x242e61203e2030245b2a5d2e6b203d3d2031245b2a5d203e2031245b2a5d203e2032, offsets: [0, 7, 18, 26, 34] } | -| Output | NullableColumn { column: Boolean([0b____1010]), validity: [0b____1011] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, [{"k":1},{"k":2}], , [1,2,3,4]] }, validity: [0b____1011] } | +| p | StringColumn { data: Utf8ViewArray[$.a > 0, $[*].k == 1, $[*] > 1, $[*] > 2] } | +| Output | NullableColumn { column: Boolean([0b____1010]), validity: [0b____1011] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------+ ast : parse_json('{"a":1,"b":2}') @@ '$.a == 1' @@ -4400,13 +4400,13 @@ evaluation: | Row 3 | '[1,2,3,4]' | '$[*] > 2' | true | +--------+------------------------+-----------------------------+--------------+ evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x747275655b7b226b223a317d2c7b226b223a327d5d5b312c322c332c345d, offsets: [0, 4, 21, 21, 30] }, validity: [0b____1011] } | -| p | StringColumn { data: 0x242e61203e2030245b2a5d2e6b203d3d2031245b2a5d203e2031245b2a5d203e2032, offsets: [0, 7, 18, 26, 34] } | -| Output | NullableColumn { column: Boolean([0b____1010]), validity: [0b____1011] } | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[true, [{"k":1},{"k":2}], , [1,2,3,4]] }, validity: [0b____1011] } | +| p | StringColumn { data: Utf8ViewArray[$.a > 0, $[*].k == 1, $[*] > 1, $[*] > 2] } | +| Output | NullableColumn { column: Boolean([0b____1010]), validity: [0b____1011] } | ++--------+-------------------------------------------------------------------------------------------------------------------------------+ ast : NULL @? '$.a' @@ -4866,7 +4866,7 @@ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x5b7b2261223a317d2c322c335d5b312c322c335d7b2261223a2262227d, offsets: [0, 13, 20, 20, 29] }, validity: [0b____1011] } | +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[[{"a":1},2,3], [1,2,3], , {"a":"b"}] }, validity: [0b____1011] } | | Output | NullableColumn { column: BinaryColumn { data: 0x800000035000000420000002200000024000000050025003800000032000000220000002200000025001500250034000000110000001100000016162, offsets: [0, 24, 46, 46, 60] }, validity: [0b____1011] } | +--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -4889,8 +4889,8 @@ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x5b312c7b2261223a327d2c335d7b226b223a5b312c322c335d7d7b2261223a2262227d, offsets: [0, 13, 26, 26, 35] }, validity: [0b____1011] } | -| k | NullableColumn { column: StringColumn { data: 0x7b312c617d7b6b2c2d317d7b6b7d7b637d, offsets: [0, 5, 11, 14, 17] }, validity: [0b____1011] } | +| s | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,{"a":2},3], {"k":[1,2,3]}, , {"a":"b"}] }, validity: [0b____1011] } | +| k | NullableColumn { column: StringColumn { data: Utf8ViewArray[{1,a}, {k,-1}, {k}, {c}] }, validity: [0b____1011] } | | Output | NullableColumn { column: BinaryColumn { data: 0x8000000320000002500000042000000250014000000050034000000110000001500000106b800000022000000220000002500150024000000110000001100000016162, offsets: [0, 24, 53, 53, 67] }, validity: [0b____1011] } | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -4968,8 +4968,8 @@ evaluation (internal): +--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| v | NullableColumn { column: StringColumn { data: 0x5b312c322c332c6e756c6c5d5b2241222c2242225d7b2261223a2262227d, offsets: [0, 12, 21, 21, 30] }, validity: [0b____1011] } | -| n | NullableColumn { column: StringColumn { data: 0x22686922747275655b312c322c335d, offsets: [0, 4, 4, 8, 15] }, validity: [0b____1101] } | +| v | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,2,3,null], ["A","B"], , {"a":"b"}] }, validity: [0b____1011] } | +| n | NullableColumn { column: StringColumn { data: Utf8ViewArray["hi", , true, [1,2,3]] }, validity: [0b____1101] } | | Output | NullableColumn { column: BinaryColumn { data: 0x8000000520000002200000021000000220000002000000005001500268695003800000025000000e50000016400000011000000110000001616280000003200000022000000220000002500150025003, offsets: [0, 32, 32, 32, 80] }, validity: [0b____1001] } | +--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -5055,7 +5055,7 @@ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| v | NullableColumn { column: StringColumn { data: 0x5b312c312c322c332c332c6e756c6c2c322c312c6e756c6c5d5b2241222c2242222c2241222c2242222c2243225d7b2261223a2262227d, offsets: [0, 25, 46, 46, 55] }, validity: [0b____1011] } | +| v | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,1,2,3,3,null,2,1,null], ["A","B","A","B","C"], , {"a":"b"}] }, validity: [0b____1011] } | | Output | NullableColumn { column: BinaryColumn { data: 0x800000042000000220000002200000020000000050015002500380000003100000011000000110000001414243800000015000000e4000000110000001100000016162, offsets: [0, 26, 45, 45, 67] }, validity: [0b____1011] } | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -5168,8 +5168,8 @@ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| v1 | NullableColumn { column: StringColumn { data: 0x5b312c322c332c332c6e756c6c2c6e756c6c5d5b2241222c2242222c2241222c2242222c2243225d7b2261223a2262227d, offsets: [0, 19, 40, 40, 49] }, validity: [0b____1011] } | -| v2 | NullableColumn { column: StringColumn { data: 0x5b312c312c322c332c342c352c6e756c6c5d5b2258222c2259222c225a225d7b2261223a2262227d, offsets: [0, 18, 31, 31, 40] }, validity: [0b____1011] } | +| v1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,2,3,3,null,null], ["A","B","A","B","C"], , {"a":"b"}] }, validity: [0b____1011] } | +| v2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,1,2,3,4,5,null], ["X","Y","Z"], , {"a":"b"}] }, validity: [0b____1011] } | | Output | NullableColumn { column: BinaryColumn { data: 0x800000042000000220000002200000020000000050015002500380000000800000015000000e4000000110000001100000016162, offsets: [0, 26, 30, 30, 52] }, validity: [0b____1011] } | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -5279,13 +5279,13 @@ evaluation: | Row 3 | '{"a":"b"}' | '{"a":"b"}' | '[]' | +--------+---------------------------------+---------------------------------+-------------------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| v1 | NullableColumn { column: StringColumn { data: 0x5b312c322c332c332c6e756c6c2c6e756c6c5d5b2241222c2242222c2241222c2242222c2243225d7b2261223a2262227d, offsets: [0, 19, 40, 40, 49] }, validity: [0b____1011] } | -| v2 | NullableColumn { column: StringColumn { data: 0x5b312c312c322c332c342c352c6e756c6c5d5b2258222c2259222c225a225d7b2261223a2262227d, offsets: [0, 18, 31, 31, 40] }, validity: [0b____1011] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x8000000220000002000000005003800000051000000110000001100000011000000110000001414241424380000000, offsets: [0, 14, 43, 43, 47] }, validity: [0b____1011] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| v1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,2,3,3,null,null], ["A","B","A","B","C"], , {"a":"b"}] }, validity: [0b____1011] } | +| v2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,1,2,3,4,5,null], ["X","Y","Z"], , {"a":"b"}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x8000000220000002000000005003800000051000000110000001100000011000000110000001414241424380000000, offsets: [0, 14, 43, 43, 47] }, validity: [0b____1011] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : json_array_overlap('["A","B","C"]'::variant, '["B","C"]'::variant) @@ -5393,13 +5393,13 @@ evaluation: | Row 3 | '{"a":"b"}' | '{"a":"b"}' | true | +--------+---------------------------------+---------------------------------+--------------+ evaluation (internal): -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| v1 | NullableColumn { column: StringColumn { data: 0x5b312c322c332c332c6e756c6c2c6e756c6c5d5b2241222c2242222c2241222c2242222c2243225d7b2261223a2262227d, offsets: [0, 19, 40, 40, 49] }, validity: [0b____1011] } | -| v2 | NullableColumn { column: StringColumn { data: 0x5b312c312c322c332c342c352c6e756c6c5d5b2258222c2259222c225a225d7b2261223a2262227d, offsets: [0, 18, 31, 31, 40] }, validity: [0b____1011] } | -| Output | NullableColumn { column: Boolean([0b____1001]), validity: [0b____1011] } | -+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------+ +| v1 | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,2,3,3,null,null], ["A","B","A","B","C"], , {"a":"b"}] }, validity: [0b____1011] } | +| v2 | NullableColumn { column: StringColumn { data: Utf8ViewArray[[1,1,2,3,4,5,null], ["X","Y","Z"], , {"a":"b"}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: Boolean([0b____1001]), validity: [0b____1011] } | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------+ ast : json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'a', 'hello') @@ -5543,8 +5543,8 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| v | NullableColumn { column: StringColumn { data: 0x7b226b223a2276227d7b226d223a226e227d7b2261223a2262222c2263223a2264222c2279223a227a227d, offsets: [0, 9, 18, 18, 43] }, validity: [0b____1011] } | -| n | NullableColumn { column: StringColumn { data: 0x22686922747275655b312c322c335d, offsets: [0, 4, 4, 8, 15] }, validity: [0b____1101] } | +| v | NullableColumn { column: StringColumn { data: Utf8ViewArray[{"k":"v"}, {"m":"n"}, , {"a":"b","c":"d","y":"z"}] }, validity: [0b____1011] } | +| n | NullableColumn { column: StringColumn { data: Utf8ViewArray["hi", , true, [1,2,3]] }, validity: [0b____1101] } | | Output | NullableColumn { column: BinaryColumn { data: 0x40000002100000011000000110000001100000026b787668694000000110000001100000016d6e400000041000000110000001100000011000000110000001100000015000001610000001616378796264800000032000000220000002200000025001500250037a, offsets: [0, 25, 39, 39, 104] }, validity: [0b____1011] } | +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -5567,8 +5567,8 @@ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| v | NullableColumn { column: StringColumn { data: 0x7b226b223a2276227d7b226d223a226e227d7b2261223a2262222c2263223a2264222c2279223a227a227d, offsets: [0, 9, 18, 18, 43] }, validity: [0b____1011] } | -| n | NullableColumn { column: StringColumn { data: 0x22686922747275655b312c322c335d, offsets: [0, 4, 4, 8, 15] }, validity: [0b____1101] } | +| v | NullableColumn { column: StringColumn { data: Utf8ViewArray[{"k":"v"}, {"m":"n"}, , {"a":"b","c":"d","y":"z"}] }, validity: [0b____1011] } | +| n | NullableColumn { column: StringColumn { data: Utf8ViewArray["hi", , true, [1,2,3]] }, validity: [0b____1101] } | | Output | NullableColumn { column: BinaryColumn { data: 0x4000000210000001100000011000000210000001636b6869764000000110000001100000016d6e4000000310000001100000011000000110000001500000161000000161637962800000032000000220000002200000025001500250037a, offsets: [0, 25, 39, 39, 94] }, validity: [0b____1011] } | +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -5632,12 +5632,12 @@ evaluation: | Row 3 | '{"a":"b","c":"d","y":"z"}' | '{"c":"d","y":"z"}' | +--------+---------------------------------+---------------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| v | NullableColumn { column: StringColumn { data: 0x7b226b223a2276227d7b226d223a226e227d7b2261223a2262222c2263223a2264222c2279223a227a227d, offsets: [0, 9, 18, 18, 43] }, validity: [0b____1011] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x4000000110000001100000016b764000000040000002100000011000000110000001100000016379647a, offsets: [0, 14, 18, 18, 42] }, validity: [0b____1011] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| v | NullableColumn { column: StringColumn { data: Utf8ViewArray[{"k":"v"}, {"m":"n"}, , {"a":"b","c":"d","y":"z"}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x4000000110000001100000016b764000000040000002100000011000000110000001100000016379647a, offsets: [0, 14, 18, 18, 42] }, validity: [0b____1011] } | ++--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : json_object_pick('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'a', 'b', 'c') @@ -5699,11 +5699,11 @@ evaluation: | Row 3 | '{"a":"b","c":"d","y":"z"}' | '{"a":"b"}' | +--------+---------------------------------+--------------+ evaluation (internal): -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| v | NullableColumn { column: StringColumn { data: 0x7b226b223a2276227d7b226d223a226e227d7b2261223a2262222c2263223a2264222c2279223a227a227d, offsets: [0, 9, 18, 18, 43] }, validity: [0b____1011] } | -| Output | NullableColumn { column: BinaryColumn { data: 0x400000004000000110000001100000016d6e4000000110000001100000016162, offsets: [0, 4, 18, 18, 32] }, validity: [0b____1011] } | -+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| v | NullableColumn { column: StringColumn { data: Utf8ViewArray[{"k":"v"}, {"m":"n"}, , {"a":"b","c":"d","y":"z"}] }, validity: [0b____1011] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x400000004000000110000001100000016d6e4000000110000001100000016162, offsets: [0, 4, 18, 18, 32] }, validity: [0b____1011] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/query/pipeline/transforms/src/processors/transforms/sort/rows/common.rs b/src/query/pipeline/transforms/src/processors/transforms/sort/rows/common.rs index 6db4b9188601..f554b6157f38 100644 --- a/src/query/pipeline/transforms/src/processors/transforms/sort/rows/common.rs +++ b/src/query/pipeline/transforms/src/processors/transforms/sort/rows/common.rs @@ -98,8 +98,10 @@ impl RowConverter for CommonRowConverter { let (_, validity) = c.validity(); let col = c.remove_nullable(); let col = col.as_variant().unwrap(); - let mut builder = - BinaryColumnBuilder::with_capacity(col.len(), col.data().len()); + let mut builder = BinaryColumnBuilder::with_capacity( + col.len(), + col.current_buffer_len(), + ); for (i, val) in col.iter().enumerate() { if let Some(validity) = validity { if unsafe { !validity.get_bit_unchecked(i) } { diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index 958988527b63..d749c81bcc95 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -255,9 +255,9 @@ impl ModifyTableColumnInterpreter { return Ok(PipelineBuildResult::create()); } - // if alter column from string to binary, we don't need to rebuild table - let is_alter_column_string_to_binary = - schema + // if alter column from string to binary in parquet, we don't need to rebuild table + let is_alter_column_string_to_binary = table.storage_format_as_parquet() + && schema .fields() .iter() .zip(new_schema.fields()) diff --git a/src/query/service/src/pipelines/builders/builder_join.rs b/src/query/service/src/pipelines/builders/builder_join.rs index b8833274a1e9..fd5ef8c2f906 100644 --- a/src/query/service/src/pipelines/builders/builder_join.rs +++ b/src/query/service/src/pipelines/builders/builder_join.rs @@ -197,10 +197,6 @@ impl PipelineBuilder { self.main_pipeline.output_len(), barrier, )?); - let mut has_string_column = false; - for field in join.output_schema()?.fields() { - has_string_column |= field.data_type().is_string_column(); - } self.main_pipeline.add_transform(|input, output| { Ok(ProcessorPtr::create(TransformHashJoinProbe::create( @@ -212,7 +208,6 @@ impl PipelineBuilder { self.func_ctx.clone(), &join.join_type, !join.non_equi_conditions.is_empty(), - has_string_column, )?)) })?; diff --git a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_groups_builder.rs b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_groups_builder.rs index 194fe626d46f..d0d5d0c99330 100644 --- a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_groups_builder.rs +++ b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_groups_builder.rs @@ -16,7 +16,7 @@ use std::marker::PhantomData; use databend_common_exception::Result; use databend_common_expression::types::binary::BinaryColumnBuilder; -use databend_common_expression::types::string::StringColumn; +use databend_common_expression::types::string::StringColumnBuilder; use databend_common_expression::types::DataType; use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; @@ -78,26 +78,36 @@ pub struct SerializedKeysGroupColumnsBuilder<'a> { data: Vec<&'a [u8]>, group_data_types: Vec, - single_builder: Option, + single_binary_builder: Option, + single_string_builder: Option, } impl<'a> SerializedKeysGroupColumnsBuilder<'a> { pub fn create(capacity: usize, data_capacity: usize, params: &AggregatorParams) -> Self { - let (single_builder, data) = if params.group_data_types.len() == 1 - && (params.group_data_types[0].is_string() || params.group_data_types[0].is_variant()) - { - ( - Some(BinaryColumnBuilder::with_capacity(capacity, data_capacity)), - vec![], - ) - } else { - (None, Vec::with_capacity(capacity)) - }; + let (single_binary_builder, single_string_builder, data) = + if params.group_data_types.len() == 1 { + if params.group_data_types[0].is_string() { + ( + None, + Some(StringColumnBuilder::with_capacity(capacity)), + vec![], + ) + } else { + ( + Some(BinaryColumnBuilder::with_capacity(capacity, data_capacity)), + None, + vec![], + ) + } + } else { + (None, None, Vec::with_capacity(capacity)) + }; Self { data, group_data_types: params.group_data_types.clone(), - single_builder, + single_binary_builder, + single_string_builder, } } } @@ -106,27 +116,33 @@ impl<'a> GroupColumnsBuilder for SerializedKeysGroupColumnsBuilder<'a> { type T = &'a [u8]; fn append_value(&mut self, v: &'a [u8]) { - match self.single_builder.as_mut() { - Some(builder) => { - builder.put_slice(v); - builder.commit_row(); + match ( + self.single_string_builder.as_mut(), + self.single_binary_builder.as_mut(), + ) { + (Some(s), _) => { + s.put_slice(v); + s.commit_row(); + } + (_, Some(s)) => { + s.put_slice(v); + s.commit_row(); } - None => self.data.push(v), + (_, _) => self.data.push(v), } } fn finish(mut self) -> Result> { - if let Some(builder) = self.single_builder.take() { + if let Some(builder) = self.single_binary_builder.take() { let col = builder.build(); match self.group_data_types[0] { - DataType::String => { - return Ok(vec![Column::String(unsafe { - StringColumn::from_binary_unchecked(col) - })]); - } + DataType::Binary => return Ok(vec![Column::Binary(col)]), DataType::Variant => return Ok(vec![Column::Variant(col)]), _ => {} } + } else if let Some(builder) = self.single_string_builder.take() { + let col = builder.build(); + return Ok(vec![Column::String(col)]); } let rows = self.data.len(); @@ -182,7 +198,7 @@ impl<'a> GroupColumnsBuilder for DictionarySerializedKeysGroupColumnsBuilder<'a> let mut index = 0; let mut res = Vec::with_capacity(self.group_data_types.len()); for data_type in self.group_data_types.iter() { - if data_type.is_string() || data_type.is_variant() { + if data_type.is_variant() { let mut builder = BinaryColumnBuilder::with_capacity(0, 0); for string_type_keys in &self.string_type_data { @@ -191,12 +207,7 @@ impl<'a> GroupColumnsBuilder for DictionarySerializedKeysGroupColumnsBuilder<'a> } index += 1; - res.push(match data_type.is_string() { - true => Column::String(unsafe { - StringColumn::from_binary_unchecked(builder.build()) - }), - false => Column::Variant(builder.build()), - }); + res.push(Column::Variant(builder.build())); } else { let mut column = ColumnBuilder::with_capacity(data_type, rows); diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs index f6694abadd46..3af426c9401a 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs @@ -515,26 +515,30 @@ impl HashJoinBuildState { let space_size = match &keys_state { // safe to unwrap(): offset.len() >= 1. - KeysState::Column(Column::Binary(col) | Column::Variant(col) | Column::Bitmap(col)) => col.offsets().last().unwrap(), - KeysState::Column(Column::String(col) ) => col.offsets().last().unwrap(), - // The function `build_keys_state` of both HashMethodSerializer and HashMethodSingleString - // must return `Column::Binary` | `Column::String` | `Column::Variant` | `Column::Bitmap`. + KeysState::Column(Column::String(col)) => col.current_buffer_len(), + KeysState::Column( + Column::Binary(col) | Column::Variant(col) | Column::Bitmap(col), + ) => col.data().len(), _ => unreachable!(), }; let valid_num = match &$valids { Some(valids) => valids.len() - valids.unset_bits(), None => $chunk.num_rows(), }; - let mut entry_local_space: Vec = - Vec::with_capacity(valid_num * entry_size); - let mut string_local_space: Vec = - Vec::with_capacity(*space_size as usize); - let mut raw_entry_ptr = unsafe { std::mem::transmute::<*mut u8, *mut StringRawEntry>(entry_local_space.as_mut_ptr()) }; + let mut entry_local_space: Vec = Vec::with_capacity(valid_num * entry_size); + let mut string_local_space: Vec = Vec::with_capacity(space_size as usize); + let mut raw_entry_ptr = unsafe { + std::mem::transmute::<*mut u8, *mut StringRawEntry>( + entry_local_space.as_mut_ptr(), + ) + }; let mut string_local_space_ptr = string_local_space.as_mut_ptr(); match $valids { Some(valids) => { - for (row_index, (key, valid)) in build_keys_iter.zip(valids.iter()).enumerate() { + for (row_index, (key, valid)) in + build_keys_iter.zip(valids.iter()).enumerate() + { if !valid { continue; } @@ -557,7 +561,11 @@ impl HashJoinBuildState { (*raw_entry_ptr).early.as_mut_ptr(), std::cmp::min(STRING_EARLY_SIZE, key.len()), ); - std::ptr::copy_nonoverlapping(key.as_ptr(), string_local_space_ptr, key.len()); + std::ptr::copy_nonoverlapping( + key.as_ptr(), + string_local_space_ptr, + key.len(), + ); string_local_space_ptr = string_local_space_ptr.add(key.len()); } @@ -586,7 +594,11 @@ impl HashJoinBuildState { (*raw_entry_ptr).early.as_mut_ptr(), std::cmp::min(STRING_EARLY_SIZE, key.len()), ); - std::ptr::copy_nonoverlapping(key.as_ptr(), string_local_space_ptr, key.len()); + std::ptr::copy_nonoverlapping( + key.as_ptr(), + string_local_space_ptr, + key.len(), + ); string_local_space_ptr = string_local_space_ptr.add(key.len()); } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs index 0c38cad7d69a..5e27be5eb94a 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs @@ -508,7 +508,6 @@ impl HashJoinProbeState { &generation_state.build_columns, &generation_state.build_columns_data_type, &generation_state.build_num_rows, - &mut probe_state.generation_state.string_items_buf, )?; if self.hash_join_state.hash_join_desc.join_type == JoinType::Full { @@ -597,7 +596,6 @@ impl HashJoinProbeState { &generation_state.build_columns, &generation_state.build_columns_data_type, &generation_state.build_num_rows, - &mut probe_state.generation_state.string_items_buf, )?); build_indexes_idx = 0; } @@ -659,7 +657,6 @@ impl HashJoinProbeState { &generation_state.build_columns, &generation_state.build_columns_data_type, &generation_state.build_num_rows, - &mut probe_state.generation_state.string_items_buf, )?); build_indexes_idx = 0; } @@ -747,7 +744,6 @@ impl HashJoinProbeState { &generation_state.build_columns, &generation_state.build_columns_data_type, &generation_state.build_num_rows, - &mut probe_state.generation_state.string_items_buf, )?; result_blocks.push(self.merge_eq_block( Some(build_block), diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/merge_into_hash_join_optimization.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/merge_into_hash_join_optimization.rs index 26af688afad6..3d5cdbaf0719 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/merge_into_hash_join_optimization.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/merge_into_hash_join_optimization.rs @@ -344,10 +344,7 @@ impl TransformHashJoinProbe { { let end = (interval.1 - chunk_start).min(start + self.max_block_size as u32 - 1); let range = (start..=end).collect::>(); - let data_block = chunk_block.take( - &range, - &mut self.probe_state.generation_state.string_items_buf, - )?; + let data_block = chunk_block.take(&range)?; assert!(!data_block.is_empty()); let (segment_idx, block_idx) = split_prefix(prefix); info!( diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs index 32294afc57b7..51bcbbbb4cc6 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs @@ -210,11 +210,7 @@ impl HashJoinProbeState { } let probe_block = if probe_state.is_probe_projected { - Some(DataBlock::take( - input, - &probe_indexes[0..matched_idx], - &mut probe_state.string_items_buf, - )?) + Some(DataBlock::take(input, &probe_indexes[0..matched_idx])?) } else { None }; @@ -224,7 +220,6 @@ impl HashJoinProbeState { &build_state.build_columns, &build_state.build_columns_data_type, &build_state.build_num_rows, - &mut probe_state.string_items_buf, )?) } else { None diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_anti_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_anti_join.rs index 813e5b168c1c..6e42f3c92fb2 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_anti_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_anti_join.rs @@ -79,11 +79,7 @@ impl HashJoinProbeState { )); } - let result_block = DataBlock::take( - &process_state.input, - &probe_indexes[0..count], - &mut probe_state.generation_state.string_items_buf, - )?; + let result_block = DataBlock::take(&process_state.input, &probe_indexes[0..count])?; probe_state.process_state = None; @@ -237,7 +233,6 @@ impl HashJoinProbeState { result_blocks.push(DataBlock::take( &process_state.input, &probe_indexes[0..unmatched_idx], - &mut probe_state.generation_state.string_items_buf, )?); } @@ -266,11 +261,7 @@ impl HashJoinProbeState { } let probe_block = if probe_state.is_probe_projected { - Some(DataBlock::take( - input, - &probe_indexes[0..matched_idx], - &mut probe_state.string_items_buf, - )?) + Some(DataBlock::take(input, &probe_indexes[0..matched_idx])?) } else { None }; @@ -280,7 +271,6 @@ impl HashJoinProbeState { &build_state.build_columns, &build_state.build_columns_data_type, &build_state.build_num_rows, - &mut probe_state.string_items_buf, )?) } else { None diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs index f8dc4a100396..6afb2d80ba34 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs @@ -372,11 +372,7 @@ impl HashJoinProbeState { } let probe_block = if probe_state.is_probe_projected { - let mut probe_block = DataBlock::take( - input, - &probe_indexes[0..unmatched_idx], - &mut probe_state.string_items_buf, - )?; + let mut probe_block = DataBlock::take(input, &probe_indexes[0..unmatched_idx])?; // For full join, wrap nullable for probe block if self.hash_join_state.hash_join_desc.join_type == JoinType::Full { let nullable_probe_columns = probe_block @@ -435,11 +431,7 @@ impl HashJoinProbeState { } let probe_block = if probe_state.is_probe_projected { - let mut probe_block = DataBlock::take( - input, - &probe_indexes[0..matched_idx], - &mut probe_state.string_items_buf, - )?; + let mut probe_block = DataBlock::take(input, &probe_indexes[0..matched_idx])?; // For full join, wrap nullable for probe block if self.hash_join_state.hash_join_desc.join_type == JoinType::Full { let nullable_probe_columns = probe_block @@ -459,7 +451,6 @@ impl HashJoinProbeState { &build_state.build_columns, &build_state.build_columns_data_type, &build_state.build_num_rows, - &mut probe_state.string_items_buf, )?; // For left or full join, wrap nullable for build block. let nullable_columns = if build_state.build_num_rows == 0 { diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_mark_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_mark_join.rs index 23f47e26512f..0292ec9eb554 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_mark_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_mark_join.rs @@ -341,11 +341,7 @@ impl HashJoinProbeState { } let probe_block = if probe_state.is_probe_projected { - Some(DataBlock::take( - input, - &probe_indexes[0..matched_idx], - &mut probe_state.string_items_buf, - )?) + Some(DataBlock::take(input, &probe_indexes[0..matched_idx])?) } else { None }; @@ -355,7 +351,6 @@ impl HashJoinProbeState { &build_state.build_columns, &build_state.build_columns_data_type, &build_state.build_num_rows, - &mut probe_state.string_items_buf, )?) } else { None diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_semi_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_semi_join.rs index 1f98cc837261..bae30ec7ccd4 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_semi_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_semi_join.rs @@ -82,7 +82,6 @@ impl HashJoinProbeState { result_blocks.push(DataBlock::take( &process_state.input, &probe_indexes[0..matched_idx], - &mut probe_state.generation_state.string_items_buf, )?); } @@ -232,7 +231,6 @@ impl HashJoinProbeState { result_blocks.push(DataBlock::take( &process_state.input, &probe_indexes[0..matched_idx], - &mut probe_state.generation_state.string_items_buf, )?); } @@ -261,11 +259,7 @@ impl HashJoinProbeState { } let probe_block = if probe_state.is_probe_projected { - Some(DataBlock::take( - input, - &probe_indexes[0..matched_idx], - &mut probe_state.string_items_buf, - )?) + Some(DataBlock::take(input, &probe_indexes[0..matched_idx])?) } else { None }; @@ -275,7 +269,6 @@ impl HashJoinProbeState { &build_state.build_columns, &build_state.build_columns_data_type, &build_state.build_num_rows, - &mut probe_state.string_items_buf, )?) } else { None diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs index f5730d477934..cac83b169073 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs @@ -242,11 +242,7 @@ impl HashJoinProbeState { } let probe_block = if probe_state.is_probe_projected { - let probe_block = DataBlock::take( - input, - &probe_indexes[0..matched_idx], - &mut probe_state.string_items_buf, - )?; + let probe_block = DataBlock::take(input, &probe_indexes[0..matched_idx])?; // The join type is right join, we need to wrap nullable for probe side. let nullable_columns = probe_block @@ -264,7 +260,6 @@ impl HashJoinProbeState { &build_state.build_columns, &build_state.build_columns_data_type, &build_state.build_num_rows, - &mut probe_state.string_items_buf, )?) } else { None diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_mark_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_mark_join.rs index 270a67cfe9cb..372e46aa0b03 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_mark_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_mark_join.rs @@ -276,11 +276,7 @@ impl HashJoinProbeState { } let probe_block = if probe_state.is_probe_projected { - Some(DataBlock::take( - input, - &probe_indexes[0..matched_idx], - &mut probe_state.string_items_buf, - )?) + Some(DataBlock::take(input, &probe_indexes[0..matched_idx])?) } else { None }; @@ -290,7 +286,6 @@ impl HashJoinProbeState { &build_state.build_columns, &build_state.build_columns_data_type, &build_state.build_num_rows, - &mut probe_state.string_items_buf, )?) } else { None diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_semi_anti_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_semi_anti_join.rs index 95098d589c17..d9d345d5fa49 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_semi_anti_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_semi_anti_join.rs @@ -282,11 +282,7 @@ impl HashJoinProbeState { } let probe_block = if probe_state.is_probe_projected { - Some(DataBlock::take( - input, - &probe_indexes[0..matched_idx], - &mut probe_state.string_items_buf, - )?) + Some(DataBlock::take(input, &probe_indexes[0..matched_idx])?) } else { None }; @@ -296,7 +292,6 @@ impl HashJoinProbeState { &build_state.build_columns, &build_state.build_columns_data_type, &build_state.build_num_rows, - &mut probe_state.string_items_buf, )?) } else { None diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_state.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_state.rs index 87d7c063b1df..87ac376e47da 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_state.rs @@ -85,7 +85,6 @@ impl ProbeState { max_block_size: usize, join_type: &JoinType, with_conjunction: bool, - has_string_column: bool, func_ctx: FunctionContext, other_predicate: Option, ) -> Self { @@ -134,7 +133,7 @@ impl ProbeState { process_state: None, max_block_size, mutable_indexes: MutableIndexes::new(max_block_size), - generation_state: ProbeBlockGenerationState::new(max_block_size, has_string_column), + generation_state: ProbeBlockGenerationState::new(max_block_size), selection: vec![0; max_block_size], hashes: vec![0; max_block_size], selection_count: 0, @@ -185,20 +184,13 @@ pub struct ProbeBlockGenerationState { pub(crate) is_probe_projected: bool, // When we need a bitmap that is all true, we can directly slice it to reduce memory usage. pub(crate) true_validity: Bitmap, - // The string_items_buf is used as a buffer to reduce memory allocation when taking [u8] Columns. - pub(crate) string_items_buf: Option>, } impl ProbeBlockGenerationState { - fn new(size: usize, has_string_column: bool) -> Self { + fn new(size: usize) -> Self { Self { is_probe_projected: false, true_validity: Bitmap::new_constant(true, size), - string_items_buf: if has_string_column { - Some(vec![(0, 0); size]) - } else { - None - }, } } } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/row.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/row.rs index 776c88dfeb8b..059e4ed2483a 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/row.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/row.rs @@ -65,7 +65,6 @@ impl RowSpace { build_columns: &[ColumnVec], build_columns_data_type: &[DataType], num_rows: &usize, - string_items_buf: &mut Option>, ) -> Result { if *num_rows != 0 { let data_block = DataBlock::take_column_vec( @@ -73,7 +72,6 @@ impl RowSpace { build_columns_data_type, row_ptrs, row_ptrs.len(), - string_items_buf, ); Ok(data_block) } else { diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/transform_hash_join_probe.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/transform_hash_join_probe.rs index c478a19f5ebf..cd113272c8b3 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/transform_hash_join_probe.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/transform_hash_join_probe.rs @@ -126,7 +126,6 @@ impl TransformHashJoinProbe { func_ctx: FunctionContext, join_type: &JoinType, with_conjunct: bool, - has_string_column: bool, ) -> Result> { join_probe_state.probe_attach(); // Create a hash join spiller. @@ -153,7 +152,6 @@ impl TransformHashJoinProbe { max_block_size, join_type, with_conjunct, - has_string_column, func_ctx, other_predicate, ); diff --git a/src/query/service/src/table_functions/others/suggested_background_compaction_tasks.rs b/src/query/service/src/table_functions/others/suggested_background_compaction_tasks.rs index 3815af7c5443..039df546b5f8 100644 --- a/src/query/service/src/table_functions/others/suggested_background_compaction_tasks.rs +++ b/src/query/service/src/table_functions/others/suggested_background_compaction_tasks.rs @@ -14,12 +14,12 @@ use std::sync::Arc; -use arrow_array::types::UInt64Type; -use arrow_array::BooleanArray; -use arrow_array::LargeBinaryArray; -use arrow_array::PrimitiveArray; -use arrow_array::RecordBatch; use databend_common_exception::Result; +use databend_common_expression::types::BooleanType; +use databend_common_expression::types::StringType; +use databend_common_expression::types::UInt64Type; +use databend_common_expression::types::ValueType; +use databend_common_expression::DataBlock; use databend_common_meta_app::schema::TableStatistics; use databend_enterprise_background_service::Suggestion; use log::info; @@ -80,81 +80,69 @@ impl SuggestedBackgroundTasksSource { let mut suggestions = vec![]; for records in resps { info!(records :? =(&records); "target_tables"); - let db_names = records - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let db_ids = records - .column(1) - .as_any() - .downcast_ref::>() - .unwrap(); - let tb_names = records - .column(2) - .as_any() - .downcast_ref::() - .unwrap(); - let tb_ids = records - .column(3) - .as_any() - .downcast_ref::>() - .unwrap(); - let segment_advice = records - .column(4) - .as_any() - .downcast_ref::() - .unwrap(); - let block_advice = records - .column(5) - .as_any() - .downcast_ref::() - .unwrap(); - let row_count = records - .column(6) - .as_any() - .downcast_ref::>() - .unwrap(); - let bytes_uncompressed = records - .column(7) - .as_any() - .downcast_ref::>() - .unwrap(); - let bytes_compressed = records - .column(8) - .as_any() - .downcast_ref::>() - .unwrap(); - let index_size = records - .column(9) - .as_any() - .downcast_ref::>() - .unwrap(); - let segment_count = records - .column(10) - .as_any() - .downcast_ref::>() - .unwrap(); - let block_count = records - .column(11) - .as_any() - .downcast_ref::>() - .unwrap(); + + let records = records.consume_convert_to_full(); + let db_names = + StringType::try_downcast_column(records.columns()[0].value.as_column().unwrap()) + .unwrap(); + + let db_ids = + UInt64Type::try_downcast_column(records.columns()[1].value.as_column().unwrap()) + .unwrap(); + + let tb_names = + StringType::try_downcast_column(records.columns()[2].value.as_column().unwrap()) + .unwrap(); + + let tb_ids = + UInt64Type::try_downcast_column(records.columns()[3].value.as_column().unwrap()) + .unwrap(); + + let segment_advice = + BooleanType::try_downcast_column(records.columns()[4].value.as_column().unwrap()) + .unwrap(); + + let block_advice = + BooleanType::try_downcast_column(records.columns()[5].value.as_column().unwrap()) + .unwrap(); + + let row_count = + UInt64Type::try_downcast_column(records.columns()[6].value.as_column().unwrap()) + .unwrap(); + + let bytes_uncompressed = + UInt64Type::try_downcast_column(records.columns()[7].value.as_column().unwrap()) + .unwrap(); + + let bytes_compressed = + UInt64Type::try_downcast_column(records.columns()[8].value.as_column().unwrap()) + .unwrap(); + + let index_size = + UInt64Type::try_downcast_column(records.columns()[9].value.as_column().unwrap()) + .unwrap(); + + let segment_count = + UInt64Type::try_downcast_column(records.columns()[10].value.as_column().unwrap()) + .unwrap(); + + let block_count = + UInt64Type::try_downcast_column(records.columns()[11].value.as_column().unwrap()) + .unwrap(); + for i in 0..records.num_rows() { - let db_name: String = - String::from_utf8_lossy(db_names.value(i).to_vec().as_slice()).to_string(); - let db_id = db_ids.value(i); - let table_name = - String::from_utf8_lossy(tb_names.value(i).to_vec().as_slice()).to_string(); - let table_id = tb_ids.value(i); - let need_compact_segment = segment_advice.value(i); - let need_compact_block = block_advice.value(i); - let number_of_rows = row_count.value(i); - let data_bytes = bytes_uncompressed.value(i); - let compressed_data_bytes = bytes_compressed.value(i); - let index_data_bytes = index_size.value(i); - let number_of_segments = segment_count.value(i); - let number_of_blocks = block_count.value(i); + let db_name: String = db_names.index(i).unwrap().to_string(); + let db_id = db_ids[i]; + let table_name = tb_names.index(i).unwrap().to_string(); + let table_id = tb_ids[i]; + let need_compact_segment = segment_advice.get_bit(i); + let need_compact_block = block_advice.get_bit(i); + let number_of_rows = row_count[i]; + let data_bytes = bytes_uncompressed[i]; + let compressed_data_bytes = bytes_compressed[i]; + let index_data_bytes = index_size[i]; + let number_of_segments = segment_count[i]; + let number_of_blocks = block_count[i]; let suggestion = Suggestion::Compaction { db_id, db_name, @@ -179,7 +167,7 @@ impl SuggestedBackgroundTasksSource { pub async fn do_get_all_suggested_compaction_tables( ctx: Arc, - ) -> Result> { + ) -> Result> { let res = SuggestedBackgroundTasksSource::do_execute_sql( ctx, SUGGEST_TABLES_NEED_COMPACTION.to_string(), diff --git a/src/query/service/src/table_functions/others/suggested_background_tasks.rs b/src/query/service/src/table_functions/others/suggested_background_tasks.rs index a2053c953908..b1654fc23af4 100644 --- a/src/query/service/src/table_functions/others/suggested_background_tasks.rs +++ b/src/query/service/src/table_functions/others/suggested_background_tasks.rs @@ -15,7 +15,6 @@ use std::any::Any; use std::sync::Arc; -use arrow_array::RecordBatch; use chrono::DateTime; use databend_common_catalog::plan::DataSourcePlan; use databend_common_catalog::plan::PartStatistics; @@ -24,7 +23,6 @@ use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::table_args::TableArgs; use databend_common_catalog::table_context::TableContext; use databend_common_catalog::table_function::TableFunction; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::BooleanType; use databend_common_expression::types::StringType; @@ -155,14 +153,10 @@ impl SuggestedBackgroundTasksSource { } #[async_backtrace::framed] - pub async fn do_execute_sql( - ctx: Arc, - sql: String, - ) -> Result> { + pub async fn do_execute_sql(ctx: Arc, sql: String) -> Result> { // Use interpreter_plan_sql, we can write the query log if an error occurs. let (plan, _, _) = interpreter_plan_sql(ctx.clone(), sql.as_str(), false).await?; - let data_schema = plan.schema(); let interpreter = InterpreterFactory::get(ctx.clone(), &plan).await?; let stream = interpreter.execute(ctx.clone()).await?; let blocks = stream.map(|v| v).collect::>().await; @@ -182,10 +176,6 @@ impl SuggestedBackgroundTasksSource { return Ok(None); } let record = DataBlock::concat(&result)?; - let record = record - .to_record_batch_with_dataschema(data_schema.as_ref()) - .map_err(|e| ErrorCode::Internal(format!("{e:?}")))?; - Ok(Some(record)) } diff --git a/src/query/service/src/table_functions/others/udf.rs b/src/query/service/src/table_functions/others/udf.rs index 0abfa5fff800..5d324e7a1338 100644 --- a/src/query/service/src/table_functions/others/udf.rs +++ b/src/query/service/src/table_functions/others/udf.rs @@ -18,6 +18,7 @@ use std::hash::Hash; use std::hash::Hasher; use std::sync::Arc; +use arrow_array::LargeStringArray; use arrow_array::RecordBatch; use arrow_schema::Field; use arrow_schema::Schema; @@ -164,7 +165,7 @@ impl Table for UdfEchoTable { let result = result_batch .column(0) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); let result = result.value(0).to_string(); let parts = vec![Arc::new(Box::new(StringPart { value: result }) as _)]; diff --git a/src/query/service/src/test_kits/fixture.rs b/src/query/service/src/test_kits/fixture.rs index 2f5f7959e2b5..e46472ab1754 100644 --- a/src/query/service/src/test_kits/fixture.rs +++ b/src/query/service/src/test_kits/fixture.rs @@ -733,17 +733,13 @@ impl TestFixture { schema, (0..num_of_block) .map(|idx| { - let mut title_builder = - StringColumnBuilder::with_capacity(rows_per_block, rows_per_block * 10); - let mut content_builder = - StringColumnBuilder::with_capacity(rows_per_block, rows_per_block * 10); + let mut title_builder = StringColumnBuilder::with_capacity(rows_per_block); + let mut content_builder = StringColumnBuilder::with_capacity(rows_per_block); for i in 0..rows_per_block { let j = (idx * rows_per_block + i) % sample_books.len(); - title_builder.put_str(sample_books[j].0); - title_builder.commit_row(); - content_builder.put_str(sample_books[j].1); - content_builder.commit_row(); + title_builder.put_and_commit(sample_books[j].0); + content_builder.put_and_commit(sample_books[j].1); } let title_column = Column::String(title_builder.build()); let content_column = Column::String(content_builder.build()); diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index 1246d74aae24..35e74afbb312 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -275,7 +275,7 @@ impl DefaultSettings { mode: SettingMode::Both, range: Some(SettingRange::Numeric(0..=1)), }), - ("enable_dio", DefaultSettingValue{ + ("enable_dio", DefaultSettingValue{ value: UserSettingValue::UInt64(1), desc: "Enables Direct IO.", mode: SettingMode::Both, @@ -288,7 +288,7 @@ impl DefaultSettings { range: Some(SettingRange::Numeric(0..=1)), }), ("join_spilling_memory_ratio", DefaultSettingValue { - value: UserSettingValue::UInt64(60), + value: UserSettingValue::UInt64(0), desc: "Sets the maximum memory ratio in bytes that hash join can use before spilling data to storage during query execution, 0 is unlimited", mode: SettingMode::Both, range: Some(SettingRange::Numeric(0..=100)), @@ -463,7 +463,7 @@ impl DefaultSettings { range: Some(SettingRange::Numeric(0..=u64::MAX)), }), ("aggregate_spilling_memory_ratio", DefaultSettingValue { - value: UserSettingValue::UInt64(60), + value: UserSettingValue::UInt64(0), desc: "Sets the maximum memory ratio in bytes that an aggregator can use before spilling data to storage during query execution.", mode: SettingMode::Both, range: Some(SettingRange::Numeric(0..=100)), @@ -475,7 +475,7 @@ impl DefaultSettings { range: Some(SettingRange::Numeric(0..=u64::MAX)), }), ("window_partition_spilling_memory_ratio", DefaultSettingValue { - value: UserSettingValue::UInt64(60), + value: UserSettingValue::UInt64(0), desc: "Sets the maximum memory ratio in bytes that a window partitioner can use before spilling data to storage during query execution.", mode: SettingMode::Both, range: Some(SettingRange::Numeric(0..=100)), @@ -511,7 +511,7 @@ impl DefaultSettings { range: Some(SettingRange::Numeric(0..=u64::MAX)), }), ("sort_spilling_memory_ratio", DefaultSettingValue { - value: UserSettingValue::UInt64(60), + value: UserSettingValue::UInt64(0), desc: "Sets the maximum memory ratio in bytes that a sorter can use before spilling data to storage during query execution.", mode: SettingMode::Both, range: Some(SettingRange::Numeric(0..=100)), @@ -689,8 +689,8 @@ impl DefaultSettings { desc: "Set numeric default_order_by_null mode", mode: SettingMode::Both, range: Some(SettingRange::String(vec![ - "nulls_first".into(), "nulls_last".into(), - "nulls_first_on_asc_last_on_desc".into(), "nulls_last_on_asc_first_on_desc".into(), + "nulls_first".into(), "nulls_last".into(), + "nulls_first_on_asc_last_on_desc".into(), "nulls_last_on_asc_first_on_desc".into(), ])), }), ("ddl_column_type_nullable", DefaultSettingValue { diff --git a/src/query/storages/common/index/benches/build_from_block.rs b/src/query/storages/common/index/benches/build_from_block.rs index bb80b4a3f77d..91f68213aa33 100644 --- a/src/query/storages/common/index/benches/build_from_block.rs +++ b/src/query/storages/common/index/benches/build_from_block.rs @@ -198,7 +198,7 @@ fn rand_str_column(n: i32, len: i32) -> Column { abcdefghijklmnopqrstuvwxyz\ 0123456789)(*&^%$#@!~"; - let mut builder = StringColumnBuilder::with_capacity(n as usize, 0); + let mut builder = StringColumnBuilder::with_capacity(n as usize); for _ in 0..n { for _ in (len / 2)..len { let idx = rng.gen_range(0..CHARSET.len()); diff --git a/src/query/storages/common/index/src/bloom_index.rs b/src/query/storages/common/index/src/bloom_index.rs index a5614b8b2a73..8763f35ae9a5 100644 --- a/src/query/storages/common/index/src/bloom_index.rs +++ b/src/query/storages/common/index/src/bloom_index.rs @@ -545,7 +545,7 @@ impl BloomIndex { /// If it does, the bloom index for the column will not be established. fn check_large_string(column: &Column) -> bool { if let Column::String(v) = &column { - let bytes_per_row = v.data().len() / v.len().max(1); + let bytes_per_row = v.current_buffer_len() / v.len().max(1); if bytes_per_row > 256 { return true; } diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 521d0a769cbc..c2bbdc4d7755 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -504,6 +504,10 @@ impl Table for FuseTable { true } + fn storage_format_as_parquet(&self) -> bool { + matches!(self.storage_format, FuseStorageFormat::Parquet) + } + fn cluster_keys(&self, ctx: Arc) -> Vec> { let table_meta = Arc::new(self.clone()); if let Some((_, order)) = &self.cluster_key_meta { diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index 3d46fdab22bf..975bcd1e8d77 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -135,6 +135,7 @@ impl BlockReader { .iter() .map(|c| (*c).clone()) .collect(); + let project_indices = Self::build_projection_indices(&project_column_nodes); Ok(Arc::new(BlockReader { diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs index e9ede151a890..d8aada0bdd7b 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs @@ -277,6 +277,7 @@ impl BlockReader { ) -> Result> { let field = column_node.field.clone(); let is_nested = column_node.is_nested; + match self.native_columns_reader.column_iter_to_arrays( readers, &column_node.leaf_indices, diff --git a/src/query/storages/fuse/src/operations/read/runtime_filter_prunner.rs b/src/query/storages/fuse/src/operations/read/runtime_filter_prunner.rs index 4d4674b50d4a..066a418b626d 100644 --- a/src/query/storages/fuse/src/operations/read/runtime_filter_prunner.rs +++ b/src/query/storages/fuse/src/operations/read/runtime_filter_prunner.rs @@ -150,8 +150,8 @@ pub(crate) fn update_bitmap_with_bloom_filter( } idx += 1; }), - KeysState::Column(Column::String(col)) => col.iter_binary().for_each(|key| { - let hash = key.fast_hash(); + KeysState::Column(Column::String(col)) => col.iter().for_each(|key| { + let hash = key.as_bytes().fast_hash(); if filter.contains(&hash) { bitmap.set(idx, true); } diff --git a/src/query/storages/fuse/src/statistics/cluster_statistics.rs b/src/query/storages/fuse/src/statistics/cluster_statistics.rs index 01b2316a70e0..0b72a4794aee 100644 --- a/src/query/storages/fuse/src/statistics/cluster_statistics.rs +++ b/src/query/storages/fuse/src/statistics/cluster_statistics.rs @@ -106,7 +106,7 @@ impl ClusterStatsGenerator { if !self.cluster_key_index.is_empty() { let indices = vec![0u32, block.num_rows() as u32 - 1]; - block = block.take(&indices, &mut None)?; + block = block.take(&indices)?; } block = self diff --git a/src/query/storages/fuse/src/table_functions/clustering_statistics.rs b/src/query/storages/fuse/src/table_functions/clustering_statistics.rs index 113408f2b54c..724630e158d8 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_statistics.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_statistics.rs @@ -159,7 +159,7 @@ impl<'a> ClusteringStatisticsImpl<'a> { let len = std::cmp::min(snapshot.summary.block_count as usize, limit); let mut segment_name = Vec::with_capacity(len); - let mut block_name = StringColumnBuilder::with_capacity(len, len); + let mut block_name = StringColumnBuilder::with_capacity(len); let mut max = Vec::with_capacity(len); let mut min = Vec::with_capacity(len); let mut level = Vec::with_capacity(len); @@ -196,8 +196,7 @@ impl<'a> ClusteringStatisticsImpl<'a> { for block in segment.blocks.iter() { let block = block.as_ref(); - block_name.put_str(&block.location.0); - block_name.commit_row(); + block_name.put_and_commit(&block.location.0); let cluster_stats = block.cluster_stats.as_ref(); let clustered = block diff --git a/src/query/storages/fuse/src/table_functions/fuse_block.rs b/src/query/storages/fuse/src/table_functions/fuse_block.rs index fb09f3a98025..7a96a755ba5d 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_block.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_block.rs @@ -79,7 +79,7 @@ impl TableMetaFunc for FuseBlock { let snapshot_id = snapshot.snapshot_id.simple().to_string(); let timestamp = snapshot.timestamp.unwrap_or_default().timestamp_micros(); - let mut block_location = StringColumnBuilder::with_capacity(len, len); + let mut block_location = StringColumnBuilder::with_capacity(len); let mut block_size = Vec::with_capacity(len); let mut file_size = Vec::with_capacity(len); let mut row_count = Vec::with_capacity(len); @@ -102,8 +102,7 @@ impl TableMetaFunc for FuseBlock { for block in segment.blocks.iter() { let block = block.as_ref(); - block_location.put_str(&block.location.0); - block_location.commit_row(); + block_location.put_and_commit(&block.location.0); block_size.push(block.block_size); file_size.push(block.file_size); row_count.push(block.row_count); diff --git a/src/query/storages/fuse/src/table_functions/fuse_column.rs b/src/query/storages/fuse/src/table_functions/fuse_column.rs index 24caf6d4e6ed..7ae42f3025df 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_column.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_column.rs @@ -78,13 +78,13 @@ impl TableMetaFunc for FuseColumn { let snapshot_id = snapshot.snapshot_id.simple().to_string(); let timestamp = snapshot.timestamp.unwrap_or_default().timestamp_micros(); - let mut block_location = StringColumnBuilder::with_capacity(len, len); + let mut block_location = StringColumnBuilder::with_capacity(len); let mut block_size = vec![]; let mut file_size = vec![]; let mut row_count = vec![]; - let mut column_name = StringColumnBuilder::with_capacity(len, len); - let mut column_type = StringColumnBuilder::with_capacity(len, len); + let mut column_name = StringColumnBuilder::with_capacity(len); + let mut column_type = StringColumnBuilder::with_capacity(len); let mut column_id = vec![]; let mut block_offset = vec![]; let mut bytes_compressed = vec![]; @@ -110,17 +110,14 @@ impl TableMetaFunc for FuseColumn { for (id, column) in block.col_metas.iter() { if let Some(f) = leaf_fields.iter().find(|f| f.column_id == *id) { - block_location.put_str(&block.location.0); - block_location.commit_row(); + block_location.put_and_commit(&block.location.0); block_size.push(block.block_size); file_size.push(block.file_size); row_count.push(column.total_rows() as u64); - column_name.put_str(&f.name); - column_name.commit_row(); + column_name.put_and_commit(&f.name); - column_type.put_str(&f.data_type.to_string()); - column_type.commit_row(); + column_type.put_and_commit(f.data_type.to_string()); column_id.push(*id); diff --git a/src/query/storages/fuse/src/table_functions/fuse_encoding.rs b/src/query/storages/fuse/src/table_functions/fuse_encoding.rs index 5e5dda4cc64f..f6ab657277c2 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_encoding.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_encoding.rs @@ -233,11 +233,11 @@ impl<'a> FuseEncodingImpl<'a> { let mut validity_size = Vec::new(); let mut compressed_size = Vec::new(); let mut uncompressed_size = Vec::new(); - let mut l1 = StringColumnBuilder::with_capacity(0, 0); + let mut l1 = StringColumnBuilder::with_capacity(0); let mut l2 = NullableColumnBuilder::::with_capacity(0, &[]); - let mut table_name = StringColumnBuilder::with_capacity(0, 0); - let mut column_name = StringColumnBuilder::with_capacity(0, 0); - let mut column_type = StringColumnBuilder::with_capacity(0, 0); + let mut table_name = StringColumnBuilder::with_capacity(0); + let mut column_name = StringColumnBuilder::with_capacity(0); + let mut column_type = StringColumnBuilder::with_capacity(0); let mut all_num_rows = 0; for (table, columns_info) in info { for (type_str, column_info) in columns_info { @@ -254,8 +254,7 @@ impl<'a> FuseEncodingImpl<'a> { validity_size.push(p.validity_size); compressed_size.push(p.compressed_size); uncompressed_size.push(p.uncompressed_size); - l1.put_str(&encoding_to_string(&p.body)); - l1.commit_row(); + l1.put_and_commit(encoding_to_string(&p.body)); let l2_encoding = match &p.body { PageBody::Dict(dict) => Some(encoding_to_string(&dict.indices.body)), PageBody::Freq(freq) => freq diff --git a/src/query/storages/parquet/Cargo.toml b/src/query/storages/parquet/Cargo.toml index e8b6ef08c014..6cd32a9da075 100644 --- a/src/query/storages/parquet/Cargo.toml +++ b/src/query/storages/parquet/Cargo.toml @@ -44,7 +44,6 @@ thrift = { workspace = true } typetag = { workspace = true } [dev-dependencies] -databend-common-sql = { workspace = true } tempfile = { workspace = true } [lints] diff --git a/src/query/storages/stage/src/read/row_based/formats/csv/block_builder.rs b/src/query/storages/stage/src/read/row_based/formats/csv/block_builder.rs index 199900f4d283..2f979a03a477 100644 --- a/src/query/storages/stage/src/read/row_based/formats/csv/block_builder.rs +++ b/src/query/storages/stage/src/read/row_based/formats/csv/block_builder.rs @@ -83,15 +83,13 @@ impl CsvDecoder { } EmptyFieldAs::String => match builder { ColumnBuilder::String(b) => { - b.put_str(""); - b.commit_row(); + b.put_and_commit(""); } ColumnBuilder::Nullable(box NullableColumnBuilder { builder: ColumnBuilder::String(b), validity, }) => { - b.put_str(""); - b.commit_row(); + b.put_and_commit(""); validity.push(true); } _ => { @@ -188,14 +186,8 @@ impl RowDecoder for CsvDecoder { fn flush(&self, columns: Vec, num_rows: usize) -> Vec { if let Some(projection) = &self.load_context.pos_projection { - let empty_strings = Column::String( - StringColumnBuilder { - need_estimated: false, - data: vec![], - offsets: vec![0; num_rows + 1], - } - .build(), - ); + let empty_strings = + Column::String(StringColumnBuilder::repeat_default(num_rows).build()); columns .into_iter() .enumerate() diff --git a/src/query/storages/system/src/malloc_stats_totals_table.rs b/src/query/storages/system/src/malloc_stats_totals_table.rs index d231d246fe66..4942edaaa69c 100644 --- a/src/query/storages/system/src/malloc_stats_totals_table.rs +++ b/src/query/storages/system/src/malloc_stats_totals_table.rs @@ -92,7 +92,7 @@ impl MallocStatsTotalsTable { } fn build_columns(node_name: &str) -> BuildResult { - let mut names = StringColumnBuilder::with_capacity(6, 6 * 4); + let mut names = StringColumnBuilder::with_capacity(6); let mut values: Vec = vec![]; let e = epoch::mib()?; diff --git a/tests/sqllogictests/src/main.rs b/tests/sqllogictests/src/main.rs index 7345e8cc8039..8a839b145852 100644 --- a/tests/sqllogictests/src/main.rs +++ b/tests/sqllogictests/src/main.rs @@ -79,7 +79,10 @@ pub async fn main() -> Result<()> { // Run mock sources for dictionary test. run_mock_sources(); - + println!( + "Run sqllogictests with args: {}", + std::env::args().skip(1).collect::>().join(" ") + ); let args = SqlLogicTestArgs::parse(); let handlers = match &args.handlers { Some(hs) => hs.iter().map(|s| s.as_str()).collect(), diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0003_ddl_alter_table.test b/tests/sqllogictests/suites/base/05_ddl/05_0003_ddl_alter_table.test index 91f6f02fd83f..4f260ccfc703 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0003_ddl_alter_table.test +++ b/tests/sqllogictests/suites/base/05_ddl/05_0003_ddl_alter_table.test @@ -122,7 +122,7 @@ statement ok set hide_options_in_show_create_table=1 statement ok -CREATE TABLE "05_0003_at_t4" ( a string not null, b string null, c array(string) null, d tuple(string, string) null ) ENGINE=FUSE COMPRESSION='zstd' STORAGE_FORMAT='native' +CREATE OR REPLACE TABLE "05_0003_at_t4" ( a string not null, b string null, c array(string) null, d tuple(string, string) null ) ENGINE=FUSE COMPRESSION='zstd' STORAGE_FORMAT='native' statement ok INSERT INTO TABLE `05_0003_at_t4` values('a', 'b', ['c1', 'c2'], ('d1', 'd2')) @@ -154,7 +154,7 @@ SHOW CREATE TABLE `05_0003_at_t4` ---- 05_0003_at_t4 CREATE TABLE "05_0003_at_t4" ( a BINARY NOT NULL, b BINARY NULL, c ARRAY(BINARY) NULL, d TUPLE(1 BINARY, 2 BINARY) NULL ) ENGINE=FUSE -query +query SELECT * FROM `05_0003_at_t4` ---- 61 62 [6331,6332] (6431,6432) diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0027_func_fuse_encoding.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0027_func_fuse_encoding.test index 502bea556252..af4c570443ac 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0027_func_fuse_encoding.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0027_func_fuse_encoding.test @@ -49,7 +49,7 @@ insert into t2 select 'b' as a from numbers(10) limit 10; query III select level_one,level_two,count(*) from fuse_encoding('db_09_0027') where table_name='t2' group by level_one,level_two order by level_one; ---- -OneValue NULL 2 +Common(Lz4) NULL 2 statement ok optimize table t2 compact; @@ -57,12 +57,12 @@ optimize table t2 compact; query III select level_one,level_two,count(*) from fuse_encoding('db_09_0027') where table_name='t2' group by level_one,level_two order by level_one; ---- -Dict Rle 1 +Common(Lz4) NULL 1 query III select level_one,level_two,count(*) from fuse_encoding('db_09_0027') where column_name='d' group by level_one,level_two order by level_one; ---- -Dict Rle 1 +Common(Lz4) NULL 1 query III select level_one,level_two,count(*) from fuse_encoding('db_09_0027') where column_type like '%INT%' group by level_one,level_two order by level_one; @@ -71,4 +71,4 @@ DeltaBitpack NULL 1 OneValue NULL 1 statement ok -DROP DATABASE db_09_0027 \ No newline at end of file +DROP DATABASE db_09_0027 diff --git a/tests/sqllogictests/suites/base/20+_others/20_0013_query_result_cache.test b/tests/sqllogictests/suites/base/20+_others/20_0013_query_result_cache.test index d2fb2e8c3466..6cbba575ac9a 100644 --- a/tests/sqllogictests/suites/base/20+_others/20_0013_query_result_cache.test +++ b/tests/sqllogictests/suites/base/20+_others/20_0013_query_result_cache.test @@ -89,7 +89,7 @@ EXPLAIN SELECT * FROM t1, t2 ORDER BY a, b; ReadQueryResultCache ├── SQL: SELECT * FROM t1, t2 ORDER BY a, b ├── Number of rows: 9 -└── Result size: 125 +└── Result size: 144 @@ -225,7 +225,7 @@ SELECT * FROM t1 ORDER BY a; # Because the cache key is generated from AST. query I -select * FRoM t1 OrDER bY a; +select * FRoM t1 OrDER bY a; ---- 1 2 diff --git a/tests/sqllogictests/suites/mode/cluster/distributed_delete.sql.test b/tests/sqllogictests/suites/mode/cluster/distributed_delete.test similarity index 86% rename from tests/sqllogictests/suites/mode/cluster/distributed_delete.sql.test rename to tests/sqllogictests/suites/mode/cluster/distributed_delete.test index 955a5ffc50a2..8dfb3d9b73d1 100644 --- a/tests/sqllogictests/suites/mode/cluster/distributed_delete.sql.test +++ b/tests/sqllogictests/suites/mode/cluster/distributed_delete.test @@ -6,7 +6,7 @@ drop table if exists t_origin; # make sure there will be multiple blocks there, by shrink the `row_per_block` statement ok -create table t (id int not null, c1 int not null, c2 int not null) row_per_block=10; +create or replace table t (id int not null, c1 int not null, c2 int not null) row_per_block=10; # generate test data statement ok @@ -20,7 +20,7 @@ insert into t select number, number * 10, number * 5 from numbers(1500) where nu # "backup" t statement ok -create table t_origin as select * from t; +create or replace table t_origin as select * from t; # do the deletion (in distributed settings) # two segments are totally rewritten, one segment is reserved @@ -45,7 +45,7 @@ select (select sum(c2) from t_origin where id % 3 != 0 or id <= 500) = (select s # backup t again statement ok -create table t_after_delete as select * from t; +create or replace table t_after_delete as select * from t; # one segment is totally deleted, two segments are reserved statement ok @@ -69,7 +69,7 @@ select (select sum(c2) from t_after_delete where id > 499) = (select sum(c2) fro # backup t again statement ok -create table t_after_delete_2 as select * from t; +create or replace table t_after_delete_2 as select * from t; # some block is totally deleted, some block is reserved, some block is partially reserved statement ok @@ -100,13 +100,13 @@ statement ok drop table if exists del_id; statement ok -create table t (id int not null, c1 int not null, c2 int not null) row_per_block=3; +create or replace table t (id int not null, c1 int not null, c2 int not null) row_per_block=3; statement ok insert into t select number, number * 5, number * 7 from numbers(50); statement ok -create table del_id (id int not null) as select cast(FLOOR(0 + RAND(number) * 50), int) from numbers(10); +create or replace table del_id (id int not null) as select cast(FLOOR(0 + RAND(number) * 50), int) from numbers(10); statement ok delete from t where id in (select id from del_id); diff --git a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test index 26beed75d057..32405dc66997 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test @@ -26,7 +26,7 @@ where t_10.a = t_1000.a and t_100.a = t_1000.a ---- Memo ├── root group: #8 -├── estimated memory: 11.53 KiB +├── estimated memory: 14.91 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -89,7 +89,7 @@ group by t_10.a, t_100.a ---- Memo ├── root group: #8 -├── estimated memory: 28.83 KiB +├── estimated memory: 37.27 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test index ed0b61f569ae..c6d9effd4cd4 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test @@ -25,7 +25,7 @@ select * from t_10, t_100, t_1000 where t_10.a = t_1000.a and t_100.a = t_1000.a ---- Memo ├── root group: #5 -├── estimated memory: 8.97 KiB +├── estimated memory: 11.59 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -73,7 +73,7 @@ select * from t_1000 left join t_10 on t_1000.a = t_10.a left join t_100 on t_10 ---- Memo ├── root group: #5 -├── estimated memory: 8.33 KiB +├── estimated memory: 10.77 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -119,7 +119,7 @@ select * from t_1000 right join t_10 on t_1000.a = t_10.a right join t_100 on t_ ---- Memo ├── root group: #5 -├── estimated memory: 7.05 KiB +├── estimated memory: 9.11 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -161,7 +161,7 @@ select * from t_1000 full join t_10 on t_1000.a = t_10.a full join t_100 on t_10 ---- Memo ├── root group: #5 -├── estimated memory: 7.05 KiB +├── estimated memory: 9.11 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -203,7 +203,7 @@ select * from t_10, t_100, t_1000 ---- Memo ├── root group: #5 -├── estimated memory: 5.77 KiB +├── estimated memory: 7.45 KiB ├── Group #0 │ ├── Best properties │ │ └── { dist: Any }: expr: #0, cost: 10.000, children: [] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test index b135f0c753e5..cb8c9ad7328f 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test @@ -29,7 +29,7 @@ limit 10 ---- Memo ├── root group: #10 -├── estimated memory: 30.11 KiB +├── estimated memory: 38.92 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] diff --git a/tests/sqllogictests/suites/mode/cluster/shuffle_join.test b/tests/sqllogictests/suites/mode/cluster/shuffle_join.test index 2e9b24766374..6ecd8dc5531e 100644 --- a/tests/sqllogictests/suites/mode/cluster/shuffle_join.test +++ b/tests/sqllogictests/suites/mode/cluster/shuffle_join.test @@ -6,7 +6,7 @@ drop table if exists t1 statement ok -create table t1(a int not null, b int not null) +create or replace table t1(a int not null, b int not null) statement ok @@ -18,7 +18,7 @@ drop table if exists t2 statement ok -create table t2(a int not null, d int not null) +create or replace table t2(a int not null, d int not null) statement ok @@ -77,7 +77,7 @@ statement ok drop table t2 statement ok -create table t1(a int not null, b int not null) +create or replace table t1(a int not null, b int not null) statement ok @@ -85,7 +85,7 @@ insert into t1 values(1, 2), (1, 3), (2, 4) statement ok -create table t2(c int not null, d int not null) +create or replace table t2(c int not null, d int not null) statement ok @@ -108,11 +108,11 @@ statement ok drop table t2 statement ok -create table t1(a int not null, b int not null) +create or replace table t1(a int not null, b int not null) statement ok -create table t2(c int not null, d int not null) +create or replace table t2(c int not null, d int not null) statement ok @@ -219,20 +219,20 @@ statement ok drop table if exists t statement ok -create table t(a int not null) +create or replace table t(a int not null) statement ok insert into t values(1),(2),(3) statement ok -create table t1(b float not null) +create or replace table t1(b float not null) statement ok insert into t1 values(1.0),(2.0),(3.0) statement ok -create table t2(c smallint unsigned null) +create or replace table t2(c smallint unsigned null) statement ok @@ -316,10 +316,10 @@ statement ok drop table t statement ok -CREATE TABLE t3(c0 BIGINT NULL, c1 DOUBLE NULL) +CREATE or replace TABLE t3(c0 BIGINT NULL, c1 DOUBLE NULL) statement ok -CREATE TABLE t4(c0 FLOAT NULL) +CREATE or replace TABLE t4(c0 FLOAT NULL) query I SELECT SUM(count) FROM (SELECT ((false IS NOT NULL AND false) ::INT64)as count FROM t4 NATURAL LEFT JOIN t3) as res @@ -339,11 +339,11 @@ statement ok drop table t4 statement ok -create table t1_null(a int null , b int null) +create or replace table t1_null(a int null , b int null) statement ok -create table t2_null(a int null , b int null) +create or replace table t2_null(a int null , b int null) statement ok @@ -394,13 +394,13 @@ select * from numbers(10) x join (select 1::UInt64 number) y on x.number = y.num 1 1 statement ok -CREATE TABLE onecolumn (x INT NULL) +CREATE or replace TABLE onecolumn (x INT NULL) statement ok INSERT INTO onecolumn(x) VALUES (44), (NULL), (42) statement ok -CREATE TABLE empty (x INT not null) +CREATE or replace TABLE empty (x INT not null) statement ok SELECT * FROM onecolumn AS a(x) CROSS JOIN empty AS b(y) @@ -418,10 +418,10 @@ statement ok drop table if exists z1 statement ok -CREATE TABLE z0(c0BOOLEAN BOOLEAN NULL, c1FLOAT DOUBLE NULL) +CREATE or replace TABLE z0(c0BOOLEAN BOOLEAN NULL, c1FLOAT DOUBLE NULL) statement ok -CREATE TABLE z1(c0BOOLEAN BOOL NULL DEFAULT(true)) +CREATE or replace TABLE z1(c0BOOLEAN BOOL NULL DEFAULT(true)) statement ok INSERT INTO z0(c1float, c0boolean) VALUES (0.27563244104385376, false), (0.7913353443145752, false) @@ -437,13 +437,13 @@ statement ok drop table z1 statement ok -CREATE TABLE t0(c0BOOLEAN BOOLEAN NULL DEFAULT(false)) +CREATE or replace TABLE t0(c0BOOLEAN BOOLEAN NULL DEFAULT(false)) statement ok -CREATE TABLE t1(c0BOOLEAN BOOL NULL, c1FLOAT FLOAT NOT NULL DEFAULT(0.4661566913127899)) +CREATE or replace TABLE t1(c0BOOLEAN BOOL NULL, c1FLOAT FLOAT NOT NULL DEFAULT(0.4661566913127899)) statement ok -CREATE TABLE t2(c0VARCHAR VARCHAR NULL, c1FLOAT DOUBLE NULL DEFAULT(0.954969048500061), c2VARCHAR VARCHAR NULL) +CREATE or replace TABLE t2(c0VARCHAR VARCHAR NULL, c1FLOAT DOUBLE NULL DEFAULT(0.954969048500061), c2VARCHAR VARCHAR NULL) statement ok INSERT INTO t0(c0boolean) VALUES (false), (true) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain.test b/tests/sqllogictests/suites/mode/standalone/explain/explain.test index d29b67e5dd6f..0f7686c52c50 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/explain.test @@ -5,10 +5,10 @@ statement ok drop table if exists t2 all statement ok -create table t1 as select number as a, number as b from numbers(1) +create OR REPLACE table t1 as select number as a, number as b from numbers(1) statement ok -create table t2 as select number as a, number as b from numbers(5) +create OR REPLACE table t2 as select number as a, number as b from numbers(5) statement error 1005 explain explain select t1.a from t1 where a > 0 @@ -247,9 +247,9 @@ FILE_FORMAT = ( SINGLE = false query T -explain syntax create table t3(a int64, b uint64, c float64, d string, e array(int32), f tuple(f1 bool, f2 string)) engine=fuse cluster by (a, b, c) comment='test' compression='LZ4' +explain syntax create OR REPLACE table t3(a int64, b uint64, c float64, d string, e array(int32), f tuple(f1 bool, f2 string)) engine=fuse cluster by (a, b, c) comment='test' compression='LZ4' ---- -CREATE TABLE t3 ( +CREATE OR REPLACE TABLE t3 ( a Int64, b UInt64, c Float64, @@ -409,9 +409,9 @@ explain syntax create database db1 engine=default CREATE DATABASE db1 ENGINE = DEFAULT query T -explain syntax create table t3(a int64, b uint64, c float64, d string, e array(int32), f tuple(f1 bool, f2 string)) engine=fuse cluster by (a, b, c) comment='test' compression='LZ4' +explain syntax create OR REPLACE table t3(a int64, b uint64, c float64, d string, e array(int32), f tuple(f1 bool, f2 string)) engine=fuse cluster by (a, b, c) comment='test' compression='LZ4' ---- -CREATE TABLE t3 ( +CREATE OR REPLACE TABLE t3 ( a Int64, b UInt64, c Float64, @@ -584,7 +584,7 @@ statement ok drop table if exists t3 statement ok -create table t3 as select number as a, number as b from numbers(10) +create OR REPLACE table t3 as select number as a, number as b from numbers(10) query T explain select * from t1,t2, t3 where (t1.a > 1 and t2.a > 2) or (t1.b < 3 and t2.b < 4) or t3.a = 2 @@ -750,7 +750,7 @@ Limit ├── push downs: [filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)], limit: NONE] └── estimated rows: 5.00 -query +query explain select * from t1,t2 where (t1.a > 1 or t1.b < 2) and (t1.a > 1 or t1.b < 2) ---- HashJoin @@ -785,7 +785,7 @@ HashJoin ├── push downs: [filters: [], limit: NONE] └── estimated rows: 5.00 -query +query explain select count(distinct a) from t1; ---- AggregateFinal @@ -817,7 +817,7 @@ AggregateFinal ├── push downs: [filters: [], limit: NONE] └── estimated rows: 1.00 -query +query explain select count_distinct(a) from t1; ---- AggregateFinal @@ -849,7 +849,7 @@ AggregateFinal ├── push downs: [filters: [], limit: NONE] └── estimated rows: 1.00 -query +query explain select * from (values(1, 'a'),(2, 'b')) t(c1,c2) ---- ConstantTableScan @@ -863,28 +863,28 @@ drop table t1 statement ok drop table t2 -query +query explain syntax select * from read_parquet('p1', 'p2', 'p3'); ---- SELECT * FROM read_parquet('p1', 'p2', 'p3') -query +query explain syntax select * from read_parquet(prune_page=>true, refresh_meta_cache=>true); ---- SELECT * FROM read_parquet(prune_page=>TRUE, refresh_meta_cache=>TRUE) -query +query explain syntax select * from read_parquet('p1', 'p2', 'p3', prune_page=>true, refresh_meta_cache=>true); ---- SELECT * FROM read_parquet('p1', 'p2', 'p3', prune_page=>TRUE, refresh_meta_cache=>TRUE) -query +query explain syntax select * from read_parquet('p1', 'p2', 'p3', prune_page=>true, refresh_meta_cache=>true); ---- SELECT * @@ -895,9 +895,9 @@ statement ok drop table if exists t4 statement ok -create table t4(a int, b string); +create OR REPLACE table t4(a int, b string); -query +query explain select * from t4 where a = 1 and try_cast(get(try_parse_json(b),'bb') as varchar) = 'xx'; ---- Filter @@ -920,7 +920,7 @@ drop view if exists v4 statement ok create view v4 as select a as a, try_cast(get(try_parse_json(b), 'bb') as varchar) as b from t4; -query +query explain select * from v4 where b = 'xx'; ---- EvalScalar @@ -969,10 +969,10 @@ statement ok drop table if exists b statement ok -create table a(id int, c1 INT NULL) +create OR REPLACE table a(id int, c1 INT NULL) statement ok -create table b(id int, c1 INT NULL) +create OR REPLACE table b(id int, c1 INT NULL) statement ok insert into a values(1, 1683648000) @@ -1095,10 +1095,10 @@ statement ok drop table if exists b statement ok -create table a(id int, c1 INT NULL) +create OR REPLACE table a(id int, c1 INT NULL) statement ok -create table b(id int, c1 INT NULL) +create OR REPLACE table b(id int, c1 INT NULL) statement ok insert into a values (1, 2), (2, 4), (3, 6) @@ -1149,7 +1149,7 @@ statement ok drop table b; statement ok -create table t1(a int, b int); +create OR REPLACE table t1(a int, b int); statement ok insert into t1 values(1, 2), (2, 3), (3, 4); @@ -1234,10 +1234,10 @@ statement ok drop table if exists t2; statement ok -CREATE TABLE t1(i int, j int); +CREATE OR REPLACE TABLE t1(i int, j int); statement ok -CREATE TABLE t2(k int, l int); +CREATE OR REPLACE TABLE t2(k int, l int); statement ok INSERT INTO t1 VALUES (1, 2), (2, 3), (3, 4); @@ -1293,7 +1293,7 @@ statement ok drop table if exists t3; statement ok -CREATE TABLE t3(a int, b map(string, string) null, c map(string, variant) null); +CREATE OR REPLACE TABLE t3(a int, b map(string, string) null, c map(string, variant) null); statement ok INSERT INTO t3 VALUES (1, {'k1':'a', 'k2':'b'}, {'k1':'"a"', 'k2':'100'}), (2, null, null), (3, {'k3':'z'}, {'k3':'"z"'}); @@ -1365,7 +1365,7 @@ statement ok drop table t3; statement ok -CREATE TABLE customers AS SELECT +CREATE OR REPLACE TABLE customers AS SELECT number % 100 AS customer_id, concat('Customer ', to_string(number % 100)) AS customer_name, CASE WHEN (rand() * 10000)::int % 3 = 0 THEN 'Small' @@ -1378,7 +1378,7 @@ FROM numbers(100); statement ok -CREATE TABLE products AS SELECT +CREATE OR REPLACE TABLE products AS SELECT number % 10 AS product_id, concat('Product ', to_string(number % 10)) AS product_name, (rand() * 10000 % 2000 * 0.01)::decimal(10, 2) AS price, @@ -1390,7 +1390,7 @@ CREATE TABLE products AS SELECT FROM numbers(10); statement ok -CREATE TABLE sales AS SELECT +CREATE OR REPLACE TABLE sales AS SELECT number % 500 AS sale_id, number % 100 AS product_id, number % 100 AS customer_id, @@ -1452,10 +1452,10 @@ statement ok drop table sales; statement ok -create table t1 (a int); +create OR REPLACE table t1 (a int); statement ok -create table t2 (b int); +create OR REPLACE table t2 (b int); query T explain select date from (select *, 'year' as date from t1 left join t2 on t1.a = t2.b) where date = ''; @@ -1506,10 +1506,10 @@ statement ok drop table t2; statement ok -create table t1(a int, b int, c varchar(20)); +create OR REPLACE table t1(a int, b int, c varchar(20)); statement ok -create table t2(a int, b int, c varchar(20)); +create OR REPLACE table t2(a int, b int, c varchar(20)); # scalar subquery and sort plan contains count() agg function. query T diff --git a/tests/sqllogictests/suites/mode/standalone/limit.test b/tests/sqllogictests/suites/mode/standalone/limit.test index 656b21f66658..35eafedb7fa5 100644 --- a/tests/sqllogictests/suites/mode/standalone/limit.test +++ b/tests/sqllogictests/suites/mode/standalone/limit.test @@ -2,10 +2,10 @@ # Both results are reasonable statement ok -create table t1 as select number as a from numbers(10); +create or replace table t1 as select number as a from numbers(10); statement ok -create table t2 as select number as b from numbers(100); +create or replace table t2 as select number as b from numbers(100); query I rowsort select * from t1 left join t2 on t1.a = t2.b limit 10 offset 5; @@ -17,10 +17,7 @@ select * from t1 left join t2 on t1.a = t2.b limit 10 offset 5; 9 9 statement ok -drop table if exists t; - -statement ok -create table t(id int, top int); +create or replace table t(id int, top int); statement ok insert into t values(1,10),(2,20),(3,30); diff --git a/tests/sqllogictests/suites/mode/standalone/pr15804.test b/tests/sqllogictests/suites/mode/standalone/pr15804.test index 42c5c5cb56b3..a4e21d5e2492 100644 --- a/tests/sqllogictests/suites/mode/standalone/pr15804.test +++ b/tests/sqllogictests/suites/mode/standalone/pr15804.test @@ -1,8 +1,13 @@ statement ok -create or replace table t2(c varchar); +create or replace TRANSIENT table t2(c varchar) ; statement ok -insert into t2 select repeat('a', 1000000) from numbers(3000); +insert into t2 select repeat('a', 1000000) from numbers(300); + +query R +select avg(length(c)) from t2 ; +---- +1000000.0 statement ok -select * from t2 ignore_result; \ No newline at end of file +drop table t2; diff --git a/tests/sqllogictests/suites/query/functions/02_0005_function_compare.test b/tests/sqllogictests/suites/query/functions/02_0005_function_compare.test index 92558ba55507..c09a7123b538 100644 --- a/tests/sqllogictests/suites/query/functions/02_0005_function_compare.test +++ b/tests/sqllogictests/suites/query/functions/02_0005_function_compare.test @@ -72,6 +72,16 @@ select '\%' not like '\%' ---- 1 +query B +select ('PROMO' || number::string) a, a like 'PROMO%', + a < 'PROMO1', a > 'PROMO1', + a <= 'PROMO1' , a >= 'PROMO1', + a = 'PROMO1' from numbers(3) order by a +---- +PROMO0 1 1 0 1 0 0 +PROMO1 1 0 0 1 1 1 +PROMO2 1 0 1 0 1 0 + statement ok select * from numbers(10) where null = true diff --git a/tests/sqllogictests/suites/query/join/runtime_filter.test b/tests/sqllogictests/suites/query/join/runtime_filter.test index bdb84d3eb936..e8165beb6125 100644 --- a/tests/sqllogictests/suites/query/join/runtime_filter.test +++ b/tests/sqllogictests/suites/query/join/runtime_filter.test @@ -1,12 +1,12 @@ statement ok -CREATE TABLE table1 ( +CREATE OR REPLACE TABLE table1 ( key1 String, key2 String, key3 String ); statement ok -CREATE TABLE table2 ( +CREATE OR REPLACE TABLE table2 ( key1 String, key2 String, key3 String diff --git a/tests/sqllogictests/suites/stage/formats/parquet/options/null_if.test b/tests/sqllogictests/suites/stage/formats/parquet/options/null_if.test index d6acbbda65a1..65ab8a82a226 100644 --- a/tests/sqllogictests/suites/stage/formats/parquet/options/null_if.test +++ b/tests/sqllogictests/suites/stage/formats/parquet/options/null_if.test @@ -26,7 +26,7 @@ remove @data/unload/parquet/null_if/ query copy into @data/unload/parquet/null_if from string ---- -3 40 365 +3 56 365 statement ok drop file format if exists parquet_null_if diff --git a/tests/suites/0_stateless/20+_others/20_0014_sort_spill.result b/tests/suites/0_stateless/20+_others/20_0014_sort_spill.result index 0153132c653c..9a551562af15 100644 --- a/tests/suites/0_stateless/20+_others/20_0014_sort_spill.result +++ b/tests/suites/0_stateless/20+_others/20_0014_sort_spill.result @@ -2,9 +2,9 @@ 0 1 NULL -=================== +==Test if the spill is activated== 2 -=================== +==Enable sort_spilling_bytes_threshold_per_proc== 0 1 NULL @@ -14,12 +14,12 @@ NULL 3 7 2 8 1 9 -=================== +==Test abc== one Two 4 1 -=================== +==Test xy== 2 NULL 2 5 NULL 6 @@ -32,9 +32,9 @@ NULL 6 4 8 NULL 6 2 5 -=================== +==Test a== 16 -=================== +==Test b== 2 NULL 2 5 4 8 @@ -56,11 +56,11 @@ NULL NULL 2 NULL 4 8 ==TEST TOP-N SORT== -=================== +==Test c== 0 -=================== +==Test d== 1 -=================== +==Test e== 2 NULL 2 5 4 8 @@ -70,5 +70,5 @@ NULL NULL NULL 6 NULL NULL 2 5 -=================== +==Test f== 9 diff --git a/tests/suites/0_stateless/20+_others/20_0014_sort_spill.sql b/tests/suites/0_stateless/20+_others/20_0014_sort_spill.sql index da0b0fedce54..2fb47bcee8fe 100644 --- a/tests/suites/0_stateless/20+_others/20_0014_sort_spill.sql +++ b/tests/suites/0_stateless/20+_others/20_0014_sort_spill.sql @@ -14,21 +14,21 @@ SELECT c FROM t ORDER BY c; INSERT INTO temp_files_count SELECT COUNT() as count, 2 as number FROM system.temp_files; -SELECT '==================='; +SELECT '==Test if the spill is activated=='; -- Test if the spill is activated. set sort_spilling_bytes_threshold_per_proc = 0; SELECT any_if(count, number = 2) - any_if(count, number = 1) FROM temp_files_count; set sort_spilling_bytes_threshold_per_proc = 8; -SELECT '==================='; +SELECT '==Enable sort_spilling_bytes_threshold_per_proc=='; INSERT INTO temp_files_count SELECT COUNT() as count, 3 as number FROM system.temp_files; SELECT c FROM t ORDER BY c; SELECT c FROM t ORDER BY c DESC; SELECT a, b FROM t ORDER BY b; -SELECT '==================='; +SELECT '==Test abc=='; drop table if exists abc; CREATE TABLE abc ( a INT, b INT, c INT, d VARCHAR); @@ -36,7 +36,7 @@ INSERT INTO abc VALUES (1, 2, 3, 'one'), (4, 5, 6, 'Two'); SELECT d FROM abc ORDER BY lower(d); SELECT a FROM abc ORDER BY a DESC; -SELECT '==================='; +SELECT '==Test xy=='; drop table if exists xy; CREATE TABLE xy(x INT NULL, y INT NULL); @@ -46,13 +46,13 @@ SELECT x, y FROM xy ORDER BY y NULLS LAST; SELECT x, y FROM xy ORDER BY y DESC NULLS FIRST; INSERT INTO temp_files_count SELECT COUNT() as count, 4 as number FROM system.temp_files; -SELECT '==================='; +SELECT '==Test a=='; set sort_spilling_bytes_threshold_per_proc = 0; SELECT any_if(count, number = 4) - any_if(count, number = 3) FROM temp_files_count; set sort_spilling_bytes_threshold_per_proc = 8; -SELECT '==================='; +SELECT '==Test b=='; -- Test single thread set max_threads = 1; @@ -70,11 +70,11 @@ SELECT '==TEST TOP-N SORT=='; INSERT INTO temp_files_count SELECT COUNT() as count, 5 as number FROM system.temp_files; -SELECT '==================='; +SELECT '==Test c=='; SELECT c FROM t ORDER BY c limit 1; -SELECT '==================='; +SELECT '==Test d=='; INSERT INTO temp_files_count SELECT COUNT() as count, 6 as number FROM system.temp_files; @@ -82,7 +82,7 @@ set sort_spilling_bytes_threshold_per_proc = 0; SELECT any_if(count, number = 6) - any_if(count, number = 5) FROM temp_files_count; set sort_spilling_bytes_threshold_per_proc = 60; -SELECT '==================='; +SELECT '==Test e=='; INSERT INTO temp_files_count SELECT COUNT() as count, 7 as number FROM system.temp_files; @@ -90,7 +90,8 @@ SELECT x, y FROM xy ORDER BY x, y DESC NULLS FIRST LIMIT 3; SELECT x, y FROM xy ORDER BY x NULLS LAST, y DESC NULLS FIRST LIMIT 3; SELECT x, y FROM xy ORDER BY x NULLS FIRST, y DESC NULLS LAST LIMIT 3; -SELECT '==================='; +SELECT '==Test f=='; + INSERT INTO temp_files_count SELECT COUNT() as count, 8 as number FROM system.temp_files; unset max_vacuum_temp_files_after_query; diff --git a/tests/suites/1_stateful/00_stage/00_0001_copy_into_stage.result b/tests/suites/1_stateful/00_stage/00_0001_copy_into_stage.result old mode 100755 new mode 100644 index 45e99d16c182..453a0eb871bf --- a/tests/suites/1_stateful/00_stage/00_0001_copy_into_stage.result +++ b/tests/suites/1_stateful/00_stage/00_0001_copy_into_stage.result @@ -1,4 +1,4 @@ 20 160 160 -20 450 726 +20 530 726 2 20 160 160 diff --git a/tests/suites/1_stateful/05_formats/05_05_parquet/05_05_01_parquet_load_unload.result b/tests/suites/1_stateful/05_formats/05_05_parquet/05_05_01_parquet_load_unload.result old mode 100755 new mode 100644 index 2db90c1b0314..1e08a537ba24 --- a/tests/suites/1_stateful/05_formats/05_05_parquet/05_05_01_parquet_load_unload.result +++ b/tests/suites/1_stateful/05_formats/05_05_parquet/05_05_01_parquet_load_unload.result @@ -28,14 +28,14 @@ a"b 1 ['a"b'] {"k":"v"} 2044-05-06 10:25:02.868894 10.01 ('a',5) ['{"k":"v"}'] [ NULL 2 ['a'b'] [1] 2044-05-06 10:25:02.868894 -10.01 ('b',10) ['[1]'] [('b',10)] <<<< >>>> copy into @s1/unload1/ from test_load_unload -2 362 2703 +2 390 2703 >>>> truncate table test_load_unload >>>> copy into test_load_unload from @s1/unload1.parquet force=true; unload1.parquet 2 0 NULL NULL begin diff select end diff >>>> copy into @s1/unload2/ from test_load_unload -2 362 2703 +2 390 2703 begin diff parquet end diff >>>> truncate table test_load_unload diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0000_insert_with_stage.result b/tests/suites/1_stateful/07_stage_attachment/07_0000_insert_with_stage.result index 3ab5526ac4ad..2ec060c70b1b 100755 --- a/tests/suites/1_stateful/07_stage_attachment/07_0000_insert_with_stage.result +++ b/tests/suites/1_stateful/07_stage_attachment/07_0000_insert_with_stage.result @@ -1,7 +1,7 @@ sample.csv Succeeded 96 -168 +125 null 1 'Beijing' 100 China 2 'Shanghai' 80 China diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.result b/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.result index 22af85296075..98cb421d64e0 100644 --- a/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.result +++ b/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.result @@ -1,6 +1,6 @@ sample.csv 96 -168 +125 null 1 'Beijing' 100 China 2 'Shanghai' 80 China diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0002_insert_with_stage_deduplicate.result b/tests/suites/1_stateful/07_stage_attachment/07_0002_insert_with_stage_deduplicate.result index fb3372f5e36a..3d6fc1564c68 100644 --- a/tests/suites/1_stateful/07_stage_attachment/07_0002_insert_with_stage_deduplicate.result +++ b/tests/suites/1_stateful/07_stage_attachment/07_0002_insert_with_stage_deduplicate.result @@ -1,6 +1,6 @@ sample.csv 96 -168 +125 null 1 'Beijing' 100 China 2 'Shanghai' 80 China diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.result b/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.result index 967e7f28fcd8..03cfbcde73a9 100644 --- a/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.result +++ b/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.result @@ -7,12 +7,12 @@ <<<< Succeeded 20 -71 +51 null >>>> list @s1 <<<< ->>>> select a is null, b is null, c, d from t1 -true false NULL +>>>> select a is null, b is null, c, d = '' from t1 +true false NULL true <<<< >>>> drop table if exists t1 >>>> drop stage if exists s1 diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.sh b/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.sh index abe82c6ec3b8..dfdccf3e0c90 100755 --- a/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.sh +++ b/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.sh @@ -15,7 +15,7 @@ curl -s -u root: -XPOST "http://localhost:8000/v1/query" --header 'Content-Type: query "list @s1" -query "select a is null, b is null, c, d from t1" +query "select a is null, b is null, c, d = '' from t1" stmt "drop table if exists t1" stmt "drop stage if exists s1" diff --git a/tests/suites/1_stateful/08_select_stage/08_00_parquet/08_00_00_basic.result b/tests/suites/1_stateful/08_select_stage/08_00_parquet/08_00_00_basic.result old mode 100755 new mode 100644 index 52714d800bab..069a4e19ea94 --- a/tests/suites/1_stateful/08_select_stage/08_00_parquet/08_00_00_basic.result +++ b/tests/suites/1_stateful/08_select_stage/08_00_parquet/08_00_00_basic.result @@ -1,8 +1,8 @@ --- named internal stage -2 45 699 +2 53 699 1 2 3 4 5 6 -2 45 699 +2 53 699 --- external stage 1 2 3 4 5 6