diff --git a/Cargo.lock b/Cargo.lock index 7c56ffb5b2e9..ef7c573acb8f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,9 +47,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ "cfg-if", "const-random", @@ -57,6 +57,7 @@ dependencies = [ "once_cell", "serde", "version_check", + "zerocopy 0.7.32", ] [[package]] @@ -206,7 +207,7 @@ dependencies = [ "thiserror", "typed-builder", "uuid", - "zerocopy", + "zerocopy 0.6.6", ] [[package]] @@ -273,7 +274,7 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fab9e93ba8ce88a37d5a30dce4b9913b75413dc1ac56cb5d72e5a840543f829" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "arrow-arith", "arrow-array", "arrow-buffer", @@ -311,13 +312,13 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d02efa7253ede102d45a4e802a129e83bcc3f49884cab795b1ac223918e4318d" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", "half 2.2.1", - "hashbrown 0.14.0", + "hashbrown 0.14.3", "num", ] @@ -447,7 +448,7 @@ dependencies = [ "arrow-schema", "chrono", "half 2.2.1", - "indexmap 2.0.0", + "indexmap 2.2.3", "lexical-core", "num", "serde", @@ -475,13 +476,13 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "114a348ab581e7c9b6908fcab23cb39ff9f060eb19e72b13f8fb8eaa37f65d22" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "half 2.2.1", - "hashbrown 0.14.0", + "hashbrown 0.14.3", ] [[package]] @@ -500,7 +501,7 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d5c71e003202e67e9db139e5278c79f5520bb79922261dfe140e4637ee8b6108" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "arrow-array", "arrow-buffer", "arrow-data", @@ -2290,7 +2291,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edd72493923899c6f10c641bdbdeddc7183d6396641d99c1a0d1597f37f92e28" dependencies = [ "cfg-if", - "hashbrown 0.14.0", + "hashbrown 0.14.3", "lock_api", "once_cell", "parking_lot_core 0.9.8", @@ -2379,7 +2380,7 @@ dependencies = [ name = "databend-common-arrow" version = "0.1.0" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "arrow-array", "arrow-buffer", "arrow-data", @@ -2402,7 +2403,8 @@ dependencies = [ "flate2", "foreign_vec", "futures", - "hashbrown 0.14.0", + "hashbrown 0.14.3", + "indexmap 2.2.3", "itertools 0.10.5", "lexical-core", "log", @@ -2519,7 +2521,7 @@ name = "databend-common-cache" version = "0.1.0" dependencies = [ "bytes", - "hashbrown 0.14.0", + "hashbrown 0.14.3", "hashlink", "heapsize", ] @@ -2823,7 +2825,7 @@ dependencies = [ name = "databend-common-hashtable" version = "0.1.0" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "bumpalo", "cfg-if", "databend-common-arrow", @@ -3373,7 +3375,7 @@ dependencies = [ name = "databend-common-sql" version = "0.1.0" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "anyhow", "async-backtrace", "async-recursion", @@ -3413,7 +3415,7 @@ dependencies = [ "educe", "enum-as-inner 0.5.1", "globiter", - "indexmap 2.0.0", + "indexmap 2.2.3", "itertools 0.10.5", "jsonb 0.3.0 (git+https://github.com/datafuselabs/jsonb?rev=a7325f4)", "log", @@ -3524,7 +3526,7 @@ dependencies = [ name = "databend-common-storages-fuse" version = "0.1.0" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "arrow-array", "async-backtrace", "async-trait-fn", @@ -3560,7 +3562,7 @@ dependencies = [ "enum-as-inner 0.5.1", "futures", "futures-util", - "indexmap 2.0.0", + "indexmap 2.2.3", "itertools 0.10.5", "jsonb 0.3.0 (git+https://github.com/datafuselabs/jsonb?rev=a7325f4)", 
"log", @@ -6301,7 +6303,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "385f4ce6ecf3692d313ca3aa9bd3b3d8490de53368d6d94bedff3af8b6d9c58d" dependencies = [ "gix-hash 0.11.4", - "hashbrown 0.14.0", + "hashbrown 0.14.3", "parking_lot 0.12.1", ] @@ -6312,7 +6314,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45ad1b70efd1e77c32729d5a522f0c855e9827242feb10318e1acaf2259222c0" dependencies = [ "gix-hash 0.12.0", - "hashbrown 0.14.0", + "hashbrown 0.14.3", "parking_lot 0.12.1", ] @@ -7118,7 +7120,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 2.0.0", + "indexmap 2.2.3", "slab", "tokio", "tokio-util", @@ -7131,7 +7133,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6fb938100651db317719f46877a3cd82105920be4ea2ff49d55d1d65fa7bec1" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "auto_ops", "either", "float_eq", @@ -7178,16 +7180,16 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", ] [[package]] name = "hashbrown" -version = "0.14.0" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "allocator-api2", ] @@ -7197,7 +7199,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "312f66718a2d7789ffef4f4b7b213138ed9f1eb3aa1d0d82fc99f88fb3ffd26f" dependencies = [ - "hashbrown 0.14.0", + "hashbrown 0.14.3", ] [[package]] @@ -7684,7 +7686,7 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e98c1d0ad70fc91b8b9654b1f33db55e59579d3b3de2bffdced0fdb810570cb8" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "hashbrown 0.12.3", ] @@ -7701,12 +7703,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.0.0" +version = "2.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" dependencies = [ "equivalent", - "hashbrown 0.14.0", + "hashbrown 0.14.3", ] [[package]] @@ -7740,7 +7742,7 @@ version = "0.11.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fb7c1b80a1dfa604bb4a649a5c5aeef3d913f7c520cb42b40e534e8a61bcdfc" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "indexmap 1.9.2", "is-terminal", "itoa", @@ -8336,7 +8338,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efa59af2ddfad1854ae27d75009d538d0998b4b2fd47083e743ac1a10e46c60" dependencies = [ - "hashbrown 0.14.0", + "hashbrown 0.14.3", ] [[package]] @@ -8511,7 +8513,7 @@ version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8013adc0924307a90cba54e66167b94ce322c605c9c8b1f3f6a1621292a8d21" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "faststr", "fxhash", "paste", @@ -9321,7 +9323,7 @@ checksum = "1e32339a5dc40459130b3bd269e9892439f55b33e772d2a9d402a789baaf4e8a" dependencies = [ "futures-core", "futures-sink", - "indexmap 2.0.0", + "indexmap 2.2.3", "js-sys", "once_cell", "pin-project-lite", @@ -9438,7 +9440,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4d6a8c22fc714f0c2373e6091bf6f5e9b37b1bc0b1184874b7e0a4e303d318f" dependencies = [ "dlv-list 0.5.0", - "hashbrown 0.14.0", + "hashbrown 0.14.3", ] [[package]] @@ -9575,7 +9577,7 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0463cc3b256d5f50408c49a4be3a16674f4c8ceef60941709620a062b1f6bf4d" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "arrow-array", "arrow-buffer", "arrow-cast", @@ -9589,7 +9591,7 @@ dependencies = [ "chrono", "flate2", "futures", - "hashbrown 0.14.0", + "hashbrown 0.14.3", "lz4", "num", "num-bigint", @@ -9805,7 +9807,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e9c5ca077f7e8a8a92c770871f3904fd6b6db3d1dac76fc7f40169b28571ec7" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "anyhow", "async-recursion", "bytes", @@ -11743,7 +11745,7 @@ version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" dependencies = [ - "indexmap 2.0.0", + "indexmap 2.2.3", "itoa", "ryu", "serde", @@ -11972,7 +11974,7 @@ version = "0.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbdc537413bd6a291f57e2cc0a17579beb5ccaeea534e9c3001e39d9a07fa14f" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.6", "borsh 1.2.1", "serde", ] @@ -12854,7 +12856,7 @@ version = "0.19.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" dependencies = [ - "indexmap 2.0.0", + "indexmap 2.2.3", "serde", "serde_spanned", "toml_datetime", @@ -12867,7 +12869,7 @@ version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "396e4d48bbb2b7554c944bde63101b5ae446cff6ec4a24227428f15eb72ef338" dependencies = [ - "indexmap 2.0.0", + "indexmap 2.2.3", "toml_datetime", "winnow", ] @@ -14101,7 +14103,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6" dependencies = [ "byteorder", - "zerocopy-derive", + "zerocopy-derive 0.6.6", +] + +[[package]] +name = "zerocopy" +version = "0.7.32" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive 0.7.32", ] [[package]] @@ -14115,6 +14126,17 @@ dependencies = [ "syn 2.0.46", ] +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.46", +] + [[package]] name = "zeroize" version = "1.5.7" diff --git a/src/common/arrow/Cargo.toml b/src/common/arrow/Cargo.toml index b29d97742171..069ded6574c3 100644 --- a/src/common/arrow/Cargo.toml +++ b/src/common/arrow/Cargo.toml @@ -90,6 +90,7 @@ arrow-format = { workspace = true } bitpacking = "0.8.0" byteorder = { workspace = true } bytes = "^1" +indexmap = "2.2.3" log = { workspace = true } num = { version = "0.4", default-features = false, features = ["std"] } ordered-float = "3.7.0" diff --git a/src/common/arrow/src/arrow/array/binary/ffi.rs b/src/common/arrow/src/arrow/array/binary/ffi.rs index e294d213da52..c57f211acd03 100644 --- a/src/common/arrow/src/arrow/array/binary/ffi.rs +++ b/src/common/arrow/src/arrow/array/binary/ffi.rs @@ -26,8 +26,8 @@ unsafe impl ToFfi for BinaryArray { fn buffers(&self) -> Vec> { vec![ self.validity.as_ref().map(|x| x.as_ptr()), - Some(self.offsets.buffer().as_ptr().cast::()), - Some(self.values.as_ptr().cast::()), + Some(self.offsets.buffer().data_ptr().cast::()), + Some(self.values.data_ptr().cast::()), ] } diff --git a/src/common/arrow/src/arrow/array/binview/ffi.rs b/src/common/arrow/src/arrow/array/binview/ffi.rs new file mode 100644 index 000000000000..b28b349c5244 --- /dev/null +++ b/src/common/arrow/src/arrow/array/binview/ffi.rs @@ -0,0 +1,118 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use 
this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; +use std::sync::Arc; + +use crate::arrow::array::binview::BinaryViewArrayGeneric; +use crate::arrow::array::binview::View; +use crate::arrow::array::binview::ViewType; +use crate::arrow::array::FromFfi; +use crate::arrow::array::ToFfi; +use crate::arrow::bitmap::align; +use crate::arrow::error::Result; +use crate::arrow::ffi; + +unsafe impl ToFfi for BinaryViewArrayGeneric { + fn buffers(&self) -> Vec> { + let mut buffers = Vec::with_capacity(self.buffers.len() + 2); + buffers.push(self.validity.as_ref().map(|x| x.as_ptr())); + buffers.push(Some(self.views.data_ptr().cast::())); + buffers.extend(self.buffers.iter().map(|b| Some(b.data_ptr()))); + buffers + } + + fn offset(&self) -> Option { + let offset = self.views.offset(); + if let Some(bitmap) = self.validity.as_ref() { + if bitmap.offset() == offset { + Some(offset) + } else { + None + } + } else { + Some(offset) + } + } + + fn to_ffi_aligned(&self) -> Self { + let offset = self.views.offset(); + + let validity = self.validity.as_ref().map(|bitmap| { + if bitmap.offset() == offset { + bitmap.clone() + } else { + align(bitmap, offset) + } + }); + + Self { + data_type: self.data_type.clone(), + validity, + views: self.views.clone(), + buffers: self.buffers.clone(), + raw_buffers: self.raw_buffers.clone(), + phantom: Default::default(), + total_bytes_len: AtomicU64::new(self.total_bytes_len.load(Ordering::Relaxed)), + total_buffer_len: self.total_buffer_len, + } + } +} + +impl 
FromFfi for BinaryViewArrayGeneric { + unsafe fn try_from_ffi(array: A) -> Result { + let data_type = array.data_type().clone(); + + let validity = unsafe { array.validity() }?; + let views = unsafe { array.buffer::(1) }?; + + // n_buffers - 2, 2 means validity + views + let n_buffers = array.n_buffers(); + let mut remaining_buffers = n_buffers - 2; + if remaining_buffers <= 1 { + return Ok(Self::new_unchecked_unknown_md( + data_type, + views, + Arc::from([]), + validity, + None, + )); + } + + let n_variadic_buffers = remaining_buffers - 1; + let variadic_buffer_offset = n_buffers - 1; + + let variadic_buffer_sizes = + array.buffer_known_len::(variadic_buffer_offset, n_variadic_buffers)?; + remaining_buffers -= 1; + + let mut variadic_buffers = Vec::with_capacity(remaining_buffers); + + let offset = 2; + for (i, &size) in (offset..remaining_buffers + offset).zip(variadic_buffer_sizes.iter()) { + let values = unsafe { array.buffer_known_len::(i, size as usize) }?; + variadic_buffers.push(values); + } + + Ok(Self::new_unchecked_unknown_md( + data_type, + views, + Arc::from(variadic_buffers), + validity, + None, + )) + } +} diff --git a/src/common/arrow/src/arrow/array/binview/fmt.rs b/src/common/arrow/src/arrow/array/binview/fmt.rs new file mode 100644 index 000000000000..7679a13f0cf2 --- /dev/null +++ b/src/common/arrow/src/arrow/array/binview/fmt.rs @@ -0,0 +1,56 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Debug; +use std::fmt::Formatter; +use std::fmt::Result; +use std::fmt::Write; + +use crate::arrow::array::binview::BinaryViewArray; +use crate::arrow::array::binview::BinaryViewArrayGeneric; +use crate::arrow::array::binview::Utf8ViewArray; +use crate::arrow::array::binview::ViewType; +use crate::arrow::array::fmt::write_vec; +use crate::arrow::array::Array; + +pub fn write_value<'a, T: ViewType + ?Sized, W: Write>( + array: &'a BinaryViewArrayGeneric, + index: usize, + f: &mut W, +) -> Result +where + &'a T: Debug, +{ + let bytes = array.value(index).to_bytes(); + let writer = |f: &mut W, index| write!(f, "{}", bytes[index]); + + write_vec(f, writer, None, bytes.len(), "None", false) +} + +impl Debug for BinaryViewArray { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + let writer = |f: &mut Formatter, index| write_value(self, index, f); + write!(f, "BinaryViewArray")?; + write_vec(f, writer, self.validity(), self.len(), "None", false) + } +} + +impl Debug for Utf8ViewArray { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + let writer = |f: &mut Formatter, index| write!(f, "{}", self.value(index)); + write!(f, "Utf8ViewArray")?; + write_vec(f, writer, self.validity(), self.len(), "None", false) + } +} diff --git a/src/common/arrow/src/arrow/array/binview/from.rs b/src/common/arrow/src/arrow/array/binview/from.rs new file mode 100644 index 000000000000..7559b19d8f54 --- /dev/null +++ b/src/common/arrow/src/arrow/array/binview/from.rs @@ -0,0 +1,24 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::arrow::array::BinaryViewArrayGeneric; +use crate::arrow::array::MutableBinaryViewArray; +use crate::arrow::array::ViewType; + +impl> FromIterator> for BinaryViewArrayGeneric { + #[inline] + fn from_iter>>(iter: I) -> Self { + MutableBinaryViewArray::::from_iter(iter).into() + } +} diff --git a/src/common/arrow/src/arrow/array/binview/iterator.rs b/src/common/arrow/src/arrow/array/binview/iterator.rs new file mode 100644 index 000000000000..26511537efdf --- /dev/null +++ b/src/common/arrow/src/arrow/array/binview/iterator.rs @@ -0,0 +1,64 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::arrow::array::binview::mutable::MutableBinaryViewArray; +use crate::arrow::array::binview::BinaryViewArrayGeneric; +use crate::arrow::array::binview::ViewType; +use crate::arrow::array::ArrayAccessor; +use crate::arrow::array::ArrayValuesIter; +use crate::arrow::bitmap::utils::BitmapIter; +use crate::arrow::bitmap::utils::ZipValidity; + +unsafe impl<'a, T: ViewType + ?Sized> ArrayAccessor<'a> for BinaryViewArrayGeneric { + type Item = &'a T; + + #[inline] + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { + self.value_unchecked(index) + } + + #[inline] + fn len(&self) -> usize { + self.views.len() + } +} + +/// Iterator of values of a [`BinaryViewArrayGeneric`]. +pub type BinaryViewValueIter<'a, T> = ArrayValuesIter<'a, BinaryViewArrayGeneric>; + +impl<'a, T: ViewType + ?Sized> IntoIterator for &'a BinaryViewArrayGeneric { + type Item = Option<&'a T>; + type IntoIter = ZipValidity<&'a T, BinaryViewValueIter<'a, T>, BitmapIter<'a>>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +unsafe impl<'a, T: ViewType + ?Sized> ArrayAccessor<'a> for MutableBinaryViewArray { + type Item = &'a T; + + #[inline] + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { + self.value_unchecked(index) + } + + #[inline] + fn len(&self) -> usize { + self.views().len() + } +} + +/// Iterator of values of a [`MutableBinaryViewArray`]. +pub type MutableBinaryViewValueIter<'a, T> = ArrayValuesIter<'a, MutableBinaryViewArray>; diff --git a/src/common/arrow/src/arrow/array/binview/mod.rs b/src/common/arrow/src/arrow/array/binview/mod.rs new file mode 100644 index 000000000000..4d4637a95a8c --- /dev/null +++ b/src/common/arrow/src/arrow/array/binview/mod.rs @@ -0,0 +1,631 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod ffi; +pub(crate) mod fmt; +mod from; +mod iterator; +mod mutable; +mod view; + +mod private { + pub trait Sealed: Send + Sync {} + + impl Sealed for str {} + + impl Sealed for [u8] {} +} + +use std::any::Any; +use std::fmt::Debug; +use std::marker::PhantomData; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; +use std::sync::Arc; + +use either::Either; +pub use iterator::BinaryViewValueIter; +pub use mutable::MutableBinaryViewArray; +use private::Sealed; +pub use view::View; + +use crate::arrow::array::binview::view::validate_binary_view; +use crate::arrow::array::binview::view::validate_utf8_only; +use crate::arrow::array::binview::view::validate_utf8_view; +use crate::arrow::array::iterator::NonNullValuesIter; +use crate::arrow::array::Array; +use crate::arrow::bitmap::utils::BitmapIter; +use crate::arrow::bitmap::utils::ZipValidity; +use crate::arrow::bitmap::Bitmap; +use crate::arrow::buffer::Buffer; +use crate::arrow::datatypes::DataType; +use crate::arrow::error::Error; +use crate::arrow::error::Result; + +static BIN_VIEW_TYPE: DataType = DataType::BinaryView; +static UTF8_VIEW_TYPE: DataType = DataType::Utf8View; + +const UNKNOWN_LEN: u64 = u64::MAX; + +pub trait ViewType: Sealed + 'static + PartialEq + AsRef { + const IS_UTF8: bool; + const DATA_TYPE: DataType; + type Owned: Debug + Clone + Sync + Send + AsRef; + + /// # Safety + /// The caller must ensure `slice` holds bytes that are valid for `Self` (valid UTF-8 when `Self` is `str`).
+ unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self; + + fn to_bytes(&self) -> &[u8]; + + #[allow(clippy::wrong_self_convention)] + fn into_owned(&self) -> Self::Owned; + + fn data_type() -> &'static DataType; +} + +impl ViewType for str { + const IS_UTF8: bool = true; + const DATA_TYPE: DataType = DataType::Utf8View; + type Owned = String; + + #[inline(always)] + unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self { + std::str::from_utf8_unchecked(slice) + } + + #[inline(always)] + fn to_bytes(&self) -> &[u8] { + self.as_bytes() + } + + fn into_owned(&self) -> Self::Owned { + self.to_string() + } + + fn data_type() -> &'static DataType { + &UTF8_VIEW_TYPE + } +} + +impl ViewType for [u8] { + const IS_UTF8: bool = false; + const DATA_TYPE: DataType = DataType::BinaryView; + type Owned = Vec; + + #[inline(always)] + unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self { + slice + } + + #[inline(always)] + fn to_bytes(&self) -> &[u8] { + self + } + + fn into_owned(&self) -> Self::Owned { + self.to_vec() + } + + fn data_type() -> &'static DataType { + &BIN_VIEW_TYPE + } +} + +pub struct BinaryViewArrayGeneric { + data_type: DataType, + views: Buffer, + buffers: Arc<[Buffer]>, + // Raw buffer access. (pointer, len). 
+ raw_buffers: Arc<[(*const u8, usize)]>, + validity: Option, + phantom: PhantomData, + /// Total length in bytes if all values were concatenated + total_bytes_len: AtomicU64, + /// Total bytes in the buffers (excluding remaining capacity) + total_buffer_len: usize, +} + +impl PartialEq for BinaryViewArrayGeneric { + fn eq(&self, other: &Self) -> bool { + self.into_iter().zip(other).all(|(l, r)| l == r) + } +} + +impl Clone for BinaryViewArrayGeneric { + fn clone(&self) -> Self { + Self { + data_type: self.data_type.clone(), + views: self.views.clone(), + buffers: self.buffers.clone(), + raw_buffers: self.raw_buffers.clone(), + validity: self.validity.clone(), + phantom: Default::default(), + total_bytes_len: AtomicU64::new(self.total_bytes_len.load(Ordering::Relaxed)), + total_buffer_len: self.total_buffer_len, + } + } +} + +unsafe impl Send for BinaryViewArrayGeneric {} + +unsafe impl Sync for BinaryViewArrayGeneric {} + +fn buffers_into_raw(buffers: &[Buffer]) -> Arc<[(*const T, usize)]> { + buffers + .iter() + .map(|buf| (buf.data_ptr(), buf.len())) + .collect() +} + +impl BinaryViewArrayGeneric { + pub fn new_unchecked( + data_type: DataType, + views: Buffer, + buffers: Arc<[Buffer]>, + validity: Option, + total_bytes_len: usize, + total_buffer_len: usize, + ) -> Self { + let raw_buffers = buffers_into_raw(&buffers); + // # Safety + // The caller must ensure + // - the data is valid utf8 (if required) + // - the offsets match the buffers. + Self { + data_type, + views, + buffers, + raw_buffers, + validity, + phantom: Default::default(), + total_bytes_len: AtomicU64::new(total_bytes_len as u64), + total_buffer_len, + } + } + + /// Create a new BinaryViewArray whose total bytes length is left unknown and is computed lazily on first use.
+ /// # Safety + /// The caller must ensure the invariants + pub unsafe fn new_unchecked_unknown_md( + data_type: DataType, + views: Buffer, + buffers: Arc<[Buffer]>, + validity: Option, + total_buffer_len: Option, + ) -> Self { + let total_bytes_len = UNKNOWN_LEN as usize; + let total_buffer_len = + total_buffer_len.unwrap_or_else(|| buffers.iter().map(|b| b.len()).sum()); + Self::new_unchecked( + data_type, + views, + buffers, + validity, + total_bytes_len, + total_buffer_len, + ) + } + + pub fn data_buffers(&self) -> &Arc<[Buffer]> { + &self.buffers + } + + pub fn variadic_buffer_lengths(&self) -> Vec { + self.buffers.iter().map(|buf| buf.len() as i64).collect() + } + + pub fn views(&self) -> &Buffer { + &self.views + } + + pub fn try_new( + data_type: DataType, + views: Buffer, + buffers: Arc<[Buffer]>, + validity: Option, + ) -> Result { + if data_type.to_physical_type() != Self::default_data_type().to_physical_type() { + return Err(Error::oos( + "BinaryViewArray can only be initialized with DataType::BinaryView or DataType::Utf8View", + )); + } + if T::IS_UTF8 { + validate_utf8_view(views.as_ref(), buffers.as_ref())?; + } else { + validate_binary_view(views.as_ref(), buffers.as_ref())?; + } + + if let Some(validity) = &validity { + if validity.len() != views.len() { + return Err(Error::oos( + "validity mask length must match the number of values", + )); + } + } + + unsafe { + Ok(Self::new_unchecked_unknown_md( + data_type, views, buffers, validity, None, + )) + } + } + + /// Returns a new [`BinaryViewArrayGeneric`] from a slice of `&T`. + // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it. + pub fn from, P: AsRef<[Option]>>(slice: P) -> Self { + MutableBinaryViewArray::::from(slice).into() + } + + /// Creates an empty [`BinaryViewArrayGeneric`], i.e. whose `.len` is zero. 
+ #[inline] + pub fn new_empty(data_type: DataType) -> Self { + Self::new_unchecked(data_type, Buffer::new(), Arc::from([]), None, 0, 0) + } + + /// Returns a new null [`BinaryViewArrayGeneric`] of `length`. + #[inline] + pub fn new_null(data_type: DataType, length: usize) -> Self { + let validity = Some(Bitmap::new_zeroed(length)); + Self::new_unchecked( + data_type, + Buffer::zeroed(length), + Arc::from([]), + validity, + 0, + 0, + ) + } + + /// Returns the element at index `i` + /// # Panics + /// iff `i >= self.len()` + #[inline] + pub fn value(&self, i: usize) -> &T { + assert!(i < self.len()); + unsafe { self.value_unchecked(i) } + } + + /// Returns the element at index `i` + /// # Safety + /// Assumes that the `i < self.len`. + #[inline] + pub unsafe fn value_unchecked(&self, i: usize) -> &T { + let v = *self.views.get_unchecked(i); + let len = v.length; + + // view layout: + // length: 4 bytes + // prefix: 4 bytes + // buffer_index: 4 bytes + // offset: 4 bytes + + // inlined layout: + // length: 4 bytes + // data: 12 bytes + + let bytes = if len <= 12 { + let ptr = self.views.data_ptr() as *const u8; + std::slice::from_raw_parts(ptr.add(i * 16 + 4), len as usize) + } else { + let (data_ptr, data_len) = *self.raw_buffers.get_unchecked(v.buffer_idx as usize); + let data = std::slice::from_raw_parts(data_ptr, data_len); + let offset = v.offset as usize; + data.get_unchecked(offset..offset + len as usize) + }; + T::from_bytes_unchecked(bytes) + } + + /// Returns an iterator of `Option<&T>` over every element of this array. 
+ pub fn iter(&self) -> ZipValidity<&T, BinaryViewValueIter, BitmapIter> { + ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref()) + } + + /// Returns an iterator of `&T` over every element of this array, ignoring the validity + pub fn values_iter(&self) -> BinaryViewValueIter { + BinaryViewValueIter::new(self) + } + + pub fn len_iter(&self) -> impl Iterator + '_ { + self.views.iter().map(|v| v.length) + } + + /// Returns an iterator of the non-null values. + pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BinaryViewArrayGeneric> { + NonNullValuesIter::new(self, self.validity()) + } + + /// Returns an iterator of the non-null views. + pub fn non_null_views_iter(&self) -> NonNullValuesIter<'_, Buffer> { + NonNullValuesIter::new(self.views(), self.validity()) + } + + impl_sliced!(); + impl_mut_validity!(); + impl_into_array!(); + + pub fn from_slice, P: AsRef<[Option]>>(slice: P) -> Self { + let mutable = MutableBinaryViewArray::from_iterator( + slice.as_ref().iter().map(|opt_v| opt_v.as_ref()), + ); + mutable.into() + } + + pub fn from_slice_values, P: AsRef<[S]>>(slice: P) -> Self { + let mutable = + MutableBinaryViewArray::from_values_iter(slice.as_ref().iter().map(|v| v.as_ref())); + mutable.into() + } + + /// Get the total length of bytes that it would take to concatenate all binary/str values in this array. + pub fn total_bytes_len(&self) -> usize { + let total = self.total_bytes_len.load(Ordering::Relaxed); + if total == UNKNOWN_LEN { + let total = self.len_iter().map(|v| v as usize).sum::(); + self.total_bytes_len.store(total as u64, Ordering::Relaxed); + total + } else { + total as usize + } + } + + /// Get the length of bytes that are stored in the variadic buffers.
+ pub fn total_buffer_len(&self) -> usize { + self.total_buffer_len + } + + #[inline(always)] + pub fn len(&self) -> usize { + self.views.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Garbage collect + pub fn gc(self) -> Self { + if self.buffers.is_empty() { + return self; + } + let mut mutable = MutableBinaryViewArray::with_capacity(self.len()); + let buffers = self.raw_buffers.as_ref(); + + for view in self.views.as_ref() { + unsafe { mutable.push_view(*view, buffers) } + } + mutable.freeze().with_validity(self.validity) + } + + pub fn is_sliced(&self) -> bool { + self.views.as_ptr() != self.views.data_ptr() + } + + pub fn maybe_gc(self) -> Self { + const GC_MINIMUM_SAVINGS: usize = 16 * 1024; // At least 16 KiB. + + if self.total_buffer_len <= GC_MINIMUM_SAVINGS { + return self; + } + + // Subtract the maximum amount of inlined strings to get a lower bound + // on the number of buffer bytes needed (assuming no dedup). + let total_bytes_len = self.total_bytes_len(); + let buffer_req_lower_bound = total_bytes_len.saturating_sub(self.len() * 12); + + let lower_bound_mem_usage_post_gc = self.len() * 16 + buffer_req_lower_bound; + let current_mem_usage = self.len() * 16 + self.total_buffer_len(); + let savings_upper_bound = current_mem_usage.saturating_sub(lower_bound_mem_usage_post_gc); + + if savings_upper_bound >= GC_MINIMUM_SAVINGS + && current_mem_usage >= 4 * lower_bound_mem_usage_post_gc + { + self.gc() + } else { + self + } + } + + pub fn make_mut(self) -> MutableBinaryViewArray { + let views = self.views.make_mut(); + let completed_buffers = self.buffers.to_vec(); + let validity = self.validity.map(|bitmap| bitmap.make_mut()); + MutableBinaryViewArray { + views, + completed_buffers, + in_progress_buffer: vec![], + validity, + phantom: Default::default(), + total_bytes_len: self.total_bytes_len.load(Ordering::Relaxed) as usize, + total_buffer_len: self.total_buffer_len, + } + } + + #[must_use] + pub fn into_mut(self) 
-> Either> { + use Either::*; + let is_unique = (Arc::strong_count(&self.buffers) + Arc::weak_count(&self.buffers)) == 1; + + if let Some(bitmap) = self.validity { + match bitmap.into_mut() { + Left(bitmap) => Left(Self::new_unchecked( + self.data_type, + self.views, + self.buffers, + Some(bitmap), + self.total_bytes_len.load(Ordering::Relaxed) as usize, + self.total_buffer_len, + )), + Right(mutable_bitmap) => match (self.views.into_mut(), is_unique) { + (Right(views), true) => Right(MutableBinaryViewArray { + views, + completed_buffers: self.buffers.to_vec(), + in_progress_buffer: vec![], + validity: Some(mutable_bitmap), + phantom: Default::default(), + total_bytes_len: self.total_bytes_len.load(Ordering::Relaxed) as usize, + total_buffer_len: self.total_buffer_len, + }), + (Right(views), false) => Left(Self::new_unchecked( + self.data_type, + views.into(), + self.buffers, + Some(mutable_bitmap.into()), + self.total_bytes_len.load(Ordering::Relaxed) as usize, + self.total_buffer_len, + )), + (Left(views), _) => Left(Self::new_unchecked( + self.data_type, + views, + self.buffers, + Some(mutable_bitmap.into()), + self.total_bytes_len.load(Ordering::Relaxed) as usize, + self.total_buffer_len, + )), + }, + } + } else { + match (self.views.into_mut(), is_unique) { + (Right(views), true) => Right(MutableBinaryViewArray { + views, + completed_buffers: self.buffers.to_vec(), + in_progress_buffer: vec![], + validity: None, + phantom: Default::default(), + total_bytes_len: self.total_bytes_len.load(Ordering::Relaxed) as usize, + total_buffer_len: self.total_buffer_len, + }), + (Right(views), false) => Left(Self::new_unchecked( + self.data_type, + views.into(), + self.buffers, + None, + self.total_bytes_len.load(Ordering::Relaxed) as usize, + self.total_buffer_len, + )), + (Left(views), _) => Left(Self::new_unchecked( + self.data_type, + views, + self.buffers, + None, + self.total_bytes_len.load(Ordering::Relaxed) as usize, + self.total_buffer_len, + )), + } + } + } + + 
pub fn default_data_type() -> &'static DataType { + T::data_type() + } +} + +pub type BinaryViewArray = BinaryViewArrayGeneric<[u8]>; +pub type Utf8ViewArray = BinaryViewArrayGeneric; + +impl BinaryViewArray { + /// Validate the underlying bytes on UTF-8. + pub fn validate_utf8(&self) -> Result<()> { + // SAFETY: views are correct + unsafe { validate_utf8_only(&self.views, &self.buffers) } + } + + /// Convert [`BinaryViewArray`] to [`Utf8ViewArray`]. + pub fn to_utf8view(&self) -> Result { + self.validate_utf8()?; + unsafe { Ok(self.to_utf8view_unchecked()) } + } + + /// Convert [`BinaryViewArray`] to [`Utf8ViewArray`] without checking UTF-8. + /// + /// # Safety + /// The caller must ensure the underlying data is valid UTF-8. + pub unsafe fn to_utf8view_unchecked(&self) -> Utf8ViewArray { + Utf8ViewArray::new_unchecked( + DataType::Utf8View, + self.views.clone(), + self.buffers.clone(), + self.validity.clone(), + self.total_bytes_len.load(Ordering::Relaxed) as usize, + self.total_buffer_len, + ) + } +} + +impl Utf8ViewArray { + pub fn to_binview(&self) -> BinaryViewArray { + BinaryViewArray::new_unchecked( + DataType::BinaryView, + self.views.clone(), + self.buffers.clone(), + self.validity.clone(), + self.total_bytes_len.load(Ordering::Relaxed) as usize, + self.total_buffer_len, + ) + } +} + +impl Array for BinaryViewArrayGeneric { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + #[inline(always)] + fn len(&self) -> usize { + BinaryViewArrayGeneric::len(self) + } + + fn data_type(&self) -> &DataType { + T::data_type() + } + + fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + + fn slice(&mut self, offset: usize, length: usize) { + assert!( + offset + length <= self.len(), + "the offset of the new Buffer cannot exceed the existing length" + ); + unsafe { self.slice_unchecked(offset, length) } + } + + unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) { + 
debug_assert!(offset + length <= self.len()); + self.validity = self + .validity + .take() + .map(|bitmap| bitmap.sliced_unchecked(offset, length)) + .filter(|bitmap| bitmap.unset_bits() > 0); + self.views.slice_unchecked(offset, length); + self.total_bytes_len.store(UNKNOWN_LEN, Ordering::Relaxed) + } + + fn with_validity(&self, validity: Option) -> Box { + let mut new = self.clone(); + new.validity = validity; + Box::new(new) + } + + fn to_boxed(&self) -> Box { + Box::new(self.clone()) + } +} diff --git a/src/common/arrow/src/arrow/array/binview/mutable.rs b/src/common/arrow/src/arrow/array/binview/mutable.rs new file mode 100644 index 000000000000..da9f2e1cf12d --- /dev/null +++ b/src/common/arrow/src/arrow/array/binview/mutable.rs @@ -0,0 +1,461 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::any::Any; +use std::fmt::Debug; +use std::fmt::Formatter; +use std::sync::Arc; + +use crate::arrow::array::binview::iterator::MutableBinaryViewValueIter; +use crate::arrow::array::binview::view::validate_utf8_only; +use crate::arrow::array::binview::BinaryViewArrayGeneric; +use crate::arrow::array::binview::View; +use crate::arrow::array::binview::ViewType; +use crate::arrow::array::Array; +use crate::arrow::array::MutableArray; +use crate::arrow::bitmap::MutableBitmap; +use crate::arrow::buffer::Buffer; +use crate::arrow::datatypes::DataType; +use crate::arrow::error::Result; +use crate::arrow::trusted_len::TrustedLen; +use crate::arrow::types::NativeType; + +const DEFAULT_BLOCK_SIZE: usize = 8 * 1024; + +pub struct MutableBinaryViewArray { + pub(super) views: Vec, + pub(super) completed_buffers: Vec>, + pub(super) in_progress_buffer: Vec, + pub(super) validity: Option, + pub(super) phantom: std::marker::PhantomData, + /// Total bytes length if we would concatenate them all. 
+ pub(super) total_bytes_len: usize, + /// Total bytes in the buffer (excluding remaining capacity) + pub(super) total_buffer_len: usize, +} + +impl Clone for MutableBinaryViewArray { + fn clone(&self) -> Self { + Self { + views: self.views.clone(), + completed_buffers: self.completed_buffers.clone(), + in_progress_buffer: self.in_progress_buffer.clone(), + validity: self.validity.clone(), + phantom: Default::default(), + total_bytes_len: self.total_bytes_len, + total_buffer_len: self.total_buffer_len, + } + } +} + +impl Debug for MutableBinaryViewArray { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "mutable-binview{:?}", T::DATA_TYPE) + } +} + +impl Default for MutableBinaryViewArray { + fn default() -> Self { + Self::with_capacity(0) + } +} + +impl From> for BinaryViewArrayGeneric { + fn from(mut value: MutableBinaryViewArray) -> Self { + value.finish_in_progress(); + Self::new_unchecked( + T::DATA_TYPE, + value.views.into(), + Arc::from(value.completed_buffers), + value.validity.map(|b| b.into()), + value.total_bytes_len, + value.total_buffer_len, + ) + } +} + +impl MutableBinaryViewArray { + pub fn new() -> Self { + Self::default() + } + + pub fn with_capacity(capacity: usize) -> Self { + Self { + views: Vec::with_capacity(capacity), + completed_buffers: vec![], + in_progress_buffer: vec![], + validity: None, + phantom: Default::default(), + total_buffer_len: 0, + total_bytes_len: 0, + } + } + + #[inline] + pub fn views_mut(&mut self) -> &mut Vec { + &mut self.views + } + + #[inline] + pub fn views(&self) -> &[View] { + &self.views + } + + pub fn validity(&self) -> Option<&MutableBitmap> { + self.validity.as_ref() + } + + pub fn validity_mut(&mut self) -> Option<&mut MutableBitmap> { + self.validity.as_mut() + } + + /// Reserves `additional` elements and `additional_buffer` on the buffer. 
+    pub fn reserve(&mut self, additional: usize) {
+        self.views.reserve(additional);
+    }
+
+    /// Number of views (i.e. elements, including nulls) pushed so far.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.views.len()
+    }
+
+    /// Returns `true` when no element has been pushed.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Capacity of the views vector, in elements.
+    #[inline]
+    pub fn capacity(&self) -> usize {
+        self.views.capacity()
+    }
+
+    /// Materializes the validity bitmap with every existing slot marked valid.
+    ///
+    /// When `unset_last` is true the last slot is cleared afterwards, so the
+    /// caller must already have pushed at least one view.
+    fn init_validity(&mut self, unset_last: bool) {
+        let mut validity = MutableBitmap::with_capacity(self.views.capacity());
+        validity.extend_constant(self.len(), true);
+        if unset_last {
+            validity.set(self.len() - 1, false);
+        }
+        self.validity = Some(validity);
+    }
+
+    /// Appends the value described by `v`, copying out-of-line bytes from `buffers`
+    /// into this array's own buffers. The validity bitmap is not touched.
+    ///
+    /// # Safety
+    /// - caller must allocate enough capacity
+    /// - caller must ensure the view and buffers match.
+    #[inline]
+    pub unsafe fn push_view(&mut self, v: View, buffers: &[(*const u8, usize)]) {
+        let len = v.length;
+        if len <= 12 {
+            // Inlined value: the view is self-contained, so only the logical
+            // byte length needs to be accounted for here.
+            self.total_bytes_len += len as usize;
+            debug_assert!(self.views.capacity() > self.views.len());
+            self.views.push(v);
+        } else {
+            // `push_value_ignore_validity` already adds the value to both
+            // `total_bytes_len` and `total_buffer_len`; adding it here as well
+            // would count every out-of-line value twice.
+            let (data_ptr, data_len) = *buffers.get_unchecked(v.buffer_idx as usize);
+            let data = std::slice::from_raw_parts(data_ptr, data_len);
+            let offset = v.offset as usize;
+            let bytes = data.get_unchecked(offset..offset + len as usize);
+            let t = T::from_bytes_unchecked(bytes);
+            self.push_value_ignore_validity(t)
+        }
+    }
+
+    /// Encodes `value` as a view and appends it without touching the validity bitmap.
+    ///
+    /// Values of at most 12 bytes are stored inline in the view. Longer values are
+    /// appended to the in-progress buffer, which is flushed into `completed_buffers`
+    /// and replaced by a larger one when it cannot hold the new bytes.
+    ///
+    /// # Panics
+    /// Panics if the value length or the buffer index does not fit in a `u32`.
+    pub fn push_value_ignore_validity<V: AsRef<T>>(&mut self, value: V) {
+        let value = value.as_ref();
+        let bytes = value.to_bytes();
+        self.total_bytes_len += bytes.len();
+        let len: u32 = bytes.len().try_into().unwrap();
+        let mut payload = [0; 16];
+        payload[0..4].copy_from_slice(&len.to_le_bytes());
+
+        if len <= 12 {
+            // | len     | prefix  | remaining(zero-padded) |
+            // ^         ^         ^
+            // | 4 bytes | 4 bytes | 8 bytes                |
+            payload[4..4 + bytes.len()].copy_from_slice(bytes);
+        } else {
+            // | len     | prefix  | buffer  | offsets |
+            // ^         ^         ^         ^
+            // | 4 bytes | 4 bytes | 4 bytes | 4 bytes |
+            //
+            // buffer index + offset -> real binary data
+            self.total_buffer_len += bytes.len();
+            let required_cap = self.in_progress_buffer.len() + bytes.len();
+            if self.in_progress_buffer.capacity() < required_cap {
+                // Grow geometrically between DEFAULT_BLOCK_SIZE and 16 MiB, but
+                // never below the size of the value being pushed.
+                let new_capacity = (self.in_progress_buffer.capacity() * 2)
+                    .clamp(DEFAULT_BLOCK_SIZE, 16 * 1024 * 1024)
+                    .max(bytes.len());
+                let in_progress = Vec::with_capacity(new_capacity);
+                let flushed = std::mem::replace(&mut self.in_progress_buffer, in_progress);
+                if !flushed.is_empty() {
+                    self.completed_buffers.push(flushed.into())
+                }
+            }
+            let offset = self.in_progress_buffer.len() as u32;
+            self.in_progress_buffer.extend_from_slice(bytes);
+
+            // set prefix
+            // SAFETY: this branch is only taken when `bytes.len() > 12`.
+            unsafe { payload[4..8].copy_from_slice(bytes.get_unchecked(0..4)) };
+            // The in-progress buffer will land at this index once it is flushed.
+            let buffer_idx: u32 = self.completed_buffers.len().try_into().unwrap();
+            payload[8..12].copy_from_slice(&buffer_idx.to_le_bytes());
+            payload[12..16].copy_from_slice(&offset.to_le_bytes());
+        }
+        let value = View::from_le_bytes(payload);
+        self.views.push(value);
+    }
+
+    /// Appends a non-null value, marking it valid if a validity bitmap exists.
+    pub fn push_value<V: AsRef<T>>(&mut self, value: V) {
+        if let Some(validity) = &mut self.validity {
+            validity.push(true)
+        }
+        self.push_value_ignore_validity(value)
+    }
+
+    /// Appends `value`, or a null when it is `None`.
+    pub fn push<V: AsRef<T>>(&mut self, value: Option<V>) {
+        if let Some(value) = value {
+            self.push_value(value)
+        } else {
+            self.push_null()
+        }
+    }
+
+    /// Appends a null slot (a default view), creating the validity bitmap on first use.
+    pub fn push_null(&mut self) {
+        self.views.push(View::default());
+        match &mut self.validity {
+            Some(validity) => validity.push(false),
+            // The view was pushed above, so the last slot is the new null.
+            None => self.init_validity(true),
+        }
+    }
+
+    /// Appends `additional` null slots.
+    pub fn extend_null(&mut self, additional: usize) {
+        if self.validity.is_none() && additional > 0 {
+            self.init_validity(false);
+        }
+        self.views
+            .extend(std::iter::repeat(View::default()).take(additional));
+        if let Some(validity) = &mut self.validity {
+            validity.extend_constant(additional, false);
+        }
+    }
+
+    /// Appends `additional` copies of `value` (or `additional` nulls when it is `None`).
+    pub fn extend_constant<V: AsRef<T>>(&mut self, additional: usize, value: Option<V>) {
+        if value.is_none() && self.validity.is_none() {
+            self.init_validity(false);
+        }
+
+        if let Some(validity) = &mut self.validity {
+            validity.extend_constant(additional, value.is_some())
+        }
+
+        // Push and pop to get the properly encoded value.
+        // For long string this leads to a dictionary encoding,
+        // as we push the string only once in the buffers
+        let view_value = value
+            .map(|v| {
+                self.push_value_ignore_validity(v);
+                let view = self.views.pop().unwrap();
+                // The push above counted the value once, but `additional` copies
+                // of the view are emitted below. Cannot underflow: the push just
+                // added `value_len` to `total_bytes_len`.
+                let value_len = view.length as usize;
+                self.total_bytes_len = self.total_bytes_len - value_len + value_len * additional;
+                view
+            })
+            .unwrap_or_default();
+        self.views
+            .extend(std::iter::repeat(view_value).take(additional));
+    }
+
+    impl_mutable_array_mut_validity!();
+
+    /// Appends every value yielded by `iterator` as a non-null element.
+    #[inline]
+    pub fn extend_values<I, P>(&mut self, iterator: I)
+    where
+        I: Iterator<Item = P>,
+        P: AsRef<T>,
+    {
+        self.reserve(iterator.size_hint().0);
+        for v in iterator {
+            self.push_value(v)
+        }
+    }
+
+    /// Same as [`Self::extend_values`]; the trusted length is not exploited.
+    #[inline]
+    pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
+    where
+        I: TrustedLen<Item = P>,
+        P: AsRef<T>,
+    {
+        self.extend_values(iterator)
+    }
+
+    /// Appends every optional value yielded by `iterator`; `None` becomes a null.
+    #[inline]
+    pub fn extend<I, P>(&mut self, iterator: I)
+    where
+        I: Iterator<Item = Option<P>>,
+        P: AsRef<T>,
+    {
+        self.reserve(iterator.size_hint().0);
+        for p in iterator {
+            self.push(p)
+        }
+    }
+
+    /// Same as [`Self::extend`]; the trusted length is not exploited.
+    #[inline]
+    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
+    where
+        I: TrustedLen<Item = Option<P>>,
+        P: AsRef<T>,
+    {
+        self.extend(iterator)
+    }
+
+    /// Builds an array from an iterator of optional values.
+    #[inline]
+    pub fn from_iterator<I, P>(iterator: I) -> Self
+    where
+        I: Iterator<Item = Option<P>>,
+        P: AsRef<T>,
+    {
+        let mut mutable = Self::with_capacity(iterator.size_hint().0);
+        mutable.extend(iterator);
+        mutable
+    }
+
+    /// Builds an array from an iterator of non-null values.
+    pub fn from_values_iter<I, P>(iterator: I) -> Self
+    where
+        I: Iterator<Item = P>,
+        P: AsRef<T>,
+    {
+        let mut mutable = Self::with_capacity(iterator.size_hint().0);
+        mutable.extend_values(iterator);
+        mutable
+    }
+
+    /// Builds an array from a slice of optional values.
+    pub fn from<S: AsRef<T>, P: AsRef<[Option<S>]>>(slice: P) -> Self {
+        Self::from_iterator(slice.as_ref().iter().map(|opt_v| opt_v.as_ref()))
+    }
+
+    /// Moves a non-empty in-progress buffer into `completed_buffers`.
+    fn finish_in_progress(&mut self) {
+        if !self.in_progress_buffer.is_empty() {
+            self.completed_buffers
+                .push(std::mem::take(&mut self.in_progress_buffer).into());
+        }
+    }
+
+    /// Converts this builder into an immutable [`BinaryViewArrayGeneric`].
+    #[inline]
+    pub fn freeze(self) -> BinaryViewArrayGeneric<T> {
+        self.into()
+    }
+
+    /// Returns the element at index `i`
+    /// # Safety
+    /// Assumes that `i < self.len()`.
+ #[inline] + pub unsafe fn value_unchecked(&self, i: usize) -> &T { + let v = *self.views.get_unchecked(i); + let len = v.length; + + // view layout: + // for no-inlined layout: + // length: 4 bytes + // prefix: 4 bytes + // buffer_index: 4 bytes + // offset: 4 bytes + + // for inlined layout: + // length: 4 bytes + // data: 12 bytes + let bytes = if len <= 12 { + let ptr = self.views.as_ptr() as *const u8; + std::slice::from_raw_parts(ptr.add(i * 16 + 4), len as usize) + } else { + let buffer_idx = v.buffer_idx as usize; + let offset = v.offset; + + let data = if buffer_idx == self.completed_buffers.len() { + self.in_progress_buffer.as_slice() + } else { + self.completed_buffers.get_unchecked(buffer_idx) + }; + + let offset = offset as usize; + data.get_unchecked(offset..offset + len as usize) + }; + T::from_bytes_unchecked(bytes) + } + + /// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity + pub fn values_iter(&self) -> MutableBinaryViewValueIter { + MutableBinaryViewValueIter::new(self) + } + + pub fn values(&self) -> Vec<&T> { + self.values_iter().collect() + } +} + +impl MutableBinaryViewArray<[u8]> { + pub fn validate_utf8(&mut self) -> Result<()> { + self.finish_in_progress(); + // views are correct + unsafe { validate_utf8_only(&self.views, &self.completed_buffers) } + } +} + +impl> Extend> for MutableBinaryViewArray { + #[inline] + fn extend>>(&mut self, iter: I) { + Self::extend(self, iter.into_iter()) + } +} + +impl> FromIterator> for MutableBinaryViewArray { + #[inline] + fn from_iter>>(iter: I) -> Self { + Self::from_iterator(iter.into_iter()) + } +} + +impl MutableArray for MutableBinaryViewArray { + fn data_type(&self) -> &DataType { + T::data_type() + } + + fn len(&self) -> usize { + MutableBinaryViewArray::len(self) + } + + fn validity(&self) -> Option<&MutableBitmap> { + self.validity.as_ref() + } + + fn as_box(&mut self) -> Box { + let mutable = std::mem::take(self); + let arr: BinaryViewArrayGeneric = 
mutable.into(); + arr.boxed() + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_mut_any(&mut self) -> &mut dyn Any { + self + } + + fn push_null(&mut self) { + MutableBinaryViewArray::push_null(self) + } + + fn reserve(&mut self, additional: usize) { + MutableBinaryViewArray::reserve(self, additional) + } + + fn shrink_to_fit(&mut self) { + self.views.shrink_to_fit() + } +} diff --git a/src/common/arrow/src/arrow/array/binview/view.rs b/src/common/arrow/src/arrow/array/binview/view.rs new file mode 100644 index 000000000000..182187a9ecfa --- /dev/null +++ b/src/common/arrow/src/arrow/array/binview/view.rs @@ -0,0 +1,190 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Display; +use std::fmt::Formatter; +use std::ops::Add; + +use bytemuck::Pod; +use bytemuck::Zeroable; + +use crate::arrow::buffer::Buffer; +use crate::arrow::datatypes::PrimitiveType; +use crate::arrow::error::Error; +use crate::arrow::error::Result; +use crate::arrow::types::NativeType; + +#[derive(Debug, Copy, Clone, Default)] +#[repr(C)] +pub struct View { + /// The length of the string/bytes. + pub length: u32, + /// First 4 bytes of string/bytes data. + pub prefix: u32, + /// The buffer index. + pub buffer_idx: u32, + /// The offset into the buffer. 
+ pub offset: u32, +} + +impl View { + #[inline(always)] + pub fn as_u128(self) -> u128 { + unsafe { std::mem::transmute(self) } + } +} + +impl Display for View { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +impl PartialEq for View { + fn eq(&self, other: &Self) -> bool { + self.as_u128() == other.as_u128() + } +} +unsafe impl Pod for View {} +unsafe impl Zeroable for View {} + +impl NativeType for View { + const PRIMITIVE: PrimitiveType = PrimitiveType::UInt128; + type Bytes = [u8; 16]; + + #[inline] + fn to_le_bytes(&self) -> Self::Bytes { + self.as_u128().to_le_bytes() + } + + #[inline] + fn to_be_bytes(&self) -> Self::Bytes { + self.as_u128().to_be_bytes() + } + + #[inline] + fn from_le_bytes(bytes: Self::Bytes) -> Self { + Self::from(u128::from_le_bytes(bytes)) + } + + #[inline] + fn from_be_bytes(bytes: Self::Bytes) -> Self { + Self::from(u128::from_be_bytes(bytes)) + } +} + +impl Add for View { + type Output = View; + + fn add(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} + +impl num_traits::Zero for View { + fn zero() -> Self { + Default::default() + } + + fn is_zero(&self) -> bool { + *self == Self::zero() + } +} + +impl From for View { + #[inline] + fn from(value: u128) -> Self { + unsafe { std::mem::transmute(value) } + } +} + +impl From for u128 { + #[inline] + fn from(value: View) -> Self { + value.as_u128() + } +} + +fn validate_view(views: &[View], buffers: &[Buffer], validate_bytes: F) -> Result<()> +where F: Fn(&[u8]) -> Result<()> { + for view in views { + let len = view.length; + if len <= 12 { + if len < 12 && view.as_u128() >> (32 + len * 8) != 0 { + return Err(Error::oos("view contained non-zero padding in prefix")); + } + + validate_bytes(&view.to_le_bytes()[4..4 + len as usize])?; + } else { + let data = buffers.get(view.buffer_idx as usize).ok_or_else(|| { + Error::oos(format!( + "view index out of bounds\n\nGot: {} buffers and index: {}", + buffers.len(), + view.buffer_idx 
+ )) + })?; + + let start = view.offset as usize; + let end = start + len as usize; + let b = data + .as_slice() + .get(start..end) + .ok_or_else(|| Error::oos("buffer slice out of bounds"))?; + + if !b.starts_with(&view.prefix.to_le_bytes()) { + return Err(Error::oos("prefix does not match string data")); + } + validate_bytes(b)?; + }; + } + + Ok(()) +} + +pub(super) fn validate_binary_view(views: &[View], buffers: &[Buffer]) -> Result<()> { + validate_view(views, buffers, |_| Ok(())) +} + +fn validate_utf8(b: &[u8]) -> Result<()> { + match simdutf8::basic::from_utf8(b) { + Ok(_) => Ok(()), + Err(_) => Err(Error::oos("invalid utf8")), + } +} + +pub(super) fn validate_utf8_view(views: &[View], buffers: &[Buffer]) -> Result<()> { + validate_view(views, buffers, validate_utf8) +} + +/// # Safety +/// The views and buffers must uphold the invariants of BinaryView otherwise we will go OOB. +pub(super) unsafe fn validate_utf8_only(views: &[View], buffers: &[Buffer]) -> Result<()> { + for view in views { + let len = view.length; + if len <= 12 { + validate_utf8(view.to_le_bytes().get_unchecked(4..4 + len as usize))?; + } else { + let buffer_idx = view.buffer_idx; + let offset = view.offset; + let data = buffers.get_unchecked(buffer_idx as usize); + + let start = offset as usize; + let end = start + len as usize; + let b = &data.as_slice().get_unchecked(start..end); + validate_utf8(b)?; + }; + } + + Ok(()) +} diff --git a/src/common/arrow/src/arrow/array/equal/binary_view.rs b/src/common/arrow/src/arrow/array/equal/binary_view.rs new file mode 100644 index 000000000000..1fe1111b68ca --- /dev/null +++ b/src/common/arrow/src/arrow/array/equal/binary_view.rs @@ -0,0 +1,25 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::arrow::array::Array; +use crate::arrow::array::BinaryViewArrayGeneric; +use crate::arrow::array::ViewType; + +pub(super) fn equal( + lhs: &BinaryViewArrayGeneric, + rhs: &BinaryViewArrayGeneric, +) -> bool { + lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) +} diff --git a/src/common/arrow/src/arrow/array/equal/mod.rs b/src/common/arrow/src/arrow/array/equal/mod.rs index aa5e7778d10b..c6c2f409808c 100644 --- a/src/common/arrow/src/arrow/array/equal/mod.rs +++ b/src/common/arrow/src/arrow/array/equal/mod.rs @@ -18,6 +18,7 @@ use crate::arrow::offset::Offset; use crate::arrow::types::NativeType; mod binary; +mod binary_view; mod boolean; mod dictionary; mod fixed_size_binary; @@ -298,5 +299,15 @@ pub fn equal(lhs: &dyn Array, rhs: &dyn Array) -> bool { let rhs = rhs.as_any().downcast_ref().unwrap(); map::equal(lhs, rhs) } + BinaryView => { + let lhs = lhs.as_any().downcast_ref().unwrap(); + let rhs = rhs.as_any().downcast_ref().unwrap(); + binary_view::equal::<[u8]>(lhs, rhs) + } + Utf8View => { + let lhs = lhs.as_any().downcast_ref().unwrap(); + let rhs = rhs.as_any().downcast_ref().unwrap(); + binary_view::equal::(lhs, rhs) + } } } diff --git a/src/common/arrow/src/arrow/array/ffi.rs b/src/common/arrow/src/arrow/array/ffi.rs index 633f81e9a777..35756afc8b15 100644 --- a/src/common/arrow/src/arrow/array/ffi.rs +++ b/src/common/arrow/src/arrow/array/ffi.rs @@ -86,6 +86,8 @@ pub fn offset_buffers_children_dictionary(array: &dyn Array) -> BuffersChildren Struct => ffi_dyn!(array, StructArray), 
Union => ffi_dyn!(array, UnionArray), Map => ffi_dyn!(array, MapArray), + BinaryView => ffi_dyn!(array, BinaryViewArray), + Utf8View => ffi_dyn!(array, Utf8ViewArray), Dictionary(key_type) => { match_integer_type!(key_type, |$T| { let array = array.as_any().downcast_ref::>().unwrap(); diff --git a/src/common/arrow/src/arrow/array/fixed_size_binary/ffi.rs b/src/common/arrow/src/arrow/array/fixed_size_binary/ffi.rs index e2d464604029..88700d81d912 100644 --- a/src/common/arrow/src/arrow/array/fixed_size_binary/ffi.rs +++ b/src/common/arrow/src/arrow/array/fixed_size_binary/ffi.rs @@ -24,7 +24,7 @@ unsafe impl ToFfi for FixedSizeBinaryArray { fn buffers(&self) -> Vec> { vec![ self.validity.as_ref().map(|x| x.as_ptr()), - Some(self.values.as_ptr().cast::()), + Some(self.values.data_ptr().cast::()), ] } diff --git a/src/common/arrow/src/arrow/array/fmt.rs b/src/common/arrow/src/arrow/array/fmt.rs index 3cf37af1e354..6dd39f7ed36a 100644 --- a/src/common/arrow/src/arrow/array/fmt.rs +++ b/src/common/arrow/src/arrow/array/fmt.rs @@ -107,6 +107,20 @@ pub fn get_value_display<'a, F: Write + 'a>( Map => Box::new(move |f, index| { super::map::fmt::write_value(array.as_any().downcast_ref().unwrap(), index, null, f) }), + BinaryView => Box::new(move |f, index| { + super::binview::fmt::write_value::<[u8], _>( + array.as_any().downcast_ref().unwrap(), + index, + f, + ) + }), + Utf8View => Box::new(move |f, index| { + super::binview::fmt::write_value::( + array.as_any().downcast_ref().unwrap(), + index, + f, + ) + }), Dictionary(key_type) => match_integer_type!(key_type, |$T| { Box::new(move |f, index| { super::dictionary::fmt::write_value::<$T,_>(array.as_any().downcast_ref().unwrap(), index, null, f) diff --git a/src/common/arrow/src/arrow/array/growable/binview.rs b/src/common/arrow/src/arrow/array/growable/binview.rs new file mode 100644 index 000000000000..a23ba22bffe4 --- /dev/null +++ b/src/common/arrow/src/arrow/array/growable/binview.rs @@ -0,0 +1,222 @@ +// Copyright (c) 
2020 Ritchie Vink
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::hash::Hash;
+use std::hash::Hasher;
+use std::sync::Arc;
+
+use indexmap::IndexSet;
+
+use crate::arrow::array::growable::utils::extend_validity;
+use crate::arrow::array::growable::utils::prepare_validity;
+use crate::arrow::array::growable::Growable;
+use crate::arrow::array::Array;
+use crate::arrow::array::BinaryViewArrayGeneric;
+use crate::arrow::array::View;
+use crate::arrow::array::ViewType;
+use crate::arrow::bitmap::MutableBitmap;
+use crate::arrow::buffer::Buffer;
+use crate::arrow::datatypes::DataType;
+
+pub type ArrowIndexSet<K> = IndexSet<K, ahash::RandomState>;
+
+/// Set key that identifies a data buffer by the pointer returned from
+/// `Buffer::data_ptr`, so the same buffer shared by several source arrays is
+/// registered only once.
+struct BufferKey<'a> {
+    inner: &'a Buffer<u8>,
+}
+
+impl Hash for BufferKey<'_> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        state.write_u64(self.inner.data_ptr() as u64)
+    }
+}
+
+impl PartialEq for BufferKey<'_> {
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        self.inner.data_ptr() == other.inner.data_ptr()
+    }
+}
+
+impl Eq for BufferKey<'_> {}
+
+/// Concrete [`Growable`] for the [`BinaryViewArrayGeneric`].
+pub struct GrowableBinaryViewArray<'a, T: ViewType + ?Sized> {
+    arrays: Vec<&'a BinaryViewArrayGeneric<T>>,
+    data_type: DataType,
+    validity: Option<MutableBitmap>,
+    views: Vec<View>,
+    // We need to use a set/hashmap to deduplicate
+    // A growable can be called with many chunks from self.
+    buffers: ArrowIndexSet<BufferKey<'a>>,
+    /// Sum of the lengths of all values pushed so far.
+    total_bytes_len: usize,
+    /// Sum of the byte lengths of the deduplicated data buffers.
+    total_buffer_len: usize,
+}
+
+impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> {
+    /// Creates a new [`GrowableBinaryViewArray`] bound to `arrays` with a pre-allocated `capacity`.
+    /// # Panics
+    /// If `arrays` is empty.
+    pub fn new(
+        arrays: Vec<&'a BinaryViewArrayGeneric<T>>,
+        use_validity: bool,
+        capacity: usize,
+    ) -> Self {
+        let data_type = arrays[0].data_type().clone();
+
+        // if any of the arrays has nulls, insertions from any array requires setting bits
+        // as there is at least one array with nulls.
+        let use_validity = use_validity || arrays.iter().any(|array| array.null_count() > 0);
+
+        let buffers = arrays
+            .iter()
+            .flat_map(|array| {
+                array
+                    .data_buffers()
+                    .as_ref()
+                    .iter()
+                    .map(|buf| BufferKey { inner: buf })
+            })
+            .collect::<ArrowIndexSet<_>>();
+        // Count bytes, not buffers, and count each shared buffer once: this value
+        // feeds `maybe_gc`, which compares it against byte thresholds.
+        let total_buffer_len = buffers.iter().map(|key| key.inner.len()).sum::<usize>();
+
+        Self {
+            arrays,
+            data_type,
+            validity: prepare_validity(use_validity, capacity),
+            views: Vec::with_capacity(capacity),
+            buffers,
+            total_bytes_len: 0,
+            total_buffer_len,
+        }
+    }
+
+    /// Builds the immutable array from the views and validity accumulated so far,
+    /// leaving `self` with no views and no validity.
+    fn to(&mut self) -> BinaryViewArrayGeneric<T> {
+        let views = std::mem::take(&mut self.views);
+        let validity = self.validity.take();
+        // The views were moved out, so the running byte count restarts at zero.
+        let total_bytes_len = std::mem::take(&mut self.total_bytes_len);
+        // The buffer set is kept (buffers are cloned either way) so that the
+        // lookup in `extend_unchecked` still finds every buffer afterwards.
+        let buffers = self
+            .buffers
+            .iter()
+            .map(|key| key.inner.clone())
+            .collect::<Vec<_>>();
+        BinaryViewArrayGeneric::<T>::new_unchecked(
+            self.data_type.clone(),
+            views.into(),
+            Arc::from(buffers),
+            validity.map(|v| v.into()),
+            total_bytes_len,
+            self.total_buffer_len,
+        )
+        .maybe_gc()
+    }
+
+    /// Appends `len` views of `arrays[index]` starting at `start`, rewriting the
+    /// buffer index of every non-inlined view to its position in the merged buffer set.
+    ///
+    /// # Safety
+    /// doesn't check bounds
+    pub unsafe fn extend_unchecked(&mut self, index: usize, start: usize, len: usize) {
+        let array = *self.arrays.get_unchecked(index);
+        let local_buffers = array.data_buffers();
+
+        extend_validity(&mut self.validity, array, start, len);
+
+        let range = start..start + len;
+
+        self.views
+            .extend(array.views().get_unchecked(range).iter().map(|view| {
+                let mut view = *view;
+                let len = view.length as usize;
+                self.total_bytes_len += len;
+
+                if len > 12 {
+                    let buffer = local_buffers.get_unchecked(view.buffer_idx as usize);
+                    let key = BufferKey { inner: buffer };
+                    // Every buffer of every source array was registered in `new`.
+                    let idx = self.buffers.get_full(&key).unwrap_unchecked().0;
+
+                    view.buffer_idx = idx as u32;
+                }
+                view
+            }));
+    }
+
+    #[inline]
+    /// Ignores the buffers and doesn't update the view. This is only correct in a filter.
+    /// # Safety
+    /// doesn't check bounds
+    pub unsafe fn extend_unchecked_no_buffers(&mut self, index: usize, start: usize, len: usize) {
+        let array = *self.arrays.get_unchecked(index);
+
+        extend_validity(&mut self.validity, array, start, len);
+
+        let range = start..start + len;
+
+        self.views
+            .extend(array.views().get_unchecked(range).iter().map(|view| {
+                let len = view.length as usize;
+                self.total_bytes_len += len;
+
+                *view
+            }))
+    }
+}
+
+impl<'a, T: ViewType + ?Sized> Growable<'a> for GrowableBinaryViewArray<'a, T> {
+    /// # Panics
+    /// Panics if `index` is not a valid array index or if `start..start + len`
+    /// is not within the bounds of that array.
+    fn extend(&mut self, index: usize, start: usize, len: usize) {
+        // This is a safe method, so the preconditions of `extend_unchecked`
+        // must be established here instead of being assumed.
+        let array_len = self.arrays[index].len();
+        assert!(
+            matches!(start.checked_add(len), Some(end) if end <= array_len),
+            "the range to extend from must be within the bounds of the source array"
+        );
+        // SAFETY: `index` and `start..start + len` were checked above.
+        unsafe { self.extend_unchecked(index, start, len) }
+    }
+
+    fn extend_validity(&mut self, additional: usize) {
+        self.views
+            .extend(std::iter::repeat(View::default()).take(additional));
+        if let Some(validity) = &mut self.validity {
+            validity.extend_constant(additional, false);
+        }
+    }
+
+    #[inline]
+    fn len(&self) -> usize {
+        self.views.len()
+    }
+
+    fn as_arc(&mut self) -> Arc<dyn Array> {
+        self.to().arced()
+    }
+
+    fn as_box(&mut self) -> Box<dyn Array> {
+        self.to().boxed()
+    }
+}
+
+impl<'a, T: ViewType + ?Sized> From<GrowableBinaryViewArray<'a, T>> for BinaryViewArrayGeneric<T> {
+    fn from(val: GrowableBinaryViewArray<'a, T>) -> Self {
+        BinaryViewArrayGeneric::<T>::new_unchecked(
+            val.data_type,
+            val.views.into(),
+            Arc::from(
+                val.buffers
+                    .into_iter()
+                    .map(|buf| buf.inner.clone())
+                    .collect::<Vec<_>>(),
+            ),
+            val.validity.map(|v| v.into()),
+            val.total_bytes_len,
+            val.total_buffer_len,
+        )
+        .maybe_gc()
+    }
+}
diff --git
a/src/common/arrow/src/arrow/array/growable/mod.rs b/src/common/arrow/src/arrow/array/growable/mod.rs index cad26975a850..8730567827eb 100644 --- a/src/common/arrow/src/arrow/array/growable/mod.rs +++ b/src/common/arrow/src/arrow/array/growable/mod.rs @@ -46,6 +46,7 @@ pub use utf8::GrowableUtf8; mod dictionary; pub use dictionary::GrowableDictionary; +mod binview; mod utils; /// Describes a struct that can be extended from slices of other pre-existing [`Array`]s. @@ -147,6 +148,22 @@ pub fn make_growable<'a>( Box::new(union::GrowableUnion::new(arrays, capacity)) } Map => dyn_growable!(map::GrowableMap, arrays, use_validity, capacity), + BinaryView => { + dyn_growable!( + binview::GrowableBinaryViewArray::<[u8]>, + arrays, + use_validity, + capacity + ) + } + Utf8View => { + dyn_growable!( + binview::GrowableBinaryViewArray::, + arrays, + use_validity, + capacity + ) + } Dictionary(key_type) => { match_integer_type!(key_type, |$T| { let arrays = arrays diff --git a/src/common/arrow/src/arrow/array/growable/utils.rs b/src/common/arrow/src/arrow/array/growable/utils.rs index 3e7179b77a2c..5536113321ae 100644 --- a/src/common/arrow/src/arrow/array/growable/utils.rs +++ b/src/common/arrow/src/arrow/array/growable/utils.rs @@ -40,6 +40,14 @@ pub(super) fn build_extend_null_bits(array: &dyn Array, use_validity: bool) -> E } } +pub(super) fn prepare_validity(use_validity: bool, capacity: usize) -> Option { + if use_validity { + Some(MutableBitmap::with_capacity(capacity)) + } else { + None + } +} + #[inline] pub(super) fn extend_offset_values( buffer: &mut Vec, @@ -53,3 +61,24 @@ pub(super) fn extend_offset_values( let new_values = &values[start_values..end_values]; buffer.extend_from_slice(new_values); } + +pub(super) fn extend_validity( + mutable_validity: &mut Option, + array: &dyn Array, + start: usize, + len: usize, +) { + if let Some(mutable_validity) = mutable_validity { + match array.validity() { + None => mutable_validity.extend_constant(len, true), + 
Some(validity) => { + debug_assert!(start + len <= validity.len()); + let (slice, offset, _) = validity.as_slice(); + // safety: invariant offset + length <= slice.len() + unsafe { + mutable_validity.extend_from_slice_unchecked(slice, start + offset, len); + } + } + } + } +} diff --git a/src/common/arrow/src/arrow/array/iterator.rs b/src/common/arrow/src/arrow/array/iterator.rs index 00879b6af383..f4e74b6e37c9 100644 --- a/src/common/arrow/src/arrow/array/iterator.rs +++ b/src/common/arrow/src/arrow/array/iterator.rs @@ -13,6 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::arrow::bitmap::Bitmap; +use crate::arrow::bitmap::TrueIdxIter; use crate::arrow::trusted_len::TrustedLen; mod private { @@ -96,3 +98,35 @@ impl<'a, A: ArrayAccessor<'a>> DoubleEndedIterator for ArrayValuesIter<'a, A> { unsafe impl<'a, A: ArrayAccessor<'a>> TrustedLen for ArrayValuesIter<'a, A> {} impl<'a, A: ArrayAccessor<'a>> ExactSizeIterator for ArrayValuesIter<'a, A> {} + +pub struct NonNullValuesIter<'a, A: ?Sized> { + accessor: &'a A, + idxs: TrueIdxIter<'a>, +} + +impl<'a, A: ArrayAccessor<'a> + ?Sized> NonNullValuesIter<'a, A> { + pub fn new(accessor: &'a A, validity: Option<&'a Bitmap>) -> Self { + Self { + idxs: TrueIdxIter::new(accessor.len(), validity), + accessor, + } + } +} + +impl<'a, A: ArrayAccessor<'a> + ?Sized> Iterator for NonNullValuesIter<'a, A> { + type Item = A::Item; + + #[inline] + fn next(&mut self) -> Option { + if let Some(i) = self.idxs.next() { + return Some(unsafe { self.accessor.value_unchecked(i) }); + } + None + } + + fn size_hint(&self) -> (usize, Option) { + self.idxs.size_hint() + } +} + +unsafe impl<'a, A: ArrayAccessor<'a> + ?Sized> TrustedLen for NonNullValuesIter<'a, A> {} diff --git a/src/common/arrow/src/arrow/array/list/ffi.rs b/src/common/arrow/src/arrow/array/list/ffi.rs index ca4189564b94..fae1a6d4ea1c 100644 --- a/src/common/arrow/src/arrow/array/list/ffi.rs +++ 
b/src/common/arrow/src/arrow/array/list/ffi.rs @@ -27,7 +27,7 @@ unsafe impl ToFfi for ListArray { fn buffers(&self) -> Vec> { vec![ self.validity.as_ref().map(|x| x.as_ptr()), - Some(self.offsets.buffer().as_ptr().cast::()), + Some(self.offsets.buffer().data_ptr().cast::()), ] } diff --git a/src/common/arrow/src/arrow/array/map/ffi.rs b/src/common/arrow/src/arrow/array/map/ffi.rs index 9f030a287d30..315d47cf12d1 100644 --- a/src/common/arrow/src/arrow/array/map/ffi.rs +++ b/src/common/arrow/src/arrow/array/map/ffi.rs @@ -26,7 +26,7 @@ unsafe impl ToFfi for MapArray { fn buffers(&self) -> Vec> { vec![ self.validity.as_ref().map(|x| x.as_ptr()), - Some(self.offsets.buffer().as_ptr().cast::()), + Some(self.offsets.buffer().data_ptr().cast::()), ] } diff --git a/src/common/arrow/src/arrow/array/mod.rs b/src/common/arrow/src/arrow/array/mod.rs index a6da372f8919..2d9f889bd1d5 100644 --- a/src/common/arrow/src/arrow/array/mod.rs +++ b/src/common/arrow/src/arrow/array/mod.rs @@ -323,6 +323,7 @@ macro_rules! with_match_primitive_type {( Float16 => __with_ty__! { f16 }, Float32 => __with_ty__! { f32 }, Float64 => __with_ty__! 
{ f64 }, + _ => panic!("Do not support primitive `{:?}`", $key_type) } })} @@ -335,6 +336,8 @@ impl std::fmt::Debug for dyn Array + '_ { Primitive(primitive) => with_match_primitive_type!(primitive, |$T| { fmt_dyn!(self, PrimitiveArray<$T>, f) }), + BinaryView => fmt_dyn!(self, BinaryViewArray, f), + Utf8View => fmt_dyn!(self, Utf8ViewArray, f), Binary => fmt_dyn!(self, BinaryArray, f), LargeBinary => fmt_dyn!(self, BinaryArray, f), FixedSizeBinary => fmt_dyn!(self, FixedSizeBinaryArray, f), @@ -375,6 +378,8 @@ pub fn new_empty_array(data_type: DataType) -> Box { Struct => Box::new(StructArray::new_empty(data_type)), Union => Box::new(UnionArray::new_empty(data_type)), Map => Box::new(MapArray::new_empty(data_type)), + Utf8View => Box::new(Utf8ViewArray::new_empty(data_type)), + BinaryView => Box::new(BinaryViewArray::new_empty(data_type)), Dictionary(key_type) => { match_integer_type!(key_type, |$T| { Box::new(DictionaryArray::<$T>::new_empty(data_type)) @@ -405,6 +410,8 @@ pub fn new_null_array(data_type: DataType, length: usize) -> Box { Struct => Box::new(StructArray::new_null(data_type, length)), Union => Box::new(UnionArray::new_null(data_type, length)), Map => Box::new(MapArray::new_null(data_type, length)), + BinaryView => Box::new(BinaryViewArray::new_null(data_type, length)), + Utf8View => Box::new(Utf8ViewArray::new_null(data_type, length)), Dictionary(key_type) => { match_integer_type!(key_type, |$T| { Box::new(DictionaryArray::<$T>::new_null(data_type, length)) @@ -487,6 +494,7 @@ pub fn to_data(array: &dyn Array) -> arrow_data::ArrayData { }) } Map => to_data_dyn!(array, MapArray), + BinaryView | Utf8View => unimplemented!(), } } @@ -517,6 +525,7 @@ pub fn from_data(data: &arrow_data::ArrayData) -> Box { }) } Map => Box::new(MapArray::from_data(data)), + BinaryView | Utf8View => unimplemented!(), } } @@ -702,6 +711,8 @@ pub fn clone(array: &dyn Array) -> Box { Struct => clone_dyn!(array, StructArray), Union => clone_dyn!(array, UnionArray), Map => 
clone_dyn!(array, MapArray), + BinaryView => clone_dyn!(array, BinaryViewArray), + Utf8View => clone_dyn!(array, Utf8ViewArray), Dictionary(key_type) => { match_integer_type!(key_type, |$T| { clone_dyn!(array, DictionaryArray::<$T>) @@ -719,6 +730,8 @@ impl<'a> AsRef<(dyn Array + 'a)> for dyn Array { } mod binary; +mod binview; + mod boolean; mod dictionary; mod fixed_size_binary; @@ -746,6 +759,13 @@ pub use binary::BinaryArray; pub use binary::BinaryValueIter; pub use binary::MutableBinaryArray; pub use binary::MutableBinaryValuesArray; +pub use binview::BinaryViewArray; +pub use binview::BinaryViewArrayGeneric; +pub use binview::BinaryViewValueIter; +pub use binview::MutableBinaryViewArray; +pub use binview::Utf8ViewArray; +pub use binview::View; +pub use binview::ViewType; pub use boolean::BooleanArray; pub use boolean::MutableBooleanArray; pub use dictionary::DictionaryArray; diff --git a/src/common/arrow/src/arrow/array/primitive/ffi.rs b/src/common/arrow/src/arrow/array/primitive/ffi.rs index bc903228b5c7..cdcba358e20f 100644 --- a/src/common/arrow/src/arrow/array/primitive/ffi.rs +++ b/src/common/arrow/src/arrow/array/primitive/ffi.rs @@ -25,7 +25,7 @@ unsafe impl ToFfi for PrimitiveArray { fn buffers(&self) -> Vec> { vec![ self.validity.as_ref().map(|x| x.as_ptr()), - Some(self.values.as_ptr().cast::()), + Some(self.values.data_ptr().cast::()), ] } diff --git a/src/common/arrow/src/arrow/array/union/ffi.rs b/src/common/arrow/src/arrow/array/union/ffi.rs index 3e5fd098f1b0..db697f4c620c 100644 --- a/src/common/arrow/src/arrow/array/union/ffi.rs +++ b/src/common/arrow/src/arrow/array/union/ffi.rs @@ -24,11 +24,11 @@ unsafe impl ToFfi for UnionArray { fn buffers(&self) -> Vec> { if let Some(offsets) = &self.offsets { vec![ - Some(self.types.as_ptr().cast::()), - Some(offsets.as_ptr().cast::()), + Some(self.types.data_ptr().cast::()), + Some(offsets.data_ptr().cast::()), ] } else { - vec![Some(self.types.as_ptr().cast::())] + 
vec![Some(self.types.data_ptr().cast::())] } } diff --git a/src/common/arrow/src/arrow/array/utf8/ffi.rs b/src/common/arrow/src/arrow/array/utf8/ffi.rs index 7a4c425f2fd9..f6c479630408 100644 --- a/src/common/arrow/src/arrow/array/utf8/ffi.rs +++ b/src/common/arrow/src/arrow/array/utf8/ffi.rs @@ -26,8 +26,8 @@ unsafe impl ToFfi for Utf8Array { fn buffers(&self) -> Vec> { vec![ self.validity.as_ref().map(|x| x.as_ptr()), - Some(self.offsets.buffer().as_ptr().cast::()), - Some(self.values.as_ptr().cast::()), + Some(self.offsets.buffer().data_ptr().cast::()), + Some(self.values.data_ptr().cast::()), ] } diff --git a/src/common/arrow/src/arrow/bitmap/bitmask.rs b/src/common/arrow/src/arrow/bitmap/bitmask.rs new file mode 100644 index 000000000000..d0f74f11e480 --- /dev/null +++ b/src/common/arrow/src/arrow/bitmap/bitmask.rs @@ -0,0 +1,335 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[cfg(feature = "simd")] +use std::simd::LaneCount; +#[cfg(feature = "simd")] +use std::simd::Mask; +#[cfg(feature = "simd")] +use std::simd::MaskElement; +#[cfg(feature = "simd")] +use std::simd::SupportedLaneCount; + +use crate::arrow::bitmap::Bitmap; + +/// Returns the nth set bit in w, if n+1 bits are set. The indexing is +/// zero-based, nth_set_bit_u32(w, 0) returns the least significant set bit in w. 
+#[allow(dead_code)] +fn nth_set_bit_u32(w: u32, n: u32) -> Option { + // If we have BMI2's PDEP available, we use it. It takes the lower order + // bits of the first argument and spreads it along its second argument + // where those bits are 1. So PDEP(abcdefgh, 11001001) becomes ef00g00h. + // We use this by setting the first argument to 1 << n, which means the + // first n-1 zero bits of it will spread to the first n-1 one bits of w, + // after which the one bit will exactly get copied to the nth one bit of w. + #[cfg(target_feature = "bmi2")] + { + if n >= 32 { + return None; + } + + let nth_set_bit = unsafe { core::arch::x86_64::_pdep_u32(1 << n, w) }; + if nth_set_bit == 0 { + return None; + } + + Some(nth_set_bit.trailing_zeros()) + } + + #[cfg(not(target_feature = "bmi2"))] + { + // Each block of 2/4/8/16 bits contains how many set bits there are in that block. + let set_per_2 = w - ((w >> 1) & 0x55555555); + let set_per_4 = (set_per_2 & 0x33333333) + ((set_per_2 >> 2) & 0x33333333); + let set_per_8 = (set_per_4 + (set_per_4 >> 4)) & 0x0f0f0f0f; + let set_per_16 = (set_per_8 + (set_per_8 >> 8)) & 0x00ff00ff; + let set_per_32 = (set_per_16 + (set_per_16 >> 16)) & 0xff; + if n >= set_per_32 { + return None; + } + + let mut idx = 0; + let mut n = n; + let next16 = set_per_16 & 0xff; + if n >= next16 { + n -= next16; + idx += 16; + } + let next8 = (set_per_8 >> idx) & 0xff; + if n >= next8 { + n -= next8; + idx += 8; + } + let next4 = (set_per_4 >> idx) & 0b1111; + if n >= next4 { + n -= next4; + idx += 4; + } + let next2 = (set_per_2 >> idx) & 0b11; + if n >= next2 { + n -= next2; + idx += 2; + } + let next1 = (w >> idx) & 0b1; + if n >= next1 { + idx += 1; + } + Some(idx) + } +} + +// Loads a u64 from the given byteslice, as if it were padded with zeros. 
+fn load_padded_le_u64(bytes: &[u8]) -> u64 { + let len = bytes.len(); + if len >= 8 { + return u64::from_le_bytes(bytes[0..8].try_into().unwrap()); + } + + if len >= 4 { + let lo = u32::from_le_bytes(bytes[0..4].try_into().unwrap()); + let hi = u32::from_le_bytes(bytes[len - 4..len].try_into().unwrap()); + return (lo as u64) | ((hi as u64) << (8 * (len - 4))); + } + + if len == 0 { + return 0; + } + + let lo = bytes[0] as u64; + let mid = (bytes[len / 2] as u64) << (8 * (len / 2)); + let hi = (bytes[len - 1] as u64) << (8 * (len - 1)); + lo | mid | hi +} + +#[derive(Default, Clone)] +pub struct BitMask<'a> { + bytes: &'a [u8], + offset: usize, + len: usize, +} + +impl<'a> BitMask<'a> { + pub fn from_bitmap(bitmap: &'a Bitmap) -> Self { + let (bytes, offset, len) = bitmap.as_slice(); + // Check length so we can use unsafe access in our get. + assert!(bytes.len() * 8 >= len + offset); + Self { bytes, offset, len } + } + + #[allow(dead_code)] + #[inline(always)] + pub fn len(&self) -> usize { + self.len + } + + #[allow(dead_code)] + #[inline] + pub fn split_at(&self, idx: usize) -> (Self, Self) { + assert!(idx <= self.len); + unsafe { self.split_at_unchecked(idx) } + } + + /// # Safety + /// The index must be in-bounds. + #[allow(dead_code)] + #[inline] + pub unsafe fn split_at_unchecked(&self, idx: usize) -> (Self, Self) { + debug_assert!(idx <= self.len); + let left = Self { len: idx, ..*self }; + let right = Self { + len: self.len - idx, + offset: self.offset + idx, + ..*self + }; + (left, right) + } + + #[cfg(feature = "simd")] + #[allow(dead_code)] + #[inline] + pub fn get_simd(&self, idx: usize) -> Mask + where + T: MaskElement, + LaneCount: SupportedLaneCount, + { + // We don't support 64-lane masks because then we couldn't load our + // bitwise mask as a u64 and then do the byteshift on it. 
+ + let lanes = LaneCount::::BITMASK_LEN; + assert!(lanes < 64); + + let start_byte_idx = (self.offset + idx) / 8; + let byte_shift = (self.offset + idx) % 8; + if idx + lanes <= self.len { + // SAFETY: fast path, we know this is completely in-bounds. + let mask = load_padded_le_u64(unsafe { self.bytes.get_unchecked(start_byte_idx..) }); + Mask::from_bitmask(mask >> byte_shift) + } else if idx < self.len { + // SAFETY: we know that at least the first byte is in-bounds. + // This is partially out of bounds, we have to do extra masking. + let mask = load_padded_le_u64(unsafe { self.bytes.get_unchecked(start_byte_idx..) }); + let num_out_of_bounds = idx + lanes - self.len; + let shifted = (mask << num_out_of_bounds) >> (num_out_of_bounds + byte_shift); + Mask::from_bitmask(shifted) + } else { + Mask::from_bitmask(0u64) + } + } + + #[inline] + pub fn get_u32(&self, idx: usize) -> u32 { + let start_byte_idx = (self.offset + idx) / 8; + let byte_shift = (self.offset + idx) % 8; + if idx + 32 <= self.len { + // SAFETY: fast path, we know this is completely in-bounds. + let mask = load_padded_le_u64(unsafe { self.bytes.get_unchecked(start_byte_idx..) }); + (mask >> byte_shift) as u32 + } else if idx < self.len { + // SAFETY: we know that at least the first byte is in-bounds. + // This is partially out of bounds, we have to do extra masking. + let mask = load_padded_le_u64(unsafe { self.bytes.get_unchecked(start_byte_idx..) }); + let out_of_bounds_mask = (1u32 << (self.len - idx)) - 1; + ((mask >> byte_shift) as u32) & out_of_bounds_mask + } else { + 0 + } + } + + /// Computes the index of the nth set bit after start. + /// + /// Both are zero-indexed, so nth_set_bit_idx(0, 0) finds the index of the + /// first bit set (which can be 0 as well). The returned index is absolute, + /// not relative to start. 
+ #[allow(dead_code)] + pub fn nth_set_bit_idx(&self, mut n: usize, mut start: usize) -> Option { + while start < self.len { + let next_u32_mask = self.get_u32(start); + if next_u32_mask == u32::MAX { + // Happy fast path for dense non-null section. + if n < 32 { + return Some(start + n); + } + n -= 32; + } else { + let ones = next_u32_mask.count_ones() as usize; + if n < ones { + let idx = unsafe { + // SAFETY: we know the nth bit is in the mask. + nth_set_bit_u32(next_u32_mask, n as u32).unwrap_unchecked() as usize + }; + return Some(start + idx); + } + n -= ones; + } + + start += 32; + } + + None + } + + /// Computes the index of the nth set bit before end, counting backwards. + /// + /// Both are zero-indexed, so nth_set_bit_idx_rev(0, len) finds the index of + /// the last bit set (which can be 0 as well). The returned index is + /// absolute (and starts at the beginning), not relative to end. + #[allow(dead_code)] + pub fn nth_set_bit_idx_rev(&self, mut n: usize, mut end: usize) -> Option { + while end > 0 { + // We want to find bits *before* end, so if end < 32 we must mask + // out the bits after the endth. + let (u32_mask_start, u32_mask_mask) = if end >= 32 { + (end - 32, u32::MAX) + } else { + (0, (1 << end) - 1) + }; + let next_u32_mask = self.get_u32(u32_mask_start) & u32_mask_mask; + if next_u32_mask == u32::MAX { + // Happy fast path for dense non-null section. + if n < 32 { + return Some(end - 1 - n); + } + n -= 32; + } else { + let ones = next_u32_mask.count_ones() as usize; + if n < ones { + let rev_n = ones - 1 - n; + let idx = unsafe { + // SAFETY: we know the rev_nth bit is in the mask. 
+ nth_set_bit_u32(next_u32_mask, rev_n as u32).unwrap_unchecked() as usize + }; + return Some(u32_mask_start + idx); + } + n -= ones; + } + + end = u32_mask_start; + } + + None + } + + #[allow(dead_code)] + #[inline] + pub fn get(&self, idx: usize) -> bool { + let byte_idx = (self.offset + idx) / 8; + let byte_shift = (self.offset + idx) % 8; + + if idx < self.len { + // SAFETY: we know this is in-bounds. + let byte = unsafe { *self.bytes.get_unchecked(byte_idx) }; + (byte >> byte_shift) & 1 == 1 + } else { + false + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + fn naive_nth_bit_set(mut w: u32, mut n: u32) -> Option { + for i in 0..32 { + if w & (1 << i) != 0 { + if n == 0 { + return Some(i); + } + n -= 1; + w ^= 1 << i; + } + } + None + } + + #[test] + fn test_nth_set_bit_u32() { + for n in 0..256 { + assert_eq!(nth_set_bit_u32(0, n), None); + } + + for i in 0..32 { + assert_eq!(nth_set_bit_u32(1 << i, 0), Some(i)); + assert_eq!(nth_set_bit_u32(1 << i, 1), None); + } + + for i in 0..10000 { + let rnd = (0xbdbc9d8ec9d5c461u64.wrapping_mul(i as u64) >> 32) as u32; + for i in 0..=32 { + assert_eq!(nth_set_bit_u32(rnd, i), naive_nth_bit_set(rnd, i)); + } + } + } +} diff --git a/src/common/arrow/src/arrow/bitmap/iterator.rs b/src/common/arrow/src/arrow/bitmap/iterator.rs index 8114927c6d81..439ddd1b3198 100644 --- a/src/common/arrow/src/arrow/bitmap/iterator.rs +++ b/src/common/arrow/src/arrow/bitmap/iterator.rs @@ -14,8 +14,78 @@ // limitations under the License. 
use super::Bitmap; +use crate::arrow::bitmap::bitmask::BitMask; use crate::arrow::trusted_len::TrustedLen; +pub struct TrueIdxIter<'a> { + mask: BitMask<'a>, + first_unknown: usize, + i: usize, + len: usize, + remaining: usize, +} + +impl<'a> TrueIdxIter<'a> { + #[inline] + pub fn new(len: usize, validity: Option<&'a Bitmap>) -> Self { + if let Some(bitmap) = validity { + assert!(len == bitmap.len()); + Self { + mask: BitMask::from_bitmap(bitmap), + first_unknown: 0, + i: 0, + remaining: bitmap.len() - bitmap.unset_bits(), + len, + } + } else { + Self { + mask: BitMask::default(), + first_unknown: len, + i: 0, + remaining: len, + len, + } + } + } +} + +impl<'a> Iterator for TrueIdxIter<'a> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // Fast path for many non-nulls in a row. + if self.i < self.first_unknown { + let ret = self.i; + self.i += 1; + self.remaining -= 1; + return Some(ret); + } + + while self.i < self.len { + let mask = self.mask.get_u32(self.i); + let num_null = mask.trailing_zeros(); + self.i += num_null as usize; + if num_null < 32 { + self.first_unknown = self.i + (mask >> num_null).trailing_ones() as usize; + let ret = self.i; + self.i += 1; + self.remaining -= 1; + return Some(ret); + } + } + + None + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.remaining, Some(self.remaining)) + } +} + +unsafe impl<'a> TrustedLen for TrueIdxIter<'a> {} + /// This crates' equivalent of [`std::vec::IntoIter`] for [`Bitmap`]. 
#[derive(Debug, Clone)] pub struct IntoIter { diff --git a/src/common/arrow/src/arrow/bitmap/mod.rs b/src/common/arrow/src/arrow/bitmap/mod.rs index 164fd347878d..367f48d58590 100644 --- a/src/common/arrow/src/arrow/bitmap/mod.rs +++ b/src/common/arrow/src/arrow/bitmap/mod.rs @@ -19,6 +19,7 @@ pub use immutable::*; mod iterator; pub use iterator::IntoIter; +pub use iterator::TrueIdxIter; mod mutable; pub use mutable::MutableBitmap; @@ -29,4 +30,5 @@ pub use bitmap_ops::*; mod assign_ops; pub use assign_ops::*; +mod bitmask; pub mod utils; diff --git a/src/common/arrow/src/arrow/bitmap/mutable.rs b/src/common/arrow/src/arrow/bitmap/mutable.rs index 8e1258ba3066..3cc56ceed914 100644 --- a/src/common/arrow/src/arrow/bitmap/mutable.rs +++ b/src/common/arrow/src/arrow/bitmap/mutable.rs @@ -347,6 +347,10 @@ impl MutableBitmap { pub(crate) fn bitchunks_exact_mut(&mut self) -> BitChunksExactMut { BitChunksExactMut::new(&mut self.buffer, self.length) } + + pub fn freeze(self) -> Bitmap { + self.into() + } } impl From for Bitmap { diff --git a/src/common/arrow/src/arrow/buffer/immutable.rs b/src/common/arrow/src/arrow/buffer/immutable.rs index a0a33e5de25f..a1ebfecc6e08 100644 --- a/src/common/arrow/src/arrow/buffer/immutable.rs +++ b/src/common/arrow/src/arrow/buffer/immutable.rs @@ -19,9 +19,11 @@ use std::sync::Arc; use std::usize; use either::Either; +use num_traits::Zero; use super::Bytes; use super::IntoIter; +use crate::arrow::array::ArrayAccessor; /// [`Buffer`] is a contiguous memory region that can be shared across /// thread boundaries. @@ -193,7 +195,7 @@ impl Buffer { /// Returns a pointer to the start of this buffer. 
#[inline] - pub(crate) fn as_ptr(&self) -> *const T { + pub(crate) fn data_ptr(&self) -> *const T { self.data.deref().as_ptr() } @@ -288,6 +290,21 @@ impl Buffer { } } +impl Buffer { + pub fn make_mut(self) -> Vec { + match self.into_mut() { + Either::Right(v) => v, + Either::Left(same) => same.as_slice().to_vec(), + } + } +} + +impl Buffer { + pub fn zeroed(len: usize) -> Self { + vec![T::zero(); len].into() + } +} + impl From> for Buffer { #[inline] fn from(p: Vec) -> Self { @@ -342,3 +359,16 @@ impl From> for arrow_buffer::Buffe ) } } + +unsafe impl<'a, T: 'a> ArrayAccessor<'a> for Buffer { + type Item = &'a T; + + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { + debug_assert!(index < self.length); + unsafe { self.get_unchecked(self.offset + index) } + } + + fn len(&self) -> usize { + Buffer::len(self) + } +} diff --git a/src/common/arrow/src/arrow/compute/aggregate/memory.rs b/src/common/arrow/src/arrow/compute/aggregate/memory.rs index 1aac461a772a..b8a78f31b021 100644 --- a/src/common/arrow/src/arrow/compute/aggregate/memory.rs +++ b/src/common/arrow/src/arrow/compute/aggregate/memory.rs @@ -37,6 +37,12 @@ macro_rules! dyn_binary { }}; } +fn binview_size(array: &BinaryViewArrayGeneric) -> usize { + array.views().len() * std::mem::size_of::() + + array.data_buffers().iter().map(|b| b.len()).sum::() + + validity_size(array.validity()) +} + /// Returns the total (heap) allocated size of the array in bytes. /// # Implementation /// This estimation is the sum of the size of its buffers, validity, including nested arrays. 
@@ -129,5 +135,7 @@ pub fn estimated_bytes_size(array: &dyn Array) -> usize { let offsets = array.offsets().len_proxy() * std::mem::size_of::(); offsets + estimated_bytes_size(array.field().as_ref()) + validity_size(array.validity()) } + Utf8View => binview_size::(array.as_any().downcast_ref().unwrap()), + BinaryView => binview_size::<[u8]>(array.as_any().downcast_ref().unwrap()), } } diff --git a/src/common/arrow/src/arrow/compute/take/binview.rs b/src/common/arrow/src/arrow/compute/take/binview.rs new file mode 100644 index 000000000000..f3b4177ab86a --- /dev/null +++ b/src/common/arrow/src/arrow/compute/take/binview.rs @@ -0,0 +1,39 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::arrow::array::Array; +use crate::arrow::array::BinaryViewArray; +use crate::arrow::array::PrimitiveArray; +use crate::arrow::compute::take::primitive::take_values_and_validity_unchecked; +use crate::arrow::types::Index; + +/// # Safety +/// No bound checks +pub(super) unsafe fn take_binview_unchecked( + arr: &BinaryViewArray, + indices: &PrimitiveArray, +) -> BinaryViewArray { + let (views, validity) = + take_values_and_validity_unchecked(arr.views(), arr.validity(), indices); + + BinaryViewArray::new_unchecked_unknown_md( + arr.data_type().clone(), + views.into(), + arr.data_buffers().clone(), + validity, + Some(arr.total_buffer_len()), + ) + .maybe_gc() +} diff --git a/src/common/arrow/src/arrow/compute/take/mod.rs b/src/common/arrow/src/arrow/compute/take/mod.rs index 422fd2d3a20e..7c6df0c8c553 100644 --- a/src/common/arrow/src/arrow/compute/take/mod.rs +++ b/src/common/arrow/src/arrow/compute/take/mod.rs @@ -19,11 +19,14 @@ use crate::arrow::array::new_empty_array; use crate::arrow::array::Array; use crate::arrow::array::NullArray; use crate::arrow::array::PrimitiveArray; +use crate::arrow::array::Utf8ViewArray; +use crate::arrow::compute::take::binview::take_binview_unchecked; use crate::arrow::datatypes::DataType; use crate::arrow::error::Result; use crate::arrow::types::Index; mod binary; +mod binview; mod boolean; mod dict; mod fixed_size_list; @@ -92,6 +95,15 @@ pub fn take(values: &dyn Array, indices: &PrimitiveArray) -> Result let array = values.as_any().downcast_ref().unwrap(); Ok(Box::new(fixed_size_list::take::(array, indices))) } + BinaryView => unsafe { + Ok(take_binview_unchecked(values.as_any().downcast_ref().unwrap(), indices).boxed()) + }, + Utf8View => unsafe { + let arr: &Utf8ViewArray = values.as_any().downcast_ref().unwrap(); + Ok(take_binview_unchecked(&arr.to_binview(), indices) + .to_utf8view_unchecked() + .boxed()) + }, t => unimplemented!("Take not supported for data type {:?}", t), } } diff --git 
a/src/common/arrow/src/arrow/compute/take/primitive.rs b/src/common/arrow/src/arrow/compute/take/primitive.rs index 871f65d4f7e9..2b37ca8a4194 100644 --- a/src/common/arrow/src/arrow/compute/take/primitive.rs +++ b/src/common/arrow/src/arrow/compute/take/primitive.rs @@ -13,6 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use arrow_buffer::bit_util::unset_bit_raw; + use super::Index; use crate::arrow::array::Array; use crate::arrow::array::PrimitiveArray; @@ -111,6 +113,65 @@ fn take_values_indices_validity( (values.into(), bitmap.into()) } +pub(super) unsafe fn take_values_and_validity_unchecked( + values: &[T], + validity_values: Option<&Bitmap>, + indices: &PrimitiveArray, +) -> (Vec, Option) { + let index_values = indices.values().as_slice(); + + let null_count = validity_values.map(|b| b.unset_bits()).unwrap_or(0); + + // first take the values, these are always needed + let values: Vec = if indices.null_count() == 0 { + index_values + .iter() + .map(|idx| *values.get_unchecked(idx.to_usize())) + .collect() + } else { + indices + .iter() + .map(|idx| match idx { + Some(idx) => *values.get_unchecked(idx.to_usize()), + None => T::default(), + }) + .collect() + }; + + if null_count > 0 { + let validity_values = validity_values.unwrap(); + // the validity buffer we will fill with all valid. And we unset the ones that are null + // in later checks + // this is in the assumption that most values will be valid. + // Maybe we could add another branch based on the null count + let mut validity = MutableBitmap::with_capacity(indices.len()); + validity.extend_constant(indices.len(), true); + let validity_ptr = validity.as_slice().as_ptr() as *mut u8; + + if let Some(validity_indices) = indices.validity().as_ref() { + index_values.iter().enumerate().for_each(|(i, idx)| { + // i is iteration count + // idx is the index that we take from the values array. 
+ let idx = idx.to_usize(); + if !validity_indices.get_bit_unchecked(i) || !validity_values.get_bit_unchecked(idx) + { + unset_bit_raw(validity_ptr, i); + } + }); + } else { + index_values.iter().enumerate().for_each(|(i, idx)| { + let idx = idx.to_usize(); + if !validity_values.get_bit_unchecked(idx) { + unset_bit_raw(validity_ptr, i); + } + }); + }; + (values, Some(validity.freeze())) + } else { + (values, indices.validity().cloned()) + } +} + /// `take` implementation for primitive arrays pub fn take( values: &PrimitiveArray, diff --git a/src/common/arrow/src/arrow/datatypes/mod.rs b/src/common/arrow/src/arrow/datatypes/mod.rs index f1661d834200..b63c0c265d88 100644 --- a/src/common/arrow/src/arrow/datatypes/mod.rs +++ b/src/common/arrow/src/arrow/datatypes/mod.rs @@ -174,6 +174,10 @@ pub enum DataType { Decimal256(usize, usize), /// Extension type. Extension(String, Box, Option), + /// A binary type that inlines small values and can intern bytes. + BinaryView, + /// A string type that inlines small values and can intern strings. 
+ Utf8View, } #[cfg(feature = "arrow")] @@ -232,6 +236,9 @@ impl From for arrow_schema::DataType { DataType::Decimal(precision, scale) => Self::Decimal128(precision as _, scale as _), DataType::Decimal256(precision, scale) => Self::Decimal256(precision as _, scale as _), DataType::Extension(_, d, _) => (*d).into(), + DataType::BinaryView | DataType::Utf8View => { + panic!("view datatypes are not supported by arrow-rs") + } } } } @@ -453,6 +460,8 @@ impl DataType { LargeBinary => PhysicalType::LargeBinary, Utf8 => PhysicalType::Utf8, LargeUtf8 => PhysicalType::LargeUtf8, + BinaryView => PhysicalType::BinaryView, + Utf8View => PhysicalType::Utf8View, List(_) => PhysicalType::List, FixedSizeList(_, _) => PhysicalType::FixedSizeList, LargeList(_) => PhysicalType::LargeList, @@ -509,6 +518,7 @@ impl From for DataType { PrimitiveType::Float64 => DataType::Float64, PrimitiveType::DaysMs => DataType::Interval(IntervalUnit::DayTime), PrimitiveType::MonthDayNano => DataType::Interval(IntervalUnit::MonthDayNano), + PrimitiveType::UInt128 => unimplemented!(), } } } diff --git a/src/common/arrow/src/arrow/datatypes/physical_type.rs b/src/common/arrow/src/arrow/datatypes/physical_type.rs index d74dfca227a2..82f5eb7ee638 100644 --- a/src/common/arrow/src/arrow/datatypes/physical_type.rs +++ b/src/common/arrow/src/arrow/datatypes/physical_type.rs @@ -56,6 +56,10 @@ pub enum PhysicalType { Map, /// A dictionary encoded array by `IntegerType`. Dictionary(IntegerType), + /// A binary type that inlines small values and can intern bytes. + BinaryView, + /// A string type that inlines small values and can intern strings. 
+ Utf8View, } impl PhysicalType { diff --git a/src/common/arrow/src/arrow/ffi/array.rs b/src/common/arrow/src/arrow/ffi/array.rs index 4c3aac1fc047..15e3a8fbef22 100644 --- a/src/common/arrow/src/arrow/ffi/array.rs +++ b/src/common/arrow/src/arrow/ffi/array.rs @@ -61,6 +61,8 @@ pub unsafe fn try_from(array: A) -> Result> { } Union => Box::new(UnionArray::try_from_ffi(array)?), Map => Box::new(MapArray::try_from_ffi(array)?), + BinaryView => Box::new(BinaryViewArray::try_from_ffi(array)?), + Utf8View => Box::new(Utf8ViewArray::try_from_ffi(array)?), }) } @@ -236,6 +238,21 @@ unsafe fn get_buffer_ptr( Ok(ptr as *mut T) } +unsafe fn create_buffer_known_len( + array: &ArrowArray, + data_type: &DataType, + owner: InternalArrowArray, + len: usize, + index: usize, +) -> Result> { + if len == 0 { + return Ok(Buffer::new()); + } + let ptr: *mut T = get_buffer_ptr(array, data_type, index)?; + let bytes = Bytes::from_foreign(ptr, len, BytesAllocator::InternalArrowArray(owner)); + Ok(Buffer::from_bytes(bytes)) +} + /// returns the buffer `i` of `array` interpreted as a [`Buffer`]. /// # Safety /// This function is safe iff: @@ -470,6 +487,17 @@ pub trait ArrowArrayRef: std::fmt::Debug { create_buffer::(self.array(), self.data_type(), self.owner(), index) } + /// # Safety + /// The caller must guarantee that the buffer `index` corresponds to a buffer. + /// This function assumes that the buffer created from FFI is valid; this is impossible to prove. 
+ unsafe fn buffer_known_len( + &self, + index: usize, + len: usize, + ) -> Result> { + create_buffer_known_len::(self.array(), self.data_type(), self.owner(), len, index) + } + /// # Safety /// This function is safe iff: /// * the buffer at position `index` is valid for the declared length diff --git a/src/common/arrow/src/arrow/ffi/bridge.rs b/src/common/arrow/src/arrow/ffi/bridge.rs index 8c5571a16e2b..85c9aa2e1cf5 100644 --- a/src/common/arrow/src/arrow/ffi/bridge.rs +++ b/src/common/arrow/src/arrow/ffi/bridge.rs @@ -52,5 +52,7 @@ pub fn align_to_c_data_interface(array: Box) -> Box { ffi_dyn!(array, DictionaryArray<$T>) }) } + BinaryView => ffi_dyn!(array, BinaryViewArray), + Utf8View => ffi_dyn!(array, Utf8ViewArray), } } diff --git a/src/common/arrow/src/arrow/ffi/schema.rs b/src/common/arrow/src/arrow/ffi/schema.rs index 31a710473b11..b97923c2e414 100644 --- a/src/common/arrow/src/arrow/ffi/schema.rs +++ b/src/common/arrow/src/arrow/ffi/schema.rs @@ -283,6 +283,8 @@ unsafe fn to_data_type(schema: &ArrowSchema) -> Result { "tDn" => DataType::Duration(TimeUnit::Nanosecond), "tiM" => DataType::Interval(IntervalUnit::YearMonth), "tiD" => DataType::Interval(IntervalUnit::DayTime), + "vu" => DataType::Utf8View, + "vz" => DataType::BinaryView, "+l" => { let child = schema.child(0); DataType::List(Box::new(to_field(child)?)) @@ -461,6 +463,8 @@ fn to_format(data_type: &DataType) -> String { tz.as_ref().map(|x| x.as_ref()).unwrap_or("") ) } + DataType::Utf8View => "vu".to_string(), + DataType::BinaryView => "vz".to_string(), DataType::Decimal(precision, scale) => format!("d:{precision},{scale}"), DataType::Decimal256(precision, scale) => format!("d:{precision},{scale},256"), DataType::List(_) => "+l".to_string(), diff --git a/src/common/arrow/src/arrow/io/ipc/read/array/binview.rs b/src/common/arrow/src/arrow/io/ipc/read/array/binview.rs new file mode 100644 index 000000000000..c642dc88ec99 --- /dev/null +++ b/src/common/arrow/src/arrow/io/ipc/read/array/binview.rs 
@@ -0,0 +1,102 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::VecDeque; +use std::io::Read; +use std::io::Seek; +use std::sync::Arc; + +use crate::arrow::array::BinaryViewArrayGeneric; +use crate::arrow::array::View; +use crate::arrow::array::ViewType; +use crate::arrow::buffer::Buffer; +use crate::arrow::datatypes::DataType; +use crate::arrow::error::Error; +use crate::arrow::error::Result; +use crate::arrow::io::ipc::read::read_basic::read_buffer; +use crate::arrow::io::ipc::read::read_basic::read_bytes; +use crate::arrow::io::ipc::read::read_basic::read_validity; +use crate::arrow::io::ipc::read::Compression; +use crate::arrow::io::ipc::read::IpcBuffer; +use crate::arrow::io::ipc::read::Node; +use crate::arrow::io::ipc::read::OutOfSpecKind; +use crate::ArrayRef; + +#[allow(clippy::too_many_arguments)] +pub fn read_binview( + field_nodes: &mut VecDeque, + variadic_buffer_counts: &mut VecDeque, + data_type: DataType, + buffers: &mut VecDeque, + reader: &mut R, + block_offset: u64, + is_little_endian: bool, + compression: Option, + limit: Option, + scratch: &mut Vec, +) -> Result { + let field_node = field_nodes.pop_front().ok_or_else(|| { + Error::oos(format!( + "IPC: unable to fetch the field for {data_type:?}. The file or stream is corrupted." 
+ )) + })?; + + let validity = read_validity( + buffers, + field_node, + reader, + block_offset, + is_little_endian, + compression, + limit, + scratch, + )?; + + let length: usize = field_node + .length() + .try_into() + .map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?; + let length = limit.map(|limit| limit.min(length)).unwrap_or(length); + + let views: Buffer = read_buffer( + buffers, + length, + reader, + block_offset, + is_little_endian, + compression, + scratch, + )?; + + let n_variadic = variadic_buffer_counts.pop_front().ok_or_else(|| { + Error::oos("IPC: unable to fetch the variadic buffers\n\nThe file or stream is corrupted.") + })?; + + let variadic_buffers = (0..n_variadic) + .map(|_| { + read_bytes( + buffers, + reader, + block_offset, + is_little_endian, + compression, + scratch, + ) + }) + .collect::>>>()?; + + BinaryViewArrayGeneric::::try_new(data_type, views, Arc::from(variadic_buffers), validity) + .map(|arr| arr.boxed()) +} diff --git a/src/common/arrow/src/arrow/io/ipc/read/array/fixed_size_list.rs b/src/common/arrow/src/arrow/io/ipc/read/array/fixed_size_list.rs index ee7c48b5d4d3..3650f6a811c3 100644 --- a/src/common/arrow/src/arrow/io/ipc/read/array/fixed_size_list.rs +++ b/src/common/arrow/src/arrow/io/ipc/read/array/fixed_size_list.rs @@ -34,6 +34,7 @@ use crate::arrow::error::Result; #[allow(clippy::too_many_arguments)] pub fn read_fixed_size_list( field_nodes: &mut VecDeque, + variadic_buffer_counts: &mut VecDeque, data_type: DataType, ipc_field: &IpcField, buffers: &mut VecDeque, @@ -69,6 +70,7 @@ pub fn read_fixed_size_list( let values = read( field_nodes, + variadic_buffer_counts, field, &ipc_field.fields[0], buffers, diff --git a/src/common/arrow/src/arrow/io/ipc/read/array/list.rs b/src/common/arrow/src/arrow/io/ipc/read/array/list.rs index 089e8323df0b..d3a931cf7df2 100644 --- a/src/common/arrow/src/arrow/io/ipc/read/array/list.rs +++ b/src/common/arrow/src/arrow/io/ipc/read/array/list.rs @@ -38,6 +38,7 @@ use 
crate::arrow::offset::Offset; #[allow(clippy::too_many_arguments)] pub fn read_list( field_nodes: &mut VecDeque, + variadic_buffer_counts: &mut VecDeque, data_type: DataType, ipc_field: &IpcField, buffers: &mut VecDeque, @@ -94,6 +95,7 @@ where let values = read( field_nodes, + variadic_buffer_counts, field, &ipc_field.fields[0], buffers, diff --git a/src/common/arrow/src/arrow/io/ipc/read/array/map.rs b/src/common/arrow/src/arrow/io/ipc/read/array/map.rs index 2335d105cbfe..9c9576455a87 100644 --- a/src/common/arrow/src/arrow/io/ipc/read/array/map.rs +++ b/src/common/arrow/src/arrow/io/ipc/read/array/map.rs @@ -36,6 +36,7 @@ use crate::arrow::error::Result; #[allow(clippy::too_many_arguments)] pub fn read_map( field_nodes: &mut VecDeque, + variadic_buffer_counts: &mut VecDeque, data_type: DataType, ipc_field: &IpcField, buffers: &mut VecDeque, @@ -89,6 +90,7 @@ pub fn read_map( let field = read( field_nodes, + variadic_buffer_counts, field, &ipc_field.fields[0], buffers, diff --git a/src/common/arrow/src/arrow/io/ipc/read/array/mod.rs b/src/common/arrow/src/arrow/io/ipc/read/array/mod.rs index 4d0252e3cffc..fbef1718964b 100644 --- a/src/common/arrow/src/arrow/io/ipc/read/array/mod.rs +++ b/src/common/arrow/src/arrow/io/ipc/read/array/mod.rs @@ -35,5 +35,8 @@ mod dictionary; pub use dictionary::*; mod union; pub use union::*; +mod binview; +pub use binview::*; mod map; + pub use map::*; diff --git a/src/common/arrow/src/arrow/io/ipc/read/array/struct_.rs b/src/common/arrow/src/arrow/io/ipc/read/array/struct_.rs index 55381f0ae5a0..d4e7fcf702a2 100644 --- a/src/common/arrow/src/arrow/io/ipc/read/array/struct_.rs +++ b/src/common/arrow/src/arrow/io/ipc/read/array/struct_.rs @@ -34,6 +34,7 @@ use crate::arrow::error::Result; #[allow(clippy::too_many_arguments)] pub fn read_struct( field_nodes: &mut VecDeque, + variadic_buffer_counts: &mut VecDeque, data_type: DataType, ipc_field: &IpcField, buffers: &mut VecDeque, @@ -71,6 +72,7 @@ pub fn read_struct( .map(|(field, 
ipc_field)| { read( field_nodes, + variadic_buffer_counts, field, ipc_field, buffers, diff --git a/src/common/arrow/src/arrow/io/ipc/read/array/union.rs b/src/common/arrow/src/arrow/io/ipc/read/array/union.rs index 271b4b2a9622..f4e0ea8ea173 100644 --- a/src/common/arrow/src/arrow/io/ipc/read/array/union.rs +++ b/src/common/arrow/src/arrow/io/ipc/read/array/union.rs @@ -36,6 +36,7 @@ use crate::arrow::error::Result; #[allow(clippy::too_many_arguments)] pub fn read_union( field_nodes: &mut VecDeque, + variadic_buffer_counts: &mut VecDeque, data_type: DataType, ipc_field: &IpcField, buffers: &mut VecDeque, @@ -102,6 +103,7 @@ pub fn read_union( .map(|(field, ipc_field)| { read( field_nodes, + variadic_buffer_counts, field, ipc_field, buffers, diff --git a/src/common/arrow/src/arrow/io/ipc/read/common.rs b/src/common/arrow/src/arrow/io/ipc/read/common.rs index 46affe903401..466568558118 100644 --- a/src/common/arrow/src/arrow/io/ipc/read/common.rs +++ b/src/common/arrow/src/arrow/io/ipc/read/common.rs @@ -113,6 +113,11 @@ pub fn read_record_batch( .buffers() .map_err(|err| Error::from(OutOfSpecKind::InvalidFlatbufferBuffers(err)))? .ok_or_else(|| Error::from(OutOfSpecKind::MissingMessageBuffers))?; + let mut variadic_buffer_counts = batch + .variadic_buffer_counts() + .map_err(|err| Error::from(OutOfSpecKind::InvalidFlatbufferRecordBatches(err)))? 
+ .map(|v| v.iter().map(|v| v as usize).collect::>()) + .unwrap_or_else(VecDeque::new); let mut buffers: VecDeque = buffers.iter().collect(); // check that the sum of the sizes of all buffers is <= than the size of the file @@ -147,6 +152,7 @@ pub fn read_record_batch( .map(|maybe_field| match maybe_field { ProjectionResult::Selected((field, ipc_field)) => Ok(Some(read( &mut field_nodes, + &mut variadic_buffer_counts, field, ipc_field, &mut buffers, @@ -175,6 +181,7 @@ pub fn read_record_batch( .map(|(field, ipc_field)| { read( &mut field_nodes, + &mut variadic_buffer_counts, field, ipc_field, &mut buffers, diff --git a/src/common/arrow/src/arrow/io/ipc/read/deserialize.rs b/src/common/arrow/src/arrow/io/ipc/read/deserialize.rs index c5c8338daa0c..18038c0b9d75 100644 --- a/src/common/arrow/src/arrow/io/ipc/read/deserialize.rs +++ b/src/common/arrow/src/arrow/io/ipc/read/deserialize.rs @@ -34,6 +34,7 @@ use crate::arrow::io::ipc::IpcField; #[allow(clippy::too_many_arguments)] pub fn read( field_nodes: &mut VecDeque, + variadic_buffer_counts: &mut VecDeque, field: &Field, ipc_field: &IpcField, buffers: &mut VecDeque, @@ -139,6 +140,7 @@ pub fn read( .map(|x| x.boxed()), List => read_list::( field_nodes, + variadic_buffer_counts, data_type, ipc_field, buffers, @@ -154,6 +156,7 @@ pub fn read( .map(|x| x.boxed()), LargeList => read_list::( field_nodes, + variadic_buffer_counts, data_type, ipc_field, buffers, @@ -169,6 +172,7 @@ pub fn read( .map(|x| x.boxed()), FixedSizeList => read_fixed_size_list( field_nodes, + variadic_buffer_counts, data_type, ipc_field, buffers, @@ -184,6 +188,7 @@ pub fn read( .map(|x| x.boxed()), Struct => read_struct( field_nodes, + variadic_buffer_counts, data_type, ipc_field, buffers, @@ -217,6 +222,7 @@ pub fn read( } Union => read_union( field_nodes, + variadic_buffer_counts, data_type, ipc_field, buffers, @@ -232,6 +238,7 @@ pub fn read( .map(|x| x.boxed()), Map => read_map( field_nodes, + variadic_buffer_counts, data_type, ipc_field, 
buffers, @@ -245,6 +252,30 @@ pub fn read( scratch, ) .map(|x| x.boxed()), + Utf8View => read_binview::( + field_nodes, + variadic_buffer_counts, + data_type, + buffers, + reader, + block_offset, + is_little_endian, + compression, + limit, + scratch, + ), + BinaryView => read_binview::<[u8], _>( + field_nodes, + variadic_buffer_counts, + data_type, + buffers, + reader, + block_offset, + is_little_endian, + compression, + limit, + scratch, + ), } } @@ -268,5 +299,6 @@ pub fn skip( Dictionary(_) => skip_dictionary(field_nodes, buffers), Union => skip_union(field_nodes, data_type, buffers), Map => skip_map(field_nodes, data_type, buffers), + BinaryView | Utf8View => todo!(), } } diff --git a/src/common/arrow/src/arrow/io/ipc/read/read_basic.rs b/src/common/arrow/src/arrow/io/ipc/read/read_basic.rs index e3d82afa52ed..1cf059968747 100644 --- a/src/common/arrow/src/arrow/io/ipc/read/read_basic.rs +++ b/src/common/arrow/src/arrow/io/ipc/read/read_basic.rs @@ -65,6 +65,23 @@ fn read_swapped( Ok(()) } +fn read_uncompressed_bytes( + reader: &mut R, + buffer_length: usize, + is_little_endian: bool, +) -> Result> { + if is_native_little_endian() == is_little_endian { + let mut buffer = Vec::with_capacity(buffer_length); + let _ = reader + .take(buffer_length as u64) + .read_to_end(&mut buffer) + .unwrap(); + Ok(buffer) + } else { + unreachable!() + } +} + fn read_uncompressed_buffer( reader: &mut R, buffer_length: usize, @@ -150,6 +167,65 @@ fn read_compressed_buffer( Ok(buffer) } +fn read_compressed_bytes( + reader: &mut R, + buffer_length: usize, + is_little_endian: bool, + compression: Compression, + scratch: &mut Vec, +) -> Result> { + read_compressed_buffer::( + reader, + buffer_length, + buffer_length, + is_little_endian, + compression, + scratch, + ) +} + +pub fn read_bytes( + buf: &mut VecDeque, + reader: &mut R, + block_offset: u64, + is_little_endian: bool, + compression: Option, + scratch: &mut Vec, +) -> Result> { + let buf = buf + .pop_front() + .ok_or_else(|| 
Error::oos(format!("out-of-spec: {:?}", OutOfSpecKind::ExpectedBuffer)))?; + + let offset: u64 = buf.offset().try_into().map_err(|_| { + Error::oos(format!( + "out-of-spec: {:?}", + OutOfSpecKind::NegativeFooterLength + )) + })?; + + let buffer_length: usize = buf.length().try_into().map_err(|_| { + Error::oos(format!( + "out-of-spec: {:?}", + OutOfSpecKind::NegativeFooterLength + )) + })?; + + reader.seek(SeekFrom::Start(block_offset + offset))?; + + if let Some(compression) = compression { + Ok(read_compressed_bytes( + reader, + buffer_length, + is_little_endian, + compression, + scratch, + )? + .into()) + } else { + Ok(read_uncompressed_bytes(reader, buffer_length, is_little_endian)?.into()) + } +} + pub fn read_buffer( buf: &mut VecDeque, length: usize, // in slots diff --git a/src/common/arrow/src/arrow/io/ipc/read/schema.rs b/src/common/arrow/src/arrow/io/ipc/read/schema.rs index d5109a33e2b1..690cca728ba4 100644 --- a/src/common/arrow/src/arrow/io/ipc/read/schema.rs +++ b/src/common/arrow/src/arrow/io/ipc/read/schema.rs @@ -292,6 +292,8 @@ fn get_data_type( LargeBinary(_) => (DataType::LargeBinary, IpcField::default()), Utf8(_) => (DataType::Utf8, IpcField::default()), LargeUtf8(_) => (DataType::LargeUtf8, IpcField::default()), + BinaryView(_) => (DataType::BinaryView, IpcField::default()), + Utf8View(_) => (DataType::Utf8View, IpcField::default()), FixedSizeBinary(fixed) => ( DataType::FixedSizeBinary( fixed @@ -364,7 +366,7 @@ fn get_data_type( Struct(_) => deserialize_struct(field)?, Union(union_) => deserialize_union(union_, field)?, Map(map) => deserialize_map(map, field)?, - _ => unimplemented!(), + RunEndEncoded(_) | LargeListView(_) | ListView(_) => unimplemented!(), }) } diff --git a/src/common/arrow/src/arrow/io/ipc/write/common.rs b/src/common/arrow/src/arrow/io/ipc/write/common.rs index fbd379930b1d..fe55287c6c30 100644 --- a/src/common/arrow/src/arrow/io/ipc/write/common.rs +++ b/src/common/arrow/src/arrow/io/ipc/write/common.rs @@ -56,7 +56,7 
@@ fn encode_dictionary( use PhysicalType::*; match array.data_type().to_physical_type() { Utf8 | LargeUtf8 | Binary | LargeBinary | Primitive(_) | Boolean | Null - | FixedSizeBinary => Ok(()), + | FixedSizeBinary | BinaryView | Utf8View => Ok(()), Dictionary(key_type) => match_integer_type!(key_type, |$T| { let dict_id = field.dictionary_id .ok_or_else(|| Error::InvalidArgumentError("Dictionaries must have an associated id".to_string()))?; @@ -247,28 +247,36 @@ fn serialize_compression( } } -fn set_variadic_buffer_counts(_counts: &mut Vec, array: &dyn Array) { +fn set_variadic_buffer_counts(counts: &mut Vec, array: &dyn Array) { match array.data_type() { + DataType::Utf8View => { + let array = array.as_any().downcast_ref::().unwrap(); + counts.push(array.data_buffers().len() as i64); + } + DataType::BinaryView => { + let array = array.as_any().downcast_ref::().unwrap(); + counts.push(array.data_buffers().len() as i64); + } DataType::Struct(_) => { let array = array.as_any().downcast_ref::().unwrap(); for array in array.values() { - set_variadic_buffer_counts(_counts, array.as_ref()) + set_variadic_buffer_counts(counts, array.as_ref()) } } DataType::LargeList(_) => { let array = array.as_any().downcast_ref::>().unwrap(); - set_variadic_buffer_counts(_counts, array.values().as_ref()) + set_variadic_buffer_counts(counts, array.values().as_ref()) } DataType::FixedSizeList(_, _) => { let array = array.as_any().downcast_ref::().unwrap(); - set_variadic_buffer_counts(_counts, array.values().as_ref()) + set_variadic_buffer_counts(counts, array.values().as_ref()) } DataType::Dictionary(_, _, _) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - set_variadic_buffer_counts(_counts, array.values().as_ref()) + set_variadic_buffer_counts(counts, array.values().as_ref()) } _ => (), } @@ -288,7 +296,6 @@ fn chunk_to_bytes_amortized( let mut offset = 0; let mut variadic_buffer_counts = vec![]; - for array in chunk.arrays() { set_variadic_buffer_counts(&mut 
variadic_buffer_counts, array.as_ref()); write( diff --git a/src/common/arrow/src/arrow/io/ipc/write/schema.rs b/src/common/arrow/src/arrow/io/ipc/write/schema.rs index 32809791d662..4f2740843f6c 100644 --- a/src/common/arrow/src/arrow/io/ipc/write/schema.rs +++ b/src/common/arrow/src/arrow/io/ipc/write/schema.rs @@ -274,6 +274,8 @@ fn serialize_type(data_type: &DataType) -> arrow_format::ipc::Type { Struct(_) => ipc::Type::Struct(Box::new(ipc::Struct {})), Dictionary(_, v, _) => serialize_type(v), Extension(_, v, _) => serialize_type(v), + Utf8View => ipc::Type::Utf8View(Box::new(ipc::Utf8View {})), + BinaryView => ipc::Type::BinaryView(Box::new(ipc::BinaryView {})), } } @@ -305,6 +307,8 @@ fn serialize_children(data_type: &DataType, ipc_field: &IpcField) -> Vec vec![], FixedSizeList(inner, _) | LargeList(inner) | List(inner) | Map(inner, _) => { diff --git a/src/common/arrow/src/arrow/io/ipc/write/serialize.rs b/src/common/arrow/src/arrow/io/ipc/write/serialize.rs index f449385ce7d2..21879ff2c8cd 100644 --- a/src/common/arrow/src/arrow/io/ipc/write/serialize.rs +++ b/src/common/arrow/src/arrow/io/ipc/write/serialize.rs @@ -441,6 +441,43 @@ pub(super) fn write_dictionary( } } +#[allow(clippy::too_many_arguments)] +pub(super) fn write_binview( + array: &BinaryViewArrayGeneric, + buffers: &mut Vec, + arrow_data: &mut Vec, + offset: &mut i64, + is_little_endian: bool, + compression: Option, +) { + let array = if array.is_sliced() { + array.clone().maybe_gc() + } else { + array.clone() + }; + write_bitmap( + array.validity(), + Array::len(&array), + buffers, + arrow_data, + offset, + compression, + ); + + write_buffer( + array.views(), + buffers, + arrow_data, + offset, + is_little_endian, + compression, + ); + + for data in array.data_buffers().as_ref() { + write_bytes(data, buffers, arrow_data, offset, compression); + } +} + /// Writes an [`Array`] to `arrow_data` pub fn write( array: &dyn Array, @@ -580,6 +617,22 @@ pub fn write( compression, ); } + Utf8View => 
write_binview( + array.as_any().downcast_ref::().unwrap(), + buffers, + arrow_data, + offset, + is_little_endian, + compression, + ), + BinaryView => write_binview( + array.as_any().downcast_ref::().unwrap(), + buffers, + arrow_data, + offset, + is_little_endian, + compression, + ), } } diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/basic.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/basic.rs index 53cdade6cd62..57d1511c3b9d 100644 --- a/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/basic.rs +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/basic.rs @@ -46,7 +46,7 @@ use crate::arrow::error::Result; use crate::arrow::offset::Offset; #[derive(Debug)] -pub(super) struct Required<'a> { +pub(crate) struct Required<'a> { pub values: SizedBinaryIter<'a>, } @@ -64,7 +64,7 @@ impl<'a> Required<'a> { } #[derive(Debug)] -pub(super) struct Delta<'a> { +pub(crate) struct Delta<'a> { pub lengths: std::vec::IntoIter, pub values: &'a [u8], } @@ -110,7 +110,7 @@ impl<'a> Iterator for Delta<'a> { } #[derive(Debug)] -pub(super) struct FilteredRequired<'a> { +pub(crate) struct FilteredRequired<'a> { pub values: SliceFilteredIter>, } @@ -130,7 +130,7 @@ impl<'a> FilteredRequired<'a> { } #[derive(Debug)] -pub(super) struct FilteredDelta<'a> { +pub(crate) struct FilteredDelta<'a> { pub values: SliceFilteredIter>, } @@ -149,16 +149,16 @@ impl<'a> FilteredDelta<'a> { } } -pub(super) type Dict = Vec>; +pub(crate) type BinaryDict = Vec>; #[derive(Debug)] -pub(super) struct RequiredDictionary<'a> { +pub(crate) struct RequiredDictionary<'a> { pub values: hybrid_rle::HybridRleDecoder<'a>, - pub dict: &'a Dict, + pub dict: &'a BinaryDict, } impl<'a> RequiredDictionary<'a> { - pub fn try_new(page: &'a DataPage, dict: &'a Dict) -> Result { + pub fn try_new(page: &'a DataPage, dict: &'a BinaryDict) -> Result { let values = utils::dict_indices_decoder(page)?; Ok(Self { dict, values }) @@ -171,13 +171,13 @@ impl<'a> 
RequiredDictionary<'a> { } #[derive(Debug)] -pub(super) struct FilteredRequiredDictionary<'a> { +pub(crate) struct FilteredRequiredDictionary<'a> { pub values: SliceFilteredIter>, - pub dict: &'a Dict, + pub dict: &'a BinaryDict, } impl<'a> FilteredRequiredDictionary<'a> { - pub fn try_new(page: &'a DataPage, dict: &'a Dict) -> Result { + pub fn try_new(page: &'a DataPage, dict: &'a BinaryDict) -> Result { let values = utils::dict_indices_decoder(page)?; let rows = get_selected_rows(page); @@ -193,13 +193,13 @@ impl<'a> FilteredRequiredDictionary<'a> { } #[derive(Debug)] -pub(super) struct ValuesDictionary<'a> { +pub(crate) struct ValuesDictionary<'a> { pub values: hybrid_rle::HybridRleDecoder<'a>, - pub dict: &'a Dict, + pub dict: &'a BinaryDict, } impl<'a> ValuesDictionary<'a> { - pub fn try_new(page: &'a DataPage, dict: &'a Dict) -> Result { + pub fn try_new(page: &'a DataPage, dict: &'a BinaryDict) -> Result { let values = utils::dict_indices_decoder(page)?; Ok(Self { dict, values }) @@ -212,7 +212,7 @@ impl<'a> ValuesDictionary<'a> { } #[derive(Debug)] -enum State<'a> { +pub(crate) enum BinaryState<'a> { Optional(OptionalPageValidity<'a>, BinaryIter<'a>), Required(Required<'a>), RequiredDictionary(RequiredDictionary<'a>), @@ -227,21 +227,21 @@ enum State<'a> { FilteredOptionalDictionary(FilteredOptionalPageValidity<'a>, ValuesDictionary<'a>), } -impl<'a> utils::PageState<'a> for State<'a> { +impl<'a> utils::PageState<'a> for BinaryState<'a> { fn len(&self) -> usize { match self { - State::Optional(validity, _) => validity.len(), - State::Required(state) => state.len(), - State::Delta(state) => state.len(), - State::OptionalDelta(state, _) => state.len(), - State::RequiredDictionary(values) => values.len(), - State::OptionalDictionary(optional, _) => optional.len(), - State::FilteredRequired(state) => state.len(), - State::FilteredOptional(validity, _) => validity.len(), - State::FilteredDelta(state) => state.len(), - State::FilteredOptionalDelta(state, _) => 
state.len(), - State::FilteredRequiredDictionary(values) => values.len(), - State::FilteredOptionalDictionary(optional, _) => optional.len(), + BinaryState::Optional(validity, _) => validity.len(), + BinaryState::Required(state) => state.len(), + BinaryState::Delta(state) => state.len(), + BinaryState::OptionalDelta(state, _) => state.len(), + BinaryState::RequiredDictionary(values) => values.len(), + BinaryState::OptionalDictionary(optional, _) => optional.len(), + BinaryState::FilteredRequired(state) => state.len(), + BinaryState::FilteredOptional(validity, _) => validity.len(), + BinaryState::FilteredDelta(state) => state.len(), + BinaryState::FilteredOptionalDelta(state, _) => state.len(), + BinaryState::FilteredRequiredDictionary(values) => values.len(), + BinaryState::FilteredOptionalDictionary(optional, _) => optional.len(), } } } @@ -258,73 +258,12 @@ struct BinaryDecoder { } impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { - type State = State<'a>; - type Dict = Dict; + type State = BinaryState<'a>; + type Dict = BinaryDict; type DecodedState = (Binary, MutableBitmap); fn build_state(&self, page: &'a DataPage, dict: Option<&'a Self::Dict>) -> Result { - let is_optional = - page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; - let is_filtered = page.selected_rows().is_some(); - - match (page.encoding(), dict, is_optional, is_filtered) { - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, false) => Ok( - State::RequiredDictionary(RequiredDictionary::try_new(page, dict)?), - ), - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, false) => { - Ok(State::OptionalDictionary( - OptionalPageValidity::try_new(page)?, - ValuesDictionary::try_new(page, dict)?, - )) - } - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, true) => { - FilteredRequiredDictionary::try_new(page, dict) - .map(State::FilteredRequiredDictionary) - } - (Encoding::PlainDictionary | 
Encoding::RleDictionary, Some(dict), true, true) => { - Ok(State::FilteredOptionalDictionary( - FilteredOptionalPageValidity::try_new(page)?, - ValuesDictionary::try_new(page, dict)?, - )) - } - (Encoding::Plain, _, true, false) => { - let (_, _, values) = split_buffer(page)?; - - let values = BinaryIter::new(values); - - Ok(State::Optional( - OptionalPageValidity::try_new(page)?, - values, - )) - } - (Encoding::Plain, _, false, false) => Ok(State::Required(Required::try_new(page)?)), - (Encoding::Plain, _, false, true) => { - Ok(State::FilteredRequired(FilteredRequired::new(page))) - } - (Encoding::Plain, _, true, true) => { - let (_, _, values) = split_buffer(page)?; - - Ok(State::FilteredOptional( - FilteredOptionalPageValidity::try_new(page)?, - BinaryIter::new(values), - )) - } - (Encoding::DeltaLengthByteArray, _, false, false) => { - Delta::try_new(page).map(State::Delta) - } - (Encoding::DeltaLengthByteArray, _, true, false) => Ok(State::OptionalDelta( - OptionalPageValidity::try_new(page)?, - Delta::try_new(page)?, - )), - (Encoding::DeltaLengthByteArray, _, false, true) => { - FilteredDelta::try_new(page).map(State::FilteredDelta) - } - (Encoding::DeltaLengthByteArray, _, true, true) => Ok(State::FilteredOptionalDelta( - FilteredOptionalPageValidity::try_new(page)?, - Delta::try_new(page)?, - )), - _ => Err(utils::not_implemented(page)), - } + build_binary_state(page, dict) } fn with_capacity(&self, capacity: usize) -> Self::DecodedState { @@ -342,22 +281,22 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { ) { let (values, validity) = decoded; match state { - State::Optional(page_validity, page_values) => extend_from_decoder( + BinaryState::Optional(page_validity, page_values) => extend_from_decoder( validity, page_validity, Some(additional), values, page_values, ), - State::Required(page) => { + BinaryState::Required(page) => { for x in page.values.by_ref().take(additional) { values.push(x) } } - State::Delta(page) => { + 
BinaryState::Delta(page) => { values.extend_lengths(page.lengths.by_ref().take(additional), &mut page.values); } - State::OptionalDelta(page_validity, page_values) => { + BinaryState::OptionalDelta(page_validity, page_values) => { let Binary { offsets, values: values_, @@ -378,17 +317,17 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { page_values.values = remaining; values_.extend_from_slice(consumed); } - State::FilteredRequired(page) => { + BinaryState::FilteredRequired(page) => { for x in page.values.by_ref().take(additional) { values.push(x) } } - State::FilteredDelta(page) => { + BinaryState::FilteredDelta(page) => { for x in page.values.by_ref().take(additional) { values.push(x) } } - State::OptionalDictionary(page_validity, page_values) => { + BinaryState::OptionalDictionary(page_validity, page_values) => { let page_dict = &page_values.dict; utils::extend_from_decoder( validity, @@ -401,7 +340,7 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { .map(|index| page_dict[index.unwrap() as usize].as_ref()), ) } - State::RequiredDictionary(page) => { + BinaryState::RequiredDictionary(page) => { let page_dict = &page.dict; for x in page @@ -413,7 +352,7 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { values.push(x) } } - State::FilteredOptional(page_validity, page_values) => { + BinaryState::FilteredOptional(page_validity, page_values) => { utils::extend_from_decoder( validity, page_validity, @@ -422,7 +361,7 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { page_values.by_ref(), ); } - State::FilteredOptionalDelta(page_validity, page_values) => { + BinaryState::FilteredOptionalDelta(page_validity, page_values) => { utils::extend_from_decoder( validity, page_validity, @@ -431,7 +370,7 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { page_values.by_ref(), ); } - State::FilteredRequiredDictionary(page) => { + BinaryState::FilteredRequiredDictionary(page) => { let page_dict = &page.dict; for x in page 
.values @@ -442,7 +381,7 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { values.push(x) } } - State::FilteredOptionalDictionary(page_validity, page_values) => { + BinaryState::FilteredOptionalDictionary(page_validity, page_values) => { let page_dict = &page_values.dict; utils::extend_from_decoder( validity, @@ -495,7 +434,7 @@ pub struct Iter { iter: I, data_type: DataType, items: VecDeque<(Binary, MutableBitmap)>, - dict: Option, + dict: Option, chunk_size: Option, remaining: usize, } @@ -536,8 +475,75 @@ impl Iterator for Iter { } } -pub(super) fn deserialize_plain(values: &[u8], num_values: usize) -> Dict { +pub(crate) fn deserialize_plain(values: &[u8], num_values: usize) -> BinaryDict { SizedBinaryIter::new(values, num_values) .map(|x| x.to_vec()) .collect() } + +pub(crate) fn build_binary_state<'a>( + page: &'a DataPage, + dict: Option<&'a BinaryDict>, +) -> Result> { + let is_optional = page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); + + match (page.encoding(), dict, is_optional, is_filtered) { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, false) => Ok( + BinaryState::RequiredDictionary(RequiredDictionary::try_new(page, dict)?), + ), + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, false) => { + Ok(BinaryState::OptionalDictionary( + OptionalPageValidity::try_new(page)?, + ValuesDictionary::try_new(page, dict)?, + )) + } + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, true) => { + FilteredRequiredDictionary::try_new(page, dict) + .map(BinaryState::FilteredRequiredDictionary) + } + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, true) => { + Ok(BinaryState::FilteredOptionalDictionary( + FilteredOptionalPageValidity::try_new(page)?, + ValuesDictionary::try_new(page, dict)?, + )) + } + (Encoding::Plain, _, true, false) => { + let (_, _, values) = 
split_buffer(page)?; + + let values = BinaryIter::new(values); + + Ok(BinaryState::Optional( + OptionalPageValidity::try_new(page)?, + values, + )) + } + (Encoding::Plain, _, false, false) => Ok(BinaryState::Required(Required::try_new(page)?)), + (Encoding::Plain, _, false, true) => { + Ok(BinaryState::FilteredRequired(FilteredRequired::new(page))) + } + (Encoding::Plain, _, true, true) => { + let (_, _, values) = split_buffer(page)?; + + Ok(BinaryState::FilteredOptional( + FilteredOptionalPageValidity::try_new(page)?, + BinaryIter::new(values), + )) + } + (Encoding::DeltaLengthByteArray, _, false, false) => { + Delta::try_new(page).map(BinaryState::Delta) + } + (Encoding::DeltaLengthByteArray, _, true, false) => Ok(BinaryState::OptionalDelta( + OptionalPageValidity::try_new(page)?, + Delta::try_new(page)?, + )), + (Encoding::DeltaLengthByteArray, _, false, true) => { + FilteredDelta::try_new(page).map(BinaryState::FilteredDelta) + } + (Encoding::DeltaLengthByteArray, _, true, true) => Ok(BinaryState::FilteredOptionalDelta( + FilteredOptionalPageValidity::try_new(page)?, + Delta::try_new(page)?, + )), + _ => Err(utils::not_implemented(page)), + } +} diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/mod.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/mod.rs index 527e390d1a5b..9b0e7ad8bbdf 100644 --- a/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/mod.rs +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/mod.rs @@ -13,12 +13,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-mod basic; +pub(super) mod basic; mod dictionary; -mod nested; +pub(super) mod nested; mod utils; pub use basic::Iter; pub use dictionary::DictIter; pub use dictionary::NestedDictIter; pub use nested::NestedIter; +pub use utils::SizedBinaryIter; diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/nested.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/nested.rs index 83dcec4b7b66..dee1ea5b5afc 100644 --- a/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/nested.rs +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binary/nested.rs @@ -26,7 +26,7 @@ use super::super::utils; use super::super::utils::MaybeNext; use super::basic::deserialize_plain; use super::basic::finish; -use super::basic::Dict; +use super::basic::BinaryDict; use super::basic::ValuesDictionary; use super::utils::*; use crate::arrow::array::Array; @@ -37,20 +37,20 @@ use crate::arrow::io::parquet::read::Pages; use crate::arrow::offset::Offset; #[derive(Debug)] -enum State<'a> { +pub(crate) enum BinaryNestedState<'a> { Optional(BinaryIter<'a>), Required(BinaryIter<'a>), RequiredDictionary(ValuesDictionary<'a>), OptionalDictionary(ValuesDictionary<'a>), } -impl<'a> utils::PageState<'a> for State<'a> { +impl<'a> utils::PageState<'a> for BinaryNestedState<'a> { fn len(&self) -> usize { match self { - State::Optional(validity) => validity.size_hint().0, - State::Required(state) => state.size_hint().0, - State::RequiredDictionary(required) => required.len(), - State::OptionalDictionary(optional) => optional.len(), + BinaryNestedState::Optional(validity) => validity.size_hint().0, + BinaryNestedState::Required(state) => state.size_hint().0, + BinaryNestedState::RequiredDictionary(required) => required.len(), + BinaryNestedState::OptionalDictionary(optional) => optional.len(), } } } @@ -61,8 +61,8 @@ struct BinaryDecoder { } impl<'a, O: Offset> NestedDecoder<'a> for BinaryDecoder { - type State = State<'a>; - type Dictionary = Dict; + type 
State = BinaryNestedState<'a>; + type Dictionary = BinaryDict; type DecodedState = (Binary, MutableBitmap); fn build_state( @@ -70,33 +70,7 @@ impl<'a, O: Offset> NestedDecoder<'a> for BinaryDecoder { page: &'a DataPage, dict: Option<&'a Self::Dictionary>, ) -> Result { - let is_optional = - page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; - let is_filtered = page.selected_rows().is_some(); - - match (page.encoding(), dict, is_optional, is_filtered) { - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, false) => { - ValuesDictionary::try_new(page, dict).map(State::RequiredDictionary) - } - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, false) => { - ValuesDictionary::try_new(page, dict).map(State::OptionalDictionary) - } - (Encoding::Plain, _, true, false) => { - let (_, _, values) = split_buffer(page)?; - - let values = BinaryIter::new(values); - - Ok(State::Optional(values)) - } - (Encoding::Plain, _, false, false) => { - let (_, _, values) = split_buffer(page)?; - - let values = BinaryIter::new(values); - - Ok(State::Required(values)) - } - _ => Err(utils::not_implemented(page)), - } + build_nested_state(page, dict) } fn with_capacity(&self, capacity: usize) -> Self::DecodedState { @@ -109,16 +83,16 @@ impl<'a, O: Offset> NestedDecoder<'a> for BinaryDecoder { fn push_valid(&self, state: &mut Self::State, decoded: &mut Self::DecodedState) -> Result<()> { let (values, validity) = decoded; match state { - State::Optional(page) => { + BinaryNestedState::Optional(page) => { let value = page.next().unwrap_or_default(); values.push(value); validity.push(true); } - State::Required(page) => { + BinaryNestedState::Required(page) => { let value = page.next().unwrap_or_default(); values.push(value); } - State::RequiredDictionary(page) => { + BinaryNestedState::RequiredDictionary(page) => { let dict_values = &page.dict; let item = page .values @@ -127,7 +101,7 @@ impl<'a, O: Offset> 
NestedDecoder<'a> for BinaryDecoder { .unwrap_or_default(); values.push(item); } - State::OptionalDictionary(page) => { + BinaryNestedState::OptionalDictionary(page) => { let dict_values = &page.dict; let item = page .values @@ -152,12 +126,44 @@ impl<'a, O: Offset> NestedDecoder<'a> for BinaryDecoder { } } +pub(crate) fn build_nested_state<'a>( + page: &'a DataPage, + dict: Option<&'a BinaryDict>, +) -> Result> { + let is_optional = page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); + + match (page.encoding(), dict, is_optional, is_filtered) { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, false) => { + ValuesDictionary::try_new(page, dict).map(BinaryNestedState::RequiredDictionary) + } + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, false) => { + ValuesDictionary::try_new(page, dict).map(BinaryNestedState::OptionalDictionary) + } + (Encoding::Plain, _, true, false) => { + let (_, _, values) = split_buffer(page)?; + + let values = BinaryIter::new(values); + + Ok(BinaryNestedState::Optional(values)) + } + (Encoding::Plain, _, false, false) => { + let (_, _, values) = split_buffer(page)?; + + let values = BinaryIter::new(values); + + Ok(BinaryNestedState::Required(values)) + } + _ => Err(utils::not_implemented(page)), + } +} + pub struct NestedIter { iter: I, data_type: DataType, init: Vec, items: VecDeque<(NestedState, (Binary, MutableBitmap))>, - dict: Option, + dict: Option, chunk_size: Option, remaining: usize, } diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/basic.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/basic.rs new file mode 100644 index 000000000000..fe3edd0fa90f --- /dev/null +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/basic.rs @@ -0,0 +1,292 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache 
License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::cell::Cell; +use std::collections::VecDeque; + +use parquet2::page::DataPage; +use parquet2::page::DictPage; + +use super::super::binary::basic::deserialize_plain; +use super::super::binary::basic::BinaryDict; +use super::super::binary::basic::BinaryState; +use super::super::utils; +use super::super::utils::extend_from_decoder; +use super::super::utils::next; +use super::super::utils::DecodedState; +use super::super::utils::MaybeNext; +use crate::arrow::array::Array; +use crate::arrow::array::BinaryViewArray; +use crate::arrow::array::MutableBinaryViewArray; +use crate::arrow::array::Utf8ViewArray; +use crate::arrow::bitmap::Bitmap; +use crate::arrow::bitmap::MutableBitmap; +use crate::arrow::datatypes::DataType; +use crate::arrow::datatypes::PhysicalType; +use crate::arrow::error::Result; +use crate::arrow::io::parquet::read::deserialize::binary::basic::build_binary_state; +use crate::arrow::io::parquet::read::Pages; +use crate::ArrayRef; + +type DecodedStateTuple = (MutableBinaryViewArray<[u8]>, MutableBitmap); + +#[derive(Default)] +struct BinViewDecoder { + check_utf8: Cell, +} + +impl DecodedState for DecodedStateTuple { + fn len(&self) -> usize { + self.0.len() + } +} + +impl<'a> utils::Decoder<'a> for BinViewDecoder { + type State = BinaryState<'a>; + type Dict = BinaryDict; + type DecodedState = DecodedStateTuple; + + fn build_state(&self, page: &'a DataPage, dict: Option<&'a Self::Dict>) -> Result { + 
build_binary_state(page, dict) + } + + fn with_capacity(&self, capacity: usize) -> Self::DecodedState { + ( + MutableBinaryViewArray::with_capacity(capacity), + MutableBitmap::with_capacity(capacity), + ) + } + + fn extend_from_state( + &self, + state: &mut Self::State, + decoded: &mut Self::DecodedState, + additional: usize, + ) { + let (values, validity) = decoded; + let mut validate_utf8 = self.check_utf8.take(); + + match state { + BinaryState::Optional(page_validity, page_values) => extend_from_decoder( + validity, + page_validity, + Some(additional), + values, + page_values, + ), + BinaryState::Required(page) => { + for x in page.values.by_ref().take(additional) { + values.push_value_ignore_validity(x) + } + } + BinaryState::Delta(page) => { + for value in page { + values.push_value_ignore_validity(value) + } + } + BinaryState::OptionalDelta(page_validity, page_values) => { + extend_from_decoder( + validity, + page_validity, + Some(additional), + values, + page_values, + ); + } + BinaryState::FilteredRequired(page) => { + for x in page.values.by_ref().take(additional) { + values.push_value_ignore_validity(x) + } + } + BinaryState::FilteredDelta(page) => { + for x in page.values.by_ref().take(additional) { + values.push_value_ignore_validity(x) + } + } + BinaryState::OptionalDictionary(page_validity, page_values) => { + // Already done on the dict. + validate_utf8 = false; + let page_dict = &page_values.dict; + utils::extend_from_decoder( + validity, + page_validity, + Some(additional), + values, + &mut page_values + .values + .by_ref() + .map(|index| page_dict[index.unwrap() as usize].as_ref()), + ) + } + BinaryState::RequiredDictionary(page) => { + // Already done on the dict. 
+ validate_utf8 = false; + let page_dict = &page.dict; + + for x in page + .values + .by_ref() + .map(|index| page_dict[index.unwrap() as usize].as_ref()) + .take(additional) + { + values.push_value_ignore_validity::<&[u8]>(x) + } + } + BinaryState::FilteredOptional(page_validity, page_values) => { + extend_from_decoder( + validity, + page_validity, + Some(additional), + values, + page_values.by_ref(), + ); + } + BinaryState::FilteredOptionalDelta(page_validity, page_values) => { + extend_from_decoder( + validity, + page_validity, + Some(additional), + values, + page_values.by_ref(), + ); + } + BinaryState::FilteredRequiredDictionary(page) => { + // TODO! directly set the dict as buffers and only insert the proper views. + // This will save a lot of memory. + // Already done on the dict. + validate_utf8 = false; + let page_dict = &page.dict; + for x in page + .values + .by_ref() + .map(|index| page_dict[index.unwrap() as usize].as_ref()) + .take(additional) + { + values.push_value_ignore_validity::<&[u8]>(x) + } + } + BinaryState::FilteredOptionalDictionary(page_validity, page_values) => { + // Already done on the dict. + validate_utf8 = false; + // TODO! directly set the dict as buffers and only insert the proper views. + // This will save a lot of memory. 
+ let page_dict = &page_values.dict; + extend_from_decoder( + validity, + page_validity, + Some(additional), + values, + &mut page_values + .values + .by_ref() + .map(|index| page_dict[index.unwrap() as usize].as_ref()), + ) + } + } + + if validate_utf8 { + values.validate_utf8().expect("Not an utf8 string buffer.") + } + } + + fn deserialize_dict(&self, page: &DictPage) -> Self::Dict { + deserialize_plain(&page.buffer, page.num_values) + } +} + +pub struct BinaryViewArrayIter { + iter: I, + data_type: DataType, + items: VecDeque, + dict: Option, + chunk_size: Option, + remaining: usize, +} +impl BinaryViewArrayIter { + pub fn new(iter: I, data_type: DataType, chunk_size: Option, num_rows: usize) -> Self { + Self { + iter, + data_type, + items: VecDeque::new(), + dict: None, + chunk_size, + remaining: num_rows, + } + } +} + +impl Iterator for BinaryViewArrayIter { + type Item = Result; + + fn next(&mut self) -> Option { + let decoder = BinViewDecoder::default(); + loop { + let maybe_state = next( + &mut self.iter, + &mut self.items, + &mut self.dict, + &mut self.remaining, + self.chunk_size, + &decoder, + ); + match maybe_state { + MaybeNext::Some(Ok((values, validity))) => { + return Some(finish(&self.data_type, values, validity)); + } + MaybeNext::Some(Err(e)) => return Some(Err(e)), + MaybeNext::None => return None, + MaybeNext::More => continue, + } + } + } +} + +pub(super) fn finish( + data_type: &DataType, + values: MutableBinaryViewArray<[u8]>, + validity: MutableBitmap, +) -> Result> { + let mut array: BinaryViewArray = values.into(); + let validity: Bitmap = validity.into(); + + if validity.unset_bits() != validity.len() { + array = array.with_validity(Some(validity)) + } + + match data_type.to_physical_type() { + PhysicalType::BinaryView => Ok(BinaryViewArray::new_unchecked( + data_type.clone(), + array.views().clone(), + array.data_buffers().clone(), + array.validity().cloned(), + array.total_bytes_len(), + array.total_buffer_len(), + ) + .boxed()), + 
PhysicalType::Utf8View => { + // Safety: we already checked utf8 + Ok(Utf8ViewArray::new_unchecked( + data_type.clone(), + array.views().clone(), + array.data_buffers().clone(), + array.validity().cloned(), + array.total_bytes_len(), + array.total_buffer_len(), + ) + .boxed()) + } + _ => unreachable!(), + } +} diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/dictionary.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/dictionary.rs new file mode 100644 index 000000000000..1c1106a981da --- /dev/null +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/dictionary.rs @@ -0,0 +1,181 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::VecDeque; + +use parquet2::page::DictPage; + +use crate::arrow::array::Array; +use crate::arrow::array::DictionaryArray; +use crate::arrow::array::DictionaryKey; +use crate::arrow::array::MutableBinaryViewArray; +use crate::arrow::bitmap::MutableBitmap; +use crate::arrow::datatypes::DataType; +use crate::arrow::datatypes::PhysicalType; +use crate::arrow::error::Result; +use crate::arrow::io::parquet::read::deserialize::binary::SizedBinaryIter; +use crate::arrow::io::parquet::read::deserialize::dictionary::nested_next_dict; +use crate::arrow::io::parquet::read::deserialize::dictionary::next_dict; +use crate::arrow::io::parquet::read::deserialize::utils::MaybeNext; +use crate::arrow::io::parquet::read::InitNested; +use crate::arrow::io::parquet::read::NestedState; +use crate::arrow::io::parquet::read::Pages; + +// An iterator adapter over [`PagesIter`] assumed to be encoded as parquet's dictionary-encoded binary representation +#[derive(Debug)] +pub struct DictIter +where + I: Pages, + K: DictionaryKey, +{ + iter: I, + data_type: DataType, + values: Option>, + items: VecDeque<(Vec, MutableBitmap)>, + remaining: usize, + chunk_size: Option, +} + +impl DictIter +where + K: DictionaryKey, + I: Pages, +{ + pub fn new(iter: I, data_type: DataType, num_rows: usize, chunk_size: Option) -> Self { + Self { + iter, + data_type, + values: None, + items: VecDeque::new(), + remaining: num_rows, + chunk_size, + } + } +} + +fn read_dict(data_type: DataType, dict: &DictPage) -> Box { + let data_type = match data_type { + DataType::Dictionary(_, values, _) => *values, + _ => data_type, + }; + + let values = SizedBinaryIter::new(&dict.buffer, dict.num_values); + + let mut data = MutableBinaryViewArray::<[u8]>::with_capacity(dict.num_values); + for item in values { + data.push_value(item) + } + + match data_type.to_physical_type() { + PhysicalType::Utf8View => data.freeze().to_utf8view().unwrap().boxed(), + PhysicalType::BinaryView => data.freeze().boxed(), + _ 
=> unreachable!(), + } +} + +impl Iterator for DictIter +where + I: Pages, + K: DictionaryKey, +{ + type Item = Result>; + + fn next(&mut self) -> Option { + let maybe_state = next_dict( + &mut self.iter, + &mut self.items, + &mut self.values, + self.data_type.clone(), + &mut self.remaining, + self.chunk_size, + |dict| read_dict(self.data_type.clone(), dict), + ); + match maybe_state { + MaybeNext::Some(Ok(dict)) => Some(Ok(dict)), + MaybeNext::Some(Err(e)) => Some(Err(e)), + MaybeNext::None => None, + MaybeNext::More => self.next(), + } + } +} + +/// An iterator adapter that converts [`DataPages`] into an [`Iterator`] of [`DictionaryArray`] +#[derive(Debug)] +pub struct NestedDictIter +where + I: Pages, + K: DictionaryKey, +{ + iter: I, + init: Vec, + data_type: DataType, + values: Option>, + items: VecDeque<(NestedState, (Vec, MutableBitmap))>, + remaining: usize, + chunk_size: Option, +} + +impl NestedDictIter +where + I: Pages, + K: DictionaryKey, +{ + pub fn new( + iter: I, + init: Vec, + data_type: DataType, + num_rows: usize, + chunk_size: Option, + ) -> Self { + Self { + iter, + init, + data_type, + values: None, + items: VecDeque::new(), + remaining: num_rows, + chunk_size, + } + } +} + +impl Iterator for NestedDictIter +where + I: Pages, + K: DictionaryKey, +{ + type Item = Result<(NestedState, DictionaryArray)>; + + fn next(&mut self) -> Option { + loop { + let maybe_state = nested_next_dict( + &mut self.iter, + &mut self.items, + &mut self.remaining, + &self.init, + &mut self.values, + self.data_type.clone(), + self.chunk_size, + |dict| read_dict(self.data_type.clone(), dict), + ); + match maybe_state { + MaybeNext::Some(Ok(dict)) => return Some(Ok(dict)), + MaybeNext::Some(Err(e)) => return Some(Err(e)), + MaybeNext::None => return None, + MaybeNext::More => continue, + } + } + } +} diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/mod.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/mod.rs new file mode 
100644 index 000000000000..0fec4ed0c30f --- /dev/null +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/mod.rs @@ -0,0 +1,23 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod basic; +mod dictionary; +mod nested; + +pub use basic::BinaryViewArrayIter; +pub use dictionary::DictIter; +pub use dictionary::NestedDictIter; +pub use nested::NestedIter; diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/nested.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/nested.rs new file mode 100644 index 000000000000..34649622364a --- /dev/null +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/binview/nested.rs @@ -0,0 +1,165 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::VecDeque; + +use parquet2::page::DataPage; +use parquet2::page::DictPage; + +use crate::arrow::array::MutableBinaryViewArray; +use crate::arrow::bitmap::MutableBitmap; +use crate::arrow::datatypes::DataType; +use crate::arrow::error::Result; +use crate::arrow::io::parquet::read::deserialize::binary::basic::deserialize_plain; +use crate::arrow::io::parquet::read::deserialize::binary::basic::BinaryDict; +use crate::arrow::io::parquet::read::deserialize::binary::nested::build_nested_state; +use crate::arrow::io::parquet::read::deserialize::binary::nested::BinaryNestedState; +use crate::arrow::io::parquet::read::deserialize::binview::basic::finish; +use crate::arrow::io::parquet::read::deserialize::nested_utils::next; +use crate::arrow::io::parquet::read::deserialize::nested_utils::NestedDecoder; +use crate::arrow::io::parquet::read::deserialize::utils::MaybeNext; +use crate::arrow::io::parquet::read::InitNested; +use crate::arrow::io::parquet::read::NestedState; +use crate::arrow::io::parquet::read::Pages; +use crate::ArrayRef; + +#[derive(Debug, Default)] +struct BinViewDecoder {} + +type DecodedStateTuple = (MutableBinaryViewArray<[u8]>, MutableBitmap); + +impl<'a> NestedDecoder<'a> for BinViewDecoder { + type State = BinaryNestedState<'a>; + type Dictionary = BinaryDict; + type DecodedState = DecodedStateTuple; + + fn build_state( + &self, + page: &'a DataPage, + dict: Option<&'a Self::Dictionary>, + ) -> Result { + build_nested_state(page, dict) + } + + fn with_capacity(&self, capacity: usize) -> Self::DecodedState { + ( + MutableBinaryViewArray::with_capacity(capacity), + MutableBitmap::with_capacity(capacity), + ) + } + + fn push_valid(&self, state: &mut Self::State, decoded: &mut Self::DecodedState) -> Result<()> { + let (values, validity) = decoded; + match state { + BinaryNestedState::Optional(page) => { + let value = page.next().unwrap_or_default(); + values.push_value_ignore_validity(value); + validity.push(true); + } + 
BinaryNestedState::Required(page) => { + let value = page.next().unwrap_or_default(); + values.push_value_ignore_validity(value); + } + BinaryNestedState::RequiredDictionary(page) => { + let dict_values = &page.dict; + let item = page + .values + .next() + .map(|index| dict_values[index.unwrap() as usize].as_ref()) + .unwrap_or_default(); + values.push_value_ignore_validity::<&[u8]>(item); + } + BinaryNestedState::OptionalDictionary(page) => { + let dict_values = &page.dict; + let item = page + .values + .next() + .map(|index| dict_values[index.unwrap() as usize].as_ref()) + .unwrap_or_default(); + values.push_value_ignore_validity::<&[u8]>(item); + validity.push(true); + } + } + Ok(()) + } + + fn push_null(&self, decoded: &mut Self::DecodedState) { + let (values, validity) = decoded; + values.push_null(); + validity.push(false); + } + + fn deserialize_dict(&self, page: &DictPage) -> Self::Dictionary { + deserialize_plain(&page.buffer, page.num_values) + } +} + +pub struct NestedIter { + iter: I, + data_type: DataType, + init: Vec, + items: VecDeque<(NestedState, DecodedStateTuple)>, + dict: Option, + chunk_size: Option, + remaining: usize, +} + +impl NestedIter { + pub fn new( + iter: I, + init: Vec, + data_type: DataType, + num_rows: usize, + chunk_size: Option, + ) -> Self { + Self { + iter, + data_type, + init, + items: VecDeque::new(), + dict: None, + chunk_size, + remaining: num_rows, + } + } +} + +impl Iterator for NestedIter { + type Item = Result<(NestedState, ArrayRef)>; + + fn next(&mut self) -> Option { + loop { + let maybe_state = next( + &mut self.iter, + &mut self.items, + &mut self.dict, + &mut self.remaining, + &self.init, + self.chunk_size, + &BinViewDecoder::default(), + ); + match maybe_state { + MaybeNext::Some(Ok((nested, decoded))) => { + return Some( + finish(&self.data_type, decoded.0, decoded.1).map(|array| (nested, array)), + ); + } + MaybeNext::Some(Err(e)) => return Some(Err(e)), + MaybeNext::None => return None, + MaybeNext::More => 
continue, /* Using continue in a loop instead of calling next helps prevent stack overflow. */ + } + } + } +} diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/mod.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/mod.rs index de1a27bf0f89..ceaa697ed5e0 100644 --- a/src/common/arrow/src/arrow/io/parquet/read/deserialize/mod.rs +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/mod.rs @@ -15,6 +15,7 @@ //! APIs to read from Parquet format. mod binary; +mod binview; mod boolean; mod dictionary; mod fixed_size_binary; @@ -144,6 +145,8 @@ fn is_primitive(data_type: &DataType) -> bool { | crate::arrow::datatypes::PhysicalType::LargeUtf8 | crate::arrow::datatypes::PhysicalType::Binary | crate::arrow::datatypes::PhysicalType::LargeBinary + | crate::arrow::datatypes::PhysicalType::BinaryView + | crate::arrow::datatypes::PhysicalType::Utf8View | crate::arrow::datatypes::PhysicalType::FixedSizeBinary | crate::arrow::datatypes::PhysicalType::Dictionary(_) ) @@ -181,7 +184,7 @@ pub fn n_columns(data_type: &DataType) -> usize { use crate::arrow::datatypes::PhysicalType::*; match data_type.to_physical_type() { Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8 - | Dictionary(_) | LargeUtf8 => 1, + | Dictionary(_) | LargeUtf8 | BinaryView | Utf8View => 1, List | FixedSizeList | LargeList => { let a = data_type.to_logical_type(); if let DataType::List(inner) = a { diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/nested.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/nested.rs index 783aa98d4fb8..27b7f1ee669c 100644 --- a/src/common/arrow/src/arrow/io/parquet/read/deserialize/nested.rs +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/nested.rs @@ -247,6 +247,17 @@ where chunk_size, )) } + BinaryView | Utf8View => { + init.push(InitNested::Primitive(field.is_nullable)); + types.pop(); + remove_nested(binview::NestedIter::new( + columns.pop().unwrap(), + init, + 
field.data_type().clone(), + num_rows, + chunk_size, + )) + } _ => match field.data_type().to_logical_type() { DataType::Dictionary(key_type, _, _) => { init.push(InitNested::Primitive(field.is_nullable)); @@ -580,6 +591,9 @@ fn dict_read<'a, K: DictionaryKey, I: 'a + Pages>( LargeUtf8 | LargeBinary => primitive(binary::NestedDictIter::::new( iter, init, data_type, num_rows, chunk_size, )), + Utf8View | BinaryView => primitive(binview::NestedDictIter::::new( + iter, init, data_type, num_rows, chunk_size, + )), FixedSizeBinary(_) => primitive(fixed_size_binary::NestedDictIter::::new( iter, init, data_type, num_rows, chunk_size, )), diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/simple.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/simple.rs index 364dbf71f700..91ec5b8db03a 100644 --- a/src/common/arrow/src/arrow/io/parquet/read/deserialize/simple.rs +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/simple.rs @@ -23,6 +23,7 @@ use parquet2::types::int96_to_i64_ns; use super::super::ArrayIter; use super::super::Pages; use super::binary; +use super::binview; use super::boolean; use super::fixed_size_binary; use super::null; @@ -365,6 +366,9 @@ pub fn page_iter_to_arrays<'a, I: Pages + 'a>( (PhysicalType::ByteArray, LargeBinary | LargeUtf8) => Box::new( binary::Iter::::new(pages, data_type, chunk_size, num_rows), ), + (PhysicalType::ByteArray, BinaryView | Utf8View) => Box::new( + binview::BinaryViewArrayIter::new(pages, data_type, chunk_size, num_rows), + ), (_, Dictionary(key_type, _, _)) => { return match_integer_type!(key_type, |$K| { @@ -668,6 +672,9 @@ fn dict_read<'a, K: DictionaryKey, I: Pages + 'a>( (PhysicalType::ByteArray, LargeUtf8 | LargeBinary) => dyn_iter( binary::DictIter::::new(iter, data_type, num_rows, chunk_size), ), + (PhysicalType::ByteArray, Utf8View | BinaryView) => dyn_iter( + binview::DictIter::::new(iter, data_type, num_rows, chunk_size), + ), (PhysicalType::FixedLenByteArray(_), FixedSizeBinary(_)) 
=> dyn_iter( fixed_size_binary::DictIter::::new(iter, data_type, num_rows, chunk_size), ), diff --git a/src/common/arrow/src/arrow/io/parquet/read/deserialize/utils.rs b/src/common/arrow/src/arrow/io/parquet/read/deserialize/utils.rs index e9e8bf122954..14ca2ca7bb74 100644 --- a/src/common/arrow/src/arrow/io/parquet/read/deserialize/utils.rs +++ b/src/common/arrow/src/arrow/io/parquet/read/deserialize/utils.rs @@ -28,6 +28,8 @@ use parquet2::page::Page; use parquet2::schema::Repetition; use super::super::Pages; +use crate::arrow::array::MutableBinaryViewArray; +use crate::arrow::array::ViewType; use crate::arrow::bitmap::utils::BitmapIter; use crate::arrow::bitmap::MutableBitmap; use crate::arrow::error::Error; @@ -81,6 +83,45 @@ impl Pushable for MutableBitmap { } } +impl Pushable<&T> for MutableBinaryViewArray { + #[inline] + fn reserve(&mut self, additional: usize) { + MutableBinaryViewArray::reserve(self, additional) + } + + #[inline] + fn push(&mut self, value: &T) { + MutableBinaryViewArray::push_value(self, value) + } + + #[inline] + fn len(&self) -> usize { + MutableBinaryViewArray::len(self) + } + + fn push_null(&mut self) { + MutableBinaryViewArray::push_null(self) + } + + fn extend_constant(&mut self, additional: usize, value: &T) { + // First push a value to get the View + MutableBinaryViewArray::push_value(self, value); + + // And then use that new view to extend + let views = self.views_mut(); + let view = *views.last().unwrap(); + + let remaining = additional - 1; + for _ in 0..remaining { + views.push(view); + } + + if let Some(bitmap) = self.validity_mut() { + bitmap.extend_constant(remaining, true) + } + } +} + impl Pushable for Vec { #[inline] fn reserve(&mut self, additional: usize) { diff --git a/src/common/arrow/src/arrow/io/parquet/write/binview/basic.rs b/src/common/arrow/src/arrow/io/parquet/write/binview/basic.rs new file mode 100644 index 000000000000..57a697d1557c --- /dev/null +++ 
b/src/common/arrow/src/arrow/io/parquet/write/binview/basic.rs @@ -0,0 +1,140 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use parquet2::encoding::delta_bitpacked; +use parquet2::encoding::Encoding; +use parquet2::page::Page; +use parquet2::schema::types::PrimitiveType; +use parquet2::statistics::serialize_statistics; +use parquet2::statistics::BinaryStatistics; +use parquet2::statistics::ParquetStatistics; +use parquet2::statistics::Statistics; + +use crate::arrow::array::Array; +use crate::arrow::array::BinaryViewArray; +use crate::arrow::error::Error; +use crate::arrow::error::Result; +use crate::arrow::io::parquet::read::schema::is_nullable; +use crate::arrow::io::parquet::write::binary::ord_binary; +use crate::arrow::io::parquet::write::utils; +use crate::arrow::io::parquet::write::WriteOptions; + +pub(crate) fn encode_non_null_values<'a, I: Iterator>( + iter: I, + buffer: &mut Vec, +) { + iter.for_each(|x| { + // BYTE_ARRAY: first 4 bytes denote length in littleendian. 
+ let len = (x.len() as u32).to_le_bytes(); + buffer.extend_from_slice(&len); + buffer.extend_from_slice(x); + }) +} + +pub(crate) fn encode_plain(array: &BinaryViewArray, buffer: &mut Vec) { + let capacity = + array.total_bytes_len() + (array.len() - array.null_count()) * std::mem::size_of::(); + + let len_before = buffer.len(); + buffer.reserve(capacity); + + encode_non_null_values(array.non_null_values_iter(), buffer); + // Append the non-null values. + debug_assert_eq!(buffer.len() - len_before, capacity); +} + +pub(crate) fn encode_delta(array: &BinaryViewArray, buffer: &mut Vec) { + let lengths = array.non_null_views_iter().map(|v| v.length as i64); + delta_bitpacked::encode(lengths, buffer); + + for slice in array.non_null_values_iter() { + buffer.extend_from_slice(slice) + } +} + +pub fn array_to_page( + array: &BinaryViewArray, + options: WriteOptions, + type_: PrimitiveType, + encoding: Encoding, +) -> Result { + let validity = array.validity(); + let is_optional = is_nullable(&type_.field_info); + + let mut buffer = vec![]; + utils::write_def_levels( + &mut buffer, + is_optional, + validity, + array.len(), + options.version, + )?; + + let definition_levels_byte_length = buffer.len(); + + match encoding { + Encoding::Plain => encode_plain(array, &mut buffer), + Encoding::DeltaLengthByteArray => encode_delta(array, &mut buffer), + _ => { + return Err(Error::oos(format!( + "Datatype {:?} cannot be encoded by {:?} encoding", + array.data_type(), + encoding + ))); + } + } + + let statistics = if options.write_statistics { + Some(build_statistics(array, type_.clone())) + } else { + None + }; + + utils::build_plain_page( + buffer, + array.len(), + array.len(), + array.null_count(), + 0, + definition_levels_byte_length, + statistics, + type_, + options, + encoding, + ) + .map(Page::Data) +} + +pub(crate) fn build_statistics( + array: &BinaryViewArray, + primitive_type: PrimitiveType, +) -> ParquetStatistics { + let statistics = &BinaryStatistics { + 
primitive_type, + null_count: Some(array.null_count() as i64), + distinct_count: None, + max_value: array + .iter() + .flatten() + .max_by(|x, y| ord_binary(x, y)) + .map(|x| x.to_vec()), + min_value: array + .iter() + .flatten() + .min_by(|x, y| ord_binary(x, y)) + .map(|x| x.to_vec()), + } as &dyn Statistics; + serialize_statistics(statistics) +} diff --git a/src/common/arrow/src/arrow/io/parquet/write/binview/mod.rs b/src/common/arrow/src/arrow/io/parquet/write/binview/mod.rs new file mode 100644 index 000000000000..a2acb98474d3 --- /dev/null +++ b/src/common/arrow/src/arrow/io/parquet/write/binview/mod.rs @@ -0,0 +1,22 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod basic; +mod nested; + +pub(crate) use basic::array_to_page; +pub(crate) use basic::build_statistics; +pub(crate) use basic::encode_plain; +pub use nested::array_to_page as nested_array_to_page; diff --git a/src/common/arrow/src/arrow/io/parquet/write/binview/nested.rs b/src/common/arrow/src/arrow/io/parquet/write/binview/nested.rs new file mode 100644 index 000000000000..ee855a4f9387 --- /dev/null +++ b/src/common/arrow/src/arrow/io/parquet/write/binview/nested.rs @@ -0,0 +1,60 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use parquet2::encoding::Encoding; +use parquet2::page::DataPage; +use parquet2::schema::types::PrimitiveType; + +use super::super::nested; +use super::super::utils; +use super::super::WriteOptions; +use super::basic::build_statistics; +use super::basic::encode_plain; +use crate::arrow::array::Array; +use crate::arrow::array::BinaryViewArray; +use crate::arrow::error::Result; +use crate::arrow::io::parquet::write::Nested; + +pub fn array_to_page( + array: &BinaryViewArray, + options: WriteOptions, + type_: PrimitiveType, + nested: &[Nested], +) -> Result { + let mut buffer = vec![]; + let (repetition_levels_byte_length, definition_levels_byte_length) = + nested::write_rep_and_def(options.version, nested, &mut buffer)?; + + encode_plain(array, &mut buffer); + + let statistics = if options.write_statistics { + Some(build_statistics(array, type_.clone())) + } else { + None + }; + + utils::build_plain_page( + buffer, + nested::num_values(nested), + nested[0].len(), + array.null_count(), + repetition_levels_byte_length, + definition_levels_byte_length, + statistics, + type_, + options, + Encoding::Plain, + ) +} diff --git a/src/common/arrow/src/arrow/io/parquet/write/dictionary.rs b/src/common/arrow/src/arrow/io/parquet/write/dictionary.rs index 7c36db6484a8..9054e8399c6d 100644 --- a/src/common/arrow/src/arrow/io/parquet/write/dictionary.rs +++ b/src/common/arrow/src/arrow/io/parquet/write/dictionary.rs @@ -24,6 +24,7 @@ use parquet2::write::DynIter; use super::binary::build_statistics as binary_build_statistics; use super::binary::encode_plain as 
binary_encode_plain; +use super::binview; use super::fixed_len_bytes::build_statistics as fixed_binary_build_statistics; use super::fixed_len_bytes::encode_plain as fixed_binary_encode_plain; use super::nested; @@ -36,6 +37,7 @@ use super::WriteOptions; use crate::arrow::array::Array; use crate::arrow::array::DictionaryArray; use crate::arrow::array::DictionaryKey; +use crate::arrow::array::Utf8ViewArray; use crate::arrow::bitmap::Bitmap; use crate::arrow::bitmap::MutableBitmap; use crate::arrow::datatypes::DataType; @@ -268,6 +270,23 @@ pub fn array_to_pages( }; (DictPage::new(buffer, values.len(), false), stats) } + DataType::Utf8View => { + let array = array + .values() + .as_any() + .downcast_ref::() + .unwrap() + .to_binview(); + let mut buffer = vec![]; + binview::encode_plain(&array, &mut buffer); + + let stats = if options.write_statistics { + Some(binview::build_statistics(&array, type_.clone())) + } else { + None + }; + (DictPage::new(buffer, array.len(), false), stats) + } DataType::FixedSizeBinary(_) => { let mut buffer = vec![]; let array = array.values().as_any().downcast_ref().unwrap(); diff --git a/src/common/arrow/src/arrow/io/parquet/write/mod.rs b/src/common/arrow/src/arrow/io/parquet/write/mod.rs index 4cc828c4cf42..8cb70eedf3d1 100644 --- a/src/common/arrow/src/arrow/io/parquet/write/mod.rs +++ b/src/common/arrow/src/arrow/io/parquet/write/mod.rs @@ -28,6 +28,7 @@ //! The use of these arrow types will result in no logical type being stored within a parquet file. 
mod binary; +mod binview; mod boolean; mod dictionary; mod file; @@ -106,6 +107,7 @@ pub use schema::to_parquet_type; #[cfg_attr(docsrs, doc(cfg(feature = "io_parquet_async")))] pub use sink::FileSink; +use crate::arrow; use crate::arrow::compute::aggregate::estimated_bytes_size; /// returns offset and length to slice the leaf values @@ -436,6 +438,25 @@ pub fn array_to_page_simple( type_, encoding, ), + DataType::BinaryView => { + return binview::array_to_page( + array.as_any().downcast_ref().unwrap(), + options, + type_, + encoding, + ); + } + DataType::Utf8View => { + let array = + arrow::compute::cast::cast(array, &DataType::BinaryView, Default::default()) + .unwrap(); + return binview::array_to_page( + array.as_any().downcast_ref().unwrap(), + options, + type_, + encoding, + ); + } DataType::Null => { let array = Int32Array::new_null(DataType::Int32, array.len()); primitive::array_to_page_plain::(&array, options, type_) @@ -667,6 +688,15 @@ fn array_to_page_nested( let array = array.as_any().downcast_ref().unwrap(); binary::nested_array_to_page::(array, options, type_, nested) } + BinaryView => { + let array = array.as_any().downcast_ref().unwrap(); + binview::nested_array_to_page(array, options, type_, nested) + } + Utf8View => { + let array = arrow::compute::cast::cast(array, &BinaryView, Default::default()).unwrap(); + let array = array.as_any().downcast_ref().unwrap(); + binview::nested_array_to_page(array, options, type_, nested) + } UInt8 => { let array = array.as_any().downcast_ref().unwrap(); primitive::nested_array_to_page::(array, options, type_, nested) @@ -853,7 +883,7 @@ fn transverse_recursive T + Clone>( use crate::arrow::datatypes::PhysicalType::*; match data_type.to_physical_type() { Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8 - | Dictionary(_) | LargeUtf8 => encodings.push(map(data_type)), + | Dictionary(_) | LargeUtf8 | BinaryView | Utf8View => encodings.push(map(data_type)), List | FixedSizeList | LargeList 
=> { let a = data_type.to_logical_type(); if let DataType::List(inner) = a { diff --git a/src/common/arrow/src/arrow/io/parquet/write/pages.rs b/src/common/arrow/src/arrow/io/parquet/write/pages.rs index cf879757dc55..c8325bc54a7f 100644 --- a/src/common/arrow/src/arrow/io/parquet/write/pages.rs +++ b/src/common/arrow/src/arrow/io/parquet/write/pages.rs @@ -232,7 +232,7 @@ fn to_leaves_recursive<'a>(array: &'a dyn Array, leaves: &mut Vec<&'a dyn Array> to_leaves_recursive(array.field().as_ref(), leaves); } Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8 - | LargeUtf8 | Dictionary(_) => leaves.push(array), + | LargeUtf8 | Dictionary(_) | BinaryView | Utf8View => leaves.push(array), other => todo!("Writing {:?} to parquet not yet implemented", other), } } diff --git a/src/common/arrow/src/arrow/io/parquet/write/schema.rs b/src/common/arrow/src/arrow/io/parquet/write/schema.rs index ef558b4b43f2..b7888bc7b834 100644 --- a/src/common/arrow/src/arrow/io/parquet/write/schema.rs +++ b/src/common/arrow/src/arrow/io/parquet/write/schema.rs @@ -125,22 +125,26 @@ pub fn to_parquet_type(field: &Field) -> Result { None, None, )?), - DataType::Binary | DataType::LargeBinary => Ok(ParquetType::try_from_primitive( - name, - PhysicalType::ByteArray, - repetition, - None, - None, - None, - )?), - DataType::Utf8 | DataType::LargeUtf8 => Ok(ParquetType::try_from_primitive( - name, - PhysicalType::ByteArray, - repetition, - Some(PrimitiveConvertedType::Utf8), - Some(PrimitiveLogicalType::String), - None, - )?), + DataType::Binary | DataType::LargeBinary | DataType::BinaryView => { + Ok(ParquetType::try_from_primitive( + name, + PhysicalType::ByteArray, + repetition, + None, + None, + None, + )?) + } + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => { + Ok(ParquetType::try_from_primitive( + name, + PhysicalType::ByteArray, + repetition, + Some(PrimitiveConvertedType::Utf8), + Some(PrimitiveLogicalType::String), + None, + )?) 
+ } DataType::Date32 => Ok(ParquetType::try_from_primitive( name, PhysicalType::Int32, diff --git a/src/common/arrow/src/arrow/scalar/binview.rs b/src/common/arrow/src/arrow/scalar/binview.rs new file mode 100644 index 000000000000..9dd48f257f4f --- /dev/null +++ b/src/common/arrow/src/arrow/scalar/binview.rs @@ -0,0 +1,88 @@ +// Copyright (c) 2020 Ritchie Vink +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Debug; +use std::fmt::Formatter; + +use crate::arrow::array::ViewType; +use crate::arrow::datatypes::DataType; +use crate::arrow::scalar::Scalar; + +#[derive(PartialEq, Eq)] +pub struct BinaryViewScalar { + value: Option, + phantom: std::marker::PhantomData, +} + +impl Debug for BinaryViewScalar { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "Scalar({:?})", self.value) + } +} + +impl Clone for BinaryViewScalar { + fn clone(&self) -> Self { + Self { + value: self.value.clone(), + phantom: Default::default(), + } + } +} + +impl BinaryViewScalar { + /// Returns a new [`BinaryViewScalar`] + #[inline] + pub fn new(value: Option<&T>) -> Self { + Self { + value: value.map(|x| x.into_owned()), + phantom: std::marker::PhantomData, + } + } + + /// Returns the value irrespectively of the validity. 
+ #[allow(unused)] + #[inline] + pub fn value(&self) -> Option<&T> { + self.value.as_ref().map(|x| x.as_ref()) + } +} + +impl From> for BinaryViewScalar { + #[inline] + fn from(v: Option<&T>) -> Self { + Self::new(v) + } +} + +impl Scalar for BinaryViewScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.value.is_some() + } + + #[inline] + fn data_type(&self) -> &DataType { + if T::IS_UTF8 { + &DataType::Utf8View + } else { + &DataType::BinaryView + } + } +} diff --git a/src/common/arrow/src/arrow/scalar/equal.rs b/src/common/arrow/src/arrow/scalar/equal.rs index f38e92cfc046..7d813dd67e86 100644 --- a/src/common/arrow/src/arrow/scalar/equal.rs +++ b/src/common/arrow/src/arrow/scalar/equal.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use super::*; use crate::arrow::datatypes::PhysicalType; +use crate::arrow::scalar::binview::BinaryViewScalar; impl PartialEq for dyn Scalar + '_ { fn eq(&self, that: &dyn Scalar) -> bool { @@ -70,5 +71,7 @@ fn equal(lhs: &dyn Scalar, rhs: &dyn Scalar) -> bool { FixedSizeList => dyn_eq!(FixedSizeListScalar, lhs, rhs), Union => dyn_eq!(UnionScalar, lhs, rhs), Map => dyn_eq!(MapScalar, lhs, rhs), + BinaryView => dyn_eq!(BinaryViewScalar<[u8]>, lhs, rhs), + Utf8View => dyn_eq!(BinaryViewScalar, lhs, rhs), } } diff --git a/src/common/arrow/src/arrow/scalar/mod.rs b/src/common/arrow/src/arrow/scalar/mod.rs index e25eeee58746..33aa056baa7a 100644 --- a/src/common/arrow/src/arrow/scalar/mod.rs +++ b/src/common/arrow/src/arrow/scalar/mod.rs @@ -43,9 +43,13 @@ mod fixed_size_list; pub use fixed_size_list::*; mod fixed_size_binary; pub use fixed_size_binary::*; +mod binview; mod union; + pub use union::UnionScalar; +use crate::arrow::scalar::binview::BinaryViewScalar; + /// Trait object declaring an optional value with a [`DataType`]. /// This trait is often used in APIs that accept multiple scalar types. 
pub trait Scalar: std::fmt::Debug + Send + Sync + dyn_clone::DynClone + 'static { @@ -88,6 +92,21 @@ macro_rules! dyn_new_binary { }}; } +macro_rules! dyn_new_binview { + ($array:expr, $index:expr, $type:ty) => {{ + let array = $array + .as_any() + .downcast_ref::>() + .unwrap(); + let value = if array.is_valid($index) { + Some(array.value($index)) + } else { + None + }; + Box::new(BinaryViewScalar::<$type>::new(value)) + }}; +} + macro_rules! dyn_new_list { ($array:expr, $index:expr, $type:ty) => {{ let array = $array.as_any().downcast_ref::>().unwrap(); @@ -130,6 +149,8 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box { LargeUtf8 => dyn_new_utf8!(array, index, i64), Binary => dyn_new_binary!(array, index, i32), LargeBinary => dyn_new_binary!(array, index, i64), + BinaryView => dyn_new_binview!(array, index, [u8]), + Utf8View => dyn_new_binview!(array, index, str), List => dyn_new_list!(array, index, i32), LargeList => dyn_new_list!(array, index, i64), Struct => { diff --git a/src/common/arrow/src/arrow/types/mod.rs b/src/common/arrow/src/arrow/types/mod.rs index a796f9c61b0d..f669cd0eaf42 100644 --- a/src/common/arrow/src/arrow/types/mod.rs +++ b/src/common/arrow/src/arrow/types/mod.rs @@ -75,6 +75,8 @@ pub enum PrimitiveType { UInt32, /// An unsigned 64-bit integer. UInt64, + /// An unsigned 128-bit integer. + UInt128, /// A 16-bit floating point number. Float16, /// A 32-bit floating point number. 
@@ -88,6 +90,8 @@ pub enum PrimitiveType { } mod private { + use crate::arrow::array::View; + pub trait Sealed {} impl Sealed for u8 {} @@ -99,10 +103,12 @@ mod private { impl Sealed for i32 {} impl Sealed for i64 {} impl Sealed for i128 {} + impl Sealed for u128 {} impl Sealed for super::i256 {} impl Sealed for super::f16 {} impl Sealed for f32 {} impl Sealed for f64 {} impl Sealed for super::days_ms {} impl Sealed for super::months_days_ns {} + impl Sealed for View {} } diff --git a/src/common/arrow/src/native/stat.rs b/src/common/arrow/src/native/stat.rs index 96b5b22bad8b..cfd786c77018 100644 --- a/src/common/arrow/src/native/stat.rs +++ b/src/common/arrow/src/native/stat.rs @@ -164,6 +164,7 @@ fn size_of_primitive(p: PrimitiveType) -> usize { PrimitiveType::Float64 => 8, PrimitiveType::DaysMs => unimplemented!(), PrimitiveType::MonthDayNano => unimplemented!(), + PrimitiveType::UInt128 => unimplemented!(), } } diff --git a/src/common/arrow/src/native/util/mod.rs b/src/common/arrow/src/native/util/mod.rs index c2381463d672..fb93dea619f5 100644 --- a/src/common/arrow/src/native/util/mod.rs +++ b/src/common/arrow/src/native/util/mod.rs @@ -58,6 +58,7 @@ macro_rules! with_match_integer_double_type { Float16 => unreachable! 
{}, DaysMs => unreachable!(), MonthDayNano => unreachable!(), + UInt128 => unimplemented!(), } }}; } diff --git a/src/common/arrow/src/native/write/primitive.rs b/src/common/arrow/src/native/write/primitive.rs index 7230542113c1..5f7f741eb74f 100644 --- a/src/common/arrow/src/native/write/primitive.rs +++ b/src/common/arrow/src/native/write/primitive.rs @@ -86,6 +86,7 @@ pub(crate) fn write_primitive( crate::arrow::types::PrimitiveType::Float16 => unimplemented!(), crate::arrow::types::PrimitiveType::DaysMs => unimplemented!(), crate::arrow::types::PrimitiveType::MonthDayNano => unimplemented!(), + crate::arrow::types::PrimitiveType::UInt128 => unimplemented!(), } w.write_all(scratch.as_slice())?; Ok(()) diff --git a/src/common/arrow/tests/it/arrow/array/binview/mod.rs b/src/common/arrow/tests/it/arrow/array/binview/mod.rs new file mode 100644 index 000000000000..32f10baecf39 --- /dev/null +++ b/src/common/arrow/tests/it/arrow/array/binview/mod.rs @@ -0,0 +1,189 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +mod mutable; +mod mutable_values; +mod to_mutable; + +use std::sync::Arc; + +use databend_common_arrow::arrow::array::Array; +use databend_common_arrow::arrow::array::BinaryViewArray; +use databend_common_arrow::arrow::array::Utf8ViewArray; +use databend_common_arrow::arrow::bitmap::Bitmap; +use databend_common_arrow::arrow::buffer::Buffer; +use databend_common_arrow::arrow::datatypes::DataType; + +#[test] +fn basics_string_view() { + let data = vec![ + Some("hello"), + None, + // larger than 12 bytes. + Some("Databend Cloud is a Cost-Effective alternative to Snowflake."), + ]; + + let array: Utf8ViewArray = data.into_iter().collect(); + + assert_eq!(array.value(0), "hello"); + assert_eq!(array.value(1), ""); + assert_eq!( + array.value(2), + "Databend Cloud is a Cost-Effective alternative to Snowflake." + ); + assert_eq!( + unsafe { array.value_unchecked(2) }, + "Databend Cloud is a Cost-Effective alternative to Snowflake." + ); + assert_eq!( + array.validity(), + Some(&Bitmap::from_u8_slice([0b00000101], 3)) + ); + assert!(array.is_valid(0)); + assert!(!array.is_valid(1)); + assert!(array.is_valid(2)); + + let array2 = Utf8ViewArray::new_unchecked( + DataType::Utf8View, + array.views().clone(), + array.data_buffers().clone(), + array.validity().cloned(), + array.total_bytes_len(), + array.total_buffer_len(), + ); + + assert_eq!(array, array2); + + let array = array.sliced(1, 2); + + assert_eq!(array.value(0), ""); + assert_eq!( + array.value(1), + "Databend Cloud is a Cost-Effective alternative to Snowflake." + ); +} + +#[test] +fn basics_binary_view() { + let data = vec![ + Some(b"hello".to_vec()), + None, + // larger than 12 bytes. + Some(b"Databend Cloud is a Cost-Effective alternative to Snowflake.".to_vec()), + ]; + + let array: BinaryViewArray = data.into_iter().collect(); + + assert_eq!(array.value(0), b"hello"); + assert_eq!(array.value(1), b""); + assert_eq!( + array.value(2), + b"Databend Cloud is a Cost-Effective alternative to Snowflake." 
+ ); + assert_eq!( + unsafe { array.value_unchecked(2) }, + b"Databend Cloud is a Cost-Effective alternative to Snowflake." + ); + assert_eq!( + array.validity(), + Some(&Bitmap::from_u8_slice([0b00000101], 3)) + ); + assert!(array.is_valid(0)); + assert!(!array.is_valid(1)); + assert!(array.is_valid(2)); + + let array2 = BinaryViewArray::new_unchecked( + DataType::BinaryView, + array.views().clone(), + array.data_buffers().clone(), + array.validity().cloned(), + array.total_bytes_len(), + array.total_buffer_len(), + ); + + assert_eq!(array, array2); + + let array = array.sliced(1, 2); + + assert_eq!(array.value(0), b""); + assert_eq!( + array.value(1), + b"Databend Cloud is a Cost-Effective alternative to Snowflake." + ); +} + +#[test] +fn from() { + let array = Utf8ViewArray::from([Some("hello"), Some(" "), None]); + + let a = array.validity().unwrap(); + assert_eq!(a, &Bitmap::from([true, true, false])); + + let array = BinaryViewArray::from([Some(b"hello".to_vec()), Some(b" ".to_vec()), None]); + + let a = array.validity().unwrap(); + assert_eq!(a, &Bitmap::from([true, true, false])); +} + +#[test] +fn from_iter() { + let iter = std::iter::repeat(b"hello").take(2).map(Some); + let a: BinaryViewArray = iter.collect(); + assert_eq!(a.len(), 2); +} + +#[test] +fn with_validity() { + let array = BinaryViewArray::from([Some(b"hello".as_ref()), Some(b" ".as_ref()), None]); + + let array = array.with_validity(None); + + let a = array.validity(); + assert_eq!(a, None); +} + +#[test] +#[should_panic] +fn wrong_data_type() { + let validity = Some(Bitmap::new_zeroed(3)); + BinaryViewArray::try_new(DataType::Int8, Buffer::zeroed(3), Arc::from([]), validity).unwrap(); +} + +#[test] +fn debug() { + let data = vec![Some([1_u8, 2_u8].to_vec()), Some(vec![]), None]; + + let array: BinaryViewArray = data.into_iter().collect(); + + assert_eq!(format!("{array:?}"), "BinaryViewArray[[1, 2], [], None]"); +} + +#[test] +fn rev_iter() { + let array = 
BinaryViewArray::from([Some("hello".as_bytes()), Some(" ".as_bytes()), None]); + + assert_eq!(array.into_iter().rev().collect::>(), vec![ + None, + Some(" ".as_bytes()), + Some("hello".as_bytes()) + ]); +} + +#[test] +fn iter_nth() { + let array = BinaryViewArray::from([Some("hello"), Some(" "), None]); + + assert_eq!(array.iter().nth(1), Some(Some(" ".as_bytes()))); + assert_eq!(array.iter().nth(10), None); +} diff --git a/src/common/arrow/tests/it/arrow/array/binview/mutable.rs b/src/common/arrow/tests/it/arrow/array/binview/mutable.rs new file mode 100644 index 000000000000..f2b70037cf7c --- /dev/null +++ b/src/common/arrow/tests/it/arrow/array/binview/mutable.rs @@ -0,0 +1,50 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use databend_common_arrow::arrow::array::Array; +use databend_common_arrow::arrow::array::MutableBinaryViewArray; +use databend_common_arrow::arrow::array::Utf8ViewArray; +use databend_common_arrow::arrow::bitmap::Bitmap; + +#[test] +fn new() { + assert_eq!(MutableBinaryViewArray::<[u8]>::new().len(), 0); + + let a = MutableBinaryViewArray::<[u8]>::with_capacity(2); + assert_eq!(a.len(), 0); + assert_eq!(a.capacity(), 2); +} + +#[test] +fn from_iter() { + let iter = (0..3u8).map(|x| Some(vec![x; x as usize])); + let a: MutableBinaryViewArray<[u8]> = iter.clone().collect(); + let mut v_iter = a.values_iter(); + assert_eq!(v_iter.next(), Some(&[] as &[u8])); + assert_eq!(v_iter.next(), Some(&[1u8] as &[u8])); + assert_eq!(v_iter.next(), Some(&[2u8, 2] as &[u8])); + assert_eq!(a.validity(), None); + + let a = MutableBinaryViewArray::<[u8]>::from_iter(iter); + assert_eq!(a.validity(), None); +} + +#[test] +fn push_null() { + let mut array = MutableBinaryViewArray::new(); + array.push::<&str>(None); + + let array: Utf8ViewArray = array.into(); + assert_eq!(array.validity(), Some(&Bitmap::from([false]))); +} diff --git a/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs b/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs new file mode 100644 index 000000000000..0c23a157f65c --- /dev/null +++ b/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs @@ -0,0 +1,31 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use databend_common_arrow::arrow::array::MutableArray; +use databend_common_arrow::arrow::array::MutableBinaryViewArray; + +#[test] +fn extend_from_iter() { + let mut b = MutableBinaryViewArray::::new(); + b.extend_trusted_len_values(vec!["a", "b"].into_iter()); + + let a = b.clone(); + b.extend_trusted_len_values(a.values_iter()); + + assert_eq!( + b.as_box(), + MutableBinaryViewArray::::from_values_iter(vec!["a", "b", "a", "b"].into_iter()) + .as_box() + ) +} diff --git a/src/common/arrow/tests/it/arrow/array/binview/to_mutable.rs b/src/common/arrow/tests/it/arrow/array/binview/to_mutable.rs new file mode 100644 index 000000000000..7ee7856ba01d --- /dev/null +++ b/src/common/arrow/tests/it/arrow/array/binview/to_mutable.rs @@ -0,0 +1,46 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use databend_common_arrow::arrow::array::BinaryViewArray; +use databend_common_arrow::arrow::bitmap::Bitmap; +use databend_common_arrow::arrow::datatypes::DataType; + +#[test] +fn not_shared() { + let array = BinaryViewArray::from([Some("hello"), Some(" "), None]); + assert!(array.into_mut().is_right()); +} + +#[test] +#[allow(clippy::redundant_clone)] +fn shared() { + let validity = Bitmap::from([true]); + let data = vec![ + Some(b"hello".to_vec()), + None, + // larger than 12 bytes. 
+ Some(b"Databend Cloud is a Cost-Effective alternative to Snowflake.".to_vec()), + ]; + + let array: BinaryViewArray = data.into_iter().collect(); + let array2 = BinaryViewArray::new_unchecked( + DataType::BinaryView, + array.views().clone(), + array.data_buffers().clone(), + Some(validity.clone()), + array.total_bytes_len(), + array.total_buffer_len(), + ); + assert!(array2.into_mut().is_left()) +} diff --git a/src/common/arrow/tests/it/arrow/array/mod.rs b/src/common/arrow/tests/it/arrow/array/mod.rs index 66fefaf3ceec..85944735a3e4 100644 --- a/src/common/arrow/tests/it/arrow/array/mod.rs +++ b/src/common/arrow/tests/it/arrow/array/mod.rs @@ -14,6 +14,7 @@ // limitations under the License. mod binary; +mod binview; mod boolean; mod dictionary; mod equal;