From e6d8d88bcef5a388a0a7a0669d9418d25194f138 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 22 Feb 2024 14:10:12 +0800 Subject: [PATCH 01/35] feat(expr): allow explicit cast serial to bigint (#15184) Signed-off-by: TennyZhuang --- e2e_test/batch/catalog/pg_cast.slt.part | 5 +-- src/common/src/types/serial.rs | 6 ++++ src/expr/impl/src/scalar/cast.rs | 1 + src/frontend/src/expr/type_inference/cast.rs | 33 ++++++++++---------- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/e2e_test/batch/catalog/pg_cast.slt.part b/e2e_test/batch/catalog/pg_cast.slt.part index b8ab68a5ed5cd..b1558d1e144c4 100644 --- a/e2e_test/batch/catalog/pg_cast.slt.part +++ b/e2e_test/batch/catalog/pg_cast.slt.part @@ -82,8 +82,9 @@ SELECT * FROM pg_catalog.pg_cast; 78 3802 701 e 79 3802 1700 e 80 3802 1043 a -81 1301 701 e -82 1301 1043 a +81 20 20 e +82 1301 701 e +83 1301 1043 a query TT rowsort SELECT s.typname, t.typname diff --git a/src/common/src/types/serial.rs b/src/common/src/types/serial.rs index 9bfbf5e4fcac7..5c84c95fa0f7a 100644 --- a/src/common/src/types/serial.rs +++ b/src/common/src/types/serial.rs @@ -26,6 +26,12 @@ use crate::util::row_id::RowId; #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Default, Hash)] pub struct Serial(i64); +impl From<Serial> for i64 { + fn from(value: Serial) -> i64 { + value.0 + } +} + impl From<i64> for Serial { fn from(value: i64) -> Self { Self(value) diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs index dc81e3ab77bac..bf8afc7712f93 100644 --- a/src/expr/impl/src/scalar/cast.rs +++ b/src/expr/impl/src/scalar/cast.rs @@ -87,6 +87,7 @@ pub fn jsonb_to_number>(v: JsonbRef<'_>) -> Result { #[function("cast(int4) -> int2")] #[function("cast(int8) -> int2")] #[function("cast(int8) -> int4")] +#[function("cast(serial) -> int8")] #[function("cast(float4) -> int2")] #[function("cast(float8) -> int2")] #[function("cast(float4) -> int4")] diff --git a/src/frontend/src/expr/type_inference/cast.rs
b/src/frontend/src/expr/type_inference/cast.rs index aa7e1c8ee9192..1f1a96e92b826 100644 --- a/src/frontend/src/expr/type_inference/cast.rs +++ b/src/frontend/src/expr/type_inference/cast.rs @@ -216,22 +216,23 @@ pub static CAST_MAP: LazyLock = LazyLock::new(|| { use DataTypeName::*; const CAST_TABLE: &[(&str, DataTypeName)] = &[ // 123456789ABCDEF - (". e a", Boolean), // 0 - (" .iiiiii a", Int16), // 1 - ("ea.iiiii a", Int32), // 2 - (" aa.iiii a", Int64), // 3 - (" aaa.ii a", Decimal), // 4 - (" aaaa.i a", Float32), // 5 - (" aaaaa. a", Float64), // 6 - (" e. a", Int256), // 7 - (" .ii a", Date), // 8 - (" a.ia a", Timestamp), // 9 - (" aa.a a", Timestamptz), // A - (" .i a", Time), // B - (" a. a", Interval), // C - ("eeeeeee . a", Jsonb), // D - (" .a", Bytea), // E - ("eeeeeeeeeeeeeee.", Varchar), // F + (". e a ", Boolean), // 0 + (" .iiiiii a ", Int16), // 1 + ("ea.iiiii a ", Int32), // 2 + (" aa.iiii a ", Int64), // 3 + (" aaa.ii a ", Decimal), // 4 + (" aaaa.i a ", Float32), // 5 + (" aaaaa. a ", Float64), // 6 + (" e. a ", Int256), // 7 + (" .ii a ", Date), // 8 + (" a.ia a ", Timestamp), // 9 + (" aa.a a ", Timestamptz), // A + (" .i a ", Time), // B + (" a. a ", Interval), // C + ("eeeeeee . a ", Jsonb), // D + (" .a ", Bytea), // E + ("eeeeeeeeeeeeeee. ", Varchar), // F + (" e .", Serial), ]; let mut map = BTreeMap::new(); for (row, source) in CAST_TABLE { From d6a1089d8f8cdef395aeace1bb33040f20d421f5 Mon Sep 17 00:00:00 2001 From: Mike Wang <52522981+mikechesterwang@users.noreply.github.com> Date: Fri, 23 Feb 2024 09:49:50 +0800 Subject: [PATCH 02/35] fix: broken link in README (#15145) Co-authored-by: TennyZhuang --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 44443cfab8282..1611af1815175 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ Don’t have Docker? 
Learn how to install RisingWave on Mac, Ubuntu, and other e ## Production deployments -For **single-node deployment**, please refer to [Docker Compose](https://docs.risingwave.com/docs/current/risingwave-trial/?method=docker-compose). +For **single-node deployment**, please refer to [Docker Compose](https://docs.risingwave.com/docs/current/risingwave-docker-compose/). For **distributed deployment**, please refer to [Kubernetes with Helm](https://docs.risingwave.com/docs/current/risingwave-k8s-helm/) or [Kubernetes with Operator](https://docs.risingwave.com/docs/current/risingwave-kubernetes/). From 34bb7e339e6bd1594e8c524cb581a0874fc58684 Mon Sep 17 00:00:00 2001 From: William Wen <44139337+wenym1@users.noreply.github.com> Date: Fri, 23 Feb 2024 09:50:53 +0800 Subject: [PATCH 03/35] chore: upgrade declared pg version to 13.14.0 (#15177) --- e2e_test/batch/catalog/version.slt.part | 4 ++-- e2e_test/batch/functions/setting.slt.part | 6 +++--- src/common/src/lib.rs | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/e2e_test/batch/catalog/version.slt.part b/e2e_test/batch/catalog/version.slt.part index b2ba9e2a877c5..dc3e0399b1e6a 100644 --- a/e2e_test/batch/catalog/version.slt.part +++ b/e2e_test/batch/catalog/version.slt.part @@ -1,4 +1,4 @@ query T -select substring(version() from 1 for 14); +select substring(version() from 1 for 16); ---- -PostgreSQL 9.5 +PostgreSQL 13.14 diff --git a/e2e_test/batch/functions/setting.slt.part b/e2e_test/batch/functions/setting.slt.part index 77d1d80e46590..233399d80a025 100644 --- a/e2e_test/batch/functions/setting.slt.part +++ b/e2e_test/batch/functions/setting.slt.part @@ -1,12 +1,12 @@ query T SELECT current_setting('server_version'); ---- -9.5.0 +13.14.0 query I -SELECT CAST(current_setting('server_version_num') AS INT) / 100 AS version; +SELECT current_setting('server_version_num') AS version; ---- -905 +130014 query T SELECT set_config('client_min_messages', 'warning', false); diff --git 
a/src/common/src/lib.rs b/src/common/src/lib.rs index 980897d5636e7..313c0bada6616 100644 --- a/src/common/src/lib.rs +++ b/src/common/src/lib.rs @@ -92,9 +92,9 @@ pub const UNKNOWN_GIT_SHA: &str = "unknown"; // The single source of truth of the pg parameters, Used in ConfigMap and current_cluster_version. // The version of PostgreSQL that Risingwave claims to be. -pub const PG_VERSION: &str = "9.5.0"; +pub const PG_VERSION: &str = "13.14.0"; /// The version of PostgreSQL that Risingwave claims to be. -pub const SERVER_VERSION_NUM: i32 = 90500; +pub const SERVER_VERSION_NUM: i32 = 130014; /// Shows the server-side character set encoding. At present, this parameter can be shown but not set, because the encoding is determined at database creation time. It is also the default value of `client_encoding`. pub const SERVER_ENCODING: &str = "UTF8"; /// see From e223b9f165f63ca5c63e4940a31547543a646073 Mon Sep 17 00:00:00 2001 From: Li0k Date: Fri, 23 Feb 2024 10:28:05 +0800 Subject: [PATCH 04/35] fix(storage): fix expired timeout sec (#15194) --- src/meta/node/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs index 2e770fb841ada..8d7c4253631d5 100644 --- a/src/meta/node/src/lib.rs +++ b/src/meta/node/src/lib.rs @@ -254,7 +254,7 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { const MIN_TIMEOUT_INTERVAL_SEC: u64 = 20; let compaction_task_max_progress_interval_secs = { - config + (config .storage .object_store .object_store_read_timeout_ms @@ -271,7 +271,8 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { .object_store .object_store_streaming_upload_timeout_ms, ) - .max(config.meta.compaction_task_max_progress_interval_secs) + .max(config.meta.compaction_task_max_progress_interval_secs * 1000)) + / 1000 } + MIN_TIMEOUT_INTERVAL_SEC; let (mut join_handle, leader_lost_handle, shutdown_send) = rpc_serve( From 41e723b08c4de17db2a31ea5909df7c5ca6beb2a Mon Sep 17 00:00:00 2001 From: Li0k 
Date: Fri, 23 Feb 2024 10:30:35 +0800 Subject: [PATCH 05/35] feat(storage): improve block memory usage (#15024) --- src/storage/src/hummock/file_cache/store.rs | 9 +-- src/storage/src/hummock/sstable/block.rs | 79 ++++++++++++++------- src/storage/src/hummock/sstable/builder.rs | 20 ++++-- 3 files changed, 71 insertions(+), 37 deletions(-) diff --git a/src/storage/src/hummock/file_cache/store.rs b/src/storage/src/hummock/file_cache/store.rs index 3435227bd317b..c640ba8f1db58 100644 --- a/src/storage/src/hummock/file_cache/store.rs +++ b/src/storage/src/hummock/file_cache/store.rs @@ -701,13 +701,8 @@ mod tests { builder.add_for_test(construct_full_key_struct(0, b"k3", 3), b"v03"); builder.add_for_test(construct_full_key_struct(0, b"k4", 4), b"v04"); - Box::new( - Block::decode( - builder.build().to_vec().into(), - builder.uncompressed_block_size(), - ) - .unwrap(), - ) + let uncompress = builder.uncompressed_block_size(); + Box::new(Block::decode(builder.build().to_vec().into(), uncompress).unwrap()) } fn sstable_for_test() -> Sstable { diff --git a/src/storage/src/hummock/sstable/block.rs b/src/storage/src/hummock/sstable/block.rs index 3d0b4f8c0f770..fe465bba5b41f 100644 --- a/src/storage/src/hummock/sstable/block.rs +++ b/src/storage/src/hummock/sstable/block.rs @@ -215,20 +215,20 @@ impl Block { let mut decoder = lz4::Decoder::new(compressed_data.reader()) .map_err(HummockError::decode_error)?; let mut decoded = Vec::with_capacity(uncompressed_capacity); - decoder + let read_size = decoder .read_to_end(&mut decoded) .map_err(HummockError::decode_error)?; - debug_assert_eq!(decoded.capacity(), uncompressed_capacity); + assert_eq!(read_size, uncompressed_capacity); Bytes::from(decoded) } CompressionAlgorithm::Zstd => { let mut decoder = zstd::Decoder::new(compressed_data.reader()) .map_err(HummockError::decode_error)?; let mut decoded = Vec::with_capacity(uncompressed_capacity); - decoder + let read_size = decoder .read_to_end(&mut decoded) 
.map_err(HummockError::decode_error)?; - debug_assert_eq!(decoded.capacity(), uncompressed_capacity); + assert_eq!(read_size, uncompressed_capacity); Bytes::from(decoded) } }; @@ -445,6 +445,8 @@ impl Default for BlockBuilderOptions { pub struct BlockBuilder { /// Write buffer. buf: BytesMut, + /// Compress buffer + compress_buf: BytesMut, /// Entry interval between restart points. restart_count: usize, /// Restart points. @@ -465,8 +467,9 @@ pub struct BlockBuilder { impl BlockBuilder { pub fn new(options: BlockBuilderOptions) -> Self { Self { - // add more space to avoid re-allocate space. - buf: BytesMut::with_capacity(options.capacity + 256), + // add more space to avoid re-allocate space. (for restart_points and restart_points_type_index) + buf: BytesMut::with_capacity(Self::buf_reserve_size(&options)), + compress_buf: BytesMut::default(), restart_count: options.restart_interval, restart_points: Vec::with_capacity( options.capacity / DEFAULT_ENTRY_SIZE / options.restart_interval + 1, @@ -664,22 +667,35 @@ impl BlockBuilder { ); self.buf.put_u32_le(self.table_id.unwrap()); - if self.compression_algorithm != CompressionAlgorithm::None { - self.buf = Self::compress(&self.buf[..], self.compression_algorithm); - } + let result_buf = if self.compression_algorithm != CompressionAlgorithm::None { + self.compress_buf.clear(); + self.compress_buf = Self::compress( + &self.buf[..], + self.compression_algorithm, + std::mem::take(&mut self.compress_buf), + ); + + &mut self.compress_buf + } else { + &mut self.buf + }; - self.compression_algorithm.encode(&mut self.buf); - let checksum = xxhash64_checksum(&self.buf); - self.buf.put_u64_le(checksum); + self.compression_algorithm.encode(result_buf); + let checksum = xxhash64_checksum(result_buf); + result_buf.put_u64_le(checksum); assert!( - self.buf.len() < (u32::MAX) as usize, + result_buf.len() < (u32::MAX) as usize, "buf_len {} entry_count {} table {:?}", - self.buf.len(), + result_buf.len(), self.entry_count, self.table_id 
); - self.buf.as_ref() + if self.compression_algorithm != CompressionAlgorithm::None { + self.compress_buf.as_ref() + } else { + self.buf.as_ref() + } } pub fn compress_block( @@ -693,21 +709,29 @@ impl BlockBuilder { let compression = CompressionAlgorithm::decode(&mut &buf[buf.len() - 9..buf.len() - 8])?; let compressed_data = &buf[..buf.len() - 9]; assert_eq!(compression, CompressionAlgorithm::None); - let mut writer = Self::compress(compressed_data, target_compression); + let mut compress_writer = Self::compress( + compressed_data, + target_compression, + BytesMut::with_capacity(buf.len()), + ); - target_compression.encode(&mut writer); - let checksum = xxhash64_checksum(&writer); - writer.put_u64_le(checksum); - Ok(writer.freeze()) + target_compression.encode(&mut compress_writer); + let checksum = xxhash64_checksum(&compress_writer); + compress_writer.put_u64_le(checksum); + Ok(compress_writer.freeze()) } - pub fn compress(buf: &[u8], compression_algorithm: CompressionAlgorithm) -> BytesMut { + pub fn compress( + buf: &[u8], + compression_algorithm: CompressionAlgorithm, + compress_writer: BytesMut, + ) -> BytesMut { match compression_algorithm { CompressionAlgorithm::None => unreachable!(), CompressionAlgorithm::Lz4 => { let mut encoder = lz4::EncoderBuilder::new() .level(4) - .build(BytesMut::with_capacity(buf.len()).writer()) + .build(compress_writer.writer()) .map_err(HummockError::encode_error) .unwrap(); encoder @@ -719,10 +743,9 @@ impl BlockBuilder { writer.into_inner() } CompressionAlgorithm::Zstd => { - let mut encoder = - zstd::Encoder::new(BytesMut::with_capacity(buf.len()).writer(), 4) - .map_err(HummockError::encode_error) - .unwrap(); + let mut encoder = zstd::Encoder::new(compress_writer.writer(), 4) + .map_err(HummockError::encode_error) + .unwrap(); encoder .write_all(buf) .map_err(HummockError::encode_error) @@ -762,6 +785,10 @@ impl BlockBuilder { pub fn table_id(&self) -> Option { self.table_id } + + fn buf_reserve_size(option: 
&BlockBuilderOptions) -> usize { + option.capacity + 1024 + 256 + } } #[cfg(test)] diff --git a/src/storage/src/hummock/sstable/builder.rs b/src/storage/src/hummock/sstable/builder.rs index 4fe331f677321..ebaa60e167056 100644 --- a/src/storage/src/hummock/sstable/builder.rs +++ b/src/storage/src/hummock/sstable/builder.rs @@ -240,7 +240,6 @@ impl SstableBuilder { self.add(full_key, value).await } - /// only for test pub fn current_block_size(&self) -> usize { self.block_builder.approximate_len() } @@ -344,6 +343,12 @@ impl SstableBuilder { || !user_key(&self.raw_key).eq(user_key(&self.last_full_key)); let table_id = full_key.user_key.table_id.table_id(); let is_new_table = self.last_table_id.is_none() || self.last_table_id.unwrap() != table_id; + let current_block_size = self.current_block_size(); + let is_block_full = current_block_size >= self.options.block_capacity + || (current_block_size > self.options.block_capacity / 4 * 3 + && current_block_size + self.raw_value.len() + self.raw_key.len() + > self.options.block_capacity); + if is_new_table { assert!( could_switch_block, @@ -356,9 +361,7 @@ impl SstableBuilder { if !self.block_builder.is_empty() { self.build_block().await?; } - } else if self.block_builder.approximate_len() >= self.options.block_capacity - && could_switch_block - { + } else if is_block_full && could_switch_block { self.build_block().await?; } self.last_table_stats.total_key_count += 1; @@ -704,6 +707,15 @@ impl SstableBuilder { data_len, block_meta.offset ) }); + + if data_len as usize > self.options.capacity * 2 { + tracing::warn!( + "WARN unexpected block size {} table {:?}", + data_len, + self.block_builder.table_id() + ); + } + self.block_builder.clear(); Ok(()) } From 65550a0e0c81282ffb4d5286bd23bada5519172b Mon Sep 17 00:00:00 2001 From: congyi wang <58715567+wcy-fdu@users.noreply.github.com> Date: Fri, 23 Feb 2024 10:31:26 +0800 Subject: [PATCH 06/35] fix(object_store): fs and hdfs object store should set atomic_write_dir (#15155) --- 
risedev.yml | 11 +++++++++++ src/object_store/src/object/opendal_engine/fs.rs | 6 ++++-- src/object_store/src/object/opendal_engine/hdfs.rs | 5 ++++- src/object_store/src/object/opendal_engine/mod.rs | 3 +++ src/object_store/src/object/opendal_engine/webhdfs.rs | 3 +++ 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/risedev.yml b/risedev.yml index 69b0c23b05dd3..22356f2e1ac89 100644 --- a/risedev.yml +++ b/risedev.yml @@ -164,6 +164,17 @@ profile: - use: compactor # - use: prometheus # - use: grafana + fs: + steps: + # - use: etcd + - use: meta-node + - use: compute-node + - use: frontend + - use: opendal + engine: fs + - use: compactor + # - use: prometheus + # - use: grafana webhdfs: steps: # - use: etcd diff --git a/src/object_store/src/object/opendal_engine/fs.rs b/src/object_store/src/object/opendal_engine/fs.rs index 23d7dcbd503e8..ece3555d5b777 100644 --- a/src/object_store/src/object/opendal_engine/fs.rs +++ b/src/object_store/src/object/opendal_engine/fs.rs @@ -17,15 +17,17 @@ use opendal::services::Fs; use opendal::Operator; use super::{EngineType, OpendalObjectStore}; +use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; + impl OpendalObjectStore { /// create opendal fs engine. pub fn new_fs_engine(root: String) -> ObjectResult { // Create fs backend builder. let mut builder = Fs::default(); - builder.root(&root); - + let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR); + builder.atomic_write_dir(&atomic_write_dir); let op: Operator = Operator::new(builder)? 
.layer(RetryLayer::default()) .finish(); diff --git a/src/object_store/src/object/opendal_engine/hdfs.rs b/src/object_store/src/object/opendal_engine/hdfs.rs index b52be4094df80..12ee292a85416 100644 --- a/src/object_store/src/object/opendal_engine/hdfs.rs +++ b/src/object_store/src/object/opendal_engine/hdfs.rs @@ -17,7 +17,9 @@ use opendal::services::Hdfs; use opendal::Operator; use super::{EngineType, OpendalObjectStore}; +use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; + impl OpendalObjectStore { /// create opendal hdfs engine. pub fn new_hdfs_engine(namenode: String, root: String) -> ObjectResult { @@ -26,7 +28,8 @@ impl OpendalObjectStore { // Set the name node for hdfs. builder.name_node(&namenode); builder.root(&root); - + let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR); + builder.atomic_write_dir(&atomic_write_dir); let op: Operator = Operator::new(builder)? .layer(LoggingLayer::default()) .layer(RetryLayer::default()) diff --git a/src/object_store/src/object/opendal_engine/mod.rs b/src/object_store/src/object/opendal_engine/mod.rs index ccaba375a1302..c1ab929d5586f 100644 --- a/src/object_store/src/object/opendal_engine/mod.rs +++ b/src/object_store/src/object/opendal_engine/mod.rs @@ -31,3 +31,6 @@ pub mod opendal_s3; pub mod oss; pub mod fs; + +// To make sure the operation is consistent, we should specially set `atomic_write_dir` for fs, hdfs and webhdfs services.
+const ATOMIC_WRITE_DIR: &str = "atomic_write_dir/"; diff --git a/src/object_store/src/object/opendal_engine/webhdfs.rs b/src/object_store/src/object/opendal_engine/webhdfs.rs index ff61b39ec9e79..1f6b87b44fd5e 100644 --- a/src/object_store/src/object/opendal_engine/webhdfs.rs +++ b/src/object_store/src/object/opendal_engine/webhdfs.rs @@ -17,6 +17,7 @@ use opendal::services::Webhdfs; use opendal::Operator; use super::{EngineType, OpendalObjectStore}; +use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; impl OpendalObjectStore { @@ -30,6 +31,8 @@ impl OpendalObjectStore { // NOTE: the root must be absolute path. builder.root(&root); + let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR); + builder.atomic_write_dir(&atomic_write_dir); let op: Operator = Operator::new(builder)? .layer(LoggingLayer::default()) .layer(RetryLayer::default()) From b5e3a22b270e26c439f354f021be201ebd3ab729 Mon Sep 17 00:00:00 2001 From: congyi wang <58715567+wcy-fdu@users.noreply.github.com> Date: Fri, 23 Feb 2024 10:31:35 +0800 Subject: [PATCH 07/35] refactor(object store): use AssumeRoleWithWebIdentity for opendal s3 (#15182) --- .../src/object/opendal_engine/opendal_s3.rs | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/object_store/src/object/opendal_engine/opendal_s3.rs b/src/object_store/src/object/opendal_engine/opendal_s3.rs index 425d0a7576691..c10aff55d342b 100644 --- a/src/object_store/src/object/opendal_engine/opendal_s3.rs +++ b/src/object_store/src/object/opendal_engine/opendal_s3.rs @@ -31,30 +31,11 @@ impl OpendalObjectStore { // Create s3 builder. let mut builder = S3::default(); builder.bucket(&bucket); - // For AWS S3, there is no need to set an endpoint; for other S3 compatible object stores, it is necessary to set this field. 
if let Ok(endpoint_url) = std::env::var("RW_S3_ENDPOINT") { builder.endpoint(&endpoint_url); } - if let Ok(region) = std::env::var("AWS_REGION") { - builder.region(&region); - } else { - tracing::error!("aws s3 region is not set, bucket {}", bucket); - } - - if let Ok(access) = std::env::var("AWS_ACCESS_KEY_ID") { - builder.access_key_id(&access); - } else { - tracing::error!("access key id of aws s3 is not set, bucket {}", bucket); - } - - if let Ok(secret) = std::env::var("AWS_SECRET_ACCESS_KEY") { - builder.secret_access_key(&secret); - } else { - tracing::error!("secret access key of aws s3 is not set, bucket {}", bucket); - } - if std::env::var("RW_IS_FORCE_PATH_STYLE").is_err() { builder.enable_virtual_host_style(); } From d8cca2ab7f34e63733f91fb0431a50ef42f0da40 Mon Sep 17 00:00:00 2001 From: Xinhao Xu <84456268+xxhZs@users.noreply.github.com> Date: Fri, 23 Feb 2024 10:42:07 +0800 Subject: [PATCH 08/35] feat(test): Add starrocks redis doris cassandra e2e test (#14142) --- ci/docker-compose.yml | 123 +++++++++++++++++++-- ci/scripts/e2e-cassandra-sink-test.sh | 65 +++++++++++ ci/scripts/e2e-clickhouse-sink-test.sh | 2 +- ci/scripts/e2e-deltalake-sink-rust-test.sh | 3 +- ci/scripts/e2e-doris-sink-test.sh | 59 ++++++++++ ci/scripts/e2e-pulsar-sink-test.sh | 2 +- ci/scripts/e2e-redis-sink-test.sh | 48 ++++++++ ci/scripts/e2e-starrocks-sink-test.sh | 58 ++++++++++ ci/workflows/main-cron.yml | 88 +++++++++++++++ ci/workflows/pull-request.yml | 69 ++++++++++++ e2e_test/sink/cassandra_sink.slt | 33 ++++++ e2e_test/sink/doris_sink.slt | 34 ++++++ e2e_test/sink/redis_sink.slt | 41 +++++++ e2e_test/sink/starrocks_sink.slt | 36 ++++++ risedev.yml | 22 +--- src/connector/src/sink/starrocks.rs | 14 +-- src/connector/with_options_sink.yaml | 2 + 17 files changed, 660 insertions(+), 39 deletions(-) create mode 100755 ci/scripts/e2e-cassandra-sink-test.sh create mode 100755 ci/scripts/e2e-doris-sink-test.sh create mode 100755 ci/scripts/e2e-redis-sink-test.sh create mode 100755
ci/scripts/e2e-starrocks-sink-test.sh create mode 100644 e2e_test/sink/cassandra_sink.slt create mode 100644 e2e_test/sink/doris_sink.slt create mode 100644 e2e_test/sink/redis_sink.slt create mode 100644 e2e_test/sink/starrocks_sink.slt diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 4a9f2970b84c7..db017be647376 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -88,10 +88,27 @@ services: - message_queue - elasticsearch - clickhouse-server - - pulsar + - redis-server + - pulsar-server + - cassandra-server + - starrocks-fe-server + - starrocks-be-server volumes: - ..:/risingwave + sink-doris-env: + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231109 + depends_on: + - doris-fe-server + - doris-be-server + volumes: + - ..:/risingwave + command: > + sh -c "sudo sysctl -w vm.max_map_count=2000000" + networks: + mynetwork: + ipv4_address: 172.121.0.4 + rw-build-env: image: public.ecr.aws/x5u3w5h6/rw-build-env:v20240213 volumes: @@ -159,10 +176,96 @@ services: expose: - 9009 -# Temporary workaround for json schema registry test since redpanda only supports -# protobuf/avro schema registry. Should be removed after the support. 
-# Related tracking issue: -# https://github.com/redpanda-data/redpanda/issues/1878 + redis-server: + container_name: redis-server + image: 'redis:latest' + expose: + - 6379 + ports: + - 6378:6379 + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 30s + retries: 50 + + doris-fe-server: + platform: linux/amd64 + image: apache/doris:2.0.0_alpha-fe-x86_64 + hostname: doris-fe-server + command: > + sh -c "sudo sysctl -w vm.max_map_count=2000000" + environment: + - FE_SERVERS=fe1:172.121.0.2:9010 + - FE_ID=1 + ports: + - "8030:8030" + - "9030:9030" + networks: + mynetwork: + ipv4_address: 172.121.0.2 + + doris-be-server: + platform: linux/amd64 + image: apache/doris:2.0.0_alpha-be-x86_64 + hostname: doris-be-server + command: > + sh -c "sudo sysctl -w vm.max_map_count=2000000" + environment: + - FE_SERVERS=fe1:172.121.0.2:9010 + - BE_ADDR=172.121.0.3:9050 + depends_on: + - doris-fe-server + ports: + - "9050:9050" + networks: + mynetwork: + ipv4_address: 172.121.0.3 + + cassandra-server: + container_name: cassandra-server + image: cassandra:4.0 + ports: + - 9042:9042 + environment: + - CASSANDRA_CLUSTER_NAME=cloudinfra + + starrocks-fe-server: + container_name: starrocks-fe-server + image: starrocks/fe-ubuntu:3.1.7 + hostname: starrocks-fe-server + command: + /opt/starrocks/fe/bin/start_fe.sh + ports: + - 28030:8030 + - 29020:9020 + - 29030:9030 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9030"] + interval: 5s + timeout: 5s + retries: 30 + + starrocks-be-server: + image: starrocks/be-ubuntu:3.1.7 + command: + - /bin/bash + - -c + - | + sleep 15s; mysql --connect-timeout 2 -h starrocks-fe-server -P9030 -uroot -e "alter system add backend \"starrocks-be-server:9050\";" + /opt/starrocks/be/bin/start_be.sh + ports: + - 28040:8040 + - 29050:9050 + hostname: starrocks-be-server + container_name: starrocks-be-server + depends_on: + - starrocks-fe-server + +# # Temporary workaround for json schema registry test since redpanda only 
supports +# # protobuf/avro schema registry. Should be removed after the support. +# # Related tracking issue: +# # https://github.com/redpanda-data/redpanda/issues/1878 zookeeper: container_name: zookeeper image: confluentinc/cp-zookeeper:latest @@ -201,8 +304,8 @@ services: KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9093,PLAINTEXT_INTERNAL://localhost:29093 KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 - pulsar: - container_name: pulsar + pulsar-server: + container_name: pulsar-server image: apachepulsar/pulsar:latest command: bin/pulsar standalone ports: @@ -216,3 +319,9 @@ services: interval: 5s timeout: 5s retries: 5 +networks: + mynetwork: + ipam: + config: + - subnet: 172.121.80.0/16 + default: diff --git a/ci/scripts/e2e-cassandra-sink-test.sh b/ci/scripts/e2e-cassandra-sink-test.sh new file mode 100755 index 0000000000000..c393d510d19a2 --- /dev/null +++ b/ci/scripts/e2e-cassandra-sink-test.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +# prepare environment +export CONNECTOR_LIBS_PATH="./connector-node/libs" + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? 
) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- Download connector node package" +buildkite-agent artifact download risingwave-connector.tar.gz ./ +mkdir ./connector-node +tar xf ./risingwave-connector.tar.gz -C ./connector-node + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +sleep 1 + +echo "--- create cassandra table" +curl https://downloads.apache.org/cassandra/4.1.3/apache-cassandra-4.1.3-bin.tar.gz --output apache-cassandra-4.1.3-bin.tar.gz +tar xfvz apache-cassandra-4.1.3-bin.tar.gz +cd apache-cassandra-4.1.3/bin +export CQLSH_HOST=cassandra-server +export CQLSH_PORT=9042 +./cqlsh -e "CREATE KEYSPACE demo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};use demo; +CREATE table demo_bhv_table(v1 int primary key,v2 smallint,v3 bigint,v4 float,v5 double,v6 text,v7 date,v8 timestamp,v9 boolean);" + +echo "--- testing sinks" +cd ../../ +sqllogictest -p 4566 -d dev './e2e_test/sink/cassandra_sink.slt' +sleep 1 +cd apache-cassandra-4.1.3/bin +./cqlsh -e "COPY demo.demo_bhv_table TO './query_result.csv' WITH HEADER = false AND ENCODING = 'UTF-8';" + +if cat ./query_result.csv | awk -F "," '{ + exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01.000+0000" && $9 == "False\r"); }'; then + echo "Cassandra sink check passed" +else + cat ./query_result.csv + echo "The output is not as expected." 
+ exit 1 +fi + +echo "--- Kill cluster" +cd ../../ +cargo make ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-clickhouse-sink-test.sh b/ci/scripts/e2e-clickhouse-sink-test.sh index 3464bd3c3c14d..c14d83e8c4281 100755 --- a/ci/scripts/e2e-clickhouse-sink-test.sh +++ b/ci/scripts/e2e-clickhouse-sink-test.sh @@ -24,7 +24,7 @@ shift $((OPTIND -1)) download_and_prepare_rw "$profile" source echo "--- starting risingwave cluster" -cargo make ci-start ci-clickhouse-test +cargo make ci-start ci-sink-test sleep 1 diff --git a/ci/scripts/e2e-deltalake-sink-rust-test.sh b/ci/scripts/e2e-deltalake-sink-rust-test.sh index 71ff1eede8e4d..cc0c287e8b572 100755 --- a/ci/scripts/e2e-deltalake-sink-rust-test.sh +++ b/ci/scripts/e2e-deltalake-sink-rust-test.sh @@ -32,8 +32,7 @@ mkdir ./connector-node tar xf ./risingwave-connector.tar.gz -C ./connector-node echo "--- starting risingwave cluster" -mkdir -p .risingwave/log -cargo make ci-start ci-deltalake-test +cargo make ci-start ci-sink-test sleep 1 # prepare minio deltalake sink diff --git a/ci/scripts/e2e-doris-sink-test.sh b/ci/scripts/e2e-doris-sink-test.sh new file mode 100755 index 0000000000000..30bfdaf129e26 --- /dev/null +++ b/ci/scripts/e2e-doris-sink-test.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? 
) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +sleep 1 + +echo "--- create doris table" +apt-get update -y && apt-get install -y mysql-client +sleep 2 +mysql -uroot -P 9030 -h doris-fe-server -e "CREATE database demo;use demo; +CREATE table demo_bhv_table(v1 int,v2 smallint,v3 bigint,v4 float,v5 double,v6 string,v7 datev2,v8 datetime,v9 boolean) UNIQUE KEY(\`v1\`) +DISTRIBUTED BY HASH(\`v1\`) BUCKETS 1 +PROPERTIES ( + \"replication_allocation\" = \"tag.location.default: 1\" +); +CREATE USER 'users'@'%' IDENTIFIED BY '123456'; +GRANT ALL ON *.* TO 'users'@'%';" +sleep 2 + +echo "--- testing sinks" +sqllogictest -p 4566 -d dev './e2e_test/sink/doris_sink.slt' +sleep 1 +mysql -uroot -P 9030 -h doris-fe-server -e "select * from demo.demo_bhv_table" > ./query_result.csv + + +if cat ./query_result.csv | sed '1d; s/\t/,/g' | awk -F "," '{ + exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01" && $9 == 0); }'; then + echo "Doris sink check passed" +else + cat ./query_result.csv + echo "The output is not as expected." 
+ exit 1 +fi + +echo "--- Kill cluster" +cargo make ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-pulsar-sink-test.sh b/ci/scripts/e2e-pulsar-sink-test.sh index ee8848832f940..f942ad945b3e9 100755 --- a/ci/scripts/e2e-pulsar-sink-test.sh +++ b/ci/scripts/e2e-pulsar-sink-test.sh @@ -21,7 +21,7 @@ shift $((OPTIND -1)) download_and_prepare_rw "$profile" source echo "--- starting risingwave cluster" -cargo make ci-start ci-pulsar-test +cargo make ci-start ci-sink-test sleep 1 echo "--- waiting until pulsar is healthy" diff --git a/ci/scripts/e2e-redis-sink-test.sh b/ci/scripts/e2e-redis-sink-test.sh new file mode 100755 index 0000000000000..cf64662db4051 --- /dev/null +++ b/ci/scripts/e2e-redis-sink-test.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? ) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +apt-get update -y && apt-get install -y redis-server +sleep 1 + +echo "--- testing sinks" +sqllogictest -p 4566 -d dev './e2e_test/sink/redis_sink.slt' +sleep 1 + +redis-cli -h redis-server -p 6379 get {\"v1\":1} >> ./query_result.txt +redis-cli -h redis-server -p 6379 get V1:1 >> ./query_result.txt + +# check sink destination using shell +if cat ./query_result.txt | tr '\n' '\0' | xargs -0 -n1 bash -c '[[ "$0" == "{\"v1\":1,\"v2\":1,\"v3\":1,\"v4\":1.100000023841858,\"v5\":1.2,\"v6\":\"test\",\"v7\":734869,\"v8\":\"2013-01-01T01:01:01.000000Z\",\"v9\":false}" || "$0" == "V2:1,V3:1" ]]'; then + echo "Redis sink check passed" +else + cat ./query_result.txt + echo "The output is not as expected." 
+ exit 1 +fi + +echo "--- Kill cluster" +cargo make ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-starrocks-sink-test.sh b/ci/scripts/e2e-starrocks-sink-test.sh new file mode 100755 index 0000000000000..256f4448f9198 --- /dev/null +++ b/ci/scripts/e2e-starrocks-sink-test.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? ) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +sleep 1 + + +echo "--- create starrocks table" +apt-get update -y && apt-get install -y mysql-client +sleep 2 +mysql -uroot -P 9030 -h starrocks-fe-server -e "CREATE database demo;use demo; +CREATE table demo_bhv_table(v1 int,v2 smallint,v3 bigint,v4 float,v5 double,v6 string,v7 date,v8 datetime,v9 boolean,v10 json) ENGINE=OLAP +PRIMARY KEY(\`v1\`) +DISTRIBUTED BY HASH(\`v1\`) properties(\"replication_num\" = \"1\"); +CREATE USER 'users'@'%' IDENTIFIED BY '123456'; +GRANT ALL ON *.* TO 'users'@'%';" +sleep 2 + +echo "--- testing sinks" +sqllogictest -p 4566 -d dev './e2e_test/sink/starrocks_sink.slt' +sleep 1 +mysql -uroot -P 9030 -h starrocks-fe-server -e "select * from demo.demo_bhv_table" > ./query_result.csv + + +if cat ./query_result.csv | sed '1d; s/\t/,/g' | awk -F "," '{ + exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01" && $9 == 0 && $10 = "{"v101": 100}"); }'; then + echo "Starrocks sink check passed" +else + cat ./query_result.csv + echo "The output is not as expected." 
+ exit 1 +fi + +echo "--- Kill cluster" +cargo make ci-kill \ No newline at end of file diff --git a/ci/workflows/main-cron.yml b/ci/workflows/main-cron.yml index 835c46fb01e60..934458bcca1bc 100644 --- a/ci/workflows/main-cron.yml +++ b/ci/workflows/main-cron.yml @@ -815,6 +815,94 @@ steps: timeout_in_minutes: 10 retry: *auto-retry + - label: "end-to-end redis sink test" + key: "e2e-redis-sink-tests" + command: "ci/scripts/e2e-redis-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-redis-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-redis-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "set vm_max_map_count_2000000" + key: "set-vm_max_map_count" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-doris-sink-tests?(,|$$)/ + command: "sudo sysctl -w vm.max_map_count=2000000" + depends_on: + - "build" + - "build-other" + + - label: "end-to-end doris sink test" + key: "e2e-doris-sink-tests" + command: "ci/scripts/e2e-doris-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-doris-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + - "set-vm_max_map_count" + plugins: + - docker-compose#v4.9.0: + run: sink-doris-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: 
*auto-retry + + - label: "end-to-end starrocks sink test" + key: "e2e-starrocks-sink-tests" + command: "ci/scripts/e2e-starrocks-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-starrocks-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-starrocks-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "end-to-end cassandra sink test" + key: "e2e-cassandra-sink-tests" + command: "ci/scripts/e2e-cassandra-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-cassandra-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-cassandra-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + - label: "end-to-end clickhouse sink test" key: "e2e-clickhouse-sink-tests" command: "ci/scripts/e2e-clickhouse-sink-test.sh -p ci-release" diff --git a/ci/workflows/pull-request.yml b/ci/workflows/pull-request.yml index c48de6df64f1c..a67f915d943cc 100644 --- a/ci/workflows/pull-request.yml +++ b/ci/workflows/pull-request.yml @@ -292,6 +292,75 @@ steps: timeout_in_minutes: 10 retry: *auto-retry + - label: "end-to-end redis sink test" + if: build.pull_request.labels includes "ci/run-e2e-redis-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-redis-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-redis-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - 
docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "set vm_max_map_count_2000000" + key: "set-vm_max_map_count" + if: build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-doris-sink-tests?(,|$$)/ + command: "sudo sysctl -w vm.max_map_count=2000000" + depends_on: + - "build" + - "build-other" + + - label: "end-to-end doris sink test" + if: build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-doris-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-doris-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + - "set-vm_max_map_count" + plugins: + - docker-compose#v4.9.0: + run: sink-doris-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "end-to-end starrocks sink test" + if: build.pull_request.labels includes "ci/run-e2e-starrocks-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-starrocks-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-starrocks-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "end-to-end cassandra sink test" + if: build.pull_request.labels includes "ci/run-e2e-cassandra-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-cassandra-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-cassandra-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: 
*auto-retry + - label: "e2e java-binding test" if: build.pull_request.labels includes "ci/run-java-binding-tests" || build.env("CI_STEPS") =~ /(^|,)java-binding-tests?(,|$$)/ command: "ci/scripts/java-binding-test.sh -p ci-dev" diff --git a/e2e_test/sink/cassandra_sink.slt b/e2e_test/sink/cassandra_sink.slt new file mode 100644 index 0000000000000..7091e8da70783 --- /dev/null +++ b/e2e_test/sink/cassandra_sink.slt @@ -0,0 +1,33 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s6 +FROM + mv6 WITH ( + connector = 'cassandra', + type = 'append-only', + force_append_only='true', + cassandra.url = 'cassandra-server:9042', + cassandra.keyspace = 'demo', + cassandra.table = 'demo_bhv_table', + cassandra.datacenter = 'datacenter1', +); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01+00:00' , false); + +statement ok +FLUSH; + +statement ok +DROP SINK s6; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/e2e_test/sink/doris_sink.slt b/e2e_test/sink/doris_sink.slt new file mode 100644 index 0000000000000..2c552bbb26143 --- /dev/null +++ b/e2e_test/sink/doris_sink.slt @@ -0,0 +1,34 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamp, v9 boolean); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s6 +FROM + mv6 WITH ( + connector = 'doris', + type = 'append-only', + doris.url = 'http://doris-fe-server:8030', + doris.user = 'users', + doris.password = '123456', + doris.database = 'demo', + doris.table='demo_bhv_table', + force_append_only='true' +); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 
01:01:01' , false); + +statement ok +FLUSH; + +statement ok +DROP SINK s6; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/e2e_test/sink/redis_sink.slt b/e2e_test/sink/redis_sink.slt new file mode 100644 index 0000000000000..7475a80ae696e --- /dev/null +++ b/e2e_test/sink/redis_sink.slt @@ -0,0 +1,41 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s61 +FROM + mv6 WITH ( + primary_key = 'v1', + connector = 'redis', + redis.url= 'redis://redis-server:6379/', +)FORMAT PLAIN ENCODE JSON(force_append_only='true'); + +statement ok +CREATE SINK s62 +FROM + mv6 WITH ( + primary_key = 'v1', + connector = 'redis', + redis.url= 'redis://redis-server:6379/', +)FORMAT PLAIN ENCODE TEMPLATE(force_append_only='true', key_format = 'V1:{v1}', value_format = 'V2:{v2},V3:{v3}'); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01+00:00' , false); + +statement ok +FLUSH; + +statement ok +DROP SINK s61; + +statement ok +DROP SINK s62; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/e2e_test/sink/starrocks_sink.slt b/e2e_test/sink/starrocks_sink.slt new file mode 100644 index 0000000000000..a1ee1b0ffe039 --- /dev/null +++ b/e2e_test/sink/starrocks_sink.slt @@ -0,0 +1,36 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamp, v9 boolean, v10 jsonb); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s6 +FROM + mv6 WITH ( + connector = 'starrocks', + type = 'upsert', + starrocks.host = 'starrocks-fe-server', + starrocks.mysqlport = '9030', + starrocks.httpport = '8030', + starrocks.user = 
'users', + starrocks.password = '123456', + starrocks.database = 'demo', + starrocks.table = 'demo_bhv_table', + primary_key = 'v1' +); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01' , false, '{"v101":100}'); + +statement ok +FLUSH; + +statement ok +DROP SINK s6; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/risedev.yml b/risedev.yml index 22356f2e1ac89..cb352daab6cf9 100644 --- a/risedev.yml +++ b/risedev.yml @@ -883,27 +883,7 @@ profile: - use: frontend - use: compactor - ci-deltalake-test: - config-path: src/config/ci.toml - steps: - - use: minio - - use: meta-node - - use: compute-node - enable-tiered-cache: true - - use: frontend - - use: compactor - - ci-clickhouse-test: - config-path: src/config/ci.toml - steps: - - use: minio - - use: meta-node - - use: compute-node - enable-tiered-cache: true - - use: frontend - - use: compactor - - ci-pulsar-test: + ci-sink-test: config-path: src/config/ci.toml steps: - use: minio diff --git a/src/connector/src/sink/starrocks.rs b/src/connector/src/sink/starrocks.rs index 11594133695d4..c5a0740b0736f 100644 --- a/src/connector/src/sink/starrocks.rs +++ b/src/connector/src/sink/starrocks.rs @@ -52,10 +52,10 @@ pub struct StarrocksCommon { #[serde(rename = "starrocks.host")] pub host: String, /// The port to the MySQL server of StarRocks FE. - #[serde(rename = "starrocks.mysqlport")] + #[serde(rename = "starrocks.mysqlport", alias = "starrocks.query_port")] pub mysql_port: String, /// The port to the HTTP server of StarRocks FE. - #[serde(rename = "starrocks.httpport")] + #[serde(rename = "starrocks.httpport", alias = "starrocks.http_port")] pub http_port: String, /// The user name used to access the StarRocks database. 
#[serde(rename = "starrocks.user")] @@ -175,7 +175,7 @@ impl StarrocksSink { Ok(starrocks_data_type.contains("varchar")) } risingwave_common::types::DataType::Time => Err(SinkError::Starrocks( - "starrocks can not support Time".to_string(), + "TIME is not supported for Starrocks sink. Please convert to VARCHAR or other supported types.".to_string(), )), risingwave_common::types::DataType::Timestamp => { Ok(starrocks_data_type.contains("datetime")) @@ -184,24 +184,24 @@ impl StarrocksSink { "TIMESTAMP WITH TIMEZONE is not supported for Starrocks sink as Starrocks doesn't store time values with timezone information. Please convert to TIMESTAMP first.".to_string(), )), risingwave_common::types::DataType::Interval => Err(SinkError::Starrocks( - "starrocks can not support Interval".to_string(), + "INTERVAL is not supported for Starrocks sink. Please convert to VARCHAR or other supported types.".to_string(), )), // todo! Validate the type struct and list risingwave_common::types::DataType::Struct(_) => Err(SinkError::Starrocks( - "starrocks can not support import struct".to_string(), + "STRUCT is not supported for Starrocks sink.".to_string(), )), risingwave_common::types::DataType::List(_) => { Ok(starrocks_data_type.contains("unknown")) } risingwave_common::types::DataType::Bytea => Err(SinkError::Starrocks( - "starrocks can not support Bytea".to_string(), + "BYTEA is not supported for Starrocks sink. 
Please convert to VARCHAR or other supported types.".to_string(), )), risingwave_common::types::DataType::Jsonb => Ok(starrocks_data_type.contains("json")), risingwave_common::types::DataType::Serial => { Ok(starrocks_data_type.contains("bigint")) } risingwave_common::types::DataType::Int256 => Err(SinkError::Starrocks( - "starrocks can not support Int256".to_string(), + "INT256 is not supported for Starrocks sink.".to_string(), )), } } diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index 74cb5c21e9c7f..2b23913a1fc32 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -466,10 +466,12 @@ StarrocksConfig: field_type: String comments: The port to the MySQL server of StarRocks FE. required: true + alias: starrocks.query_port - name: starrocks.httpport field_type: String comments: The port to the HTTP server of StarRocks FE. required: true + alias: starrocks.http_port - name: starrocks.user field_type: String comments: The user name used to access the StarRocks database. 
From 316f180b0097b75c5e6ce0785110561a1aa6dc58 Mon Sep 17 00:00:00 2001 From: Xinhao Xu <84456268+xxhZs@users.noreply.github.com> Date: Fri, 23 Feb 2024 10:42:20 +0800 Subject: [PATCH 09/35] feat(sink demo): Add http sink demo (#15149) --- integration_tests/http-sink/README.md | 34 +++++++++++++++++ integration_tests/http-sink/create_mv.sql | 6 +++ integration_tests/http-sink/create_sink.sql | 11 ++++++ integration_tests/http-sink/create_source.sql | 18 +++++++++ .../http-sink/docker-compose.yml | 37 +++++++++++++++++++ .../risingwave-connector-service/pom.xml | 1 - .../flink/http/HttpFlinkMockSinkFactory.java | 2 + 7 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 integration_tests/http-sink/README.md create mode 100644 integration_tests/http-sink/create_mv.sql create mode 100644 integration_tests/http-sink/create_sink.sql create mode 100644 integration_tests/http-sink/create_source.sql create mode 100644 integration_tests/http-sink/docker-compose.yml diff --git a/integration_tests/http-sink/README.md b/integration_tests/http-sink/README.md new file mode 100644 index 0000000000000..d956cb4ea95a4 --- /dev/null +++ b/integration_tests/http-sink/README.md @@ -0,0 +1,34 @@ +# Demo: Sinking to HTTP + +In this demo, we want to showcase how RisingWave is able to sink data to HTTP. This feature depends on https://github.com/getindata/flink-http-connector. + +It has a few limitations: +1. It offers only two options for the HTTP method, i.e., PUT and POST. +2. It can only execute one request-reply round to the service (session-less). +3. It cannot handle status codes in the SQL API. + +Therefore, we suggest that you try a Python UDF first. + +### Demo: +1. Launch the cluster: + +```sh +docker-compose up -d +``` + +The cluster contains a RisingWave cluster and its necessary dependencies, a datagen that generates the data. + +2. Build and run an HTTP server of your own. + +3. 
Execute the SQL queries in sequence: + +- create_source.sql +- create_mv.sql +- create_sink.sql + +4. Check the contents received by the HTTP server: +The HTTP server will receive JSON strings such as: +``` +{"user_id":5,"target_id":"siFqrkdlCn"} +``` +The total number of JSON records is 1000. diff --git a/integration_tests/http-sink/create_mv.sql b/integration_tests/http-sink/create_mv.sql new file mode 100644 index 0000000000000..8a291a3c95ea7 --- /dev/null +++ b/integration_tests/http-sink/create_mv.sql @@ -0,0 +1,6 @@ +CREATE MATERIALIZED VIEW bhv_mv AS +SELECT + user_id, + target_id +FROM + user_behaviors; diff --git a/integration_tests/http-sink/create_sink.sql b/integration_tests/http-sink/create_sink.sql new file mode 100644 index 0000000000000..0644d1d51934b --- /dev/null +++ b/integration_tests/http-sink/create_sink.sql @@ -0,0 +1,11 @@ +CREATE sink bhv_http_sink FROM bhv_mv WITH ( + connector = 'http', + url = 'http://localhost:8080/endpoint', + format = 'json', + type = 'append-only', + force_append_only='true', + primary_key = 'user_id', + gid.connector.http.sink.header.Origin = '*', + "gid.connector.http.sink.header.X-Content-Type-Options" = 'nosniff', + "gid.connector.http.sink.header.Content-Type" = 'application/json' +); \ No newline at end of file diff --git a/integration_tests/http-sink/create_source.sql b/integration_tests/http-sink/create_source.sql new file mode 100644 index 0000000000000..c28c10f3616da --- /dev/null +++ b/integration_tests/http-sink/create_source.sql @@ -0,0 +1,18 @@ +CREATE table user_behaviors ( + user_id int, + target_id VARCHAR, + target_type VARCHAR, + event_timestamp TIMESTAMP, + behavior_type VARCHAR, + parent_target_type VARCHAR, + parent_target_id VARCHAR, + PRIMARY KEY(user_id) +) WITH ( + connector = 'datagen', + fields.user_id.kind = 'sequence', + fields.user_id.start = '1', + fields.user_id.end = '1000', + fields.user_name.kind = 'random', + fields.user_name.length = '10', + datagen.rows.per.second = '10' +) FORMAT 
PLAIN ENCODE JSON; \ No newline at end of file diff --git a/integration_tests/http-sink/docker-compose.yml b/integration_tests/http-sink/docker-compose.yml new file mode 100644 index 0000000000000..8fba5ff352dc0 --- /dev/null +++ b/integration_tests/http-sink/docker-compose.yml @@ -0,0 +1,37 @@ +--- +version: "3" +services: + risingwave-standalone: + extends: + file: ../../docker/docker-compose.yml + service: risingwave-standalone + etcd-0: + extends: + file: ../../docker/docker-compose.yml + service: etcd-0 + grafana-0: + extends: + file: ../../docker/docker-compose.yml + service: grafana-0 + minio-0: + extends: + file: ../../docker/docker-compose.yml + service: minio-0 + prometheus-0: + extends: + file: ../../docker/docker-compose.yml + service: prometheus-0 +volumes: + risingwave-standalone: + external: false + etcd-0: + external: false + grafana-0: + external: false + minio-0: + external: false + prometheus-0: + external: false + message_queue: + external: false +name: risingwave-compose diff --git a/java/connector-node/risingwave-connector-service/pom.xml b/java/connector-node/risingwave-connector-service/pom.xml index 047c523c1c7db..d51d67497ce05 100644 --- a/java/connector-node/risingwave-connector-service/pom.xml +++ b/java/connector-node/risingwave-connector-service/pom.xml @@ -99,7 +99,6 @@ com.risingwave risingwave-sink-mock-flink-http-sink - provided diff --git a/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java b/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java index a969dddd620f7..d316eeae74bed 100644 --- a/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java +++ 
b/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java @@ -26,6 +26,8 @@ /** * The `FlinkMockSinkFactory` implementation of the http sink is responsible for creating the http * counterpart of the `DynamicTableSinkFactory`. And `validate` don't need to do anything. + * + *

This feature depends on https://github.com/getindata/flink-http-connector */ public class HttpFlinkMockSinkFactory implements FlinkMockSinkFactory { @Override From 91d97acbfaae47f95b9ae40984a74ab14b948d49 Mon Sep 17 00:00:00 2001 From: Runji Wang Date: Fri, 23 Feb 2024 11:30:38 +0800 Subject: [PATCH 10/35] refactor(frontend): use `#[derive(Fields)]` in statement handlers (#15130) Signed-off-by: Runji Wang --- src/common/fields-derive/src/lib.rs | 58 +++- src/frontend/src/handler/cancel_job.rs | 24 +- src/frontend/src/handler/describe.rs | 118 +++---- src/frontend/src/handler/explain.rs | 30 +- src/frontend/src/handler/mod.rs | 44 ++- src/frontend/src/handler/show.rs | 314 +++++++++++------- src/frontend/src/handler/transaction.rs | 21 +- src/frontend/src/handler/util.rs | 65 +--- src/frontend/src/handler/variable.rs | 104 +++--- src/frontend/src/session.rs | 40 +-- src/frontend/src/utils/infer_stmt_row_desc.rs | 253 -------------- src/frontend/src/utils/mod.rs | 1 - 12 files changed, 449 insertions(+), 623 deletions(-) delete mode 100644 src/frontend/src/utils/infer_stmt_row_desc.rs diff --git a/src/common/fields-derive/src/lib.rs b/src/common/fields-derive/src/lib.rs index 86fa229a5adcd..b38f579751683 100644 --- a/src/common/fields-derive/src/lib.rs +++ b/src/common/fields-derive/src/lib.rs @@ -16,7 +16,7 @@ use proc_macro2::TokenStream; use quote::quote; use syn::{Data, DeriveInput, Result}; -#[proc_macro_derive(Fields, attributes(primary_key))] +#[proc_macro_derive(Fields, attributes(primary_key, fields))] pub fn fields(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream { inner(tokens.into()).into() } @@ -46,6 +46,16 @@ fn gen(tokens: TokenStream) -> Result { )); }; + let style = get_style(&input); + if let Some(style) = &style { + if !["Title Case", "TITLE CASE", "snake_case"].contains(&style.value().as_str()) { + return Err(syn::Error::new_spanned( + style, + "only `Title Case`, `TITLE CASE`, and `snake_case` are supported", + )); + } + } + let 
fields_rw: Vec = struct_ .fields .iter() @@ -55,6 +65,12 @@ fn gen(tokens: TokenStream) -> Result { if name.starts_with("r#") { name = name[2..].to_string(); } + // cast style + match style.as_ref().map_or(String::new(), |f| f.value()).as_str() { + "Title Case" => name = to_title_case(&name), + "TITLE CASE" => name = to_title_case(&name).to_uppercase(), + _ => {} + } let ty = &field.ty; quote! { (#name, <#ty as ::risingwave_common::types::WithDataType>::default_data_type()) @@ -132,6 +148,46 @@ fn get_primary_key(input: &syn::DeriveInput) -> Option> { None } +/// Get name style from `#[fields(style = "xxx")]` attribute. +fn get_style(input: &syn::DeriveInput) -> Option { + let style = input.attrs.iter().find_map(|attr| match &attr.meta { + syn::Meta::List(list) if list.path.is_ident("fields") => { + let name_value: syn::MetaNameValue = syn::parse2(list.tokens.clone()).ok()?; + if name_value.path.is_ident("style") { + Some(name_value.value) + } else { + None + } + } + _ => None, + })?; + match style { + syn::Expr::Lit(lit) => match lit.lit { + syn::Lit::Str(s) => Some(s), + _ => None, + }, + _ => None, + } +} + +/// Convert `snake_case` to `Title Case`. +fn to_title_case(s: &str) -> String { + let mut title = String::new(); + let mut next_upper = true; + for c in s.chars() { + if c == '_' { + title.push(' '); + next_upper = true; + } else if next_upper { + title.push(c.to_uppercase().next().unwrap()); + next_upper = false; + } else { + title.push(c); + } + } + title +} + #[cfg(test)] mod tests { use indoc::indoc; diff --git a/src/frontend/src/handler/cancel_job.rs b/src/frontend/src/handler/cancel_job.rs index f124a2a030bd1..278e01e3e1bc0 100644 --- a/src/frontend/src/handler/cancel_job.rs +++ b/src/frontend/src/handler/cancel_job.rs @@ -12,14 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use itertools::Itertools; -use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; -use risingwave_common::types::DataType; +use risingwave_common::types::Fields; use risingwave_pb::meta::cancel_creating_jobs_request::{CreatingJobIds, PbJobs}; use risingwave_sqlparser::ast::JobIdents; +use super::RwPgResponseBuilderExt; use crate::error::Result; use crate::handler::{HandlerArgs, RwPgResponse}; @@ -36,16 +34,14 @@ pub(super) async fn handle_cancel( .await?; let rows = canceled_jobs .into_iter() - .map(|id| Row::new(vec![Some(id.to_string().into())])) - .collect_vec(); + .map(|id| CancelRow { id: id.to_string() }); Ok(PgResponse::builder(StatementType::CANCEL_COMMAND) - .values( - rows.into(), - vec![PgFieldDescriptor::new( - "Id".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - ) + .rows(rows) .into()) } + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct CancelRow { + id: String, +} diff --git a/src/frontend/src/handler/describe.rs b/src/frontend/src/handler/describe.rs index ef1a601cca590..36cff2e20e2b6 100644 --- a/src/frontend/src/handler/describe.rs +++ b/src/frontend/src/handler/describe.rs @@ -17,17 +17,16 @@ use std::fmt::Display; use itertools::Itertools; use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; use risingwave_common::catalog::{ColumnCatalog, ColumnDesc}; -use risingwave_common::types::DataType; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{display_comma_separated, ObjectName}; -use super::RwPgResponse; +use super::show::ShowColumnRow; +use super::{fields_to_descriptors, RwPgResponse}; use crate::binder::{Binder, Relation}; use crate::catalog::CatalogError; use crate::error::Result; -use crate::handler::util::col_descs_to_rows; -use crate::handler::HandlerArgs; +use crate::handler::{HandlerArgs, RwPgResponseBuilderExt}; pub fn 
handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Result { let session = handler_args.session; @@ -156,7 +155,10 @@ pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Re }; // Convert all column descs to rows - let mut rows = col_descs_to_rows(columns); + let mut rows = columns + .into_iter() + .flat_map(ShowColumnRow::from_catalog) + .collect_vec(); fn concat(display_elems: impl IntoIterator) -> String where @@ -170,96 +172,68 @@ pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Re // Convert primary key to rows if !pk_columns.is_empty() { - rows.push(Row::new(vec![ - Some("primary key".into()), - Some(concat(pk_columns.iter().map(|x| &x.name)).into()), - None, // Is Hidden - None, // Description - ])); + rows.push(ShowColumnRow { + name: "primary key".into(), + r#type: concat(pk_columns.iter().map(|x| &x.name)), + is_hidden: None, + description: None, + }); } // Convert distribution keys to rows if !dist_columns.is_empty() { - rows.push(Row::new(vec![ - Some("distribution key".into()), - Some(concat(dist_columns.iter().map(|x| &x.name)).into()), - None, // Is Hidden - None, // Description - ])); + rows.push(ShowColumnRow { + name: "distribution key".into(), + r#type: concat(dist_columns.iter().map(|x| &x.name)), + is_hidden: None, + description: None, + }); } // Convert all indexes to rows rows.extend(indices.iter().map(|index| { let index_display = index.display(); - Row::new(vec![ - Some(index.name.clone().into()), - if index_display.include_columns.is_empty() { - Some( - format!( - "index({}) distributed by({})", - display_comma_separated(&index_display.index_columns_with_ordering), - display_comma_separated(&index_display.distributed_by_columns), - ) - .into(), + ShowColumnRow { + name: index.name.clone(), + r#type: if index_display.include_columns.is_empty() { + format!( + "index({}) distributed by({})", + display_comma_separated(&index_display.index_columns_with_ordering), + 
display_comma_separated(&index_display.distributed_by_columns), ) } else { - Some( - format!( - "index({}) include({}) distributed by({})", - display_comma_separated(&index_display.index_columns_with_ordering), - display_comma_separated(&index_display.include_columns), - display_comma_separated(&index_display.distributed_by_columns), - ) - .into(), + format!( + "index({}) include({}) distributed by({})", + display_comma_separated(&index_display.index_columns_with_ordering), + display_comma_separated(&index_display.include_columns), + display_comma_separated(&index_display.distributed_by_columns), ) }, - // Is Hidden - None, - // Description + is_hidden: None, // TODO: index description - None, - ]) + description: None, + } })); - rows.push(Row::new(vec![ - Some("table description".into()), - Some(relname.into()), - None, // Is Hidden - description.map(Into::into), // Description - ])); + rows.push(ShowColumnRow { + name: "table description".into(), + r#type: relname, + is_hidden: None, + description: description.map(Into::into), + }); // TODO: table name and description as title of response // TODO: recover the original user statement Ok(PgResponse::builder(StatementType::DESCRIBE) - .values( - rows.into(), - vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Hidden".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ) + .rows(rows) .into()) } +pub fn infer_describe() -> Vec { + fields_to_descriptors(ShowColumnRow::fields()) +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/src/frontend/src/handler/explain.rs b/src/frontend/src/handler/explain.rs index 
c25bf7678bd04..b966cca8f50cf 100644 --- a/src/frontend/src/handler/explain.rs +++ b/src/frontend/src/handler/explain.rs @@ -12,12 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use itertools::Itertools; -use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; use risingwave_common::bail_not_implemented; -use risingwave_common::types::DataType; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{ExplainOptions, ExplainType, Statement}; use thiserror_ext::AsReport; @@ -27,7 +24,7 @@ use super::create_sink::{gen_sink_plan, get_partition_compute_info}; use super::create_table::ColumnIdGenerator; use super::query::gen_batch_plan_by_statement; use super::util::SourceSchemaCompatExt; -use super::RwPgResponse; +use super::{RwPgResponse, RwPgResponseBuilderExt}; use crate::error::{ErrorCode, Result}; use crate::handler::create_table::handle_create_table_plan; use crate::handler::HandlerArgs; @@ -254,20 +251,17 @@ pub async fn handle_explain( } } - let rows = blocks - .iter() - .flat_map(|b| b.lines().map(|l| l.to_owned())) - .map(|l| Row::new(vec![Some(l.into())])) - .collect_vec(); + let rows = blocks.iter().flat_map(|b| b.lines()).map(|l| ExplainRow { + query_plan: l.into(), + }); Ok(PgResponse::builder(StatementType::EXPLAIN) - .values( - rows.into(), - vec![PgFieldDescriptor::new( - "QUERY PLAN".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - ) + .rows(rows) .into()) } + +#[derive(Fields)] +#[fields(style = "TITLE CASE")] +struct ExplainRow { + query_plan: String, +} diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs index 3cdc4b191da92..827f28f87319e 100644 --- a/src/frontend/src/handler/mod.rs +++ b/src/frontend/src/handler/mod.rs @@ -18,11 +18,15 @@ use std::task::{Context, Poll}; use futures::stream::{self, BoxStream}; use futures::{Stream, 
StreamExt}; +use itertools::Itertools; +use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::StatementType::{self, ABORT, BEGIN, COMMIT, ROLLBACK, START_TRANSACTION}; use pgwire::pg_response::{PgResponse, PgResponseBuilder, RowSetResult}; use pgwire::pg_server::BoxedError; use pgwire::types::{Format, Row}; use risingwave_common::bail_not_implemented; +use risingwave_common::types::Fields; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_sqlparser::ast::*; use self::util::{DataChunkToRowSetAdapter, SourceSchemaCompatExt}; @@ -59,7 +63,7 @@ pub mod create_table; pub mod create_table_as; pub mod create_user; pub mod create_view; -mod describe; +pub mod describe; mod drop_connection; mod drop_database; pub mod drop_function; @@ -78,7 +82,7 @@ pub mod handle_privilege; mod kill_process; pub mod privilege; pub mod query; -mod show; +pub mod show; mod transaction; pub mod util; pub mod variable; @@ -90,6 +94,42 @@ pub type RwPgResponseBuilder = PgResponseBuilder; /// The [`PgResponse`] used by RisingWave. pub type RwPgResponse = PgResponse; +#[easy_ext::ext(RwPgResponseBuilderExt)] +impl RwPgResponseBuilder { + /// Append rows to the response. 
+ pub fn rows(self, rows: impl IntoIterator) -> Self { + let fields = T::fields(); + self.values( + rows.into_iter() + .map(|row| { + Row::new( + row.into_owned_row() + .into_iter() + .zip_eq_fast(&fields) + .map(|(datum, (_, ty))| { + datum.map(|scalar| { + scalar.as_scalar_ref_impl().text_format(ty).into() + }) + }) + .collect(), + ) + }) + .collect_vec() + .into(), + fields_to_descriptors(fields), + ) + } +} + +pub fn fields_to_descriptors( + fields: Vec<(&str, risingwave_common::types::DataType)>, +) -> Vec { + fields + .iter() + .map(|(name, ty)| PgFieldDescriptor::new(name.to_string(), ty.to_oid(), ty.type_len())) + .collect() +} + pub enum PgResponseStream { LocalQuery(DataChunkToRowSetAdapter), DistributedQuery(DataChunkToRowSetAdapter), diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index 4a98b6c7cd33d..226a219a11887 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -19,27 +19,24 @@ use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_protocol::truncated_fmt; use pgwire::pg_response::{PgResponse, StatementType}; use pgwire::pg_server::Session; -use pgwire::types::Row; use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ColumnCatalog, ColumnDesc, DEFAULT_SCHEMA_NAME}; -use risingwave_common::types::DataType; +use risingwave_common::types::{DataType, Fields}; use risingwave_common::util::addr::HostAddr; use risingwave_connector::source::kafka::PRIVATELINK_CONNECTION; use risingwave_expr::scalar::like::{i_like_default, like_default}; use risingwave_pb::catalog::connection; use risingwave_sqlparser::ast::{ - Ident, ObjectName, ShowCreateType, ShowObject, ShowStatementFilter, + display_comma_separated, Ident, ObjectName, ShowCreateType, ShowObject, ShowStatementFilter, }; use serde_json; -use super::RwPgResponse; +use super::{fields_to_descriptors, RwPgResponse, RwPgResponseBuilderExt}; use crate::binder::{Binder, Relation}; use 
crate::catalog::{CatalogError, IndexCatalog}; use crate::error::Result; -use crate::handler::util::{col_descs_to_rows, indexes_to_rows}; use crate::handler::HandlerArgs; use crate::session::SessionImpl; -use crate::utils::infer_stmt_row_desc::infer_show_object; pub fn get_columns_from_table( session: &SessionImpl, @@ -109,6 +106,136 @@ fn schema_or_default(schema: &Option) -> String { .map_or_else(|| DEFAULT_SCHEMA_NAME.to_string(), |s| s.real_value()) } +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowObjectRow { + name: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +pub struct ShowColumnRow { + pub name: String, + pub r#type: String, + pub is_hidden: Option, + pub description: Option, +} + +impl ShowColumnRow { + pub fn from_catalog(col: ColumnCatalog) -> Vec { + col.column_desc + .flatten() + .into_iter() + .map(|c| { + let type_name = if let DataType::Struct { .. } = c.data_type { + c.type_name.clone() + } else { + c.data_type.to_string() + }; + ShowColumnRow { + name: c.name, + r#type: type_name, + is_hidden: Some(col.is_hidden.to_string()), + description: c.description, + } + }) + .collect() + } +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowConnectionRow { + name: String, + r#type: String, + properties: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowFunctionRow { + name: String, + arguments: String, + return_type: String, + language: String, + link: Option, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowIndexRow { + name: String, + on: String, + key: String, + include: String, + distributed_by: String, +} + +impl From> for ShowIndexRow { + fn from(index: Arc) -> Self { + let index_display = index.display(); + ShowIndexRow { + name: index.name.clone(), + on: index.primary_table.name.clone(), + key: display_comma_separated(&index_display.index_columns_with_ordering).to_string(), + include: 
display_comma_separated(&index_display.include_columns).to_string(), + distributed_by: display_comma_separated(&index_display.distributed_by_columns) + .to_string(), + } + } +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowClusterRow { + addr: String, + state: String, + parallel_units: String, + is_streaming: String, + is_serving: String, + is_unschedulable: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowJobRow { + id: i64, + statement: String, + progress: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowProcessListRow { + id: String, + user: String, + host: String, + database: String, + time: Option, + info: Option, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowCreateObjectRow { + name: String, + create_sql: String, +} + +/// Infer the row description for different show objects. +pub fn infer_show_object(objects: &ShowObject) -> Vec { + fields_to_descriptors(match objects { + ShowObject::Columns { .. } => ShowColumnRow::fields(), + ShowObject::Connection { .. } => ShowConnectionRow::fields(), + ShowObject::Function { .. } => ShowFunctionRow::fields(), + ShowObject::Indexes { .. 
} => ShowIndexRow::fields(), + ShowObject::Cluster => ShowClusterRow::fields(), + ShowObject::Jobs => ShowJobRow::fields(), + ShowObject::ProcessList => ShowProcessListRow::fields(), + _ => ShowObjectRow::fields(), + }) +} + pub async fn handle_show_object( handler_args: HandlerArgs, command: ShowObject, @@ -119,7 +246,6 @@ pub async fn handle_show_object( if let Some(ShowStatementFilter::Where(..)) = filter { bail_not_implemented!("WHERE clause in SHOW statement"); } - let row_desc = infer_show_object(&command); let catalog_reader = session.env().catalog_reader(); @@ -178,18 +304,15 @@ pub async fn handle_show_object( .into()); }; - let rows = col_descs_to_rows(columns); - return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(columns.into_iter().flat_map(ShowColumnRow::from_catalog)) .into()); } ShowObject::Indexes { table } => { let indexes = get_indexes_from_table(&session, table)?; - let rows = indexes_to_rows(indexes); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(indexes.into_iter().map(ShowIndexRow::from)) .into()); } ShowObject::Connection { schema } => { @@ -200,7 +323,7 @@ pub async fn handle_show_object( .iter_connections() .map(|c| { let name = c.name.clone(); - let conn_type = match &c.info { + let r#type = match &c.info { connection::Info::PrivateLinkService(_) => { PRIVATELINK_CONNECTION.to_string() }, @@ -230,105 +353,81 @@ pub async fn handle_show_object( ) } }; - Row::new(vec![ - Some(name.into()), - Some(conn_type.into()), - Some(properties.into()), - ]) - }) - .collect_vec(); + ShowConnectionRow { + name, + r#type, + properties, + } + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::Function { schema } => { - let rows = catalog_reader - .read_guard() + let reader = catalog_reader.read_guard(); + let rows = reader .get_schema_by_name(session.database(), 
&schema_or_default(&schema))? .iter_function() - .map(|t| { - Row::new(vec![ - Some(t.name.clone().into()), - Some(t.arg_types.iter().map(|t| t.to_string()).join(", ").into()), - Some(t.return_type.to_string().into()), - Some(t.language.clone().into()), - t.link.clone().map(Into::into), - ]) - }) - .collect_vec(); + .map(|t| ShowFunctionRow { + name: t.name.clone(), + arguments: t.arg_types.iter().map(|t| t.to_string()).join(", "), + return_type: t.return_type.to_string(), + language: t.language.clone(), + link: t.link.clone(), + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::Cluster => { let workers = session.env().worker_node_manager().list_worker_nodes(); - let rows = workers - .into_iter() - .map(|worker| { - let addr: HostAddr = worker.host.as_ref().unwrap().into(); - let property = worker.property.as_ref().unwrap(); - Row::new(vec![ - Some(addr.to_string().into()), - Some(worker.get_state().unwrap().as_str_name().into()), - Some( - worker - .parallel_units - .into_iter() - .map(|pu| pu.id) - .join(", ") - .into(), - ), - Some(property.is_streaming.to_string().into()), - Some(property.is_serving.to_string().into()), - Some(property.is_unschedulable.to_string().into()), - ]) - }) - .collect_vec(); + let rows = workers.into_iter().map(|worker| { + let addr: HostAddr = worker.host.as_ref().unwrap().into(); + let property = worker.property.as_ref().unwrap(); + ShowClusterRow { + addr: addr.to_string(), + state: worker.get_state().unwrap().as_str_name().to_string(), + parallel_units: worker.parallel_units.into_iter().map(|pu| pu.id).join(", "), + is_streaming: property.is_streaming.to_string(), + is_serving: property.is_serving.to_string(), + is_unschedulable: property.is_unschedulable.to_string(), + } + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::Jobs => { let resp = 
session.env().meta_client().list_ddl_progress().await?; - let rows = resp - .into_iter() - .map(|job| { - Row::new(vec![ - Some(job.id.to_string().into()), - Some(job.statement.into()), - Some(job.progress.into()), - ]) - }) - .collect_vec(); + let rows = resp.into_iter().map(|job| ShowJobRow { + id: job.id as i64, + statement: job.statement, + progress: job.progress, + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::ProcessList => { - let rows = { - let sessions_map = session.env().sessions_map(); - sessions_map - .read() - .values() - .map(|s| { - Row::new(vec![ - // Since process id and the secret id in the session id are the same in RisingWave, just display the process id. - Some(format!("{}", s.id().0).into()), - Some(s.user_name().to_owned().into()), - Some(format!("{}", s.peer_addr()).into()), - Some(s.database().to_owned().into()), - s.elapse_since_running_sql() - .map(|mills| format!("{}ms", mills).into()), - s.running_sql().map(|sql| { - format!("{}", truncated_fmt::TruncatedFmt(&sql, 1024)).into() - }), - ]) - }) - .collect_vec() - }; + let sessions_map = session.env().sessions_map().read(); + let rows = sessions_map.values().map(|s| { + ShowProcessListRow { + // Since process id and the secret id in the session id are the same in RisingWave, just display the process id. 
+ id: format!("{}", s.id().0), + user: s.user_name().to_owned(), + host: format!("{}", s.peer_addr()), + database: s.database().to_owned(), + time: s + .elapse_since_running_sql() + .map(|mills| format!("{}ms", mills)), + info: s + .running_sql() + .map(|sql| format!("{}", truncated_fmt::TruncatedFmt(&sql, 1024))), + } + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } }; @@ -341,21 +440,17 @@ pub async fn handle_show_object( Some(ShowStatementFilter::Where(..)) => unreachable!(), None => true, }) - .map(|n| Row::new(vec![Some(n.into())])) - .collect_vec(); + .map(|name| ShowObjectRow { name }); Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values( - rows.into(), - vec![PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - ) + .rows(rows) .into()) } +pub fn infer_show_create_object() -> Vec { + fields_to_descriptors(ShowCreateObjectRow::fields()) +} + pub fn handle_show_create_object( handle_args: HandlerArgs, show_create_type: ShowCreateType, @@ -415,21 +510,10 @@ pub fn handle_show_create_object( let name = format!("{}.{}", schema_name, object_name); Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values( - vec![Row::new(vec![Some(name.into()), Some(sql.into())])].into(), - vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Create Sql".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ) + .rows([ShowCreateObjectRow { + name, + create_sql: sql, + }]) .into()) } diff --git a/src/frontend/src/handler/transaction.rs b/src/frontend/src/handler/transaction.rs index 452cfe0ed9299..8ab7af36c29ca 100644 --- a/src/frontend/src/handler/transaction.rs +++ b/src/frontend/src/handler/transaction.rs @@ -13,14 +13,13 @@ // limitations under the License. 
use pgwire::pg_response::StatementType; -use pgwire::types::Row; use risingwave_common::bail_not_implemented; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{TransactionAccessMode, TransactionMode, Value}; -use super::{HandlerArgs, RwPgResponse}; +use super::{HandlerArgs, RwPgResponse, RwPgResponseBuilderExt}; use crate::error::Result; use crate::session::transaction::AccessMode; -use crate::utils::infer_stmt_row_desc::infer_show_variable; macro_rules! not_impl { ($body:expr) => { @@ -118,16 +117,20 @@ pub async fn handle_set( .into()) } +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableRow { + name: String, +} + pub fn handle_show_isolation_level(handler_args: HandlerArgs) -> Result { let config_reader = handler_args.session.config(); - let parameter_name = "transaction_isolation"; - let row_desc = infer_show_variable(parameter_name); - let rows = vec![Row::new(vec![Some( - config_reader.get(parameter_name)?.into(), - )])]; + let rows = [ShowVariableRow { + name: config_reader.get("transaction_isolation")?, + }]; Ok(RwPgResponse::builder(StatementType::SHOW_VARIABLE) - .values(rows.into(), row_desc) + .rows(rows) .into()) } diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 6e91cf53f0b32..1e49ee8baf540 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -27,14 +27,13 @@ use pgwire::pg_server::BoxedError; use pgwire::types::{Format, FormatIterator, Row}; use pin_project_lite::pin_project; use risingwave_common::array::DataChunk; -use risingwave_common::catalog::{ColumnCatalog, Field}; +use risingwave_common::catalog::Field; use risingwave_common::row::Row as _; use risingwave_common::types::{DataType, ScalarRefImpl, Timestamptz}; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_connector::source::KAFKA_CONNECTOR; -use risingwave_sqlparser::ast::{display_comma_separated, CompatibleSourceSchema, ConnectorSchema}; +use 
risingwave_sqlparser::ast::{CompatibleSourceSchema, ConnectorSchema}; -use crate::catalog::IndexCatalog; use crate::error::{ErrorCode, Result as RwResult}; use crate::handler::create_source::UPSTREAM_SOURCE_KEY; use crate::session::{current, SessionImpl}; @@ -172,66 +171,6 @@ fn to_pg_rows( .try_collect() } -/// Convert column descs to rows which conclude name and type -pub fn col_descs_to_rows(columns: Vec) -> Vec { - columns - .iter() - .flat_map(|col| { - col.column_desc - .flatten() - .into_iter() - .map(|c| { - let type_name = if let DataType::Struct { .. } = c.data_type { - c.type_name.clone() - } else { - c.data_type.to_string() - }; - Row::new(vec![ - Some(c.name.into()), - Some(type_name.into()), - Some(col.is_hidden.to_string().into()), - c.description.map(Into::into), - ]) - }) - .collect_vec() - }) - .collect_vec() -} - -pub fn indexes_to_rows(indexes: Vec>) -> Vec { - indexes - .iter() - .map(|index| { - let index_display = index.display(); - Row::new(vec![ - Some(index.name.clone().into()), - Some(index.primary_table.name.clone().into()), - Some( - format!( - "{}", - display_comma_separated(&index_display.index_columns_with_ordering) - ) - .into(), - ), - Some( - format!( - "{}", - display_comma_separated(&index_display.include_columns) - ) - .into(), - ), - Some( - format!( - "{}", - display_comma_separated(&index_display.distributed_by_columns) - ) - .into(), - ), - ]) - }) - .collect_vec() -} - /// Convert from [`Field`] to [`PgFieldDescriptor`]. 
pub fn to_pg_field(f: &Field) -> PgFieldDescriptor { PgFieldDescriptor::new( diff --git a/src/frontend/src/handler/variable.rs b/src/frontend/src/handler/variable.rs index 9b4828b232837..96fd232215ccd 100644 --- a/src/frontend/src/handler/variable.rs +++ b/src/frontend/src/handler/variable.rs @@ -14,19 +14,18 @@ use anyhow::Context; use itertools::Itertools; +use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_protocol::ParameterStatus; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; use risingwave_common::session_config::{ConfigReporter, SESSION_CONFIG_LIST_SEP}; use risingwave_common::system_param::reader::SystemParamsRead; -use risingwave_common::types::{DataType, ScalarRefImpl}; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{Ident, SetTimeZoneValue, SetVariableValue, Value}; use risingwave_sqlparser::keywords::Keyword; -use super::RwPgResponse; +use super::{fields_to_descriptors, RwPgResponse, RwPgResponseBuilderExt}; use crate::error::Result; use crate::handler::HandlerArgs; -use crate::utils::infer_stmt_row_desc::infer_show_variable; /// convert `SetVariableValue` to string while remove the quotes on literals. pub(crate) fn set_var_to_param_str(value: &SetVariableValue) -> Option { @@ -117,40 +116,36 @@ pub(super) async fn handle_show( ) -> Result { // TODO: Verify that the name used in `show` command is indeed always case-insensitive. let name = variable.iter().map(|e| e.real_value()).join(" "); - let row_desc = infer_show_variable(&name); - let rows = if name.eq_ignore_ascii_case("PARAMETERS") { - handle_show_system_params(handler_args).await? + if name.eq_ignore_ascii_case("PARAMETERS") { + handle_show_system_params(handler_args).await } else if name.eq_ignore_ascii_case("ALL") { - handle_show_all(handler_args.clone())? 
+ handle_show_all(handler_args.clone()) } else { let config_reader = handler_args.session.config(); - vec![Row::new(vec![Some(config_reader.get(&name)?.into())])] - }; - - Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) - .values(rows.into(), row_desc) - .into()) + Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) + .rows([ShowVariableRow { + name: config_reader.get(&name)?, + }]) + .into()) + } } -fn handle_show_all(handler_args: HandlerArgs) -> Result> { +fn handle_show_all(handler_args: HandlerArgs) -> Result { let config_reader = handler_args.session.config(); let all_variables = config_reader.show_all(); - let rows = all_variables - .iter() - .map(|info| { - Row::new(vec![ - Some(info.name.clone().into()), - Some(info.setting.clone().into()), - Some(info.description.clone().into()), - ]) - }) - .collect_vec(); - Ok(rows) + let rows = all_variables.iter().map(|info| ShowVariableAllRow { + name: info.name.clone(), + setting: info.setting.clone(), + description: info.description.clone(), + }); + Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) + .rows(rows) + .into()) } -async fn handle_show_system_params(handler_args: HandlerArgs) -> Result> { +async fn handle_show_system_params(handler_args: HandlerArgs) -> Result { let params = handler_args .session .env() @@ -160,17 +155,46 @@ async fn handle_show_system_params(handler_args: HandlerArgs) -> Result let rows = params .get_all() .into_iter() - .map(|info| { - let is_mutable_bytes = ScalarRefImpl::Bool(info.mutable) - .text_format(&DataType::Boolean) - .into(); - Row::new(vec![ - Some(info.name.into()), - Some(info.value.into()), - Some(info.description.into()), - Some(is_mutable_bytes), - ]) - }) - .collect_vec(); - Ok(rows) + .map(|info| ShowVariableParamsRow { + name: info.name.into(), + value: info.value, + description: info.description.into(), + mutable: info.mutable, + }); + Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) + .rows(rows) + .into()) +} + +pub fn infer_show_variable(name: &str) 
-> Vec { + fields_to_descriptors(if name.eq_ignore_ascii_case("ALL") { + ShowVariableAllRow::fields() + } else if name.eq_ignore_ascii_case("PARAMETERS") { + ShowVariableParamsRow::fields() + } else { + ShowVariableRow::fields() + }) +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableRow { + name: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableAllRow { + name: String, + setting: String, + description: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableParamsRow { + name: String, + value: String, + description: String, + mutable: bool, } diff --git a/src/frontend/src/session.rs b/src/frontend/src/session.rs index 460c978535833..67a5da01e1213 100644 --- a/src/frontend/src/session.rs +++ b/src/frontend/src/session.rs @@ -81,11 +81,14 @@ use crate::catalog::{ check_schema_writable, CatalogError, DatabaseId, OwnedByUserCatalog, SchemaId, }; use crate::error::{ErrorCode, Result, RwError}; +use crate::handler::describe::infer_describe; use crate::handler::extended_handle::{ handle_bind, handle_execute, handle_parse, Portal, PrepareStatement, }; use crate::handler::privilege::ObjectCheckItem; +use crate::handler::show::{infer_show_create_object, infer_show_object}; use crate::handler::util::to_pg_field; +use crate::handler::variable::infer_show_variable; use crate::handler::{handle, RwPgResponse}; use crate::health_service::HealthServiceImpl; use crate::meta_client::{FrontendMetaClient, FrontendMetaClientImpl}; @@ -102,7 +105,6 @@ use crate::user::user_authentication::md5_hash_with_salt; use crate::user::user_manager::UserInfoManager; use crate::user::user_service::{UserInfoReader, UserInfoWriter, UserInfoWriterImpl}; use crate::user::UserId; -use crate::utils::infer_stmt_row_desc::{infer_show_object, infer_show_variable}; use crate::{FrontendOpts, PgResponseStream}; pub(crate) mod current; @@ -1242,18 +1244,7 @@ fn infer(bound: Option, stmt: Statement) -> Result 
Ok(infer_show_object(&show_object)), - Statement::ShowCreateObject { .. } => Ok(vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Create Sql".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ]), + Statement::ShowCreateObject { .. } => Ok(infer_show_create_object()), Statement::ShowTransactionIsolationLevel => { let name = "transaction_isolation"; Ok(infer_show_variable(name)) @@ -1262,28 +1253,7 @@ fn infer(bound: Option, stmt: Statement) -> Result Ok(vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Hidden".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ]), + Statement::Describe { name: _ } => Ok(infer_describe()), Statement::Explain { .. } => Ok(vec![PgFieldDescriptor::new( "QUERY PLAN".to_owned(), DataType::Varchar.to_oid(), diff --git a/src/frontend/src/utils/infer_stmt_row_desc.rs b/src/frontend/src/utils/infer_stmt_row_desc.rs deleted file mode 100644 index 690b2bf81872f..0000000000000 --- a/src/frontend/src/utils/infer_stmt_row_desc.rs +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright 2024 RisingWave Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -use pgwire::pg_field_descriptor::PgFieldDescriptor; -use risingwave_common::types::DataType; -use risingwave_sqlparser::ast::ShowObject; - -/// `infer_stmt_row_desc` is used to infer the row description for different show objects. -pub fn infer_show_object(objects: &ShowObject) -> Vec { - match objects { - ShowObject::Columns { .. } => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Hidden".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Connection { .. } => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Properties".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Function { .. } => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Arguments".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Return Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Language".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Link".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Indexes { .. 
} => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "On".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Key".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Include".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Distributed By".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Cluster => vec![ - PgFieldDescriptor::new( - "Addr".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "State".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Parallel Units".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Streaming".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Serving".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Unschedulable".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Jobs => vec![ - PgFieldDescriptor::new( - "Id".to_owned(), - DataType::Int64.to_oid(), - DataType::Int64.type_len(), - ), - PgFieldDescriptor::new( - "Statement".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Progress".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::ProcessList => vec![ - PgFieldDescriptor::new( - "Id".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "User".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - 
PgFieldDescriptor::new( - "Host".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Database".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Time".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Info".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - _ => vec![PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - } -} - -pub fn infer_show_variable(name: &str) -> Vec { - if name.eq_ignore_ascii_case("ALL") { - vec![ - PgFieldDescriptor::new( - "Name".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Setting".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ] - } else if name.eq_ignore_ascii_case("PARAMETERS") { - vec![ - PgFieldDescriptor::new( - "Name".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Value".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Mutable".to_string(), - DataType::Boolean.to_oid(), - DataType::Boolean.type_len(), - ), - ] - } else { - vec![PgFieldDescriptor::new( - name.to_ascii_lowercase(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )] - } -} diff --git a/src/frontend/src/utils/mod.rs b/src/frontend/src/utils/mod.rs index bfe7cb093aad0..697b626fb3398 100644 --- a/src/frontend/src/utils/mod.rs +++ b/src/frontend/src/utils/mod.rs @@ -30,7 +30,6 @@ pub use rewrite_index::*; mod index_set; pub use 
index_set::*; pub(crate) mod group_by; -pub mod infer_stmt_row_desc; pub mod overwrite_options; pub use group_by::*; pub use overwrite_options::*; From 1dd61bc25657e44abc10fa1b54104a3359350f83 Mon Sep 17 00:00:00 2001 From: August Date: Fri, 23 Feb 2024 11:31:41 +0800 Subject: [PATCH 11/35] fix(sql-backend): fix error message for altering unrecognized system parameter (#15161) --- e2e_test/error_ui/simple/main.slt | 2 +- src/common/src/system_param/mod.rs | 2 +- src/meta/src/controller/system_param.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/e2e_test/error_ui/simple/main.slt b/e2e_test/error_ui/simple/main.slt index b4cebbdfeff70..3197544b45d75 100644 --- a/e2e_test/error_ui/simple/main.slt +++ b/e2e_test/error_ui/simple/main.slt @@ -27,7 +27,7 @@ db error: ERROR: Failed to run the query Caused by these errors (recent errors listed first): 1: gRPC request to meta service failed: Internal error - 2: SystemParams error: unrecognized system param "not_exist_key" + 2: SystemParams error: unrecognized system parameter "not_exist_key" query error diff --git a/src/common/src/system_param/mod.rs b/src/common/src/system_param/mod.rs index 278390887dd51..82677e57e9753 100644 --- a/src/common/src/system_param/mod.rs +++ b/src/common/src/system_param/mod.rs @@ -340,7 +340,7 @@ macro_rules! impl_set_system_param { )* _ => { Err(format!( - "unrecognized system param {:?}", + "unrecognized system parameter {:?}", key )) } diff --git a/src/meta/src/controller/system_param.rs b/src/meta/src/controller/system_param.rs index 4b2e598a2c221..855112acb7167 100644 --- a/src/meta/src/controller/system_param.rs +++ b/src/meta/src/controller/system_param.rs @@ -186,7 +186,7 @@ impl SystemParamsController { .await? 
else { return Err(MetaError::system_params(format!( - "unrecognized system parameter {}", + "unrecognized system parameter {:?}", name ))); }; From 07bd89042aa2261faa38e0a690f5fb49bc7ea0b8 Mon Sep 17 00:00:00 2001 From: August Date: Fri, 23 Feb 2024 11:31:57 +0800 Subject: [PATCH 12/35] feat: add some missing columns and views to support atlas (#15151) --- .../information_schema/columns.rs | 55 +++++++++++++++++-- .../catalog/system_catalog/pg_catalog/mod.rs | 1 + .../system_catalog/pg_catalog/pg_index.rs | 5 +- .../pg_catalog/pg_partitioned_table.rs | 30 ++++++++++ .../system_catalog/rw_catalog/rw_columns.rs | 21 +++++++ src/frontend/src/catalog/table_catalog.rs | 11 +++- 6 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs diff --git a/src/frontend/src/catalog/system_catalog/information_schema/columns.rs b/src/frontend/src/catalog/system_catalog/information_schema/columns.rs index 074b772ca0bb8..a9a0d8fc4f1b5 100644 --- a/src/frontend/src/catalog/system_catalog/information_schema/columns.rs +++ b/src/frontend/src/catalog/system_catalog/information_schema/columns.rs @@ -34,13 +34,37 @@ use risingwave_frontend_macro::system_catalog; NULL::integer AS numeric_scale, c.position AS ordinal_position, 'YES' AS is_nullable, - NULL AS collation_name, - 'pg_catalog' AS udt_schema, CASE WHEN c.data_type = 'varchar' THEN 'character varying' ELSE c.data_type END AS data_type, - c.udt_type AS udt_name + CURRENT_DATABASE() AS udt_catalog, + 'pg_catalog' AS udt_schema, + c.udt_type AS udt_name, + NULL AS character_set_catalog, + NULL AS character_set_schema, + NULL AS character_set_name, + NULL AS collation_catalog, + NULL AS collation_schema, + NULL AS collation_name, + NULL AS domain_catalog, + NULL AS domain_schema, + NULL AS domain_name, + NULL AS scope_catalog, + NULL AS scope_schema, + NULL AS scope_name, + 'NO' AS is_identity, + NULL AS identity_generation, + NULL AS identity_start, 
+ NULL AS identity_increment, + NULL AS identity_maximum, + NULL AS identity_minimum, + NULL AS identity_cycle, + CASE + WHEN c.is_generated THEN 'ALWAYS' + ELSE 'NEVER' + END AS is_generated, + c.generation_expression FROM rw_catalog.rw_columns c LEFT JOIN rw_catalog.rw_relations r ON c.relation_id = r.id JOIN rw_catalog.rw_schemas s ON s.id = r.schema_id @@ -58,8 +82,29 @@ struct Column { numeric_scale: i32, ordinal_position: i32, is_nullable: String, - collation_name: String, - udt_schema: String, data_type: String, + udt_catalog: String, + udt_schema: String, udt_name: String, + character_set_catalog: String, + character_set_schema: String, + character_set_name: String, + collation_catalog: String, + collation_schema: String, + collation_name: String, + domain_catalog: String, + domain_schema: String, + domain_name: String, + scope_catalog: String, + scope_schema: String, + scope_name: String, + is_identity: String, + identity_generation: String, + identity_start: String, + identity_increment: String, + identity_maximum: String, + identity_minimum: String, + identity_cycle: String, + is_generated: String, + generation_expression: String, } diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs index c1a935803f9f4..ce97aeaac552c 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs @@ -35,6 +35,7 @@ mod pg_matviews; mod pg_namespace; mod pg_opclass; mod pg_operator; +mod pg_partitioned_table; mod pg_proc; mod pg_roles; mod pg_settings; diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs index 196c36ec7f1af..2dfb15f9e527b 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs @@ -28,7 +28,8 @@ use 
risingwave_frontend_macro::system_catalog; ARRAY[]::smallint[] as indoption, NULL AS indexprs, NULL AS indpred, - FALSE AS indisprimary + FALSE AS indisprimary, + ARRAY[]::int[] AS indclass FROM rw_catalog.rw_indexes" )] #[derive(Fields)] @@ -46,4 +47,6 @@ struct PgIndex { indpred: Option, // TODO: we return false as the default value. indisprimary: bool, + // Empty array. We only have a dummy implementation of `pg_opclass` yet. + indclass: Vec, } diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs new file mode 100644 index 0000000000000..e11739e2609fd --- /dev/null +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs @@ -0,0 +1,30 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::Fields; +use risingwave_frontend_macro::system_catalog; + +/// The catalog `pg_partitioned_table` stores information about how tables are partitioned. 
Reference: [`https://www.postgresql.org/docs/current/catalog-pg-partitioned-table.html`] +#[system_catalog(view, "pg_catalog.pg_partitioned_table")] +#[derive(Fields)] +struct PgPartitionedTable { + partrelid: i32, + partstrat: String, + partnatts: i16, + partdefid: i32, + partattrs: Vec, + partclass: Vec, + partcollation: Vec, + partexprs: Option, +} diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs index 40760df81a492..8491da7062711 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs @@ -17,6 +17,7 @@ use risingwave_frontend_macro::system_catalog; use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::error::Result; +use crate::expr::{ExprDisplay, ExprImpl}; #[derive(Fields)] #[primary_key(relation_id, name)] @@ -27,6 +28,8 @@ struct RwColumn { is_hidden: bool, is_primary_key: bool, is_distribution_key: bool, + is_generated: bool, + generation_expression: Option, data_type: String, type_oid: i32, type_len: i16, @@ -51,6 +54,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: false, is_primary_key: false, is_distribution_key: false, + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -71,6 +76,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: sink.downstream_pk.contains(&index), is_distribution_key: sink.distribution_key.contains(&index), + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -93,6 +100,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: 
table.pk.contains(&index), is_distribution_key: false, + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -104,6 +113,7 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { let table_rows = schema .iter_valid_table() .flat_map(|table| { + let schema = table.column_schema(); table .columns .iter() @@ -115,6 +125,15 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: table.pk().iter().any(|idx| idx.column_index == index), is_distribution_key: table.distribution_key.contains(&index), + is_generated: column.is_generated(), + generation_expression: column.generated_expr().map(|expr_node| { + let expr = ExprImpl::from_expr_proto(expr_node).unwrap(); + let expr_display = ExprDisplay { + expr: &expr, + input_schema: &schema, + }; + expr_display.to_string() + }), data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -138,6 +157,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: source.pk_col_ids.contains(&column.column_id()), is_distribution_key: false, + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), diff --git a/src/frontend/src/catalog/table_catalog.rs b/src/frontend/src/catalog/table_catalog.rs index fbb77a0ca0bb5..2954cb37384dc 100644 --- a/src/frontend/src/catalog/table_catalog.rs +++ b/src/frontend/src/catalog/table_catalog.rs @@ -17,7 +17,7 @@ use std::collections::{HashMap, HashSet}; use fixedbitset::FixedBitSet; use itertools::Itertools; use risingwave_common::catalog::{ - ColumnCatalog, ConflictBehavior, TableDesc, TableId, TableVersionId, + ColumnCatalog, ConflictBehavior, Field, Schema, TableDesc, TableId, 
TableVersionId, }; use risingwave_common::util::epoch::Epoch; use risingwave_common::util::sort_util::ColumnOrder; @@ -492,6 +492,15 @@ impl TableCatalog { pub fn has_generated_column(&self) -> bool { self.columns.iter().any(|c| c.is_generated()) } + + pub fn column_schema(&self) -> Schema { + Schema::new( + self.columns + .iter() + .map(|c| Field::from(&c.column_desc)) + .collect(), + ) + } } impl From for TableCatalog { From 304709b2d92b504add49700b13b4da23f9d9ee58 Mon Sep 17 00:00:00 2001 From: Shanicky Chen Date: Fri, 23 Feb 2024 11:50:39 +0800 Subject: [PATCH 13/35] fix: refine cycle check for sink into table (#15170) --- e2e_test/sink/sink_into_table/basic.slt | 29 +++++++++++++++++++ src/frontend/src/catalog/table_catalog.rs | 8 +++++ src/frontend/src/handler/create_sink.rs | 10 ++++++- .../optimizer/plan_node/stream_materialize.rs | 1 + src/frontend/src/optimizer/plan_node/utils.rs | 1 + .../src/scheduler/distributed/query.rs | 1 + 6 files changed, 49 insertions(+), 1 deletion(-) diff --git a/e2e_test/sink/sink_into_table/basic.slt b/e2e_test/sink/sink_into_table/basic.slt index 1bc5a47907077..890087e207fd0 100644 --- a/e2e_test/sink/sink_into_table/basic.slt +++ b/e2e_test/sink/sink_into_table/basic.slt @@ -362,6 +362,35 @@ drop table t_b; statement ok drop table t_c; +# cycle check (with materialize view) + +statement ok +create table t_a(v int primary key); + +statement ok +create materialized view m_a as select v from t_a; + +statement ok +create table t_b(v int primary key); + +statement ok +create sink s_a into t_b as select v from m_a; + +statement error Creating such a sink will result in circular dependency +create sink s_b into t_a as select v from t_b; + +statement ok +drop sink s_a; + +statement ok +drop table t_b; + +statement ok +drop materialized view m_a; + +statement ok +drop table t_a; + # multi sinks statement ok diff --git a/src/frontend/src/catalog/table_catalog.rs b/src/frontend/src/catalog/table_catalog.rs index 
2954cb37384dc..edb458997e33f 100644 --- a/src/frontend/src/catalog/table_catalog.rs +++ b/src/frontend/src/catalog/table_catalog.rs @@ -74,6 +74,8 @@ pub struct TableCatalog { pub name: String, + pub dependent_relations: Vec, + /// All columns in this table. pub columns: Vec, @@ -573,6 +575,11 @@ impl From for TableCatalog { created_at_cluster_version: tb.created_at_cluster_version.clone(), initialized_at_cluster_version: tb.initialized_at_cluster_version.clone(), retention_seconds: tb.retention_seconds, + dependent_relations: tb + .dependent_relations + .into_iter() + .map(TableId::from) + .collect_vec(), } } } @@ -724,6 +731,7 @@ mod tests { incoming_sinks: vec![], created_at_cluster_version: None, initialized_at_cluster_version: None, + dependent_relations: vec![], } ); assert_eq!(table, TableCatalog::from(table.to_prost(0, 0))); diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index de8e93e04a784..830253675c1bd 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -504,7 +504,7 @@ fn check_cycle_for_sink( if let Ok(table) = reader.get_table_by_id(table_id) { visit_table(session, reader, sink_index, table.as_ref(), visited_tables)? } else { - bail!("table not found: {:?}", table_id); + bail!("streaming job not found: {:?}", table_id); } } @@ -533,6 +533,14 @@ fn check_cycle_for_sink( } } + for table_id in &table.dependent_relations { + if let Ok(table) = reader.get_table_by_id(table_id) { + visit_table(session, reader, sink_index, table.as_ref(), visited_tables)? 
+ } else { + bail!("streaming job not found: {:?}", table_id); + } + } + Ok(()) } diff --git a/src/frontend/src/optimizer/plan_node/stream_materialize.rs b/src/frontend/src/optimizer/plan_node/stream_materialize.rs index 3abc7ace0e494..f2acbcf9d258c 100644 --- a/src/frontend/src/optimizer/plan_node/stream_materialize.rs +++ b/src/frontend/src/optimizer/plan_node/stream_materialize.rs @@ -226,6 +226,7 @@ impl StreamMaterialize { id: TableId::placeholder(), associated_source_id: None, name, + dependent_relations: vec![], columns, pk: table_pk, stream_key, diff --git a/src/frontend/src/optimizer/plan_node/utils.rs b/src/frontend/src/optimizer/plan_node/utils.rs index 39d9ff5e7018d..c8cd1bb05fa83 100644 --- a/src/frontend/src/optimizer/plan_node/utils.rs +++ b/src/frontend/src/optimizer/plan_node/utils.rs @@ -141,6 +141,7 @@ impl TableCatalogBuilder { id: TableId::placeholder(), associated_source_id: None, name: String::new(), + dependent_relations: vec![], columns: self.columns.clone(), pk: self.pk, stream_key: vec![], diff --git a/src/frontend/src/scheduler/distributed/query.rs b/src/frontend/src/scheduler/distributed/query.rs index 6295d8036b566..515a83d0923ef 100644 --- a/src/frontend/src/scheduler/distributed/query.rs +++ b/src/frontend/src/scheduler/distributed/query.rs @@ -543,6 +543,7 @@ pub(crate) mod tests { id: table_id, associated_source_id: None, name: "test".to_string(), + dependent_relations: vec![], columns: vec![ ColumnCatalog { column_desc: ColumnDesc::new_atomic(DataType::Int32, "a", 0), From 219b1b1479e5dfb9068ca23a1307e5290f05bb37 Mon Sep 17 00:00:00 2001 From: William Wen <44139337+wenym1@users.noreply.github.com> Date: Fri, 23 Feb 2024 12:23:14 +0800 Subject: [PATCH 14/35] refactor(meta): track finished create mv job in tracker (#15112) --- src/meta/src/barrier/command.rs | 9 --- src/meta/src/barrier/mod.rs | 101 +++++++------------------------ src/meta/src/barrier/progress.rs | 52 ++++++++++------ 3 files changed, 55 insertions(+), 107 
deletions(-) diff --git a/src/meta/src/barrier/command.rs b/src/meta/src/barrier/command.rs index 07765fe840c38..71fc9b98b355b 100644 --- a/src/meta/src/barrier/command.rs +++ b/src/meta/src/barrier/command.rs @@ -709,15 +709,6 @@ impl CommandContext { } } - /// For `CancelStreamingJob`, returns the actors of the `StreamScan` nodes. For other commands, - /// returns an empty set. - pub fn actors_to_cancel(&self) -> HashSet { - match &self.command { - Command::CancelStreamingJob(table_fragments) => table_fragments.backfill_actor_ids(), - _ => Default::default(), - } - } - /// For `CancelStreamingJob`, returns the table id of the target table. pub fn table_to_cancel(&self) -> Option { match &self.command { diff --git a/src/meta/src/barrier/mod.rs b/src/meta/src/barrier/mod.rs index 47bef49c66574..bd2f24f1baf46 100644 --- a/src/meta/src/barrier/mod.rs +++ b/src/meta/src/barrier/mod.rs @@ -50,7 +50,7 @@ use self::notifier::Notifier; use self::progress::TrackingCommand; use crate::barrier::info::InflightActorInfo; use crate::barrier::notifier::BarrierInfo; -use crate::barrier::progress::{CreateMviewProgressTracker, TrackingJob}; +use crate::barrier::progress::CreateMviewProgressTracker; use crate::barrier::rpc::BarrierRpcManager; use crate::barrier::state::BarrierManagerState; use crate::barrier::BarrierEpochState::{Completed, InFlight}; @@ -152,7 +152,7 @@ pub struct GlobalBarrierManagerContext { sink_manager: SinkCoordinatorManager, - metrics: Arc, + pub(super) metrics: Arc, stream_rpc_manager: StreamRpcManager, @@ -196,76 +196,34 @@ struct CheckpointControl { /// Save the state and message of barrier in order. 
command_ctx_queue: VecDeque, - metrics: Arc, - - /// Get notified when we finished Create MV and collect a barrier(checkpoint = true) - finished_jobs: Vec, + context: GlobalBarrierManagerContext, } impl CheckpointControl { - fn new(metrics: Arc) -> Self { + fn new(context: GlobalBarrierManagerContext) -> Self { Self { command_ctx_queue: Default::default(), - metrics, - finished_jobs: Default::default(), - } - } - - /// Stash a command to finish later. - fn stash_command_to_finish(&mut self, finished_job: TrackingJob) { - self.finished_jobs.push(finished_job); - } - - /// Finish stashed jobs. - /// If checkpoint, means all jobs can be finished. - /// If not checkpoint, jobs which do not require checkpoint can be finished. - /// - /// Returns whether there are still remaining stashed jobs to finish. - async fn finish_jobs(&mut self, checkpoint: bool) -> MetaResult { - for job in self - .finished_jobs - .extract_if(|job| checkpoint || !job.is_checkpoint_required()) - { - // The command is ready to finish. We can now call `pre_finish`. - job.pre_finish().await?; - job.notify_finished(); - } - Ok(!self.finished_jobs.is_empty()) - } - - fn cancel_command(&mut self, cancelled_job: TrackingJob) { - if let TrackingJob::New(cancelled_command) = cancelled_job { - if let Some(index) = self.command_ctx_queue.iter().position(|x| { - x.command_ctx.prev_epoch.value() == cancelled_command.context.prev_epoch.value() - }) { - self.command_ctx_queue.remove(index); - } - } else { - // Recovered jobs do not need to be cancelled since only `RUNNING` actors will get recovered. + context, } } - fn cancel_stashed_command(&mut self, id: TableId) { - self.finished_jobs - .retain(|x| x.table_to_create() != Some(id)); - } - /// Update the metrics of barrier nums. 
fn update_barrier_nums_metrics(&self) { - self.metrics.in_flight_barrier_nums.set( + self.context.metrics.in_flight_barrier_nums.set( self.command_ctx_queue .iter() .filter(|x| matches!(x.state, InFlight)) .count() as i64, ); - self.metrics + self.context + .metrics .all_barrier_nums .set(self.command_ctx_queue.len() as i64); } /// Enqueue a barrier command, and init its state to `InFlight`. fn enqueue_command(&mut self, command_ctx: Arc, notifiers: Vec) { - let timer = self.metrics.barrier_latency.start_timer(); + let timer = self.context.metrics.barrier_latency.start_timer(); self.command_ctx_queue.push_back(EpochNode { timer: Some(timer), @@ -285,7 +243,11 @@ impl CheckpointControl { result: Vec, ) -> Vec { // change state to complete, and wait for nodes with the smaller epoch to commit - let wait_commit_timer = self.metrics.barrier_wait_commit_latency.start_timer(); + let wait_commit_timer = self + .context + .metrics + .barrier_wait_commit_latency + .start_timer(); if let Some(node) = self .command_ctx_queue .iter_mut() @@ -341,11 +303,6 @@ impl CheckpointControl { .iter() .any(|x| x.command_ctx.prev_epoch.value().0 == epoch) } - - /// We need to make sure there are no changes when doing recovery - pub fn clear_changes(&mut self) { - self.finished_jobs.clear(); - } } /// The state and message of this barrier, a node for concurrent checkpoint. 
@@ -401,7 +358,6 @@ impl GlobalBarrierManager { InflightActorInfo::default(), None, ); - let checkpoint_control = CheckpointControl::new(metrics.clone()); let active_streaming_nodes = ActiveStreamingWorkerNodes::uninitialized(); @@ -420,6 +376,8 @@ impl GlobalBarrierManager { env: env.clone(), }; + let checkpoint_control = CheckpointControl::new(context.clone()); + let rpc_manager = BarrierRpcManager::new(context.clone()); Self { @@ -738,7 +696,6 @@ impl GlobalBarrierManager { err: MetaError, fail_nodes: impl IntoIterator, ) { - self.checkpoint_control.clear_changes(); self.rpc_manager.clear(); for node in fail_nodes { @@ -833,20 +790,13 @@ impl GlobalBarrierManager { notifier.notify_collected(); }); - // Save `cancelled_command` for Create MVs. - let actors_to_cancel = node.command_ctx.actors_to_cancel(); - let cancelled_command = if !actors_to_cancel.is_empty() { - let mut tracker = self.context.tracker.lock().await; - tracker.find_cancelled_command(actors_to_cancel) - } else { - None - }; + // Notify about collected. + let version_stats = self.context.hummock_manager.get_version_stats().await; + let mut tracker = self.context.tracker.lock().await; // Save `finished_commands` for Create MVs. let finished_commands = { let mut commands = vec![]; - let version_stats = self.context.hummock_manager.get_version_stats().await; - let mut tracker = self.context.tracker.lock().await; // Add the command to tracker. 
if let Some(command) = tracker.add( TrackingCommand { @@ -872,21 +822,16 @@ impl GlobalBarrierManager { }; for command in finished_commands { - self.checkpoint_control.stash_command_to_finish(command); + tracker.stash_command_to_finish(command); } - if let Some(command) = cancelled_command { - self.checkpoint_control.cancel_command(command); - } else if let Some(table_id) = node.command_ctx.table_to_cancel() { + if let Some(table_id) = node.command_ctx.table_to_cancel() { // the cancelled command is possibly stashed in `finished_commands` and waiting // for checkpoint, we should also clear it. - self.checkpoint_control.cancel_stashed_command(table_id); + tracker.cancel_command(table_id); } - let remaining = self - .checkpoint_control - .finish_jobs(kind.is_checkpoint()) - .await?; + let remaining = tracker.finish_jobs(kind.is_checkpoint()).await?; // If there are remaining commands (that requires checkpoint to finish), we force // the next barrier to be a checkpoint. if remaining { diff --git a/src/meta/src/barrier/progress.rs b/src/meta/src/barrier/progress.rs index f22c5a2bbb216..5c1e701e6fc81 100644 --- a/src/meta/src/barrier/progress.rs +++ b/src/meta/src/barrier/progress.rs @@ -16,7 +16,6 @@ use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use itertools::Itertools; use risingwave_common::catalog::TableId; use risingwave_common::util::epoch::Epoch; use risingwave_pb::ddl_service::DdlProgress; @@ -44,7 +43,7 @@ enum BackfillState { /// Progress of all actors containing backfill executors while creating mview. #[derive(Debug)] -struct Progress { +pub(super) struct Progress { states: HashMap, done_count: usize, @@ -254,6 +253,9 @@ pub(super) struct CreateMviewProgressTracker { /// Find the epoch of the create-mview DDL by the actor containing the backfill executors. 
actor_map: HashMap, + + /// Get notified when we finished Create MV and collect a barrier(checkpoint = true) + finished_jobs: Vec, } impl CreateMviewProgressTracker { @@ -313,6 +315,7 @@ impl CreateMviewProgressTracker { Self { progress_map, actor_map, + finished_jobs: Vec::new(), } } @@ -320,6 +323,7 @@ impl CreateMviewProgressTracker { Self { progress_map: Default::default(), actor_map: Default::default(), + finished_jobs: Vec::new(), } } @@ -338,25 +342,33 @@ impl CreateMviewProgressTracker { .collect() } - /// Try to find the target create-streaming-job command from track. + /// Stash a command to finish later. + pub(super) fn stash_command_to_finish(&mut self, finished_job: TrackingJob) { + self.finished_jobs.push(finished_job); + } + + /// Finish stashed jobs. + /// If checkpoint, means all jobs can be finished. + /// If not checkpoint, jobs which do not require checkpoint can be finished. /// - /// Return the target command as it should be cancelled based on the input actors. - pub fn find_cancelled_command( - &mut self, - actors_to_cancel: HashSet, - ) -> Option { - let epochs = actors_to_cancel - .into_iter() - .map(|actor_id| self.actor_map.get(&actor_id)) - .collect_vec(); - assert!(epochs.iter().all_equal()); - // If the target command found in progress map, return and remove it. Note that the command - // should have finished if not found. - if let Some(Some(epoch)) = epochs.first() { - Some(self.progress_map.remove(epoch).unwrap().1) - } else { - None + /// Returns whether there are still remaining stashed jobs to finish. + pub(super) async fn finish_jobs(&mut self, checkpoint: bool) -> MetaResult { + for job in self + .finished_jobs + .extract_if(|job| checkpoint || !job.is_checkpoint_required()) + { + // The command is ready to finish. We can now call `pre_finish`. 
+ job.pre_finish().await?; + job.notify_finished(); } + Ok(!self.finished_jobs.is_empty()) + } + + pub(super) fn cancel_command(&mut self, id: TableId) { + let _ = self.progress_map.remove(&id); + self.finished_jobs + .retain(|x| x.table_to_create() != Some(id)); + self.actor_map.retain(|_, table_id| *table_id != id); } /// Add a new create-mview DDL command to track. @@ -496,7 +508,7 @@ impl CreateMviewProgressTracker { table_id ); - // Clean-up the mapping from actors to DDL epoch. + // Clean-up the mapping from actors to DDL table_id. for actor in o.get().0.actors() { self.actor_map.remove(&actor); } From c5f90144469f9781e1a76ada368b1221eca935cb Mon Sep 17 00:00:00 2001 From: Eric Fu Date: Fri, 23 Feb 2024 12:35:20 +0800 Subject: [PATCH 15/35] chore: fix `risedev kill` (#15191) --- Makefile.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.toml b/Makefile.toml index 983b304d74e51..8820acf67c7bd 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -757,10 +757,10 @@ tmux list-windows -t risedev -F "#{window_name} #{pane_id}" \ if [[ -n $(tmux list-windows -t risedev | grep kafka) ]]; then echo "kill kafka" - kill_kafka + kill_kafka || true echo "kill zookeeper" - kill_zookeeper + kill_zookeeper || true # Kill their tmux sessions tmux list-windows -t risedev -F "#{pane_id}" | xargs -I {} tmux send-keys -t {} C-c C-d From 86df42b0252a7c6d3adb5c17e4aab60fa884df4a Mon Sep 17 00:00:00 2001 From: Eric Fu Date: Fri, 23 Feb 2024 12:40:58 +0800 Subject: [PATCH 16/35] chore: bump version of spotless plugin (#15203) --- .../main/java/com/risingwave/connector/CassandraConfig.java | 1 + java/pom.xml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java b/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java index 9ac3d257b2bad..7c883335cfc23 100644 --- 
a/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java +++ b/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java @@ -23,6 +23,7 @@ public class CassandraConfig extends CommonSinkConfig { /** Required */ private String type; + /** Required */ private String url; diff --git a/java/pom.xml b/java/pom.xml index 5f168c48bd9ef..c6e39b34cfc0b 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -69,7 +69,7 @@ 1.53.0 2.10 0.1.0-SNAPSHOT - 2.27.1 + 2.43.0 2.20.0 2.0.9 1.5.0 @@ -391,7 +391,7 @@ - 1.7 + 1.20.0 From 0c329e9aec39d73635f4af7d87ff2763a6fb9ae5 Mon Sep 17 00:00:00 2001 From: Dylan Date: Fri, 23 Feb 2024 12:53:25 +0800 Subject: [PATCH 17/35] feat(frontend): support create iceberg source (#14971) --- proto/plan_common.proto | 2 + src/connector/src/macros.rs | 3 +- src/connector/src/sink/catalog/mod.rs | 9 +- src/connector/src/sink/iceberg/mod.rs | 2 +- src/connector/src/source/base.rs | 2 +- src/connector/src/source/iceberg/mod.rs | 128 ++++++++++++++++++ src/connector/src/source/mod.rs | 1 + src/connector/with_options_source.yaml | 29 ++++ .../src/handler/alter_source_with_sr.rs | 2 + src/frontend/src/handler/create_sink.rs | 4 +- src/frontend/src/handler/create_source.rs | 112 +++++++++++++-- src/frontend/src/handler/create_table.rs | 23 +++- src/frontend/src/handler/util.rs | 14 ++ .../src/optimizer/plan_node/logical_source.rs | 15 +- src/sqlparser/src/ast/statement.rs | 26 ++++ 15 files changed, 343 insertions(+), 29 deletions(-) create mode 100644 src/connector/src/source/iceberg/mod.rs diff --git a/proto/plan_common.proto b/proto/plan_common.proto index 82f9fbc63a0f8..1dd45ad08a6ef 100644 --- a/proto/plan_common.proto +++ b/proto/plan_common.proto @@ -136,6 +136,7 @@ enum FormatType { FORMAT_TYPE_CANAL = 5; FORMAT_TYPE_UPSERT = 6; FORMAT_TYPE_PLAIN = 7; + FORMAT_TYPE_NONE = 8; } enum EncodeType { @@ -147,6 +148,7 @@ enum EncodeType { ENCODE_TYPE_JSON = 5; 
ENCODE_TYPE_BYTES = 6; ENCODE_TYPE_TEMPLATE = 7; + ENCODE_TYPE_NONE = 8; } enum RowFormatType { diff --git a/src/connector/src/macros.rs b/src/connector/src/macros.rs index 9a2383dbb4a96..e34171717ae6c 100644 --- a/src/connector/src/macros.rs +++ b/src/connector/src/macros.rs @@ -36,7 +36,8 @@ macro_rules! for_all_classified_sources { { Gcs, $crate::source::filesystem::opendal_source::GcsProperties , $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalGcs> }, { OpendalS3, $crate::source::filesystem::opendal_source::OpendalS3Properties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalS3> }, { PosixFs, $crate::source::filesystem::opendal_source::PosixFsProperties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalPosixFs> }, - { Test, $crate::source::test_source::TestSourceProperties, $crate::source::test_source::TestSourceSplit} + { Test, $crate::source::test_source::TestSourceProperties, $crate::source::test_source::TestSourceSplit}, + { Iceberg, $crate::source::iceberg::IcebergProperties, $crate::source::iceberg::IcebergSplit} } $( ,$extra_args diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs index d4e38cac4d1c9..e6a654f75a5fd 100644 --- a/src/connector/src/sink/catalog/mod.rs +++ b/src/connector/src/sink/catalog/mod.rs @@ -205,7 +205,12 @@ impl TryFrom for SinkFormatDesc { F::Plain => SinkFormat::AppendOnly, F::Upsert => SinkFormat::Upsert, F::Debezium => SinkFormat::Debezium, - f @ (F::Unspecified | F::Native | F::DebeziumMongo | F::Maxwell | F::Canal) => { + f @ (F::Unspecified + | F::Native + | F::DebeziumMongo + | F::Maxwell + | F::Canal + | F::None) => { return Err(SinkError::Config(anyhow!( "sink format unsupported: {}", f.as_str_name() @@ -217,7 +222,7 @@ impl TryFrom for SinkFormatDesc { E::Protobuf => SinkEncode::Protobuf, E::Template => SinkEncode::Template, E::Avro => 
SinkEncode::Avro, - e @ (E::Unspecified | E::Native | E::Csv | E::Bytes) => { + e @ (E::Unspecified | E::Native | E::Csv | E::Bytes | E::None) => { return Err(SinkError::Config(anyhow!( "sink encode unsupported: {}", e.as_str_name() diff --git a/src/connector/src/sink/iceberg/mod.rs b/src/connector/src/sink/iceberg/mod.rs index 68c5654533a64..326f8586d76eb 100644 --- a/src/connector/src/sink/iceberg/mod.rs +++ b/src/connector/src/sink/iceberg/mod.rs @@ -927,7 +927,7 @@ impl SinkCommitCoordinator for IcebergSinkCommitter { } /// Try to match our schema with iceberg schema. -fn try_matches_arrow_schema(rw_schema: &Schema, arrow_schema: &ArrowSchema) -> Result<()> { +pub fn try_matches_arrow_schema(rw_schema: &Schema, arrow_schema: &ArrowSchema) -> Result<()> { if rw_schema.fields.len() != arrow_schema.fields().len() { return Err(SinkError::Iceberg(anyhow!( "Schema length not match, ours is {}, and iceberg is {}", diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs index 5b909a2738f3c..fed8e0263aac4 100644 --- a/src/connector/src/source/base.rs +++ b/src/connector/src/source/base.rs @@ -150,7 +150,7 @@ pub struct SourceEnumeratorContext { pub connector_client: Option, } -#[derive(Clone, Copy, Debug, Default)] +#[derive(Clone, Debug, Default)] pub struct SourceEnumeratorInfo { pub source_id: u32, } diff --git a/src/connector/src/source/iceberg/mod.rs b/src/connector/src/source/iceberg/mod.rs new file mode 100644 index 0000000000000..e274f639f15b2 --- /dev/null +++ b/src/connector/src/source/iceberg/mod.rs @@ -0,0 +1,128 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +use async_trait::async_trait; +use risingwave_common::types::JsonbVal; +use serde::{Deserialize, Serialize}; + +use crate::parser::ParserConfig; +use crate::source::{ + BoxChunkSourceStream, Column, SourceContextRef, SourceEnumeratorContextRef, SourceProperties, + SplitEnumerator, SplitId, SplitMetaData, SplitReader, UnknownFields, +}; + +pub const ICEBERG_CONNECTOR: &str = "iceberg"; + +#[derive(Clone, Debug, Deserialize, PartialEq, with_options::WithOptions)] +pub struct IcebergProperties { + #[serde(rename = "catalog.type")] + pub catalog_type: String, + #[serde(rename = "s3.region")] + pub region_name: String, + #[serde(rename = "s3.endpoint", default)] + pub endpoint: String, + #[serde(rename = "s3.access.key", default)] + pub s3_access: String, + #[serde(rename = "s3.secret.key", default)] + pub s3_secret: String, + #[serde(rename = "warehouse.path")] + pub warehouse_path: String, + #[serde(rename = "database.name")] + pub database_name: String, + #[serde(rename = "table.name")] + pub table_name: String, + + #[serde(flatten)] + pub unknown_fields: HashMap, +} + +impl SourceProperties for IcebergProperties { + type Split = IcebergSplit; + type SplitEnumerator = IcebergSplitEnumerator; + type SplitReader = IcebergFileReader; + + const SOURCE_NAME: &'static str = ICEBERG_CONNECTOR; +} + +impl UnknownFields for IcebergProperties { + fn unknown_fields(&self) -> HashMap { + self.unknown_fields.clone() + } +} + +#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] +pub struct IcebergSplit {} + 
+impl SplitMetaData for IcebergSplit { + fn id(&self) -> SplitId { + unimplemented!() + } + + fn restore_from_json(_value: JsonbVal) -> anyhow::Result { + unimplemented!() + } + + fn encode_to_json(&self) -> JsonbVal { + unimplemented!() + } + + fn update_with_offset(&mut self, _start_offset: String) -> anyhow::Result<()> { + unimplemented!() + } +} + +#[derive(Debug, Clone)] +pub struct IcebergSplitEnumerator {} + +#[async_trait] +impl SplitEnumerator for IcebergSplitEnumerator { + type Properties = IcebergProperties; + type Split = IcebergSplit; + + async fn new( + _properties: Self::Properties, + _context: SourceEnumeratorContextRef, + ) -> anyhow::Result { + Ok(Self {}) + } + + async fn list_splits(&mut self) -> anyhow::Result> { + Ok(vec![]) + } +} + +#[derive(Debug)] +pub struct IcebergFileReader {} + +#[async_trait] +impl SplitReader for IcebergFileReader { + type Properties = IcebergProperties; + type Split = IcebergSplit; + + async fn new( + _props: IcebergProperties, + _splits: Vec, + _parser_config: ParserConfig, + _source_ctx: SourceContextRef, + _columns: Option>, + ) -> anyhow::Result { + unimplemented!() + } + + fn into_stream(self) -> BoxChunkSourceStream { + unimplemented!() + } +} diff --git a/src/connector/src/source/mod.rs b/src/connector/src/source/mod.rs index cba63b3005c1a..3656820ed95b0 100644 --- a/src/connector/src/source/mod.rs +++ b/src/connector/src/source/mod.rs @@ -31,6 +31,7 @@ pub use kafka::KAFKA_CONNECTOR; pub use kinesis::KINESIS_CONNECTOR; pub use nats::NATS_CONNECTOR; mod common; +pub mod iceberg; mod manager; pub mod reader; pub mod test_source; diff --git a/src/connector/with_options_source.yaml b/src/connector/with_options_source.yaml index 2d811ce639c96..dec3cf6a8941a 100644 --- a/src/connector/with_options_source.yaml +++ b/src/connector/with_options_source.yaml @@ -33,6 +33,35 @@ GcsProperties: field_type: String required: false default: Default::default +IcebergProperties: + fields: + - name: catalog.type + field_type: 
String + required: true + - name: s3.region + field_type: String + required: true + - name: s3.endpoint + field_type: String + required: false + default: Default::default + - name: s3.access.key + field_type: String + required: false + default: Default::default + - name: s3.secret.key + field_type: String + required: false + default: Default::default + - name: warehouse.path + field_type: String + required: true + - name: database.name + field_type: String + required: true + - name: table.name + field_type: String + required: true KafkaProperties: fields: - name: bytes.per.second diff --git a/src/frontend/src/handler/alter_source_with_sr.rs b/src/frontend/src/handler/alter_source_with_sr.rs index a8e6892e5a908..06bb2d0387479 100644 --- a/src/frontend/src/handler/alter_source_with_sr.rs +++ b/src/frontend/src/handler/alter_source_with_sr.rs @@ -42,6 +42,7 @@ fn format_type_to_format(from: FormatType) -> Option { FormatType::Canal => Format::Canal, FormatType::Upsert => Format::Upsert, FormatType::Plain => Format::Plain, + FormatType::None => Format::None, }) } @@ -55,6 +56,7 @@ fn encode_type_to_encode(from: EncodeType) -> Option { EncodeType::Json => Encode::Json, EncodeType::Bytes => Encode::Bytes, EncodeType::Template => Encode::Template, + EncodeType::None => Encode::None, }) } diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index 830253675c1bd..245976bd913b9 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -701,7 +701,7 @@ fn bind_sink_format_desc(value: ConnectorSchema) -> Result { F::Plain => SinkFormat::AppendOnly, F::Upsert => SinkFormat::Upsert, F::Debezium => SinkFormat::Debezium, - f @ (F::Native | F::DebeziumMongo | F::Maxwell | F::Canal) => { + f @ (F::Native | F::DebeziumMongo | F::Maxwell | F::Canal | F::None) => { return Err(ErrorCode::BindError(format!("sink format unsupported: {f}")).into()); } }; @@ -710,7 +710,7 @@ fn bind_sink_format_desc(value: 
ConnectorSchema) -> Result { E::Protobuf => SinkEncode::Protobuf, E::Avro => SinkEncode::Avro, E::Template => SinkEncode::Template, - e @ (E::Native | E::Csv | E::Bytes) => { + e @ (E::Native | E::Csv | E::Bytes | E::None) => { return Err(ErrorCode::BindError(format!("sink encode unsupported: {e}")).into()); } }; diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index 0fb4d1cd022f4..bbb2d93b21790 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -16,13 +16,13 @@ use std::collections::{BTreeMap, HashMap}; use std::rc::Rc; use std::sync::LazyLock; -use anyhow::Context; +use anyhow::{anyhow, Context}; use either::Either; use itertools::Itertools; use maplit::{convert_args, hashmap}; use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::catalog::{ - is_column_ids_dedup, ColumnCatalog, ColumnDesc, TableId, INITIAL_SOURCE_VERSION_ID, + is_column_ids_dedup, ColumnCatalog, ColumnDesc, Schema, TableId, INITIAL_SOURCE_VERSION_ID, KAFKA_TIMESTAMP_COLUMN_NAME, }; use risingwave_common::types::DataType; @@ -36,17 +36,20 @@ use risingwave_connector::parser::{ use risingwave_connector::schema::schema_registry::{ name_strategy_from_str, SchemaRegistryAuth, SCHEMA_REGISTRY_PASSWORD, SCHEMA_REGISTRY_USERNAME, }; +use risingwave_connector::sink::iceberg::IcebergConfig; use risingwave_connector::source::cdc::external::CdcTableType; use risingwave_connector::source::cdc::{ CDC_SHARING_MODE_KEY, CDC_SNAPSHOT_BACKFILL, CDC_SNAPSHOT_MODE_KEY, CDC_TRANSACTIONAL_KEY, CITUS_CDC_CONNECTOR, MYSQL_CDC_CONNECTOR, POSTGRES_CDC_CONNECTOR, }; use risingwave_connector::source::datagen::DATAGEN_CONNECTOR; +use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; use risingwave_connector::source::nexmark::source::{get_event_data_types_with_names, EventType}; use risingwave_connector::source::test_source::TEST_CONNECTOR; use risingwave_connector::source::{ - 
GCS_CONNECTOR, GOOGLE_PUBSUB_CONNECTOR, KAFKA_CONNECTOR, KINESIS_CONNECTOR, NATS_CONNECTOR, - NEXMARK_CONNECTOR, OPENDAL_S3_CONNECTOR, POSIX_FS_CONNECTOR, PULSAR_CONNECTOR, S3_CONNECTOR, + ConnectorProperties, GCS_CONNECTOR, GOOGLE_PUBSUB_CONNECTOR, KAFKA_CONNECTOR, + KINESIS_CONNECTOR, NATS_CONNECTOR, NEXMARK_CONNECTOR, OPENDAL_S3_CONNECTOR, POSIX_FS_CONNECTOR, + PULSAR_CONNECTOR, S3_CONNECTOR, }; use risingwave_pb::catalog::{ PbSchemaRegistryNameStrategy, PbSource, StreamSourceInfo, WatermarkDesc, @@ -72,7 +75,7 @@ use crate::handler::create_table::{ ensure_table_constraints_supported, ColumnIdGenerator, }; use crate::handler::util::{ - get_connector, is_cdc_connector, is_kafka_connector, SourceSchemaCompatExt, + connector_need_pk, get_connector, is_cdc_connector, is_kafka_connector, SourceSchemaCompatExt, }; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::generic::SourceNodeKind; @@ -316,6 +319,7 @@ pub(crate) async fn bind_columns_from_source( let columns = match (&source_schema.format, &source_schema.row_encode) { (Format::Native, Encode::Native) + | (Format::None, Encode::None) | (Format::Plain, Encode::Bytes) | (Format::DebeziumMongo, Encode::Json) => None, (Format::Plain, Encode::Protobuf) => { @@ -706,7 +710,9 @@ pub(crate) async fn bind_source_pk( .collect_vec(); let res = match (&source_schema.format, &source_schema.row_encode) { - (Format::Native, Encode::Native) | (Format::Plain, _) => sql_defined_pk_names, + (Format::Native, Encode::Native) | (Format::None, Encode::None) | (Format::Plain, _) => { + sql_defined_pk_names + } // For all Upsert formats, we only accept one and only key column as primary key. // Additional KEY columns must be set in this case and must be primary key. 
@@ -977,6 +983,9 @@ static CONNECTORS_COMPATIBLE_FORMATS: LazyLock hashmap!( Format::Plain => vec![Encode::Json], + ), + ICEBERG_CONNECTOR => hashmap!( + Format::None => vec![Encode::None], ) )) }); @@ -1054,12 +1063,11 @@ pub fn validate_compatibility( } /// Performs early stage checking in frontend to see if the schema of the given `columns` is -/// compatible with the connector extracted from the properties. Currently this only works for -/// `nexmark` connector since it's in chunk format. +/// compatible with the connector extracted from the properties. /// /// One should only call this function after all properties of all columns are resolved, like /// generated column descriptors. -pub(super) fn check_source_schema( +pub(super) async fn check_source_schema( props: &HashMap, row_id_index: Option, columns: &[ColumnCatalog], @@ -1068,10 +1076,22 @@ pub(super) fn check_source_schema( return Ok(()); }; - if connector != NEXMARK_CONNECTOR { - return Ok(()); + if connector == NEXMARK_CONNECTOR { + check_nexmark_schema(props, row_id_index, columns) + } else if connector == ICEBERG_CONNECTOR { + Ok(check_iceberg_source(props, columns) + .await + .map_err(|err| ProtocolError(err.to_string()))?) 
+ } else { + Ok(()) } +} +pub(super) fn check_nexmark_schema( + props: &HashMap, + row_id_index: Option, + columns: &[ColumnCatalog], +) -> Result<()> { let table_type = props .get("nexmark.table.type") .map(|t| t.to_ascii_lowercase()); @@ -1121,6 +1141,68 @@ pub(super) fn check_source_schema( Ok(()) } +pub async fn check_iceberg_source( + props: &HashMap, + columns: &[ColumnCatalog], +) -> anyhow::Result<()> { + let props = ConnectorProperties::extract(props.clone(), true)?; + let ConnectorProperties::Iceberg(properties) = props else { + return Err(anyhow!(format!( + "Invalid properties for iceberg source: {:?}", + props + ))); + }; + + let iceberg_config = IcebergConfig { + database_name: properties.database_name, + table_name: properties.table_name, + catalog_type: Some(properties.catalog_type), + path: properties.warehouse_path, + endpoint: Some(properties.endpoint), + access_key: properties.s3_access, + secret_key: properties.s3_secret, + region: Some(properties.region_name), + ..Default::default() + }; + + let schema = Schema { + fields: columns + .iter() + .cloned() + .map(|c| c.column_desc.into()) + .collect(), + }; + + let table = iceberg_config.load_table().await?; + + let iceberg_schema: arrow_schema::Schema = table + .current_table_metadata() + .current_schema()? 
+ .clone() + .try_into()?; + + for f1 in schema.fields() { + if !iceberg_schema.fields.iter().any(|f2| f2.name() == &f1.name) { + return Err(anyhow::anyhow!(format!( + "Column {} not found in iceberg table", + f1.name + ))); + } + } + + let new_iceberg_field = iceberg_schema + .fields + .iter() + .filter(|f1| schema.fields.iter().any(|f2| f1.name() == &f2.name)) + .cloned() + .collect::>(); + let new_iceberg_schema = arrow_schema::Schema::new(new_iceberg_field); + + risingwave_connector::sink::iceberg::try_matches_arrow_schema(&schema, &new_iceberg_schema)?; + + Ok(()) +} + pub async fn handle_create_source( handler_args: HandlerArgs, stmt: CreateSourceStatement, @@ -1215,8 +1297,8 @@ pub async fn handle_create_source( ) .into()); } - - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?; + let (mut columns, pk_column_ids, row_id_index) = + bind_pk_on_relation(columns, pk_names, connector_need_pk(&with_properties))?; debug_assert!(is_column_ids_dedup(&columns)); @@ -1233,7 +1315,7 @@ pub async fn handle_create_source( &pk_column_ids, )?; - check_source_schema(&with_properties, row_id_index, &columns)?; + check_source_schema(&with_properties, row_id_index, &columns).await?; let pk_column_ids = pk_column_ids.into_iter().map(Into::into).collect(); @@ -1310,6 +1392,7 @@ fn format_to_prost(format: &Format) -> FormatType { Format::DebeziumMongo => FormatType::DebeziumMongo, Format::Maxwell => FormatType::Maxwell, Format::Canal => FormatType::Canal, + Format::None => FormatType::None, } } fn row_encode_to_prost(row_encode: &Encode) -> EncodeType { @@ -1321,6 +1404,7 @@ fn row_encode_to_prost(row_encode: &Encode) -> EncodeType { Encode::Csv => EncodeType::Csv, Encode::Bytes => EncodeType::Bytes, Encode::Template => EncodeType::Template, + Encode::None => EncodeType::None, } } diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs index 8fc30c2c30e19..7fc757b71b6b7 100644 --- 
a/src/frontend/src/handler/create_table.rs +++ b/src/frontend/src/handler/create_table.rs @@ -61,6 +61,7 @@ use crate::handler::create_source::{ bind_all_columns, bind_columns_from_source, bind_source_pk, bind_source_watermark, check_source_schema, handle_addition_columns, validate_compatibility, UPSTREAM_SOURCE_KEY, }; +use crate::handler::util::is_iceberg_connector; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::generic::SourceNodeKind; use crate::optimizer::plan_node::{LogicalCdcScan, LogicalSource}; @@ -411,6 +412,7 @@ fn multiple_pk_definition_err() -> RwError { pub fn bind_pk_on_relation( mut columns: Vec, pk_names: Vec, + must_need_pk: bool, ) -> Result<(Vec, Vec, Option)> { for c in &columns { assert!(c.column_id() != ColumnId::placeholder()); @@ -431,8 +433,10 @@ pub fn bind_pk_on_relation( }) .try_collect()?; - // Add `_row_id` column if `pk_column_ids` is empty. - let row_id_index = pk_column_ids.is_empty().then(|| { + // Add `_row_id` column if `pk_column_ids` is empty and must_need_pk + let need_row_id = pk_column_ids.is_empty() && must_need_pk; + + let row_id_index = need_row_id.then(|| { let column = ColumnCatalog::row_id_column(); let index = columns.len(); pk_column_ids = vec![column.column_id()]; @@ -510,7 +514,12 @@ pub(crate) async fn gen_create_table_plan_with_source( c.column_desc.column_id = col_id_gen.generate(c.name()) } - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?; + if is_iceberg_connector(&with_properties) { + return Err( + ErrorCode::BindError("can't create table with iceberg connector".to_string()).into(), + ); + } + let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; let watermark_descs = bind_source_watermark( session, @@ -531,7 +540,7 @@ pub(crate) async fn gen_create_table_plan_with_source( &pk_column_ids, )?; - check_source_schema(&with_properties, row_id_index, &columns)?; + check_source_schema(&with_properties, 
row_id_index, &columns).await?; gen_table_plan_inner( context.into(), @@ -594,7 +603,7 @@ pub(crate) fn gen_create_table_plan_without_bind( ) -> Result<(PlanRef, Option, PbTable)> { ensure_table_constraints_supported(&constraints)?; let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?; + let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; let watermark_descs = bind_source_watermark( context.session_ctx(), @@ -774,7 +783,7 @@ pub(crate) fn gen_create_table_plan_for_cdc_source( } let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (columns, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names)?; + let (columns, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names, true)?; let definition = context.normalized_sql().to_owned(); @@ -1275,7 +1284,7 @@ mod tests { } ensure_table_constraints_supported(&constraints)?; let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (_, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names)?; + let (_, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names, true)?; Ok(pk_column_ids) })(); match (expected, actual) { diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 1e49ee8baf540..ab9d4fe415b33 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -31,6 +31,7 @@ use risingwave_common::catalog::Field; use risingwave_common::row::Row as _; use risingwave_common::types::{DataType, ScalarRefImpl, Timestamptz}; use risingwave_common::util::iter_util::ZipEqFast; +use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; use risingwave_connector::source::KAFKA_CONNECTOR; use risingwave_sqlparser::ast::{CompatibleSourceSchema, ConnectorSchema}; @@ -180,6 +181,11 @@ pub fn to_pg_field(f: &Field) -> PgFieldDescriptor { ) } +pub fn connector_need_pk(with_properties: &HashMap) -> bool { 
+ // Currently only iceberg connector doesn't need primary key + !is_iceberg_connector(with_properties) +} + #[inline(always)] pub fn get_connector(with_properties: &HashMap) -> Option { with_properties @@ -204,6 +210,14 @@ pub fn is_cdc_connector(with_properties: &HashMap) -> bool { connector.contains("-cdc") } +#[inline(always)] +pub fn is_iceberg_connector(with_properties: &HashMap) -> bool { + let Some(connector) = get_connector(with_properties) else { + return false; + }; + connector == ICEBERG_CONNECTOR +} + #[easy_ext::ext(SourceSchemaCompatExt)] impl CompatibleSourceSchema { /// Convert `self` to [`ConnectorSchema`] and warn the user if the syntax is deprecated. diff --git a/src/frontend/src/optimizer/plan_node/logical_source.rs b/src/frontend/src/optimizer/plan_node/logical_source.rs index fa7ad908d01d4..43ec6d2a89de8 100644 --- a/src/frontend/src/optimizer/plan_node/logical_source.rs +++ b/src/frontend/src/optimizer/plan_node/logical_source.rs @@ -23,7 +23,8 @@ use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ ColumnCatalog, ColumnDesc, Field, Schema, KAFKA_TIMESTAMP_COLUMN_NAME, }; -use risingwave_connector::source::DataType; +use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; +use risingwave_connector::source::{DataType, UPSTREAM_SOURCE_KEY}; use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn; use risingwave_pb::plan_common::GeneratedColumnDesc; @@ -546,6 +547,18 @@ impl ToStream for LogicalSource { } } } + if let Some(source) = &self.core.catalog { + let connector = &source + .with_properties + .get(UPSTREAM_SOURCE_KEY) + .map(|s| s.to_lowercase()) + .unwrap(); + if ICEBERG_CONNECTOR == connector { + return Err( + anyhow::anyhow!("Iceberg source is not supported in stream queries").into(), + ); + } + } Ok(plan) } diff --git a/src/sqlparser/src/ast/statement.rs b/src/sqlparser/src/ast/statement.rs index 3dd923b610542..e876a197c265d 100644 --- a/src/sqlparser/src/ast/statement.rs +++ 
b/src/sqlparser/src/ast/statement.rs @@ -94,6 +94,7 @@ pub struct CreateSourceStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Format { Native, + None, // Keyword::NONE Debezium, // Keyword::DEBEZIUM DebeziumMongo, // Keyword::DEBEZIUM_MONGO Maxwell, // Keyword::MAXWELL @@ -116,6 +117,7 @@ impl fmt::Display for Format { Format::Canal => "CANAL", Format::Upsert => "UPSERT", Format::Plain => "PLAIN", + Format::None => "NONE", } ) } @@ -149,6 +151,7 @@ pub enum Encode { Protobuf, // Keyword::PROTOBUF Json, // Keyword::JSON Bytes, // Keyword::BYTES + None, // Keyword::None Native, Template, } @@ -167,6 +170,7 @@ impl fmt::Display for Encode { Encode::Bytes => "BYTES", Encode::Native => "NATIVE", Encode::Template => "TEMPLATE", + Encode::None => "NONE", } ) } @@ -249,6 +253,18 @@ impl Parser { } else { ConnectorSchema::native().into() }) + } else if connector.contains("iceberg") { + let expected = ConnectorSchema::none(); + if self.peek_source_schema_format() { + let schema = parse_source_schema(self)?.into_v2(); + if schema != expected { + return Err(ParserError::ParserError(format!( + "Row format for iceberg connectors should be \ + either omitted or set to `{expected}`", + ))); + } + } + Ok(expected.into()) } else { Ok(parse_source_schema(self)?) } @@ -304,6 +320,16 @@ impl ConnectorSchema { } } + /// Create a new source schema with `None` format and encoding. + /// Used for self-explanatory source like iceberg. 
+ pub const fn none() -> Self { + ConnectorSchema { + format: Format::None, + row_encode: Encode::None, + row_options: Vec::new(), + } + } + pub fn row_options(&self) -> &[SqlOption] { self.row_options.as_ref() } From c6ed6d14aff2644c341868757d6fcf8abb41b64e Mon Sep 17 00:00:00 2001 From: Shanicky Chen Date: Fri, 23 Feb 2024 12:58:20 +0800 Subject: [PATCH 18/35] feat: try to reduce memory usage in scaling (#15193) Signed-off-by: Shanicky Chen --- src/meta/src/stream/scale.rs | 220 +++++++++++++++++++++++++----- src/meta/src/stream/test_scale.rs | 8 +- 2 files changed, 188 insertions(+), 40 deletions(-) diff --git a/src/meta/src/stream/scale.rs b/src/meta/src/stream/scale.rs index 7f40f8e3da033..0e571a0afebf7 100644 --- a/src/meta/src/stream/scale.rs +++ b/src/meta/src/stream/scale.rs @@ -31,15 +31,19 @@ use risingwave_common::catalog::TableId; use risingwave_common::hash::{ActorMapping, ParallelUnitId, VirtualNode}; use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_meta_model_v2::StreamingParallelism; -use risingwave_pb::common::{ActorInfo, ParallelUnit, WorkerNode}; +use risingwave_pb::common::{ActorInfo, Buffer, ParallelUnit, ParallelUnitMapping, WorkerNode}; use risingwave_pb::meta::get_reschedule_plan_request::{Policy, StableResizePolicy}; use risingwave_pb::meta::subscribe_response::{Info, Operation}; use risingwave_pb::meta::table_fragments::actor_status::ActorState; -use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; -use risingwave_pb::meta::table_fragments::{self, ActorStatus, Fragment, State}; +use risingwave_pb::meta::table_fragments::fragment::{ + FragmentDistributionType, PbFragmentDistributionType, +}; +use risingwave_pb::meta::table_fragments::{self, ActorStatus, PbFragment, State}; use risingwave_pb::meta::FragmentParallelUnitMappings; use risingwave_pb::stream_plan::stream_node::NodeBody; -use risingwave_pb::stream_plan::{DispatcherType, FragmentTypeFlag, StreamActor, StreamNode}; +use 
risingwave_pb::stream_plan::{ + Dispatcher, DispatcherType, FragmentTypeFlag, PbStreamActor, StreamNode, +}; use thiserror_ext::AsReport; use tokio::sync::oneshot::Receiver; use tokio::sync::{oneshot, RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -105,15 +109,85 @@ pub struct ParallelUnitReschedule { pub removed_parallel_units: BTreeSet, } +pub struct CustomFragmentInfo { + pub fragment_id: u32, + pub fragment_type_mask: u32, + pub distribution_type: PbFragmentDistributionType, + pub vnode_mapping: Option, + pub state_table_ids: Vec, + pub upstream_fragment_ids: Vec, + pub actor_template: PbStreamActor, + pub actors: Vec, +} + +#[derive(Default)] +pub struct CustomActorInfo { + pub actor_id: u32, + pub fragment_id: u32, + pub dispatcher: Vec, + pub upstream_actor_id: Vec, + pub vnode_bitmap: Option, +} + +impl From<&PbStreamActor> for CustomActorInfo { + fn from( + PbStreamActor { + actor_id, + fragment_id, + dispatcher, + upstream_actor_id, + vnode_bitmap, + .. + }: &PbStreamActor, + ) -> Self { + CustomActorInfo { + actor_id: *actor_id, + fragment_id: *fragment_id, + dispatcher: dispatcher.clone(), + upstream_actor_id: upstream_actor_id.clone(), + vnode_bitmap: vnode_bitmap.clone(), + } + } +} + +impl From<&PbFragment> for CustomFragmentInfo { + fn from(fragment: &PbFragment) -> Self { + CustomFragmentInfo { + fragment_id: fragment.fragment_id, + fragment_type_mask: fragment.fragment_type_mask, + distribution_type: fragment.distribution_type(), + vnode_mapping: fragment.vnode_mapping.clone(), + state_table_ids: fragment.state_table_ids.clone(), + upstream_fragment_ids: fragment.upstream_fragment_ids.clone(), + actor_template: fragment + .actors + .first() + .cloned() + .expect("no actor in fragment"), + actors: fragment.actors.iter().map(CustomActorInfo::from).collect(), + } + } +} + +impl CustomFragmentInfo { + pub fn get_fragment_type_mask(&self) -> u32 { + self.fragment_type_mask + } + + pub fn distribution_type(&self) -> FragmentDistributionType { + 
self.distribution_type + } +} + pub struct RescheduleContext { /// Index used to map `ParallelUnitId` to `WorkerId` parallel_unit_id_to_worker_id: BTreeMap, /// Meta information for all Actors - actor_map: HashMap, + actor_map: HashMap, /// Status of all Actors, used to find the location of the `Actor` actor_status: BTreeMap, /// Meta information of all `Fragment`, used to find the `Fragment`'s `Actor` - fragment_map: HashMap, + fragment_map: HashMap, /// Indexes for all `Worker`s worker_nodes: HashMap, /// Index of all `Actor` upstreams, specific to `Dispatcher` @@ -180,7 +254,7 @@ impl RescheduleContext { /// /// The return value is the bitmap distribution after scaling, which covers all virtual node indexes pub fn rebalance_actor_vnode( - actors: &[StreamActor], + actors: &[CustomActorInfo], actors_to_remove: &BTreeSet, actors_to_create: &BTreeSet, ) -> HashMap { @@ -464,16 +538,29 @@ impl ScaleController { let mut fragment_state = HashMap::new(); let mut fragment_to_table = HashMap::new(); - let all_table_fragments = self.list_all_table_fragments().await?; - - for table_fragments in all_table_fragments { + // We are reusing code for the metadata manager of both V1 and V2, which will be deprecated in the future. 
+ fn fulfill_index_by_table_fragments_ref( + actor_map: &mut HashMap, + fragment_map: &mut HashMap, + actor_status: &mut BTreeMap, + fragment_state: &mut HashMap, + fragment_to_table: &mut HashMap, + table_fragments: &TableFragments, + ) { fragment_state.extend( table_fragments .fragment_ids() .map(|f| (f, table_fragments.state())), ); - fragment_map.extend(table_fragments.fragments.clone()); - actor_map.extend(table_fragments.actor_map()); + + for (fragment_id, fragment) in &table_fragments.fragments { + for actor in &fragment.actors { + actor_map.insert(actor.actor_id, CustomActorInfo::from(actor)); + } + + fragment_map.insert(*fragment_id, CustomFragmentInfo::from(fragment)); + } + actor_status.extend(table_fragments.actor_status.clone()); fragment_to_table.extend( @@ -483,6 +570,37 @@ impl ScaleController { ); } + match &self.metadata_manager { + MetadataManager::V1(mgr) => { + let guard = mgr.fragment_manager.get_fragment_read_guard().await; + + for table_fragments in guard.table_fragments().values() { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut fragment_map, + &mut actor_status, + &mut fragment_state, + &mut fragment_to_table, + table_fragments, + ); + } + } + MetadataManager::V2(_) => { + let all_table_fragments = self.list_all_table_fragments().await?; + + for table_fragments in &all_table_fragments { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut fragment_map, + &mut actor_status, + &mut fragment_state, + &mut fragment_to_table, + table_fragments, + ); + } + } + }; + // NoShuffle relation index let mut no_shuffle_source_fragment_ids = HashSet::new(); let mut no_shuffle_target_fragment_ids = HashSet::new(); @@ -608,7 +726,7 @@ impl ScaleController { } if (fragment.get_fragment_type_mask() & FragmentTypeFlag::Source as u32) != 0 { - let stream_node = fragment.actors.first().unwrap().get_nodes().unwrap(); + let stream_node = fragment.actor_template.nodes.as_ref().unwrap(); if 
TableFragments::find_stream_source(stream_node).is_some() { stream_source_fragment_ids.insert(*fragment_id); } @@ -698,7 +816,7 @@ impl ScaleController { &self, worker_nodes: &HashMap, actor_infos_to_broadcast: BTreeMap, - node_actors_to_create: HashMap>, + node_actors_to_create: HashMap>, broadcast_worker_ids: HashSet, ) -> MetaResult<()> { self.stream_rpc_manager @@ -963,7 +1081,7 @@ impl ScaleController { for (actor_to_create, sample_actor) in actors_to_create .iter() - .zip_eq_debug(repeat(fragment.actors.first().unwrap()).take(actors_to_create.len())) + .zip_eq_debug(repeat(&fragment.actor_template).take(actors_to_create.len())) { let new_actor_id = actor_to_create.0; let mut new_actor = sample_actor.clone(); @@ -1407,7 +1525,7 @@ impl ScaleController { fragment_actor_bitmap: &HashMap>, no_shuffle_upstream_actor_map: &HashMap>, no_shuffle_downstream_actors_map: &HashMap>, - new_actor: &mut StreamActor, + new_actor: &mut PbStreamActor, ) -> MetaResult<()> { let fragment = &ctx.fragment_map.get(&new_actor.fragment_id).unwrap(); let mut applied_upstream_fragment_actor_ids = HashMap::new(); @@ -1953,8 +2071,6 @@ impl ScaleController { }) .collect::>(); - let all_table_fragments = self.list_all_table_fragments().await?; - // FIXME: only need actor id and dispatcher info, avoid clone it. let mut actor_map = HashMap::new(); let mut actor_status = HashMap::new(); @@ -1962,24 +2078,56 @@ impl ScaleController { let mut fragment_map = HashMap::new(); let mut fragment_parallelism = HashMap::new(); - for table_fragments in all_table_fragments { - for (fragment_id, fragment) in table_fragments.fragments { - fragment - .actors - .iter() - .map(|actor| (actor.actor_id, actor)) - .for_each(|(id, actor)| { - actor_map.insert(id as ActorId, actor.clone()); - }); + // We are reusing code for the metadata manager of both V1 and V2, which will be deprecated in the future. 
+ fn fulfill_index_by_table_fragments_ref( + actor_map: &mut HashMap, + actor_status: &mut HashMap, + fragment_map: &mut HashMap, + fragment_parallelism: &mut HashMap, + table_fragments: &TableFragments, + ) { + for (fragment_id, fragment) in &table_fragments.fragments { + for actor in &fragment.actors { + actor_map.insert(actor.actor_id, CustomActorInfo::from(actor)); + } - fragment_map.insert(fragment_id, fragment); + fragment_map.insert(*fragment_id, CustomFragmentInfo::from(fragment)); - fragment_parallelism.insert(fragment_id, table_fragments.assigned_parallelism); + fragment_parallelism.insert(*fragment_id, table_fragments.assigned_parallelism); } - actor_status.extend(table_fragments.actor_status); + actor_status.extend(table_fragments.actor_status.clone()); } + match &self.metadata_manager { + MetadataManager::V1(mgr) => { + let guard = mgr.fragment_manager.get_fragment_read_guard().await; + + for table_fragments in guard.table_fragments().values() { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut actor_status, + &mut fragment_map, + &mut fragment_parallelism, + table_fragments, + ); + } + } + MetadataManager::V2(_) => { + let all_table_fragments = self.list_all_table_fragments().await?; + + for table_fragments in &all_table_fragments { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut actor_status, + &mut fragment_map, + &mut fragment_parallelism, + table_fragments, + ); + } + } + }; + let mut no_shuffle_source_fragment_ids = HashSet::new(); let mut no_shuffle_target_fragment_ids = HashSet::new(); @@ -2034,7 +2182,7 @@ impl ScaleController { }, ) in fragment_worker_changes { - let fragment = match fragment_map.get(&fragment_id).cloned() { + let fragment = match fragment_map.get(&fragment_id) { None => bail!("Fragment id {} not found", fragment_id), Some(fragment) => fragment, }; @@ -2122,7 +2270,7 @@ impl ScaleController { // then we re-add the limited parallel units from the limited workers 
target_parallel_unit_ids.extend(limited_worker_parallel_unit_ids.into_iter()); } - match fragment.get_distribution_type().unwrap() { + match fragment.distribution_type() { FragmentDistributionType::Unspecified => unreachable!(), FragmentDistributionType::Single => { let single_parallel_unit_id = @@ -2274,7 +2422,7 @@ impl ScaleController { } pub fn build_no_shuffle_relation_index( - actor_map: &HashMap, + actor_map: &HashMap, no_shuffle_source_fragment_ids: &mut HashSet, no_shuffle_target_fragment_ids: &mut HashSet, ) { @@ -2302,7 +2450,7 @@ impl ScaleController { } pub fn build_fragment_dispatcher_index( - actor_map: &HashMap, + actor_map: &HashMap, fragment_dispatcher_map: &mut HashMap>, ) { for actor in actor_map.values() { @@ -2324,7 +2472,7 @@ impl ScaleController { pub fn resolve_no_shuffle_upstream_tables( fragment_ids: HashSet, - fragment_map: &HashMap, + fragment_map: &HashMap, no_shuffle_source_fragment_ids: &HashSet, no_shuffle_target_fragment_ids: &HashSet, fragment_to_table: &HashMap, @@ -2394,7 +2542,7 @@ impl ScaleController { pub fn resolve_no_shuffle_upstream_fragments( reschedule: &mut HashMap, - fragment_map: &HashMap, + fragment_map: &HashMap, no_shuffle_source_fragment_ids: &HashSet, no_shuffle_target_fragment_ids: &HashSet, ) -> MetaResult<()> diff --git a/src/meta/src/stream/test_scale.rs b/src/meta/src/stream/test_scale.rs index 2db55dbddbd4d..73d59ff52f2f4 100644 --- a/src/meta/src/stream/test_scale.rs +++ b/src/meta/src/stream/test_scale.rs @@ -21,10 +21,10 @@ mod tests { use risingwave_common::buffer::Bitmap; use risingwave_common::hash::{ActorMapping, ParallelUnitId, ParallelUnitMapping, VirtualNode}; use risingwave_pb::common::ParallelUnit; - use risingwave_pb::stream_plan::StreamActor; use crate::model::ActorId; use crate::stream::scale::rebalance_actor_vnode; + use crate::stream::CustomActorInfo; fn simulated_parallel_unit_nums(min: Option, max: Option) -> Vec { let mut raw = vec![1, 3, 12, 42, VirtualNode::COUNT]; @@ -39,13 +39,13 @@ 
mod tests { raw } - fn build_fake_actors(info: &[(ActorId, ParallelUnitId)]) -> Vec { + fn build_fake_actors(info: &[(ActorId, ParallelUnitId)]) -> Vec { let parallel_units = generate_parallel_units(info); let vnode_bitmaps = ParallelUnitMapping::build(¶llel_units).to_bitmaps(); info.iter() - .map(|(actor_id, parallel_unit_id)| StreamActor { + .map(|(actor_id, parallel_unit_id)| CustomActorInfo { actor_id: *actor_id, vnode_bitmap: vnode_bitmaps .get(parallel_unit_id) @@ -64,7 +64,7 @@ mod tests { .collect_vec() } - fn check_affinity_for_scale_in(bitmap: &Bitmap, actor: &StreamActor) { + fn check_affinity_for_scale_in(bitmap: &Bitmap, actor: &CustomActorInfo) { let prev_bitmap = Bitmap::from(actor.vnode_bitmap.as_ref().unwrap()); for idx in 0..VirtualNode::COUNT { From ea0b01220efe97fa003bcfe05cb63a645aaf5e39 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Fri, 23 Feb 2024 13:30:32 +0800 Subject: [PATCH 19/35] chore: set `buf breaking` rule back to `WIRE_JSON` (#15147) --- proto/buf.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/proto/buf.yaml b/proto/buf.yaml index 1aa31816ce0af..abad30f04506c 100644 --- a/proto/buf.yaml +++ b/proto/buf.yaml @@ -1,7 +1,8 @@ version: v1 breaking: use: - - WIRE # https://docs.buf.build/breaking/rules + - WIRE_JSON # https://docs.buf.build/breaking/rules + # https://github.com/risingwavelabs/risingwave/issues/15030 lint: use: - DEFAULT From b95d9a9098d086eaea781c7d7926aa1bdaa91bee Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Fri, 23 Feb 2024 13:30:45 +0800 Subject: [PATCH 20/35] fix(frontend): require primary key for system table (#15126) Signed-off-by: Bugen Zhao --- e2e_test/batch/catalog/pg_settings.slt.part | 8 +++ .../fields-derive/src/gen/test_empty_pk.rs | 29 +++++++++ .../fields-derive/src/gen/test_no_pk.rs | 29 +++++++++ .../fields-derive/src/gen/test_output.rs | 4 +- src/common/fields-derive/src/lib.rs | 61 ++++++++++++++----- src/common/src/types/fields.rs | 11 ++-- src/frontend/macro/src/lib.rs 
| 6 +- .../system_catalog/pg_catalog/pg_cast.rs | 1 + .../system_catalog/pg_catalog/pg_settings.rs | 1 + .../rw_catalog/rw_hummock_branched_objects.rs | 1 + .../rw_catalog/rw_hummock_pinned_snapshots.rs | 1 + .../rw_catalog/rw_hummock_pinned_versions.rs | 1 + .../rw_catalog/rw_hummock_version.rs | 1 + .../rw_catalog/rw_meta_snapshot.rs | 1 + 14 files changed, 132 insertions(+), 23 deletions(-) create mode 100644 src/common/fields-derive/src/gen/test_empty_pk.rs create mode 100644 src/common/fields-derive/src/gen/test_no_pk.rs diff --git a/e2e_test/batch/catalog/pg_settings.slt.part b/e2e_test/batch/catalog/pg_settings.slt.part index 5f37db11fcb91..c8e927ba72b9f 100644 --- a/e2e_test/batch/catalog/pg_settings.slt.part +++ b/e2e_test/batch/catalog/pg_settings.slt.part @@ -63,6 +63,14 @@ query TT SELECT * FROM pg_catalog.pg_settings where name='dummy'; ---- +# https://github.com/risingwavelabs/risingwave/issues/15125 +query TT +SELECT min(name) name, context FROM pg_catalog.pg_settings GROUP BY context; +---- +application_name user +backup_storage_directory postmaster +block_size_kb internal + # Tab-completion of `SET` command query T SELECT name diff --git a/src/common/fields-derive/src/gen/test_empty_pk.rs b/src/common/fields-derive/src/gen/test_empty_pk.rs new file mode 100644 index 0000000000000..ffb5ff268bed1 --- /dev/null +++ b/src/common/fields-derive/src/gen/test_empty_pk.rs @@ -0,0 +1,29 @@ +impl ::risingwave_common::types::Fields for Data { + const PRIMARY_KEY: Option<&'static [usize]> = Some(&[]); + fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { + vec![ + ("v1", < i16 as ::risingwave_common::types::WithDataType > + ::default_data_type()), ("v2", < String as + ::risingwave_common::types::WithDataType > ::default_data_type()) + ] + } + fn into_owned_row(self) -> ::risingwave_common::row::OwnedRow { + ::risingwave_common::row::OwnedRow::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v1), + 
::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v2) + ], + ) + } +} +impl From for ::risingwave_common::types::ScalarImpl { + fn from(v: Data) -> Self { + ::risingwave_common::types::StructValue::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v2) + ], + ) + .into() + } +} diff --git a/src/common/fields-derive/src/gen/test_no_pk.rs b/src/common/fields-derive/src/gen/test_no_pk.rs new file mode 100644 index 0000000000000..9e1b3e7892969 --- /dev/null +++ b/src/common/fields-derive/src/gen/test_no_pk.rs @@ -0,0 +1,29 @@ +impl ::risingwave_common::types::Fields for Data { + const PRIMARY_KEY: Option<&'static [usize]> = None; + fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { + vec![ + ("v1", < i16 as ::risingwave_common::types::WithDataType > + ::default_data_type()), ("v2", < String as + ::risingwave_common::types::WithDataType > ::default_data_type()) + ] + } + fn into_owned_row(self) -> ::risingwave_common::row::OwnedRow { + ::risingwave_common::row::OwnedRow::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v2) + ], + ) + } +} +impl From for ::risingwave_common::types::ScalarImpl { + fn from(v: Data) -> Self { + ::risingwave_common::types::StructValue::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v2) + ], + ) + .into() + } +} diff --git a/src/common/fields-derive/src/gen/test_output.rs b/src/common/fields-derive/src/gen/test_output.rs index 517dcdefc7a8c..a804a379bfd4a 100644 --- a/src/common/fields-derive/src/gen/test_output.rs +++ b/src/common/fields-derive/src/gen/test_output.rs @@ -1,4 +1,5 @@ impl ::risingwave_common::types::Fields for Data { + const PRIMARY_KEY: Option<&'static [usize]> = Some(&[1usize, 0usize]); fn fields() -> Vec<(&'static 
str, ::risingwave_common::types::DataType)> { vec![ ("v1", < i16 as ::risingwave_common::types::WithDataType > @@ -21,9 +22,6 @@ impl ::risingwave_common::types::Fields for Data { ], ) } - fn primary_key() -> &'static [usize] { - &[1usize, 0usize] - } } impl From for ::risingwave_common::types::ScalarImpl { fn from(v: Data) -> Self { diff --git a/src/common/fields-derive/src/lib.rs b/src/common/fields-derive/src/lib.rs index b38f579751683..dae648d1dc343 100644 --- a/src/common/fields-derive/src/lib.rs +++ b/src/common/fields-derive/src/lib.rs @@ -82,16 +82,17 @@ fn gen(tokens: TokenStream) -> Result { .iter() .map(|field| field.ident.as_ref().expect("field no name")) .collect::>(); - let primary_key = get_primary_key(&input).map(|indices| { - quote! { - fn primary_key() -> &'static [usize] { - &[#(#indices),*] - } - } - }); + let primary_key = get_primary_key(&input).map_or_else( + || quote! { None }, + |indices| { + quote! { Some(&[#(#indices),*]) } + }, + ); Ok(quote! { impl ::risingwave_common::types::Fields for #ident { + const PRIMARY_KEY: Option<&'static [usize]> = #primary_key; + fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { vec![#(#fields_rw),*] } @@ -100,7 +101,6 @@ fn gen(tokens: TokenStream) -> Result { ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.#names) ),*]) } - #primary_key } impl From<#ident> for ::risingwave_common::types::ScalarImpl { fn from(v: #ident) -> Self { @@ -133,7 +133,9 @@ fn get_primary_key(input: &syn::DeriveInput) -> Option> { return Some( keys.to_string() .split(',') - .map(|s| index(s.trim())) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(index) .collect(), ); } @@ -199,6 +201,18 @@ mod tests { prettyplease::unparse(&output) } + fn do_test(code: &str, expected_path: &str) { + let input: TokenStream = str::parse(code).unwrap(); + + let output = super::gen(input).unwrap(); + + let output = pretty_print(output); + + let expected = expect_test::expect_file![expected_path]; + + 
expected.assert_eq(&output); + } + #[test] fn test_gen() { let code = indoc! {r#" @@ -213,14 +227,33 @@ mod tests { } "#}; - let input: TokenStream = str::parse(code).unwrap(); + do_test(code, "gen/test_output.rs"); + } - let output = super::gen(input).unwrap(); + #[test] + fn test_no_pk() { + let code = indoc! {r#" + #[derive(Fields)] + struct Data { + v1: i16, + v2: String, + } + "#}; - let output = pretty_print(output); + do_test(code, "gen/test_no_pk.rs"); + } - let expected = expect_test::expect_file!["gen/test_output.rs"]; + #[test] + fn test_empty_pk() { + let code = indoc! {r#" + #[derive(Fields)] + #[primary_key()] + struct Data { + v1: i16, + v2: String, + } + "#}; - expected.assert_eq(&output); + do_test(code, "gen/test_empty_pk.rs"); } } diff --git a/src/common/src/types/fields.rs b/src/common/src/types/fields.rs index f52717297792e..df1795804af00 100644 --- a/src/common/src/types/fields.rs +++ b/src/common/src/types/fields.rs @@ -58,17 +58,18 @@ use crate::util::chunk_coalesce::DataChunkBuilder; /// } /// ``` pub trait Fields { + /// The primary key of the table. + /// + /// - `None` if the primary key is not applicable. + /// - `Some(&[])` if the primary key is empty, i.e., there'll be at most one row in the table. + const PRIMARY_KEY: Option<&'static [usize]>; + /// Return the schema of the struct. fn fields() -> Vec<(&'static str, DataType)>; /// Convert the struct to an `OwnedRow`. fn into_owned_row(self) -> OwnedRow; - /// The primary key of the table. - fn primary_key() -> &'static [usize] { - &[] - } - /// Create a [`DataChunkBuilder`](crate::util::chunk_coalesce::DataChunkBuilder) with the schema of the struct. 
fn data_chunk_builder(capacity: usize) -> DataChunkBuilder { DataChunkBuilder::new( diff --git a/src/frontend/macro/src/lib.rs b/src/frontend/macro/src/lib.rs index 8ba10a9f4454a..36b7f33eb99c0 100644 --- a/src/frontend/macro/src/lib.rs +++ b/src/frontend/macro/src/lib.rs @@ -117,11 +117,15 @@ fn gen_sys_table(attr: Attr, item_fn: ItemFn) -> Result { #[linkme::distributed_slice(crate::catalog::system_catalog::SYS_CATALOGS_SLICE)] #[no_mangle] // to prevent duplicate schema.table name fn #gen_fn_name() -> crate::catalog::system_catalog::BuiltinCatalog { + const _: () = { + assert!(#struct_type::PRIMARY_KEY.is_some(), "primary key is required for system table"); + }; + crate::catalog::system_catalog::BuiltinCatalog::Table(crate::catalog::system_catalog::BuiltinTable { name: #table_name, schema: #schema_name, columns: #struct_type::fields(), - pk: #struct_type::primary_key(), + pk: #struct_type::PRIMARY_KEY.unwrap(), function: |reader| std::boxed::Box::pin(async { let rows = #user_fn_name(reader) #_await #handle_error; let mut builder = #struct_type::data_chunk_builder(rows.len() + 1); diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs index c13e87f162afe..11bcabcde0f69 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs @@ -22,6 +22,7 @@ use crate::expr::cast_map_array; /// Ref: [`https://www.postgresql.org/docs/current/catalog-pg-cast.html`] #[derive(Fields)] struct PgCast { + #[primary_key] oid: i32, castsource: i32, casttarget: i32, diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs index 0f079ca3f6452..58d44b1aef92b 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs @@ -21,6 +21,7 @@ use 
crate::catalog::system_catalog::SysCatalogReaderImpl; /// The catalog `pg_settings` stores settings. /// Ref: [`https://www.postgresql.org/docs/current/view-pg-settings.html`] #[derive(Fields)] +#[primary_key(name, context)] struct PgSetting { name: String, setting: String, diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs index 2699503a2fdd5..443fa255f4398 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs @@ -19,6 +19,7 @@ use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::error::Result; #[derive(Fields)] +#[primary_key(object_id, sst_id)] // TODO: is this correct? struct RwHummockBranchedObject { object_id: i64, sst_id: i64, diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs index ac2b96bdc0023..e4f18c8fecaf3 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs @@ -20,6 +20,7 @@ use crate::error::Result; #[derive(Fields)] struct RwHummockPinnedSnapshot { + #[primary_key] worker_node_id: i32, min_pinned_snapshot_id: i64, } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs index 45a8e23f0ecc5..c0a9dd9e7fc45 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs @@ -20,6 +20,7 @@ use crate::error::Result; #[derive(Fields)] struct RwHummockPinnedVersion { + #[primary_key] worker_node_id: 
i32, min_pinned_version_id: i64, } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs index 5551170e57a6f..37d1ceb6486ea 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs @@ -22,6 +22,7 @@ use crate::error::Result; #[derive(Fields)] struct RwHummockVersion { + #[primary_key] version_id: i64, max_committed_epoch: i64, safe_epoch: i64, diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs index ebb969cac462f..f31b1f7c67c5c 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs @@ -21,6 +21,7 @@ use crate::error::Result; #[derive(Fields)] struct RwMetaSnapshot { + #[primary_key] meta_snapshot_id: i64, hummock_version_id: i64, // the smallest epoch this meta snapshot includes From bd2914808dd310dc3c3c401525ab3baf3c3e67cb Mon Sep 17 00:00:00 2001 From: Shanicky Chen Date: Fri, 23 Feb 2024 14:02:12 +0800 Subject: [PATCH 21/35] feat: refine sink into table functionalities (#15160) Co-authored-by: August --- src/meta/src/controller/catalog.rs | 276 +++++++++++++++++++++-- src/meta/src/controller/rename.rs | 19 ++ src/meta/src/controller/streaming_job.rs | 20 +- src/meta/src/controller/utils.rs | 131 +++++++++++ 4 files changed, 423 insertions(+), 23 deletions(-) diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index 6077efa7f88c1..e26e1af0f0cff 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -19,16 +19,18 @@ use std::sync::Arc; use anyhow::anyhow; use itertools::Itertools; use risingwave_common::catalog::{TableOption, DEFAULT_SCHEMA_NAME, SYSTEM_SCHEMAS}; +use 
risingwave_common::util::stream_graph_visitor::visit_stream_node_cont; use risingwave_common::{bail, current_cluster_version}; +use risingwave_meta_model_v2::fragment::StreamNode; use risingwave_meta_model_v2::object::ObjectType; use risingwave_meta_model_v2::prelude::*; use risingwave_meta_model_v2::table::TableType; use risingwave_meta_model_v2::{ - connection, database, function, index, object, object_dependency, schema, sink, source, - streaming_job, table, user_privilege, view, ActorId, ColumnCatalogArray, ConnectionId, - CreateType, DatabaseId, FragmentId, FunctionId, IndexId, JobStatus, ObjectId, - PrivateLinkService, SchemaId, SourceId, StreamSourceInfo, StreamingParallelism, TableId, - UserId, + actor, connection, database, fragment, function, index, object, object_dependency, schema, + sink, source, streaming_job, table, user_privilege, view, ActorId, ActorUpstreamActors, + ColumnCatalogArray, ConnectionId, CreateType, DatabaseId, FragmentId, FunctionId, I32Array, + IndexId, JobStatus, ObjectId, PrivateLinkService, SchemaId, SourceId, StreamSourceInfo, + StreamingParallelism, TableId, UserId, }; use risingwave_pb::catalog::table::PbTableType; use risingwave_pb::catalog::{ @@ -41,6 +43,8 @@ use risingwave_pb::meta::subscribe_response::{ Info as NotificationInfo, Info, Operation as NotificationOperation, Operation, }; use risingwave_pb::meta::{PbRelation, PbRelationGroup}; +use risingwave_pb::stream_plan::stream_node::NodeBody; +use risingwave_pb::stream_plan::FragmentTypeFlag; use risingwave_pb::user::PbUserInfo; use sea_orm::sea_query::{Expr, SimpleExpr}; use sea_orm::ActiveValue::Set; @@ -423,6 +427,7 @@ impl CatalogController { pub async fn clean_dirty_creating_jobs(&self) -> MetaResult { let inner = self.inner.write().await; let txn = inner.db.begin().await?; + let creating_job_ids: Vec = streaming_job::Entity::find() .select_only() .column(streaming_job::Column::JobId) @@ -436,7 +441,14 @@ impl CatalogController { .into_tuple() .all(&txn) .await?; + + 
let changed = Self::clean_dirty_sink_downstreams(&txn).await?; + if creating_job_ids.is_empty() { + if changed { + txn.commit().await?; + } + return Ok(ReleaseContext::default()); } @@ -476,6 +488,7 @@ impl CatalogController { .exec(&txn) .await?; assert!(res.rows_affected > 0); + txn.commit().await?; Ok(ReleaseContext { @@ -485,6 +498,175 @@ impl CatalogController { }) } + async fn clean_dirty_sink_downstreams(txn: &DatabaseTransaction) -> MetaResult { + // clean incoming sink from (table) + // clean upstream fragment ids from (fragment) + // clean stream node from (fragment) + // clean upstream actor ids from (actor) + let all_fragment_ids: Vec = Fragment::find() + .select_only() + .columns(vec![fragment::Column::FragmentId]) + .into_tuple() + .all(txn) + .await?; + + let all_fragment_ids: HashSet<_> = all_fragment_ids.into_iter().collect(); + + let table_sink_ids: Vec = Sink::find() + .select_only() + .column(sink::Column::SinkId) + .filter(sink::Column::TargetTable.is_not_null()) + .into_tuple() + .all(txn) + .await?; + + let all_table_with_incoming_sinks: Vec<(ObjectId, I32Array)> = Table::find() + .select_only() + .columns(vec![table::Column::TableId, table::Column::IncomingSinks]) + .into_tuple() + .all(txn) + .await?; + + let table_incoming_sinks_to_update = all_table_with_incoming_sinks + .into_iter() + .filter(|(_, incoming_sinks)| { + let inner_ref = incoming_sinks.inner_ref(); + !inner_ref.is_empty() + && inner_ref + .iter() + .any(|sink_id| !table_sink_ids.contains(sink_id)) + }) + .collect_vec(); + + let new_table_incoming_sinks = table_incoming_sinks_to_update + .into_iter() + .map(|(table_id, incoming_sinks)| { + let new_incoming_sinks = incoming_sinks + .into_inner() + .extract_if(|id| table_sink_ids.contains(id)) + .collect_vec(); + (table_id, I32Array::from(new_incoming_sinks)) + }) + .collect_vec(); + + // no need to update, returning + if new_table_incoming_sinks.is_empty() { + return Ok(false); + } + + for (table_id, new_incoming_sinks) in 
new_table_incoming_sinks { + tracing::info!("cleaning dirty table sink downstream table {}", table_id); + Table::update_many() + .col_expr(table::Column::IncomingSinks, new_incoming_sinks.into()) + .filter(table::Column::TableId.eq(table_id)) + .exec(txn) + .await?; + + let fragments: Vec<(FragmentId, I32Array, StreamNode, i32)> = Fragment::find() + .select_only() + .columns(vec![ + fragment::Column::FragmentId, + fragment::Column::UpstreamFragmentId, + fragment::Column::StreamNode, + fragment::Column::FragmentTypeMask, + ]) + .filter(fragment::Column::JobId.eq(table_id)) + .into_tuple() + .all(txn) + .await?; + + for (fragment_id, upstream_fragment_ids, stream_node, fragment_mask) in fragments { + let mut upstream_fragment_ids = upstream_fragment_ids.into_inner(); + + let dirty_upstream_fragment_ids = upstream_fragment_ids + .extract_if(|id| !all_fragment_ids.contains(id)) + .collect_vec(); + + if !dirty_upstream_fragment_ids.is_empty() { + // dirty downstream should be materialize fragment of table + assert!(fragment_mask & FragmentTypeFlag::Mview as i32 > 0); + + tracing::info!( + "cleaning dirty table sink fragment {:?} from downstream fragment {}", + dirty_upstream_fragment_ids, + fragment_id + ); + + let mut pb_stream_node = stream_node.to_protobuf(); + + visit_stream_node_cont(&mut pb_stream_node, |node| { + if let Some(NodeBody::Union(_)) = node.node_body { + node.input.retain_mut(|input| { + if let Some(NodeBody::Merge(merge_node)) = &mut input.node_body + && all_fragment_ids + .contains(&(merge_node.upstream_fragment_id as i32)) + { + true + } else { + false + } + }); + } + true + }); + + Fragment::update_many() + .col_expr( + fragment::Column::UpstreamFragmentId, + I32Array::from(upstream_fragment_ids).into(), + ) + .col_expr( + fragment::Column::StreamNode, + StreamNode::from_protobuf(&pb_stream_node).into(), + ) + .filter(fragment::Column::FragmentId.eq(fragment_id)) + .exec(txn) + .await?; + + let actors: Vec<(ActorId, ActorUpstreamActors)> = 
Actor::find() + .select_only() + .columns(vec![ + actor::Column::ActorId, + actor::Column::UpstreamActorIds, + ]) + .filter(actor::Column::FragmentId.eq(fragment_id)) + .into_tuple() + .all(txn) + .await?; + + for (actor_id, upstream_actor_ids) in actors { + let mut upstream_actor_ids = upstream_actor_ids.into_inner(); + + let dirty_actor_upstreams = upstream_actor_ids + .extract_if(|id, _| !all_fragment_ids.contains(id)) + .map(|(id, _)| id) + .collect_vec(); + + if !dirty_actor_upstreams.is_empty() { + tracing::debug!( + "cleaning dirty table sink fragment {:?} from downstream fragment {} actor {}", + dirty_actor_upstreams, + fragment_id, + actor_id, + ); + + Actor::update_many() + .col_expr( + actor::Column::UpstreamActorIds, + ActorUpstreamActors::from(upstream_actor_ids).into(), + ) + .filter(actor::Column::ActorId.eq(actor_id)) + .exec(txn) + .await?; + } + } + } + } + } + + Ok(true) + } + /// `finish_streaming_job` marks job related objects as `Created` and notify frontend. pub async fn finish_streaming_job(&self, job_id: ObjectId) -> MetaResult { let inner = self.inner.write().await; @@ -1487,6 +1669,52 @@ impl CatalogController { ); to_drop_objects.push(obj); + // Special handling for 'sink into table'. + if object_type != ObjectType::Sink { + // When dropping a table downstream, all incoming sinks of the table should be dropped as well. + if object_type == ObjectType::Table { + let table = Table::find_by_id(object_id) + .one(&txn) + .await? 
+ .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?; + + let incoming_sinks = table.incoming_sinks.into_inner(); + + if !incoming_sinks.is_empty() { + let objs: Vec = Object::find() + .filter(object::Column::Oid.is_in(incoming_sinks)) + .into_partial_model() + .all(&txn) + .await?; + + to_drop_objects.extend(objs); + } + } + + let to_drop_object_ids: HashSet<_> = + to_drop_objects.iter().map(|obj| obj.oid).collect(); + + // When there is a table sink in the dependency chain of drop cascade, an error message needs to be returned currently to manually drop the sink. + for obj in &to_drop_objects { + if obj.obj_type == ObjectType::Sink { + let sink = Sink::find_by_id(obj.oid) + .one(&txn) + .await? + .ok_or_else(|| MetaError::catalog_id_not_found("sink", obj.oid))?; + + // Since dropping the sink into the table requires the frontend to handle some of the logic (regenerating the plan), it’s not compatible with the current cascade dropping. + if let Some(target_table) = sink.target_table + && !to_drop_object_ids.contains(&target_table) + { + bail!( + "Found sink into table with sink id {} in dependency, please drop them manually", + obj.oid, + ); + } + } + } + } + let to_drop_table_ids = to_drop_objects .iter() .filter(|obj| obj.obj_type == ObjectType::Table || obj.obj_type == ObjectType::Index) @@ -1856,22 +2084,28 @@ impl CatalogController { }); }}; } - let objs = get_referring_objects(object_id, &txn).await?; - // TODO: For sink into table. when sink into table is ready. - // if object_type == ObjectType::Table { - // let incoming_sinks: Vec<_> = Table::find_by_id(object_id) - // .select_only() - // .column(table::Column::IncomingSinks) - // .into_tuple() - // .one(&txn) - // .await? 
- // .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?; - // objs.extend(incoming_sinks.into_iter().map(|id| PartialObject { - // oid: id as _, - // obj_type: ObjectType::Sink, - // ..Default::default() - // })); - // } + let mut objs = get_referring_objects(object_id, &txn).await?; + if object_type == ObjectType::Table { + let incoming_sinks: I32Array = Table::find_by_id(object_id) + .select_only() + .column(table::Column::IncomingSinks) + .into_tuple() + .one(&txn) + .await? + .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?; + + objs.extend( + incoming_sinks + .into_inner() + .into_iter() + .map(|id| PartialObject { + oid: id, + obj_type: ObjectType::Sink, + schema_id: None, + database_id: None, + }), + ); + } for obj in objs { match obj.obj_type { diff --git a/src/meta/src/controller/rename.rs b/src/meta/src/controller/rename.rs index bde954a587fdf..15be4d7ef83b8 100644 --- a/src/meta/src/controller/rename.rs +++ b/src/meta/src/controller/rename.rs @@ -79,6 +79,7 @@ pub fn alter_relation_rename_refs(definition: &str, from: &str, to: &str) -> Str stmt: CreateSinkStatement { sink_from: CreateSink::AsQuery(query), + into_table_name: None, .. }, } => { @@ -89,9 +90,27 @@ pub fn alter_relation_rename_refs(definition: &str, from: &str, to: &str) -> Str stmt: CreateSinkStatement { sink_from: CreateSink::From(table_name), + into_table_name: None, .. }, } => replace_table_name(table_name, to), + Statement::CreateSink { + stmt: CreateSinkStatement { + sink_from, + into_table_name: Some(table_name), + .. 
+ } + } => { + let idx = table_name.0.len() - 1; + if table_name.0[idx].real_value() == from { + table_name.0[idx] = Ident::new_unchecked(to); + } else { + match sink_from { + CreateSink::From(table_name) => replace_table_name(table_name, to), + CreateSink::AsQuery(query) => QueryRewriter::rewrite_query(query, from, to), + } + } + } _ => unreachable!(), }; stmt.to_string() diff --git a/src/meta/src/controller/streaming_job.rs b/src/meta/src/controller/streaming_job.rs index 9bb8af6172469..7c4360a92f285 100644 --- a/src/meta/src/controller/streaming_job.rs +++ b/src/meta/src/controller/streaming_job.rs @@ -16,6 +16,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::num::NonZeroUsize; use itertools::Itertools; +use risingwave_common::bail; use risingwave_common::buffer::Bitmap; use risingwave_common::hash::{ActorMapping, ParallelUnitId, ParallelUnitMapping}; use risingwave_common::util::column_index_mapping::ColIndexMapping; @@ -64,8 +65,8 @@ use crate::barrier::Reschedule; use crate::controller::catalog::CatalogController; use crate::controller::rename::ReplaceTableExprRewriter; use crate::controller::utils::{ - check_relation_name_duplicate, ensure_object_id, ensure_user_id, get_fragment_actor_ids, - get_fragment_mappings, + check_relation_name_duplicate, check_sink_into_table_cycle, ensure_object_id, ensure_user_id, + get_fragment_actor_ids, get_fragment_mappings, }; use crate::controller::ObjectModel; use crate::manager::{NotificationVersion, SinkId, StreamingJob}; @@ -141,6 +142,21 @@ impl CatalogController { Table::insert(table).exec(&txn).await?; } StreamingJob::Sink(sink, _) => { + if let Some(target_table_id) = sink.target_table { + if check_sink_into_table_cycle( + target_table_id as ObjectId, + sink.dependent_relations + .iter() + .map(|id| *id as ObjectId) + .collect(), + &txn, + ) + .await? 
+ { + bail!("Creating such a sink will result in circular dependency."); + } + } + let job_id = Self::create_streaming_job_obj( &txn, ObjectType::Sink, diff --git a/src/meta/src/controller/utils.rs b/src/meta/src/controller/utils.rs index ff19892d516b5..6c7e61a316add 100644 --- a/src/meta/src/controller/utils.rs +++ b/src/meta/src/controller/utils.rs @@ -118,6 +118,107 @@ pub fn construct_obj_dependency_query(obj_id: ObjectId) -> WithQuery { .to_owned() } +/// This function will construct a query using recursive cte to find if dependent objects are already relying on the target table. +/// +/// # Examples +/// +/// ``` +/// use risingwave_meta::controller::utils::construct_sink_cycle_check_query; +/// use sea_orm::sea_query::*; +/// use sea_orm::*; +/// +/// let query = construct_sink_cycle_check_query(1, vec![2, 3]); +/// +/// assert_eq!( +/// query.to_string(MysqlQueryBuilder), +/// r#"WITH RECURSIVE `used_by_object_ids_with_sink` (`oid`, `used_by`) AS (SELECT `oid`, `used_by` FROM `object_dependency` WHERE `object_dependency`.`oid` = 1 UNION ALL (SELECT `obj_dependency_with_sink`.`oid`, `obj_dependency_with_sink`.`used_by` FROM (SELECT `oid`, `used_by` FROM `object_dependency` UNION ALL (SELECT `sink_id`, `target_table` FROM `sink` WHERE `sink`.`target_table` IS NOT NULL)) AS `obj_dependency_with_sink` INNER JOIN `used_by_object_ids_with_sink` ON `used_by_object_ids_with_sink`.`used_by` = `obj_dependency_with_sink`.`oid` WHERE `used_by_object_ids_with_sink`.`used_by` <> `used_by_object_ids_with_sink`.`oid`)) SELECT COUNT(`used_by_object_ids_with_sink`.`used_by`) FROM `used_by_object_ids_with_sink` WHERE `used_by_object_ids_with_sink`.`used_by` IN (2, 3)"# +/// ); +/// assert_eq!( +/// query.to_string(PostgresQueryBuilder), +/// r#"WITH RECURSIVE "used_by_object_ids_with_sink" ("oid", "used_by") AS (SELECT "oid", "used_by" FROM "object_dependency" WHERE "object_dependency"."oid" = 1 UNION ALL (SELECT "obj_dependency_with_sink"."oid", 
"obj_dependency_with_sink"."used_by" FROM (SELECT "oid", "used_by" FROM "object_dependency" UNION ALL (SELECT "sink_id", "target_table" FROM "sink" WHERE "sink"."target_table" IS NOT NULL)) AS "obj_dependency_with_sink" INNER JOIN "used_by_object_ids_with_sink" ON "used_by_object_ids_with_sink"."used_by" = "obj_dependency_with_sink"."oid" WHERE "used_by_object_ids_with_sink"."used_by" <> "used_by_object_ids_with_sink"."oid")) SELECT COUNT("used_by_object_ids_with_sink"."used_by") FROM "used_by_object_ids_with_sink" WHERE "used_by_object_ids_with_sink"."used_by" IN (2, 3)"# +/// ); +/// assert_eq!( +/// query.to_string(SqliteQueryBuilder), +/// r#"WITH RECURSIVE "used_by_object_ids_with_sink" ("oid", "used_by") AS (SELECT "oid", "used_by" FROM "object_dependency" WHERE "object_dependency"."oid" = 1 UNION ALL SELECT "obj_dependency_with_sink"."oid", "obj_dependency_with_sink"."used_by" FROM (SELECT "oid", "used_by" FROM "object_dependency" UNION ALL SELECT "sink_id", "target_table" FROM "sink" WHERE "sink"."target_table" IS NOT NULL) AS "obj_dependency_with_sink" INNER JOIN "used_by_object_ids_with_sink" ON "used_by_object_ids_with_sink"."used_by" = "obj_dependency_with_sink"."oid" WHERE "used_by_object_ids_with_sink"."used_by" <> "used_by_object_ids_with_sink"."oid") SELECT COUNT("used_by_object_ids_with_sink"."used_by") FROM "used_by_object_ids_with_sink" WHERE "used_by_object_ids_with_sink"."used_by" IN (2, 3)"# +/// ); +/// ``` +pub fn construct_sink_cycle_check_query( + target_table: ObjectId, + dependent_objects: Vec, +) -> WithQuery { + let cte_alias = Alias::new("used_by_object_ids_with_sink"); + let depend_alias = Alias::new("obj_dependency_with_sink"); + + let mut base_query = SelectStatement::new() + .columns([ + object_dependency::Column::Oid, + object_dependency::Column::UsedBy, + ]) + .from(ObjectDependency) + .and_where(object_dependency::Column::Oid.eq(target_table)) + .to_owned(); + + let query_sink_deps = SelectStatement::new() + 
.columns([sink::Column::SinkId, sink::Column::TargetTable]) + .from(Sink) + .and_where(sink::Column::TargetTable.is_not_null()) + .to_owned(); + + let cte_referencing = Query::select() + .column((depend_alias.clone(), object_dependency::Column::Oid)) + .column((depend_alias.clone(), object_dependency::Column::UsedBy)) + .from_subquery( + SelectStatement::new() + .columns([ + object_dependency::Column::Oid, + object_dependency::Column::UsedBy, + ]) + .from(ObjectDependency) + .union(UnionType::All, query_sink_deps) + .to_owned(), + depend_alias.clone(), + ) + .inner_join( + cte_alias.clone(), + Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).eq(Expr::col(( + depend_alias.clone(), + object_dependency::Column::Oid, + ))), + ) + .and_where( + Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).ne(Expr::col(( + cte_alias.clone(), + object_dependency::Column::Oid, + ))), + ) + .to_owned(); + + let common_table_expr = CommonTableExpression::new() + .query(base_query.union(UnionType::All, cte_referencing).to_owned()) + .columns([ + object_dependency::Column::Oid, + object_dependency::Column::UsedBy, + ]) + .table_name(cte_alias.clone()) + .to_owned(); + + SelectStatement::new() + .expr(Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).count()) + .from(cte_alias.clone()) + .and_where( + Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)) + .is_in(dependent_objects), + ) + .to_owned() + .with( + WithClause::new() + .recursive(true) + .cte(common_table_expr) + .to_owned(), + ) + .to_owned() +} + #[derive(Clone, DerivePartialModel, FromQueryResult)] #[sea_orm(entity = "Object")] pub struct PartialObject { @@ -175,6 +276,36 @@ where Ok(objects) } +/// Check if create a sink with given dependent objects into the target table will cause a cycle, return true if it will. 
+pub async fn check_sink_into_table_cycle( + target_table: ObjectId, + dependent_objs: Vec, + db: &C, +) -> MetaResult +where + C: ConnectionTrait, +{ + if dependent_objs.is_empty() { + return Ok(false); + } + + let query = construct_sink_cycle_check_query(target_table, dependent_objs); + let (sql, values) = query.build_any(&*db.get_database_backend().get_query_builder()); + + let res = db + .query_one(Statement::from_sql_and_values( + db.get_database_backend(), + sql, + values, + )) + .await? + .unwrap(); + + let cnt: i64 = res.try_get_by(0)?; + + Ok(cnt != 0) +} + /// `ensure_object_id` ensures the existence of target object in the cluster. pub async fn ensure_object_id( object_type: ObjectType, From 62d897ccc37004fa84e3d35631ee55e60ac35750 Mon Sep 17 00:00:00 2001 From: Zihao Xu Date: Fri, 23 Feb 2024 01:27:22 -0500 Subject: [PATCH 22/35] fix(optimizer): visit ternary ops when offset is specified for `tumble` (#15199) --- e2e_test/streaming/bug_fixes/issue_15198.slt | 23 ++++++++++++++++++++ src/frontend/src/expr/utils.rs | 22 ++++++++++++++----- 2 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 e2e_test/streaming/bug_fixes/issue_15198.slt diff --git a/e2e_test/streaming/bug_fixes/issue_15198.slt b/e2e_test/streaming/bug_fixes/issue_15198.slt new file mode 100644 index 0000000000000..a69aede18c2c9 --- /dev/null +++ b/e2e_test/streaming/bug_fixes/issue_15198.slt @@ -0,0 +1,23 @@ +# https://github.com/risingwavelabs/risingwave/issues/15198 + +statement ok +SET RW_IMPLICIT_FLUSH TO TRUE; + +statement ok +create materialized view "tumble_with_offset" +as ( + with + input as ( + select 1 as id, TO_TIMESTAMP('2024-01-01 01:30:02', 'YYYY-MM-DD HH24:MI:SS') as timestamps + ) + select * + from tumble(input, timestamps, interval '1 DAY', '+6 HOURS') +); + +query ITTT +select * from tumble_with_offset; +---- +1 2024-01-01 01:30:02+00:00 2023-12-31 06:00:00+00:00 2024-01-01 06:00:00+00:00 + +statement ok +drop materialized view tumble_with_offset; diff 
--git a/src/frontend/src/expr/utils.rs b/src/frontend/src/expr/utils.rs index 7f768dbb63994..9db25b3dc554e 100644 --- a/src/frontend/src/expr/utils.rs +++ b/src/frontend/src/expr/utils.rs @@ -498,11 +498,23 @@ impl WatermarkAnalyzer { _ => WatermarkDerivation::None, }, ExprType::Subtract | ExprType::TumbleStart => { - match self.visit_binary_op(func_call.inputs()) { - (Constant, Constant) => Constant, - (Watermark(idx), Constant) => Watermark(idx), - (Nondecreasing, Constant) => Nondecreasing, - _ => WatermarkDerivation::None, + if func_call.inputs().len() == 3 { + // With `offset` specified + // e.g., select * from tumble(t1, start, interval, offset); + assert_eq!(ExprType::TumbleStart, func_call.func_type()); + match self.visit_ternary_op(func_call.inputs()) { + (Constant, Constant, Constant) => Constant, + (Watermark(idx), Constant, Constant) => Watermark(idx), + (Nondecreasing, Constant, Constant) => Nondecreasing, + _ => WatermarkDerivation::None, + } + } else { + match self.visit_binary_op(func_call.inputs()) { + (Constant, Constant) => Constant, + (Watermark(idx), Constant) => Watermark(idx), + (Nondecreasing, Constant) => Nondecreasing, + _ => WatermarkDerivation::None, + } } } ExprType::Multiply | ExprType::Divide | ExprType::Modulus => { From 59ce8df5bc89474380a8a2a2ccab81d7393eb0bc Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Fri, 23 Feb 2024 14:58:03 +0800 Subject: [PATCH 23/35] feat(stream): concurrent fetch for temporal join (take 2) (#15115) Signed-off-by: TennyZhuang --- src/stream/src/cache/managed_lru.rs | 8 ++ src/stream/src/executor/temporal_join.rs | 150 ++++++++++------------- 2 files changed, 76 insertions(+), 82 deletions(-) diff --git a/src/stream/src/cache/managed_lru.rs b/src/stream/src/cache/managed_lru.rs index d91eb664d43a2..9773f3fb51bf0 100644 --- a/src/stream/src/cache/managed_lru.rs +++ b/src/stream/src/cache/managed_lru.rs @@ -156,6 +156,14 @@ impl(&self, k: &Q) -> Option<&V> + where + K: Borrow, + Q: Hash + Eq + ?Sized, + { + 
self.inner.peek(k) + } + pub fn peek_mut(&mut self, k: &K) -> Option> { let v = self.inner.peek_mut(k); v.map(|inner| { diff --git a/src/stream/src/executor/temporal_join.rs b/src/stream/src/executor/temporal_join.rs index 32a0c5747083b..da0ac7b45dbdc 100644 --- a/src/stream/src/executor/temporal_join.rs +++ b/src/stream/src/executor/temporal_join.rs @@ -15,14 +15,13 @@ use std::alloc::Global; use std::collections::hash_map::Entry; use std::collections::HashMap; -use std::ops::{Deref, DerefMut}; use std::pin::pin; use std::sync::Arc; use either::Either; use futures::stream::{self, PollNext}; use futures::{pin_mut, StreamExt, TryStreamExt}; -use futures_async_stream::try_stream; +use futures_async_stream::{for_await, try_stream}; use local_stats_alloc::{SharedStatsAlloc, StatsAlloc}; use lru::DefaultHasher; use risingwave_common::array::{Op, StreamChunk}; @@ -108,99 +107,84 @@ impl JoinEntry { } } -struct JoinEntryWrapper(Option); - -impl EstimateSize for JoinEntryWrapper { - fn estimated_heap_size(&self) -> usize { - self.0.estimated_heap_size() - } -} - -impl JoinEntryWrapper { - const MESSAGE: &'static str = "the state should always be `Some`"; - - /// Take the value out of the wrapper. Panic if the value is `None`. - pub fn take(&mut self) -> JoinEntry { - self.0.take().expect(Self::MESSAGE) - } -} - -impl Deref for JoinEntryWrapper { - type Target = JoinEntry; - - fn deref(&self) -> &Self::Target { - self.0.as_ref().expect(Self::MESSAGE) - } -} - -impl DerefMut for JoinEntryWrapper { - fn deref_mut(&mut self) -> &mut Self::Target { - self.0.as_mut().expect(Self::MESSAGE) - } -} - struct TemporalSide { source: StorageTable, table_stream_key_indices: Vec, table_output_indices: Vec, - cache: ManagedLruCache>, + cache: ManagedLruCache>, ctx: ActorContextRef, join_key_data_types: Vec, } impl TemporalSide { - /// Lookup the temporal side table and return a `JoinEntry` which could be empty if there are no - /// matched records. 
- async fn lookup(&mut self, key: &K, epoch: HummockEpoch) -> StreamExecutorResult { + /// Fetch records from temporal side table and ensure the entry in the cache. + /// If already exists, the entry will be promoted. + async fn fetch_or_promote_keys( + &mut self, + keys: impl Iterator, + epoch: HummockEpoch, + ) -> StreamExecutorResult<()> { let table_id_str = self.source.table_id().to_string(); let actor_id_str = self.ctx.id.to_string(); let fragment_id_str = self.ctx.id.to_string(); - self.ctx - .streaming_metrics - .temporal_join_total_query_cache_count - .with_label_values(&[&table_id_str, &actor_id_str, &fragment_id_str]) - .inc(); - - let res = if self.cache.contains(key) { - let mut state = self.cache.peek_mut(key).unwrap(); - state.take() - } else { - // cache miss + + let mut futs = Vec::with_capacity(keys.size_hint().1.unwrap_or(0)); + for key in keys { self.ctx .streaming_metrics - .temporal_join_cache_miss_count + .temporal_join_total_query_cache_count .with_label_values(&[&table_id_str, &actor_id_str, &fragment_id_str]) .inc(); - let pk_prefix = key.deserialize(&self.join_key_data_types)?; - - let iter = self - .source - .batch_iter_with_pk_bounds( - HummockReadEpoch::NoWait(epoch), - &pk_prefix, - .., - false, - PrefetchOptions::default(), - ) - .await?; - - let mut entry = JoinEntry::default(); - - pin_mut!(iter); - while let Some(row) = iter.next_row().await? 
{ - entry.insert( - row.as_ref() - .project(&self.table_stream_key_indices) - .into_owned_row(), - row.project(&self.table_output_indices).into_owned_row(), - ); + if self.cache.get(key).is_none() { + self.ctx + .streaming_metrics + .temporal_join_cache_miss_count + .with_label_values(&[&table_id_str, &actor_id_str, &fragment_id_str]) + .inc(); + + futs.push(async { + let pk_prefix = key.deserialize(&self.join_key_data_types)?; + + let iter = self + .source + .batch_iter_with_pk_bounds( + HummockReadEpoch::NoWait(epoch), + &pk_prefix, + .., + false, + PrefetchOptions::default(), + ) + .await?; + + let mut entry = JoinEntry::default(); + + pin_mut!(iter); + while let Some(row) = iter.next_row().await? { + entry.insert( + row.as_ref() + .project(&self.table_stream_key_indices) + .into_owned_row(), + row.project(&self.table_output_indices).into_owned_row(), + ); + } + let key = key.clone(); + Ok((key, entry)) as StreamExecutorResult<_> + }); } + } - entry - }; + #[for_await] + for res in stream::iter(futs).buffered(16) { + let (key, entry) = res?; + self.cache.put(key, entry); + } + + Ok(()) + } - Ok(res) + fn force_peek(&self, key: &K) -> &JoinEntry { + self.cache.peek(key).expect("key should exists") } fn update( @@ -230,10 +214,6 @@ impl TemporalSide { } Ok(()) } - - pub fn insert_back(&mut self, key: K, state: JoinEntry) { - self.cache.put(key, JoinEntryWrapper(Some(state))); - } } enum InternalMessage { @@ -428,12 +408,20 @@ impl TemporalJoinExecutor ); let epoch = prev_epoch.expect("Chunk data should come after some barrier."); let keys = K::build(&self.left_join_keys, chunk.data_chunk())?; + let to_fetch_keys = chunk + .visibility() + .iter() + .zip_eq_debug(keys.iter()) + .filter_map(|(vis, key)| if vis { Some(key) } else { None }); + self.right_table + .fetch_or_promote_keys(to_fetch_keys, epoch) + .await?; for (r, key) in chunk.rows_with_holes().zip_eq_debug(keys.into_iter()) { let Some((op, left_row)) = r else { continue; }; if 
key.null_bitmap().is_subset(&null_matched) - && let join_entry = self.right_table.lookup(&key, epoch).await? + && let join_entry = self.right_table.force_peek(&key) && !join_entry.is_empty() { for right_row in join_entry.cached.values() { @@ -455,8 +443,6 @@ impl TemporalJoinExecutor } } } - // Insert back the state taken from ht. - self.right_table.insert_back(key.clone(), join_entry); } else if T == JoinType::LeftOuter { if let Some(chunk) = builder.append_row_update(op, left_row) { yield Message::Chunk(chunk); From 6033ee6c2a63bbb2d3c5147987c8e08b3b010de2 Mon Sep 17 00:00:00 2001 From: Noel Kwan <47273164+kwannoel@users.noreply.github.com> Date: Fri, 23 Feb 2024 15:02:47 +0800 Subject: [PATCH 24/35] feat(cmd_all): create directories in `single_node` mode (#15176) --- src/cmd_all/src/bin/risingwave.rs | 1 + src/cmd_all/src/single_node.rs | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/cmd_all/src/bin/risingwave.rs b/src/cmd_all/src/bin/risingwave.rs index 2c167fc1bdc20..e9173abefe1df 100644 --- a/src/cmd_all/src/bin/risingwave.rs +++ b/src/cmd_all/src/bin/risingwave.rs @@ -239,6 +239,7 @@ fn standalone(opts: StandaloneOpts) { /// high level options to standalone mode node-level options. /// We will start a standalone instance, with all nodes in the same process. 
fn single_node(opts: SingleNodeOpts) { + opts.create_store_directories().unwrap(); let opts = risingwave_cmd_all::map_single_node_opts_to_standalone_opts(&opts); let settings = risingwave_rt::LoggerSettings::from_opts(&opts) .with_target("risingwave_storage", Level::WARN) diff --git a/src/cmd_all/src/single_node.rs b/src/cmd_all/src/single_node.rs index b89f861f6e4fd..042a0feee9863 100644 --- a/src/cmd_all/src/single_node.rs +++ b/src/cmd_all/src/single_node.rs @@ -14,6 +14,7 @@ use std::sync::LazyLock; +use anyhow::Result; use clap::Parser; use home::home_dir; use risingwave_common::config::{AsyncStackTraceOption, MetaBackend}; @@ -64,7 +65,7 @@ pub struct SingleNodeOpts { /// The store directory used by meta store and object store. #[clap(long, env = "RW_SINGLE_NODE_STORE_DIRECTORY")] - store_directory: Option, + pub store_directory: Option, /// The address of the meta node. #[clap(long, env = "RW_SINGLE_NODE_META_ADDR")] @@ -142,6 +143,7 @@ pub fn map_single_node_opts_to_standalone_opts(opts: &SingleNodeOpts) -> ParsedS } } +// Defaults impl SingleNodeOpts { fn default_frontend_opts() -> FrontendOpts { FrontendOpts { @@ -227,3 +229,15 @@ impl SingleNodeOpts { } } } + +impl SingleNodeOpts { + pub fn create_store_directories(&self) -> Result<()> { + let store_directory = self + .store_directory + .as_ref() + .unwrap_or_else(|| &*DEFAULT_STORE_DIRECTORY); + std::fs::create_dir_all(format!("{}/meta_store", store_directory))?; + std::fs::create_dir_all(format!("{}/state_store", store_directory))?; + Ok(()) + } +} From fb6dbe2b7dc287c5710bf2c21ee34160e036fdf8 Mon Sep 17 00:00:00 2001 From: Kexiang Wang Date: Fri, 23 Feb 2024 02:33:54 -0500 Subject: [PATCH 25/35] fix(expr): align timestamp(tz)'s output format with pg's (#15053) --- Cargo.lock | 1 + e2e_test/batch/basic/make_time.slt.part | 33 ++++++++++++++++++++++--- src/common/src/types/datetime.rs | 32 +++++++++++++++--------- src/common/src/types/timestamptz.rs | 22 ++++++++++++++++- src/expr/impl/Cargo.toml | 1 + 
src/expr/impl/src/scalar/make_time.rs | 4 +-- src/expr/impl/src/scalar/timestamptz.rs | 12 +++------ src/frontend/src/handler/util.rs | 11 ++++----- 8 files changed, 82 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1c074e276553a..b67e3700387de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9195,6 +9195,7 @@ dependencies = [ "async-trait", "auto_enums", "chrono", + "chrono-tz", "criterion", "expect-test", "fancy-regex", diff --git a/e2e_test/batch/basic/make_time.slt.part b/e2e_test/batch/basic/make_time.slt.part index 7a11b837c4fdb..ff1d4453e0efd 100644 --- a/e2e_test/batch/basic/make_time.slt.part +++ b/e2e_test/batch/basic/make_time.slt.part @@ -9,7 +9,12 @@ SELECT make_timestamptz(1973, 07, 15, 08, 15, 55.33); query T SELECT make_timestamptz(-1973, 07, 15, 08, 15, 55.33); ---- --1972-07-15 08:15:55.330+00:00 +1973-07-15 08:15:55.330+00:00 BC + +query T +SELECT make_timestamptz(20240, 1, 26, 14, 20, 26); +---- +20240-01-26 14:20:26+00:00 query error Invalid parameter year, month, day: invalid date: -3-2-29 SELECT make_timestamptz(-4, 02, 29, 08, 15, 55.33); @@ -17,7 +22,7 @@ SELECT make_timestamptz(-4, 02, 29, 08, 15, 55.33); query T SELECT make_timestamptz(-5, 02, 29, 08, 15, 55.33); ---- --0004-02-29 08:15:55.330+00:00 +0005-02-29 08:15:55.330+00:00 BC query error Invalid parameter sec: invalid sec: -55.33 SELECT make_timestamptz(1973, 07, 15, 08, 15, -55.33); @@ -105,6 +110,11 @@ SELECT make_date(2024, 1, 26); ---- 2024-01-26 +query T +SELECT make_date(20240, 1, 26); +---- +20240-01-26 + query T SELECT make_date(-2024, 1, 26); ---- @@ -146,10 +156,15 @@ SELECT make_timestamp(2024, 1, 26, 14, 20, 26); ---- 2024-01-26 14:20:26 +query T +SELECT make_timestamp(20240, 1, 26, 14, 20, 26); +---- +20240-01-26 14:20:26 + query T SELECT make_timestamp(-1973, 07, 15, 08, 15, 55.33); ---- --1972-07-15 08:15:55.330 +1973-07-15 08:15:55.330 BC query error Invalid parameter year, month, day: invalid date: -3-2-29 SELECT make_timestamp(-4, 02, 29, 
08, 15, 55.33); @@ -157,4 +172,14 @@ SELECT make_timestamp(-4, 02, 29, 08, 15, 55.33); query T SELECT make_timestamp(-5, 02, 29, 08, 15, 55.33); ---- --0004-02-29 08:15:55.330 +0005-02-29 08:15:55.330 BC + +query T +select '0001-01-01 12:34:56'::timestamp - '10 year'::interval; +---- +0010-01-01 12:34:56 BC + +query T +select '0001-01-01 12:34:56'::timestamptz - '10 year'::interval; +---- +0010-01-01 12:34:56+00:00 BC diff --git a/src/common/src/types/datetime.rs b/src/common/src/types/datetime.rs index af6b54b057c82..c609017d06e3f 100644 --- a/src/common/src/types/datetime.rs +++ b/src/common/src/types/datetime.rs @@ -328,17 +328,15 @@ impl ToText for Date { /// ``` fn write(&self, f: &mut W) -> std::fmt::Result { let (ce, year) = self.0.year_ce(); - if ce { - write!(f, "{}", self.0) - } else { - write!( - f, - "{:04}-{:02}-{:02} BC", - year, - self.0.month(), - self.0.day() - ) - } + let suffix = if ce { "" } else { " BC" }; + write!( + f, + "{:04}-{:02}-{:02}{}", + year, + self.0.month(), + self.0.day(), + suffix + ) } fn write_with_type(&self, ty: &DataType, f: &mut W) -> std::fmt::Result { @@ -364,7 +362,17 @@ impl ToText for Time { impl ToText for Timestamp { fn write(&self, f: &mut W) -> std::fmt::Result { - write!(f, "{}", self.0) + let (ce, year) = self.0.year_ce(); + let suffix = if ce { "" } else { " BC" }; + write!( + f, + "{:04}-{:02}-{:02} {}{}", + year, + self.0.month(), + self.0.day(), + self.0.time(), + suffix + ) } fn write_with_type(&self, ty: &DataType, f: &mut W) -> std::fmt::Result { diff --git a/src/common/src/types/timestamptz.rs b/src/common/src/types/timestamptz.rs index 41359bdf84377..f14d5d2edee6e 100644 --- a/src/common/src/types/timestamptz.rs +++ b/src/common/src/types/timestamptz.rs @@ -17,7 +17,7 @@ use std::io::Write; use std::str::FromStr; use bytes::{Bytes, BytesMut}; -use chrono::{TimeZone, Utc}; +use chrono::{DateTime, Datelike, TimeZone, Utc}; use chrono_tz::Tz; use postgres_types::{accepts, to_sql_checked, IsNull, ToSql, 
Type}; use serde::{Deserialize, Serialize}; @@ -201,6 +201,26 @@ impl std::fmt::Display for Timestamptz { } } +pub fn write_date_time_tz( + instant_local: DateTime, + writer: &mut impl std::fmt::Write, +) -> std::fmt::Result { + let date = instant_local.date_naive(); + let (ce, year) = date.year_ce(); + write!( + writer, + "{:04}-{:02}-{:02} {}", + year, + date.month(), + date.day(), + instant_local.format(if ce { + "%H:%M:%S%.f%:z" + } else { + "%H:%M:%S%.f%:z BC" + }) + ) +} + #[cfg(test)] mod test { use super::*; diff --git a/src/expr/impl/Cargo.toml b/src/expr/impl/Cargo.toml index a8c66be8a2281..e7b765820dfdd 100644 --- a/src/expr/impl/Cargo.toml +++ b/src/expr/impl/Cargo.toml @@ -25,6 +25,7 @@ chrono = { version = "0.4", default-features = false, features = [ "clock", "std", ] } +chrono-tz = { version = "0.8", features = ["case-insensitive"] } fancy-regex = "0.13" futures-async-stream = { workspace = true } futures-util = "0.3" diff --git a/src/expr/impl/src/scalar/make_time.rs b/src/expr/impl/src/scalar/make_time.rs index add8759299197..ae1ff58033eed 100644 --- a/src/expr/impl/src/scalar/make_time.rs +++ b/src/expr/impl/src/scalar/make_time.rs @@ -129,8 +129,6 @@ mod tests { use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; use risingwave_common::types::{Date, Timestamp}; - /// This test is to testify that our `Date` expressess a year `-X` as `X+1 BC`, while `Timestamp` expresses it as `-X`. - /// Can be removed if we change the `ToText` implementation of `Date` or `Timestamp`. 
#[test] fn test_naive_date_and_time() { let year = -1973; @@ -154,7 +152,7 @@ mod tests { let date_time = Timestamp(NaiveDateTime::new(naive_date, naive_time)); assert_eq!( date_time.to_string(), - String::from("-1973-02-02 12:34:56.789") + String::from("1974-02-02 12:34:56.789 BC") ); } } diff --git a/src/expr/impl/src/scalar/timestamptz.rs b/src/expr/impl/src/scalar/timestamptz.rs index 83e77011ec6be..7e7f1a0164d6a 100644 --- a/src/expr/impl/src/scalar/timestamptz.rs +++ b/src/expr/impl/src/scalar/timestamptz.rs @@ -15,7 +15,9 @@ use std::fmt::Write; use num_traits::CheckedNeg; -use risingwave_common::types::{CheckedAdd, Interval, IntoOrdered, Timestamp, Timestamptz, F64}; +use risingwave_common::types::{ + write_date_time_tz, CheckedAdd, Interval, IntoOrdered, Timestamp, Timestamptz, F64, +}; use risingwave_expr::{function, ExprError, Result}; use thiserror_ext::AsReport; @@ -72,13 +74,7 @@ pub fn timestamptz_to_string( ) -> Result<()> { let time_zone = Timestamptz::lookup_time_zone(time_zone).map_err(time_zone_err)?; let instant_local = elem.to_datetime_in_zone(time_zone); - write!( - writer, - "{}", - instant_local.format("%Y-%m-%d %H:%M:%S%.f%:z") - ) - .map_err(|e| ExprError::Internal(e.into()))?; - Ok(()) + write_date_time_tz(instant_local, writer).map_err(|e| ExprError::Internal(e.into())) } // Tries to interpret the string with a timezone, and if failing, tries to interpret the string as a diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index ab9d4fe415b33..6c9a9bb45f2ac 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use std::task::{Context, Poll}; use anyhow::Context as _; -use bytes::Bytes; +use bytes::{Bytes, BytesMut}; use futures::Stream; use itertools::Itertools; use pgwire::pg_field_descriptor::PgFieldDescriptor; @@ -29,7 +29,7 @@ use pin_project_lite::pin_project; use risingwave_common::array::DataChunk; use 
risingwave_common::catalog::Field; use risingwave_common::row::Row as _; -use risingwave_common::types::{DataType, ScalarRefImpl, Timestamptz}; +use risingwave_common::types::{write_date_time_tz, DataType, ScalarRefImpl, Timestamptz}; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; use risingwave_connector::source::KAFKA_CONNECTOR; @@ -139,10 +139,9 @@ fn timestamptz_to_string_with_session_data( let tz = d.into_timestamptz(); let time_zone = Timestamptz::lookup_time_zone(&session_data.timezone).unwrap(); let instant_local = tz.to_datetime_in_zone(time_zone); - instant_local - .format("%Y-%m-%d %H:%M:%S%.f%:z") - .to_string() - .into() + let mut result_string = BytesMut::new(); + write_date_time_tz(instant_local, &mut result_string).unwrap(); + result_string.into() } fn to_pg_rows( From 2cec3445a9c7c1de59e9aa7d628dc61bb968ddf3 Mon Sep 17 00:00:00 2001 From: Noel Kwan <47273164+kwannoel@users.noreply.github.com> Date: Fri, 23 Feb 2024 15:39:36 +0800 Subject: [PATCH 26/35] feat(connector): partition source reader batch_size by `rate_limit` (#13800) Co-authored-by: kwannoel --- .../{rate_limit.slt => rate_limit/basic.slt} | 0 .../rate_limit/upstream_amplification.slt | 44 +++++++++++++++++++ src/batch/src/executor/source.rs | 1 + src/connector/src/parser/mod.rs | 25 ++++++++++- src/connector/src/source/base.rs | 3 ++ src/stream/src/from_proto/source/fs_fetch.rs | 1 + .../src/from_proto/source/trad_source.rs | 1 + 7 files changed, 73 insertions(+), 2 deletions(-) rename e2e_test/streaming/{rate_limit.slt => rate_limit/basic.slt} (100%) create mode 100644 e2e_test/streaming/rate_limit/upstream_amplification.slt diff --git a/e2e_test/streaming/rate_limit.slt b/e2e_test/streaming/rate_limit/basic.slt similarity index 100% rename from e2e_test/streaming/rate_limit.slt rename to e2e_test/streaming/rate_limit/basic.slt diff --git a/e2e_test/streaming/rate_limit/upstream_amplification.slt 
b/e2e_test/streaming/rate_limit/upstream_amplification.slt new file mode 100644 index 0000000000000..71be801a78fc2 --- /dev/null +++ b/e2e_test/streaming/rate_limit/upstream_amplification.slt @@ -0,0 +1,44 @@ +# This test will test that barrier latency does not spike +# when there's rate limit on source. +# The upstream side should backpressure the source reader, +# but still allow barriers to flow through. + +statement ok +SET STREAMING_PARALLELISM=2; + +statement ok +SET STREAMING_RATE_LIMIT=1; + +statement ok +CREATE TABLE source_table (i1 int) +WITH ( + connector = 'datagen', + fields.i1.start = '1', + fields.i1.end = '5', + datagen.rows.per.second = '10000' +) FORMAT PLAIN ENCODE JSON; + +statement ok +CREATE SINK sink AS + SELECT x.i1 as i1 FROM source_table x + JOIN source_table s1 ON x.i1 = s1.i1 + JOIN source_table s2 ON x.i1 = s2.i1 + JOIN source_table s3 ON x.i1 = s3.i1 + WITH (connector = 'blackhole'); + +# The following sequence of FLUSH should be fast, since barrier should be able to bypass sink. +# Otherwise, these FLUSH will take a long time to complete, and trigger timeout. 
+statement ok +flush; + +statement ok +flush; + +statement ok +flush; + +statement ok +drop sink sink; + +statement ok +drop table source_table; \ No newline at end of file diff --git a/src/batch/src/executor/source.rs b/src/batch/src/executor/source.rs index e67d4b26f850d..fe053ff63dfc8 100644 --- a/src/batch/src/executor/source.rs +++ b/src/batch/src/executor/source.rs @@ -87,6 +87,7 @@ impl BoxedExecutorBuilder for SourceExecutor { }; let source_ctrl_opts = SourceCtrlOpts { chunk_size: source.context().get_config().developer.chunk_size, + rate_limit: None, }; let column_ids: Vec<_> = source_node diff --git a/src/connector/src/parser/mod.rs b/src/connector/src/parser/mod.rs index c5b470db966ab..952ccd9774d39 100644 --- a/src/connector/src/parser/mod.rs +++ b/src/connector/src/parser/mod.rs @@ -56,7 +56,7 @@ use crate::schema::schema_registry::SchemaRegistryAuth; use crate::source::monitor::GLOBAL_SOURCE_METRICS; use crate::source::{ extract_source_struct, BoxSourceStream, ChunkSourceStream, SourceColumnDesc, SourceColumnType, - SourceContext, SourceContextRef, SourceEncode, SourceFormat, SourceMeta, + SourceContext, SourceContextRef, SourceEncode, SourceFormat, SourceMessage, SourceMeta, }; pub mod additional_columns; @@ -556,6 +556,21 @@ pub trait ByteStreamSourceParser: Send + Debug + Sized + 'static { } } +#[try_stream(ok = Vec, error = anyhow::Error)] +async fn ensure_largest_at_rate_limit(stream: BoxSourceStream, rate_limit: u32) { + #[for_await] + for batch in stream { + let mut batch = batch?; + let mut start = 0; + let end = batch.len(); + while start < end { + let next = std::cmp::min(start + rate_limit as usize, end); + yield std::mem::take(&mut batch[start..next].as_mut()).to_vec(); + start = next; + } + } +} + #[easy_ext::ext(SourceParserIntoStreamExt)] impl P { /// Parse a data stream of one source split into a stream of [`StreamChunk`]. @@ -568,9 +583,15 @@ impl P { /// /// A [`ChunkSourceStream`] which is a stream of parsed messages. 
pub fn into_stream(self, data_stream: BoxSourceStream) -> impl ChunkSourceStream { - // Enable tracing to provide more information for parsing failures. let source_info = self.source_ctx().source_info.clone(); + // Ensure chunk size is smaller than rate limit + let data_stream = if let Some(rate_limit) = &self.source_ctx().source_ctrl_opts.rate_limit { + Box::pin(ensure_largest_at_rate_limit(data_stream, *rate_limit)) + } else { + data_stream + }; + // The parser stream will be long-lived. We use `instrument_with` here to create // a new span for the polling of each chunk. into_chunk_stream(self, data_stream).instrument_with(move || { diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs index fed8e0263aac4..e26bc2dbcb401 100644 --- a/src/connector/src/source/base.rs +++ b/src/connector/src/source/base.rs @@ -133,12 +133,15 @@ pub struct SourceCtrlOpts { // comes from developer::stream_chunk_size in stream scenario and developer::batch_chunk_size // in batch scenario pub chunk_size: usize, + /// Rate limit of source + pub rate_limit: Option, } impl Default for SourceCtrlOpts { fn default() -> Self { Self { chunk_size: MAX_CHUNK_SIZE, + rate_limit: None, } } } diff --git a/src/stream/src/from_proto/source/fs_fetch.rs b/src/stream/src/from_proto/source/fs_fetch.rs index 97cf26129d8d5..2cedce5a8cd0a 100644 --- a/src/stream/src/from_proto/source/fs_fetch.rs +++ b/src/stream/src/from_proto/source/fs_fetch.rs @@ -61,6 +61,7 @@ impl ExecutorBuilder for FsFetchExecutorBuilder { ); let source_ctrl_opts = SourceCtrlOpts { chunk_size: params.env.config().developer.chunk_size, + rate_limit: source.rate_limit.map(|x| x as _), }; let source_column_ids: Vec<_> = source_desc_builder diff --git a/src/stream/src/from_proto/source/trad_source.rs b/src/stream/src/from_proto/source/trad_source.rs index 28d923ffb69cc..142b4ad9e1553 100644 --- a/src/stream/src/from_proto/source/trad_source.rs +++ b/src/stream/src/from_proto/source/trad_source.rs @@ -157,6 
+157,7 @@ impl ExecutorBuilder for SourceExecutorBuilder { let source_ctrl_opts = SourceCtrlOpts { chunk_size: params.env.config().developer.chunk_size, + rate_limit: source.rate_limit.map(|x| x as _), }; let source_column_ids: Vec<_> = source_desc_builder From 9c1af73128d73640abb1dcc22b41a54a63615540 Mon Sep 17 00:00:00 2001 From: xxchan Date: Fri, 23 Feb 2024 15:41:24 +0800 Subject: [PATCH 27/35] fix(tracing): fix missing new barrier enqueued event (#15216) --- src/meta/src/barrier/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/meta/src/barrier/mod.rs b/src/meta/src/barrier/mod.rs index bd2f24f1baf46..b64829c9005c5 100644 --- a/src/meta/src/barrier/mod.rs +++ b/src/meta/src/barrier/mod.rs @@ -608,7 +608,9 @@ impl GlobalBarrierManager { }; // Tracing related stuff - tracing::info!(target: "rw_tracing", parent: prev_epoch.span(), epoch = curr_epoch.value().0, "new barrier enqueued"); + prev_epoch.span().in_scope(|| { + tracing::info!(target: "rw_tracing", epoch = curr_epoch.value().0, "new barrier enqueued"); + }); span.record("epoch", curr_epoch.value().0); let command_ctx = Arc::new(CommandContext::new( From b0b9fb43298a7e319af3aafd728de272cd199412 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Fri, 23 Feb 2024 17:23:59 +0800 Subject: [PATCH 28/35] refactor(connector): migrate `anyhow::Error` to `ConnectorError` newtype (#15042) Signed-off-by: Bugen Zhao --- e2e_test/source/basic/ddl.slt | 3 +- src/batch/src/error.rs | 7 +++ src/batch/src/executor/iceberg_scan.rs | 6 +-- src/common/src/field_generator/mod.rs | 1 + src/connector/src/aws_utils.rs | 3 +- src/connector/src/common.rs | 53 +++++++++++-------- src/connector/src/error.rs | 46 +++++++++++++++- src/connector/src/macros.rs | 4 +- .../src/parser/additional_columns.rs | 3 +- src/connector/src/parser/avro/parser.rs | 22 ++++---- .../src/parser/avro/schema_resolver.rs | 9 ++-- src/connector/src/parser/avro/util.rs | 29 +++++----- src/connector/src/parser/bytes_parser.rs | 5 +- 
.../src/parser/canal/simd_json_parser.rs | 10 ++-- src/connector/src/parser/csv_parser.rs | 9 ++-- .../src/parser/debezium/avro_parser.rs | 13 ++--- .../src/parser/debezium/debezium_parser.rs | 13 ++--- .../src/parser/debezium/mongo_json_parser.rs | 10 ++-- .../src/parser/debezium/simd_json_parser.rs | 13 +++-- src/connector/src/parser/json_parser.rs | 21 ++++---- .../src/parser/maxwell/maxwell_parser.rs | 7 +-- src/connector/src/parser/mod.rs | 30 ++++++----- src/connector/src/parser/plain_parser.rs | 11 ++-- src/connector/src/parser/protobuf/parser.rs | 26 ++++----- .../src/parser/protobuf/schema_resolver.rs | 3 +- src/connector/src/parser/unified/avro.rs | 10 ++-- src/connector/src/parser/upsert_parser.rs | 9 ++-- src/connector/src/parser/util.rs | 7 +-- src/connector/src/schema/mod.rs | 4 +- src/connector/src/sink/clickhouse.rs | 3 +- src/connector/src/sink/iceberg/jni_catalog.rs | 5 +- src/connector/src/sink/iceberg/mod.rs | 50 ++++++++++------- src/connector/src/sink/kinesis.rs | 2 +- src/connector/src/sink/log_store.rs | 19 +++---- src/connector/src/sink/mod.rs | 7 +++ src/connector/src/sink/nats.rs | 17 +++--- src/connector/src/sink/redis.rs | 3 +- src/connector/src/sink/remote.rs | 34 ++++++------ src/connector/src/source/base.rs | 37 +++++++------ .../src/source/cdc/enumerator/mod.rs | 21 ++++---- src/connector/src/source/cdc/external/mod.rs | 2 +- .../src/source/cdc/external/postgres.rs | 2 +- src/connector/src/source/cdc/mod.rs | 5 +- src/connector/src/source/cdc/source/reader.rs | 18 +++---- src/connector/src/source/cdc/split.rs | 13 ++--- src/connector/src/source/common.rs | 5 +- .../src/source/datagen/enumerator/mod.rs | 9 ++-- .../src/source/datagen/source/generator.rs | 7 +-- .../src/source/datagen/source/reader.rs | 15 ++++-- src/connector/src/source/datagen/split.rs | 8 +-- .../src/source/filesystem/file_common.rs | 14 ++--- .../src/source/filesystem/nd_streaming.rs | 8 +-- .../filesystem/opendal_source/gcs_source.rs | 3 +- 
.../source/filesystem/opendal_source/mod.rs | 9 ++-- .../opendal_source/opendal_enumerator.rs | 9 ++-- .../opendal_source/opendal_reader.rs | 7 +-- .../opendal_source/posix_fs_source.rs | 3 +- .../filesystem/opendal_source/s3_source.rs | 3 +- .../src/source/filesystem/s3/enumerator.rs | 4 +- .../src/source/filesystem/s3/source/reader.rs | 11 ++-- .../src/source/filesystem/s3_v2/lister.rs | 3 +- .../source/google_pubsub/enumerator/client.rs | 13 +++-- src/connector/src/source/google_pubsub/mod.rs | 3 +- .../src/source/google_pubsub/source/reader.rs | 7 +-- .../src/source/google_pubsub/split.rs | 8 +-- src/connector/src/source/iceberg/mod.rs | 11 ++-- .../src/source/kafka/enumerator/client.rs | 44 +++++++-------- .../src/source/kafka/private_link.rs | 29 +++++----- .../src/source/kafka/source/reader.rs | 5 +- src/connector/src/source/kafka/split.rs | 8 +-- .../src/source/kinesis/enumerator/client.rs | 14 +++-- .../src/source/kinesis/source/reader.rs | 33 ++++++------ src/connector/src/source/kinesis/split.rs | 8 +-- .../src/source/nats/enumerator/mod.rs | 11 ++-- .../src/source/nats/source/reader.rs | 16 +++--- src/connector/src/source/nats/split.rs | 8 +-- .../src/source/nexmark/enumerator/mod.rs | 8 +-- .../src/source/nexmark/source/reader.rs | 9 ++-- src/connector/src/source/nexmark/split.rs | 8 +-- .../src/source/pulsar/enumerator/client.rs | 8 +-- .../src/source/pulsar/source/reader.rs | 36 +++++++------ src/connector/src/source/pulsar/split.rs | 8 +-- src/connector/src/source/pulsar/topic.rs | 21 ++++---- src/connector/src/source/reader/desc.rs | 5 +- src/connector/src/source/reader/fs_reader.rs | 8 +-- src/connector/src/source/reader/reader.rs | 15 +++--- src/connector/src/source/test_source.rs | 23 ++++---- src/frontend/src/error.rs | 7 +++ src/frontend/src/scheduler/error.rs | 8 +++ src/meta/service/src/cloud_service.rs | 17 +++--- src/meta/src/controller/catalog.rs | 2 +- src/meta/src/error.rs | 8 +++ src/meta/src/rpc/ddl_controller.rs | 7 +-- 
src/meta/src/stream/source_manager.rs | 36 +++++++------ .../src/executor/source/fs_source_executor.rs | 3 +- 95 files changed, 691 insertions(+), 507 deletions(-) diff --git a/e2e_test/source/basic/ddl.slt b/e2e_test/source/basic/ddl.slt index 6e640e047d4c2..402cf129b86ba 100644 --- a/e2e_test/source/basic/ddl.slt +++ b/e2e_test/source/basic/ddl.slt @@ -28,7 +28,8 @@ db error: ERROR: Failed to run the query Caused by these errors (recent errors listed first): 1: gRPC request to meta service failed: Internal error 2: failed to create source worker - 3: missing field `properties.bootstrap.server` + 3: failed to parse json + 4: missing field `properties.bootstrap.server` statement error diff --git a/src/batch/src/error.rs b/src/batch/src/error.rs index 5631707e2f422..f4d7341cbc6dd 100644 --- a/src/batch/src/error.rs +++ b/src/batch/src/error.rs @@ -20,6 +20,7 @@ pub use anyhow::anyhow; use risingwave_common::array::ArrayError; use risingwave_common::error::BoxedError; use risingwave_common::util::value_encoding::error::ValueEncodingError; +use risingwave_connector::error::ConnectorError; use risingwave_dml::error::DmlError; use risingwave_expr::ExprError; use risingwave_pb::PbFieldNotFound; @@ -156,3 +157,9 @@ impl From for Status { Self::from(&err) } } + +impl From for BatchError { + fn from(value: ConnectorError) -> Self { + Self::Connector(value.into()) + } +} diff --git a/src/batch/src/executor/iceberg_scan.rs b/src/batch/src/executor/iceberg_scan.rs index caf1289220d48..9c24e554c8e1f 100644 --- a/src/batch/src/executor/iceberg_scan.rs +++ b/src/batch/src/executor/iceberg_scan.rs @@ -116,11 +116,7 @@ impl IcebergScanExecutor { #[try_stream(ok = DataChunk, error = BatchError)] async fn do_execute(self: Box) { - let table = self - .iceberg_config - .load_table() - .await - .map_err(BatchError::Internal)?; + let table = self.iceberg_config.load_table().await?; let table_scan: TableScan = table .new_scan_builder() diff --git a/src/common/src/field_generator/mod.rs 
b/src/common/src/field_generator/mod.rs index 0309798068854..679d60ba1f188 100644 --- a/src/common/src/field_generator/mod.rs +++ b/src/common/src/field_generator/mod.rs @@ -18,6 +18,7 @@ mod varchar; use std::time::Duration; +// TODO(error-handling): use a new error type use anyhow::{anyhow, Result}; use chrono::{DateTime, FixedOffset}; pub use numeric::*; diff --git a/src/connector/src/aws_utils.rs b/src/connector/src/aws_utils.rs index cf70a90e07cda..1578c7b844422 100644 --- a/src/connector/src/aws_utils.rs +++ b/src/connector/src/aws_utils.rs @@ -21,6 +21,7 @@ use aws_sdk_s3::{client as s3_client, config as s3_config}; use url::Url; use crate::common::AwsAuthProps; +use crate::error::ConnectorResult; const AWS_CUSTOM_CONFIG_KEY: [&str; 3] = ["retry_times", "conn_timeout", "read_timeout"]; @@ -106,7 +107,7 @@ pub fn s3_client( pub async fn load_file_descriptor_from_s3( location: &Url, config: &AwsAuthProps, -) -> anyhow::Result> { +) -> ConnectorResult> { let bucket = location .domain() .with_context(|| format!("illegal file path {}", location))?; diff --git a/src/connector/src/common.rs b/src/connector/src/common.rs index 418155250e74e..d5944eb07fa3c 100644 --- a/src/connector/src/common.rs +++ b/src/connector/src/common.rs @@ -17,7 +17,7 @@ use std::collections::HashMap; use std::io::Write; use std::time::Duration; -use anyhow::{anyhow, Context, Ok}; +use anyhow::{anyhow, Context}; use async_nats::jetstream::consumer::DeliverPolicy; use async_nats::jetstream::{self}; use aws_sdk_kinesis::Client as KinesisClient; @@ -35,6 +35,7 @@ use with_options::WithOptions; use crate::aws_utils::load_file_descriptor_from_s3; use crate::deserialize_duration_from_string; +use crate::error::ConnectorResult; use crate::sink::SinkError; use crate::source::nats::source::NatsOffset; // The file describes the common abstractions for each connector and can be used in both source and @@ -72,7 +73,7 @@ pub struct AwsAuthProps { } impl AwsAuthProps { - async fn build_region(&self) -> 
anyhow::Result { + async fn build_region(&self) -> ConnectorResult { if let Some(region_name) = &self.region { Ok(Region::new(region_name.clone())) } else { @@ -85,11 +86,11 @@ impl AwsAuthProps { .build() .region() .await - .ok_or_else(|| anyhow::format_err!("region should be provided"))?) + .context("region should be provided")?) } } - fn build_credential_provider(&self) -> anyhow::Result { + fn build_credential_provider(&self) -> ConnectorResult { if self.access_key.is_some() && self.secret_key.is_some() { Ok(SharedCredentialsProvider::new( aws_credential_types::Credentials::from_keys( @@ -99,16 +100,14 @@ impl AwsAuthProps { ), )) } else { - Err(anyhow!( - "Both \"access_key\" and \"secret_access\" are required." - )) + bail!("Both \"access_key\" and \"secret_access\" are required.") } } async fn with_role_provider( &self, credential: SharedCredentialsProvider, - ) -> anyhow::Result { + ) -> ConnectorResult { if let Some(role_name) = &self.arn { let region = self.build_region().await?; let mut role = AssumeRoleProvider::builder(role_name) @@ -124,7 +123,7 @@ impl AwsAuthProps { } } - pub async fn build_config(&self) -> anyhow::Result { + pub async fn build_config(&self) -> ConnectorResult { let region = self.build_region().await?; let credentials_provider = self .with_role_provider(self.build_credential_provider()?) 
@@ -386,12 +385,19 @@ pub struct PulsarOauthCommon { pub scope: Option, } +fn create_credential_temp_file(credentials: &[u8]) -> std::io::Result { + let mut f = NamedTempFile::new()?; + f.write_all(credentials)?; + f.as_file().sync_all()?; + Ok(f) +} + impl PulsarCommon { pub(crate) async fn build_client( &self, oauth: &Option, aws_auth_props: &AwsAuthProps, - ) -> anyhow::Result> { + ) -> ConnectorResult> { let mut pulsar_builder = Pulsar::builder(&self.service_url, TokioExecutor); let mut temp_file = None; if let Some(oauth) = oauth.as_ref() { @@ -399,10 +405,10 @@ impl PulsarCommon { match url.scheme() { "s3" => { let credentials = load_file_descriptor_from_s3(&url, aws_auth_props).await?; - let mut f = NamedTempFile::new()?; - f.write_all(&credentials)?; - f.as_file().sync_all()?; - temp_file = Some(f); + temp_file = Some( + create_credential_temp_file(&credentials) + .context("failed to create temp file for pulsar credentials")?, + ); } "file" => {} _ => { @@ -477,7 +483,7 @@ pub struct KinesisCommon { } impl KinesisCommon { - pub(crate) async fn build_client(&self) -> anyhow::Result { + pub(crate) async fn build_client(&self) -> ConnectorResult { let config = AwsAuthProps { region: Some(self.stream_region.clone()), endpoint: self.endpoint.clone(), @@ -539,7 +545,7 @@ pub struct NatsCommon { } impl NatsCommon { - pub(crate) async fn build_client(&self) -> anyhow::Result { + pub(crate) async fn build_client(&self) -> ConnectorResult { let mut connect_options = async_nats::ConnectOptions::new(); match self.connect_mode.as_str() { "user_and_password" => { @@ -582,7 +588,7 @@ impl NatsCommon { Ok(client) } - pub(crate) async fn build_context(&self) -> anyhow::Result { + pub(crate) async fn build_context(&self) -> ConnectorResult { let client = self.build_client().await?; let jetstream = async_nats::jetstream::new(client); Ok(jetstream) @@ -593,7 +599,7 @@ impl NatsCommon { stream: String, split_id: String, start_sequence: NatsOffset, - ) -> anyhow::Result< + ) -> 
ConnectorResult< async_nats::jetstream::consumer::Consumer, > { let context = self.build_context().await?; @@ -612,13 +618,16 @@ impl NatsCommon { NatsOffset::Earliest => DeliverPolicy::All, NatsOffset::Latest => DeliverPolicy::Last, NatsOffset::SequenceNumber(v) => { - let parsed = v.parse::()?; + let parsed = v + .parse::() + .context("failed to parse nats offset as sequence number")?; DeliverPolicy::ByStartSequence { start_sequence: 1 + parsed, } } NatsOffset::Timestamp(v) => DeliverPolicy::ByStartTime { - start_time: OffsetDateTime::from_unix_timestamp_nanos(v * 1_000_000)?, + start_time: OffsetDateTime::from_unix_timestamp_nanos(v * 1_000_000) + .context("invalid timestamp for nats offset")?, }, NatsOffset::None => DeliverPolicy::All, }; @@ -635,7 +644,7 @@ impl NatsCommon { &self, jetstream: jetstream::Context, stream: String, - ) -> anyhow::Result { + ) -> ConnectorResult { let subjects: Vec = self.subject.split(',').map(|s| s.to_string()).collect(); let mut config = jetstream::stream::Config { name: stream, @@ -662,7 +671,7 @@ impl NatsCommon { Ok(stream) } - pub(crate) fn create_credential(&self, seed: &str, jwt: &str) -> anyhow::Result { + pub(crate) fn create_credential(&self, seed: &str, jwt: &str) -> ConnectorResult { let creds = format!( "-----BEGIN NATS USER JWT-----\n{}\n------END NATS USER JWT------\n\n\ ************************* IMPORTANT *************************\n\ diff --git a/src/connector/src/error.rs b/src/connector/src/error.rs index 4cf36e9859d36..3dc10af3d8e7a 100644 --- a/src/connector/src/error.rs +++ b/src/connector/src/error.rs @@ -13,13 +13,57 @@ // limitations under the License. use risingwave_common::error::v2::def_anyhow_newtype; +use risingwave_pb::PbFieldNotFound; +use risingwave_rpc_client::error::RpcError; + +use crate::parser::AccessError; +use crate::schema::schema_registry::{ConcurrentRequestError, WireFormatError}; +use crate::schema::InvalidOptionError; +use crate::sink::SinkError; def_anyhow_newtype! 
{ pub ConnectorError, + // Common errors + std::io::Error => transparent, + + // Fine-grained connector errors + AccessError => transparent, + WireFormatError => transparent, + ConcurrentRequestError => transparent, + InvalidOptionError => transparent, + SinkError => transparent, + PbFieldNotFound => transparent, + // TODO(error-handling): Remove implicit contexts below and specify ad-hoc context for each conversion. + + // Parsing errors + url::ParseError => "failed to parse url", + serde_json::Error => "failed to parse json", + csv::Error => "failed to parse csv", + + // Connector errors + opendal::Error => transparent, // believed to be self-explanatory + mysql_async::Error => "MySQL error", tokio_postgres::Error => "Postgres error", + apache_avro::Error => "Avro error", + rdkafka::error::KafkaError => "Kafka error", + pulsar::Error => "Pulsar error", + async_nats::jetstream::consumer::StreamError => "Nats error", + async_nats::jetstream::consumer::pull::MessagesError => "Nats error", + async_nats::jetstream::context::CreateStreamError => "Nats error", + async_nats::jetstream::stream::ConsumerError => "Nats error", + icelake::Error => "Iceberg error", + redis::RedisError => "Redis error", + arrow_schema::ArrowError => "Arrow error", + google_cloud_pubsub::client::google_cloud_auth::error::Error => "Google Cloud error", } -pub type ConnectorResult = Result; +pub type ConnectorResult = std::result::Result; + +impl From for RpcError { + fn from(value: ConnectorError) -> Self { + RpcError::Internal(value.0) + } +} diff --git a/src/connector/src/macros.rs b/src/connector/src/macros.rs index e34171717ae6c..d4a546c6a00a7 100644 --- a/src/connector/src/macros.rs +++ b/src/connector/src/macros.rs @@ -168,12 +168,12 @@ macro_rules! 
impl_split { $( impl TryFrom for $split { - type Error = anyhow::Error; + type Error = $crate::error::ConnectorError; fn try_from(split: SplitImpl) -> std::result::Result { match split { SplitImpl::$variant_name(inner) => Ok(inner), - other => Err(anyhow::anyhow!("expect {} but get {:?}", stringify!($split), other)) + other => risingwave_common::bail!("expect {} but get {:?}", stringify!($split), other), } } } diff --git a/src/connector/src/parser/additional_columns.rs b/src/connector/src/parser/additional_columns.rs index c1da30f788b3e..06cc061566690 100644 --- a/src/connector/src/parser/additional_columns.rs +++ b/src/connector/src/parser/additional_columns.rs @@ -27,6 +27,7 @@ use risingwave_pb::plan_common::{ AdditionalColumnTimestamp, }; +use crate::error::ConnectorResult; use crate::source::{ GCS_CONNECTOR, KAFKA_CONNECTOR, KINESIS_CONNECTOR, OPENDAL_S3_CONNECTOR, PULSAR_CONNECTOR, S3_CONNECTOR, @@ -86,7 +87,7 @@ pub fn build_additional_column_catalog( inner_field_name: Option<&str>, data_type: Option<&str>, reject_unknown_connector: bool, -) -> anyhow::Result { +) -> ConnectorResult { let compatible_columns = match ( COMPATIBLE_ADDITIONAL_COLUMNS.get(connector_name), reject_unknown_connector, diff --git a/src/connector/src/parser/avro/parser.rs b/src/connector/src/parser/avro/parser.rs index 5e876d2ce9324..7343f1c43118c 100644 --- a/src/connector/src/parser/avro/parser.rs +++ b/src/connector/src/parser/avro/parser.rs @@ -23,6 +23,7 @@ use risingwave_pb::plan_common::ColumnDesc; use super::schema_resolver::ConfluentSchemaResolver; use super::util::avro_schema_to_column_descs; +use crate::error::ConnectorResult; use crate::parser::unified::avro::{AvroAccess, AvroParseOptions}; use crate::parser::unified::AccessImpl; use crate::parser::util::bytes_from_url; @@ -40,7 +41,7 @@ pub struct AvroAccessBuilder { } impl AccessBuilder for AvroAccessBuilder { - async fn generate_accessor(&mut self, payload: Vec) -> anyhow::Result> { + async fn generate_accessor(&mut 
self, payload: Vec) -> ConnectorResult> { self.value = self.parse_avro_value(&payload, Some(&*self.schema)).await?; Ok(AccessImpl::Avro(AvroAccess::new( self.value.as_ref().unwrap(), @@ -50,7 +51,7 @@ impl AccessBuilder for AvroAccessBuilder { } impl AvroAccessBuilder { - pub fn new(config: AvroParserConfig, encoding_type: EncodingType) -> anyhow::Result { + pub fn new(config: AvroParserConfig, encoding_type: EncodingType) -> ConnectorResult { let AvroParserConfig { schema, key_schema, @@ -71,7 +72,7 @@ impl AvroAccessBuilder { &self, payload: &[u8], reader_schema: Option<&Schema>, - ) -> anyhow::Result> { + ) -> ConnectorResult> { // parse payload to avro value // if use confluent schema, get writer schema from confluent schema registry if let Some(resolver) = &self.schema_resolver { @@ -87,9 +88,7 @@ impl AvroAccessBuilder { match reader.next() { Some(Ok(v)) => Ok(Some(v)), Some(Err(e)) => Err(e)?, - None => { - anyhow::bail!("avro parse unexpected eof") - } + None => bail!("avro parse unexpected eof"), } } else { unreachable!("both schema_resolver and reader_schema not exist"); @@ -105,7 +104,7 @@ pub struct AvroParserConfig { } impl AvroParserConfig { - pub async fn new(encoding_properties: EncodingProperties) -> anyhow::Result { + pub async fn new(encoding_properties: EncodingProperties) -> ConnectorResult { let avro_config = try_match_expand!(encoding_properties, EncodingProperties::Avro)?; let schema_location = &avro_config.row_schema_location; let enable_upsert = avro_config.enable_upsert; @@ -160,7 +159,7 @@ impl AvroParserConfig { } } - pub fn extract_pks(&self) -> anyhow::Result> { + pub fn extract_pks(&self) -> ConnectorResult> { avro_schema_to_column_descs( self.key_schema .as_deref() @@ -168,7 +167,7 @@ impl AvroParserConfig { ) } - pub fn map_to_columns(&self) -> anyhow::Result> { + pub fn map_to_columns(&self) -> ConnectorResult> { avro_schema_to_column_descs(self.schema.as_ref()) } } @@ -196,6 +195,7 @@ mod test { use super::*; use 
crate::common::AwsAuthProps; + use crate::error::ConnectorResult; use crate::parser::plain_parser::PlainParser; use crate::parser::unified::avro::unix_epoch_days; use crate::parser::{ @@ -256,7 +256,7 @@ mod test { println!("schema = {:?}", schema.unwrap()); } - async fn new_avro_conf_from_local(file_name: &str) -> anyhow::Result { + async fn new_avro_conf_from_local(file_name: &str) -> ConnectorResult { let schema_path = "file://".to_owned() + &test_data_path(file_name); let info = StreamSourceInfo { row_schema_location: schema_path.clone(), @@ -269,7 +269,7 @@ mod test { AvroParserConfig::new(parser_config.encoding_config).await } - async fn new_avro_parser_from_local(file_name: &str) -> anyhow::Result { + async fn new_avro_parser_from_local(file_name: &str) -> ConnectorResult { let conf = new_avro_conf_from_local(file_name).await?; Ok(PlainParser { diff --git a/src/connector/src/parser/avro/schema_resolver.rs b/src/connector/src/parser/avro/schema_resolver.rs index ef2dd9fc5f731..cdc52de7accee 100644 --- a/src/connector/src/parser/avro/schema_resolver.rs +++ b/src/connector/src/parser/avro/schema_resolver.rs @@ -18,6 +18,7 @@ use anyhow::Context; use apache_avro::Schema; use moka::future::Cache; +use crate::error::ConnectorResult; use crate::schema::schema_registry::{Client, ConfluentSchema}; #[derive(Debug)] @@ -30,7 +31,7 @@ impl ConfluentSchemaResolver { async fn parse_and_cache_schema( &self, raw_schema: ConfluentSchema, - ) -> anyhow::Result> { + ) -> ConnectorResult> { let schema = Schema::parse_str(&raw_schema.content).context("failed to parse avro schema")?; let schema = Arc::new(schema); @@ -48,7 +49,7 @@ impl ConfluentSchemaResolver { } } - pub async fn get_by_subject_name(&self, subject_name: &str) -> anyhow::Result> { + pub async fn get_by_subject_name(&self, subject_name: &str) -> ConnectorResult> { let raw_schema = self.get_raw_schema_by_subject_name(subject_name).await?; self.parse_and_cache_schema(raw_schema).await } @@ -56,7 +57,7 @@ impl 
ConfluentSchemaResolver { pub async fn get_raw_schema_by_subject_name( &self, subject_name: &str, - ) -> anyhow::Result { + ) -> ConnectorResult { self.confluent_client .get_schema_by_subject(subject_name) .await @@ -64,7 +65,7 @@ impl ConfluentSchemaResolver { } // get the writer schema by id - pub async fn get(&self, schema_id: i32) -> anyhow::Result> { + pub async fn get(&self, schema_id: i32) -> ConnectorResult> { // TODO: use `get_with` if let Some(schema) = self.writer_schemas.get(&schema_id).await { Ok(schema) diff --git a/src/connector/src/parser/avro/util.rs b/src/connector/src/parser/avro/util.rs index 8d2d4265883e6..ba065b7da4dc4 100644 --- a/src/connector/src/parser/avro/util.rs +++ b/src/connector/src/parser/avro/util.rs @@ -16,20 +16,23 @@ use std::sync::LazyLock; use apache_avro::schema::{DecimalSchema, RecordSchema, Schema}; use itertools::Itertools; +use risingwave_common::bail; use risingwave_common::log::LogSuppresser; use risingwave_common::types::{DataType, Decimal}; use risingwave_pb::plan_common::{AdditionalColumn, ColumnDesc, ColumnDescVersion}; -pub fn avro_schema_to_column_descs(schema: &Schema) -> anyhow::Result> { +use crate::error::ConnectorResult; + +pub fn avro_schema_to_column_descs(schema: &Schema) -> ConnectorResult> { if let Schema::Record(RecordSchema { fields, .. 
}) = schema { let mut index = 0; let fields = fields .iter() .map(|field| avro_field_to_column_desc(&field.name, &field.schema, &mut index)) - .collect::>>()?; + .collect::>>()?; Ok(fields) } else { - anyhow::bail!("schema invalid, record type required at top level of the schema."); + bail!("schema invalid, record type required at top level of the schema."); } } @@ -40,7 +43,7 @@ fn avro_field_to_column_desc( name: &str, schema: &Schema, index: &mut i32, -) -> anyhow::Result { +) -> ConnectorResult { let data_type = avro_type_mapping(schema)?; match schema { Schema::Record(RecordSchema { @@ -51,7 +54,7 @@ fn avro_field_to_column_desc( let vec_column = fields .iter() .map(|f| avro_field_to_column_desc(&f.name, &f.schema, index)) - .collect::>>()?; + .collect::>>()?; *index += 1; Ok(ColumnDesc { column_type: Some(data_type.to_protobuf()), @@ -79,7 +82,7 @@ fn avro_field_to_column_desc( } } -fn avro_type_mapping(schema: &Schema) -> anyhow::Result { +fn avro_type_mapping(schema: &Schema) -> ConnectorResult { let data_type = match schema { Schema::String => DataType::Varchar, Schema::Int => DataType::Int32, @@ -122,7 +125,7 @@ fn avro_type_mapping(schema: &Schema) -> anyhow::Result { let struct_fields = fields .iter() .map(|f| avro_type_mapping(&f.schema)) - .collect::>>()?; + .collect::>>()?; let struct_names = fields.iter().map(|f| f.name.clone()).collect_vec(); DataType::new_struct(struct_fields, struct_names) } @@ -147,18 +150,10 @@ fn avro_type_mapping(schema: &Schema) -> anyhow::Result { { DataType::Decimal } else { - return Err(anyhow::format_err!( - "unsupported type in Avro: {:?}", - schema - )); + bail!("unsupported type in Avro: {:?}", schema); } } - _ => { - return Err(anyhow::format_err!( - "unsupported type in Avro: {:?}", - schema - )); - } + _ => bail!("unsupported type in Avro: {:?}", schema), }; Ok(data_type) diff --git a/src/connector/src/parser/bytes_parser.rs b/src/connector/src/parser/bytes_parser.rs index 44c035fa3ff5d..4f353ce2c60e6 100644 --- 
a/src/connector/src/parser/bytes_parser.rs +++ b/src/connector/src/parser/bytes_parser.rs @@ -17,6 +17,7 @@ use risingwave_common::try_match_expand; use super::unified::bytes::BytesAccess; use super::unified::AccessImpl; use super::{AccessBuilder, EncodingProperties}; +use crate::error::ConnectorResult; #[derive(Debug)] pub struct BytesAccessBuilder { @@ -25,7 +26,7 @@ pub struct BytesAccessBuilder { impl AccessBuilder for BytesAccessBuilder { #[allow(clippy::unused_async)] - async fn generate_accessor(&mut self, payload: Vec) -> anyhow::Result> { + async fn generate_accessor(&mut self, payload: Vec) -> ConnectorResult> { Ok(AccessImpl::Bytes(BytesAccess::new( &self.column_name, payload, @@ -34,7 +35,7 @@ impl AccessBuilder for BytesAccessBuilder { } impl BytesAccessBuilder { - pub fn new(encoding_properties: EncodingProperties) -> anyhow::Result { + pub fn new(encoding_properties: EncodingProperties) -> ConnectorResult { let config = try_match_expand!(encoding_properties, EncodingProperties::Bytes)?; Ok(Self { column_name: config.column_name, diff --git a/src/connector/src/parser/canal/simd_json_parser.rs b/src/connector/src/parser/canal/simd_json_parser.rs index 09a00c490a9f5..75e6656fd7a7a 100644 --- a/src/connector/src/parser/canal/simd_json_parser.rs +++ b/src/connector/src/parser/canal/simd_json_parser.rs @@ -18,6 +18,7 @@ use risingwave_common::bail; use simd_json::prelude::{MutableObject, ValueAsScalar, ValueObjectAccess}; use simd_json::BorrowedValue; +use crate::error::ConnectorResult; use crate::only_parse_payload; use crate::parser::canal::operators::*; use crate::parser::unified::json::{JsonAccess, JsonParseOptions}; @@ -44,7 +45,7 @@ impl CanalJsonParser { rw_columns: Vec, source_ctx: SourceContextRef, config: &JsonProperties, - ) -> anyhow::Result { + ) -> ConnectorResult { Ok(Self { rw_columns, source_ctx, @@ -57,9 +58,10 @@ impl CanalJsonParser { &self, mut payload: Vec, mut writer: SourceStreamChunkRowWriter<'_>, - ) -> anyhow::Result<()> { + ) -> 
ConnectorResult<()> { let mut event: BorrowedValue<'_> = - simd_json::to_borrowed_value(&mut payload[self.payload_start_idx..])?; + simd_json::to_borrowed_value(&mut payload[self.payload_start_idx..]) + .context("failed to parse canal json payload")?; let is_ddl = event .get(IS_DDL) @@ -123,7 +125,7 @@ impl ByteStreamSourceParser for CanalJsonParser { _key: Option>, payload: Option>, writer: SourceStreamChunkRowWriter<'a>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { only_parse_payload!(self, payload, writer) } } diff --git a/src/connector/src/parser/csv_parser.rs b/src/connector/src/parser/csv_parser.rs index 7bb67c9d7b510..8a8bb211da327 100644 --- a/src/connector/src/parser/csv_parser.rs +++ b/src/connector/src/parser/csv_parser.rs @@ -16,6 +16,7 @@ use risingwave_common::types::{Date, Decimal, Time, Timestamp, Timestamptz}; use super::unified::{AccessError, AccessResult}; use super::{ByteStreamSourceParser, CsvProperties}; +use crate::error::ConnectorResult; use crate::only_parse_payload; use crate::parser::{ParserFormat, SourceStreamChunkRowWriter}; use crate::source::{DataType, SourceColumnDesc, SourceContext, SourceContextRef}; @@ -44,7 +45,7 @@ impl CsvParser { rw_columns: Vec, csv_props: CsvProperties, source_ctx: SourceContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let CsvProperties { delimiter, has_header, @@ -58,7 +59,7 @@ impl CsvParser { }) } - fn read_row(&self, buf: &[u8]) -> anyhow::Result> { + fn read_row(&self, buf: &[u8]) -> ConnectorResult> { let mut reader_builder = csv::ReaderBuilder::default(); reader_builder.delimiter(self.delimiter).has_headers(false); let record = reader_builder @@ -102,7 +103,7 @@ impl CsvParser { &mut self, payload: Vec, mut writer: SourceStreamChunkRowWriter<'_>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { let mut fields = self.read_row(&payload)?; if let Some(headers) = &mut self.headers { @@ -158,7 +159,7 @@ impl ByteStreamSourceParser for CsvParser { _key: Option>, payload: 
Option>, writer: SourceStreamChunkRowWriter<'a>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { only_parse_payload!(self, payload, writer) } } diff --git a/src/connector/src/parser/debezium/avro_parser.rs b/src/connector/src/parser/debezium/avro_parser.rs index 6320ef5cdf3ec..ca1574af3d6b2 100644 --- a/src/connector/src/parser/debezium/avro_parser.rs +++ b/src/connector/src/parser/debezium/avro_parser.rs @@ -21,6 +21,7 @@ use risingwave_common::try_match_expand; use risingwave_pb::catalog::PbSchemaRegistryNameStrategy; use risingwave_pb::plan_common::ColumnDesc; +use crate::error::ConnectorResult; use crate::parser::avro::schema_resolver::ConfluentSchemaResolver; use crate::parser::avro::util::avro_schema_to_column_descs; use crate::parser::unified::avro::{ @@ -48,7 +49,7 @@ pub struct DebeziumAvroAccessBuilder { // TODO: reduce encodingtype match impl AccessBuilder for DebeziumAvroAccessBuilder { - async fn generate_accessor(&mut self, payload: Vec) -> anyhow::Result> { + async fn generate_accessor(&mut self, payload: Vec) -> ConnectorResult> { let (schema_id, mut raw_payload) = extract_schema_id(&payload)?; let schema = self.schema_resolver.get(schema_id).await?; self.value = Some(from_avro_datum(schema.as_ref(), &mut raw_payload, None)?); @@ -70,7 +71,7 @@ impl DebeziumAvroAccessBuilder { pub fn new( config: DebeziumAvroParserConfig, encoding_type: EncodingType, - ) -> anyhow::Result { + ) -> ConnectorResult { let DebeziumAvroParserConfig { outer_schema, schema_resolver, @@ -99,7 +100,7 @@ pub struct DebeziumAvroParserConfig { } impl DebeziumAvroParserConfig { - pub async fn new(encoding_config: EncodingProperties) -> anyhow::Result { + pub async fn new(encoding_config: EncodingProperties) -> ConnectorResult { let avro_config = try_match_expand!(encoding_config, EncodingProperties::Avro)?; let schema_location = &avro_config.row_schema_location; let client_config = &avro_config.client_config; @@ -121,11 +122,11 @@ impl DebeziumAvroParserConfig { }) } - 
pub fn extract_pks(&self) -> anyhow::Result> { + pub fn extract_pks(&self) -> ConnectorResult> { avro_schema_to_column_descs(&self.key_schema) } - pub fn map_to_columns(&self) -> anyhow::Result> { + pub fn map_to_columns(&self) -> ConnectorResult> { avro_schema_to_column_descs(avro_schema_skip_union(avro_extract_field_schema( &self.outer_schema, Some("before"), @@ -348,7 +349,7 @@ mod tests { #[ignore] #[tokio::test] - async fn test_debezium_avro_parser() -> anyhow::Result<()> { + async fn test_debezium_avro_parser() -> crate::error::ConnectorResult<()> { let props = convert_args!(hashmap!( "kafka.topic" => "dbserver1.inventory.customers" )); diff --git a/src/connector/src/parser/debezium/debezium_parser.rs b/src/connector/src/parser/debezium/debezium_parser.rs index f69c19e691e97..d9c824882e1ea 100644 --- a/src/connector/src/parser/debezium/debezium_parser.rs +++ b/src/connector/src/parser/debezium/debezium_parser.rs @@ -16,6 +16,7 @@ use risingwave_common::bail; use super::simd_json_parser::DebeziumJsonAccessBuilder; use super::{DebeziumAvroAccessBuilder, DebeziumAvroParserConfig}; +use crate::error::ConnectorResult; use crate::extract_key_config; use crate::parser::unified::debezium::DebeziumChangeEvent; use crate::parser::unified::util::apply_row_operation_on_stream_chunk_writer; @@ -37,7 +38,7 @@ pub struct DebeziumParser { async fn build_accessor_builder( config: EncodingProperties, encoding_type: EncodingType, -) -> anyhow::Result { +) -> ConnectorResult { match config { EncodingProperties::Avro(_) => { let config = DebeziumAvroParserConfig::new(config).await?; @@ -60,7 +61,7 @@ impl DebeziumParser { props: SpecificParserConfig, rw_columns: Vec, source_ctx: SourceContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let (key_config, key_type) = extract_key_config!(props); let key_builder = build_accessor_builder(key_config, key_type).await?; let payload_builder = @@ -73,7 +74,7 @@ impl DebeziumParser { }) } - pub async fn new_for_test(rw_columns: Vec) 
-> anyhow::Result { + pub async fn new_for_test(rw_columns: Vec) -> ConnectorResult { let props = SpecificParserConfig { key_encoding_config: None, encoding_config: EncodingProperties::Json(JsonProperties { @@ -89,7 +90,7 @@ impl DebeziumParser { key: Option>, payload: Option>, mut writer: SourceStreamChunkRowWriter<'_>, - ) -> anyhow::Result { + ) -> ConnectorResult { // tombetone messages are handled implicitly by these accessors let key_accessor = match key { None => None, @@ -137,7 +138,7 @@ impl ByteStreamSourceParser for DebeziumParser { _key: Option>, _payload: Option>, _writer: SourceStreamChunkRowWriter<'a>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { unreachable!("should call `parse_one_with_txn` instead") } @@ -146,7 +147,7 @@ impl ByteStreamSourceParser for DebeziumParser { key: Option>, payload: Option>, writer: SourceStreamChunkRowWriter<'a>, - ) -> anyhow::Result { + ) -> ConnectorResult { self.parse_inner(key, payload, writer).await } } diff --git a/src/connector/src/parser/debezium/mongo_json_parser.rs b/src/connector/src/parser/debezium/mongo_json_parser.rs index 74c8a4ee8cfbc..aa4263ace7cbc 100644 --- a/src/connector/src/parser/debezium/mongo_json_parser.rs +++ b/src/connector/src/parser/debezium/mongo_json_parser.rs @@ -20,6 +20,7 @@ use risingwave_common::types::DataType; use simd_json::prelude::MutableObject; use simd_json::BorrowedValue; +use crate::error::ConnectorResult; use crate::only_parse_payload; use crate::parser::unified::debezium::{DebeziumChangeEvent, MongoProjection}; use crate::parser::unified::json::{JsonAccess, JsonParseOptions}; @@ -39,7 +40,7 @@ impl DebeziumMongoJsonParser { pub fn new( rw_columns: Vec, source_ctx: SourceContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let id_column = rw_columns .iter() .find(|desc| { @@ -78,8 +79,9 @@ impl DebeziumMongoJsonParser { &self, mut payload: Vec, mut writer: SourceStreamChunkRowWriter<'_>, - ) -> anyhow::Result<()> { - let mut event: BorrowedValue<'_> = 
simd_json::to_borrowed_value(&mut payload)?; + ) -> ConnectorResult<()> { + let mut event: BorrowedValue<'_> = simd_json::to_borrowed_value(&mut payload) + .context("failed to parse debezium mongo json payload")?; // Event can be configured with and without the "payload" field present. // See https://github.com/risingwavelabs/risingwave/issues/10178 @@ -115,7 +117,7 @@ impl ByteStreamSourceParser for DebeziumMongoJsonParser { _key: Option>, payload: Option>, writer: SourceStreamChunkRowWriter<'a>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { only_parse_payload!(self, payload, writer) } } diff --git a/src/connector/src/parser/debezium/simd_json_parser.rs b/src/connector/src/parser/debezium/simd_json_parser.rs index 3ad3609c78063..bc516ebce4f23 100644 --- a/src/connector/src/parser/debezium/simd_json_parser.rs +++ b/src/connector/src/parser/debezium/simd_json_parser.rs @@ -14,9 +14,11 @@ use std::fmt::Debug; +use anyhow::Context; use simd_json::prelude::MutableObject; use simd_json::BorrowedValue; +use crate::error::ConnectorResult; use crate::parser::unified::json::{JsonAccess, JsonParseOptions}; use crate::parser::unified::AccessImpl; use crate::parser::AccessBuilder; @@ -27,17 +29,18 @@ pub struct DebeziumJsonAccessBuilder { } impl DebeziumJsonAccessBuilder { - pub fn new() -> anyhow::Result { + pub fn new() -> ConnectorResult { Ok(Self { value: None }) } } impl AccessBuilder for DebeziumJsonAccessBuilder { #[allow(clippy::unused_async)] - async fn generate_accessor(&mut self, payload: Vec) -> anyhow::Result> { + async fn generate_accessor(&mut self, payload: Vec) -> ConnectorResult> { self.value = Some(payload); let mut event: BorrowedValue<'_> = - simd_json::to_borrowed_value(self.value.as_mut().unwrap())?; + simd_json::to_borrowed_value(self.value.as_mut().unwrap()) + .context("failed to parse debezium json payload")?; let payload = if let Some(payload) = event.get_mut("payload") { std::mem::take(payload) @@ -64,12 +67,14 @@ mod tests { DataType, 
Date, Interval, Scalar, ScalarImpl, StructType, Time, Timestamp, }; use serde_json::Value; + use thiserror_ext::AsReport; use crate::parser::{ DebeziumParser, EncodingProperties, JsonProperties, ProtocolProperties, SourceColumnDesc, SourceStreamChunkBuilder, SpecificParserConfig, }; use crate::source::SourceContextRef; + fn assert_json_eq(parse_result: &Option, json_str: &str) { if let Some(ScalarImpl::Jsonb(json_val)) = parse_result { let mut json_string = String::new(); @@ -491,7 +496,7 @@ mod tests { } else { // For f64 overflow, the parsing fails let e = res.unwrap_err(); - assert!(e.to_string().contains("InvalidNumber"), "{i}: {e}"); + assert!(e.to_report_string().contains("InvalidNumber"), "{i}: {e}"); } } } diff --git a/src/connector/src/parser/json_parser.rs b/src/connector/src/parser/json_parser.rs index 74c82d14d8065..47db36a0b2b8f 100644 --- a/src/connector/src/parser/json_parser.rs +++ b/src/connector/src/parser/json_parser.rs @@ -24,6 +24,7 @@ use risingwave_pb::plan_common::ColumnDesc; use super::avro::schema_resolver::ConfluentSchemaResolver; use super::util::{bytes_from_url, get_kafka_topic}; use super::{EncodingProperties, SchemaRegistryAuth, SpecificParserConfig}; +use crate::error::ConnectorResult; use crate::only_parse_payload; use crate::parser::avro::util::avro_schema_to_column_descs; use crate::parser::unified::json::{JsonAccess, JsonParseOptions}; @@ -43,7 +44,7 @@ pub struct JsonAccessBuilder { impl AccessBuilder for JsonAccessBuilder { #[allow(clippy::unused_async)] - async fn generate_accessor(&mut self, payload: Vec) -> anyhow::Result> { + async fn generate_accessor(&mut self, payload: Vec) -> ConnectorResult> { if payload.is_empty() { self.value = Some("{}".into()); } else { @@ -51,7 +52,8 @@ impl AccessBuilder for JsonAccessBuilder { } let value = simd_json::to_borrowed_value( &mut self.value.as_mut().unwrap()[self.payload_start_idx..], - )?; + ) + .context("failed to parse json payload")?; 
Ok(AccessImpl::Json(JsonAccess::new_with_options( value, // Debezium and Canal have their special json access builder and will not @@ -62,7 +64,7 @@ impl AccessBuilder for JsonAccessBuilder { } impl JsonAccessBuilder { - pub fn new(use_schema_registry: bool) -> anyhow::Result { + pub fn new(use_schema_registry: bool) -> ConnectorResult { Ok(Self { value: None, payload_start_idx: if use_schema_registry { 5 } else { 0 }, @@ -84,7 +86,7 @@ impl JsonParser { props: SpecificParserConfig, rw_columns: Vec, source_ctx: SourceContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let json_config = try_match_expand!(props.encoding_config, EncodingProperties::Json)?; let payload_start_idx = if json_config.use_schema_registry { 5 @@ -98,7 +100,7 @@ impl JsonParser { }) } - pub fn new_for_test(rw_columns: Vec) -> anyhow::Result { + pub fn new_for_test(rw_columns: Vec) -> ConnectorResult { Ok(Self { rw_columns, source_ctx: Default::default(), @@ -111,8 +113,9 @@ impl JsonParser { &self, mut payload: Vec, mut writer: SourceStreamChunkRowWriter<'_>, - ) -> anyhow::Result<()> { - let value = simd_json::to_borrowed_value(&mut payload[self.payload_start_idx..])?; + ) -> ConnectorResult<()> { + let value = simd_json::to_borrowed_value(&mut payload[self.payload_start_idx..]) + .context("failed to parse json payload")?; let values = if let simd_json::BorrowedValue::Array(arr) = value { Either::Left(arr.into_iter()) } else { @@ -145,7 +148,7 @@ pub async fn schema_to_columns( schema_location: &str, schema_registry_auth: Option, props: &HashMap, -) -> anyhow::Result> { +) -> ConnectorResult> { let url = handle_sr_list(schema_location)?; let json_schema = if let Some(schema_registry_auth) = schema_registry_auth { let client = Client::new(url, &schema_registry_auth)?; @@ -185,7 +188,7 @@ impl ByteStreamSourceParser for JsonParser { _key: Option>, payload: Option>, writer: SourceStreamChunkRowWriter<'a>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { only_parse_payload!(self, 
payload, writer) } } diff --git a/src/connector/src/parser/maxwell/maxwell_parser.rs b/src/connector/src/parser/maxwell/maxwell_parser.rs index aa661585fa1ff..8ba95ad212130 100644 --- a/src/connector/src/parser/maxwell/maxwell_parser.rs +++ b/src/connector/src/parser/maxwell/maxwell_parser.rs @@ -14,6 +14,7 @@ use risingwave_common::bail; +use crate::error::ConnectorResult; use crate::only_parse_payload; use crate::parser::unified::maxwell::MaxwellChangeEvent; use crate::parser::unified::util::apply_row_operation_on_stream_chunk_writer; @@ -35,7 +36,7 @@ impl MaxwellParser { props: SpecificParserConfig, rw_columns: Vec, source_ctx: SourceContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { match props.encoding_config { EncodingProperties::Json(_) => { let payload_builder = @@ -55,7 +56,7 @@ impl MaxwellParser { &mut self, payload: Vec, mut writer: SourceStreamChunkRowWriter<'_>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { let payload_accessor = self.payload_builder.generate_accessor(payload).await?; let row_op = MaxwellChangeEvent::new(payload_accessor); @@ -81,7 +82,7 @@ impl ByteStreamSourceParser for MaxwellParser { _key: Option>, payload: Option>, writer: SourceStreamChunkRowWriter<'a>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { // restrict the behaviours since there is no corresponding // key/value test for maxwell yet. 
only_parse_payload!(self, payload, writer) diff --git a/src/connector/src/parser/mod.rs b/src/connector/src/parser/mod.rs index 952ccd9774d39..721cfa2f241c7 100644 --- a/src/connector/src/parser/mod.rs +++ b/src/connector/src/parser/mod.rs @@ -44,10 +44,11 @@ pub use self::mysql::mysql_row_to_owned_row; use self::plain_parser::PlainParser; pub use self::postgres::postgres_row_to_owned_row; use self::simd_json_parser::DebeziumJsonAccessBuilder; -use self::unified::{AccessImpl, AccessResult}; +use self::unified::AccessImpl; use self::upsert_parser::UpsertParser; use self::util::get_kafka_topic; use crate::common::AwsAuthProps; +use crate::error::{ConnectorError, ConnectorResult}; use crate::parser::maxwell::MaxwellParser; use crate::parser::util::{ extract_header_inner_from_meta, extract_headers_from_meta, extreact_timestamp_from_meta, @@ -76,6 +77,8 @@ mod unified; mod upsert_parser; mod util; +pub use unified::{AccessError, AccessResult}; + /// A builder for building a [`StreamChunk`] from [`SourceColumnDesc`]. pub struct SourceStreamChunkBuilder { descs: Vec, @@ -536,7 +539,7 @@ pub trait ByteStreamSourceParser: Send + Debug + Sized + 'static { key: Option>, payload: Option>, writer: SourceStreamChunkRowWriter<'a>, - ) -> impl Future> + Send + 'a; + ) -> impl Future> + Send + 'a; /// Parse one record from the given `payload`, either write rows to the `writer` or interpret it /// as a transaction control message. 
@@ -550,13 +553,13 @@ pub trait ByteStreamSourceParser: Send + Debug + Sized + 'static { key: Option>, payload: Option>, writer: SourceStreamChunkRowWriter<'a>, - ) -> impl Future> + Send + 'a { + ) -> impl Future> + Send + 'a { self.parse_one(key, payload, writer) .map_ok(|_| ParseResult::Rows) } } -#[try_stream(ok = Vec, error = anyhow::Error)] +#[try_stream(ok = Vec, error = ConnectorError)] async fn ensure_largest_at_rate_limit(stream: BoxSourceStream, rate_limit: u32) { #[for_await] for batch in stream { @@ -610,7 +613,7 @@ const MAX_ROWS_FOR_TRANSACTION: usize = 4096; // TODO: when upsert is disabled, how to filter those empty payload // Currently, an err is returned for non upsert with empty payload -#[try_stream(ok = StreamChunk, error = anyhow::Error)] +#[try_stream(ok = StreamChunk, error = crate::error::ConnectorError)] async fn into_chunk_stream(mut parser: P, data_stream: BoxSourceStream) { let columns = parser.columns().to_vec(); @@ -700,14 +703,14 @@ async fn into_chunk_stream(mut parser: P, data_stream LazyLock::new(LogSuppresser::default); if let Ok(suppressed_count) = LOG_SUPPERSSER.check() { tracing::error!( - %error, + error = %error.as_report(), split_id = &*msg.split_id, offset = msg.offset, suppressed_count, "failed to parse message, skipping" ); } - parser.source_ctx().report_user_source_error(&*error); + parser.source_ctx().report_user_source_error(&error); } } @@ -750,7 +753,7 @@ async fn into_chunk_stream(mut parser: P, data_stream } pub trait AccessBuilder { - async fn generate_accessor(&mut self, payload: Vec) -> anyhow::Result>; + async fn generate_accessor(&mut self, payload: Vec) -> ConnectorResult>; } #[derive(Debug)] @@ -770,7 +773,10 @@ pub enum AccessBuilderImpl { } impl AccessBuilderImpl { - pub async fn new_default(config: EncodingProperties, kv: EncodingType) -> anyhow::Result { + pub async fn new_default( + config: EncodingProperties, + kv: EncodingType, + ) -> ConnectorResult { let accessor = match config { 
EncodingProperties::Avro(_) => { let config = AvroParserConfig::new(config).await?; @@ -794,7 +800,7 @@ impl AccessBuilderImpl { pub async fn generate_accessor( &mut self, payload: Vec, - ) -> anyhow::Result> { + ) -> ConnectorResult> { let accessor = match self { Self::Avro(builder) => builder.generate_accessor(payload).await?, Self::Protobuf(builder) => builder.generate_accessor(payload).await?, @@ -843,7 +849,7 @@ impl ByteStreamSourceParserImpl { pub async fn create( parser_config: ParserConfig, source_ctx: SourceContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let CommonParserConfig { rw_columns } = parser_config.common; let protocol = &parser_config.specific.protocol_config; let encode = &parser_config.specific.encoding_config; @@ -990,7 +996,7 @@ impl SpecificParserConfig { pub fn new( info: &StreamSourceInfo, with_properties: &HashMap, - ) -> anyhow::Result { + ) -> ConnectorResult { let source_struct = extract_source_struct(info)?; let format = source_struct.format; let encode = source_struct.encode; diff --git a/src/connector/src/parser/plain_parser.rs b/src/connector/src/parser/plain_parser.rs index 5fdb9fbf3c6ca..3b5460de1bfb8 100644 --- a/src/connector/src/parser/plain_parser.rs +++ b/src/connector/src/parser/plain_parser.rs @@ -18,6 +18,7 @@ use super::{ AccessBuilderImpl, ByteStreamSourceParser, EncodingProperties, EncodingType, SourceStreamChunkRowWriter, SpecificParserConfig, }; +use crate::error::ConnectorResult; use crate::parser::bytes_parser::BytesAccessBuilder; use crate::parser::simd_json_parser::DebeziumJsonAccessBuilder; use crate::parser::unified::debezium::parse_transaction_meta; @@ -43,7 +44,7 @@ impl PlainParser { props: SpecificParserConfig, rw_columns: Vec, source_ctx: SourceContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let key_builder = if let Some(key_column_name) = get_key_column_name(&rw_columns) { Some(AccessBuilderImpl::Bytes(BytesAccessBuilder::new( EncodingProperties::Bytes(BytesProperties { @@ -81,7 
+82,7 @@ impl PlainParser { key: Option>, payload: Option>, mut writer: SourceStreamChunkRowWriter<'_>, - ) -> anyhow::Result { + ) -> ConnectorResult { // if the message is transaction metadata, parse it and return if let Some(msg_meta) = writer.row_meta && let SourceMeta::DebeziumCdc(cdc_meta) = msg_meta.meta @@ -145,7 +146,7 @@ impl ByteStreamSourceParser for PlainParser { _key: Option>, _payload: Option>, _writer: SourceStreamChunkRowWriter<'a>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { unreachable!("should call `parse_one_with_txn` instead") } @@ -154,7 +155,7 @@ impl ByteStreamSourceParser for PlainParser { key: Option>, payload: Option>, writer: SourceStreamChunkRowWriter<'a>, - ) -> anyhow::Result { + ) -> ConnectorResult { self.parse_inner(key, payload, writer).await } } @@ -262,7 +263,7 @@ mod tests { assert_eq!(1, output.len()); } - #[try_stream(ok = Vec, error = anyhow::Error)] + #[try_stream(ok = Vec, error = crate::error::ConnectorError)] async fn source_message_stream(transactional: bool) { let begin_msg = r#"{"schema":null,"payload":{"status":"BEGIN","id":"35352:3962948040","event_count":null,"data_collections":null,"ts_ms":1704269323180}}"#; let commit_msg = r#"{"schema":null,"payload":{"status":"END","id":"35352:3962950064","event_count":11,"data_collections":[{"data_collection":"public.orders_tx","event_count":5},{"data_collection":"public.person","event_count":6}],"ts_ms":1704269323180}}"#; diff --git a/src/connector/src/parser/protobuf/parser.rs b/src/connector/src/parser/protobuf/parser.rs index 922705e3d3f8f..d4287a869b221 100644 --- a/src/connector/src/parser/protobuf/parser.rs +++ b/src/connector/src/parser/protobuf/parser.rs @@ -28,6 +28,7 @@ use thiserror::Error; use thiserror_ext::{AsReport, Macro}; use super::schema_resolver::*; +use crate::error::ConnectorResult; use crate::parser::unified::protobuf::ProtobufAccess; use crate::parser::unified::{ bail_uncategorized, uncategorized, AccessError, AccessImpl, AccessResult, @@ 
-47,7 +48,7 @@ pub struct ProtobufAccessBuilder { impl AccessBuilder for ProtobufAccessBuilder { #[allow(clippy::unused_async)] - async fn generate_accessor(&mut self, payload: Vec) -> anyhow::Result> { + async fn generate_accessor(&mut self, payload: Vec) -> ConnectorResult> { let payload = if self.confluent_wire_type { resolve_pb_header(&payload)? } else { @@ -65,7 +66,7 @@ impl AccessBuilder for ProtobufAccessBuilder { } impl ProtobufAccessBuilder { - pub fn new(config: ProtobufParserConfig) -> anyhow::Result { + pub fn new(config: ProtobufParserConfig) -> ConnectorResult { let ProtobufParserConfig { confluent_wire_type, message_descriptor, @@ -89,7 +90,7 @@ pub struct ProtobufParserConfig { } impl ProtobufParserConfig { - pub async fn new(encoding_properties: EncodingProperties) -> anyhow::Result { + pub async fn new(encoding_properties: EncodingProperties) -> ConnectorResult { let protobuf_config = try_match_expand!(encoding_properties, EncodingProperties::Protobuf)?; let location = &protobuf_config.row_schema_location; let message_name = &protobuf_config.message_name; @@ -133,7 +134,7 @@ impl ProtobufParserConfig { } /// Maps the protobuf schema to relational schema. - pub fn map_to_columns(&self) -> anyhow::Result> { + pub fn map_to_columns(&self) -> ConnectorResult> { let mut columns = Vec::with_capacity(self.message_descriptor.fields().len()); let mut index = 0; let mut parse_trace: Vec = vec![]; @@ -153,8 +154,9 @@ impl ProtobufParserConfig { field_descriptor: &FieldDescriptor, index: &mut i32, parse_trace: &mut Vec, - ) -> anyhow::Result { - let field_type = protobuf_type_mapping(field_descriptor, parse_trace)?; + ) -> ConnectorResult { + let field_type = protobuf_type_mapping(field_descriptor, parse_trace) + .context("failed to map protobuf type")?; if let Kind::Message(m) = field_descriptor.kind() { let field_descs = if let DataType::List { .. 
} = field_type { vec![] @@ -525,7 +527,7 @@ fn protobuf_type_mapping( /// Wire format for Confluent pb header is: /// | 0 | 1-4 | 5-x | x+1-end /// | magic-byte | schema-id | message-indexes | protobuf-payload -pub(crate) fn resolve_pb_header(payload: &[u8]) -> anyhow::Result<&[u8]> { +pub(crate) fn resolve_pb_header(payload: &[u8]) -> ConnectorResult<&[u8]> { // there's a message index array at the front of payload // if it is the first message in proto def, the array is just and `0` // TODO: support parsing more complex index array @@ -575,7 +577,7 @@ mod test { static PRE_GEN_PROTO_DATA: &[u8] = b"\x08\x7b\x12\x0c\x74\x65\x73\x74\x20\x61\x64\x64\x72\x65\x73\x73\x1a\x09\x74\x65\x73\x74\x20\x63\x69\x74\x79\x20\xc8\x03\x2d\x19\x04\x9e\x3f\x32\x0a\x32\x30\x32\x31\x2d\x30\x31\x2d\x30\x31"; #[tokio::test] - async fn test_simple_schema() -> anyhow::Result<()> { + async fn test_simple_schema() -> crate::error::ConnectorResult<()> { let location = schema_dir() + "/simple-schema"; println!("location: {}", location); let message_name = "test.TestRecord"; @@ -620,7 +622,7 @@ mod test { } #[tokio::test] - async fn test_complex_schema() -> anyhow::Result<()> { + async fn test_complex_schema() -> crate::error::ConnectorResult<()> { let location = schema_dir() + "/complex-schema"; let message_name = "test.User"; @@ -912,7 +914,7 @@ mod test { static ANY_GEN_PROTO_DATA: &[u8] = b"\x08\xb9\x60\x12\x32\x0a\x24\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x53\x74\x72\x69\x6e\x67\x56\x61\x6c\x75\x65\x12\x0a\x0a\x08\x4a\x6f\x68\x6e\x20\x44\x6f\x65"; #[tokio::test] - async fn test_any_schema() -> anyhow::Result<()> { + async fn test_any_schema() -> crate::error::ConnectorResult<()> { let conf = create_recursive_pb_parser_config("/any-schema.pb", "test.TestAny").await; println!("Current conf: {:#?}", conf); @@ -973,7 +975,7 @@ mod test { static ANY_GEN_PROTO_DATA_1: &[u8] = 
b"\x08\xb9\x60\x12\x2b\x0a\x23\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x49\x6e\x74\x33\x32\x56\x61\x6c\x75\x65\x12\x04\x08\xd2\xfe\x06"; #[tokio::test] - async fn test_any_schema_1() -> anyhow::Result<()> { + async fn test_any_schema_1() -> crate::error::ConnectorResult<()> { let conf = create_recursive_pb_parser_config("/any-schema.pb", "test.TestAny").await; println!("Current conf: {:#?}", conf); @@ -1042,7 +1044,7 @@ mod test { static ANY_RECURSIVE_GEN_PROTO_DATA: &[u8] = b"\x08\xb9\x60\x12\x84\x01\x0a\x21\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x41\x6e\x79\x56\x61\x6c\x75\x65\x12\x5f\x0a\x30\x0a\x24\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x53\x74\x72\x69\x6e\x67\x56\x61\x6c\x75\x65\x12\x08\x0a\x06\x31\x31\x34\x35\x31\x34\x12\x2b\x0a\x23\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x49\x6e\x74\x33\x32\x56\x61\x6c\x75\x65\x12\x04\x08\xd2\xfe\x06"; #[tokio::test] - async fn test_any_recursive() -> anyhow::Result<()> { + async fn test_any_recursive() -> crate::error::ConnectorResult<()> { let conf = create_recursive_pb_parser_config("/any-schema.pb", "test.TestAny").await; println!("Current conf: {:#?}", conf); diff --git a/src/connector/src/parser/protobuf/schema_resolver.rs b/src/connector/src/parser/protobuf/schema_resolver.rs index 919413a0c50dc..828843842c785 100644 --- a/src/connector/src/parser/protobuf/schema_resolver.rs +++ b/src/connector/src/parser/protobuf/schema_resolver.rs @@ -22,6 +22,7 @@ use protobuf_native::compiler::{ }; use protobuf_native::MessageLite; +use crate::error::ConnectorResult; use crate::schema::schema_registry::Client; macro_rules! 
embed_wkts { @@ -54,7 +55,7 @@ const WELL_KNOWN_TYPES: &[(&str, &[u8])] = embed_wkts![ pub(super) async fn compile_file_descriptor_from_schema_registry( subject_name: &str, client: &Client, -) -> anyhow::Result> { +) -> ConnectorResult> { let (primary_subject, dependency_subjects) = client .get_subject_and_references(subject_name) .await diff --git a/src/connector/src/parser/unified/avro.rs b/src/connector/src/parser/unified/avro.rs index f00e26a32faed..f0825d9af4bbe 100644 --- a/src/connector/src/parser/unified/avro.rs +++ b/src/connector/src/parser/unified/avro.rs @@ -22,6 +22,7 @@ use chrono::Datelike; use itertools::Itertools; use num_bigint::{BigInt, Sign}; use risingwave_common::array::{ListValue, StructValue}; +use risingwave_common::bail; use risingwave_common::log::LogSuppresser; use risingwave_common::types::{ DataType, Date, Datum, Interval, JsonbVal, ScalarImpl, Time, Timestamp, Timestamptz, @@ -29,6 +30,7 @@ use risingwave_common::types::{ use risingwave_common::util::iter_util::ZipEqFast; use super::{bail_uncategorized, uncategorized, Access, AccessError, AccessResult}; +use crate::error::ConnectorResult; #[derive(Clone)] /// Options for parsing an `AvroValue` into Datum, with an optional avro schema. pub struct AvroParseOptions<'a> { @@ -384,7 +386,7 @@ pub(crate) fn extract_decimal(bytes: Vec) -> AccessResult<(u32, u32, u32)> { } } -pub fn avro_schema_skip_union(schema: &Schema) -> anyhow::Result<&Schema> { +pub fn avro_schema_skip_union(schema: &Schema) -> ConnectorResult<&Schema> { match schema { Schema::Union(union_schema) => { let inner_schema = union_schema @@ -403,7 +405,7 @@ pub fn avro_schema_skip_union(schema: &Schema) -> anyhow::Result<&Schema> { pub fn avro_extract_field_schema<'a>( schema: &'a Schema, name: Option<&'a str>, -) -> anyhow::Result<&'a Schema> { +) -> ConnectorResult<&'a Schema> { match schema { Schema::Record(RecordSchema { fields, lookup, .. 
}) => { let name = @@ -418,7 +420,7 @@ pub fn avro_extract_field_schema<'a>( } Schema::Array(schema) => Ok(schema), Schema::Union(_) => avro_schema_skip_union(schema), - _ => Err(anyhow::format_err!("avro schema is not a record or array")), + _ => bail!("avro schema is not a record or array"), } } @@ -481,7 +483,7 @@ mod tests { value: Value, value_schema: &Schema, shape: &DataType, - ) -> anyhow::Result { + ) -> crate::error::ConnectorResult { AvroParseOptions { schema: Some(value_schema), relax_numeric: true, diff --git a/src/connector/src/parser/upsert_parser.rs b/src/connector/src/parser/upsert_parser.rs index b8abf4785bb4a..048fd0beca3ff 100644 --- a/src/connector/src/parser/upsert_parser.rs +++ b/src/connector/src/parser/upsert_parser.rs @@ -23,6 +23,7 @@ use super::{ AccessBuilderImpl, ByteStreamSourceParser, BytesProperties, EncodingProperties, EncodingType, SourceStreamChunkRowWriter, SpecificParserConfig, }; +use crate::error::ConnectorResult; use crate::parser::ParserFormat; use crate::source::{SourceColumnDesc, SourceContext, SourceContextRef}; @@ -37,7 +38,7 @@ pub struct UpsertParser { async fn build_accessor_builder( config: EncodingProperties, encoding_type: EncodingType, -) -> anyhow::Result { +) -> ConnectorResult { match config { EncodingProperties::Json(_) | EncodingProperties::Protobuf(_) @@ -66,7 +67,7 @@ impl UpsertParser { props: SpecificParserConfig, rw_columns: Vec, source_ctx: SourceContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { // check whether columns has Key as AdditionalColumnType, if so, the key accessor should be // bytes let key_builder = if let Some(key_column_name) = get_key_column_name(&rw_columns) { @@ -95,7 +96,7 @@ impl UpsertParser { key: Option>, payload: Option>, mut writer: SourceStreamChunkRowWriter<'_>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { let mut row_op: UpsertChangeEvent, AccessImpl<'_, '_>> = UpsertChangeEvent::default(); let mut change_event_op = ChangeEventOperation::Delete; @@ 
-133,7 +134,7 @@ impl ByteStreamSourceParser for UpsertParser { key: Option>, payload: Option>, writer: SourceStreamChunkRowWriter<'a>, - ) -> anyhow::Result<()> { + ) -> ConnectorResult<()> { self.parse_inner(key, payload, writer).await } } diff --git a/src/connector/src/parser/util.rs b/src/connector/src/parser/util.rs index ccb0e47310d9a..eeaa09bcc031d 100644 --- a/src/connector/src/parser/util.rs +++ b/src/connector/src/parser/util.rs @@ -22,10 +22,11 @@ use risingwave_pb::data::DataType as PbDataType; use crate::aws_utils::load_file_descriptor_from_s3; use crate::common::AwsAuthProps; +use crate::error::ConnectorResult; use crate::source::SourceMeta; /// get kafka topic name -pub(super) fn get_kafka_topic(props: &HashMap) -> anyhow::Result<&String> { +pub(super) fn get_kafka_topic(props: &HashMap) -> ConnectorResult<&String> { const KAFKA_TOPIC_KEY1: &str = "kafka.topic"; const KAFKA_TOPIC_KEY2: &str = "topic"; @@ -45,7 +46,7 @@ pub(super) fn get_kafka_topic(props: &HashMap) -> anyhow::Result } /// download bytes from http(s) url -pub(super) async fn download_from_http(location: &Url) -> anyhow::Result { +pub(super) async fn download_from_http(location: &Url) -> ConnectorResult { let res = reqwest::get(location.clone()) .await .with_context(|| format!("failed to make request to {location}"))? @@ -95,7 +96,7 @@ macro_rules! extract_key_config { pub(super) async fn bytes_from_url( url: &Url, config: Option<&AwsAuthProps>, -) -> anyhow::Result> { +) -> ConnectorResult> { match (url.scheme(), config) { // TODO(Tao): support local file only when it's compiled in debug mode. ("file", _) => { diff --git a/src/connector/src/schema/mod.rs b/src/connector/src/schema/mod.rs index 0f64ee85e57dd..8d2a9ae780572 100644 --- a/src/connector/src/schema/mod.rs +++ b/src/connector/src/schema/mod.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use crate::error::ConnectorError; + pub mod avro; pub mod protobuf; pub mod schema_registry; @@ -46,6 +48,6 @@ pub enum SchemaFetchError { YetToMigrate( #[source] #[backtrace] - anyhow::Error, + ConnectorError, ), } diff --git a/src/connector/src/sink/clickhouse.rs b/src/connector/src/sink/clickhouse.rs index 2a9a2e5a39eb6..6f658318f69b5 100644 --- a/src/connector/src/sink/clickhouse.rs +++ b/src/connector/src/sink/clickhouse.rs @@ -30,6 +30,7 @@ use thiserror_ext::AsReport; use with_options::WithOptions; use super::{DummySinkCommitCoordinator, SinkWriterParam}; +use crate::error::ConnectorResult; use crate::sink::catalog::desc::SinkDesc; use crate::sink::log_store::DeliveryFutureManagerAddFuture; use crate::sink::writer::{ @@ -132,7 +133,7 @@ impl ClickHouseEngine { const POOL_IDLE_TIMEOUT: Duration = Duration::from_secs(5); impl ClickHouseCommon { - pub(crate) fn build_client(&self) -> anyhow::Result { + pub(crate) fn build_client(&self) -> ConnectorResult { use hyper_tls::HttpsConnector; let https = HttpsConnector::new(); diff --git a/src/connector/src/sink/iceberg/jni_catalog.rs b/src/connector/src/sink/iceberg/jni_catalog.rs index 11de50e69936a..08a876fb62ac8 100644 --- a/src/connector/src/sink/iceberg/jni_catalog.rs +++ b/src/connector/src/sink/iceberg/jni_catalog.rs @@ -29,6 +29,8 @@ use jni::JavaVM; use risingwave_jni_core::call_method; use risingwave_jni_core::jvm_runtime::{execute_with_jni_env, jobj_to_str, JVM}; +use crate::error::ConnectorResult; + pub struct JniCatalog { java_catalog: GlobalRef, jvm: &'static JavaVM, @@ -142,7 +144,7 @@ impl JniCatalog { name: impl ToString, catalog_impl: impl ToString, java_catalog_props: HashMap, - ) -> anyhow::Result { + ) -> ConnectorResult { let jvm = JVM.get_or_init()?; execute_with_jni_env(jvm, |env| { @@ -182,5 +184,6 @@ impl JniCatalog { config: base_config, }) as CatalogRef) }) + .map_err(Into::into) } } diff --git a/src/connector/src/sink/iceberg/mod.rs b/src/connector/src/sink/iceberg/mod.rs index 
326f8586d76eb..0c3840af1f3bb 100644 --- a/src/connector/src/sink/iceberg/mod.rs +++ b/src/connector/src/sink/iceberg/mod.rs @@ -59,6 +59,7 @@ use super::{ Sink, SinkError, SinkWriterParam, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT, }; use crate::deserialize_bool_from_string; +use crate::error::ConnectorResult; use crate::sink::coordinate::CoordinatedSinkWriter; use crate::sink::writer::{LogSinkerOf, SinkWriter, SinkWriterExt}; use crate::sink::{Result, SinkCommitCoordinator, SinkParam}; @@ -350,16 +351,19 @@ impl IcebergConfig { Ok((base_catalog_config, java_catalog_configs)) } - async fn create_catalog(&self) -> anyhow::Result { + async fn create_catalog(&self) -> ConnectorResult { match self.catalog_type() { "storage" | "rest" => { let iceberg_configs = self.build_iceberg_configs()?; - let catalog = load_catalog(&iceberg_configs) - .await - .map_err(|e| anyhow!(e))?; + let catalog = load_catalog(&iceberg_configs).await?; Ok(catalog) } - catalog_type if catalog_type == "hive" || catalog_type == "sql" || catalog_type == "glue" || catalog_type == "dynamodb" => { + catalog_type + if catalog_type == "hive" + || catalog_type == "sql" + || catalog_type == "glue" + || catalog_type == "dynamodb" => + { // Create java catalog let (base_catalog_config, java_catalog_props) = self.build_jni_catalog_configs()?; let catalog_impl = match catalog_type { @@ -370,19 +374,24 @@ impl IcebergConfig { _ => unreachable!(), }; - jni_catalog::JniCatalog::build(base_catalog_config, "risingwave", catalog_impl, java_catalog_props) + jni_catalog::JniCatalog::build( + base_catalog_config, + "risingwave", + catalog_impl, + java_catalog_props, + ) } - "mock" => Ok(Arc::new(MockCatalog{})), + "mock" => Ok(Arc::new(MockCatalog {})), _ => { - Err(anyhow!( - "Unsupported catalog type: {}, only support `storage`, `rest`, `hive`, `sql`, `glue`, `dynamodb`", - self.catalog_type() - )) + bail!( + "Unsupported catalog type: {}, only support `storage`, `rest`, `hive`, `sql`, `glue`, 
`dynamodb`", + self.catalog_type() + ) } } } - pub async fn load_table(&self) -> anyhow::Result { + pub async fn load_table(&self) -> ConnectorResult
{ let catalog = self .create_catalog() .await @@ -395,10 +404,7 @@ impl IcebergConfig { ) .context("Unable to parse table name")?; - catalog - .load_table(&table_id) - .await - .map_err(|err| anyhow!(err)) + catalog.load_table(&table_id).await.map_err(Into::into) } } @@ -428,7 +434,11 @@ impl Debug for IcebergSink { impl IcebergSink { async fn create_and_validate_table(&self) -> Result
{ - let table = self.config.load_table().await.map_err(SinkError::Iceberg)?; + let table = self + .config + .load_table() + .await + .map_err(|err| SinkError::Iceberg(anyhow!(err)))?; let sink_schema = self.param.schema(); let iceberg_schema = table @@ -825,7 +835,7 @@ impl WriteResult { .collect::, icelake::Error>>() .unwrap(); } else { - return Err(anyhow!("icberg sink metadata should have data_files object").into()); + bail!("icberg sink metadata should have data_files object"); } if let serde_json::Value::Array(values) = values .remove(DELETE_FILES) @@ -837,14 +847,14 @@ impl WriteResult { .collect::, icelake::Error>>() .context("Failed to parse data file from json")?; } else { - return Err(anyhow!("icberg sink metadata should have data_files object").into()); + bail!("icberg sink metadata should have data_files object"); } Ok(Self { data_files, delete_files, }) } else { - Err(anyhow!("Can't create iceberg sink write result from empty data!").into()) + bail!("Can't create iceberg sink write result from empty data!") } } } diff --git a/src/connector/src/sink/kinesis.rs b/src/connector/src/sink/kinesis.rs index aca370d7ab1b3..54e9394b511db 100644 --- a/src/connector/src/sink/kinesis.rs +++ b/src/connector/src/sink/kinesis.rs @@ -176,7 +176,7 @@ impl KinesisSinkWriter { .common .build_client() .await - .map_err(SinkError::Kinesis)?; + .map_err(|err| SinkError::Kinesis(anyhow!(err)))?; Ok(Self { config: config.clone(), formatter, diff --git a/src/connector/src/sink/log_store.rs b/src/connector/src/sink/log_store.rs index f74a22d3b80e5..3879d817d1a84 100644 --- a/src/connector/src/sink/log_store.rs +++ b/src/connector/src/sink/log_store.rs @@ -19,9 +19,9 @@ use std::future::{poll_fn, Future}; use std::sync::Arc; use std::task::Poll; -use anyhow::anyhow; use futures::{TryFuture, TryFutureExt}; use risingwave_common::array::StreamChunk; +use risingwave_common::bail; use risingwave_common::buffer::Bitmap; use risingwave_common::util::epoch::{EpochPair, INVALID_EPOCH}; 
@@ -62,13 +62,13 @@ impl TruncateOffset { } } - pub fn check_next_offset(&self, next_offset: TruncateOffset) -> anyhow::Result<()> { + pub fn check_next_offset(&self, next_offset: TruncateOffset) -> LogStoreResult<()> { if *self >= next_offset { - Err(anyhow!( + bail!( "next offset {:?} should be later than current offset {:?}", next_offset, self - )) + ) } else { Ok(()) } @@ -81,22 +81,22 @@ impl TruncateOffset { .. } => { if epoch != *offset_epoch { - return Err(anyhow!( + bail!( "new item epoch {} not match current chunk offset epoch {}", epoch, offset_epoch - )); + ); } } TruncateOffset::Barrier { epoch: offset_epoch, } => { if epoch <= *offset_epoch { - return Err(anyhow!( + bail!( "new item epoch {} not exceed barrier offset epoch {}", epoch, offset_epoch - )); + ); } } } @@ -534,6 +534,7 @@ mod tests { use tokio::sync::oneshot; use tokio::sync::oneshot::Receiver; + use super::LogStoreResult; use crate::sink::log_store::{DeliveryFutureManager, TruncateOffset}; #[test] @@ -587,7 +588,7 @@ mod tests { } type TestFuture = impl TryFuture + Unpin + 'static; - fn to_test_future(rx: Receiver>) -> TestFuture { + fn to_test_future(rx: Receiver>) -> TestFuture { async move { rx.await.unwrap() }.boxed() } diff --git a/src/connector/src/sink/mod.rs b/src/connector/src/sink/mod.rs index fc6712f17604b..6abe8d93b5956 100644 --- a/src/connector/src/sink/mod.rs +++ b/src/connector/src/sink/mod.rs @@ -61,6 +61,7 @@ pub use tracing; use self::catalog::{SinkFormatDesc, SinkType}; use self::mock_coordination_client::{MockMetaClient, SinkCoordinationRpcClientEnum}; +use crate::error::ConnectorError; use crate::sink::catalog::desc::SinkDesc; use crate::sink::catalog::{SinkCatalog, SinkId}; use crate::sink::log_store::{LogReader, LogStoreReadItem, LogStoreResult, TruncateOffset}; @@ -532,6 +533,12 @@ pub enum SinkError { #[backtrace] anyhow::Error, ), + #[error(transparent)] + Connector( + #[from] + #[backtrace] + ConnectorError, + ), } impl From for SinkError { diff --git 
a/src/connector/src/sink/nats.rs b/src/connector/src/sink/nats.rs index 7a97771dee8ef..2bc4160e7a263 100644 --- a/src/connector/src/sink/nats.rs +++ b/src/connector/src/sink/nats.rs @@ -107,15 +107,12 @@ impl Sink for NatsSink { "Nats sink only support append-only mode" ))); } - match self.config.common.build_client().await { - Ok(_client) => {} - Err(error) => { - return Err(SinkError::Nats(anyhow!( - "validate nats sink error: {:?}", - error - ))); - } - } + let _client = self + .config + .common + .build_client() + .await + .context("validate nats sink error")?; Ok(()) } @@ -134,7 +131,7 @@ impl NatsSinkWriter { .common .build_context() .await - .map_err(|e| SinkError::Nats(anyhow!("nats sink error: {:?}", e)))?; + .map_err(|e| SinkError::Nats(anyhow!(e)))?; Ok::<_, SinkError>(Self { config: config.clone(), context, diff --git a/src/connector/src/sink/redis.rs b/src/connector/src/sink/redis.rs index d79d67e4adc2e..f1a07b66cb692 100644 --- a/src/connector/src/sink/redis.rs +++ b/src/connector/src/sink/redis.rs @@ -30,6 +30,7 @@ use super::formatter::SinkFormatterImpl; use super::writer::FormattedSink; use super::{SinkError, SinkParam}; use crate::dispatch_sink_formatter_str_key_impl; +use crate::error::ConnectorResult; use crate::sink::log_store::DeliveryFutureManagerAddFuture; use crate::sink::writer::{ AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt, @@ -47,7 +48,7 @@ pub struct RedisCommon { } impl RedisCommon { - pub(crate) fn build_client(&self) -> anyhow::Result { + pub(crate) fn build_client(&self) -> ConnectorResult { let client = RedisClient::open(self.url.clone())?; Ok(client) } diff --git a/src/connector/src/sink/remote.rs b/src/connector/src/sink/remote.rs index dfc3bed0e372c..4c4a662f83178 100644 --- a/src/connector/src/sink/remote.rs +++ b/src/connector/src/sink/remote.rs @@ -26,6 +26,7 @@ use itertools::Itertools; use jni::JavaVM; use prost::Message; use risingwave_common::array::StreamChunk; +use 
risingwave_common::bail; use risingwave_common::catalog::{ColumnDesc, ColumnId}; use risingwave_common::types::DataType; use risingwave_jni_core::jvm_runtime::JVM; @@ -56,9 +57,10 @@ use tokio_stream::wrappers::ReceiverStream; use tracing::warn; use super::elasticsearch::{StreamChunkConverter, ES_OPTION_DELIMITER}; +use crate::error::ConnectorResult; use crate::sink::catalog::desc::SinkDesc; use crate::sink::coordinate::CoordinatedSinkWriter; -use crate::sink::log_store::{LogStoreReadItem, TruncateOffset}; +use crate::sink::log_store::{LogStoreReadItem, LogStoreResult, TruncateOffset}; use crate::sink::writer::{LogSinkerOf, SinkWriter, SinkWriterExt}; use crate::sink::{ DummySinkCommitCoordinator, LogSinker, Result, Sink, SinkCommitCoordinator, SinkError, @@ -157,14 +159,12 @@ impl Sink for RemoteSink { } } -async fn validate_remote_sink(param: &SinkParam, sink_name: &str) -> anyhow::Result<()> { +async fn validate_remote_sink(param: &SinkParam, sink_name: &str) -> ConnectorResult<()> { if sink_name == ElasticSearchSink::SINK_NAME && param.downstream_pk.len() > 1 && param.properties.get(ES_OPTION_DELIMITER).is_none() { - return Err(anyhow!( - "Es sink only support single pk or pk with delimiter option" - )); + bail!("Es sink only support single pk or pk with delimiter option"); } // FIXME: support struct and array in stream sink param.columns.iter().map(|col| { @@ -215,7 +215,7 @@ async fn validate_remote_sink(param: &SinkParam, sink_name: &str) -> anyhow::Res let jvm = JVM.get_or_init()?; let sink_param = param.to_proto(); - spawn_blocking(move || { + spawn_blocking(move || -> anyhow::Result<()> { let mut env = jvm.attach_current_thread()?; let validate_sink_request = ValidateSinkRequest { sink_param: Some(sink_param), @@ -236,16 +236,13 @@ async fn validate_remote_sink(param: &SinkParam, sink_name: &str) -> anyhow::Res validate_sink_response.error.map_or_else( || Ok(()), // If there is no error message, return Ok here. 
- |err| { - Err(anyhow!(format!( - "sink cannot pass validation: {}", - err.error_message - ))) - }, + |err| bail!("sink cannot pass validation: {}", err.error_message), ) }) .await - .context("JoinHandle returns error")? + .context("JoinHandle returns error")??; + + Ok(()) } pub struct RemoteLogSinker { @@ -338,12 +335,11 @@ impl LogSinker for RemoteLogSinker { anyhow!("get unsent offset {:?} in response", persisted_offset) })?; if sent_offset != persisted_offset { - return Err(anyhow!( + bail!( "new response offset {:?} not match the buffer offset {:?}", persisted_offset, sent_offset - ) - .into()); + ); } if let (TruncateOffset::Barrier { .. }, Some(start_time)) = @@ -366,13 +362,13 @@ impl LogSinker for RemoteLogSinker { loop { let either_result: futures::future::Either< Option, - anyhow::Result<(u64, LogStoreReadItem)>, + LogStoreResult<(u64, LogStoreReadItem)>, > = drop_either_future( select(pin!(response_rx.recv()), pin!(log_reader.next_item())).await, ); match either_result { futures::future::Either::Left(opt) => { - let response = opt.ok_or_else(|| anyhow!("end of response stream"))?; + let response = opt.context("end of response stream")?; match response { SinkWriterStreamResponse { response: @@ -569,7 +565,7 @@ impl CoordinatedRemoteSinkWriter { } fn for_test( - response_receiver: Receiver>, + response_receiver: Receiver>, request_sender: Sender, ) -> CoordinatedRemoteSinkWriter { let properties = HashMap::from([("output.path".to_string(), "/tmp/rw".to_string())]); diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs index e26bc2dbcb401..625f3dedb30bc 100644 --- a/src/connector/src/source/base.rs +++ b/src/connector/src/source/base.rs @@ -15,7 +15,7 @@ use std::collections::{BTreeMap, HashMap}; use std::sync::Arc; -use anyhow::{anyhow, Result}; +use anyhow::anyhow; use async_trait::async_trait; use aws_sdk_s3::types::Object; use bytes::Bytes; @@ -25,6 +25,7 @@ use futures::Stream; use itertools::Itertools; use 
parking_lot::Mutex; use risingwave_common::array::StreamChunk; +use risingwave_common::bail; use risingwave_common::catalog::TableId; use risingwave_common::error::ErrorSuppressor; use risingwave_common::metrics::GLOBAL_ERROR_METRICS; @@ -45,6 +46,7 @@ use super::kinesis::KinesisMeta; use super::monitor::SourceMetrics; use super::nexmark::source::message::NexmarkMeta; use super::{GCS_CONNECTOR, OPENDAL_S3_CONNECTOR, POSIX_FS_CONNECTOR}; +use crate::error::ConnectorResult as Result; use crate::parser::ParserConfig; pub(crate) use crate::source::common::CommonSplitReader; use crate::source::filesystem::FsPageItem; @@ -69,7 +71,9 @@ pub trait TryFromHashmap: Sized + UnknownFields { /// Each instance should add a `#[derive(with_options::WithOptions)]` marker. pub trait SourceProperties: TryFromHashmap + Clone + WithOptions { const SOURCE_NAME: &'static str; - type Split: SplitMetaData + TryFrom + Into; + type Split: SplitMetaData + + TryFrom + + Into; type SplitEnumerator: SplitEnumerator; type SplitReader: SplitReader; @@ -91,10 +95,10 @@ impl TryFromHashmap for P { if !deny_unknown_fields || res.unknown_fields().is_empty() { Ok(res) } else { - Err(anyhow!( + bail!( "Unknown fields in the WITH clause: {:?}", res.unknown_fields() - )) + ) } } } @@ -340,21 +344,22 @@ pub fn extract_source_struct(info: &PbStreamSourceInfo) -> Result } (PbFormatType::Plain, PbEncodeType::Bytes) => (SourceFormat::Plain, SourceEncode::Bytes), (format, encode) => { - return Err(anyhow!( + bail!( "Unsupported combination of format {:?} and encode {:?}", format, encode - )); + ); } }; Ok(SourceStruct::new(format, encode)) } -pub type BoxSourceStream = BoxStream<'static, anyhow::Result>>; +pub type BoxSourceStream = BoxStream<'static, crate::error::ConnectorResult>>; -pub trait ChunkSourceStream = Stream> + Send + 'static; -pub type BoxChunkSourceStream = BoxStream<'static, anyhow::Result>; -pub type BoxTryStream = BoxStream<'static, anyhow::Result>; +pub trait ChunkSourceStream = + Stream> + 
Send + 'static; +pub type BoxChunkSourceStream = BoxStream<'static, crate::error::ConnectorResult>; +pub type BoxTryStream = BoxStream<'static, crate::error::ConnectorResult>; /// [`SplitReader`] is a new abstraction of the external connector read interface which is /// responsible for parsing, it is used to read messages from the outside and transform them into a @@ -370,7 +375,7 @@ pub trait SplitReader: Sized + Send { parser_config: ParserConfig, source_ctx: SourceContextRef, columns: Option>, - ) -> anyhow::Result; + ) -> crate::error::ConnectorResult; fn into_stream(self) -> BoxChunkSourceStream; } @@ -426,7 +431,7 @@ impl ConnectorProperties { PropType, PropType::try_from_hashmap(with_properties, deny_unknown_fields) .map(ConnectorProperties::from), - |other| Err(anyhow!("connector '{}' is not supported", other)) + |other| bail!("connector '{}' is not supported", other) ) } @@ -462,7 +467,7 @@ impl From<&SplitImpl> for ConnectorSplit { } impl TryFrom<&ConnectorSplit> for SplitImpl { - type Error = anyhow::Error; + type Error = crate::error::ConnectorError; fn try_from(split: &ConnectorSplit) -> std::result::Result { match_source_name_str!( @@ -474,7 +479,7 @@ impl TryFrom<&ConnectorSplit> for SplitImpl { ) .map(Into::into) }, - |other| Err(anyhow!("connector '{}' is not supported", other)) + |other| bail!("connector '{}' is not supported", other) ) } } @@ -503,7 +508,7 @@ impl SplitImpl { split_type.to_lowercase().as_str(), PropType, ::Split::restore_from_json(value).map(Into::into), - |other| Err(anyhow!("connector '{}' is not supported", other)) + |other| bail!("connector '{}' is not supported", other) ) } } @@ -621,7 +626,7 @@ pub trait SplitMetaData: Sized { fn encode_to_json(&self) -> JsonbVal; fn restore_from_json(value: JsonbVal) -> Result; - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()>; + fn update_with_offset(&mut self, start_offset: String) -> crate::error::ConnectorResult<()>; } /// [`ConnectorState`] maintains the 
consuming splits' info. In specific split readers, diff --git a/src/connector/src/source/cdc/enumerator/mod.rs b/src/connector/src/source/cdc/enumerator/mod.rs index 58bc42e537578..b5ac4826921bc 100644 --- a/src/connector/src/source/cdc/enumerator/mod.rs +++ b/src/connector/src/source/cdc/enumerator/mod.rs @@ -27,6 +27,7 @@ use risingwave_pb::connector_service::{ SourceCommonParam, SourceType, ValidateSourceRequest, ValidateSourceResponse, }; +use crate::error::ConnectorResult; use crate::source::cdc::{ CdcProperties, CdcSourceTypeTrait, CdcSplitBase, Citus, DebeziumCdcSplit, MySqlCdcSplit, Mysql, Postgres, PostgresCdcSplit, @@ -54,7 +55,7 @@ where async fn new( props: CdcProperties, context: SourceEnumeratorContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let server_addrs = props .properties .get(DATABASE_SERVERS_KEY) @@ -72,7 +73,7 @@ where ); let source_id = context.info.source_id; - tokio::task::spawn_blocking(move || { + tokio::task::spawn_blocking(move || -> anyhow::Result<()> { let mut env = JVM.get_or_init()?.attach_current_thread()?; let validate_source_request = ValidateSourceRequest { @@ -100,15 +101,11 @@ where .deref(), )?; - validate_source_response.error.map_or_else( - || Ok(()), - |err| { - Err(anyhow!(format!( - "source cannot pass validation: {}", - err.error_message - ))) - }, - ) + if let Some(error) = validate_source_response.error { + return Err(anyhow!(error.error_message).context("source cannot pass validation")); + } + + Ok(()) }) .await .context("failed to validate source")??; @@ -121,7 +118,7 @@ where }) } - async fn list_splits(&mut self) -> anyhow::Result>> { + async fn list_splits(&mut self) -> ConnectorResult>> { Ok(self.list_cdc_splits()) } } diff --git a/src/connector/src/source/cdc/external/mod.rs b/src/connector/src/source/cdc/external/mod.rs index f281d1ecea58a..a9808e3a9e1e2 100644 --- a/src/connector/src/source/cdc/external/mod.rs +++ b/src/connector/src/source/cdc/external/mod.rs @@ -407,7 +407,7 @@ impl 
MySqlExternalTableReader { bail!("primary key {} cannot be null", pk); } }) - .try_collect()?; + .try_collect::<_, _, ConnectorError>()?; let rs_stream = sql .with(Params::from(params)) diff --git a/src/connector/src/source/cdc/external/postgres.rs b/src/connector/src/source/cdc/external/postgres.rs index bd8a0b51c04e7..9f9a055fd7d8f 100644 --- a/src/connector/src/source/cdc/external/postgres.rs +++ b/src/connector/src/source/cdc/external/postgres.rs @@ -199,7 +199,7 @@ impl PostgresExternalTableReader { let stream = client.query_raw(&sql, ¶ms).await?; let row_stream = stream.map(|row| { let row = row?; - Ok::<_, anyhow::Error>(postgres_row_to_owned_row(row, &self.rw_schema)) + Ok::<_, crate::error::ConnectorError>(postgres_row_to_owned_row(row, &self.rw_schema)) }); pin_mut!(row_stream); diff --git a/src/connector/src/source/cdc/mod.rs b/src/connector/src/source/cdc/mod.rs index 5fc6aefdfefdd..b663274c30207 100644 --- a/src/connector/src/source/cdc/mod.rs +++ b/src/connector/src/source/cdc/mod.rs @@ -28,6 +28,7 @@ use risingwave_pb::plan_common::ExternalTableDesc; use simd_json::prelude::ArrayTrait; pub use source::*; +use crate::error::ConnectorResult; use crate::source::{SourceProperties, SplitImpl, TryFromHashmap}; use crate::{for_all_classified_sources, impl_cdc_source_type}; @@ -91,7 +92,7 @@ impl TryFromHashmap for CdcProperties { fn try_from_hashmap( properties: HashMap, _deny_unknown_fields: bool, - ) -> anyhow::Result { + ) -> ConnectorResult { let is_multi_table_shared = properties .get(CDC_SHARING_MODE_KEY) .is_some_and(|v| v == "true"); @@ -107,7 +108,7 @@ impl TryFromHashmap for CdcProperties { impl SourceProperties for CdcProperties where - DebeziumCdcSplit: TryFrom + Into, + DebeziumCdcSplit: TryFrom + Into, DebeziumSplitEnumerator: ListCdcSplits, { type Split = DebeziumCdcSplit; diff --git a/src/connector/src/source/cdc/source/reader.rs b/src/connector/src/source/cdc/source/reader.rs index c21d579df7778..43753dad599c7 100644 --- 
a/src/connector/src/source/cdc/source/reader.rs +++ b/src/connector/src/source/cdc/source/reader.rs @@ -14,11 +14,12 @@ use std::str::FromStr; -use anyhow::{anyhow, Result}; +use anyhow::anyhow; use async_trait::async_trait; use futures_async_stream::try_stream; use itertools::Itertools; use prost::Message; +use risingwave_common::bail; use risingwave_common::metrics::GLOBAL_ERROR_METRICS; use risingwave_common::util::addr::HostAddr; use risingwave_jni_core::jvm_runtime::JVM; @@ -29,6 +30,7 @@ use risingwave_pb::connector_service::{ use thiserror_ext::AsReport; use tokio::sync::mpsc; +use crate::error::{ConnectorError, ConnectorResult}; use crate::parser::ParserConfig; use crate::source::base::SourceMessage; use crate::source::cdc::{CdcProperties, CdcSourceType, CdcSourceTypeTrait, DebeziumCdcSplit}; @@ -66,7 +68,7 @@ impl SplitReader for CdcSplitReader { parser_config: ParserConfig, source_ctx: SourceContextRef, _columns: Option>, - ) -> Result { + ) -> ConnectorResult { assert_eq!(splits.len(), 1); let split = splits.into_iter().next().unwrap(); let split_id = split.id(); @@ -117,10 +119,8 @@ impl SplitReader for CdcSplitReader { let (mut env, get_event_stream_request_bytes) = match result { Ok(inner) => inner, Err(e) => { - let _ = tx.blocking_send(Err(anyhow!( - "err before calling runJniDbzSourceThread: {:?}", - e - ))); + let _ = tx + .blocking_send(Err(e.context("err before calling runJniDbzSourceThread"))); return; } }; @@ -154,7 +154,7 @@ impl SplitReader for CdcSplitReader { } }; if !inited { - return Err(anyhow!("failed to start cdc connector")); + bail!("failed to start cdc connector"); } } tracing::info!(?source_id, "cdc connector started"); @@ -196,7 +196,7 @@ impl SplitReader for CdcSplitReader { } impl CommonSplitReader for CdcSplitReader { - #[try_stream(ok = Vec, error = anyhow::Error)] + #[try_stream(ok = Vec, error = ConnectorError)] async fn into_data_stream(self) { let source_type = T::source_type(); let mut rx = self.rx; @@ -225,6 +225,6 @@ 
impl CommonSplitReader for CdcSplitReader { } } - Err(anyhow!("all senders are dropped"))?; + bail!("all senders are dropped"); } } diff --git a/src/connector/src/source/cdc/split.rs b/src/connector/src/source/cdc/split.rs index a7357d231b78b..30165f1939e0f 100644 --- a/src/connector/src/source/cdc/split.rs +++ b/src/connector/src/source/cdc/split.rs @@ -14,10 +14,11 @@ use std::marker::PhantomData; -use anyhow::{anyhow, Context}; +use anyhow::Context; use risingwave_common::types::JsonbVal; use serde::{Deserialize, Serialize}; +use crate::error::ConnectorResult; use crate::source::cdc::external::DebeziumOffset; use crate::source::cdc::CdcSourceTypeTrait; use crate::source::{SplitId, SplitMetaData}; @@ -63,7 +64,7 @@ impl MySqlCdcSplit { Self { inner: split } } - pub fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + pub fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { let mut snapshot_done = self.inner.snapshot_done; if !snapshot_done { let dbz_offset: DebeziumOffset = @@ -102,7 +103,7 @@ impl PostgresCdcSplit { } } - pub fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + pub fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { let mut snapshot_done = self.inner.snapshot_done; if !snapshot_done { let dbz_offset: DebeziumOffset = @@ -154,11 +155,11 @@ impl SplitMetaData for DebeziumCdcSplit { serde_json::to_value(self.clone()).unwrap().into() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { // TODO: may check T to get the specific cdc type assert!(self.mysql_split.is_some() || 
self.pg_split.is_some()); if let Some(split) = &mut self.mysql_split { diff --git a/src/connector/src/source/common.rs b/src/connector/src/source/common.rs index 5d39e303e7590..c145bd7f403da 100644 --- a/src/connector/src/source/common.rs +++ b/src/connector/src/source/common.rs @@ -16,14 +16,15 @@ use futures::{Stream, StreamExt, TryStreamExt}; use futures_async_stream::try_stream; use risingwave_common::array::StreamChunk; +use crate::error::{ConnectorError, ConnectorResult}; use crate::parser::ParserConfig; use crate::source::{SourceContextRef, SourceMessage, SplitReader}; pub(crate) trait CommonSplitReader: SplitReader + 'static { - fn into_data_stream(self) -> impl Stream>> + Send; + fn into_data_stream(self) -> impl Stream>> + Send; } -#[try_stream(boxed, ok = StreamChunk, error = anyhow::Error)] +#[try_stream(boxed, ok = StreamChunk, error = ConnectorError)] pub(crate) async fn into_chunk_stream( reader: impl CommonSplitReader, parser_config: ParserConfig, diff --git a/src/connector/src/source/datagen/enumerator/mod.rs b/src/connector/src/source/datagen/enumerator/mod.rs index 47eb54cf24005..5b5b473656d09 100644 --- a/src/connector/src/source/datagen/enumerator/mod.rs +++ b/src/connector/src/source/datagen/enumerator/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use anyhow::Context; use async_trait::async_trait; use crate::source::datagen::{DatagenProperties, DatagenSplit}; @@ -30,13 +31,15 @@ impl SplitEnumerator for DatagenSplitEnumerator { async fn new( properties: DatagenProperties, _context: SourceEnumeratorContextRef, - ) -> anyhow::Result { + ) -> crate::error::ConnectorResult { let split_num = properties.split_num.unwrap_or_else(|| "1".to_string()); - let split_num = split_num.parse::()?; + let split_num = split_num + .parse::() + .context("failed to parse datagen split num")?; Ok(Self { split_num }) } - async fn list_splits(&mut self) -> anyhow::Result> { + async fn list_splits(&mut self) -> crate::error::ConnectorResult> { let mut splits = vec![]; for i in 0..self.split_num { splits.push(DatagenSplit { diff --git a/src/connector/src/source/datagen/source/generator.rs b/src/connector/src/source/datagen/source/generator.rs index 18376f3ce73ab..1c05c6b4ffc8f 100644 --- a/src/connector/src/source/datagen/source/generator.rs +++ b/src/connector/src/source/datagen/source/generator.rs @@ -21,6 +21,7 @@ use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, ScalarImpl}; use risingwave_common::util::iter_util::ZipEqFast; +use crate::error::ConnectorResult; use crate::parser::{EncodingProperties, ProtocolProperties, SpecificParserConfig}; use crate::source::{SourceMessage, SourceMeta, SplitId}; @@ -59,7 +60,7 @@ impl DatagenEventGenerator { split_id: SplitId, split_num: u64, split_index: u64, - ) -> anyhow::Result { + ) -> ConnectorResult { let partition_rows_per_second = if rows_per_second % split_num > split_index { rows_per_second / split_num + 1 } else { @@ -76,7 +77,7 @@ impl DatagenEventGenerator { }) } - #[try_stream(boxed, ok = Vec, error = anyhow::Error)] + #[try_stream(boxed, ok = Vec, error = crate::error::ConnectorError)] pub async fn into_msg_stream(mut self) { let mut interval = tokio::time::interval(Duration::from_secs(1)); const MAX_ROWS_PER_YIELD: u64 = 1024; @@ -156,7 +157,7 @@ 
impl DatagenEventGenerator { } } - #[try_stream(ok = StreamChunk, error = anyhow::Error)] + #[try_stream(ok = StreamChunk, error = crate::error::ConnectorError)] pub async fn into_native_stream(mut self) { let mut interval = tokio::time::interval(Duration::from_secs(1)); const MAX_ROWS_PER_YIELD: u64 = 1024; diff --git a/src/connector/src/source/datagen/source/reader.rs b/src/connector/src/source/datagen/source/reader.rs index 2e1b5f7917261..0b522c4e7c938 100644 --- a/src/connector/src/source/datagen/source/reader.rs +++ b/src/connector/src/source/datagen/source/reader.rs @@ -14,13 +14,14 @@ use std::collections::HashMap; -use anyhow::{Context, Result}; +use anyhow::Context; use async_trait::async_trait; use futures::{Stream, StreamExt, TryStreamExt}; use risingwave_common::field_generator::{FieldGeneratorImpl, VarcharProperty}; use thiserror_ext::AsReport; use super::generator::DatagenEventGenerator; +use crate::error::{ConnectorResult, ConnectorResult as Result}; use crate::parser::{EncodingProperties, ParserConfig, ProtocolProperties}; use crate::source::data_gen_util::spawn_data_generation_stream; use crate::source::datagen::source::SEQUENCE_FIELD_KIND; @@ -183,7 +184,7 @@ impl SplitReader for DatagenSplitReader { } impl CommonSplitReader for DatagenSplitReader { - fn into_data_stream(self) -> impl Stream, anyhow::Error>> { + fn into_data_stream(self) -> impl Stream>> { // Will buffer at most 4 event chunks. 
const BUFFER_SIZE: usize = 4; spawn_data_generation_stream(self.generator.into_msg_stream(), BUFFER_SIZE) @@ -253,13 +254,15 @@ fn generator_from_data_type( random_seed, ) } + .map_err(Into::into) } DataType::Varchar => { let length_key = format!("fields.{}.length", name); let length_value = fields_option_map .get(&length_key) .map(|s| s.parse::()) - .transpose()?; + .transpose() + .context("failed to parse the length of varchar field")?; Ok(FieldGeneratorImpl::with_varchar( &VarcharProperty::RandomFixedLength(length_value), random_seed, @@ -280,7 +283,7 @@ fn generator_from_data_type( Ok((field_name.to_string(), gen)) }) .collect::>()?; - FieldGeneratorImpl::with_struct_fields(struct_fields) + FieldGeneratorImpl::with_struct_fields(struct_fields).map_err(Into::into) } DataType::List(datatype) => { let length_key = format!("fields.{}.length", name); @@ -293,7 +296,7 @@ fn generator_from_data_type( split_num, offset, )?; - FieldGeneratorImpl::with_list(generator, length_value) + FieldGeneratorImpl::with_list(generator, length_value).map_err(Into::into) } _ => { let kind_key = format!("fields.{}.kind", name); @@ -312,12 +315,14 @@ fn generator_from_data_type( split_num, offset, ) + .map_err(Into::into) } else { let min_key = format!("fields.{}.min", name); let max_key = format!("fields.{}.max", name); let min_value = fields_option_map.get(&min_key).map(|s| s.to_string()); let max_value = fields_option_map.get(&max_key).map(|s| s.to_string()); FieldGeneratorImpl::with_number_random(data_type, min_value, max_value, random_seed) + .map_err(Into::into) } } } diff --git a/src/connector/src/source/datagen/split.rs b/src/connector/src/source/datagen/split.rs index c2e0bea1f8fb4..6d51cfa7d47ae 100644 --- a/src/connector/src/source/datagen/split.rs +++ b/src/connector/src/source/datagen/split.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::anyhow; use risingwave_common::types::JsonbVal; use serde::{Deserialize, Serialize}; +use crate::error::ConnectorResult; use crate::source::base::SplitMetaData; use crate::source::SplitId; @@ -32,15 +32,15 @@ impl SplitMetaData for DatagenSplit { format!("{}-{}", self.split_num, self.split_index).into() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } fn encode_to_json(&self) -> JsonbVal { serde_json::to_value(self.clone()).unwrap().into() } - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { self.start_offset = Some(start_offset.as_str().parse::().unwrap()); Ok(()) } diff --git a/src/connector/src/source/filesystem/file_common.rs b/src/connector/src/source/filesystem/file_common.rs index 3a537509edb88..ccff7315491ba 100644 --- a/src/connector/src/source/filesystem/file_common.rs +++ b/src/connector/src/source/filesystem/file_common.rs @@ -15,12 +15,12 @@ use std::fmt::Debug; use std::hash::Hash; use std::marker::PhantomData; -use anyhow::anyhow; use aws_sdk_s3::types::Object; use risingwave_common::types::{JsonbVal, Timestamptz}; use serde::{Deserialize, Serialize}; use super::opendal_source::OpendalSource; +use crate::error::ConnectorResult; use crate::source::{SplitId, SplitMetaData}; /// [`FsSplit`] Describes a file or a split of a file. 
A file is a generic concept, @@ -47,15 +47,15 @@ impl SplitMetaData for FsSplit { self.name.as_str().into() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } fn encode_to_json(&self) -> JsonbVal { serde_json::to_value(self.clone()).unwrap().into() } - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { let offset = start_offset.parse().unwrap(); self.offset = offset; Ok(()) @@ -98,15 +98,15 @@ impl SplitMetaData for OpendalFsSplit { self.name.as_str().into() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } fn encode_to_json(&self) -> JsonbVal { serde_json::to_value(self.clone()).unwrap().into() } - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { let offset = start_offset.parse().unwrap(); self.offset = offset; Ok(()) diff --git a/src/connector/src/source/filesystem/nd_streaming.rs b/src/connector/src/source/filesystem/nd_streaming.rs index 7eb8a84c503ad..711e1a15e981c 100644 --- a/src/connector/src/source/filesystem/nd_streaming.rs +++ b/src/connector/src/source/filesystem/nd_streaming.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::anyhow; +use anyhow::Context as _; use bytes::BytesMut; use futures::io::Cursor; use futures::AsyncBufReadExt; @@ -26,7 +26,7 @@ pub fn need_nd_streaming(encode_config: &EncodingProperties) -> bool { || matches!(encode_config, EncodingProperties::Csv(_)) } -#[try_stream(boxed, ok = Vec, error = anyhow::Error)] +#[try_stream(boxed, ok = Vec, error = crate::error::ConnectorError)] /// This function splits a byte stream by the newline separator "(\r)\n" into a message stream. /// It can be difficult to split and compute offsets correctly when the bytes are received in /// chunks. There are two cases to consider: @@ -50,7 +50,7 @@ pub async fn split_stream(data_stream: BoxSourceStream) { .map(|msg| (msg.offset.clone(), msg.split_id.clone(), msg.meta.clone())) .unwrap(); - let mut offset: usize = offset.parse()?; + let mut offset: usize = offset.parse().context("failed to parse the offset")?; let mut buf = BytesMut::new(); for msg in batch { let payload = msg.payload.unwrap_or_default(); @@ -108,7 +108,7 @@ pub async fn split_stream(data_stream: BoxSourceStream) { last_message = msgs.pop(); } } - Err(e) => return Err(anyhow!(e)), + Err(e) => return Err(e.into()), } line_cnt += 1; diff --git a/src/connector/src/source/filesystem/opendal_source/gcs_source.rs b/src/connector/src/source/filesystem/opendal_source/gcs_source.rs index d6f7b44bff591..01594af4e4bad 100644 --- a/src/connector/src/source/filesystem/opendal_source/gcs_source.rs +++ b/src/connector/src/source/filesystem/opendal_source/gcs_source.rs @@ -21,11 +21,12 @@ use opendal::Operator; use super::opendal_enumerator::OpendalEnumerator; use super::{GcsProperties, OpendalSource}; +use crate::error::ConnectorResult; use crate::source::filesystem::s3::enumerator::get_prefix; impl OpendalEnumerator { /// create opendal gcs source. - pub fn new_gcs_source(gcs_properties: GcsProperties) -> anyhow::Result { + pub fn new_gcs_source(gcs_properties: GcsProperties) -> ConnectorResult { // Create gcs builder. 
let mut builder = Gcs::default(); diff --git a/src/connector/src/source/filesystem/opendal_source/mod.rs b/src/connector/src/source/filesystem/opendal_source/mod.rs index e0c5a22f1fd90..15371a0da90a6 100644 --- a/src/connector/src/source/filesystem/opendal_source/mod.rs +++ b/src/connector/src/source/filesystem/opendal_source/mod.rs @@ -27,6 +27,7 @@ use self::opendal_enumerator::OpendalEnumerator; use self::opendal_reader::OpendalReader; use super::s3::S3PropertiesCommon; use super::OpendalFsSplit; +use crate::error::ConnectorResult; use crate::source::{SourceProperties, UnknownFields}; pub const GCS_CONNECTOR: &str = "gcs"; @@ -71,7 +72,7 @@ impl SourceProperties for GcsProperties { pub trait OpendalSource: Send + Sync + 'static + Clone + PartialEq { type Properties: SourceProperties + Send + Sync; - fn new_enumerator(properties: Self::Properties) -> anyhow::Result>; + fn new_enumerator(properties: Self::Properties) -> ConnectorResult>; } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -80,7 +81,7 @@ pub struct OpendalS3; impl OpendalSource for OpendalS3 { type Properties = OpendalS3Properties; - fn new_enumerator(properties: Self::Properties) -> anyhow::Result> { + fn new_enumerator(properties: Self::Properties) -> ConnectorResult> { OpendalEnumerator::new_s3_source(properties.s3_properties, properties.assume_role) } } @@ -91,7 +92,7 @@ pub struct OpendalGcs; impl OpendalSource for OpendalGcs { type Properties = GcsProperties; - fn new_enumerator(properties: Self::Properties) -> anyhow::Result> { + fn new_enumerator(properties: Self::Properties) -> ConnectorResult> { OpendalEnumerator::new_gcs_source(properties) } } @@ -102,7 +103,7 @@ pub struct OpendalPosixFs; impl OpendalSource for OpendalPosixFs { type Properties = PosixFsProperties; - fn new_enumerator(properties: Self::Properties) -> anyhow::Result> { + fn new_enumerator(properties: Self::Properties) -> ConnectorResult> { OpendalEnumerator::new_posix_fs_source(properties) } } diff --git 
a/src/connector/src/source/filesystem/opendal_source/opendal_enumerator.rs b/src/connector/src/source/filesystem/opendal_source/opendal_enumerator.rs index 318467eea6069..96646ade0e1df 100644 --- a/src/connector/src/source/filesystem/opendal_source/opendal_enumerator.rs +++ b/src/connector/src/source/filesystem/opendal_source/opendal_enumerator.rs @@ -22,6 +22,7 @@ use opendal::{Metakey, Operator}; use risingwave_common::types::Timestamptz; use super::OpendalSource; +use crate::error::ConnectorResult; use crate::source::filesystem::{FsPageItem, OpendalFsSplit}; use crate::source::{SourceEnumeratorContextRef, SplitEnumerator}; @@ -42,11 +43,11 @@ impl SplitEnumerator for OpendalEnumerator { async fn new( properties: Src::Properties, _context: SourceEnumeratorContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { Src::new_enumerator(properties) } - async fn list_splits(&mut self) -> anyhow::Result>> { + async fn list_splits(&mut self) -> ConnectorResult>> { let empty_split: OpendalFsSplit = OpendalFsSplit::empty_split(); Ok(vec![empty_split]) @@ -54,7 +55,7 @@ impl SplitEnumerator for OpendalEnumerator { } impl OpendalEnumerator { - pub async fn list(&self) -> anyhow::Result { + pub async fn list(&self) -> ConnectorResult { let prefix = match &self.prefix { Some(prefix) => prefix, None => "", @@ -100,4 +101,4 @@ impl OpendalEnumerator { &self.matcher } } -pub type ObjectMetadataIter = BoxStream<'static, anyhow::Result>; +pub type ObjectMetadataIter = BoxStream<'static, ConnectorResult>; diff --git a/src/connector/src/source/filesystem/opendal_source/opendal_reader.rs b/src/connector/src/source/filesystem/opendal_source/opendal_reader.rs index 8ed913cad64a4..5cb84652fbab6 100644 --- a/src/connector/src/source/filesystem/opendal_source/opendal_reader.rs +++ b/src/connector/src/source/filesystem/opendal_source/opendal_reader.rs @@ -22,6 +22,7 @@ use tokio_util::io::{ReaderStream, StreamReader}; use super::opendal_enumerator::OpendalEnumerator; use 
super::OpendalSource; +use crate::error::ConnectorResult; use crate::parser::{ByteStreamSourceParserImpl, ParserConfig}; use crate::source::filesystem::nd_streaming::need_nd_streaming; use crate::source::filesystem::{nd_streaming, OpendalFsSplit}; @@ -50,7 +51,7 @@ impl SplitReader for OpendalReader { parser_config: ParserConfig, source_ctx: SourceContextRef, _columns: Option>, - ) -> anyhow::Result { + ) -> ConnectorResult { let connector = Src::new_enumerator(properties)?; let opendal_reader = OpendalReader { connector, @@ -67,7 +68,7 @@ impl SplitReader for OpendalReader { } impl OpendalReader { - #[try_stream(boxed, ok = StreamChunk, error = anyhow::Error)] + #[try_stream(boxed, ok = StreamChunk, error = crate::error::ConnectorError)] async fn into_chunk_stream(self) { for split in self.splits { let actor_id = self.source_ctx.source_info.actor_id.to_string(); @@ -107,7 +108,7 @@ impl OpendalReader { } } - #[try_stream(boxed, ok = Vec, error = anyhow::Error)] + #[try_stream(boxed, ok = Vec, error = crate::error::ConnectorError)] pub async fn stream_read_object( op: Operator, split: OpendalFsSplit, diff --git a/src/connector/src/source/filesystem/opendal_source/posix_fs_source.rs b/src/connector/src/source/filesystem/opendal_source/posix_fs_source.rs index 748230ba5a16e..3a4fb7fcfeaa7 100644 --- a/src/connector/src/source/filesystem/opendal_source/posix_fs_source.rs +++ b/src/connector/src/source/filesystem/opendal_source/posix_fs_source.rs @@ -21,6 +21,7 @@ use opendal::Operator; use super::opendal_enumerator::OpendalEnumerator; use super::{OpendalSource, PosixFsProperties}; +use crate::error::ConnectorResult; // Posix fs source should only be used for testing. // For a single-CN cluster, the behavior is well-defined. It will read from the local file system. @@ -28,7 +29,7 @@ use super::{OpendalSource, PosixFsProperties}; impl OpendalEnumerator { /// create opendal posix fs source. 
- pub fn new_posix_fs_source(posix_fs_properties: PosixFsProperties) -> anyhow::Result { + pub fn new_posix_fs_source(posix_fs_properties: PosixFsProperties) -> ConnectorResult { // Create Fs builder. let mut builder = Fs::default(); diff --git a/src/connector/src/source/filesystem/opendal_source/s3_source.rs b/src/connector/src/source/filesystem/opendal_source/s3_source.rs index ef18ffa4b8fec..f4a548306885a 100644 --- a/src/connector/src/source/filesystem/opendal_source/s3_source.rs +++ b/src/connector/src/source/filesystem/opendal_source/s3_source.rs @@ -21,6 +21,7 @@ use opendal::Operator; use super::opendal_enumerator::OpendalEnumerator; use super::OpendalSource; +use crate::error::ConnectorResult; use crate::source::filesystem::s3::enumerator::get_prefix; use crate::source::filesystem::s3::S3PropertiesCommon; @@ -29,7 +30,7 @@ impl OpendalEnumerator { pub fn new_s3_source( s3_properties: S3PropertiesCommon, assume_role: Option, - ) -> anyhow::Result { + ) -> ConnectorResult { // Create s3 builder. 
let mut builder = S3::default(); builder.bucket(&s3_properties.bucket_name); diff --git a/src/connector/src/source/filesystem/s3/enumerator.rs b/src/connector/src/source/filesystem/s3/enumerator.rs index 7033ebdaf91ba..63a57b8574b6d 100644 --- a/src/connector/src/source/filesystem/s3/enumerator.rs +++ b/src/connector/src/source/filesystem/s3/enumerator.rs @@ -75,7 +75,7 @@ impl SplitEnumerator for S3SplitEnumerator { async fn new( properties: Self::Properties, _context: SourceEnumeratorContextRef, - ) -> anyhow::Result { + ) -> crate::error::ConnectorResult { let config = AwsAuthProps::from(&properties); let sdk_config = config.build_config().await?; let s3_client = s3_client(&sdk_config, Some(default_conn_config())); @@ -98,7 +98,7 @@ impl SplitEnumerator for S3SplitEnumerator { }) } - async fn list_splits(&mut self) -> anyhow::Result> { + async fn list_splits(&mut self) -> crate::error::ConnectorResult> { let mut objects = Vec::new(); loop { let (files, has_finished) = self.get_next_page::().await?; diff --git a/src/connector/src/source/filesystem/s3/source/reader.rs b/src/connector/src/source/filesystem/s3/source/reader.rs index 884f1d19062ce..3f485dc22383a 100644 --- a/src/connector/src/source/filesystem/s3/source/reader.rs +++ b/src/connector/src/source/filesystem/s3/source/reader.rs @@ -32,6 +32,7 @@ use tokio_util::io::ReaderStream; use crate::aws_utils::{default_conn_config, s3_client}; use crate::common::AwsAuthProps; +use crate::error::ConnectorResult; use crate::parser::{ByteStreamSourceParserImpl, ParserConfig}; use crate::source::base::{SplitMetaData, SplitReader}; use crate::source::filesystem::file_common::FsSplit; @@ -54,7 +55,7 @@ pub struct S3FileReader { } impl S3FileReader { - #[try_stream(boxed, ok = Vec, error = anyhow::Error)] + #[try_stream(boxed, ok = Vec, error = crate::error::ConnectorError)] pub async fn stream_read_object( client_for_s3: s3_client::Client, bucket_name: String, @@ -85,7 +86,9 @@ impl S3FileReader { return Ok(()); } 
Err(e) => { - return Err(anyhow!(e).context(format!("S3 GetObject from {bucket_name} error"))); + return Err(anyhow!(e) + .context(format!("S3 GetObject from {bucket_name} error")) + .into()); } }; @@ -180,7 +183,7 @@ impl SplitReader for S3FileReader { parser_config: ParserConfig, source_ctx: SourceContextRef, _columns: Option>, - ) -> anyhow::Result { + ) -> ConnectorResult { let config = AwsAuthProps::from(&props); let sdk_config = config.build_config().await?; @@ -206,7 +209,7 @@ impl SplitReader for S3FileReader { } impl S3FileReader { - #[try_stream(boxed, ok = StreamChunk, error = anyhow::Error)] + #[try_stream(boxed, ok = StreamChunk, error = crate::error::ConnectorError)] async fn into_chunk_stream(self) { for split in self.splits { let actor_id = self.source_ctx.source_info.actor_id.to_string(); diff --git a/src/connector/src/source/filesystem/s3_v2/lister.rs b/src/connector/src/source/filesystem/s3_v2/lister.rs index d6a6b6c1e68a3..ccc40f1ccef80 100644 --- a/src/connector/src/source/filesystem/s3_v2/lister.rs +++ b/src/connector/src/source/filesystem/s3_v2/lister.rs @@ -18,6 +18,7 @@ use aws_sdk_s3::error::DisplayErrorContext; use aws_sdk_s3::types::Object; use itertools::Itertools; +use crate::error::ConnectorResult; use crate::source::filesystem::{FsPageItem, S3SplitEnumerator}; use crate::source::{FsFilterCtrlCtx, FsListInner}; @@ -25,7 +26,7 @@ use crate::source::{FsFilterCtrlCtx, FsListInner}; impl FsListInner for S3SplitEnumerator { async fn get_next_page From<&'a Object>>( &mut self, - ) -> anyhow::Result<(Vec, bool)> { + ) -> ConnectorResult<(Vec, bool)> { let mut has_finished = false; let mut req = self .client diff --git a/src/connector/src/source/google_pubsub/enumerator/client.rs b/src/connector/src/source/google_pubsub/enumerator/client.rs index bc1d9d078b66a..25cb28909c479 100644 --- a/src/connector/src/source/google_pubsub/enumerator/client.rs +++ b/src/connector/src/source/google_pubsub/enumerator/client.rs @@ -12,12 +12,14 @@ // See the 
License for the specific language governing permissions and // limitations under the License. -use anyhow::{bail, Context}; +use anyhow::Context; use async_trait::async_trait; use chrono::{TimeZone, Utc}; use google_cloud_pubsub::client::{Client, ClientConfig}; use google_cloud_pubsub::subscription::{SeekTo, SubscriptionConfig}; +use risingwave_common::bail; +use crate::error::ConnectorResult; use crate::source::base::SplitEnumerator; use crate::source::google_pubsub::split::PubsubSplit; use crate::source::google_pubsub::PubsubProperties; @@ -36,7 +38,7 @@ impl SplitEnumerator for PubsubSplitEnumerator { async fn new( properties: Self::Properties, _context: SourceEnumeratorContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let subscription = properties.subscription.to_owned(); if properties.credentials.is_none() && properties.emulator_host.is_none() { @@ -62,7 +64,10 @@ impl SplitEnumerator for PubsubSplitEnumerator { // We need the `retain_acked_messages` configuration to be true to seek back to timestamps // as done in the [`PubsubSplitReader`] and here. - let (_, subscription_config) = sub.config(None).await?; + let (_, subscription_config) = sub + .config(None) + .await + .context("failed to fetch subscription config")?; if let SubscriptionConfig { retain_acked_messages: false, .. 
@@ -98,7 +103,7 @@ impl SplitEnumerator for PubsubSplitEnumerator { }) } - async fn list_splits(&mut self) -> anyhow::Result> { + async fn list_splits(&mut self) -> ConnectorResult> { tracing::debug!("enumerating pubsub splits"); let splits: Vec = (0..self.split_count) .map(|i| PubsubSplit { diff --git a/src/connector/src/source/google_pubsub/mod.rs b/src/connector/src/source/google_pubsub/mod.rs index aeec1accd820b..0a49fa6467f66 100644 --- a/src/connector/src/source/google_pubsub/mod.rs +++ b/src/connector/src/source/google_pubsub/mod.rs @@ -101,9 +101,8 @@ impl PubsubProperties { #[cfg(test)] mod tests { - use anyhow::Result; - use super::*; + use crate::error::ConnectorResult as Result; const EMULATOR_HOST: &str = "localhost:8081"; const CREDENTIALS: &str = "{}"; diff --git a/src/connector/src/source/google_pubsub/source/reader.rs b/src/connector/src/source/google_pubsub/source/reader.rs index fd5fab15ed10b..0887cb06594f9 100644 --- a/src/connector/src/source/google_pubsub/source/reader.rs +++ b/src/connector/src/source/google_pubsub/source/reader.rs @@ -12,16 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::{anyhow, ensure, Context, Result}; +use anyhow::{anyhow, Context}; use async_trait::async_trait; use chrono::{NaiveDateTime, TimeZone, Utc}; use futures_async_stream::try_stream; use google_cloud_pubsub::client::{Client, ClientConfig}; use google_cloud_pubsub::subscription::{SeekTo, Subscription}; -use risingwave_common::bail; +use risingwave_common::{bail, ensure}; use tonic::Code; use super::TaggedReceivedMessage; +use crate::error::{ConnectorError, ConnectorResult as Result}; use crate::parser::ParserConfig; use crate::source::google_pubsub::{PubsubProperties, PubsubSplit}; use crate::source::{ @@ -41,7 +42,7 @@ pub struct PubsubSplitReader { } impl CommonSplitReader for PubsubSplitReader { - #[try_stream(ok = Vec, error = anyhow::Error)] + #[try_stream(ok = Vec, error = ConnectorError)] async fn into_data_stream(self) { loop { let pull_result = self diff --git a/src/connector/src/source/google_pubsub/split.rs b/src/connector/src/source/google_pubsub/split.rs index e52ffa8ef72a4..f150f7f08038b 100644 --- a/src/connector/src/source/google_pubsub/split.rs +++ b/src/connector/src/source/google_pubsub/split.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::anyhow; use risingwave_common::types::JsonbVal; use serde::{Deserialize, Serialize}; +use crate::error::ConnectorResult; use crate::source::{SplitId, SplitMetaData}; #[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)] @@ -36,8 +36,8 @@ pub struct PubsubSplit { } impl SplitMetaData for PubsubSplit { - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } fn encode_to_json(&self) -> JsonbVal { @@ -48,7 +48,7 @@ impl SplitMetaData for PubsubSplit { format!("{}-{}", self.subscription, self.index).into() } - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { self.start_offset = Some(start_offset); Ok(()) } diff --git a/src/connector/src/source/iceberg/mod.rs b/src/connector/src/source/iceberg/mod.rs index e274f639f15b2..899828322c810 100644 --- a/src/connector/src/source/iceberg/mod.rs +++ b/src/connector/src/source/iceberg/mod.rs @@ -18,6 +18,7 @@ use async_trait::async_trait; use risingwave_common::types::JsonbVal; use serde::{Deserialize, Serialize}; +use crate::error::ConnectorResult; use crate::parser::ParserConfig; use crate::source::{ BoxChunkSourceStream, Column, SourceContextRef, SourceEnumeratorContextRef, SourceProperties, @@ -71,7 +72,7 @@ impl SplitMetaData for IcebergSplit { unimplemented!() } - fn restore_from_json(_value: JsonbVal) -> anyhow::Result { + fn restore_from_json(_value: JsonbVal) -> ConnectorResult { unimplemented!() } @@ -79,7 +80,7 @@ impl SplitMetaData for IcebergSplit { unimplemented!() } - fn update_with_offset(&mut self, _start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, _start_offset: String) -> ConnectorResult<()> { unimplemented!() } } @@ -95,11 +96,11 @@ impl SplitEnumerator for 
IcebergSplitEnumerator { async fn new( _properties: Self::Properties, _context: SourceEnumeratorContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { Ok(Self {}) } - async fn list_splits(&mut self) -> anyhow::Result> { + async fn list_splits(&mut self) -> ConnectorResult> { Ok(vec![]) } } @@ -118,7 +119,7 @@ impl SplitReader for IcebergFileReader { _parser_config: ParserConfig, _source_ctx: SourceContextRef, _columns: Option>, - ) -> anyhow::Result { + ) -> ConnectorResult { unimplemented!() } diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 9850a4c244920..16314d21dbc1e 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -15,12 +15,14 @@ use std::collections::HashMap; use std::time::Duration; -use anyhow::anyhow; +use anyhow::{anyhow, Context as _}; use async_trait::async_trait; use rdkafka::consumer::{BaseConsumer, Consumer}; use rdkafka::error::KafkaResult; use rdkafka::{Offset, TopicPartitionList}; +use risingwave_common::bail; +use crate::error::ConnectorResult; use crate::source::base::SplitEnumerator; use crate::source::kafka::split::KafkaSplit; use crate::source::kafka::{KafkaProperties, PrivateLinkConsumerContext, KAFKA_ISOLATION_LEVEL}; @@ -57,7 +59,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { async fn new( properties: KafkaProperties, context: SourceEnumeratorContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let mut config = rdkafka::ClientConfig::new(); let common_props = &properties.common; @@ -77,11 +79,9 @@ impl SplitEnumerator for KafkaSplitEnumerator { Some("earliest") => KafkaEnumeratorOffset::Earliest, Some("latest") => KafkaEnumeratorOffset::Latest, None => KafkaEnumeratorOffset::Earliest, - _ => { - return Err(anyhow!( - "properties `scan_startup_mode` only support earliest and latest or leave it empty" - )); - } + _ => bail!( + "properties `scan_startup_mode` only support 
earliest and latest or leave it empty" + ), }; if let Some(s) = &properties.time_offset { @@ -105,12 +105,12 @@ impl SplitEnumerator for KafkaSplitEnumerator { }) } - async fn list_splits(&mut self) -> anyhow::Result> { - let topic_partitions = self.fetch_topic_partition().await.map_err(|e| { - anyhow!(format!( - "failed to fetch metadata from kafka ({}), error: {}", - self.broker_address, e - )) + async fn list_splits(&mut self) -> ConnectorResult> { + let topic_partitions = self.fetch_topic_partition().await.with_context(|| { + format!( + "failed to fetch metadata from kafka ({})", + self.broker_address + ) })?; let watermarks = self.get_watermarks(topic_partitions.as_ref()).await?; let mut start_offsets = self @@ -153,12 +153,12 @@ impl KafkaSplitEnumerator { &mut self, expect_start_timestamp_millis: Option, expect_stop_timestamp_millis: Option, - ) -> anyhow::Result> { - let topic_partitions = self.fetch_topic_partition().await.map_err(|e| { - anyhow!(format!( - "failed to fetch metadata from kafka ({}), error: {}", - self.broker_address, e - )) + ) -> ConnectorResult> { + let topic_partitions = self.fetch_topic_partition().await.with_context(|| { + format!( + "failed to fetch metadata from kafka ({})", + self.broker_address + ) })?; // here we are getting the start offset and end offset for each partition with the given @@ -349,7 +349,7 @@ impl KafkaSplitEnumerator { .is_ok() } - async fn fetch_topic_partition(&self) -> anyhow::Result> { + async fn fetch_topic_partition(&self) -> ConnectorResult> { // for now, we only support one topic let metadata = self .client @@ -358,11 +358,11 @@ impl KafkaSplitEnumerator { let topic_meta = match metadata.topics() { [meta] => meta, - _ => return Err(anyhow!("topic {} not found", self.topic)), + _ => bail!("topic {} not found", self.topic), }; if topic_meta.partitions().is_empty() { - return Err(anyhow!("topic {} not found", self.topic)); + bail!("topic {} not found", self.topic); } Ok(topic_meta diff --git 
a/src/connector/src/source/kafka/private_link.rs b/src/connector/src/source/kafka/private_link.rs index 645588f457498..3eebacca09f93 100644 --- a/src/connector/src/source/kafka/private_link.rs +++ b/src/connector/src/source/kafka/private_link.rs @@ -16,12 +16,13 @@ use std::collections::{BTreeMap, HashMap}; use std::str::FromStr; use std::sync::Arc; -use anyhow::anyhow; +use anyhow::{anyhow, Context}; use itertools::Itertools; use rdkafka::client::BrokerAddr; use rdkafka::consumer::ConsumerContext; use rdkafka::producer::{DeliveryResult, ProducerContext}; use rdkafka::{ClientContext, Statistics}; +use risingwave_common::bail; use risingwave_common::util::addr::HostAddr; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::catalog::connection::PrivateLinkService; @@ -29,6 +30,7 @@ use risingwave_pb::catalog::connection::PrivateLinkService; use crate::common::{ AwsPrivateLinkItem, PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, PRIVATE_LINK_TARGETS_KEY, }; +use crate::error::ConnectorResult; use crate::source::kafka::stats::RdKafkaStats; use crate::source::kafka::{KAFKA_PROPS_BROKER_KEY, KAFKA_PROPS_BROKER_KEY_ALIAS}; use crate::source::KAFKA_CONNECTOR; @@ -68,9 +70,9 @@ impl BrokerAddrRewriter { pub fn new( role: PrivateLinkContextRole, broker_rewrite_map: Option>, - ) -> anyhow::Result { + ) -> ConnectorResult { tracing::info!("[{}] rewrite map {:?}", role, broker_rewrite_map); - let rewrite_map: anyhow::Result> = broker_rewrite_map + let rewrite_map: ConnectorResult> = broker_rewrite_map .map_or(Ok(BTreeMap::new()), |addr_map| { addr_map .into_iter() @@ -109,7 +111,7 @@ impl PrivateLinkConsumerContext { broker_rewrite_map: Option>, identifier: Option, metrics: Option>, - ) -> anyhow::Result { + ) -> ConnectorResult { let inner = BrokerAddrRewriter::new(PrivateLinkContextRole::Consumer, broker_rewrite_map)?; Ok(Self { inner, @@ -152,7 +154,7 @@ impl PrivateLinkProducerContext { broker_rewrite_map: Option>, identifier: Option, metrics: Option>, - ) -> 
anyhow::Result { + ) -> ConnectorResult { let inner = BrokerAddrRewriter::new(PrivateLinkContextRole::Producer, broker_rewrite_map)?; Ok(Self { inner, @@ -195,11 +197,12 @@ fn kafka_props_broker_key(with_properties: &BTreeMap) -> &str { fn get_property_required( with_properties: &BTreeMap, property: &str, -) -> anyhow::Result { +) -> ConnectorResult { with_properties .get(property) .map(|s| s.to_lowercase()) - .ok_or_else(|| anyhow!("Required property \"{property}\" is not provided")) + .with_context(|| format!("Required property \"{property}\" is not provided")) + .map_err(Into::into) } #[inline(always)] @@ -216,7 +219,7 @@ pub fn insert_privatelink_broker_rewrite_map( with_options: &mut BTreeMap, svc: Option<&PrivateLinkService>, privatelink_endpoint: Option, -) -> anyhow::Result<()> { +) -> ConnectorResult<()> { let mut broker_rewrite_map = HashMap::new(); let servers = get_property_required(with_options, kafka_props_broker_key(with_options))?; let broker_addrs = servers.split(',').collect_vec(); @@ -227,11 +230,11 @@ pub fn insert_privatelink_broker_rewrite_map( with_options.remove(PRIVATE_LINK_TARGETS_KEY); if broker_addrs.len() != link_targets.len() { - return Err(anyhow!( + bail!( "The number of broker addrs {} does not match the number of private link targets {}", broker_addrs.len(), link_targets.len() - )); + ); } if let Some(endpoint) = privatelink_endpoint { @@ -241,15 +244,15 @@ pub fn insert_privatelink_broker_rewrite_map( } } else { if svc.is_none() { - return Err(anyhow!("Privatelink endpoint not found.",)); + bail!("Privatelink endpoint not found."); } let svc = svc.unwrap(); for (link, broker) in link_targets.iter().zip_eq_fast(broker_addrs.into_iter()) { if svc.dns_entries.is_empty() { - return Err(anyhow!( + bail!( "No available private link endpoints for Kafka broker {}", broker - )); + ); } // rewrite the broker address to the dns name w/o az // requires the NLB has enabled the cross-zone load balancing diff --git 
a/src/connector/src/source/kafka/source/reader.rs b/src/connector/src/source/kafka/source/reader.rs index bb8e70471282f..d67e45fee3837 100644 --- a/src/connector/src/source/kafka/source/reader.rs +++ b/src/connector/src/source/kafka/source/reader.rs @@ -17,7 +17,7 @@ use std::collections::HashMap; use std::mem::swap; use std::time::Duration; -use anyhow::{Context, Result}; +use anyhow::Context; use async_trait::async_trait; use futures::StreamExt; use futures_async_stream::try_stream; @@ -27,6 +27,7 @@ use rdkafka::error::KafkaError; use rdkafka::{ClientConfig, Message, Offset, TopicPartitionList}; use risingwave_pb::plan_common::additional_column::ColumnType as AdditionalColumnType; +use crate::error::ConnectorResult as Result; use crate::parser::ParserConfig; use crate::source::base::SourceMessage; use crate::source::kafka::{ @@ -168,7 +169,7 @@ impl KafkaSplitReader { } impl CommonSplitReader for KafkaSplitReader { - #[try_stream(ok = Vec, error = anyhow::Error)] + #[try_stream(ok = Vec, error = crate::error::ConnectorError)] async fn into_data_stream(self) { if self.offsets.values().all(|(start_offset, stop_offset)| { match (start_offset, stop_offset) { diff --git a/src/connector/src/source/kafka/split.rs b/src/connector/src/source/kafka/split.rs index 1043cdd01f2f6..a98707eb4ab3a 100644 --- a/src/connector/src/source/kafka/split.rs +++ b/src/connector/src/source/kafka/split.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::anyhow; use risingwave_common::types::JsonbVal; use serde::{Deserialize, Serialize}; +use crate::error::ConnectorResult; use crate::source::{SplitId, SplitMetaData}; #[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)] @@ -32,15 +32,15 @@ impl SplitMetaData for KafkaSplit { format!("{}", self.partition).into() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } fn encode_to_json(&self) -> JsonbVal { serde_json::to_value(self.clone()).unwrap().into() } - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { self.start_offset = Some(start_offset.as_str().parse::().unwrap()); Ok(()) } diff --git a/src/connector/src/source/kinesis/enumerator/client.rs b/src/connector/src/source/kinesis/enumerator/client.rs index bbf4e29258260..b8966f99ae1ac 100644 --- a/src/connector/src/source/kinesis/enumerator/client.rs +++ b/src/connector/src/source/kinesis/enumerator/client.rs @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::Result; +use anyhow::Context as _; use async_trait::async_trait; use aws_sdk_kinesis::types::Shard; use aws_sdk_kinesis::Client as kinesis_client; +use risingwave_common::bail; +use crate::error::ConnectorResult as Result; use crate::source::kinesis::split::{KinesisOffset, KinesisSplit}; use crate::source::kinesis::*; use crate::source::{SourceEnumeratorContextRef, SplitEnumerator}; @@ -56,15 +58,11 @@ impl SplitEnumerator for KinesisSplitEnumerator { .set_next_token(next_token) .stream_name(&self.stream_name) .send() - .await?; + .await + .context("failed to list kinesis shards")?; match list_shard_output.shards { Some(shard) => shard_collect.extend(shard), - None => { - return Err(anyhow::Error::msg(format!( - "no shards in stream {}", - &self.stream_name - ))); - } + None => bail!("no shards in stream {}", &self.stream_name), } match list_shard_output.next_token { diff --git a/src/connector/src/source/kinesis/source/reader.rs b/src/connector/src/source/kinesis/source/reader.rs index 51b3c77710410..363dfb8777b12 100644 --- a/src/connector/src/source/kinesis/source/reader.rs +++ b/src/connector/src/source/kinesis/source/reader.rs @@ -14,7 +14,7 @@ use std::time::Duration; -use anyhow::{anyhow, Result}; +use anyhow::anyhow; use async_trait::async_trait; use aws_sdk_kinesis::error::{DisplayErrorContext, SdkError}; use aws_sdk_kinesis::operation::get_records::{GetRecordsError, GetRecordsOutput}; @@ -22,8 +22,11 @@ use aws_sdk_kinesis::primitives::DateTime; use aws_sdk_kinesis::types::ShardIteratorType; use aws_sdk_kinesis::Client as KinesisClient; use futures_async_stream::try_stream; +use risingwave_common::bail; +use thiserror_ext::AsReport; use tokio_retry; +use crate::error::ConnectorResult as Result; use crate::parser::ParserConfig; use crate::source::kinesis::source::message::from_kinesis_record; use crate::source::kinesis::split::{KinesisOffset, KinesisSplit}; @@ -74,13 +77,11 @@ impl SplitReader for KinesisSplitReader { if let Some(ts) = 
&properties.timestamp_offset { KinesisOffset::Timestamp(*ts) } else { - return Err(anyhow!("scan.startup.timestamp.millis is required")); + bail!("scan.startup.timestamp.millis is required"); } } _ => { - return Err(anyhow!( - "invalid scan_startup_mode, accept earliest/latest/timestamp" - )) + bail!("invalid scan_startup_mode, accept earliest/latest/timestamp") } }, }, @@ -90,9 +91,7 @@ impl SplitReader for KinesisSplitReader { if !matches!(start_position, KinesisOffset::Timestamp(_)) && properties.timestamp_offset.is_some() { - return Err( - anyhow!("scan.startup.mode need to be set to 'timestamp' if you want to start with a specific timestamp") - ); + bail!("scan.startup.mode need to be set to 'timestamp' if you want to start with a specific timestamp"); } let stream_name = properties.common.stream_name.clone(); @@ -121,7 +120,7 @@ impl SplitReader for KinesisSplitReader { } impl CommonSplitReader for KinesisSplitReader { - #[try_stream(ok = Vec < SourceMessage >, error = anyhow::Error)] + #[try_stream(ok = Vec < SourceMessage >, error = crate::error::ConnectorError)] async fn into_data_stream(mut self) { self.new_shard_iter().await?; loop { @@ -189,14 +188,12 @@ impl CommonSplitReader for KinesisSplitReader { continue; } Err(e) => { - let error_msg = format!( - "Kinesis got a unhandled error: {:?}, stream {:?}, shard {:?}", - DisplayErrorContext(e), - self.stream_name, - self.shard_id, - ); - tracing::error!("{}", error_msg); - return Err(anyhow!("{}", error_msg)); + let error = anyhow!(e).context(format!( + "Kinesis got a unhandled error on stream {:?}, shard {:?}", + self.stream_name, self.shard_id + )); + tracing::error!(error = %error.as_report()); + return Err(error.into()); } } } @@ -251,7 +248,7 @@ impl KinesisSplitReader { if let Some(iter) = resp.shard_iterator() { Ok(iter.to_owned()) } else { - Err(anyhow!("shard iterator is none")) + bail!("shard iterator is none") } } diff --git a/src/connector/src/source/kinesis/split.rs 
b/src/connector/src/source/kinesis/split.rs index c327e6da61e92..1c7bea61f8744 100644 --- a/src/connector/src/source/kinesis/split.rs +++ b/src/connector/src/source/kinesis/split.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use anyhow::anyhow; use risingwave_common::types::JsonbVal; use serde::{Deserialize, Serialize}; +use crate::error::ConnectorResult; use crate::source::{SplitId, SplitMetaData}; #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize, Hash)] @@ -39,15 +39,15 @@ impl SplitMetaData for KinesisSplit { self.shard_id.clone() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } fn encode_to_json(&self) -> JsonbVal { serde_json::to_value(self.clone()).unwrap().into() } - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { let start_offset = if start_offset.is_empty() { KinesisOffset::Earliest } else { diff --git a/src/connector/src/source/nats/enumerator/mod.rs b/src/connector/src/source/nats/enumerator/mod.rs index c5059fdc8186c..557921747b8f0 100644 --- a/src/connector/src/source/nats/enumerator/mod.rs +++ b/src/connector/src/source/nats/enumerator/mod.rs @@ -14,11 +14,12 @@ use std::sync::Arc; -use anyhow; use async_trait::async_trait; +use risingwave_common::bail; use super::source::{NatsOffset, NatsSplit}; use super::NatsProperties; +use crate::error::ConnectorResult; use crate::source::{SourceEnumeratorContextRef, SplitEnumerator, SplitId}; #[derive(Debug, Clone)] @@ -36,7 +37,7 @@ impl SplitEnumerator for NatsSplitEnumerator { async fn new( properties: Self::Properties, _context: SourceEnumeratorContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let 
client = properties.common.build_client().await?; Ok(Self { subject: properties.common.subject, @@ -45,14 +46,14 @@ impl SplitEnumerator for NatsSplitEnumerator { }) } - async fn list_splits(&mut self) -> anyhow::Result> { + async fn list_splits(&mut self) -> ConnectorResult> { // Nats currently does not support list_splits API, if we simple return the default 0 without checking the client status, will result executor crash let state = self.client.connection_state(); if state != async_nats::connection::State::Connected { - return Err(anyhow::anyhow!( + bail!( "Nats connection status is not connected, current status is {:?}", state - )); + ); } // TODO: to simplify the logic, return 1 split for first version let nats_split = NatsSplit { diff --git a/src/connector/src/source/nats/source/reader.rs b/src/connector/src/source/nats/source/reader.rs index 20a8f9c0dbc0b..05954538b91cf 100644 --- a/src/connector/src/source/nats/source/reader.rs +++ b/src/connector/src/source/nats/source/reader.rs @@ -12,14 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -use anyhow::{anyhow, Result}; +use anyhow::Context as _; use async_nats::jetstream::consumer; use async_trait::async_trait; use futures::StreamExt; use futures_async_stream::try_stream; +use risingwave_common::bail; use super::message::NatsMessage; use super::{NatsOffset, NatsSplit}; +use crate::error::ConnectorResult as Result; use crate::parser::ParserConfig; use crate::source::common::{into_chunk_stream, CommonSplitReader}; use crate::source::nats::NatsProperties; @@ -60,15 +62,15 @@ impl SplitReader for NatsSplitReader { "earliest" => NatsOffset::Earliest, "timestamp_millis" => { if let Some(time) = &properties.start_time { - NatsOffset::Timestamp(time.parse()?) + NatsOffset::Timestamp(time.parse().context( + "failed to parse the start time as nats offset timestamp", + )?) 
} else { - return Err(anyhow!("scan_startup_timestamp_millis is required")); + bail!("scan_startup_timestamp_millis is required"); } } _ => { - return Err(anyhow!( - "invalid scan_startup_mode, accept earliest/latest/timestamp_millis" - )) + bail!("invalid scan_startup_mode, accept earliest/latest/timestamp_millis") } }, }, @@ -101,7 +103,7 @@ impl SplitReader for NatsSplitReader { } impl CommonSplitReader for NatsSplitReader { - #[try_stream(ok = Vec, error = anyhow::Error)] + #[try_stream(ok = Vec, error = crate::error::ConnectorError)] async fn into_data_stream(self) { let capacity = self.source_ctx.source_ctrl_opts.chunk_size; let messages = self.consumer.messages().await?; diff --git a/src/connector/src/source/nats/split.rs b/src/connector/src/source/nats/split.rs index 1a176102efb60..d3b4ded019016 100644 --- a/src/connector/src/source/nats/split.rs +++ b/src/connector/src/source/nats/split.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::{anyhow, Ok}; use risingwave_common::types::JsonbVal; use serde::{Deserialize, Serialize}; +use crate::error::ConnectorResult; use crate::source::{SplitId, SplitMetaData}; #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize, Hash)] @@ -43,15 +43,15 @@ impl SplitMetaData for NatsSplit { format!("{}", self.split_id).into() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } fn encode_to_json(&self) -> JsonbVal { serde_json::to_value(self.clone()).unwrap().into() } - fn update_with_offset(&mut self, start_sequence: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_sequence: String) -> ConnectorResult<()> { let start_sequence = if start_sequence.is_empty() { NatsOffset::Earliest } else { diff --git a/src/connector/src/source/nexmark/enumerator/mod.rs b/src/connector/src/source/nexmark/enumerator/mod.rs index b67a58f548965..7a53f02489fb9 100644 --- a/src/connector/src/source/nexmark/enumerator/mod.rs +++ b/src/connector/src/source/nexmark/enumerator/mod.rs @@ -14,6 +14,7 @@ use async_trait::async_trait; +use crate::error::ConnectorResult; use crate::source::nexmark::split::NexmarkSplit; use crate::source::nexmark::NexmarkProperties; use crate::source::{SourceEnumeratorContextRef, SplitEnumerator}; @@ -32,12 +33,12 @@ impl SplitEnumerator for NexmarkSplitEnumerator { async fn new( properties: NexmarkProperties, _context: SourceEnumeratorContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { let split_num = properties.split_num; Ok(Self { split_num }) } - async fn list_splits(&mut self) -> anyhow::Result> { + async fn list_splits(&mut self) -> ConnectorResult> { let mut splits = vec![]; for i in 0..self.split_num { splits.push(NexmarkSplit { @@ -52,9 +53,8 @@ impl SplitEnumerator for NexmarkSplitEnumerator { #[cfg(test)] mod tests 
{ - use anyhow::Result; - use super::*; + use crate::error::ConnectorResult as Result; use crate::source::SplitMetaData; #[tokio::test] diff --git a/src/connector/src/source/nexmark/source/reader.rs b/src/connector/src/source/nexmark/source/reader.rs index e7621e5325524..fd68348d6faf6 100644 --- a/src/connector/src/source/nexmark/source/reader.rs +++ b/src/connector/src/source/nexmark/source/reader.rs @@ -27,6 +27,7 @@ use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, ScalarImpl}; use tokio::time::Instant; +use crate::error::ConnectorResult; use crate::parser::ParserConfig; use crate::source::data_gen_util::spawn_data_generation_stream; use crate::source::nexmark::source::combined_event::{ @@ -64,7 +65,7 @@ impl SplitReader for NexmarkSplitReader { parser_config: ParserConfig, source_ctx: SourceContextRef, _columns: Option>, - ) -> anyhow::Result { + ) -> ConnectorResult { tracing::debug!("Splits for nexmark found! {:?}", splits); assert!(splits.len() == 1); // TODO: currently, assume there's only one split in one reader @@ -163,7 +164,7 @@ impl NexmarkSplitReader { } } - #[try_stream(boxed, ok = StreamChunk, error = anyhow::Error)] + #[try_stream(boxed, ok = StreamChunk, error = crate::error::ConnectorError)] async fn into_native_stream(mut self) { let start_time = Instant::now(); let start_offset = self.generator.global_offset(); @@ -213,7 +214,7 @@ mod tests { use crate::source::{SourceEnumeratorContext, SplitEnumerator}; #[tokio::test] - async fn test_nexmark_split_reader() -> anyhow::Result<()> { + async fn test_nexmark_split_reader() -> crate::error::ConnectorResult<()> { let props = NexmarkProperties { split_num: 2, min_event_gap_in_ns: 0, @@ -247,7 +248,7 @@ mod tests { } #[tokio::test] - async fn test_nexmark_event_num() -> anyhow::Result<()> { + async fn test_nexmark_event_num() -> crate::error::ConnectorResult<()> { let max_chunk_size = 32; let event_num = max_chunk_size * 128 + 1; let props = NexmarkProperties { diff --git 
a/src/connector/src/source/nexmark/split.rs b/src/connector/src/source/nexmark/split.rs index d68aa2c8e1aa4..5150f1b6a1e1d 100644 --- a/src/connector/src/source/nexmark/split.rs +++ b/src/connector/src/source/nexmark/split.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use anyhow::anyhow; use risingwave_common::types::JsonbVal; use serde::{Deserialize, Serialize}; +use crate::error::ConnectorResult; use crate::source::{SplitId, SplitMetaData}; #[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Hash)] @@ -31,15 +31,15 @@ impl SplitMetaData for NexmarkSplit { format!("{}-{}", self.split_num, self.split_index).into() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } fn encode_to_json(&self) -> JsonbVal { serde_json::to_value(self.clone()).unwrap().into() } - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { self.start_offset = Some(start_offset.as_str().parse::().unwrap()); Ok(()) } diff --git a/src/connector/src/source/pulsar/enumerator/client.rs b/src/connector/src/source/pulsar/enumerator/client.rs index d92e633060616..dddcf927d0c3f 100644 --- a/src/connector/src/source/pulsar/enumerator/client.rs +++ b/src/connector/src/source/pulsar/enumerator/client.rs @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::{anyhow, bail, Result}; +use anyhow::anyhow; use async_trait::async_trait; use itertools::Itertools; use pulsar::{Pulsar, TokioExecutor}; +use risingwave_common::bail; use serde::{Deserialize, Serialize}; +use crate::error::ConnectorResult; use crate::source::pulsar::split::PulsarSplit; use crate::source::pulsar::topic::{parse_topic, Topic}; use crate::source::pulsar::PulsarProperties; @@ -45,7 +47,7 @@ impl SplitEnumerator for PulsarSplitEnumerator { async fn new( properties: PulsarProperties, _context: SourceEnumeratorContextRef, - ) -> Result { + ) -> ConnectorResult { let pulsar = properties .common .build_client(&properties.oauth, &properties.aws_auth_props) @@ -80,7 +82,7 @@ impl SplitEnumerator for PulsarSplitEnumerator { }) } - async fn list_splits(&mut self) -> anyhow::Result> { + async fn list_splits(&mut self) -> ConnectorResult> { let offset = self.start_offset.clone(); // MessageId is only used when recovering from a State assert!(!matches!(offset, PulsarEnumeratorOffset::MessageId(_))); diff --git a/src/connector/src/source/pulsar/source/reader.rs b/src/connector/src/source/pulsar/source/reader.rs index 139af839bd16d..9ed810dfc933a 100644 --- a/src/connector/src/source/pulsar/source/reader.rs +++ b/src/connector/src/source/pulsar/source/reader.rs @@ -31,7 +31,9 @@ use pulsar::{Consumer, ConsumerBuilder, ConsumerOptions, Pulsar, SubType, TokioE use risingwave_common::array::{DataChunk, StreamChunk}; use risingwave_common::catalog::ROWID_PREFIX; use risingwave_common::{bail, ensure}; +use thiserror_ext::AsReport; +use crate::error::ConnectorResult; use crate::parser::ParserConfig; use crate::source::pulsar::split::PulsarSplit; use crate::source::pulsar::{PulsarEnumeratorOffset, PulsarProperties}; @@ -56,7 +58,7 @@ impl SplitReader for PulsarSplitReader { parser_config: ParserConfig, source_ctx: SourceContextRef, _columns: Option>, - ) -> anyhow::Result { + ) -> ConnectorResult { ensure!(splits.len() == 1, "only support single split"); let 
split = splits.into_iter().next().unwrap(); let topic = split.topic.to_string(); @@ -106,7 +108,7 @@ pub struct PulsarBrokerReader { } // {ledger_id}:{entry_id}:{partition}:{batch_index} -fn parse_message_id(id: &str) -> anyhow::Result { +fn parse_message_id(id: &str) -> ConnectorResult { let splits = id.split(':').collect_vec(); if splits.len() < 2 || splits.len() > 4 { @@ -150,7 +152,7 @@ impl SplitReader for PulsarBrokerReader { parser_config: ParserConfig, source_ctx: SourceContextRef, _columns: Option>, - ) -> anyhow::Result { + ) -> ConnectorResult { ensure!(splits.len() == 1, "only support single split"); let split = splits.into_iter().next().unwrap(); let pulsar = props @@ -233,7 +235,7 @@ impl SplitReader for PulsarBrokerReader { } impl CommonSplitReader for PulsarBrokerReader { - #[try_stream(ok = Vec, error = anyhow::Error)] + #[try_stream(ok = Vec, error = crate::error::ConnectorError)] async fn into_data_stream(self) { let max_chunk_size = self.source_ctx.source_ctrl_opts.chunk_size; #[for_await] @@ -278,7 +280,7 @@ impl PulsarIcebergReader { } } - async fn scan(&self) -> anyhow::Result { + async fn scan(&self) -> ConnectorResult { let table = self.create_iceberg_table().await?; let schema = table.current_table_metadata().current_schema()?; tracing::debug!("Created iceberg pulsar table, schema is: {:?}", schema,); @@ -321,12 +323,13 @@ impl PulsarIcebergReader { .new_scan_builder() .with_partition_value(partition_value) .with_batch_size(max_chunk_size) - .build()? + .build() + .context("failed to build iceberg table scan")? .scan(&table) .await?) } - async fn create_iceberg_table(&self) -> anyhow::Result
{ + async fn create_iceberg_table(&self) -> ConnectorResult
{ let catalog = load_catalog(&self.build_iceberg_configs()?) .await .context("Unable to load iceberg catalog")?; @@ -340,7 +343,7 @@ impl PulsarIcebergReader { Ok(table) } - #[try_stream(ok = (StreamChunk, HashMap), error = anyhow::Error)] + #[try_stream(ok = (StreamChunk, HashMap), error = crate::error::ConnectorError)] async fn as_stream_chunk_stream(&self) { #[for_await] for file_scan in self.scan().await? { @@ -355,7 +358,7 @@ impl PulsarIcebergReader { } } - #[try_stream(ok = StreamChunk, error = anyhow::Error)] + #[try_stream(ok = StreamChunk, error = crate::error::ConnectorError)] async fn into_stream(self) { let (props, mut split, parser_config, source_ctx) = ( self.props.clone(), @@ -368,8 +371,9 @@ impl PulsarIcebergReader { #[for_await] for msg in self.as_stream_chunk_stream() { - let (_chunk, mapping) = - msg.inspect_err(|e| tracing::error!("Failed to read message from iceberg: {}", e))?; + let (_chunk, mapping) = msg.inspect_err( + |e| tracing::error!(error = %e.as_report(), "Failed to read message from iceberg"), + )?; last_msg_id = mapping.get(self.split.topic.to_string().as_str()).cloned(); } @@ -394,7 +398,7 @@ impl PulsarIcebergReader { } } - fn build_iceberg_configs(&self) -> anyhow::Result> { + fn build_iceberg_configs(&self) -> ConnectorResult> { let mut iceberg_configs = HashMap::new(); let bucket = self @@ -451,7 +455,7 @@ impl PulsarIcebergReader { fn convert_record_batch_to_source_with_state( &self, record_batch: &RecordBatch, - ) -> anyhow::Result<(StreamChunk, HashMap)> { + ) -> ConnectorResult<(StreamChunk, HashMap)> { let mut offsets = Vec::with_capacity(record_batch.num_rows()); let ledger_id_array = record_batch @@ -493,7 +497,8 @@ impl PulsarIcebergReader { .iter() .filter(|col| col.name != ROWID_PREFIX) .map(|col| record_batch.schema().index_of(col.name.as_str())) - .try_collect()?; + .try_collect() + .context("failed to look up column name in arrow record batch")?; for row in 0..record_batch.num_rows() { let offset = format!( @@ 
-507,7 +512,8 @@ impl PulsarIcebergReader { offsets.push(offset); } - let data_chunk = DataChunk::try_from(&record_batch.project(&field_indices)?)?; + let data_chunk = DataChunk::try_from(&record_batch.project(&field_indices)?) + .context("failed to convert arrow record batch to data chunk")?; let stream_chunk = StreamChunk::from(data_chunk); diff --git a/src/connector/src/source/pulsar/split.rs b/src/connector/src/source/pulsar/split.rs index 36f9bc47e3ec5..bf9b63d99d74f 100644 --- a/src/connector/src/source/pulsar/split.rs +++ b/src/connector/src/source/pulsar/split.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use anyhow::anyhow; use risingwave_common::types::JsonbVal; use serde::{Deserialize, Serialize}; +use crate::error::ConnectorResult; use crate::source::pulsar::topic::Topic; use crate::source::pulsar::PulsarEnumeratorOffset; use crate::source::{SplitId, SplitMetaData}; @@ -32,15 +32,15 @@ impl SplitMetaData for PulsarSplit { self.topic.to_string().into() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } fn encode_to_json(&self) -> JsonbVal { serde_json::to_value(self.clone()).unwrap().into() } - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { let start_offset = if start_offset.is_empty() { PulsarEnumeratorOffset::Earliest } else { diff --git a/src/connector/src/source/pulsar/topic.rs b/src/connector/src/source/pulsar/topic.rs index 4512662d6252c..352c7e47d8da2 100644 --- a/src/connector/src/source/pulsar/topic.rs +++ b/src/connector/src/source/pulsar/topic.rs @@ -12,10 +12,13 @@ // See the License for the specific language governing permissions and // limitations under 
the License. -use anyhow::{anyhow, Result}; +use anyhow::anyhow; +use risingwave_common::bail; use serde::{Deserialize, Serialize}; use urlencoding::encode; +use crate::error::ConnectorResult as Result; + const PERSISTENT_DOMAIN: &str = "persistent"; const NON_PERSISTENT_DOMAIN: &str = "non-persistent"; const PUBLIC_TENANT: &str = "public"; @@ -59,7 +62,7 @@ impl Topic { pub fn sub_topic(&self, partition: i32) -> Result { if partition < 0 { - return Err(anyhow!("invalid partition index number")); + bail!("invalid partition index number"); } if self.topic.contains(PARTITIONED_TOPIC_SUFFIX) { @@ -119,11 +122,11 @@ pub fn parse_topic(topic: &str) -> Result { ), 3 => format!("{}://{}", PERSISTENT_DOMAIN, topic), _ => { - return Err(anyhow!( + bail!( "Invalid short topic name '{}', \ it should be in the format of // or ", topic - )); + ); } }; } @@ -133,10 +136,10 @@ pub fn parse_topic(topic: &str) -> Result { let domain = match parts[0] { PERSISTENT_DOMAIN | NON_PERSISTENT_DOMAIN => parts[0], _ => { - return Err(anyhow!( + bail!( "The domain only can be specified as 'persistent' or 'non-persistent'. 
Input domain is '{}'", parts[0] - )); + ); } }; @@ -144,10 +147,10 @@ pub fn parse_topic(topic: &str) -> Result { let parts: Vec<&str> = rest.splitn(3, '/').collect(); if parts.len() != 3 { - return Err(anyhow!( + bail!( "invalid topic name '{}', it should be in the format of //", rest - )); + ); } let parsed_topic = Topic { @@ -159,7 +162,7 @@ pub fn parse_topic(topic: &str) -> Result { }; if parsed_topic.topic.is_empty() { - return Err(anyhow!("topic name cannot be empty".to_string(),)); + bail!("topic name cannot be empty".to_string()); } Ok(parsed_topic) diff --git a/src/connector/src/source/reader/desc.rs b/src/connector/src/source/reader/desc.rs index a842b091ab928..46107c2d73d0a 100644 --- a/src/connector/src/source/reader/desc.rs +++ b/src/connector/src/source/reader/desc.rs @@ -25,6 +25,7 @@ use risingwave_pb::plan_common::{AdditionalColumn, PbColumnCatalog}; #[expect(deprecated)] use super::fs_reader::FsSourceReader; use super::reader::SourceReader; +use crate::error::ConnectorResult; use crate::parser::additional_columns::{ build_additional_column_catalog, COMMON_COMPATIBLE_ADDITIONAL_COLUMNS, COMPATIBLE_ADDITIONAL_COLUMNS, @@ -176,7 +177,7 @@ impl SourceDescBuilder { columns } - pub fn build(self) -> anyhow::Result { + pub fn build(self) -> ConnectorResult { let columns = self.column_catalogs_to_source_column_descs(); let psrser_config = SpecificParserConfig::new(&self.source_info, &self.with_properties)?; @@ -201,7 +202,7 @@ impl SourceDescBuilder { #[deprecated = "will be replaced by new fs source (list + fetch)"] #[expect(deprecated)] - pub fn build_fs_source_desc(&self) -> anyhow::Result { + pub fn build_fs_source_desc(&self) -> ConnectorResult { let parser_config = SpecificParserConfig::new(&self.source_info, &self.with_properties)?; match ( diff --git a/src/connector/src/source/reader/fs_reader.rs b/src/connector/src/source/reader/fs_reader.rs index f64a9def6aab6..93a0bd2c2d6a8 100644 --- a/src/connector/src/source/reader/fs_reader.rs +++ 
b/src/connector/src/source/reader/fs_reader.rs @@ -23,6 +23,7 @@ use futures::StreamExt; use risingwave_common::catalog::ColumnId; use crate::dispatch_source_prop; +use crate::error::ConnectorResult; use crate::parser::{CommonParserConfig, ParserConfig, SpecificParserConfig}; use crate::source::{ create_split_reader, BoxChunkSourceStream, ConnectorProperties, ConnectorState, @@ -44,7 +45,7 @@ impl FsSourceReader { columns: Vec, connector_node_addr: Option, parser_config: SpecificParserConfig, - ) -> anyhow::Result { + ) -> ConnectorResult { // Store the connector node address to properties for later use. let mut source_props: HashMap = HashMap::from_iter(properties.clone()); connector_node_addr @@ -62,7 +63,7 @@ impl FsSourceReader { fn get_target_columns( &self, column_ids: Vec, - ) -> anyhow::Result> { + ) -> ConnectorResult> { column_ids .iter() .map(|id| { @@ -75,6 +76,7 @@ impl FsSourceReader { .cloned() }) .try_collect() + .map_err(Into::into) } pub async fn to_stream( @@ -82,7 +84,7 @@ impl FsSourceReader { state: ConnectorState, column_ids: Vec, source_ctx: Arc, - ) -> anyhow::Result { + ) -> ConnectorResult { let config = self.config.clone(); let columns = self.get_target_columns(column_ids)?; diff --git a/src/connector/src/source/reader/reader.rs b/src/connector/src/source/reader/reader.rs index ba9bd4dded4d8..833c9661c3ca1 100644 --- a/src/connector/src/source/reader/reader.rs +++ b/src/connector/src/source/reader/reader.rs @@ -24,8 +24,10 @@ use itertools::Itertools; use risingwave_common::bail; use risingwave_common::catalog::ColumnId; use rw_futures_util::select_all; +use thiserror_ext::AsReport as _; use crate::dispatch_source_prop; +use crate::error::ConnectorResult; use crate::parser::{CommonParserConfig, ParserConfig, SpecificParserConfig}; use crate::source::filesystem::opendal_source::opendal_enumerator::OpendalEnumerator; use crate::source::filesystem::opendal_source::{ @@ -51,7 +53,7 @@ impl SourceReader { columns: Vec, 
connector_message_buffer_size: usize, parser_config: SpecificParserConfig, - ) -> anyhow::Result { + ) -> ConnectorResult { let config = ConnectorProperties::extract(properties, false)?; Ok(Self { @@ -65,7 +67,7 @@ impl SourceReader { fn get_target_columns( &self, column_ids: Vec, - ) -> anyhow::Result> { + ) -> ConnectorResult> { column_ids .iter() .map(|id| { @@ -78,9 +80,10 @@ impl SourceReader { .cloned() }) .try_collect() + .map_err(Into::into) } - pub fn get_source_list(&self) -> anyhow::Result> { + pub fn get_source_list(&self) -> ConnectorResult> { let config = self.config.clone(); match config { ConnectorProperties::Gcs(prop) => { @@ -107,7 +110,7 @@ impl SourceReader { state: ConnectorState, column_ids: Vec, source_ctx: Arc, - ) -> anyhow::Result { + ) -> ConnectorResult { let Some(splits) = state else { return Ok(pending().boxed()); }; @@ -165,7 +168,7 @@ impl SourceReader { } } -#[try_stream(boxed, ok = FsPageItem, error = anyhow::Error)] +#[try_stream(boxed, ok = FsPageItem, error = crate::error::ConnectorError)] async fn build_opendal_fs_list_stream(lister: OpendalEnumerator) { let matcher = lister.get_matcher(); let mut object_metadata_iter = lister.list().await?; @@ -185,7 +188,7 @@ async fn build_opendal_fs_list_stream(lister: OpendalEnumera } } Err(err) => { - tracing::error!("list object fail, err {}", err); + tracing::error!(error = %err.as_report(), "list object fail"); return Err(err); } } diff --git a/src/connector/src/source/test_source.rs b/src/connector/src/source/test_source.rs index e0b901ddbf253..6d224593d7a2e 100644 --- a/src/connector/src/source/test_source.rs +++ b/src/connector/src/source/test_source.rs @@ -15,13 +15,14 @@ use std::collections::HashMap; use std::sync::{Arc, OnceLock}; -use anyhow::anyhow; use async_trait::async_trait; use parking_lot::Mutex; +use risingwave_common::bail; use risingwave_common::types::JsonbVal; use serde_derive::{Deserialize, Serialize}; use with_options::WithOptions; +use 
crate::error::ConnectorResult; use crate::parser::ParserConfig; use crate::source::{ BoxChunkSourceStream, Column, SourceContextRef, SourceEnumeratorContextRef, SourceProperties, @@ -32,7 +33,7 @@ pub type BoxListSplits = Box< dyn FnMut( TestSourceProperties, SourceEnumeratorContextRef, - ) -> anyhow::Result> + ) -> ConnectorResult> + Send + 'static, >; @@ -59,7 +60,7 @@ impl BoxSource { list_splits: impl FnMut( TestSourceProperties, SourceEnumeratorContextRef, - ) -> anyhow::Result> + ) -> ConnectorResult> + Send + 'static, into_source_stream: impl FnMut( @@ -124,11 +125,11 @@ impl TryFromHashmap for TestSourceProperties { fn try_from_hashmap( props: HashMap, _deny_unknown_fields: bool, - ) -> anyhow::Result { + ) -> ConnectorResult { if cfg!(any(madsim, test)) { Ok(TestSourceProperties { properties: props }) } else { - Err(anyhow!("test source only available at test")) + bail!("test source only available at test") } } } @@ -149,11 +150,11 @@ impl SplitMetaData for TestSourceSplit { serde_json::to_value(self.clone()).unwrap().into() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } - fn update_with_offset(&mut self, start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, start_offset: String) -> ConnectorResult<()> { self.offset = start_offset; Ok(()) } @@ -172,14 +173,14 @@ impl SplitEnumerator for TestSourceSplitEnumerator { async fn new( properties: Self::Properties, context: SourceEnumeratorContextRef, - ) -> anyhow::Result { + ) -> ConnectorResult { Ok(Self { properties, context, }) } - async fn list_splits(&mut self) -> anyhow::Result> { + async fn list_splits(&mut self) -> ConnectorResult> { (get_registry() .box_source .lock() @@ -208,7 +209,7 @@ impl SplitReader for TestSourceSplitReader { parser_config: ParserConfig, source_ctx: 
SourceContextRef, columns: Option>, - ) -> anyhow::Result { + ) -> ConnectorResult { Ok(Self { properties, state, diff --git a/src/frontend/src/error.rs b/src/frontend/src/error.rs index 5c1daa024afb1..9898e26a79ba0 100644 --- a/src/frontend/src/error.rs +++ b/src/frontend/src/error.rs @@ -17,6 +17,7 @@ use risingwave_common::array::ArrayError; use risingwave_common::error::{BoxedError, NoFunction, NotImplemented}; use risingwave_common::session_config::SessionConfigError; use risingwave_common::util::value_encoding::error::ValueEncodingError; +use risingwave_connector::error::ConnectorError; use risingwave_connector::sink::SinkError; use risingwave_expr::ExprError; use risingwave_pb::PbFieldNotFound; @@ -208,6 +209,12 @@ impl From for RwError { } } +impl From for RwError { + fn from(e: ConnectorError) -> Self { + ErrorCode::ConnectorError(e.into()).into() + } +} + impl From for RwError { fn from(err: PbFieldNotFound) -> Self { ErrorCode::InternalError(format!( diff --git a/src/frontend/src/scheduler/error.rs b/src/frontend/src/scheduler/error.rs index f68f72b8727e7..590c235e13901 100644 --- a/src/frontend/src/scheduler/error.rs +++ b/src/frontend/src/scheduler/error.rs @@ -14,6 +14,7 @@ use risingwave_batch::error::BatchError; use risingwave_common::session_config::QueryMode; +use risingwave_connector::error::ConnectorError; use risingwave_rpc_client::error::RpcError; use thiserror::Error; use tonic::{Code, Status}; @@ -63,6 +64,13 @@ pub enum SchedulerError { BatchError, ), + #[error(transparent)] + Connector( + #[from] + #[backtrace] + ConnectorError, + ), + #[error(transparent)] Internal( #[from] diff --git a/src/meta/service/src/cloud_service.rs b/src/meta/service/src/cloud_service.rs index 2ee28b1427edc..9c213bd7cb9e2 100644 --- a/src/meta/service/src/cloud_service.rs +++ b/src/meta/service/src/cloud_service.rs @@ -18,6 +18,7 @@ use std::sync::LazyLock; use async_trait::async_trait; use regex::Regex; use risingwave_connector::dispatch_source_prop; +use 
risingwave_connector::error::ConnectorResult; use risingwave_connector::source::kafka::private_link::insert_privatelink_broker_rewrite_map; use risingwave_connector::source::{ ConnectorProperties, SourceEnumeratorContext, SourceProperties, SplitEnumerator, @@ -135,7 +136,7 @@ impl CloudService for CloudServiceImpl { { return Ok(new_rwc_validate_fail_response( ErrorType::PrivatelinkResolveErr, - e.to_string(), + e.to_report_string(), )); } } else { @@ -151,13 +152,13 @@ impl CloudService for CloudServiceImpl { if let Err(e) = props { return Ok(new_rwc_validate_fail_response( ErrorType::KafkaInvalidProperties, - e.to_string(), + e.to_report_string(), )); }; async fn new_enumerator( props: P, - ) -> Result { + ) -> ConnectorResult { P::SplitEnumerator::new(props, SourceEnumeratorContext::default().into()).await } @@ -166,15 +167,15 @@ impl CloudService for CloudServiceImpl { if let Err(e) = enumerator { return Ok(new_rwc_validate_fail_response( ErrorType::KafkaInvalidProperties, - e.to_string(), + e.to_report_string(), )); } if let Err(e) = enumerator.unwrap().list_splits().await { - let error_message = e.to_string(); + let error_message = e.to_report_string(); if error_message.contains("BrokerTransportFailure") { return Ok(new_rwc_validate_fail_response( ErrorType::KafkaBrokerUnreachable, - e.to_string(), + e.to_report_string(), )); } static TOPIC_NOT_FOUND: LazyLock = @@ -182,12 +183,12 @@ impl CloudService for CloudServiceImpl { if TOPIC_NOT_FOUND.is_match(error_message.as_str()) { return Ok(new_rwc_validate_fail_response( ErrorType::KafkaTopicNotFound, - e.to_string(), + e.to_report_string(), )); } return Ok(new_rwc_validate_fail_response( ErrorType::KafkaOther, - e.to_string(), + e.to_report_string(), )); } }); diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index e26e1af0f0cff..a1efaa756bb44 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -845,7 +845,7 @@ impl CatalogController { 
let ret = src_manager.register_source(&pb_source).await; if let Err(e) = ret { txn.rollback().await?; - return Err(e.into()); + return Err(e); } } txn.commit().await?; diff --git a/src/meta/src/error.rs b/src/meta/src/error.rs index 9833a51cc1934..18230bf74c213 100644 --- a/src/meta/src/error.rs +++ b/src/meta/src/error.rs @@ -14,6 +14,7 @@ use aws_sdk_ec2::error::DisplayErrorContext; use risingwave_common::error::BoxedError; +use risingwave_connector::error::ConnectorError; use risingwave_connector::sink::SinkError; use risingwave_pb::PbFieldNotFound; use risingwave_rpc_client::error::{RpcError, ToTonicStatus}; @@ -88,6 +89,13 @@ pub enum MetaErrorInner { #[error("SystemParams error: {0}")] SystemParams(String), + #[error(transparent)] + Connector( + #[from] + #[backtrace] + ConnectorError, + ), + #[error("Sink error: {0}")] Sink( #[from] diff --git a/src/meta/src/rpc/ddl_controller.rs b/src/meta/src/rpc/ddl_controller.rs index 7cff9dc4a9b7a..908c62ebdffe6 100644 --- a/src/meta/src/rpc/ddl_controller.rs +++ b/src/meta/src/rpc/ddl_controller.rs @@ -31,6 +31,7 @@ use risingwave_common::util::stream_graph_visitor::{ }; use risingwave_common::{bail, current_cluster_version}; use risingwave_connector::dispatch_source_prop; +use risingwave_connector::error::ConnectorError; use risingwave_connector::source::cdc::CdcSourceType; use risingwave_connector::source::{ ConnectorProperties, SourceEnumeratorContext, SourceProperties, SplitEnumerator, @@ -436,7 +437,7 @@ impl DdlController { mgr.catalog_manager .cancel_create_source_procedure(&source) .await?; - return Err(e.into()); + return Err(e); } mgr.catalog_manager @@ -804,7 +805,7 @@ impl DdlController { pub(crate) async fn validate_cdc_table( table: &Table, table_fragments: &TableFragments, - ) -> anyhow::Result<()> { + ) -> MetaResult<()> { let stream_scan_fragment = table_fragments .fragments .values() @@ -820,7 +821,7 @@ impl DdlController { async fn new_enumerator_for_validate( source_props: P, - ) -> Result { + ) -> 
Result { P::SplitEnumerator::new(source_props, SourceEnumeratorContext::default().into()).await } diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index 8af470ce7df65..eb3d6b3205c4c 100644 --- a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -20,10 +20,11 @@ use std::ops::Deref; use std::sync::Arc; use std::time::Duration; -use anyhow::{anyhow, Context}; +use anyhow::Context; use risingwave_common::catalog::TableId; use risingwave_common::metrics::LabelGuardedIntGauge; use risingwave_connector::dispatch_source_prop; +use risingwave_connector::error::ConnectorResult; use risingwave_connector::source::{ ConnectorProperties, SourceEnumeratorContext, SourceEnumeratorInfo, SourceProperties, SplitEnumerator, SplitId, SplitImpl, SplitMetaData, @@ -81,12 +82,12 @@ struct ConnectorSourceWorker { source_is_up: LabelGuardedIntGauge<2>, } -fn extract_prop_from_existing_source(source: &Source) -> anyhow::Result { +fn extract_prop_from_existing_source(source: &Source) -> ConnectorResult { let mut properties = ConnectorProperties::extract(source.with_properties.clone(), false)?; properties.init_from_pb_source(source); Ok(properties) } -fn extract_prop_from_new_source(source: &Source) -> anyhow::Result { +fn extract_prop_from_new_source(source: &Source) -> ConnectorResult { let mut properties = ConnectorProperties::extract(source.with_properties.clone(), true)?; properties.init_from_pb_source(source); Ok(properties) @@ -96,7 +97,7 @@ const DEFAULT_SOURCE_WORKER_TICK_INTERVAL: Duration = Duration::from_secs(30); impl ConnectorSourceWorker

{ /// Recreate the `SplitEnumerator` to establish a new connection to the external source service. - async fn refresh(&mut self) -> anyhow::Result<()> { + async fn refresh(&mut self) -> MetaResult<()> { let enumerator = P::SplitEnumerator::new( self.connector_properties.clone(), Arc::new(SourceEnumeratorContext { @@ -124,7 +125,7 @@ impl ConnectorSourceWorker

{ period: Duration, splits: Arc>, metrics: Arc, - ) -> anyhow::Result { + ) -> MetaResult { let enumerator = P::SplitEnumerator::new( connector_properties.clone(), Arc::new(SourceEnumeratorContext { @@ -711,7 +712,7 @@ impl SourceManager { let handle = core .managed_sources .get(&source_id) - .ok_or_else(|| anyhow!("could not found source {}", source_id))?; + .with_context(|| format!("could not find source {}", source_id))?; if handle.splits.lock().await.splits.is_none() { // force refresh source @@ -758,7 +759,7 @@ impl SourceManager { } /// register connector worker for source. - pub async fn register_source(&self, source: &Source) -> anyhow::Result<()> { + pub async fn register_source(&self, source: &Source) -> MetaResult<()> { let mut core = self.core.lock().await; if core.managed_sources.contains_key(&source.get_id()) { tracing::warn!("source {} already registered", source.get_id()); @@ -823,7 +824,7 @@ impl SourceManager { break worker; } Err(e) => { - tracing::warn!("failed to create source worker: {}", e); + tracing::warn!(error = %e.as_report(), "failed to create source worker"); } } }; @@ -852,7 +853,7 @@ impl SourceManager { source: &Source, managed_sources: &mut HashMap, metrics: Arc, - ) -> anyhow::Result<()> { + ) -> MetaResult<()> { tracing::info!("spawning new watcher for source {}", source.id); let splits = Arc::new(Mutex::new(SharedSplitMap { splits: None })); @@ -879,11 +880,12 @@ impl SourceManager { // in kafka tokio::time::timeout(Self::DEFAULT_SOURCE_TICK_TIMEOUT, worker.tick()) .await - .map_err(|_e| { - anyhow!( - "failed to fetch meta info for source {}, error: timeout {}", + .ok() + .with_context(|| { + format!( + "failed to fetch meta info for source {}, timeout {:?}", source.id, - Self::DEFAULT_SOURCE_TICK_TIMEOUT.as_secs() + Self::DEFAULT_SOURCE_TICK_TIMEOUT ) })??; @@ -984,8 +986,8 @@ pub fn build_actor_split_impls( mod tests { use std::collections::{BTreeMap, HashMap, HashSet}; - use anyhow::anyhow; use 
risingwave_common::types::JsonbVal; + use risingwave_connector::error::ConnectorResult; use risingwave_connector::source::{SplitId, SplitMetaData}; use serde::{Deserialize, Serialize}; @@ -1006,11 +1008,11 @@ mod tests { serde_json::to_value(*self).unwrap().into() } - fn restore_from_json(value: JsonbVal) -> anyhow::Result { - serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) + fn restore_from_json(value: JsonbVal) -> ConnectorResult { + serde_json::from_value(value.take()).map_err(Into::into) } - fn update_with_offset(&mut self, _start_offset: String) -> anyhow::Result<()> { + fn update_with_offset(&mut self, _start_offset: String) -> ConnectorResult<()> { Ok(()) } } diff --git a/src/stream/src/executor/source/fs_source_executor.rs b/src/stream/src/executor/source/fs_source_executor.rs index a2478cdb6bb0d..95894429e9361 100644 --- a/src/stream/src/executor/source/fs_source_executor.rs +++ b/src/stream/src/executor/source/fs_source_executor.rs @@ -25,6 +25,7 @@ use futures_async_stream::try_stream; use risingwave_common::catalog::Schema; use risingwave_common::system_param::local_manager::SystemParamsReaderRef; use risingwave_common::system_param::reader::SystemParamsRead; +use risingwave_connector::error::ConnectorError; use risingwave_connector::source::reader::desc::{FsSourceDesc, SourceDescBuilder}; use risingwave_connector::source::{ BoxChunkSourceStream, ConnectorState, SourceContext, SourceCtrlOpts, SplitId, SplitImpl, @@ -442,7 +443,7 @@ impl FsSourceExecutor { self.stream_source_core.latest_split_info.get_mut(id).map( |origin_split| { origin_split.update_in_place(offset.clone())?; - Ok::<_, anyhow::Error>((id.clone(), origin_split.clone())) + Ok::<_, ConnectorError>((id.clone(), origin_split.clone())) }, ) }) From 5cda6dd693ddc0a7645c78aa65078fc7c90bc748 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Fri, 23 Feb 2024 18:03:01 +0800 Subject: [PATCH 29/35] feat(lints): lint `format_error` on `anyhow::Error` (#15158) Signed-off-by: Bugen Zhao --- 
Cargo.lock | 1 + lints/src/format_error.rs | 75 +++++++++++++----- lints/src/lib.rs | 1 + lints/ui/format_error.rs | 18 +++++ lints/ui/format_error.stderr | 78 ++++++++++++++++++- src/cmd/src/lib.rs | 10 +-- src/cmd_all/Cargo.toml | 1 + src/cmd_all/build.rs | 3 +- src/common/src/config.rs | 6 +- src/connector/src/sink/nats.rs | 9 +-- src/connector/src/source/cdc/source/reader.rs | 8 +- .../src/source/kafka/enumerator/client.rs | 3 +- src/error/src/lib.rs | 3 + src/frontend/planner_test/src/lib.rs | 2 +- src/frontend/src/handler/create_source.rs | 3 +- src/meta/src/dashboard/mod.rs | 3 +- .../sink_coordination/coordinator_worker.rs | 2 +- src/risedevtool/src/bin/risedev-dev.rs | 5 +- src/risedevtool/src/preflight_check.rs | 5 +- src/tests/regress/src/lib.rs | 3 + .../recovery/background_ddl.rs | 3 +- 21 files changed, 191 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b67e3700387de..1e711a211887b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8685,6 +8685,7 @@ dependencies = [ "strum_macros 0.26.1", "task_stats_alloc", "tempfile", + "thiserror-ext", "tikv-jemallocator", "tracing", "vergen", diff --git a/lints/src/format_error.rs b/lints/src/format_error.rs index 0d1df649460e8..8dcbed8cb520d 100644 --- a/lints/src/format_error.rs +++ b/lints/src/format_error.rs @@ -16,7 +16,7 @@ use clippy_utils::diagnostics::span_lint_and_help; use clippy_utils::macros::{ find_format_arg_expr, find_format_args, is_format_macro, macro_backtrace, }; -use clippy_utils::ty::implements_trait; +use clippy_utils::ty::{implements_trait, match_type}; use clippy_utils::{ is_in_cfg_test, is_in_test_function, is_trait_method, match_def_path, match_function_call, }; @@ -64,6 +64,7 @@ const TRACING_FIELD_DEBUG: [&str; 3] = ["tracing_core", "field", "debug"]; const TRACING_FIELD_DISPLAY: [&str; 3] = ["tracing_core", "field", "display"]; const TRACING_MACROS_EVENT: [&str; 3] = ["tracing", "macros", "event"]; const ANYHOW_MACROS_ANYHOW: [&str; 3] = ["anyhow", "macros", 
"anyhow"]; +const ANYHOW_ERROR: [&str; 2] = ["anyhow", "Error"]; impl<'tcx> LateLintPass<'tcx> for FormatError { fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'_>) { @@ -143,7 +144,10 @@ fn check_fmt_arg_in_anyhow_error(cx: &LateContext<'_>, arg_expr: &Expr<'_>) { check_fmt_arg_with_help( cx, arg_expr, - "consider directly wrapping the error with `anyhow::anyhow!(..)` instead of formatting it", + ( + "consider directly wrapping the error with `anyhow::anyhow!(..)` instead of formatting it", + "consider removing the redundant wrapping of `anyhow::anyhow!(..)`", + ), ); } @@ -151,12 +155,16 @@ fn check_fmt_arg_in_anyhow_context(cx: &LateContext<'_>, arg_expr: &Expr<'_>) { check_fmt_arg_with_help( cx, arg_expr, - "consider using `anyhow::Error::context`, `anyhow::Context::(with_)context` to \ + ( + "consider using `anyhow::Context::(with_)context` to \ attach additional message to the error and make it an error source instead", + "consider using `.context(..)` to \ + attach additional message to the error and make it an error source instead", + ), ); } -fn check_fmt_arg_with_help(cx: &LateContext<'_>, arg_expr: &Expr<'_>, help: &str) { +fn check_fmt_arg_with_help(cx: &LateContext<'_>, arg_expr: &Expr<'_>, help: impl Help) { check_arg(cx, arg_expr, arg_expr.span, help); } @@ -169,27 +177,56 @@ fn check_to_string_call(cx: &LateContext<'_>, receiver: &Expr<'_>, to_string_spa ); } -fn check_arg(cx: &LateContext<'_>, arg_expr: &Expr<'_>, span: Span, help: &str) { +fn check_arg(cx: &LateContext<'_>, arg_expr: &Expr<'_>, span: Span, help: impl Help) { let Some(error_trait_id) = cx.tcx.get_diagnostic_item(sym::Error) else { return; }; let ty = cx.typeck_results().expr_ty(arg_expr).peel_refs(); - if implements_trait(cx, ty, error_trait_id, &[]) { - if let Some(span) = core::iter::successors(Some(span), |s| s.parent_callsite()) - .find(|s| s.can_be_used_for_suggestions()) - { - // TODO: applicable suggestions - span_lint_and_help( - cx, - FORMAT_ERROR, - 
span, - "should not format error directly", - None, - help, - ); - } + let help = if implements_trait(cx, ty, error_trait_id, &[]) { + help.normal_help() + } else if match_type(cx, ty, &ANYHOW_ERROR) { + help.anyhow_help() + } else { + return; + }; + + if let Some(span) = core::iter::successors(Some(span), |s| s.parent_callsite()) + .find(|s| s.can_be_used_for_suggestions()) + { + // TODO: applicable suggestions + span_lint_and_help( + cx, + FORMAT_ERROR, + span, + "should not format error directly", + None, + help, + ); + } +} + +trait Help { + fn normal_help(&self) -> &str; + fn anyhow_help(&self) -> &str { + self.normal_help() + } +} + +impl Help for &str { + fn normal_help(&self) -> &str { + self + } +} + +impl Help for (&str, &str) { + fn normal_help(&self) -> &str { + self.0 + } + + fn anyhow_help(&self) -> &str { + self.1 } } diff --git a/lints/src/lib.rs b/lints/src/lib.rs index d2c78515272f4..df77538d3cf17 100644 --- a/lints/src/lib.rs +++ b/lints/src/lib.rs @@ -14,6 +14,7 @@ #![feature(rustc_private)] #![feature(let_chains)] +#![feature(lazy_cell)] #![warn(unused_extern_crates)] extern crate rustc_ast; diff --git a/lints/ui/format_error.rs b/lints/ui/format_error.rs index eeead1306ea3f..0e46c72766157 100644 --- a/lints/ui/format_error.rs +++ b/lints/ui/format_error.rs @@ -55,4 +55,22 @@ fn main() { let _ = anyhow!("{:?}", err); let _ = anyhow!("some error occurred: {}", err); let _ = anyhow!("some error occurred: {:?}", err); + + // `anyhow::Error` does not implement `Error` trait, test the special path here. 
+ let make_anyhow_err = || anyhow!("foobar"); + let anyhow_err = make_anyhow_err(); + + let _ = format!("{}", anyhow_err); + let _ = format!("{}", &anyhow_err); + let _ = format!("{}", &&anyhow_err); + let _ = format!("{}", Box::new(&anyhow_err)); // TODO: fail to lint + + tracing::field::display(&anyhow_err); + tracing::field::debug(make_anyhow_err()); + + let _ = anyhow_err.to_string(); + let _ = (&&anyhow_err).to_string(); + + let _ = anyhow!("{}", anyhow_err); + let _ = anyhow!("some error occurred: {:?}", anyhow_err); } diff --git a/lints/ui/format_error.stderr b/lints/ui/format_error.stderr index 8ec6e69b7fcf4..0eb4786380a79 100644 --- a/lints/ui/format_error.stderr +++ b/lints/ui/format_error.stderr @@ -262,7 +262,7 @@ error: should not format error directly LL | let _ = anyhow!("some error occurred: {}", err); | ^^^ | - = help: consider using `anyhow::Error::context`, `anyhow::Context::(with_)context` to attach additional message to the error and make it an error source instead + = help: consider using `anyhow::Context::(with_)context` to attach additional message to the error and make it an error source instead error: should not format error directly --> $DIR/format_error.rs:57:50 @@ -270,7 +270,79 @@ error: should not format error directly LL | let _ = anyhow!("some error occurred: {:?}", err); | ^^^ | - = help: consider using `anyhow::Error::context`, `anyhow::Context::(with_)context` to attach additional message to the error and make it an error source instead + = help: consider using `anyhow::Context::(with_)context` to attach additional message to the error and make it an error source instead -error: aborting due to 34 previous errors +error: should not format error directly + --> $DIR/format_error.rs:63:27 + | +LL | let _ = format!("{}", anyhow_err); + | ^^^^^^^^^^ + | + = help: consider importing `thiserror_ext::AsReport` and using `.as_report()` instead + +error: should not format error directly + --> $DIR/format_error.rs:64:27 + | +LL | let _ = 
format!("{}", &anyhow_err); + | ^^^^^^^^^^^ + | + = help: consider importing `thiserror_ext::AsReport` and using `.as_report()` instead + +error: should not format error directly + --> $DIR/format_error.rs:65:27 + | +LL | let _ = format!("{}", &&anyhow_err); + | ^^^^^^^^^^^^ + | + = help: consider importing `thiserror_ext::AsReport` and using `.as_report()` instead + +error: should not format error directly + --> $DIR/format_error.rs:68:29 + | +LL | tracing::field::display(&anyhow_err); + | ^^^^^^^^^^^ + | + = help: consider importing `thiserror_ext::AsReport` and recording the error as a field with `error = %.as_report()` instead + +error: should not format error directly + --> $DIR/format_error.rs:69:27 + | +LL | tracing::field::debug(make_anyhow_err()); + | ^^^^^^^^^^^^^^^^^ + | + = help: consider importing `thiserror_ext::AsReport` and recording the error as a field with `error = %.as_report()` instead + +error: should not format error directly + --> $DIR/format_error.rs:71:24 + | +LL | let _ = anyhow_err.to_string(); + | ^^^^^^^^^^^ + | + = help: consider importing `thiserror_ext::AsReport` and using `.to_report_string()` instead + +error: should not format error directly + --> $DIR/format_error.rs:72:28 + | +LL | let _ = (&&anyhow_err).to_string(); + | ^^^^^^^^^^^ + | + = help: consider importing `thiserror_ext::AsReport` and using `.to_report_string()` instead + +error: should not format error directly + --> $DIR/format_error.rs:74:27 + | +LL | let _ = anyhow!("{}", anyhow_err); + | ^^^^^^^^^^ + | + = help: consider removing the redundant wrapping of `anyhow::anyhow!(..)` + +error: should not format error directly + --> $DIR/format_error.rs:75:50 + | +LL | let _ = anyhow!("some error occurred: {:?}", anyhow_err); + | ^^^^^^^^^^ + | + = help: consider using `.context(..)` to attach additional message to the error and make it an error source instead + +error: aborting due to 43 previous errors diff --git a/src/cmd/src/lib.rs b/src/cmd/src/lib.rs index 
ce110c9effc17..13d5fcad5ec8c 100644 --- a/src/cmd/src/lib.rs +++ b/src/cmd/src/lib.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use risingwave_common::error::v2::AsReport as _; use risingwave_compactor::CompactorOpts; use risingwave_compute::ComputeNodeOpts; use risingwave_ctl::CliOpts as CtlOpts; @@ -67,13 +68,12 @@ pub fn ctl(opts: CtlOpts) { // Note: Use a simple current thread runtime for ctl. // When there's a heavy workload, multiple thread runtime seems to respond slowly. May need // further investigation. - tokio::runtime::Builder::new_current_thread() + if let Err(e) = tokio::runtime::Builder::new_current_thread() .enable_all() .build() .unwrap() .block_on(risingwave_ctl::start(opts)) - .inspect_err(|e| { - eprintln!("{:#?}", e); - }) - .unwrap(); + { + eprintln!("Error: {:#?}", e.as_report()); + } } diff --git a/src/cmd_all/Cargo.toml b/src/cmd_all/Cargo.toml index bb57fbfe88a09..c5f193ef8a2a3 100644 --- a/src/cmd_all/Cargo.toml +++ b/src/cmd_all/Cargo.toml @@ -58,6 +58,7 @@ workspace-hack = { path = "../workspace-hack" } expect-test = "1" [build-dependencies] +thiserror-ext = { workspace = true } vergen = { version = "8", default-features = false, features = [ "build", "git", diff --git a/src/cmd_all/build.rs b/src/cmd_all/build.rs index a4a7c27e65685..38d9f2d7107a6 100644 --- a/src/cmd_all/build.rs +++ b/src/cmd_all/build.rs @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +use thiserror_ext::AsReport; use vergen::EmitBuilder; fn main() { if let Err(e) = EmitBuilder::builder().git_sha(true).fail_on_error().emit() { // Leave the environment variable unset if error occurs. 
- println!("cargo:warning={}", e) + println!("cargo:warning={}", e.as_report()) } } diff --git a/src/common/src/config.rs b/src/common/src/config.rs index 971fb28d208c2..b1415a00b1362 100644 --- a/src/common/src/config.rs +++ b/src/common/src/config.rs @@ -391,9 +391,9 @@ impl<'de> Deserialize<'de> for DefaultParallelism { VirtualNode::COUNT )))? } else { - NonZeroUsize::new(i) - .context("default parallelism should be greater than 0") - .map_err(|e| serde::de::Error::custom(e.to_string()))? + NonZeroUsize::new(i).ok_or_else(|| { + serde::de::Error::custom("default parallelism should be greater than 0") + })? })), } } diff --git a/src/connector/src/sink/nats.rs b/src/connector/src/sink/nats.rs index 2bc4160e7a263..fc6afc379eb76 100644 --- a/src/connector/src/sink/nats.rs +++ b/src/connector/src/sink/nats.rs @@ -107,12 +107,9 @@ impl Sink for NatsSink { "Nats sink only support append-only mode" ))); } - let _client = self - .config - .common - .build_client() - .await - .context("validate nats sink error")?; + let _client = (self.config.common.build_client().await) + .context("validate nats sink error") + .map_err(SinkError::Nats)?; Ok(()) } diff --git a/src/connector/src/source/cdc/source/reader.rs b/src/connector/src/source/cdc/source/reader.rs index 43753dad599c7..902f113526a2e 100644 --- a/src/connector/src/source/cdc/source/reader.rs +++ b/src/connector/src/source/cdc/source/reader.rs @@ -14,7 +14,7 @@ use std::str::FromStr; -use anyhow::anyhow; +use anyhow::{anyhow, Context}; use async_trait::async_trait; use futures_async_stream::try_stream; use itertools::Itertools; @@ -79,8 +79,8 @@ impl SplitReader for CdcSplitReader { if matches!(T::source_type(), CdcSourceType::Citus) && let Some(server_addr) = split.server_addr() { - let host_addr = HostAddr::from_str(&server_addr) - .map_err(|err| anyhow!("invalid server address for cdc split. 
{}", err))?; + let host_addr = + HostAddr::from_str(&server_addr).context("invalid server address for cdc split")?; properties.insert("hostname".to_string(), host_addr.host); properties.insert("port".to_string(), host_addr.port.to_string()); // rewrite table name with suffix to capture all shards in the split @@ -218,7 +218,7 @@ impl CommonSplitReader for CdcSplitReader { GLOBAL_ERROR_METRICS.cdc_source_error.report([ source_type.as_str_name().into(), source_id.clone(), - e.to_string(), + e.to_report_string(), ]); Err(e)?; } diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 16314d21dbc1e..4441be8c9db21 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use std::time::Duration; -use anyhow::{anyhow, Context as _}; +use anyhow::{anyhow, Context}; use async_trait::async_trait; use rdkafka::consumer::{BaseConsumer, Consumer}; use rdkafka::error::KafkaResult; @@ -112,6 +112,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { self.broker_address ) })?; + let watermarks = self.get_watermarks(topic_partitions.as_ref()).await?; let mut start_offsets = self .fetch_start_offset(topic_partitions.as_ref(), &watermarks) diff --git a/src/error/src/lib.rs b/src/error/src/lib.rs index d3364485e8f2f..f7a2611b84a65 100644 --- a/src/error/src/lib.rs +++ b/src/error/src/lib.rs @@ -23,3 +23,6 @@ pub mod anyhow; pub mod tonic; + +// Re-export the `thiserror-ext` crate. 
+pub use thiserror_ext::*; diff --git a/src/frontend/planner_test/src/lib.rs b/src/frontend/planner_test/src/lib.rs index f7ce8cc04a950..1578fa5cd0dfe 100644 --- a/src/frontend/planner_test/src/lib.rs +++ b/src/frontend/planner_test/src/lib.rs @@ -944,7 +944,7 @@ pub async fn run_test_file(file_path: &Path, file_content: &str) -> Result<()> { "Test #{i} (id: {}) failed, SQL:\n{}\nError: {}", c.id().clone().unwrap_or_else(|| "".to_string()), c.sql(), - e + e.as_report() ); failed_num += 1; } diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index bbb2d93b21790..c8cfd938c23a2 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -62,6 +62,7 @@ use risingwave_sqlparser::ast::{ ProtobufSchema, SourceWatermark, }; use risingwave_sqlparser::parser::IncludeOption; +use thiserror_ext::AsReport; use super::RwPgResponse; use crate::binder::Binder; @@ -1081,7 +1082,7 @@ pub(super) async fn check_source_schema( } else if connector == ICEBERG_CONNECTOR { Ok(check_iceberg_source(props, columns) .await - .map_err(|err| ProtocolError(err.to_string()))?) + .map_err(|err| ProtocolError(err.to_report_string()))?) 
} else { Ok(()) } diff --git a/src/meta/src/dashboard/mod.rs b/src/meta/src/dashboard/mod.rs index 814e14d42af9e..b5104e557a1b2 100644 --- a/src/meta/src/dashboard/mod.rs +++ b/src/meta/src/dashboard/mod.rs @@ -30,6 +30,7 @@ use axum::Router; use hyper::Request; use parking_lot::Mutex; use risingwave_rpc_client::ComputeClientPool; +use thiserror_ext::AsReport; use tower::{ServiceBuilder, ServiceExt}; use tower_http::add_extension::AddExtensionLayer; use tower_http::cors::{self, CorsLayer}; @@ -455,7 +456,7 @@ impl DashboardService { proxy::proxy(req, cache).await.or_else(|err| { Ok(( StatusCode::INTERNAL_SERVER_ERROR, - format!("Unhandled internal error: {}", err), + err.context("Unhandled internal error").to_report_string(), ) .into_response()) }) diff --git a/src/meta/src/manager/sink_coordination/coordinator_worker.rs b/src/meta/src/manager/sink_coordination/coordinator_worker.rs index bebef2d307dcc..e1c096aa3cf98 100644 --- a/src/meta/src/manager/sink_coordination/coordinator_worker.rs +++ b/src/meta/src/manager/sink_coordination/coordinator_worker.rs @@ -110,7 +110,7 @@ impl CoordinatorWorker { .wait_for_writers(first_writer_request.vnode_bitmap) .await { - error!("failed to wait for all writers: {:?}", e); + error!(error = %e.as_report(), "failed to wait for all writers"); worker .send_to_all_sink_writers(|| { Err(Status::cancelled("failed to wait for all writers")) diff --git a/src/risedevtool/src/bin/risedev-dev.rs b/src/risedevtool/src/bin/risedev-dev.rs index ea9099eae319c..9723ee89fbd51 100644 --- a/src/risedevtool/src/bin/risedev-dev.rs +++ b/src/risedevtool/src/bin/risedev-dev.rs @@ -31,6 +31,7 @@ use risedev::{ RISEDEV_SESSION_NAME, }; use tempfile::tempdir; +use thiserror_ext::AsReport; use yaml_rust::YamlEmitter; #[derive(Default)] @@ -444,9 +445,9 @@ fn main() -> Result<()> { } Err(err) => { println!( - "{} - Failed to start: {:?}", // with `Caused by` + "{} - Failed to start: {:#}", // pretty with `Caused by` style("ERROR").red().bold(), - err, + 
err.as_report(), ); println!(); println!( diff --git a/src/risedevtool/src/preflight_check.rs b/src/risedevtool/src/preflight_check.rs index 17dc48884fea8..9b25d39423566 100644 --- a/src/risedevtool/src/preflight_check.rs +++ b/src/risedevtool/src/preflight_check.rs @@ -17,6 +17,7 @@ use std::process::Command; use anyhow::Result; use console::style; +use thiserror_ext::AsReport; fn preflight_check_proxy() -> Result<()> { if env::var("http_proxy").is_ok() @@ -72,7 +73,7 @@ pub fn preflight_check() -> Result<()> { "[{}] {} - failed to run proxy preflight check: {}", style("risedev-preflight-check").bold(), style("WARN").yellow().bold(), - e + e.as_report() ); } @@ -81,7 +82,7 @@ pub fn preflight_check() -> Result<()> { "[{}] {} - failed to run ulimit preflight check: {}", style("risedev-preflight-check").bold(), style("WARN").yellow().bold(), - e + e.as_report() ); } diff --git a/src/tests/regress/src/lib.rs b/src/tests/regress/src/lib.rs index efdd0f1422c00..a18afdebc843f 100644 --- a/src/tests/regress/src/lib.rs +++ b/src/tests/regress/src/lib.rs @@ -27,6 +27,9 @@ #![deny(rustdoc::broken_intra_doc_links)] #![feature(path_file_prefix)] #![feature(let_chains)] +#![feature(register_tool)] +#![register_tool(rw)] +#![allow(rw::format_error)] mod opts; diff --git a/src/tests/simulation/tests/integration_tests/recovery/background_ddl.rs b/src/tests/simulation/tests/integration_tests/recovery/background_ddl.rs index 97c08d098f6c9..27cf4985dc4fd 100644 --- a/src/tests/simulation/tests/integration_tests/recovery/background_ddl.rs +++ b/src/tests/simulation/tests/integration_tests/recovery/background_ddl.rs @@ -15,6 +15,7 @@ use std::time::Duration; use anyhow::{anyhow, Result}; +use risingwave_common::error::v2::AsReport; use risingwave_simulation::cluster::{Cluster, Configuration, Session}; use tokio::time::sleep; @@ -348,7 +349,7 @@ async fn test_high_barrier_latency_cancel(config: Configuration) -> Result<()> { .run("CREATE MATERIALIZED VIEW mv1 as values(1)") .await { - 
tracing::info!("Recreate mv failed with {e:?}"); + tracing::info!(error = %e.as_report(), "Recreate mv failed"); continue; } else { tracing::info!("recreated mv"); From 6ab2342222df52b037ab6bcd56c0be83719f15fc Mon Sep 17 00:00:00 2001 From: xfz <73645462+xuefengze@users.noreply.github.com> Date: Fri, 23 Feb 2024 18:22:22 +0800 Subject: [PATCH 30/35] test: fix starrocks_sink integration test (#15229) --- integration_tests/starrocks-sink/docker-compose.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/integration_tests/starrocks-sink/docker-compose.yml b/integration_tests/starrocks-sink/docker-compose.yml index 4210206aa7705..81ef7c277dad0 100644 --- a/integration_tests/starrocks-sink/docker-compose.yml +++ b/integration_tests/starrocks-sink/docker-compose.yml @@ -2,7 +2,7 @@ version: "3" services: starrocks-fe: - image: starrocks/fe-ubuntu:latest + image: starrocks/fe-ubuntu:3.1.7 hostname: starrocks-fe container_name: starrocks-fe volumes: @@ -19,7 +19,7 @@ services: timeout: 5s retries: 30 starrocks-be: - image: starrocks/be-ubuntu:latest + image: starrocks/be-ubuntu:3.1.7 command: - /bin/bash - -c @@ -27,6 +27,7 @@ services: sleep 15s; mysql --connect-timeout 2 -h starrocks-fe -P9030 -uroot -e "alter system add backend \"starrocks-be:9050\";" /opt/starrocks/be/bin/start_be.sh ports: + - 9050:9050 - 8040:8040 hostname: starrocks-be container_name: starrocks-be From b0a90004f16c490dba8734f145e5a66996419935 Mon Sep 17 00:00:00 2001 From: Zihao Xu Date: Fri, 23 Feb 2024 10:47:39 -0500 Subject: [PATCH 31/35] feat(binder): add const case-when evaluation optimization during binding (#14965) --- src/frontend/src/binder/expr/mod.rs | 83 ++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/src/frontend/src/binder/expr/mod.rs b/src/frontend/src/binder/expr/mod.rs index 0bc12545984ca..b1bd37b168010 100644 --- a/src/frontend/src/binder/expr/mod.rs +++ b/src/frontend/src/binder/expr/mod.rs @@ -471,6 +471,60 @@ impl 
Binder { Ok(func_call.into()) } + /// The optimization check for the following case-when expression pattern + /// e.g., select case 1 when (...) then (...) else (...) end; + fn check_constant_case_when_optimization( + &mut self, + conditions: Vec, + results_expr: Vec, + operand: Option>, + fallback: Option, + constant_case_when_eval_inputs: &mut Vec, + ) -> bool { + // The operand value to be compared later + let operand_value; + + if let Some(operand) = operand { + let Ok(operand) = self.bind_expr_inner(*operand) else { + return false; + }; + if !operand.is_const() { + return false; + } + operand_value = operand; + } else { + return false; + } + + for (condition, result) in zip_eq_fast(conditions, results_expr) { + if let Expr::Value(_) = condition.clone() { + let Ok(res) = self.bind_expr_inner(condition.clone()) else { + return false; + }; + // Found a match + if res == operand_value { + constant_case_when_eval_inputs.push(result); + return true; + } + } else { + return false; + } + } + + // Otherwise this will eventually go through fallback arm + debug_assert!( + constant_case_when_eval_inputs.is_empty(), + "expect `inputs` to be empty" + ); + + let Some(fallback) = fallback else { + return false; + }; + + constant_case_when_eval_inputs.push(fallback); + true + } + /// The helper function to check if the current case-when /// expression in `bind_case` could be optimized /// into `ConstantLookupExpression` @@ -493,6 +547,12 @@ impl Binder { let Ok(operand) = self.bind_expr_inner(*operand) else { return false; }; + // This optimization should be done in subsequent optimization phase + // if the operand is const + // e.g., select case 1 when 1 then 114514 else 1919810 end; + if operand.is_const() { + return false; + } constant_lookup_inputs.push(operand); } else { return false; @@ -506,7 +566,7 @@ impl Binder { constant_lookup_inputs.push(input); } else { // If at least one condition is not in the simple form / not constant, - // we can NOT do the subsequent 
optimization then + // we can NOT do the subsequent optimization pass return false; } @@ -538,6 +598,27 @@ impl Binder { .transpose()?; let mut constant_lookup_inputs = Vec::new(); + let mut constant_case_when_eval_inputs = Vec::new(); + + let constant_case_when_flag = self.check_constant_case_when_optimization( + conditions.clone(), + results_expr.clone(), + operand.clone(), + else_result_expr.clone(), + &mut constant_case_when_eval_inputs, + ); + + if constant_case_when_flag { + // Sanity check + if constant_case_when_eval_inputs.len() != 1 { + return Err(ErrorCode::BindError( + "expect `constant_case_when_eval_inputs` only contains a single bound expression".to_string() + ) + .into()); + } + // Directly return the first element of the vector + return Ok(constant_case_when_eval_inputs[0].take()); + } // See if the case-when expression can be optimized let optimize_flag = self.check_bind_case_optimization( From e19aaecb1843dc9c3b309ecd8b6e863435a4fa98 Mon Sep 17 00:00:00 2001 From: Wallace Date: Sat, 24 Feb 2024 01:17:50 +0800 Subject: [PATCH 32/35] fix(storage): fix leaving put key after delete keys are dropped cause inconsistent (#15232) Signed-off-by: Little-Wallace --- src/storage/src/hummock/compactor/compactor_runner.rs | 5 ++++- src/storage/src/hummock/compactor/fast_compactor_runner.rs | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/storage/src/hummock/compactor/compactor_runner.rs b/src/storage/src/hummock/compactor/compactor_runner.rs index 710a6fb3c4a65..441480d66a6b7 100644 --- a/src/storage/src/hummock/compactor/compactor_runner.rs +++ b/src/storage/src/hummock/compactor/compactor_runner.rs @@ -766,6 +766,7 @@ where let mut is_new_user_key = full_key_tracker.observe(iter.key()).is_some(); let mut drop = false; + // CRITICAL WARN: Because of memtable spill, there may be several versions of the same user-key share the same `pure_epoch`. Do not change this code unless necessary. 
let epoch = iter_key.epoch_with_gap.pure_epoch(); let value = iter.value(); if is_new_user_key { @@ -818,7 +819,9 @@ where // in our design, frontend avoid to access keys which had be deleted, so we dont // need to consider the epoch when the compaction_filter match (it // means that mv had drop) - if (epoch <= task_config.watermark && task_config.gc_delete_keys && value.is_delete()) + // Because of memtable spill, there may be a PUT key share the same `pure_epoch` with DELETE key. + // Do not assume that "the epoch of keys behind must be smaller than the current key." + if (epoch < task_config.watermark && task_config.gc_delete_keys && value.is_delete()) || (epoch < task_config.watermark && (watermark_can_see_last_key || earliest_range_delete_which_can_see_iter_key <= task_config.watermark)) diff --git a/src/storage/src/hummock/compactor/fast_compactor_runner.rs b/src/storage/src/hummock/compactor/fast_compactor_runner.rs index 935bfa27c4666..ab4ddc216b1a8 100644 --- a/src/storage/src/hummock/compactor/fast_compactor_runner.rs +++ b/src/storage/src/hummock/compactor/fast_compactor_runner.rs @@ -600,7 +600,9 @@ impl CompactTaskExecutor { self.watermark_can_see_last_key = false; self.last_key_is_delete = false; } - if epoch <= self.task_config.watermark + + // See note in `compactor_runner.rs`. 
+ if epoch < self.task_config.watermark && self.task_config.gc_delete_keys && value.is_delete() { From fcdeb3f75a1ef443b3dac1b1dfcde47379ec506a Mon Sep 17 00:00:00 2001 From: Bohan Zhang Date: Mon, 26 Feb 2024 12:11:44 +0800 Subject: [PATCH 33/35] fix: handle upsert json in prev versions (#15226) Signed-off-by: tabVersion --- backwards-compat-tests/scripts/utils.sh | 28 +++++++++++------ .../slt/kafka/upsert/deprecate_upsert.slt | 16 ++++++++++ .../slt/kafka/upsert/include_key_as.slt | 18 +++++++++++ .../slt/kafka/validate_restart.slt | 13 ++++++++ proto/plan_common.proto | 17 +++++++--- src/common/src/catalog/column.rs | 2 ++ src/common/src/catalog/test_utils.rs | 1 + src/connector/src/parser/avro/util.rs | 1 + src/connector/src/parser/protobuf/parser.rs | 1 + .../src/from_proto/source/trad_source.rs | 31 +++++++++++++------ 10 files changed, 105 insertions(+), 23 deletions(-) create mode 100644 backwards-compat-tests/slt/kafka/upsert/deprecate_upsert.slt create mode 100644 backwards-compat-tests/slt/kafka/upsert/include_key_as.slt diff --git a/backwards-compat-tests/scripts/utils.sh b/backwards-compat-tests/scripts/utils.sh index 1afbf08dd4441..5990aac026077 100644 --- a/backwards-compat-tests/scripts/utils.sh +++ b/backwards-compat-tests/scripts/utils.sh @@ -103,19 +103,21 @@ insert_json_kafka() { local JSON=$1 echo "$JSON" | "$KAFKA_PATH"/bin/kafka-console-producer.sh \ --topic backwards_compat_test_kafka_source \ - --bootstrap-server localhost:29092 + --bootstrap-server localhost:29092 \ + --property "parse.key=true" \ + --property "key.separator=," } seed_json_kafka() { - insert_json_kafka '{"timestamp": "2023-07-28 07:11:00", "user_id": 1, "page_id": 1, "action": "gtrgretrg"}' - insert_json_kafka '{"timestamp": "2023-07-28 07:11:00", "user_id": 2, "page_id": 1, "action": "fsdfgerrg"}' - insert_json_kafka '{"timestamp": "2023-07-28 07:11:00", "user_id": 3, "page_id": 1, "action": "sdfergtth"}' - insert_json_kafka '{"timestamp": "2023-07-28 06:54:00", 
"user_id": 4, "page_id": 2, "action": "erwerhghj"}' - insert_json_kafka '{"timestamp": "2023-07-28 06:54:00", "user_id": 5, "page_id": 2, "action": "kiku7ikkk"}' - insert_json_kafka '{"timestamp": "2023-07-28 06:54:00", "user_id": 6, "page_id": 3, "action": "6786745ge"}' - insert_json_kafka '{"timestamp": "2023-07-28 06:54:00", "user_id": 7, "page_id": 3, "action": "fgbgfnyyy"}' - insert_json_kafka '{"timestamp": "2023-07-28 06:54:00", "user_id": 8, "page_id": 4, "action": "werwerwwe"}' - insert_json_kafka '{"timestamp": "2023-07-28 06:54:00", "user_id": 9, "page_id": 4, "action": "yjtyjtyyy"}' + insert_json_kafka '{"user_id": 1},{"timestamp": "2023-07-28 07:11:00", "user_id": 1, "page_id": 1, "action": "gtrgretrg"}' + insert_json_kafka '{"user_id": 2},{"timestamp": "2023-07-28 07:11:00", "user_id": 2, "page_id": 1, "action": "fsdfgerrg"}' + insert_json_kafka '{"user_id": 3},{"timestamp": "2023-07-28 07:11:00", "user_id": 3, "page_id": 1, "action": "sdfergtth"}' + insert_json_kafka '{"user_id": 4},{"timestamp": "2023-07-28 06:54:00", "user_id": 4, "page_id": 2, "action": "erwerhghj"}' + insert_json_kafka '{"user_id": 5},{"timestamp": "2023-07-28 06:54:00", "user_id": 5, "page_id": 2, "action": "kiku7ikkk"}' + insert_json_kafka '{"user_id": 6},{"timestamp": "2023-07-28 06:54:00", "user_id": 6, "page_id": 3, "action": "6786745ge"}' + insert_json_kafka '{"user_id": 7},{"timestamp": "2023-07-28 06:54:00", "user_id": 7, "page_id": 3, "action": "fgbgfnyyy"}' + insert_json_kafka '{"user_id": 8},{"timestamp": "2023-07-28 06:54:00", "user_id": 8, "page_id": 4, "action": "werwerwwe"}' + insert_json_kafka '{"user_id": 9},{"timestamp": "2023-07-28 06:54:00", "user_id": 9, "page_id": 4, "action": "yjtyjtyyy"}' } # https://stackoverflow.com/a/4024263 @@ -225,6 +227,12 @@ seed_old_cluster() { create_kafka_topic seed_json_kafka sqllogictest -d dev -h localhost -p 4566 "$TEST_DIR/kafka/seed.slt" + # use the old syntax for version at most 1.5.4 + if version_le "$OLD_VERSION" "1.5.4" 
; then + sqllogictest -d dev -h localhost -p 4566 "$TEST_DIR/kafka/upsert/deprecate_upsert.slt" + else + sqllogictest -d dev -h localhost -p 4566 "$TEST_DIR/kafka/upsert/include_key_as.slt" + fi echo "--- KAFKA TEST: wait 5s for kafka to process data" sleep 5 diff --git a/backwards-compat-tests/slt/kafka/upsert/deprecate_upsert.slt b/backwards-compat-tests/slt/kafka/upsert/deprecate_upsert.slt new file mode 100644 index 0000000000000..55cfce886455d --- /dev/null +++ b/backwards-compat-tests/slt/kafka/upsert/deprecate_upsert.slt @@ -0,0 +1,16 @@ +statement ok +CREATE TABLE IF NOT EXISTS kafka_table +( + action varchar, + user_id integer, + obj_id integer, + name varchar, + page_id integer, + age integer +) +WITH ( + connector='kafka', + topic='backwards_compat_test_kafka_source', + properties.bootstrap.server='localhost:29092', + scan.startup.mode='earliest', +) FORMAT UPSERT ENCODE JSON; \ No newline at end of file diff --git a/backwards-compat-tests/slt/kafka/upsert/include_key_as.slt b/backwards-compat-tests/slt/kafka/upsert/include_key_as.slt new file mode 100644 index 0000000000000..36ef426574223 --- /dev/null +++ b/backwards-compat-tests/slt/kafka/upsert/include_key_as.slt @@ -0,0 +1,18 @@ +statement ok +CREATE TABLE IF NOT EXISTS kafka_table +( + action varchar, + user_id integer, + obj_id integer, + name varchar, + page_id integer, + age integer, + primary key (_rw_key) +) +INCLUDE key as _rw_key +WITH ( + connector='kafka', + topic='backwards_compat_test_kafka_source', + properties.bootstrap.server='localhost:29092', + scan.startup.mode='earliest', +) FORMAT UPSERT ENCODE JSON; \ No newline at end of file diff --git a/backwards-compat-tests/slt/kafka/validate_restart.slt b/backwards-compat-tests/slt/kafka/validate_restart.slt index 7058b118f4d20..6d853007b9829 100644 --- a/backwards-compat-tests/slt/kafka/validate_restart.slt +++ b/backwards-compat-tests/slt/kafka/validate_restart.slt @@ -50,3 +50,16 @@ werwerwwe 8 NULL NULL 4 NULL yjtyjtyyy 9 NULL NULL 4 
NULL yjtyjtyyy 9 NULL NULL 4 NULL +# kafka_table should do the upsert and overwrite the existing records +query I rowsort +SELECT action, user_id, obj_id, name, page_id, age, _rw_key FROM kafka_table; +---- +6786745ge 6 NULL NULL 3 NULL \x7b22757365725f6964223a20367d +erwerhghj 4 NULL NULL 2 NULL \x7b22757365725f6964223a20347d +fgbgfnyyy 7 NULL NULL 3 NULL \x7b22757365725f6964223a20377d +fsdfgerrg 2 NULL NULL 1 NULL \x7b22757365725f6964223a20327d +gtrgretrg 1 NULL NULL 1 NULL \x7b22757365725f6964223a20317d +kiku7ikkk 5 NULL NULL 2 NULL \x7b22757365725f6964223a20357d +sdfergtth 3 NULL NULL 1 NULL \x7b22757365725f6964223a20337d +werwerwwe 8 NULL NULL 4 NULL \x7b22757365725f6964223a20387d +yjtyjtyyy 9 NULL NULL 4 NULL \x7b22757365725f6964223a20397d diff --git a/proto/plan_common.proto b/proto/plan_common.proto index 1dd45ad08a6ef..79a1b1622704e 100644 --- a/proto/plan_common.proto +++ b/proto/plan_common.proto @@ -54,10 +54,8 @@ message ColumnDesc { // This field is used to represent the connector-spec additional column type. // UNSPECIFIED or unset for normal column. 
- // deprecated, use AdditionalColumn instead - // AdditionalColumnType additional_column_type = 9; - reserved "additional_column_type"; - reserved 9; + // deprecated, use AdditionalColumn instead, keep for compatibility with v1.6.x + AdditionalColumnType additional_column_type = 9; ColumnDescVersion version = 10; @@ -218,3 +216,14 @@ message AdditionalColumn { AdditionalColumnHeaders headers = 7; } } + +enum AdditionalColumnType { + ADDITIONAL_COLUMN_TYPE_UNSPECIFIED = 0; + ADDITIONAL_COLUMN_TYPE_KEY = 1; + ADDITIONAL_COLUMN_TYPE_TIMESTAMP = 2; + ADDITIONAL_COLUMN_TYPE_PARTITION = 3; + ADDITIONAL_COLUMN_TYPE_OFFSET = 4; + ADDITIONAL_COLUMN_TYPE_HEADER = 5; + ADDITIONAL_COLUMN_TYPE_FILENAME = 6; + ADDITIONAL_COLUMN_TYPE_NORMAL = 7; +} diff --git a/src/common/src/catalog/column.rs b/src/common/src/catalog/column.rs index f82e96a80c0e2..82d2f22f41cb4 100644 --- a/src/common/src/catalog/column.rs +++ b/src/common/src/catalog/column.rs @@ -170,6 +170,7 @@ impl ColumnDesc { type_name: self.type_name.clone(), generated_or_default_column: self.generated_or_default_column.clone(), description: self.description.clone(), + additional_column_type: 0, // deprecated additional_column: Some(self.additional_column.clone()), version: self.version as i32, } @@ -305,6 +306,7 @@ impl From<&ColumnDesc> for PbColumnDesc { type_name: c.type_name.clone(), generated_or_default_column: c.generated_or_default_column.clone(), description: c.description.clone(), + additional_column_type: 0, // deprecated additional_column: c.additional_column.clone().into(), version: c.version as i32, } diff --git a/src/common/src/catalog/test_utils.rs b/src/common/src/catalog/test_utils.rs index 9930a5717b849..ae87b3a881f84 100644 --- a/src/common/src/catalog/test_utils.rs +++ b/src/common/src/catalog/test_utils.rs @@ -60,6 +60,7 @@ impl ColumnDescTestExt for ColumnDesc { field_descs: fields, generated_or_default_column: None, description: None, + additional_column_type: 0, // deprecated additional_column: 
Some(AdditionalColumn { column_type: None }), version: ColumnDescVersion::Pr13707 as i32, } diff --git a/src/connector/src/parser/avro/util.rs b/src/connector/src/parser/avro/util.rs index ba065b7da4dc4..958f4c9ca5db5 100644 --- a/src/connector/src/parser/avro/util.rs +++ b/src/connector/src/parser/avro/util.rs @@ -64,6 +64,7 @@ fn avro_field_to_column_desc( type_name: schema_name.to_string(), generated_or_default_column: None, description: None, + additional_column_type: 0, // deprecated additional_column: Some(AdditionalColumn { column_type: None }), version: ColumnDescVersion::Pr13707 as i32, }) diff --git a/src/connector/src/parser/protobuf/parser.rs b/src/connector/src/parser/protobuf/parser.rs index d4287a869b221..4248fa2b7470c 100644 --- a/src/connector/src/parser/protobuf/parser.rs +++ b/src/connector/src/parser/protobuf/parser.rs @@ -174,6 +174,7 @@ impl ProtobufParserConfig { type_name: m.full_name().to_string(), generated_or_default_column: None, description: None, + additional_column_type: 0, // deprecated additional_column: Some(AdditionalColumn { column_type: None }), version: ColumnDescVersion::Pr13707 as i32, }) diff --git a/src/stream/src/from_proto/source/trad_source.rs b/src/stream/src/from_proto/source/trad_source.rs index 142b4ad9e1553..8ce6b88b0196b 100644 --- a/src/stream/src/from_proto/source/trad_source.rs +++ b/src/stream/src/from_proto/source/trad_source.rs @@ -22,8 +22,9 @@ use risingwave_connector::source::{ use risingwave_pb::data::data_type::TypeName as PbTypeName; use risingwave_pb::plan_common::additional_column::ColumnType as AdditionalColumnType; use risingwave_pb::plan_common::{ - AdditionalColumn, AdditionalColumnKey, AdditionalColumnTimestamp, ColumnDescVersion, - FormatType, PbEncodeType, + AdditionalColumn, AdditionalColumnKey, AdditionalColumnTimestamp, + AdditionalColumnType as LegacyAdditionalColumnType, ColumnDescVersion, FormatType, + PbEncodeType, }; use risingwave_pb::stream_plan::SourceNode; use 
risingwave_storage::panic_store::PanicStateStore; @@ -75,16 +76,16 @@ impl ExecutorBuilder for SourceExecutorBuilder { } let mut source_columns = source.columns.clone(); - { // compatible code: introduced in https://github.com/risingwavelabs/risingwave/pull/13707 // for upsert and (avro | protobuf) overwrite the `_rw_key` column's ColumnDesc.additional_column_type to Key if source_info.format() == FormatType::Upsert && (source_info.row_encode() == PbEncodeType::Avro - || source_info.row_encode() == PbEncodeType::Protobuf) + || source_info.row_encode() == PbEncodeType::Protobuf + || source_info.row_encode() == PbEncodeType::Json) { - let _ = source_columns.iter_mut().map(|c| { - let _ = c.column_desc.as_mut().map(|desc| { + for c in &mut source_columns { + if let Some(desc) = c.column_desc.as_mut() { let is_bytea = desc .get_column_type() .map(|col_type| col_type.type_name == PbTypeName::Bytea as i32) @@ -93,7 +94,7 @@ impl ExecutorBuilder for SourceExecutorBuilder { &desc.version() ) && is_bytea - // the column is from a legacy version + // the column is from a legacy version (before v1.5.x) && desc.version == ColumnDescVersion::Unspecified as i32 { desc.additional_column = Some(AdditionalColumn { @@ -102,8 +103,20 @@ impl ExecutorBuilder for SourceExecutorBuilder { )), }); } - }); - }); + + // the column is from a legacy version (v1.6.x) + // introduced in https://github.com/risingwavelabs/risingwave/pull/15226 + if desc.additional_column_type + == LegacyAdditionalColumnType::Key as i32 + { + desc.additional_column = Some(AdditionalColumn { + column_type: Some(AdditionalColumnType::Key( + AdditionalColumnKey {}, + )), + }); + } + } + } } } From 9651f39dedae705a359676ffd0911a77fe01328d Mon Sep 17 00:00:00 2001 From: Kai Jellinghaus Date: Mon, 26 Feb 2024 06:36:01 +0100 Subject: [PATCH 34/35] feat(sink): Support Sinking Enum16 Ids to Clickhouse (#14668) Co-authored-by: Xinhao Xu <84456268+xxhZs@users.noreply.github.com> Co-authored-by: Richard Chien --- 
ci/scripts/e2e-clickhouse-sink-test.sh | 16 ++++++++-------- e2e_test/sink/clickhouse_sink.slt | 6 +++--- src/connector/src/sink/clickhouse.rs | 8 +++++--- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/ci/scripts/e2e-clickhouse-sink-test.sh b/ci/scripts/e2e-clickhouse-sink-test.sh index c14d83e8c4281..5443d4e53b7fd 100755 --- a/ci/scripts/e2e-clickhouse-sink-test.sh +++ b/ci/scripts/e2e-clickhouse-sink-test.sh @@ -31,7 +31,7 @@ sleep 1 echo "--- create clickhouse table" curl https://clickhouse.com/ | sh sleep 2 -./clickhouse client --host=clickhouse-server --port=9000 --query="CREATE table demo_test(v1 Int32,v2 Int64,v3 String)ENGINE = ReplacingMergeTree PRIMARY KEY (v1);" +./clickhouse client --host=clickhouse-server --port=9000 --query="CREATE table demo_test(v1 Int32,v2 Int64,v3 String,v4 Enum16('A'=1,'B'=2))ENGINE = ReplacingMergeTree PRIMARY KEY (v1);" echo "--- testing sinks" sqllogictest -p 4566 -d dev './e2e_test/sink/clickhouse_sink.slt' @@ -41,13 +41,13 @@ sleep 5 # check sink destination using shell if cat ./query_result.csv | sort | awk -F "," '{ -if ($1 == 1 && $2 == 50 && $3 == "\"1-50\"") c1++; - if ($1 == 13 && $2 == 2 && $3 == "\"13-2\"") c2++; - if ($1 == 2 && $2 == 2 && $3 == "\"2-2\"") c3++; - if ($1 == 21 && $2 == 2 && $3 == "\"21-2\"") c4++; - if ($1 == 3 && $2 == 2 && $3 == "\"3-2\"") c5++; - if ($1 == 5 && $2 == 2 && $3 == "\"5-2\"") c6++; - if ($1 == 8 && $2 == 2 && $3 == "\"8-2\"") c7++; } +if ($1 == 1 && $2 == 50 && $3 == "\"1-50\"" && $4 == "\"A\"") c1++; + if ($1 == 13 && $2 == 2 && $3 == "\"13-2\"" && $4 == "\"B\"") c2++; + if ($1 == 2 && $2 == 2 && $3 == "\"2-2\"" && $4 == "\"B\"") c3++; + if ($1 == 21 && $2 == 2 && $3 == "\"21-2\"" && $4 == "\"A\"") c4++; + if ($1 == 3 && $2 == 2 && $3 == "\"3-2\"" && $4 == "\"A\"") c5++; + if ($1 == 5 && $2 == 2 && $3 == "\"5-2\"" && $4 == "\"B\"") c6++; + if ($1 == 8 && $2 == 2 && $3 == "\"8-2\"" && $4 == "\"A\"") c7++; } END { exit !(c1 == 1 && c2 == 1 && c3 == 1 && c4 == 1 && c5 
== 1 && c6 == 1 && c7 == 1); }'; then echo "Clickhouse sink check passed" else diff --git a/e2e_test/sink/clickhouse_sink.slt b/e2e_test/sink/clickhouse_sink.slt index 909bdbfd6356b..9791f484326d7 100644 --- a/e2e_test/sink/clickhouse_sink.slt +++ b/e2e_test/sink/clickhouse_sink.slt @@ -1,11 +1,11 @@ statement ok -CREATE TABLE t6 (v1 int primary key, v2 bigint, v3 varchar); +CREATE TABLE t6 (v1 int primary key, v2 bigint, v3 varchar, v4 smallint); statement ok CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; statement ok -CREATE SINK s6 AS select mv6.v1 as v1, mv6.v2 as v2, mv6.v3 as v3 from mv6 WITH ( +CREATE SINK s6 AS select mv6.v1 as v1, mv6.v2 as v2, mv6.v3 as v3, mv6.v4 as v4 from mv6 WITH ( connector = 'clickhouse', type = 'append-only', force_append_only='true', @@ -17,7 +17,7 @@ CREATE SINK s6 AS select mv6.v1 as v1, mv6.v2 as v2, mv6.v3 as v3 from mv6 WITH ); statement ok -INSERT INTO t6 VALUES (1, 50, '1-50'), (2, 2, '2-2'), (3, 2, '3-2'), (5, 2, '5-2'), (8, 2, '8-2'), (13, 2, '13-2'), (21, 2, '21-2'); +INSERT INTO t6 VALUES (1, 50, '1-50', 1), (2, 2, '2-2', 2), (3, 2, '3-2', 1), (5, 2, '5-2', 2), (8, 2, '8-2', 1), (13, 2, '13-2', 2), (21, 2, '21-2', 1); statement ok FLUSH; diff --git a/src/connector/src/sink/clickhouse.rs b/src/connector/src/sink/clickhouse.rs index 6f658318f69b5..4e21659dae49e 100644 --- a/src/connector/src/sink/clickhouse.rs +++ b/src/connector/src/sink/clickhouse.rs @@ -262,9 +262,11 @@ impl ClickHouseSink { ) -> Result<()> { let is_match = match fields_type { risingwave_common::types::DataType::Boolean => Ok(ck_column.r#type.contains("Bool")), - risingwave_common::types::DataType::Int16 => { - Ok(ck_column.r#type.contains("UInt16") | ck_column.r#type.contains("Int16")) - } + risingwave_common::types::DataType::Int16 => Ok(ck_column.r#type.contains("UInt16") + | ck_column.r#type.contains("Int16") + // Allow Int16 to be pushed to Enum16, they share an encoding and value range + // No special care is taken to ensure values are valid. 
+ | ck_column.r#type.contains("Enum16")), risingwave_common::types::DataType::Int32 => { Ok(ck_column.r#type.contains("UInt32") | ck_column.r#type.contains("Int32")) } From d50b4cb9326ae5c49be338952cf0783d98e94cb8 Mon Sep 17 00:00:00 2001 From: Eric Fu Date: Mon, 26 Feb 2024 13:45:06 +0800 Subject: [PATCH 35/35] feat(catalog): support DBeaver constraints view (#15227) --- e2e_test/batch/catalog/pg_class.slt.part | 4 +- e2e_test/batch/catalog/pg_constraint.slt.part | 10 + .../planner_test/tests/planner_test_runner.rs | 2 +- .../tests/testdata/input/dbeaver.yaml | 43 +++ .../tests/testdata/output/dbeaver.yaml | 171 ++++++++++++ .../tests/testdata/output/subquery.yaml | 2 +- src/frontend/src/binder/expr/function.rs | 1 + .../catalog/system_catalog/pg_catalog/mod.rs | 3 + .../system_catalog/pg_catalog/pg_class.rs | 5 +- .../pg_catalog/pg_constraint.rs | 116 +++++++- .../system_catalog/pg_catalog/pg_language.rs | 34 +++ .../system_catalog/pg_catalog/pg_rewrite.rs | 33 +++ .../system_catalog/pg_catalog/pg_trigger.rs | 44 +++ .../system_catalog/rw_catalog/rw_columns.rs | 250 +++++++++--------- 14 files changed, 578 insertions(+), 140 deletions(-) create mode 100644 e2e_test/batch/catalog/pg_constraint.slt.part create mode 100644 src/frontend/planner_test/tests/testdata/input/dbeaver.yaml create mode 100644 src/frontend/planner_test/tests/testdata/output/dbeaver.yaml create mode 100644 src/frontend/src/catalog/system_catalog/pg_catalog/pg_language.rs create mode 100644 src/frontend/src/catalog/system_catalog/pg_catalog/pg_rewrite.rs create mode 100644 src/frontend/src/catalog/system_catalog/pg_catalog/pg_trigger.rs diff --git a/e2e_test/batch/catalog/pg_class.slt.part b/e2e_test/batch/catalog/pg_class.slt.part index 2f2ffbe016e3a..ff31c27dcc17d 100644 --- a/e2e_test/batch/catalog/pg_class.slt.part +++ b/e2e_test/batch/catalog/pg_class.slt.part @@ -11,7 +11,7 @@ SELECT oid,relname,relowner,relkind FROM pg_catalog.pg_class ORDER BY oid limit 8 pg_cast 1 r 9 pg_class 1 v 10 
pg_collation 1 v -11 pg_constraint 1 v +11 pg_constraint 1 r 12 pg_conversion 1 v 13 pg_database 1 v 14 pg_depend 1 v @@ -20,4 +20,4 @@ SELECT oid,relname,relowner,relkind FROM pg_catalog.pg_class ORDER BY oid limit query ITIT SELECT oid,relname,relowner,relkind FROM pg_catalog.pg_class WHERE oid = 'pg_namespace'::regclass; ---- -24 pg_namespace 1 v +25 pg_namespace 1 v diff --git a/e2e_test/batch/catalog/pg_constraint.slt.part b/e2e_test/batch/catalog/pg_constraint.slt.part new file mode 100644 index 0000000000000..a2a36e73f5416 --- /dev/null +++ b/e2e_test/batch/catalog/pg_constraint.slt.part @@ -0,0 +1,10 @@ +statement ok +create table t(a int, b int, c varchar, primary key(a,b)); + +query TTTT +select conname, contype, conkey from pg_constraint where conname='t_pkey'; +---- +t_pkey p {1,2} + +statement ok +drop table t; diff --git a/src/frontend/planner_test/tests/planner_test_runner.rs b/src/frontend/planner_test/tests/planner_test_runner.rs index 00adc280ddce3..0ce6bba6d5e66 100644 --- a/src/frontend/planner_test/tests/planner_test_runner.rs +++ b/src/frontend/planner_test/tests/planner_test_runner.rs @@ -43,7 +43,7 @@ fn main() { let file_name = path.file_name().unwrap().to_string_lossy().to_string(); let test_case_name = file_name.split('.').next().unwrap().to_string(); - tests.push(Trial::test(format!("{test_case_name}_test"), move || { + tests.push(Trial::test(test_case_name, move || { let path = test_data_dir().join("input").join(file_name); let file_content = std::fs::read_to_string(&path).unwrap(); diff --git a/src/frontend/planner_test/tests/testdata/input/dbeaver.yaml b/src/frontend/planner_test/tests/testdata/input/dbeaver.yaml new file mode 100644 index 0000000000000..ebe7d16659ca4 --- /dev/null +++ b/src/frontend/planner_test/tests/testdata/input/dbeaver.yaml @@ -0,0 +1,43 @@ +- sql: | + SELECT DISTINCT dep.deptype, dep.classid, dep.objid, cl.relkind, attr.attname,pg_get_expr(ad.adbin, ad.adrelid) adefval, + CASE WHEN cl.relkind IS NOT NULL THEN 
cl.relkind::text || COALESCE(dep.objsubid::text, '')::text + WHEN tg.oid IS NOT NULL THEN 'T'::text + WHEN ty.oid IS NOT NULL THEN 'y'::text + WHEN ns.oid IS NOT NULL THEN 'n'::text + WHEN pr.oid IS NOT NULL THEN 'p'::text + WHEN la.oid IS NOT NULL THEN 'l'::text + WHEN rw.oid IS NOT NULL THEN 'R'::text + WHEN co.oid IS NOT NULL THEN 'C'::text || contype::text + WHEN ad.oid IS NOT NULL THEN 'A'::text + ELSE '' + END AS type, + COALESCE(coc.relname, clrw.relname, tgr.relname) AS ownertable, + CASE WHEN cl.relname IS NOT NULL AND att.attname IS NOT NULL THEN cl.relname || '.' || att.attname + ELSE COALESCE(cl.relname, co.conname, pr.proname, tg.tgname, ty.typname, la.lanname, rw.rulename, ns.nspname) + END AS refname, + COALESCE(nsc.nspname, nso.nspname, nsp.nspname, nst.nspname, nsrw.nspname, tgrn.nspname) AS nspname + FROM pg_depend dep + LEFT JOIN pg_class cl ON dep.objid=cl.oid + LEFT JOIN pg_attribute att ON dep.objid=att.attrelid AND dep.objsubid=att.attnum + LEFT JOIN pg_namespace nsc ON cl.relnamespace=nsc.oid + LEFT JOIN pg_proc pr ON dep.objid=pr.oid + LEFT JOIN pg_namespace nsp ON pr.pronamespace=nsp.oid + LEFT JOIN pg_trigger tg ON dep.objid=tg.oid + LEFT JOIN pg_class tgr ON tg.tgrelid=tgr.oid + LEFT JOIN pg_namespace tgrn ON tgr.relnamespace=tgrn.oid + LEFT JOIN pg_type ty ON dep.objid=ty.oid + LEFT JOIN pg_namespace nst ON ty.typnamespace=nst.oid + LEFT JOIN pg_constraint co ON dep.objid=co.oid + LEFT JOIN pg_class coc ON co.conrelid=coc.oid + LEFT JOIN pg_namespace nso ON co.connamespace=nso.oid + LEFT JOIN pg_rewrite rw ON dep.objid=rw.oid + LEFT JOIN pg_class clrw ON clrw.oid=rw.ev_class + LEFT JOIN pg_namespace nsrw ON clrw.relnamespace=nsrw.oid + LEFT JOIN pg_language la ON dep.objid=la.oid + LEFT JOIN pg_namespace ns ON dep.objid=ns.oid + LEFT JOIN pg_attrdef ad ON ad.oid=dep.objid + LEFT JOIN pg_attribute attr ON attr.attrelid=ad.adrelid and attr.attnum=ad.adnum + WHERE dep.refobjid=$1 + ORDER BY type + expected_outputs: + - batch_plan diff 
--git a/src/frontend/planner_test/tests/testdata/output/dbeaver.yaml b/src/frontend/planner_test/tests/testdata/output/dbeaver.yaml new file mode 100644 index 0000000000000..d0fcf6db1cfd4 --- /dev/null +++ b/src/frontend/planner_test/tests/testdata/output/dbeaver.yaml @@ -0,0 +1,171 @@ +# This file is automatically generated. See `src/frontend/planner_test/README.md` for more information. +- sql: | + SELECT DISTINCT dep.deptype, dep.classid, dep.objid, cl.relkind, attr.attname,pg_get_expr(ad.adbin, ad.adrelid) adefval, + CASE WHEN cl.relkind IS NOT NULL THEN cl.relkind::text || COALESCE(dep.objsubid::text, '')::text + WHEN tg.oid IS NOT NULL THEN 'T'::text + WHEN ty.oid IS NOT NULL THEN 'y'::text + WHEN ns.oid IS NOT NULL THEN 'n'::text + WHEN pr.oid IS NOT NULL THEN 'p'::text + WHEN la.oid IS NOT NULL THEN 'l'::text + WHEN rw.oid IS NOT NULL THEN 'R'::text + WHEN co.oid IS NOT NULL THEN 'C'::text || contype::text + WHEN ad.oid IS NOT NULL THEN 'A'::text + ELSE '' + END AS type, + COALESCE(coc.relname, clrw.relname, tgr.relname) AS ownertable, + CASE WHEN cl.relname IS NOT NULL AND att.attname IS NOT NULL THEN cl.relname || '.' 
|| att.attname + ELSE COALESCE(cl.relname, co.conname, pr.proname, tg.tgname, ty.typname, la.lanname, rw.rulename, ns.nspname) + END AS refname, + COALESCE(nsc.nspname, nso.nspname, nsp.nspname, nst.nspname, nsrw.nspname, tgrn.nspname) AS nspname + FROM pg_depend dep + LEFT JOIN pg_class cl ON dep.objid=cl.oid + LEFT JOIN pg_attribute att ON dep.objid=att.attrelid AND dep.objsubid=att.attnum + LEFT JOIN pg_namespace nsc ON cl.relnamespace=nsc.oid + LEFT JOIN pg_proc pr ON dep.objid=pr.oid + LEFT JOIN pg_namespace nsp ON pr.pronamespace=nsp.oid + LEFT JOIN pg_trigger tg ON dep.objid=tg.oid + LEFT JOIN pg_class tgr ON tg.tgrelid=tgr.oid + LEFT JOIN pg_namespace tgrn ON tgr.relnamespace=tgrn.oid + LEFT JOIN pg_type ty ON dep.objid=ty.oid + LEFT JOIN pg_namespace nst ON ty.typnamespace=nst.oid + LEFT JOIN pg_constraint co ON dep.objid=co.oid + LEFT JOIN pg_class coc ON co.conrelid=coc.oid + LEFT JOIN pg_namespace nso ON co.connamespace=nso.oid + LEFT JOIN pg_rewrite rw ON dep.objid=rw.oid + LEFT JOIN pg_class clrw ON clrw.oid=rw.ev_class + LEFT JOIN pg_namespace nsrw ON clrw.relnamespace=nsrw.oid + LEFT JOIN pg_language la ON dep.objid=la.oid + LEFT JOIN pg_namespace ns ON dep.objid=ns.oid + LEFT JOIN pg_attrdef ad ON ad.oid=dep.objid + LEFT JOIN pg_attribute attr ON attr.attrelid=ad.adrelid and attr.attnum=ad.adnum + WHERE dep.refobjid=$1 + ORDER BY type + batch_plan: |- + BatchExchange { order: [$expr4 ASC], dist: Single } + └─BatchSort { order: [$expr4 ASC] } + └─BatchHashAgg { group_key: [null:Varchar, null:Int32, null:Int32, $expr1, rw_columns.name, '':Varchar, $expr4, $expr5, $expr6, $expr7], aggs: [] } + └─BatchExchange { order: [], dist: HashShard(null:Varchar, null:Int32, null:Int32, $expr1, rw_columns.name, '':Varchar, $expr4, $expr5, $expr6, $expr7) } + └─BatchProject { exprs: [null:Varchar, null:Int32, null:Int32, $expr1, rw_columns.name, '':Varchar, Case(IsNotNull($expr1), ConcatOp($expr1, Coalesce(null:Int16::Varchar, '':Varchar)), IsNotNull(null:Int32), 
'T':Varchar, IsNotNull(rw_types.id), 'y':Varchar, IsNotNull(rw_schemas.id), 'n':Varchar, IsNotNull(null:Int32), 'p':Varchar, IsNotNull(null:Int32), 'l':Varchar, IsNotNull(null:Int32), 'R':Varchar, IsNotNull(pg_constraint.oid), ConcatOp('C':Varchar, pg_constraint.contype), IsNotNull(null:Int32), 'A':Varchar, '':Varchar) as $expr4, Coalesce(rw_tables.name, rw_tables.name, rw_tables.name) as $expr5, Case((IsNotNull(rw_tables.name) AND IsNotNull(rw_columns.name)), ConcatOp(ConcatOp(rw_tables.name, '.':Varchar), rw_columns.name), Coalesce(rw_tables.name, pg_constraint.conname, null:Varchar, null:Varchar, rw_types.name, null:Varchar, null:Varchar, rw_schemas.name)) as $expr6, Coalesce(rw_schemas.name, rw_schemas.name, rw_schemas.name, rw_schemas.name, rw_schemas.name, rw_schemas.name) as $expr7] } + └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = rw_columns.relation_id AND null:Int16 = $expr3, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, $expr1, rw_columns.name, rw_schemas.name, null:Int32, null:Varchar, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_schemas.name, rw_types.id, rw_types.name, rw_schemas.name, pg_constraint.oid, pg_constraint.conname, pg_constraint.contype, rw_tables.name, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_schemas.name, null:Int32, null:Varchar, rw_schemas.id, rw_schemas.name, null:Int32, rw_columns.name] } + ├─BatchExchange { order: [], dist: HashShard(null:Int32, null:Int16) } + │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = null:Int32, output: all } + │ ├─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = rw_schemas.id, output: all } + │ │ ├─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = null:Int32, output: all } + │ │ │ ├─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: rw_tables.schema_id = rw_schemas.id, output: [null:Int32, null:Int32, null:Int16, null:Varchar, 
rw_tables.name, $expr1, rw_columns.name, rw_schemas.name, null:Int32, null:Varchar, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_schemas.name, rw_types.id, rw_types.name, rw_schemas.name, pg_constraint.oid, pg_constraint.conname, pg_constraint.contype, rw_tables.name, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_schemas.name] } + │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(rw_tables.schema_id) } + │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = rw_tables.id, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, $expr1, rw_columns.name, rw_schemas.name, null:Int32, null:Varchar, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_schemas.name, rw_types.id, rw_types.name, rw_schemas.name, pg_constraint.oid, pg_constraint.conname, pg_constraint.contype, rw_tables.name, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_tables.schema_id] } + │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = null:Int32, output: all } + │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: pg_constraint.connamespace = rw_schemas.id, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, $expr1, rw_columns.name, rw_schemas.name, null:Int32, null:Varchar, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_schemas.name, rw_types.id, rw_types.name, rw_schemas.name, pg_constraint.oid, pg_constraint.conname, pg_constraint.contype, rw_tables.name, rw_schemas.name] } + │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(pg_constraint.connamespace) } + │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: pg_constraint.conrelid = rw_tables.id, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, $expr1, rw_columns.name, rw_schemas.name, null:Int32, 
null:Varchar, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_schemas.name, rw_types.id, rw_types.name, rw_schemas.name, pg_constraint.oid, pg_constraint.conname, pg_constraint.connamespace, pg_constraint.contype, rw_tables.name] } + │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(pg_constraint.conrelid) } + │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = pg_constraint.oid, output: all } + │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: rw_schemas.id = rw_schemas.id, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, $expr1, rw_columns.name, rw_schemas.name, null:Int32, null:Varchar, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_schemas.name, rw_types.id, rw_types.name, rw_schemas.name] } + │ │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(rw_schemas.id) } + │ │ │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = rw_types.id, output: all } + │ │ │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: rw_tables.schema_id = rw_schemas.id, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, $expr1, rw_columns.name, rw_schemas.name, null:Int32, null:Varchar, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_schemas.name] } + │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(rw_tables.schema_id) } + │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = rw_tables.id, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, $expr1, rw_columns.name, rw_schemas.name, null:Int32, null:Varchar, rw_schemas.name, null:Int32, null:Varchar, rw_tables.name, rw_tables.schema_id] } + │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: 
HashShard(null:Int32) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = null:Int32, output: all } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = rw_schemas.id, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, $expr1, rw_columns.name, rw_schemas.name, null:Int32, null:Varchar, rw_schemas.name] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = null:Int32, output: all } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: rw_tables.schema_id = rw_schemas.id, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, $expr1, rw_columns.name, rw_schemas.name] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(rw_tables.schema_id) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = rw_columns.relation_id AND null:Int16 = $expr2, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, rw_tables.schema_id, $expr1, rw_columns.name] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(null:Int32, null:Int16) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchHashJoin { type: LeftOuter, predicate: null:Int32 = rw_tables.id, output: [null:Int32, null:Int32, null:Int16, null:Varchar, rw_tables.name, rw_tables.schema_id, $expr1] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchValues { rows: [] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: 
HashShard(rw_tables.id) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchProject { exprs: [rw_tables.id, rw_tables.name, rw_tables.schema_id, Case(('table':Varchar = 'table':Varchar), 'r':Varchar, ('table':Varchar = 'system table':Varchar), 'r':Varchar, ('table':Varchar = 'index':Varchar), 'i':Varchar, ('table':Varchar = 'view':Varchar), 'v':Varchar, ('table':Varchar = 'materialized view':Varchar), 'm':Varchar) as $expr1] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchUnion { all: true } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchProject { exprs: [rw_tables.id, rw_tables.name, 'table':Varchar, rw_tables.schema_id] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_tables, columns: [rw_tables.id, rw_tables.name, rw_tables.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchProject { exprs: [rw_system_tables.id, rw_system_tables.name, 'system table':Varchar, rw_system_tables.schema_id] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_system_tables, columns: [rw_system_tables.id, rw_system_tables.name, rw_system_tables.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchProject { exprs: [rw_sources.id, rw_sources.name, 'source':Varchar, rw_sources.schema_id] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_sources, columns: [rw_sources.id, rw_sources.name, rw_sources.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchProject { exprs: [rw_indexes.id, rw_indexes.name, 'index':Varchar, rw_indexes.schema_id] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_indexes, columns: [rw_indexes.id, rw_indexes.name, rw_indexes.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchProject { exprs: [rw_sinks.id, rw_sinks.name, 'sink':Varchar, rw_sinks.schema_id] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_sinks, columns: [rw_sinks.id, rw_sinks.name, 
rw_sinks.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchProject { exprs: [rw_materialized_views.id, rw_materialized_views.name, 'materialized view':Varchar, rw_materialized_views.schema_id] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_materialized_views, columns: [rw_materialized_views.id, rw_materialized_views.name, rw_materialized_views.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchProject { exprs: [rw_views.id, rw_views.name, 'view':Varchar, rw_views.schema_id] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_views, columns: [rw_views.id, rw_views.name, rw_views.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_columns.relation_id, $expr2) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchProject { exprs: [rw_columns.relation_id, rw_columns.name, rw_columns.position::Int16 as $expr2] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchFilter { predicate: (rw_columns.is_hidden = false:Boolean) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_columns, columns: [rw_columns.relation_id, rw_columns.name, rw_columns.position, rw_columns.is_hidden], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_schemas.id) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_schemas, columns: [rw_schemas.id, rw_schemas.name], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchValues { rows: [] } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_schemas.id) } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_schemas, columns: [rw_schemas.id, rw_schemas.name], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ │ │ │ │ │ 
│ │ │ └─BatchValues { rows: [] } + │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_tables.id) } + │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchUnion { all: true } + │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_tables, columns: [rw_tables.id, rw_tables.name, rw_tables.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_system_tables, columns: [rw_system_tables.id, rw_system_tables.name, rw_system_tables.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_sources, columns: [rw_sources.id, rw_sources.name, rw_sources.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_indexes, columns: [rw_indexes.id, rw_indexes.name, rw_indexes.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_sinks, columns: [rw_sinks.id, rw_sinks.name, rw_sinks.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_materialized_views, columns: [rw_materialized_views.id, rw_materialized_views.name, rw_materialized_views.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_views, columns: [rw_views.id, rw_views.name, rw_views.schema_id], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_schemas.id) } + │ │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_schemas, columns: [rw_schemas.id, rw_schemas.name], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_types.id) } + │ │ │ │ │ │ │ │ │ │ │ └─BatchNestedLoopJoin { type: Inner, predicate: true, output: all } + │ │ │ │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_types, columns: [rw_types.id, rw_types.name], distribution: Single } + │ │ │ │ │ │ │ │ │ │ │ └─BatchProject { exprs: [rw_schemas.id] } + │ │ │ │ │ │ │ │ │ │ │ └─BatchFilter { predicate: (rw_schemas.name = 'pg_catalog':Varchar) } + │ │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_schemas, columns: 
[rw_schemas.id, rw_schemas.name], distribution: Single } + │ │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_schemas.id) } + │ │ │ │ │ │ │ │ │ │ └─BatchScan { table: rw_schemas, columns: [rw_schemas.id, rw_schemas.name], distribution: Single } + │ │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(pg_constraint.oid) } + │ │ │ │ │ │ │ │ │ └─BatchScan { table: pg_constraint, columns: [pg_constraint.oid, pg_constraint.conname, pg_constraint.connamespace, pg_constraint.contype, pg_constraint.conrelid], distribution: Single } + │ │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_tables.id) } + │ │ │ │ │ │ │ │ └─BatchUnion { all: true } + │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_tables, columns: [rw_tables.id, rw_tables.name], distribution: Single } + │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_system_tables, columns: [rw_system_tables.id, rw_system_tables.name], distribution: Single } + │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_sources, columns: [rw_sources.id, rw_sources.name], distribution: Single } + │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_indexes, columns: [rw_indexes.id, rw_indexes.name], distribution: Single } + │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_sinks, columns: [rw_sinks.id, rw_sinks.name], distribution: Single } + │ │ │ │ │ │ │ │ ├─BatchScan { table: rw_materialized_views, columns: [rw_materialized_views.id, rw_materialized_views.name], distribution: Single } + │ │ │ │ │ │ │ │ └─BatchScan { table: rw_views, columns: [rw_views.id, rw_views.name], distribution: Single } + │ │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_schemas.id) } + │ │ │ │ │ │ │ └─BatchScan { table: rw_schemas, columns: [rw_schemas.id, rw_schemas.name], distribution: Single } + │ │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ │ │ │ └─BatchValues { rows: [] } + │ │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_tables.id) } + │ │ │ │ │ └─BatchUnion { all: true } + │ │ │ │ │ ├─BatchScan { table: rw_tables, 
columns: [rw_tables.id, rw_tables.name, rw_tables.schema_id], distribution: Single } + │ │ │ │ │ ├─BatchScan { table: rw_system_tables, columns: [rw_system_tables.id, rw_system_tables.name, rw_system_tables.schema_id], distribution: Single } + │ │ │ │ │ ├─BatchScan { table: rw_sources, columns: [rw_sources.id, rw_sources.name, rw_sources.schema_id], distribution: Single } + │ │ │ │ │ ├─BatchScan { table: rw_indexes, columns: [rw_indexes.id, rw_indexes.name, rw_indexes.schema_id], distribution: Single } + │ │ │ │ │ ├─BatchScan { table: rw_sinks, columns: [rw_sinks.id, rw_sinks.name, rw_sinks.schema_id], distribution: Single } + │ │ │ │ │ ├─BatchScan { table: rw_materialized_views, columns: [rw_materialized_views.id, rw_materialized_views.name, rw_materialized_views.schema_id], distribution: Single } + │ │ │ │ │ └─BatchScan { table: rw_views, columns: [rw_views.id, rw_views.name, rw_views.schema_id], distribution: Single } + │ │ │ │ └─BatchExchange { order: [], dist: HashShard(rw_schemas.id) } + │ │ │ │ └─BatchScan { table: rw_schemas, columns: [rw_schemas.id, rw_schemas.name], distribution: Single } + │ │ │ └─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ │ │ └─BatchValues { rows: [] } + │ │ └─BatchExchange { order: [], dist: HashShard(rw_schemas.id) } + │ │ └─BatchScan { table: rw_schemas, columns: [rw_schemas.id, rw_schemas.name], distribution: Single } + │ └─BatchExchange { order: [], dist: HashShard(null:Int32) } + │ └─BatchValues { rows: [] } + └─BatchExchange { order: [], dist: HashShard(rw_columns.relation_id, $expr3) } + └─BatchProject { exprs: [rw_columns.relation_id, rw_columns.name, rw_columns.position::Int16 as $expr3] } + └─BatchFilter { predicate: (rw_columns.is_hidden = false:Boolean) } + └─BatchScan { table: rw_columns, columns: [rw_columns.relation_id, rw_columns.name, rw_columns.position, rw_columns.is_hidden], distribution: Single } diff --git a/src/frontend/planner_test/tests/testdata/output/subquery.yaml 
b/src/frontend/planner_test/tests/testdata/output/subquery.yaml index 6196244e193a5..c7e56eec3da13 100644 --- a/src/frontend/planner_test/tests/testdata/output/subquery.yaml +++ b/src/frontend/planner_test/tests/testdata/output/subquery.yaml @@ -227,7 +227,7 @@ └─LogicalFilter { predicate: In($expr1, 'r':Varchar, 'p':Varchar, 'v':Varchar, 'm':Varchar, 'S':Varchar, 'f':Varchar, '':Varchar) AND (rw_schemas.name <> 'pg_catalog':Varchar) AND Not(RegexpEq(rw_schemas.name, '^pg_toast':Varchar)) AND (rw_schemas.name <> 'information_schema':Varchar) } └─LogicalJoin { type: LeftOuter, on: (rw_schemas.id = rw_tables.schema_id), output: all } ├─LogicalShare { id: 16 } - │ └─LogicalProject { exprs: [rw_tables.id, rw_tables.name, rw_tables.schema_id, rw_tables.owner, 'p':Varchar, Case(('table':Varchar = 'table':Varchar), 'r':Varchar, ('table':Varchar = 'system table':Varchar), 'r':Varchar, ('table':Varchar = 'index':Varchar), 'i':Varchar, ('table':Varchar = 'view':Varchar), 'v':Varchar, ('table':Varchar = 'materialized view':Varchar), 'm':Varchar) as $expr1, 0:Int32, 0:Int32, Array as $expr2] } + │ └─LogicalProject { exprs: [rw_tables.id, rw_tables.name, rw_tables.schema_id, rw_tables.owner, 'p':Varchar, Case(('table':Varchar = 'table':Varchar), 'r':Varchar, ('table':Varchar = 'system table':Varchar), 'r':Varchar, ('table':Varchar = 'index':Varchar), 'i':Varchar, ('table':Varchar = 'view':Varchar), 'v':Varchar, ('table':Varchar = 'materialized view':Varchar), 'm':Varchar) as $expr1, 0:Int32, 0:Int32, Array as $expr2, null:Varchar] } │ └─LogicalShare { id: 14 } │ └─LogicalUnion { all: true } │ ├─LogicalUnion { all: true } diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index 22fc4ce99c45f..86b8fa93671b2 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -1323,6 +1323,7 @@ impl Binder { ("pg_table_is_visible", raw_literal(ExprImpl::literal_bool(true))), ("pg_type_is_visible", 
raw_literal(ExprImpl::literal_bool(true))), ("pg_get_constraintdef", raw_literal(ExprImpl::literal_null(DataType::Varchar))), + ("pg_get_partkeydef", raw_literal(ExprImpl::literal_null(DataType::Varchar))), ("pg_encoding_to_char", raw_literal(ExprImpl::literal_varchar("UTF8".into()))), ("has_database_privilege", raw_literal(ExprImpl::literal_bool(true))), ("pg_backend_pid", raw(|binder, _inputs| { diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs index ce97aeaac552c..f1bcc3f46f62c 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs @@ -30,6 +30,7 @@ mod pg_index; mod pg_indexes; mod pg_inherits; mod pg_keywords; +mod pg_language; mod pg_locks; mod pg_matviews; mod pg_namespace; @@ -37,6 +38,7 @@ mod pg_opclass; mod pg_operator; mod pg_partitioned_table; mod pg_proc; +mod pg_rewrite; mod pg_roles; mod pg_settings; mod pg_shadow; @@ -44,6 +46,7 @@ mod pg_shdescription; mod pg_stat_activity; mod pg_tables; mod pg_tablespace; +mod pg_trigger; mod pg_type; mod pg_user; mod pg_views; diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_class.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_class.rs index 0d2dc8c8a41a7..d2954f033b3b3 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_class.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_class.rs @@ -29,7 +29,8 @@ use risingwave_frontend_macro::system_catalog; END relkind, 0 AS relam, 0 AS reltablespace, - ARRAY[]::varchar[] AS reloptions + ARRAY[]::varchar[] AS reloptions, + null AS relpartbound FROM rw_catalog.rw_relations ")] #[derive(Fields)] @@ -46,4 +47,6 @@ struct PgClass { relam: i32, reltablespace: i32, reloptions: Vec, + // PG uses pg_node_tree type but RW doesn't support it + relpartbound: Option, } diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_constraint.rs 
b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_constraint.rs index d69a4b881570e..c25755f4c3c9c 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_constraint.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_constraint.rs @@ -15,12 +15,17 @@ use risingwave_common::types::Fields; use risingwave_frontend_macro::system_catalog; +use crate::catalog::schema_catalog::SchemaCatalog; +use crate::catalog::system_catalog::{SysCatalogReaderImpl, SystemTableCatalog}; +use crate::error::Result; +use crate::TableCatalog; + /// The catalog `pg_constraint` records information about table and index inheritance hierarchies. /// Ref: [`https://www.postgresql.org/docs/current/catalog-pg-constraint.html`] /// This is introduced only for pg compatibility and is not used in our system. -#[system_catalog(view, "pg_catalog.pg_constraint")] #[derive(Fields)] struct PgConstraint { + #[primary_key] oid: i32, conname: String, connamespace: i32, @@ -38,12 +43,105 @@ struct PgConstraint { conislocal: bool, coninhcount: i32, connoinherit: bool, - conkey: Vec, - confkey: Vec, - conpfeqop: Vec, - conppeqop: Vec, - conffeqop: Vec, - confdelsetcols: Vec, - conexclop: Vec, - conbin: String, + conkey: Option>, + confkey: Option>, + conpfeqop: Option>, + conppeqop: Option>, + conffeqop: Option>, + confdelsetcols: Option>, + conexclop: Option>, + conbin: Option, +} + +impl PgConstraint { + fn from_system_table(schema: &SchemaCatalog, table: &SystemTableCatalog) -> PgConstraint { + // List of the constrained columns. First column starts from 1. + let conkey: Vec<_> = table.pk.iter().map(|i| (*i + 1) as i16).collect(); + PgConstraint { + oid: table.id.table_id() as i32, // Use table_id as a mock oid of constraint here. 
+ conname: format!("{}_pkey", &table.name), + connamespace: schema.id() as i32, + contype: "p".to_owned(), // p = primary key constraint + condeferrable: false, + convalidated: true, + conrelid: table.id.table_id() as i32, + contypid: 0, + // Use table_id as a mock index oid of constraint here. + conindid: table.id.table_id() as i32, + conparentid: 0, + confrelid: 0, + confupdtype: " ".to_owned(), + confdeltype: " ".to_owned(), + confmatchtype: " ".to_owned(), + conislocal: true, + coninhcount: 0, + connoinherit: true, + conkey: Some(conkey), + confkey: None, + conpfeqop: None, + conppeqop: None, + conffeqop: None, + confdelsetcols: None, + conexclop: None, + conbin: None, + } + } + + fn from_table(schema: &SchemaCatalog, table: &TableCatalog) -> PgConstraint { + // List of the constrained columns. First column starts from 1. + let conkey: Vec<_> = table + .pk + .iter() + .map(|i| (i.column_index + 1) as i16) + .collect(); + PgConstraint { + oid: table.id.table_id() as i32, // Use table_id as a mock oid of constraint here. + conname: format!("{}_pkey", &table.name), + connamespace: schema.id() as i32, + contype: "p".to_owned(), // p = primary key constraint + condeferrable: false, + convalidated: true, + conrelid: table.id.table_id() as i32, + contypid: 0, + // Use table_id as a mock index oid of constraint here. 
+ conindid: table.id.table_id() as i32, + conparentid: 0, + confrelid: 0, + confupdtype: " ".to_owned(), + confdeltype: " ".to_owned(), + confmatchtype: " ".to_owned(), + conislocal: true, + coninhcount: 0, + connoinherit: true, + conkey: Some(conkey), + confkey: None, + conpfeqop: None, + conppeqop: None, + conffeqop: None, + confdelsetcols: None, + conexclop: None, + conbin: None, + } + } +} + +#[system_catalog(table, "pg_catalog.pg_constraint")] +fn read_pg_constraint(reader: &SysCatalogReaderImpl) -> Result> { + let catalog_reader = reader.catalog_reader.read_guard(); + let schemas = catalog_reader.iter_schemas(&reader.auth_context.database)?; + + Ok(schemas.flat_map(read_pg_constraint_in_schema).collect()) +} + +fn read_pg_constraint_in_schema(schema: &SchemaCatalog) -> Vec { + // Note: We only support primary key constraints now. + let system_table_rows = schema + .iter_system_tables() + .map(|table| PgConstraint::from_system_table(schema, table.as_ref())); + + let table_rows = schema + .iter_valid_table() + .map(|table| PgConstraint::from_table(schema, table.as_ref())); + + system_table_rows.chain(table_rows).collect() } diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_language.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_language.rs new file mode 100644 index 0000000000000..3f29502bca3e8 --- /dev/null +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_language.rs @@ -0,0 +1,34 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::Fields; +use risingwave_frontend_macro::system_catalog; + +/// The catalog `pg_language` registers languages in which you can write functions or stored procedures. +/// Ref: [`https://www.postgresql.org/docs/current/catalog-pg-language.html`] +/// This is introduced only for pg compatibility and is not used in our system. +#[system_catalog(view, "pg_catalog.pg_language")] +#[derive(Fields)] +struct PgLanguage { + #[primary_key] + oid: i32, + lanname: String, + lanowner: i32, + lanispl: bool, + lanpltrusted: bool, + lanplcallfoid: i32, + laninline: i32, + lanvalidator: i32, + lanacl: Vec, +} diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_rewrite.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_rewrite.rs new file mode 100644 index 0000000000000..3a048d56ec414 --- /dev/null +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_rewrite.rs @@ -0,0 +1,33 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::Fields; +use risingwave_frontend_macro::system_catalog; + +/// The catalog `pg_rewrite` stores rewrite rules for tables and views. +/// Ref: [`https://www.postgresql.org/docs/current/catalog-pg-rewrite.html`] +/// This is introduced only for pg compatibility and is not used in our system. 
+#[system_catalog(view, "pg_catalog.pg_rewrite")] +#[derive(Fields)] +struct PgRewrite { + #[primary_key] + oid: i32, + rulename: String, + ev_class: i32, + ev_type: String, + ev_enabled: String, + is_instead: bool, + ev_qual: String, + ev_action: String, +} diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_trigger.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_trigger.rs new file mode 100644 index 0000000000000..223363f48b4a8 --- /dev/null +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_trigger.rs @@ -0,0 +1,44 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::Fields; +use risingwave_frontend_macro::system_catalog; + +/// The catalog `pg_trigger` stores triggers on tables and views. +/// Ref: [`https://www.postgresql.org/docs/current/catalog-pg-trigger.html`] +/// This is introduced only for pg compatibility and is not used in our system. 
+#[system_catalog(view, "pg_catalog.pg_trigger")] +#[derive(Fields)] +struct PgTrigger { + #[primary_key] + oid: i32, + tgrelid: i32, + tgparentid: i32, + tgname: String, + tgfoid: i32, + tgtype: i16, + tgenabled: String, + tgisinternal: bool, + tgconstrrelid: i32, + tgconstrindid: i32, + tgconstraint: i32, + tgdeferrable: bool, + tginitdeferred: bool, + tgnargs: i16, + tgattr: Vec, + tgargs: Vec, + tgqual: Option, + tgoldtable: Option, + tgnewtable: Option, +} diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs index 8491da7062711..7a5f48a190a0d 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs @@ -15,6 +15,7 @@ use risingwave_common::types::Fields; use risingwave_frontend_macro::system_catalog; +use crate::catalog::schema_catalog::SchemaCatalog; use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::error::Result; use crate::expr::{ExprDisplay, ExprImpl}; @@ -22,9 +23,12 @@ use crate::expr::{ExprDisplay, ExprImpl}; #[derive(Fields)] #[primary_key(relation_id, name)] struct RwColumn { - relation_id: i32, // belonged relation id - name: String, // column name - position: i32, // 1-indexed position + relation_id: i32, + // belonged relation id + name: String, + // column name + position: i32, + // 1-indexed position is_hidden: bool, is_primary_key: bool, is_distribution_key: bool, @@ -41,131 +45,125 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { let catalog_reader = reader.catalog_reader.read_guard(); let schemas = catalog_reader.iter_schemas(&reader.auth_context.database)?; - Ok(schemas - .flat_map(|schema| { - let view_rows = schema.iter_view().flat_map(|view| { - view.columns - .iter() - .enumerate() - .map(|(index, column)| RwColumn { - relation_id: view.id as i32, - name: column.name.clone(), - position: index as i32 + 1, - is_hidden: 
false, - is_primary_key: false, - is_distribution_key: false, - is_generated: false, - generation_expression: None, - data_type: column.data_type().to_string(), - type_oid: column.data_type().to_oid(), - type_len: column.data_type().type_len(), - udt_type: column.data_type().pg_name().into(), - }) - }); + Ok(schemas.flat_map(read_rw_columns_in_schema).collect()) +} + +fn read_rw_columns_in_schema(schema: &SchemaCatalog) -> Vec { + let view_rows = schema.iter_view().flat_map(|view| { + view.columns + .iter() + .enumerate() + .map(|(index, column)| RwColumn { + relation_id: view.id as i32, + name: column.name.clone(), + position: index as i32 + 1, + is_hidden: false, + is_primary_key: false, + is_distribution_key: false, + is_generated: false, + generation_expression: None, + data_type: column.data_type().to_string(), + type_oid: column.data_type().to_oid(), + type_len: column.data_type().type_len(), + udt_type: column.data_type().pg_name().into(), + }) + }); + + let sink_rows = schema.iter_sink().flat_map(|sink| { + sink.full_columns() + .iter() + .enumerate() + .map(|(index, column)| RwColumn { + relation_id: sink.id.sink_id as i32, + name: column.name().into(), + position: index as i32 + 1, + is_hidden: column.is_hidden, + is_primary_key: sink.downstream_pk.contains(&index), + is_distribution_key: sink.distribution_key.contains(&index), + is_generated: false, + generation_expression: None, + data_type: column.data_type().to_string(), + type_oid: column.data_type().to_oid(), + type_len: column.data_type().type_len(), + udt_type: column.data_type().pg_name().into(), + }) + }); - let sink_rows = schema - .iter_sink() - .flat_map(|sink| { - sink.full_columns() - .iter() - .enumerate() - .map(|(index, column)| RwColumn { - relation_id: sink.id.sink_id as i32, - name: column.name().into(), - position: index as i32 + 1, - is_hidden: column.is_hidden, - is_primary_key: sink.downstream_pk.contains(&index), - is_distribution_key: sink.distribution_key.contains(&index), - 
is_generated: false, - generation_expression: None, - data_type: column.data_type().to_string(), - type_oid: column.data_type().to_oid(), - type_len: column.data_type().type_len(), - udt_type: column.data_type().pg_name().into(), - }) - }) - .chain(view_rows); + let catalog_rows = schema.iter_system_tables().flat_map(|table| { + table + .columns + .iter() + .enumerate() + .map(move |(index, column)| RwColumn { + relation_id: table.id.table_id as i32, + name: column.name().into(), + position: index as i32 + 1, + is_hidden: column.is_hidden, + is_primary_key: table.pk.contains(&index), + is_distribution_key: false, + is_generated: false, + generation_expression: None, + data_type: column.data_type().to_string(), + type_oid: column.data_type().to_oid(), + type_len: column.data_type().type_len(), + udt_type: column.data_type().pg_name().into(), + }) + }); - let catalog_rows = schema - .iter_system_tables() - .flat_map(|table| { - table - .columns - .iter() - .enumerate() - .map(move |(index, column)| RwColumn { - relation_id: table.id.table_id as i32, - name: column.name().into(), - position: index as i32 + 1, - is_hidden: column.is_hidden, - is_primary_key: table.pk.contains(&index), - is_distribution_key: false, - is_generated: false, - generation_expression: None, - data_type: column.data_type().to_string(), - type_oid: column.data_type().to_oid(), - type_len: column.data_type().type_len(), - udt_type: column.data_type().pg_name().into(), - }) - }) - .chain(sink_rows); + let table_rows = schema.iter_valid_table().flat_map(|table| { + let schema = table.column_schema(); + table + .columns + .iter() + .enumerate() + .map(move |(index, column)| RwColumn { + relation_id: table.id.table_id as i32, + name: column.name().into(), + position: index as i32 + 1, + is_hidden: column.is_hidden, + is_primary_key: table.pk().iter().any(|idx| idx.column_index == index), + is_distribution_key: table.distribution_key.contains(&index), + is_generated: column.is_generated(), + 
generation_expression: column.generated_expr().map(|expr_node| { + let expr = ExprImpl::from_expr_proto(expr_node).unwrap(); + let expr_display = ExprDisplay { + expr: &expr, + input_schema: &schema, + }; + expr_display.to_string() + }), + data_type: column.data_type().to_string(), + type_oid: column.data_type().to_oid(), + type_len: column.data_type().type_len(), + udt_type: column.data_type().pg_name().into(), + }) + }); - let table_rows = schema - .iter_valid_table() - .flat_map(|table| { - let schema = table.column_schema(); - table - .columns - .iter() - .enumerate() - .map(move |(index, column)| RwColumn { - relation_id: table.id.table_id as i32, - name: column.name().into(), - position: index as i32 + 1, - is_hidden: column.is_hidden, - is_primary_key: table.pk().iter().any(|idx| idx.column_index == index), - is_distribution_key: table.distribution_key.contains(&index), - is_generated: column.is_generated(), - generation_expression: column.generated_expr().map(|expr_node| { - let expr = ExprImpl::from_expr_proto(expr_node).unwrap(); - let expr_display = ExprDisplay { - expr: &expr, - input_schema: &schema, - }; - expr_display.to_string() - }), - data_type: column.data_type().to_string(), - type_oid: column.data_type().to_oid(), - type_len: column.data_type().type_len(), - udt_type: column.data_type().pg_name().into(), - }) - }) - .chain(catalog_rows); + let schema_rows = schema.iter_source().flat_map(|source| { + source + .columns + .iter() + .enumerate() + .map(move |(index, column)| RwColumn { + relation_id: source.id as i32, + name: column.name().into(), + position: index as i32 + 1, + is_hidden: column.is_hidden, + is_primary_key: source.pk_col_ids.contains(&column.column_id()), + is_distribution_key: false, + is_generated: false, + generation_expression: None, + data_type: column.data_type().to_string(), + type_oid: column.data_type().to_oid(), + type_len: column.data_type().type_len(), + udt_type: column.data_type().pg_name().into(), + }) + }); - // 
source columns - schema - .iter_source() - .flat_map(|source| { - source - .columns - .iter() - .enumerate() - .map(move |(index, column)| RwColumn { - relation_id: source.id as i32, - name: column.name().into(), - position: index as i32 + 1, - is_hidden: column.is_hidden, - is_primary_key: source.pk_col_ids.contains(&column.column_id()), - is_distribution_key: false, - is_generated: false, - generation_expression: None, - data_type: column.data_type().to_string(), - type_oid: column.data_type().to_oid(), - type_len: column.data_type().type_len(), - udt_type: column.data_type().pg_name().into(), - }) - }) - .chain(table_rows) - }) - .collect()) + view_rows + .chain(sink_rows) + .chain(catalog_rows) + .chain(table_rows) + .chain(schema_rows) + .collect() }